ccdown 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Describe the bug**
11
+ A clear and concise description of what the bug is.
12
+
13
+ **To Reproduce**
14
+ Steps to reproduce the behavior:
15
+ 1. Go to '...'
16
+ 2. Click on '....'
17
+ 3. Scroll down to '....'
18
+ 4. See error
19
+
20
+ **Expected behavior**
21
+ A clear and concise description of what you expected to happen.
22
+
23
+ **Screenshots**
24
+ If applicable, add screenshots to help explain your problem.
25
+
26
+ **Desktop (please complete the following information):**
27
+ - OS: [e.g. iOS]
28
+ - Browser [e.g. chrome, safari]
29
+ - Version [e.g. 22]
30
+
31
+ **Smartphone (please complete the following information):**
32
+ - Device: [e.g. iPhone6]
33
+ - OS: [e.g. iOS8.1]
34
+ - Browser [e.g. stock browser, safari]
35
+ - Version [e.g. 22]
36
+
37
+ **Additional context**
38
+ Add any other context about the problem here.
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
@@ -0,0 +1,11 @@
1
+ ## Description
2
+
3
+ <!-- A summary of what this pull request achieves and a rough list of changes. Please also reference any related issue here -->
4
+
5
+ ## Breaking Changes
6
+
7
+ <!-- Optional, if there are any breaking changes document them, including how to migrate older code. -->
8
+
9
+ ## Notes & open questions
10
+
11
+ <!-- Any notes, remarks or open questions you have to make about the PR. -->
@@ -0,0 +1,92 @@
1
+ name: Python
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ linux:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ target: [x86_64]
18
+ steps:
19
+ - uses: actions/checkout@v5
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.12"
23
+ - name: Build wheels
24
+ uses: PyO3/maturin-action@v1
25
+ with:
26
+ target: ${{ matrix.target }}
27
+ args: --release --out dist --find-interpreter
28
+ manylinux: auto
29
+ - name: Upload wheels
30
+ uses: actions/upload-artifact@v4
31
+ with:
32
+ name: wheels-linux-${{ matrix.target }}
33
+ path: dist
34
+
35
+ macos:
36
+ runs-on: ${{ matrix.platform.runner }}
37
+ strategy:
38
+ matrix:
39
+ platform:
40
+ - runner: macos-latest
41
+ target: x86_64
42
+ - runner: macos-latest
43
+ target: aarch64
44
+ steps:
45
+ - uses: actions/checkout@v5
46
+ - uses: actions/setup-python@v5
47
+ with:
48
+ python-version: "3.12"
49
+ - name: Build wheels
50
+ uses: PyO3/maturin-action@v1
51
+ with:
52
+ target: ${{ matrix.platform.target }}
53
+ args: --release --out dist
54
+ - name: Upload wheels
55
+ uses: actions/upload-artifact@v4
56
+ with:
57
+ name: wheels-macos-${{ matrix.platform.target }}
58
+ path: dist
59
+
60
+ sdist:
61
+ runs-on: ubuntu-latest
62
+ steps:
63
+ - uses: actions/checkout@v5
64
+ - name: Build sdist
65
+ uses: PyO3/maturin-action@v1
66
+ with:
67
+ command: sdist
68
+ args: --out dist
69
+ - name: Upload sdist
70
+ uses: actions/upload-artifact@v4
71
+ with:
72
+ name: wheels-sdist
73
+ path: dist
74
+
75
+ publish:
76
+ name: Publish to PyPI
77
+ runs-on: ubuntu-latest
78
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
79
+ needs: [linux, macos, sdist]
80
+ environment: prod
81
+ permissions:
82
+ id-token: write
83
+ steps:
84
+ - uses: actions/download-artifact@v4
85
+ with:
86
+ pattern: wheels-*
87
+ merge-multiple: true
88
+ path: dist
89
+ - name: Publish to PyPI
90
+ uses: pypa/gh-action-pypi-publish@release/v1
91
+ with:
92
+ skip-existing: true
@@ -0,0 +1,24 @@
1
+ name: Release Plz
2
+
3
+ permissions:
4
+ pull-requests: write
5
+ contents: write
6
+
7
+ on:
8
+ push:
9
+ branches:
10
+ - main
11
+
12
+ jobs:
13
+ release-plz:
14
+ runs-on: ubuntu-latest
15
+ environment: prod
16
+ steps:
17
+ - uses: actions/checkout@v5
18
+ with:
19
+ fetch-depth: 0
20
+ - uses: dtolnay/rust-toolchain@stable
21
+ - uses: MarcoIeni/release-plz-action@v0.5
22
+ env:
23
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
24
+ CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
@@ -0,0 +1,58 @@
1
+ name: Release
2
+
3
+ permissions:
4
+ contents: write
5
+
6
+ on:
7
+ push:
8
+ tags:
9
+ - v[0-9]+.*
10
+
11
+ jobs:
12
+ create-release:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v5
16
+ - uses: taiki-e/create-gh-release-action@v1
17
+ with:
18
+ token: ${{ secrets.GITHUB_TOKEN }}
19
+
20
+ upload-assets:
21
+ needs: create-release
22
+ strategy:
23
+ matrix:
24
+ include:
25
+ - target: aarch64-unknown-linux-gnu
26
+ os: ubuntu-latest
27
+ - target: x86_64-unknown-linux-gnu
28
+ os: ubuntu-latest
29
+ - target: x86_64-apple-darwin
30
+ os: macos-latest
31
+ - target: aarch64-apple-darwin
32
+ os: macos-latest
33
+ - target: x86_64-pc-windows-msvc
34
+ os: windows-latest
35
+ runs-on: ${{ matrix.os }}
36
+ steps:
37
+ - uses: actions/checkout@v5
38
+ - uses: taiki-e/upload-rust-binary-action@v1
39
+ with:
40
+ bin: ccdown
41
+ target: ${{ matrix.target }}
42
+ archive: $bin-$tag-$target
43
+ tar: unix
44
+ zip: windows
45
+ include: LICENSE-APACHE,LICENSE-MIT,README.md
46
+ checksum: sha512
47
+ token: ${{ secrets.GITHUB_TOKEN }}
48
+
49
+ publish-crate:
50
+ needs: create-release
51
+ runs-on: ubuntu-latest
52
+ environment: prod
53
+ steps:
54
+ - uses: actions/checkout@v5
55
+ - uses: dtolnay/rust-toolchain@stable
56
+ - run: cargo publish
57
+ env:
58
+ CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
@@ -0,0 +1,25 @@
1
+ name: Rust
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main", "dev" ]
6
+ pull_request:
7
+ branches: [ "main", "dev" ]
8
+
9
+ env:
10
+ CARGO_TERM_COLOR: always
11
+
12
+ jobs:
13
+ build:
14
+
15
+ runs-on: ubuntu-latest
16
+
17
+ steps:
18
+ - uses: actions/checkout@v5
19
+ - name: Build
20
+ run: cargo build --verbose
21
+ - name: Run tests
22
+ run: cargo test --verbose
23
+ - name: Run clippy
24
+ run: cargo clippy --verbose
25
+
@@ -0,0 +1,44 @@
1
+ # General
2
+ .DS_Store
3
+ .AppleDouble
4
+ .LSOverride
5
+
6
+ # Icon must end with two \r
7
+ Icon
8
+
9
+ # Thumbnails
10
+ ._*
11
+
12
+ # Files that might appear in the root of a volume
13
+ .DocumentRevisions-V100
14
+ .fseventsd
15
+ .Spotlight-V100
16
+ .TemporaryItems
17
+ .Trashes
18
+ .VolumeIcon.icns
19
+ .com.apple.timemachine.donotpresent
20
+
21
+ # Directories potentially created on remote AFP share
22
+ .AppleDB
23
+ .AppleDesktop
24
+ Network Trash Folder
25
+ Temporary Items
26
+ .apdisk
27
+
28
+ # Generated by Cargo
29
+ # will have compiled files and executables
30
+ debug/
31
+ target/
32
+
33
+ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
34
+ # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
35
+ Cargo.lock
36
+
37
+ # These are backup files generated by rustfmt
38
+ **/*.rs.bk
39
+
40
+ # MSVC Windows builds of rustc generate these, which store debugging information
41
+ *.pdb
42
+
43
+ # Project specific files
44
+ test-download
@@ -0,0 +1,57 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.6.5](https://github.com/4thel00z/ccdown/compare/v0.6.4...v0.6.5) - 2026-04-01
11
+
12
+ ### Other
13
+
14
+ - add Python bindings and --strict flag to README
15
+ - Merge pull request #4 from 4thel00z/release-plz-2026-04-01T02-58-41Z
16
+ - drop linux aarch64, keep macos aarch64
17
+
18
+ ## [0.6.4](https://github.com/4thel00z/ccdown/compare/v0.6.3...v0.6.4) - 2026-04-01
19
+
20
+ ### Fixed
21
+
22
+ - add --find-interpreter for linux manylinux builds
23
+ - replace macos-13 with macos-latest, fix QEMU setup for aarch64
24
+ - add QEMU for aarch64 linux, drop --find-interpreter
25
+
26
+ ### Other
27
+
28
+ - drop windows support
29
+ - drop musllinux support
30
+ - Merge pull request #2 from 4thel00z/release-plz-2026-04-01T02-45-16Z
31
+
32
+ ## [0.6.3](https://github.com/4thel00z/ccdown/compare/v0.6.2...v0.6.3) - 2026-04-01
33
+
34
+ ### Added
35
+
36
+ - add unrecoverable error handling, Python bindings, lib refactor, and tests
37
+
38
+ ### Fixed
39
+
40
+ - bump PyO3 to 0.28 for Python 3.14 support
41
+
42
+ ### Other
43
+
44
+ - Merge pull request #1 from 4thel00z/release-plz-2026-04-01T01-35-18Z
45
+
46
+ ## [0.6.2](https://github.com/4thel00z/ccdown/compare/v0.6.1...v0.6.2) - 2026-04-01
47
+
48
+ ### Other
49
+
50
+ - add logo, badges, modernize README layout
51
+ - add changelog, bump actions/checkout to v5
52
+ # Changelog
53
+
54
+ All notable changes to this project will be documented in this file.
55
+
56
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
57
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
@@ -0,0 +1,128 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, religion, or sexual identity
10
+ and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ * Demonstrating empathy and kindness toward other people
21
+ * Being respectful of differing opinions, viewpoints, and experiences
22
+ * Giving and gracefully accepting constructive feedback
23
+ * Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ * Focusing on what is best not just for us as individuals, but for the
26
+ overall community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ * The use of sexualized language or imagery, and sexual attention or
31
+ advances of any kind
32
+ * Trolling, insulting or derogatory comments, and personal or political attacks
33
+ * Public or private harassment
34
+ * Publishing others' private information, such as a physical or email
35
+ address, without their explicit permission
36
+ * Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official e-mail address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement at
63
+ pedro@commoncrawl.org.
64
+ All complaints will be reviewed and investigated promptly and fairly.
65
+
66
+ All community leaders are obligated to respect the privacy and security of the
67
+ reporter of any incident.
68
+
69
+ ## Enforcement Guidelines
70
+
71
+ Community leaders will follow these Community Impact Guidelines in determining
72
+ the consequences for any action they deem in violation of this Code of Conduct:
73
+
74
+ ### 1. Correction
75
+
76
+ **Community Impact**: Use of inappropriate language or other behavior deemed
77
+ unprofessional or unwelcome in the community.
78
+
79
+ **Consequence**: A private, written warning from community leaders, providing
80
+ clarity around the nature of the violation and an explanation of why the
81
+ behavior was inappropriate. A public apology may be requested.
82
+
83
+ ### 2. Warning
84
+
85
+ **Community Impact**: A violation through a single incident or series
86
+ of actions.
87
+
88
+ **Consequence**: A warning with consequences for continued behavior. No
89
+ interaction with the people involved, including unsolicited interaction with
90
+ those enforcing the Code of Conduct, for a specified period of time. This
91
+ includes avoiding interactions in community spaces as well as external channels
92
+ like social media. Violating these terms may lead to a temporary or
93
+ permanent ban.
94
+
95
+ ### 3. Temporary Ban
96
+
97
+ **Community Impact**: A serious violation of community standards, including
98
+ sustained inappropriate behavior.
99
+
100
+ **Consequence**: A temporary ban from any sort of interaction or public
101
+ communication with the community for a specified period of time. No public or
102
+ private interaction with the people involved, including unsolicited interaction
103
+ with those enforcing the Code of Conduct, is allowed during this period.
104
+ Violating these terms may lead to a permanent ban.
105
+
106
+ ### 4. Permanent Ban
107
+
108
+ **Community Impact**: Demonstrating a pattern of violation of community
109
+ standards, including sustained inappropriate behavior, harassment of an
110
+ individual, or aggression toward or disparagement of classes of individuals.
111
+
112
+ **Consequence**: A permanent ban from any sort of public interaction within
113
+ the community.
114
+
115
+ ## Attribution
116
+
117
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118
+ version 2.0, available at
119
+ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120
+
121
+ Community Impact Guidelines were inspired by [Mozilla's code of conduct
122
+ enforcement ladder](https://github.com/mozilla/diversity).
123
+
124
+ [homepage]: https://www.contributor-covenant.org
125
+
126
+ For answers to common questions about this code of conduct, see the FAQ at
127
+ https://www.contributor-covenant.org/faq. Translations are available at
128
+ https://www.contributor-covenant.org/translations.
@@ -0,0 +1,80 @@
1
+ # How to contribute to ccdown?
2
+
3
+ [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](CODE_OF_CONDUCT.md)
4
+
5
+ `ccdown` is an open source project, so all contributions and suggestions are welcome.
6
+
7
+ You can contribute in many different ways: giving ideas, answering questions, reporting bugs, proposing enhancements,
8
+ improving the documentation, fixing bugs,...
9
+
10
+ Many thanks in advance to every contributor.
11
+
12
+ In order to facilitate healthy, constructive behavior in an open and inclusive community, we all respect and abide by
13
+ our [code of conduct](CODE_OF_CONDUCT.md).
14
+
15
+ ## How to work on an open Issue?
16
+
17
+ You have the list of open Issues at: [https://github.com/4thel00z/ccdown/issues](https://github.com/4thel00z/ccdown/issues)
18
+
19
+ Some of them may have the label `help wanted`: that means that any contributor is welcome!
20
+
21
+ If you would like to work on any of the open Issues:
22
+
23
+ 1. Make sure it is not already assigned to someone else. You have the assignee (if any) on the top of the right column of the Issue page.
24
+
25
+ 2. You can self-assign it by commenting on the Issue page with the keyword: `#self-assign`.
26
+
27
+ 3. Work on your self-assigned issue and eventually create a Pull Request.
28
+
29
+ ## How to create a Pull Request?
30
+
31
+ 1. Fork the [repository](https://github.com/4thel00z/ccdown) by clicking on the 'Fork' button on the repository's page. This creates a copy of the code under your GitHub user account.
32
+
33
+ 2. Clone your fork to your local disk, and add the base repository as a remote:
34
+
35
+ ```bash
36
+ git clone git@github.com:<your Github handle>/ccdown.git
37
+ cd ccdown
38
+ git remote add upstream git@github.com:4thel00z/ccdown.git
39
+ ```
40
+
41
+ 3. Switch to the `dev` branch and then create a new branch to hold your development changes:
42
+
43
+ ```bash
44
+ git checkout dev
45
+ git checkout -b a-descriptive-name-for-my-changes
46
+ ```
47
+
48
+ **do not** work on the `main` or `dev` branches.
49
+
50
+ 4. Develop the features on your branch.
51
+
52
+ 5. Once you're happy with your contribution, add your changed files and make a commit to record your changes locally:
53
+
54
+ ```bash
55
+ git add -u
56
+ git commit
57
+ ```
58
+
59
+ It is a good idea to sync your copy of the code with the original
60
+ repository regularly. This way you can quickly account for changes:
61
+
62
+ ```bash
63
+ git fetch upstream
64
+ git rebase upstream/dev
65
+ ```
66
+
67
+ 6. Once you are satisfied, push the changes to your fork repo using:
68
+
69
+ ```bash
70
+ git push -u origin a-descriptive-name-for-my-changes
71
+ ```
72
+
73
+ Go to the webpage of your fork on GitHub. Click on "Pull request" to send your changes to the project maintainers for review, and select the `dev` branch as the branch you'd like to merge your changes into.
74
+
75
+ Thank you for your contribution!
76
+
77
+ ## Code of conduct
78
+
79
+ This project adheres to the Contributor Covenant [code of conduct](CODE_OF_CONDUCT.md).
80
+ By participating, you are expected to abide by this code.