cadar 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. cadar-0.1.3/.github/workflows/docs.yml +127 -0
  2. cadar-0.1.3/.github/workflows/publish-pypi.yml +195 -0
  3. cadar-0.1.3/.gitignore +64 -0
  4. cadar-0.1.3/CHANGELOG.md +95 -0
  5. cadar-0.1.3/Cargo.lock +730 -0
  6. cadar-0.1.3/Cargo.toml +33 -0
  7. cadar-0.1.3/GITHUB_ACTIONS_FIX.md +94 -0
  8. cadar-0.1.3/LICENSE +21 -0
  9. cadar-0.1.3/PKG-INFO +262 -0
  10. cadar-0.1.3/PUSH_TO_GITHUB.md +231 -0
  11. cadar-0.1.3/QUICKSTART.md +272 -0
  12. cadar-0.1.3/README.md +327 -0
  13. cadar-0.1.3/README_PYPI.md +231 -0
  14. cadar-0.1.3/RELEASE_INSTRUCTIONS.md +262 -0
  15. cadar-0.1.3/SETUP_GITHUB_ACTIONS.md +280 -0
  16. cadar-0.1.3/build.sh +74 -0
  17. cadar-0.1.3/docs/api/python.md +340 -0
  18. cadar-0.1.3/docs/changelog.md +95 -0
  19. cadar-0.1.3/docs/dev/building.md +215 -0
  20. cadar-0.1.3/docs/dev/contributing.md +120 -0
  21. cadar-0.1.3/docs/dev/testing.md +220 -0
  22. cadar-0.1.3/docs/generate_api_docs.py +415 -0
  23. cadar-0.1.3/docs/generate_versions.py +166 -0
  24. cadar-0.1.3/docs/guide/architecture.md +147 -0
  25. cadar-0.1.3/docs/guide/examples.md +286 -0
  26. cadar-0.1.3/docs/guide/overview.md +39 -0
  27. cadar-0.1.3/docs/guide/python-api.md +137 -0
  28. cadar-0.1.3/docs/index.md +175 -0
  29. cadar-0.1.3/docs/installation.md +33 -0
  30. cadar-0.1.3/docs/license.md +21 -0
  31. cadar-0.1.3/docs/quickstart.md +136 -0
  32. cadar-0.1.3/docs/stylesheets/extra.css +52 -0
  33. cadar-0.1.3/examples/basic_usage.py +172 -0
  34. cadar-0.1.3/mkdocs.yml +137 -0
  35. cadar-0.1.3/push_and_release.sh +130 -0
  36. cadar-0.1.3/pyproject.toml +43 -0
  37. cadar-0.1.3/python/cadar/__init__.py +113 -0
  38. cadar-0.1.3/src/lib.rs +220 -0
  39. cadar-0.1.3/src/python_bindings.rs +231 -0
  40. cadar-0.1.3/src/stages/icr.rs +307 -0
  41. cadar-0.1.3/src/stages/mod.rs +48 -0
  42. cadar-0.1.3/src/stages/normalization.rs +202 -0
  43. cadar-0.1.3/src/stages/script_detection.rs +129 -0
  44. cadar-0.1.3/src/stages/script_generation.rs +289 -0
  45. cadar-0.1.3/src/stages/tokenization.rs +256 -0
  46. cadar-0.1.3/src/stages/validation.rs +281 -0
  47. cadar-0.1.3/src/types/dialect.rs +76 -0
  48. cadar-0.1.3/src/types/mod.rs +5 -0
  49. cadar-0.1.3/src/types/script.rs +80 -0
cadar-0.1.3/.github/workflows/docs.yml ADDED
@@ -0,0 +1,127 @@
1
+ name: Build and Deploy Documentation # Workflow: builds the MkDocs site and rustdoc, then publishes the result to GitHub Pages
2
+
3
+ on: # Push-only triggers; no pull_request event is configured for this workflow
4
+ push:
5
+ tags:
6
+ - 'v*.*.*' # Triggers on tags like v0.1.0, v1.2.3, etc.
7
+ branches:
8
+ - main # Also build docs on main branch
9
+
10
+ permissions: # Token scopes shared by both jobs (neither job declares its own permissions block)
11
+ contents: write # NOTE(review): no step visible in this workflow writes to the repository; contents: read may be enough - confirm the docs/*.py scripts do not push
12
+ pages: write # Allows the deploy-docs job to publish to GitHub Pages
13
+ id-token: write # OIDC token used by actions/deploy-pages
14
+
15
+ # Allow only one concurrent deployment
16
+ concurrency:
17
+ group: "pages"
18
+ cancel-in-progress: false # A run already in progress is allowed to finish rather than being cancelled by a newer one
19
+
20
+ jobs:
21
+ build-docs: # Builds the wheel, the Python API pages, the MkDocs site and rustdoc, then uploads site/ as the Pages artifact
22
+ name: Build Documentation
23
+ runs-on: ubuntu-latest
24
+
25
+ steps:
26
+ - name: Checkout code
27
+ uses: actions/checkout@v4
28
+ with:
29
+ fetch-depth: 0 # Fetch all history for proper versioning
30
+
31
+ - name: Set up Python
32
+ uses: actions/setup-python@v5
33
+ with:
34
+ python-version: '3.11'
35
+
36
+ - name: Set up Rust
37
+ uses: actions-rs/toolchain@v1
38
+ with:
39
+ profile: minimal
40
+ toolchain: stable
41
+ override: true
42
+
43
+ - name: Install dependencies
44
+ run: |
45
+ pip install --upgrade pip
46
+ pip install maturin
47
+ pip install mkdocs mkdocs-material mkdocstrings[python] markdown-include
48
+ pip install pymdown-extensions
49
+
50
+ - name: Build and install CaDaR
51
+ run: |
52
+ # Build wheel
53
+ maturin build --release --out dist
54
+ # Install the built wheel
55
+ pip install dist/*.whl
56
+
57
+ - name: Extract version
58
+ id: get_version # Outputs VERSION and IS_TAG; both are read by the "Create version directory" step
59
+ run: |
60
+ if [[ $GITHUB_REF == refs/tags/* ]]; then
61
+ VERSION=${GITHUB_REF#refs/tags/v}
62
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
63
+ echo "IS_TAG=true" >> $GITHUB_OUTPUT
64
+ else
65
+ VERSION="dev"
66
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
67
+ echo "IS_TAG=false" >> $GITHUB_OUTPUT
68
+ fi
69
+ echo "Building documentation for version: $VERSION"
70
+
71
+ - name: Generate Python API documentation
72
+ run: |
73
+ python3 docs/generate_api_docs.py
74
+
75
+ - name: Build MkDocs site # Must run before the rustdoc copy: mkdocs build empties site/ by default
76
+ run: |
77
+ mkdocs build --strict
78
+
79
+ - name: Build Rust documentation # Copied into site/rust after the MkDocs build so the files survive
80
+ run: |
81
+ cargo doc --no-deps --document-private-items
82
+ mkdir -p site/rust
83
+ cp -r target/doc/* site/rust/
84
+
85
+ - name: Create version directory # Copies from a temp snapshot so site/ is never copied into its own subdirectory
86
+ if: steps.get_version.outputs.IS_TAG == 'true'
87
+ run: |
88
+ VERSION=${{ steps.get_version.outputs.VERSION }}
89
+ SNAPSHOT="$(mktemp -d)" && cp -r site/. "$SNAPSHOT"/
90
+ mkdir -p "site/versions/$VERSION" && cp -r "$SNAPSHOT"/. "site/versions/$VERSION"/
91
+
92
+ - name: Generate versions index
93
+ run: |
94
+ python3 docs/generate_versions.py
95
+
96
+ - name: Upload artifact
97
+ uses: actions/upload-pages-artifact@v3
98
+ with:
99
+ path: site
100
+
101
+ deploy-docs: # Publishes the Pages artifact uploaded by build-docs
102
+ name: Deploy to GitHub Pages
103
+ needs: build-docs
104
+ runs-on: ubuntu-latest
105
+ # Only deploy from main branch, not from tags (environment protection rules)
106
+ if: github.ref == 'refs/heads/main'
107
+
108
+ environment:
109
+ name: github-pages
110
+ url: ${{ steps.deployment.outputs.page_url }} # URL reported by the step with id "deployment" below
111
+
112
+ steps:
113
+ - name: Deploy to GitHub Pages
114
+ id: deployment
115
+ uses: actions/deploy-pages@v4
116
+
117
+ - name: Comment on PR (if applicable)
118
+ if: github.event_name == 'pull_request' # Never true as configured: this workflow is triggered only by push events
119
+ uses: actions/github-script@v7
120
+ with:
121
+ script: |
122
+ github.rest.issues.createComment({
123
+ issue_number: context.issue.number,
124
+ owner: context.repo.owner,
125
+ repo: context.repo.repo,
126
+ body: '📚 Documentation preview available at: ${{ steps.deployment.outputs.page_url }}'
127
+ })
cadar-0.1.3/.github/workflows/publish-pypi.yml ADDED
@@ -0,0 +1,195 @@
1
+ name: Publish to PyPI # Release pipeline: builds wheels and an sdist for a version tag, uploads them to PyPI, then creates a GitHub Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*.*.*' # Triggers on tags like v0.1.0, v1.2.3, etc.
7
+
8
+ permissions:
9
+ contents: write # Needed to create the GitHub Release; a job that declares its own permissions block does not inherit this
10
+
11
+ jobs:
12
+ build-and-publish: # Despite the name, this job only builds wheels and uploads them as artifacts; the PyPI upload happens in the publish job
13
+ name: Build and publish Python distribution to PyPI
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ matrix:
17
+ os: [ubuntu-latest, macos-latest, windows-latest]
18
+ python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
19
+ exclude: # Drops Python 3.8 and 3.9 on macOS and Windows
20
+ # Reduce build matrix - only build all Python versions on Linux
21
+ - os: macos-latest
22
+ python-version: '3.8'
23
+ - os: macos-latest
24
+ python-version: '3.9'
25
+ - os: windows-latest
26
+ python-version: '3.8'
27
+ - os: windows-latest
28
+ python-version: '3.9'
29
+
30
+ steps:
31
+ - name: Checkout code
32
+ uses: actions/checkout@v4
33
+
34
+ - name: Set up Python ${{ matrix.python-version }}
35
+ uses: actions/setup-python@v5
36
+ with:
37
+ python-version: ${{ matrix.python-version }}
38
+
39
+ - name: Set up Rust
40
+ uses: actions-rs/toolchain@v1
41
+ with:
42
+ profile: minimal
43
+ toolchain: stable
44
+ override: true
45
+
46
+ - name: Install maturin
47
+ run: pip install maturin
48
+
49
+ - name: Extract version from tag
50
+ id: get_version # Exposes VERSION (the tag name without its leading "v") to the following steps
51
+ shell: bash
52
+ run: |
53
+ # Remove 'v' prefix from tag (v0.1.0 -> 0.1.0)
54
+ VERSION=${GITHUB_REF#refs/tags/v}
55
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
56
+ echo "Building version: $VERSION"
57
+
58
+ - name: Update version in Cargo.toml # Rewrites every line that starts with "version = " to the tag version
59
+ shell: bash # bash is requested explicitly; the script relies on [[ ]] and $OSTYPE on every runner OS
60
+ run: |
61
+ VERSION=${{ steps.get_version.outputs.VERSION }}
62
+ # Update version in Cargo.toml
63
+ if [[ "$OSTYPE" == "darwin"* ]]; then
64
+ sed -i '' "s/^version = .*/version = \"$VERSION\"/" Cargo.toml
65
+ else
66
+ sed -i "s/^version = .*/version = \"$VERSION\"/" Cargo.toml
67
+ fi
68
+
69
+ - name: Update version in pyproject.toml # Same substitution as above, applied to pyproject.toml
70
+ shell: bash
71
+ run: |
72
+ VERSION=${{ steps.get_version.outputs.VERSION }}
73
+ # Update version in pyproject.toml
74
+ if [[ "$OSTYPE" == "darwin"* ]]; then
75
+ sed -i '' "s/^version = .*/version = \"$VERSION\"/" pyproject.toml
76
+ else
77
+ sed -i "s/^version = .*/version = \"$VERSION\"/" pyproject.toml
78
+ fi
79
+
80
+ - name: Build wheels
81
+ run: |
82
+ maturin build --release --strip --out dist
83
+
84
+ - name: Upload wheels as artifacts
85
+ uses: actions/upload-artifact@v4
86
+ with:
87
+ name: wheels-${{ matrix.os }}-py${{ matrix.python-version }} # One artifact per matrix entry; matched by the wheels-* pattern in the publish job
88
+ path: dist/
89
+
90
+ build-sdist: # Builds the source distribution once, on a single Linux runner
91
+ name: Build source distribution
92
+ runs-on: ubuntu-latest
93
+ steps:
94
+ - name: Checkout code
95
+ uses: actions/checkout@v4
96
+
97
+ - name: Set up Python
98
+ uses: actions/setup-python@v5
99
+ with:
100
+ python-version: '3.12'
101
+
102
+ - name: Set up Rust
103
+ uses: actions-rs/toolchain@v1
104
+ with:
105
+ profile: minimal
106
+ toolchain: stable
107
+ override: true
108
+
109
+ - name: Install maturin
110
+ run: pip install maturin
111
+
112
+ - name: Extract version from tag
113
+ id: get_version # Exposes VERSION (the tag name without its leading "v") to the following steps
114
+ shell: bash
115
+ run: |
116
+ VERSION=${GITHUB_REF#refs/tags/v}
117
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
118
+ echo "Building version: $VERSION"
119
+
120
+ - name: Update version in Cargo.toml # Rewrites every line that starts with "version = " to the tag version
121
+ shell: bash
122
+ run: |
123
+ VERSION=${{ steps.get_version.outputs.VERSION }}
124
+ sed -i "s/^version = .*/version = \"$VERSION\"/" Cargo.toml
125
+
126
+ - name: Update version in pyproject.toml # Same substitution, applied to pyproject.toml
127
+ shell: bash
128
+ run: |
129
+ VERSION=${{ steps.get_version.outputs.VERSION }}
130
+ sed -i "s/^version = .*/version = \"$VERSION\"/" pyproject.toml
131
+
132
+ - name: Build source distribution
133
+ run: |
134
+ maturin build --release --sdist --out dist
135
+
136
+ - name: Upload sdist as artifact
137
+ uses: actions/upload-artifact@v4
138
+ with:
139
+ name: sdist # Artifact name matched by the "Download sdist artifact" step in the publish job
140
+ path: dist/*.tar.gz # Only .tar.gz files from dist/ are uploaded
141
+
142
+ publish: # Collects every built artifact, uploads them to PyPI, then creates the GitHub Release
143
+ name: Publish to PyPI
144
+ needs: [build-and-publish, build-sdist]
145
+ runs-on: ubuntu-latest
146
+ permissions: # A job-level block replaces the workflow-level one; scopes not listed here are set to none
147
+ id-token: write # Only needed for PyPI trusted publishing (OIDC); the publish step below authenticates with an API token
148
+ contents: write # Required by the "Create GitHub Release" step to create the release and upload its assets
149
+ steps:
150
+ - name: Download wheel artifacts
151
+ uses: actions/download-artifact@v4
152
+ with:
153
+ path: dist/
154
+ pattern: wheels-*
155
+ merge-multiple: true
156
+
157
+ - name: Download sdist artifact
158
+ uses: actions/download-artifact@v4
159
+ with:
160
+ path: dist/
161
+ pattern: sdist
162
+ merge-multiple: true
163
+
164
+ - name: List distributions
165
+ run: ls -lh dist/
166
+
167
+ - name: Publish to PyPI
168
+ uses: pypa/gh-action-pypi-publish@release/v1
169
+ with:
170
+ password: ${{ secrets.PYPI_API_TOKEN }}
171
+ skip-existing: true
172
+ verbose: true
173
+
174
+ - name: Create GitHub Release
175
+ uses: softprops/action-gh-release@v1
176
+ with:
177
+ files: dist/*
178
+ generate_release_notes: true
179
+ body: |
180
+ ## CaDaR ${{ github.ref_name }}
181
+
182
+ Bidirectional transliteration for Darija (Moroccan Arabic)
183
+
184
+ ### Installation
185
+ ```bash
186
+ pip install cadar==${{ github.ref_name }}
187
+ ```
188
+
189
+ ### What's Changed
190
+ See the [CHANGELOG](https://github.com/Oit-Technologies/CaDaR/blob/main/CHANGELOG.md) for details.
191
+
192
+ ---
193
+ 🤖 Published automatically by GitHub Actions
194
+ env:
195
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
cadar-0.1.3/.gitignore ADDED
@@ -0,0 +1,64 @@
1
+ # Rust
2
+ /target/
3
+ **/*.rs.bk
4
+ Cargo.lock
5
+ *.pdb
6
+
7
+ # Python
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ *.so
12
+ *.egg
13
+ *.egg-info/
14
+ dist/
15
+ build/
16
+ .eggs/
17
+ pip-wheel-metadata/
18
+ *.whl
19
+
20
+ # Virtual environments
21
+ venv/
22
+ env/
23
+ ENV/
24
+ .venv/
25
+
26
+ # PyO3/Maturin
27
+ *.o
28
+ *.a
29
+ *.dylib
30
+ *.dll
31
+ *.so.*
32
+
33
+ # IDE
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+ *.swo
38
+ *~
39
+ .DS_Store
40
+
41
+ # Testing
42
+ .pytest_cache/
43
+ .coverage
44
+ htmlcov/
45
+ .tox/
46
+ .mypy_cache/
47
+ .hypothesis/
48
+
49
+ # Documentation
50
+ docs/_build/
51
+ docs/.doctrees/
52
+ site/
53
+
54
+ # Benchmark results
55
+ criterion/
56
+ bench_results/
57
+
58
+ # Logs
59
+ *.log
60
+
61
+ # OS
62
+ Thumbs.db
63
+ .Spotlight-V100
64
+ .Trashes
cadar-0.1.3/CHANGELOG.md ADDED
@@ -0,0 +1,95 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.3] - 2026-01-28
11
+
12
+ ### Fixed
13
+
14
+ - **CI/CD**: Fixed Linux wheel builds in GitHub Actions (removed Docker, unified build process)
15
+ - **PyPI**: Added missing source distribution (.tar.gz) to PyPI releases
16
+ - **GitHub Pages**: Fixed deployment from tags (now only deploys from main branch)
17
+
18
+ ### Added
19
+
20
+ - Source distribution build job in CI/CD workflow
21
+ - All Python versions now build correctly on Linux
22
+
23
+ ### Changed
24
+
25
+ - Simplified wheel building across all platforms (no platform-specific logic)
26
+ - Updated publish workflow to download both wheels and source distribution
27
+
28
+ ## [0.1.2] - 2026-01-28
29
+
30
+ ### Fixed
31
+
32
+ - **Security**: Upgraded PyO3 from 0.20.3 to 0.24.2 to fix buffer overflow vulnerability (CVE)
33
+ - **Documentation**: Fixed MkDocs directory structure for proper builds
34
+ - **Documentation**: Removed Rust API from navigation to fix strict mode build
35
+ - **CI/CD**: Fixed GitHub Actions release permissions (403 error)
36
+ - **Documentation**: Added comprehensive documentation for all sections
37
+
38
+ ### Added
39
+
40
+ - Complete user guide with examples
41
+ - API reference documentation
42
+ - Development and contribution guides
43
+ - Installation instructions
44
+ - Architecture documentation
45
+
46
+ ### Changed
47
+
48
+ - Updated Python bindings to use PyO3 0.24+ Bound API
49
+ - Moved mkdocs.yml from docs/ to project root
50
+ - Updated all documentation paths for new structure
51
+
52
+ ## [0.1.0] - 2026-01-28
53
+
54
+ ### Added
55
+
56
+ - Initial release of CaDaR
57
+ - 6-stage FST-style transliteration pipeline
58
+ - Stage 1: Script Detection
59
+ - Stage 2: Noise Cleaning & Normalization
60
+ - Stage 3: Darija-aware Tokenization
61
+ - Stage 4: Intermediate Canonical Representation (ICR)
62
+ - Stage 5: Target Script Generation
63
+ - Stage 6: Post-validation & Fixes
64
+ - Python API with four main functions:
65
+ - `ara2bizi()` - Arabic to Latin transliteration
66
+ - `bizi2ara()` - Latin to Arabic transliteration
67
+ - `ara2ara()` - Arabic text standardization
68
+ - `bizi2bizi()` - Latin text standardization
69
+ - `CaDaR` class for reusable transliteration operations
70
+ - Convenience functions: `transliterate()` and `standardize()`
71
+ - Support for Moroccan Darija (dialect code: "Ma")
72
+ - Comprehensive documentation
73
+ - 41 unit tests with 100% pass rate
74
+ - Example scripts demonstrating usage
75
+ - MIT License
76
+
77
+ ### Features
78
+
79
+ - **Bidirectional transliteration**: Seamless conversion between Arabic and Latin scripts
80
+ - **Intelligent normalization**: Handles diacritics, repeated characters, and common variations
81
+ - **Darija-aware processing**: Recognizes Darija-specific patterns and constructs
82
+ - **High performance**: Rust core with Python bindings via PyO3
83
+ - **Extensible architecture**: Designed to support additional Darija dialects in future releases
84
+
85
+ ### Technical Details
86
+
87
+ - Built with Rust 1.93.0
88
+ - Python bindings using PyO3 0.20
89
+ - Supports Python 3.8+
90
+ - Cross-platform: Linux, macOS, Windows
91
+
92
+ [Unreleased]: https://github.com/Oit-Technologies/CaDaR/compare/v0.1.3...HEAD
93
+ [0.1.3]: https://github.com/Oit-Technologies/CaDaR/compare/v0.1.2...v0.1.3
94
+ [0.1.2]: https://github.com/Oit-Technologies/CaDaR/compare/v0.1.0...v0.1.2
95
+ [0.1.0]: https://github.com/Oit-Technologies/CaDaR/releases/tag/v0.1.0