sentencex 0.6.1__tar.gz → 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentencex might be problematic.

Files changed (198)
  1. sentencex-1.0.1/.github/workflows/node.yaml +50 -0
  2. sentencex-1.0.1/.github/workflows/python.yaml +165 -0
  3. sentencex-1.0.1/.github/workflows/rust.yml +20 -0
  4. sentencex-1.0.1/.github/workflows/wasm.yaml +42 -0
  5. {sentencex-0.6.1 → sentencex-1.0.1}/.gitignore +1 -0
  6. sentencex-1.0.1/100-0.txt +196022 -0
  7. sentencex-1.0.1/11-0.txt +3384 -0
  8. sentencex-1.0.1/1661-0.txt +12306 -0
  9. sentencex-1.0.1/Cargo.lock +1049 -0
  10. sentencex-1.0.1/LICENSE +21 -0
  11. sentencex-1.0.1/PKG-INFO +31 -0
  12. sentencex-1.0.1/README.md +198 -0
  13. sentencex-1.0.1/TODO.md +3 -0
  14. sentencex-1.0.1/benches/segment_benchmark.rs +153 -0
  15. sentencex-1.0.1/bindings/python/.gitignore +2 -0
  16. sentencex-1.0.1/bindings/python/.python-version +1 -0
  17. sentencex-1.0.1/bindings/python/Cargo.lock +562 -0
  18. sentencex-1.0.1/bindings/python/Cargo.toml +19 -0
  19. sentencex-1.0.1/bindings/python/README.md +103 -0
  20. sentencex-1.0.1/bindings/python/example.py +16 -0
  21. sentencex-1.0.1/bindings/python/src/lib.rs +32 -0
  22. sentencex-1.0.1/bindings/python/tests/__init__.py +1 -0
  23. sentencex-1.0.1/bindings/python/tests/test_sentencex.py +173 -0
  24. sentencex-1.0.1/bindings/python/uv.lock +306 -0
  25. sentencex-1.0.1/demo/index.html +151 -0
  26. sentencex-1.0.1/examples/rust_example.rs +7 -0
  27. {sentencex-0.6.1 → sentencex-1.0.1}/pyproject.toml +24 -32
  28. sentencex-1.0.1/src/constants.rs +219 -0
  29. sentencex-1.0.1/src/languages/abbrev/am.txt +2 -0
  30. sentencex-1.0.1/src/languages/abbrev/ar.txt +17 -0
  31. sentencex-1.0.1/src/languages/abbrev/bg.txt +71 -0
  32. sentencex-1.0.1/src/languages/abbrev/bn.txt +26 -0
  33. sentencex-1.0.1/src/languages/abbrev/da.txt +474 -0
  34. sentencex-1.0.1/src/languages/abbrev/de.txt +149 -0
  35. sentencex-1.0.1/src/languages/abbrev/en.txt +217 -0
  36. sentencex-1.0.1/src/languages/abbrev/es.txt +254 -0
  37. sentencex-1.0.1/src/languages/abbrev/fi.txt +129 -0
  38. sentencex-1.0.1/src/languages/abbrev/fr.txt +95 -0
  39. sentencex-1.0.1/src/languages/abbrev/gu.txt +26 -0
  40. sentencex-1.0.1/src/languages/abbrev/hi.txt +26 -0
  41. sentencex-1.0.1/src/languages/abbrev/it.txt +2228 -0
  42. sentencex-1.0.1/src/languages/abbrev/kk.txt +290 -0
  43. sentencex-1.0.1/src/languages/abbrev/kn.txt +26 -0
  44. sentencex-1.0.1/src/languages/abbrev/ml.txt +38 -0
  45. sentencex-1.0.1/src/languages/abbrev/nl.txt +1586 -0
  46. sentencex-1.0.1/src/languages/abbrev/pa.txt +26 -0
  47. sentencex-1.0.1/src/languages/abbrev/pl.txt +134 -0
  48. sentencex-1.0.1/src/languages/abbrev/pt.txt +93 -0
  49. sentencex-1.0.1/src/languages/abbrev/ru.txt +69 -0
  50. sentencex-1.0.1/src/languages/abbrev/sk.txt +200 -0
  51. sentencex-1.0.1/src/languages/abbrev/ta.txt +26 -0
  52. sentencex-1.0.1/src/languages/abbrev/te.txt +26 -0
  53. sentencex-1.0.1/src/languages/am.rs +32 -0
  54. sentencex-1.0.1/src/languages/ar.rs +32 -0
  55. sentencex-1.0.1/src/languages/bg.rs +31 -0
  56. sentencex-1.0.1/src/languages/bn.rs +32 -0
  57. sentencex-1.0.1/src/languages/ca.rs +37 -0
  58. sentencex-1.0.1/src/languages/da.rs +37 -0
  59. sentencex-1.0.1/src/languages/de.rs +94 -0
  60. sentencex-1.0.1/src/languages/el.rs +31 -0
  61. sentencex-1.0.1/src/languages/en.rs +31 -0
  62. sentencex-1.0.1/src/languages/es.rs +29 -0
  63. sentencex-1.0.1/src/languages/fallbacks.yaml +368 -0
  64. sentencex-1.0.1/src/languages/fi.rs +79 -0
  65. sentencex-1.0.1/src/languages/fr.rs +31 -0
  66. sentencex-1.0.1/src/languages/gu.rs +32 -0
  67. sentencex-1.0.1/src/languages/hi.rs +32 -0
  68. sentencex-1.0.1/src/languages/hy.rs +36 -0
  69. sentencex-1.0.1/src/languages/it.rs +54 -0
  70. sentencex-1.0.1/src/languages/ja.rs +25 -0
  71. sentencex-1.0.1/src/languages/kk.rs +51 -0
  72. sentencex-1.0.1/src/languages/kn.rs +31 -0
  73. sentencex-1.0.1/src/languages/language.rs +289 -0
  74. sentencex-1.0.1/src/languages/ml.rs +32 -0
  75. sentencex-1.0.1/src/languages/mod.rs +93 -0
  76. sentencex-1.0.1/src/languages/mr.rs +22 -0
  77. sentencex-1.0.1/src/languages/my.rs +31 -0
  78. sentencex-1.0.1/src/languages/nl.rs +31 -0
  79. sentencex-1.0.1/src/languages/pa.rs +31 -0
  80. sentencex-1.0.1/src/languages/pl.rs +30 -0
  81. sentencex-1.0.1/src/languages/pt.rs +38 -0
  82. sentencex-1.0.1/src/languages/ru.rs +38 -0
  83. sentencex-1.0.1/src/languages/sk.rs +110 -0
  84. sentencex-1.0.1/src/languages/ta.rs +54 -0
  85. sentencex-1.0.1/src/languages/te.rs +32 -0
  86. sentencex-1.0.1/src/lib.rs +371 -0
  87. sentencex-1.0.1/src/main.rs +68 -0
  88. sentencex-1.0.1/tests/am.txt +10 -0
  89. sentencex-1.0.1/tests/ar.txt +34 -0
  90. sentencex-1.0.1/tests/bg.txt +18 -0
  91. sentencex-1.0.1/tests/da.txt +17 -0
  92. sentencex-1.0.1/tests/de.txt +160 -0
  93. sentencex-1.0.1/tests/el.txt +6 -0
  94. sentencex-1.0.1/tests/en.txt +256 -0
  95. sentencex-1.0.1/tests/es.txt +16 -0
  96. sentencex-1.0.1/tests/fi.txt +21 -0
  97. sentencex-1.0.1/tests/fr.txt +23 -0
  98. sentencex-1.0.1/tests/gu.txt +0 -0
  99. sentencex-1.0.1/tests/hi.txt +5 -0
  100. sentencex-1.0.1/tests/hy.txt +114 -0
  101. sentencex-1.0.1/tests/it.txt +186 -0
  102. sentencex-1.0.1/tests/ja.txt +36 -0
  103. sentencex-1.0.1/tests/kk.txt +59 -0
  104. sentencex-1.0.1/tests/kn.txt +0 -0
  105. sentencex-1.0.1/tests/ml.txt +28 -0
  106. sentencex-1.0.1/tests/mr.txt +23 -0
  107. sentencex-1.0.1/tests/my.txt +5 -0
  108. sentencex-1.0.1/tests/nl.txt +15 -0
  109. sentencex-1.0.1/tests/pa.txt +5 -0
  110. sentencex-1.0.1/tests/pl.txt +4 -0
  111. sentencex-1.0.1/tests/pt.txt +21 -0
  112. sentencex-1.0.1/tests/ru.txt +151 -0
  113. sentencex-1.0.1/tests/sk.txt +20 -0
  114. sentencex-1.0.1/tests/ta.txt +0 -0
  115. sentencex-1.0.1/tests/te.txt +0 -0
  116. sentencex-1.0.1/tests/ur.txt +6 -0
  117. sentencex-1.0.1/tests/zh.txt +10 -0
  118. sentencex-0.6.1/.github/workflows/publish.yaml +0 -48
  119. sentencex-0.6.1/.github/workflows/tests.yaml +0 -26
  120. sentencex-0.6.1/LICENSE.txt +0 -18
  121. sentencex-0.6.1/PKG-INFO +0 -122
  122. sentencex-0.6.1/README.md +0 -88
  123. sentencex-0.6.1/benchmarks/accuracy.py +0 -111
  124. sentencex-0.6.1/benchmarks/benchmark_speed.sh +0 -4
  125. sentencex-0.6.1/benchmarks/en_golden_rules.py +0 -209
  126. sentencex-0.6.1/benchmarks/requirements.txt +0 -7
  127. sentencex-0.6.1/benchmarks/speed.py +0 -108
  128. sentencex-0.6.1/docs/index.html +0 -142
  129. sentencex-0.6.1/requirements.txt +0 -43
  130. sentencex-0.6.1/sentencex/__init__.py +0 -33
  131. sentencex-0.6.1/sentencex/__main__.py +0 -18
  132. sentencex-0.6.1/sentencex/base.py +0 -190
  133. sentencex-0.6.1/sentencex/fallbacks.py +0 -243
  134. sentencex-0.6.1/sentencex/languages/__init__.py +0 -62
  135. sentencex-0.6.1/sentencex/languages/am.py +0 -9
  136. sentencex-0.6.1/sentencex/languages/ar.py +0 -26
  137. sentencex-0.6.1/sentencex/languages/bg.py +0 -79
  138. sentencex-0.6.1/sentencex/languages/bn.py +0 -39
  139. sentencex-0.6.1/sentencex/languages/ca.py +0 -5
  140. sentencex-0.6.1/sentencex/languages/da.py +0 -488
  141. sentencex-0.6.1/sentencex/languages/de.py +0 -190
  142. sentencex-0.6.1/sentencex/languages/el.py +0 -10
  143. sentencex-0.6.1/sentencex/languages/en.py +0 -225
  144. sentencex-0.6.1/sentencex/languages/es.py +0 -261
  145. sentencex-0.6.1/sentencex/languages/fi.py +0 -164
  146. sentencex-0.6.1/sentencex/languages/fr.py +0 -103
  147. sentencex-0.6.1/sentencex/languages/gu.py +0 -39
  148. sentencex-0.6.1/sentencex/languages/hi.py +0 -39
  149. sentencex-0.6.1/sentencex/languages/hy.py +0 -12
  150. sentencex-0.6.1/sentencex/languages/it.py +0 -2242
  151. sentencex-0.6.1/sentencex/languages/kk.py +0 -303
  152. sentencex-0.6.1/sentencex/languages/kn.py +0 -38
  153. sentencex-0.6.1/sentencex/languages/ml.py +0 -51
  154. sentencex-0.6.1/sentencex/languages/mr.py +0 -5
  155. sentencex-0.6.1/sentencex/languages/my.py +0 -12
  156. sentencex-0.6.1/sentencex/languages/nl.py +0 -1593
  157. sentencex-0.6.1/sentencex/languages/or_.py +0 -39
  158. sentencex-0.6.1/sentencex/languages/pa.py +0 -38
  159. sentencex-0.6.1/sentencex/languages/pl.py +0 -142
  160. sentencex-0.6.1/sentencex/languages/pt.py +0 -107
  161. sentencex-0.6.1/sentencex/languages/ru.py +0 -82
  162. sentencex-0.6.1/sentencex/languages/sk.py +0 -252
  163. sentencex-0.6.1/sentencex/languages/ta.py +0 -72
  164. sentencex-0.6.1/sentencex/languages/te.py +0 -39
  165. sentencex-0.6.1/sentencex/terminators.py +0 -166
  166. sentencex-0.6.1/test/unit/test_am.py +0 -19
  167. sentencex-0.6.1/test/unit/test_ar.py +0 -57
  168. sentencex-0.6.1/test/unit/test_bg.py +0 -35
  169. sentencex-0.6.1/test/unit/test_da.py +0 -30
  170. sentencex-0.6.1/test/unit/test_de.py +0 -173
  171. sentencex-0.6.1/test/unit/test_el.py +0 -20
  172. sentencex-0.6.1/test/unit/test_en.py +0 -263
  173. sentencex-0.6.1/test/unit/test_es.py +0 -166
  174. sentencex-0.6.1/test/unit/test_fa.py +0 -16
  175. sentencex-0.6.1/test/unit/test_fallbacks.py +0 -10
  176. sentencex-0.6.1/test/unit/test_fi.py +0 -43
  177. sentencex-0.6.1/test/unit/test_fr.py +0 -43
  178. sentencex-0.6.1/test/unit/test_gu.py +0 -20
  179. sentencex-0.6.1/test/unit/test_hi.py +0 -19
  180. sentencex-0.6.1/test/unit/test_hy.py +0 -148
  181. sentencex-0.6.1/test/unit/test_it.py +0 -109
  182. sentencex-0.6.1/test/unit/test_ja.py +0 -45
  183. sentencex-0.6.1/test/unit/test_kk.py +0 -76
  184. sentencex-0.6.1/test/unit/test_ml.py +0 -15
  185. sentencex-0.6.1/test/unit/test_mr.py +0 -23
  186. sentencex-0.6.1/test/unit/test_my.py +0 -12
  187. sentencex-0.6.1/test/unit/test_nl.py +0 -25
  188. sentencex-0.6.1/test/unit/test_pa.py +0 -19
  189. sentencex-0.6.1/test/unit/test_pl.py +0 -13
  190. sentencex-0.6.1/test/unit/test_pt.py +0 -31
  191. sentencex-0.6.1/test/unit/test_ru.py +0 -137
  192. sentencex-0.6.1/test/unit/test_sk.py +0 -36
  193. sentencex-0.6.1/test/unit/test_ur.py +0 -16
  194. sentencex-0.6.1/test/unit/test_zh.py +0 -23
  195. sentencex-0.6.1/tox.ini +0 -25
  196. /sentencex-0.6.1/benchmarks/__init__.py → /sentencex-1.0.1/src/languages/abbrev/el.txt +0 -0
  197. /sentencex-0.6.1/test/__init__.py → /sentencex-1.0.1/tests/bn.txt +0 -0
  198. /sentencex-0.6.1/test/pytest.ini → /sentencex-1.0.1/tests/ca.txt +0 -0
@@ -0,0 +1,50 @@
1
+ name: Node.js Binding Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ nodejs-binding:
14
+ name: Test Node.js Binding
15
+ runs-on: ${{ matrix.os }}
16
+ strategy:
17
+ matrix:
18
+ os: [ubuntu-latest, windows-latest, macos-latest]
19
+ node-version: [18, 20, 22]
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+
23
+ - name: Setup Node.js ${{ matrix.node-version }}
24
+ uses: actions/setup-node@v4
25
+ with:
26
+ node-version: ${{ matrix.node-version }}
27
+
28
+ - name: Install Rust
29
+ uses: dtolnay/rust-toolchain@stable
30
+
31
+ - name: Cache cargo
32
+ uses: actions/cache@v4
33
+ with:
34
+ path: |
35
+ ~/.cargo/registry
36
+ ~/.cargo/git
37
+ target
38
+ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
39
+
40
+ - name: Build
41
+ working-directory: bindings/nodejs
42
+ run: npm run build
43
+
44
+ - name: Test
45
+ working-directory: bindings/nodejs
46
+ run: npm test
47
+
48
+ - name: Test example
49
+ working-directory: bindings/nodejs
50
+ run: node example.js
@@ -0,0 +1,165 @@
1
+ # This file is autogenerated by maturin v1.9.6
2
+ # To update, run
3
+ #
4
+ # maturin generate-ci github -o ../../.github/workflows/python.yaml
5
+ #
6
+ name: Python binding CI
7
+
8
+ on:
9
+ push:
10
+ branches:
11
+ - main
12
+ - master
13
+ tags:
14
+ - '*'
15
+ pull_request:
16
+ workflow_dispatch:
17
+
18
+ permissions:
19
+ contents: read
20
+
21
+ jobs:
22
+ linux:
23
+ runs-on: ${{ matrix.platform.runner }}
24
+ strategy:
25
+ matrix:
26
+ platform:
27
+ - runner: ubuntu-24.04
28
+ target: x86_64
29
+ - runner: ubuntu-24.04
30
+ target: x86
31
+
32
+ steps:
33
+ - uses: actions/checkout@v4
34
+ - uses: actions/setup-python@v5
35
+ with:
36
+ python-version: 3.x
37
+ - name: Build wheels
38
+ uses: PyO3/maturin-action@v1
39
+ with:
40
+ target: ${{ matrix.platform.target }}
41
+ args: --release --out dist --find-interpreter
42
+ sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
43
+ manylinux: auto
44
+ working-directory: bindings/python
45
+ - name: Install pytest
46
+ run: pip install pytest
47
+ - name: Install wheel for testing
48
+ run: pip install --find-links bindings/python/dist sentencex --force-reinstall
49
+ - name: Run tests
50
+ run: pytest bindings/python/tests/ -v
51
+ - name: Upload wheels
52
+ uses: actions/upload-artifact@v4
53
+ with:
54
+ name: wheels-linux-${{ matrix.platform.target }}
55
+ path: bindings/python/dist
56
+
57
+ windows:
58
+ runs-on: ${{ matrix.platform.runner }}
59
+ strategy:
60
+ matrix:
61
+ platform:
62
+ - runner: windows-latest
63
+ target: x64
64
+ - runner: windows-latest
65
+ target: x86
66
+ steps:
67
+ - uses: actions/checkout@v4
68
+ - uses: actions/setup-python@v5
69
+ with:
70
+ python-version: 3.x
71
+ architecture: ${{ matrix.platform.target }}
72
+ - name: Build wheels
73
+ uses: PyO3/maturin-action@v1
74
+ with:
75
+ target: ${{ matrix.platform.target }}
76
+ args: --release --out dist --find-interpreter
77
+ sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
78
+ working-directory: bindings/python
79
+ - name: Install pytest
80
+ run: pip install pytest
81
+ - name: Install wheel for testing
82
+ run: pip install --find-links bindings/python/dist sentencex --force-reinstall
83
+ - name: Run tests
84
+ run: pytest bindings/python/tests/ -v
85
+ - name: Upload wheels
86
+ uses: actions/upload-artifact@v4
87
+ with:
88
+ name: wheels-windows-${{ matrix.platform.target }}
89
+ path: bindings/python/dist
90
+
91
+ macos:
92
+ runs-on: ${{ matrix.platform.runner }}
93
+ strategy:
94
+ matrix:
95
+ platform:
96
+ - runner: macos-13
97
+ target: x86_64
98
+ - runner: macos-14
99
+ target: aarch64
100
+ steps:
101
+ - uses: actions/checkout@v4
102
+ - uses: actions/setup-python@v5
103
+ with:
104
+ python-version: 3.x
105
+ - name: Build wheels
106
+ uses: PyO3/maturin-action@v1
107
+ with:
108
+ target: ${{ matrix.platform.target }}
109
+ args: --release --out dist --find-interpreter
110
+ sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
111
+ working-directory: bindings/python
112
+ - name: Install pytest
113
+ run: pip install pytest
114
+ - name: Install wheel for testing
115
+ run: pip install --find-links bindings/python/dist sentencex --force-reinstall
116
+ - name: Run tests
117
+ run: pytest bindings/python/tests/ -v
118
+ - name: Upload wheels
119
+ uses: actions/upload-artifact@v4
120
+ with:
121
+ name: wheels-macos-${{ matrix.platform.target }}
122
+ path: bindings/python/dist
123
+
124
+ sdist:
125
+ runs-on: ubuntu-latest
126
+ steps:
127
+ - uses: actions/checkout@v4
128
+ - name: Build sdist
129
+ uses: PyO3/maturin-action@v1
130
+ with:
131
+ command: sdist
132
+ args: --out dist
133
+ working-directory: bindings/python
134
+ - name: Upload sdist
135
+ uses: actions/upload-artifact@v4
136
+ with:
137
+ name: wheels-sdist
138
+ path: bindings/python/dist
139
+
140
+ release:
141
+ name: Release
142
+ runs-on: ubuntu-latest
143
+ if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
144
+ needs: [linux, windows, macos, sdist]
145
+ permissions:
146
+ # Use to sign the release artifacts
147
+ id-token: write
148
+ # Used to upload release artifacts
149
+ contents: write
150
+ # Used to generate artifact attestation
151
+ attestations: write
152
+ steps:
153
+ - uses: actions/download-artifact@v4
154
+ - name: Generate artifact attestation
155
+ uses: actions/attest-build-provenance@v2
156
+ with:
157
+ subject-path: 'wheels-*/*'
158
+ - name: Publish to PyPI
159
+ if: ${{ startsWith(github.ref, 'refs/tags/') }}
160
+ uses: PyO3/maturin-action@v1
161
+ env:
162
+ MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
163
+ with:
164
+ command: upload
165
+ args: --non-interactive --skip-existing wheels-*/*
@@ -0,0 +1,20 @@
1
+ name: Rust
2
+
3
+ on:
4
+ push:
5
+ branches: [ "master" ]
6
+ pull_request:
7
+ branches: [ "master" ]
8
+
9
+ env:
10
+ CARGO_TERM_COLOR: always
11
+
12
+ jobs:
13
+ build:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - name: Build
18
+ run: cargo build --verbose
19
+ - name: Run tests
20
+ run: cargo test --verbose
@@ -0,0 +1,42 @@
1
+ name: WASM Binding Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ wasm-binding:
14
+ name: Test WASM Binding
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install Rust
20
+ uses: dtolnay/rust-toolchain@stable
21
+ with:
22
+ targets: wasm32-unknown-unknown
23
+
24
+ - name: Cache cargo
25
+ uses: actions/cache@v4
26
+ with:
27
+ path: |
28
+ ~/.cargo/registry
29
+ ~/.cargo/git
30
+ target
31
+ key: wasm-cargo-${{ hashFiles('**/Cargo.lock') }}
32
+
33
+ - name: Install wasm-pack
34
+ run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
35
+
36
+ - name: Build WASM
37
+ working-directory: bindings/wasm
38
+ run: ./build.sh
39
+
40
+ - name: Test WASM
41
+ working-directory: bindings/wasm
42
+ run: wasm-pack test --headless --firefox
@@ -5,3 +5,4 @@ __pycache__
5
5
  .ruff_cache
6
6
  .pytest_cache
7
7
  dist
8
+ /target