sentencex 0.6.1__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentencex might be problematic. Click here for more details.

Files changed (190) hide show
  1. sentencex-1.0.0/.github/workflows/python.yaml +181 -0
  2. sentencex-1.0.0/.github/workflows/rust.yml +22 -0
  3. {sentencex-0.6.1 → sentencex-1.0.0}/.gitignore +1 -0
  4. sentencex-1.0.0/Cargo.lock +966 -0
  5. sentencex-1.0.0/LICENSE +21 -0
  6. sentencex-1.0.0/PKG-INFO +24 -0
  7. sentencex-1.0.0/README.md +97 -0
  8. sentencex-1.0.0/TODO.md +3 -0
  9. sentencex-1.0.0/benches/segment_benchmark.rs +14 -0
  10. sentencex-1.0.0/bindings/python/.gitignore +2 -0
  11. sentencex-1.0.0/bindings/python/.python-version +1 -0
  12. sentencex-1.0.0/bindings/python/Cargo.lock +562 -0
  13. sentencex-1.0.0/bindings/python/Cargo.toml +19 -0
  14. sentencex-1.0.0/bindings/python/README.md +103 -0
  15. sentencex-1.0.0/bindings/python/example.py +16 -0
  16. sentencex-1.0.0/bindings/python/src/lib.rs +32 -0
  17. sentencex-1.0.0/bindings/python/uv.lock +8 -0
  18. sentencex-1.0.0/examples/rust_example.rs +7 -0
  19. {sentencex-0.6.1 → sentencex-1.0.0}/pyproject.toml +13 -36
  20. sentencex-1.0.0/src/constants.rs +219 -0
  21. sentencex-1.0.0/src/languages/abbrev/am.txt +2 -0
  22. sentencex-1.0.0/src/languages/abbrev/ar.txt +17 -0
  23. sentencex-1.0.0/src/languages/abbrev/bg.txt +71 -0
  24. sentencex-1.0.0/src/languages/abbrev/bn.txt +26 -0
  25. sentencex-1.0.0/src/languages/abbrev/da.txt +474 -0
  26. sentencex-1.0.0/src/languages/abbrev/de.txt +149 -0
  27. sentencex-1.0.0/src/languages/abbrev/en.txt +217 -0
  28. sentencex-1.0.0/src/languages/abbrev/es.txt +254 -0
  29. sentencex-1.0.0/src/languages/abbrev/fi.txt +129 -0
  30. sentencex-1.0.0/src/languages/abbrev/fr.txt +95 -0
  31. sentencex-1.0.0/src/languages/abbrev/gu.txt +26 -0
  32. sentencex-1.0.0/src/languages/abbrev/hi.txt +26 -0
  33. sentencex-1.0.0/src/languages/abbrev/it.txt +2228 -0
  34. sentencex-1.0.0/src/languages/abbrev/kk.txt +290 -0
  35. sentencex-1.0.0/src/languages/abbrev/kn.txt +26 -0
  36. sentencex-1.0.0/src/languages/abbrev/ml.txt +38 -0
  37. sentencex-1.0.0/src/languages/abbrev/nl.txt +1586 -0
  38. sentencex-1.0.0/src/languages/abbrev/pa.txt +26 -0
  39. sentencex-1.0.0/src/languages/abbrev/pl.txt +134 -0
  40. sentencex-1.0.0/src/languages/abbrev/pt.txt +93 -0
  41. sentencex-1.0.0/src/languages/abbrev/ru.txt +69 -0
  42. sentencex-1.0.0/src/languages/abbrev/sk.txt +200 -0
  43. sentencex-1.0.0/src/languages/abbrev/ta.txt +26 -0
  44. sentencex-1.0.0/src/languages/abbrev/te.txt +26 -0
  45. sentencex-1.0.0/src/languages/am.rs +32 -0
  46. sentencex-1.0.0/src/languages/ar.rs +32 -0
  47. sentencex-1.0.0/src/languages/bg.rs +31 -0
  48. sentencex-1.0.0/src/languages/bn.rs +32 -0
  49. sentencex-1.0.0/src/languages/ca.rs +37 -0
  50. sentencex-1.0.0/src/languages/da.rs +37 -0
  51. sentencex-1.0.0/src/languages/de.rs +94 -0
  52. sentencex-1.0.0/src/languages/el.rs +31 -0
  53. sentencex-1.0.0/src/languages/en.rs +31 -0
  54. sentencex-1.0.0/src/languages/es.rs +29 -0
  55. sentencex-1.0.0/src/languages/fallbacks.yaml +368 -0
  56. sentencex-1.0.0/src/languages/fi.rs +80 -0
  57. sentencex-1.0.0/src/languages/fr.rs +31 -0
  58. sentencex-1.0.0/src/languages/gu.rs +32 -0
  59. sentencex-1.0.0/src/languages/hi.rs +32 -0
  60. sentencex-1.0.0/src/languages/hy.rs +36 -0
  61. sentencex-1.0.0/src/languages/it.rs +54 -0
  62. sentencex-1.0.0/src/languages/ja.rs +25 -0
  63. sentencex-1.0.0/src/languages/kk.rs +44 -0
  64. sentencex-1.0.0/src/languages/kn.rs +31 -0
  65. sentencex-1.0.0/src/languages/language.rs +231 -0
  66. sentencex-1.0.0/src/languages/ml.rs +32 -0
  67. sentencex-1.0.0/src/languages/mod.rs +93 -0
  68. sentencex-1.0.0/src/languages/mr.rs +22 -0
  69. sentencex-1.0.0/src/languages/my.rs +31 -0
  70. sentencex-1.0.0/src/languages/nl.rs +31 -0
  71. sentencex-1.0.0/src/languages/pa.rs +31 -0
  72. sentencex-1.0.0/src/languages/pl.rs +30 -0
  73. sentencex-1.0.0/src/languages/pt.rs +38 -0
  74. sentencex-1.0.0/src/languages/ru.rs +38 -0
  75. sentencex-1.0.0/src/languages/sk.rs +106 -0
  76. sentencex-1.0.0/src/languages/ta.rs +54 -0
  77. sentencex-1.0.0/src/languages/te.rs +32 -0
  78. sentencex-1.0.0/src/lib.rs +194 -0
  79. sentencex-1.0.0/src/main.rs +59 -0
  80. sentencex-1.0.0/tests/am.txt +10 -0
  81. sentencex-1.0.0/tests/ar.txt +34 -0
  82. sentencex-1.0.0/tests/bg.txt +18 -0
  83. sentencex-1.0.0/tests/da.txt +17 -0
  84. sentencex-1.0.0/tests/de.txt +160 -0
  85. sentencex-1.0.0/tests/el.txt +6 -0
  86. sentencex-1.0.0/tests/en.txt +256 -0
  87. sentencex-1.0.0/tests/es.txt +16 -0
  88. sentencex-1.0.0/tests/fi.txt +21 -0
  89. sentencex-1.0.0/tests/fr.txt +23 -0
  90. sentencex-1.0.0/tests/gu.txt +0 -0
  91. sentencex-1.0.0/tests/hi.txt +5 -0
  92. sentencex-1.0.0/tests/hy.txt +114 -0
  93. sentencex-1.0.0/tests/it.txt +186 -0
  94. sentencex-1.0.0/tests/ja.txt +36 -0
  95. sentencex-1.0.0/tests/kk.txt +59 -0
  96. sentencex-1.0.0/tests/kn.txt +0 -0
  97. sentencex-1.0.0/tests/ml.txt +28 -0
  98. sentencex-1.0.0/tests/mr.txt +23 -0
  99. sentencex-1.0.0/tests/my.txt +5 -0
  100. sentencex-1.0.0/tests/nl.txt +15 -0
  101. sentencex-1.0.0/tests/pa.txt +5 -0
  102. sentencex-1.0.0/tests/pl.txt +4 -0
  103. sentencex-1.0.0/tests/pt.txt +21 -0
  104. sentencex-1.0.0/tests/ru.txt +151 -0
  105. sentencex-1.0.0/tests/sk.txt +20 -0
  106. sentencex-1.0.0/tests/ta.txt +0 -0
  107. sentencex-1.0.0/tests/te.txt +0 -0
  108. sentencex-1.0.0/tests/ur.txt +6 -0
  109. sentencex-1.0.0/tests/zh.txt +10 -0
  110. sentencex-0.6.1/LICENSE.txt +0 -18
  111. sentencex-0.6.1/PKG-INFO +0 -122
  112. sentencex-0.6.1/README.md +0 -88
  113. sentencex-0.6.1/benchmarks/accuracy.py +0 -111
  114. sentencex-0.6.1/benchmarks/benchmark_speed.sh +0 -4
  115. sentencex-0.6.1/benchmarks/en_golden_rules.py +0 -209
  116. sentencex-0.6.1/benchmarks/requirements.txt +0 -7
  117. sentencex-0.6.1/benchmarks/speed.py +0 -108
  118. sentencex-0.6.1/docs/index.html +0 -142
  119. sentencex-0.6.1/requirements.txt +0 -43
  120. sentencex-0.6.1/sentencex/__init__.py +0 -33
  121. sentencex-0.6.1/sentencex/__main__.py +0 -18
  122. sentencex-0.6.1/sentencex/base.py +0 -190
  123. sentencex-0.6.1/sentencex/fallbacks.py +0 -243
  124. sentencex-0.6.1/sentencex/languages/__init__.py +0 -62
  125. sentencex-0.6.1/sentencex/languages/am.py +0 -9
  126. sentencex-0.6.1/sentencex/languages/ar.py +0 -26
  127. sentencex-0.6.1/sentencex/languages/bg.py +0 -79
  128. sentencex-0.6.1/sentencex/languages/bn.py +0 -39
  129. sentencex-0.6.1/sentencex/languages/ca.py +0 -5
  130. sentencex-0.6.1/sentencex/languages/da.py +0 -488
  131. sentencex-0.6.1/sentencex/languages/de.py +0 -190
  132. sentencex-0.6.1/sentencex/languages/el.py +0 -10
  133. sentencex-0.6.1/sentencex/languages/en.py +0 -225
  134. sentencex-0.6.1/sentencex/languages/es.py +0 -261
  135. sentencex-0.6.1/sentencex/languages/fi.py +0 -164
  136. sentencex-0.6.1/sentencex/languages/fr.py +0 -103
  137. sentencex-0.6.1/sentencex/languages/gu.py +0 -39
  138. sentencex-0.6.1/sentencex/languages/hi.py +0 -39
  139. sentencex-0.6.1/sentencex/languages/hy.py +0 -12
  140. sentencex-0.6.1/sentencex/languages/it.py +0 -2242
  141. sentencex-0.6.1/sentencex/languages/kk.py +0 -303
  142. sentencex-0.6.1/sentencex/languages/kn.py +0 -38
  143. sentencex-0.6.1/sentencex/languages/ml.py +0 -51
  144. sentencex-0.6.1/sentencex/languages/mr.py +0 -5
  145. sentencex-0.6.1/sentencex/languages/my.py +0 -12
  146. sentencex-0.6.1/sentencex/languages/nl.py +0 -1593
  147. sentencex-0.6.1/sentencex/languages/or_.py +0 -39
  148. sentencex-0.6.1/sentencex/languages/pa.py +0 -38
  149. sentencex-0.6.1/sentencex/languages/pl.py +0 -142
  150. sentencex-0.6.1/sentencex/languages/pt.py +0 -107
  151. sentencex-0.6.1/sentencex/languages/ru.py +0 -82
  152. sentencex-0.6.1/sentencex/languages/sk.py +0 -252
  153. sentencex-0.6.1/sentencex/languages/ta.py +0 -72
  154. sentencex-0.6.1/sentencex/languages/te.py +0 -39
  155. sentencex-0.6.1/sentencex/terminators.py +0 -166
  156. sentencex-0.6.1/test/unit/test_am.py +0 -19
  157. sentencex-0.6.1/test/unit/test_ar.py +0 -57
  158. sentencex-0.6.1/test/unit/test_bg.py +0 -35
  159. sentencex-0.6.1/test/unit/test_da.py +0 -30
  160. sentencex-0.6.1/test/unit/test_de.py +0 -173
  161. sentencex-0.6.1/test/unit/test_el.py +0 -20
  162. sentencex-0.6.1/test/unit/test_en.py +0 -263
  163. sentencex-0.6.1/test/unit/test_es.py +0 -166
  164. sentencex-0.6.1/test/unit/test_fa.py +0 -16
  165. sentencex-0.6.1/test/unit/test_fallbacks.py +0 -10
  166. sentencex-0.6.1/test/unit/test_fi.py +0 -43
  167. sentencex-0.6.1/test/unit/test_fr.py +0 -43
  168. sentencex-0.6.1/test/unit/test_gu.py +0 -20
  169. sentencex-0.6.1/test/unit/test_hi.py +0 -19
  170. sentencex-0.6.1/test/unit/test_hy.py +0 -148
  171. sentencex-0.6.1/test/unit/test_it.py +0 -109
  172. sentencex-0.6.1/test/unit/test_ja.py +0 -45
  173. sentencex-0.6.1/test/unit/test_kk.py +0 -76
  174. sentencex-0.6.1/test/unit/test_ml.py +0 -15
  175. sentencex-0.6.1/test/unit/test_mr.py +0 -23
  176. sentencex-0.6.1/test/unit/test_my.py +0 -12
  177. sentencex-0.6.1/test/unit/test_nl.py +0 -25
  178. sentencex-0.6.1/test/unit/test_pa.py +0 -19
  179. sentencex-0.6.1/test/unit/test_pl.py +0 -13
  180. sentencex-0.6.1/test/unit/test_pt.py +0 -31
  181. sentencex-0.6.1/test/unit/test_ru.py +0 -137
  182. sentencex-0.6.1/test/unit/test_sk.py +0 -36
  183. sentencex-0.6.1/test/unit/test_ur.py +0 -16
  184. sentencex-0.6.1/test/unit/test_zh.py +0 -23
  185. sentencex-0.6.1/tox.ini +0 -25
  186. {sentencex-0.6.1 → sentencex-1.0.0}/.github/workflows/publish.yaml +0 -0
  187. {sentencex-0.6.1 → sentencex-1.0.0}/.github/workflows/tests.yaml +0 -0
  188. /sentencex-0.6.1/benchmarks/__init__.py → /sentencex-1.0.0/src/languages/abbrev/el.txt +0 -0
  189. /sentencex-0.6.1/test/__init__.py → /sentencex-1.0.0/tests/bn.txt +0 -0
  190. /sentencex-0.6.1/test/pytest.ini → /sentencex-1.0.0/tests/ca.txt +0 -0
@@ -0,0 +1,181 @@
1
+ # This file is autogenerated by maturin v1.9.6
2
+ # To update, run
3
+ #
4
+ # maturin generate-ci github -o ../../.github/workflows/python.yaml
5
+ #
6
+ name: CI
7
+
8
+ on:
9
+ push:
10
+ branches:
11
+ - main
12
+ - master
13
+ tags:
14
+ - '*'
15
+ pull_request:
16
+ workflow_dispatch:
17
+
18
+ permissions:
19
+ contents: read
20
+
21
+ jobs:
22
+ linux:
23
+ runs-on: ${{ matrix.platform.runner }}
24
+ strategy:
25
+ matrix:
26
+ platform:
27
+ - runner: ubuntu-22.04
28
+ target: x86_64
29
+ - runner: ubuntu-22.04
30
+ target: x86
31
+ - runner: ubuntu-22.04
32
+ target: aarch64
33
+ - runner: ubuntu-22.04
34
+ target: armv7
35
+ - runner: ubuntu-22.04
36
+ target: s390x
37
+ - runner: ubuntu-22.04
38
+ target: ppc64le
39
+ steps:
40
+ - uses: actions/checkout@v4
41
+ - uses: actions/setup-python@v5
42
+ with:
43
+ python-version: 3.x
44
+ - name: Build wheels
45
+ uses: PyO3/maturin-action@v1
46
+ with:
47
+ target: ${{ matrix.platform.target }}
48
+ args: --release --out dist --find-interpreter
49
+ sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
50
+ manylinux: auto
51
+ - name: Upload wheels
52
+ uses: actions/upload-artifact@v4
53
+ with:
54
+ name: wheels-linux-${{ matrix.platform.target }}
55
+ path: dist
56
+
57
+ musllinux:
58
+ runs-on: ${{ matrix.platform.runner }}
59
+ strategy:
60
+ matrix:
61
+ platform:
62
+ - runner: ubuntu-22.04
63
+ target: x86_64
64
+ - runner: ubuntu-22.04
65
+ target: x86
66
+ - runner: ubuntu-22.04
67
+ target: aarch64
68
+ - runner: ubuntu-22.04
69
+ target: armv7
70
+ steps:
71
+ - uses: actions/checkout@v4
72
+ - uses: actions/setup-python@v5
73
+ with:
74
+ python-version: 3.x
75
+ - name: Build wheels
76
+ uses: PyO3/maturin-action@v1
77
+ with:
78
+ target: ${{ matrix.platform.target }}
79
+ args: --release --out dist --find-interpreter
80
+ sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
81
+ manylinux: musllinux_1_2
82
+ - name: Upload wheels
83
+ uses: actions/upload-artifact@v4
84
+ with:
85
+ name: wheels-musllinux-${{ matrix.platform.target }}
86
+ path: dist
87
+
88
+ windows:
89
+ runs-on: ${{ matrix.platform.runner }}
90
+ strategy:
91
+ matrix:
92
+ platform:
93
+ - runner: windows-latest
94
+ target: x64
95
+ - runner: windows-latest
96
+ target: x86
97
+ steps:
98
+ - uses: actions/checkout@v4
99
+ - uses: actions/setup-python@v5
100
+ with:
101
+ python-version: 3.x
102
+ architecture: ${{ matrix.platform.target }}
103
+ - name: Build wheels
104
+ uses: PyO3/maturin-action@v1
105
+ with:
106
+ target: ${{ matrix.platform.target }}
107
+ args: --release --out dist --find-interpreter
108
+ sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
109
+ - name: Upload wheels
110
+ uses: actions/upload-artifact@v4
111
+ with:
112
+ name: wheels-windows-${{ matrix.platform.target }}
113
+ path: dist
114
+
115
+ macos:
116
+ runs-on: ${{ matrix.platform.runner }}
117
+ strategy:
118
+ matrix:
119
+ platform:
120
+ - runner: macos-13
121
+ target: x86_64
122
+ - runner: macos-14
123
+ target: aarch64
124
+ steps:
125
+ - uses: actions/checkout@v4
126
+ - uses: actions/setup-python@v5
127
+ with:
128
+ python-version: 3.x
129
+ - name: Build wheels
130
+ uses: PyO3/maturin-action@v1
131
+ with:
132
+ target: ${{ matrix.platform.target }}
133
+ args: --release --out dist --find-interpreter
134
+ sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
135
+ - name: Upload wheels
136
+ uses: actions/upload-artifact@v4
137
+ with:
138
+ name: wheels-macos-${{ matrix.platform.target }}
139
+ path: dist
140
+
141
+ sdist:
142
+ runs-on: ubuntu-latest
143
+ steps:
144
+ - uses: actions/checkout@v4
145
+ - name: Build sdist
146
+ uses: PyO3/maturin-action@v1
147
+ with:
148
+ command: sdist
149
+ args: --out dist
150
+ - name: Upload sdist
151
+ uses: actions/upload-artifact@v4
152
+ with:
153
+ name: wheels-sdist
154
+ path: dist
155
+
156
+ release:
157
+ name: Release
158
+ runs-on: ubuntu-latest
159
+ if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
160
+ needs: [linux, musllinux, windows, macos, sdist]
161
+ permissions:
162
+ # Used to sign the release artifacts
163
+ id-token: write
164
+ # Used to upload release artifacts
165
+ contents: write
166
+ # Used to generate artifact attestation
167
+ attestations: write
168
+ steps:
169
+ - uses: actions/download-artifact@v4
170
+ - name: Generate artifact attestation
171
+ uses: actions/attest-build-provenance@v2
172
+ with:
173
+ subject-path: 'wheels-*/*'
174
+ - name: Publish to PyPI
175
+ if: ${{ startsWith(github.ref, 'refs/tags/') }}
176
+ uses: PyO3/maturin-action@v1
177
+ env:
178
+ MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
179
+ with:
180
+ command: upload
181
+ args: --non-interactive --skip-existing wheels-*/*
@@ -0,0 +1,22 @@
1
+ name: Rust
2
+
3
+ on:
4
+ push:
5
+ branches: [ "master" ]
6
+ pull_request:
7
+ branches: [ "master" ]
8
+
9
+ env:
10
+ CARGO_TERM_COLOR: always
11
+
12
+ jobs:
13
+ build:
14
+
15
+ runs-on: ubuntu-latest
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+ - name: Build
20
+ run: cargo build --verbose
21
+ - name: Run tests
22
+ run: cargo test --verbose
@@ -5,3 +5,4 @@ __pycache__
5
5
  .ruff_cache
6
6
  .pytest_cache
7
7
  dist
8
+ /target