sentencex 0.6.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentencex might be problematic. Click here for more details.
- sentencex-1.0.0/.github/workflows/python.yaml +181 -0
- sentencex-1.0.0/.github/workflows/rust.yml +22 -0
- {sentencex-0.6.0 → sentencex-1.0.0}/.gitignore +1 -0
- sentencex-1.0.0/Cargo.lock +966 -0
- sentencex-1.0.0/LICENSE +21 -0
- sentencex-1.0.0/PKG-INFO +24 -0
- sentencex-1.0.0/README.md +97 -0
- sentencex-1.0.0/TODO.md +3 -0
- sentencex-1.0.0/benches/segment_benchmark.rs +14 -0
- sentencex-1.0.0/bindings/python/.gitignore +2 -0
- sentencex-1.0.0/bindings/python/.python-version +1 -0
- sentencex-1.0.0/bindings/python/Cargo.lock +562 -0
- sentencex-1.0.0/bindings/python/Cargo.toml +19 -0
- sentencex-1.0.0/bindings/python/README.md +103 -0
- sentencex-1.0.0/bindings/python/example.py +16 -0
- sentencex-1.0.0/bindings/python/src/lib.rs +32 -0
- sentencex-1.0.0/bindings/python/uv.lock +8 -0
- sentencex-1.0.0/examples/rust_example.rs +7 -0
- {sentencex-0.6.0 → sentencex-1.0.0}/pyproject.toml +13 -36
- sentencex-1.0.0/src/constants.rs +219 -0
- sentencex-1.0.0/src/languages/abbrev/am.txt +2 -0
- sentencex-1.0.0/src/languages/abbrev/ar.txt +17 -0
- sentencex-1.0.0/src/languages/abbrev/bg.txt +71 -0
- sentencex-1.0.0/src/languages/abbrev/bn.txt +26 -0
- sentencex-1.0.0/src/languages/abbrev/da.txt +474 -0
- sentencex-1.0.0/src/languages/abbrev/de.txt +149 -0
- sentencex-1.0.0/src/languages/abbrev/en.txt +217 -0
- sentencex-1.0.0/src/languages/abbrev/es.txt +254 -0
- sentencex-1.0.0/src/languages/abbrev/fi.txt +129 -0
- sentencex-1.0.0/src/languages/abbrev/fr.txt +95 -0
- sentencex-1.0.0/src/languages/abbrev/gu.txt +26 -0
- sentencex-1.0.0/src/languages/abbrev/hi.txt +26 -0
- sentencex-1.0.0/src/languages/abbrev/it.txt +2228 -0
- sentencex-1.0.0/src/languages/abbrev/kk.txt +290 -0
- sentencex-1.0.0/src/languages/abbrev/kn.txt +26 -0
- sentencex-1.0.0/src/languages/abbrev/ml.txt +38 -0
- sentencex-1.0.0/src/languages/abbrev/nl.txt +1586 -0
- sentencex-1.0.0/src/languages/abbrev/pa.txt +26 -0
- sentencex-1.0.0/src/languages/abbrev/pl.txt +134 -0
- sentencex-1.0.0/src/languages/abbrev/pt.txt +93 -0
- sentencex-1.0.0/src/languages/abbrev/ru.txt +69 -0
- sentencex-1.0.0/src/languages/abbrev/sk.txt +200 -0
- sentencex-1.0.0/src/languages/abbrev/ta.txt +26 -0
- sentencex-1.0.0/src/languages/abbrev/te.txt +26 -0
- sentencex-1.0.0/src/languages/am.rs +32 -0
- sentencex-1.0.0/src/languages/ar.rs +32 -0
- sentencex-1.0.0/src/languages/bg.rs +31 -0
- sentencex-1.0.0/src/languages/bn.rs +32 -0
- sentencex-1.0.0/src/languages/ca.rs +37 -0
- sentencex-1.0.0/src/languages/da.rs +37 -0
- sentencex-1.0.0/src/languages/de.rs +94 -0
- sentencex-1.0.0/src/languages/el.rs +31 -0
- sentencex-1.0.0/src/languages/en.rs +31 -0
- sentencex-1.0.0/src/languages/es.rs +29 -0
- sentencex-1.0.0/src/languages/fallbacks.yaml +368 -0
- sentencex-1.0.0/src/languages/fi.rs +80 -0
- sentencex-1.0.0/src/languages/fr.rs +31 -0
- sentencex-1.0.0/src/languages/gu.rs +32 -0
- sentencex-1.0.0/src/languages/hi.rs +32 -0
- sentencex-1.0.0/src/languages/hy.rs +36 -0
- sentencex-1.0.0/src/languages/it.rs +54 -0
- sentencex-1.0.0/src/languages/ja.rs +25 -0
- sentencex-1.0.0/src/languages/kk.rs +44 -0
- sentencex-1.0.0/src/languages/kn.rs +31 -0
- sentencex-1.0.0/src/languages/language.rs +231 -0
- sentencex-1.0.0/src/languages/ml.rs +32 -0
- sentencex-1.0.0/src/languages/mod.rs +93 -0
- sentencex-1.0.0/src/languages/mr.rs +22 -0
- sentencex-1.0.0/src/languages/my.rs +31 -0
- sentencex-1.0.0/src/languages/nl.rs +31 -0
- sentencex-1.0.0/src/languages/pa.rs +31 -0
- sentencex-1.0.0/src/languages/pl.rs +30 -0
- sentencex-1.0.0/src/languages/pt.rs +38 -0
- sentencex-1.0.0/src/languages/ru.rs +38 -0
- sentencex-1.0.0/src/languages/sk.rs +106 -0
- sentencex-1.0.0/src/languages/ta.rs +54 -0
- sentencex-1.0.0/src/languages/te.rs +32 -0
- sentencex-1.0.0/src/lib.rs +194 -0
- sentencex-1.0.0/src/main.rs +59 -0
- sentencex-1.0.0/tests/am.txt +10 -0
- sentencex-1.0.0/tests/ar.txt +34 -0
- sentencex-1.0.0/tests/bg.txt +18 -0
- sentencex-1.0.0/tests/da.txt +17 -0
- sentencex-1.0.0/tests/de.txt +160 -0
- sentencex-1.0.0/tests/el.txt +6 -0
- sentencex-1.0.0/tests/en.txt +256 -0
- sentencex-1.0.0/tests/es.txt +16 -0
- sentencex-1.0.0/tests/fi.txt +21 -0
- sentencex-1.0.0/tests/fr.txt +23 -0
- sentencex-1.0.0/tests/gu.txt +0 -0
- sentencex-1.0.0/tests/hi.txt +5 -0
- sentencex-1.0.0/tests/hy.txt +114 -0
- sentencex-1.0.0/tests/it.txt +186 -0
- sentencex-1.0.0/tests/ja.txt +36 -0
- sentencex-1.0.0/tests/kk.txt +59 -0
- sentencex-1.0.0/tests/kn.txt +0 -0
- sentencex-1.0.0/tests/ml.txt +28 -0
- sentencex-1.0.0/tests/mr.txt +23 -0
- sentencex-1.0.0/tests/my.txt +5 -0
- sentencex-1.0.0/tests/nl.txt +15 -0
- sentencex-1.0.0/tests/pa.txt +5 -0
- sentencex-1.0.0/tests/pl.txt +4 -0
- sentencex-1.0.0/tests/pt.txt +21 -0
- sentencex-1.0.0/tests/ru.txt +151 -0
- sentencex-1.0.0/tests/sk.txt +20 -0
- sentencex-1.0.0/tests/ta.txt +0 -0
- sentencex-1.0.0/tests/te.txt +0 -0
- sentencex-1.0.0/tests/ur.txt +6 -0
- sentencex-1.0.0/tests/zh.txt +10 -0
- sentencex-0.6.0/LICENSE.txt +0 -18
- sentencex-0.6.0/PKG-INFO +0 -122
- sentencex-0.6.0/README.md +0 -88
- sentencex-0.6.0/benchmarks/accuracy.py +0 -111
- sentencex-0.6.0/benchmarks/benchmark_speed.sh +0 -4
- sentencex-0.6.0/benchmarks/en_golden_rules.py +0 -209
- sentencex-0.6.0/benchmarks/requirements.txt +0 -7
- sentencex-0.6.0/benchmarks/speed.py +0 -108
- sentencex-0.6.0/docs/index.html +0 -142
- sentencex-0.6.0/requirements.txt +0 -43
- sentencex-0.6.0/sentencex/__init__.py +0 -33
- sentencex-0.6.0/sentencex/__main__.py +0 -18
- sentencex-0.6.0/sentencex/base.py +0 -190
- sentencex-0.6.0/sentencex/fallbacks.py +0 -243
- sentencex-0.6.0/sentencex/languages/__init__.py +0 -62
- sentencex-0.6.0/sentencex/languages/am.py +0 -9
- sentencex-0.6.0/sentencex/languages/ar.py +0 -26
- sentencex-0.6.0/sentencex/languages/bg.py +0 -79
- sentencex-0.6.0/sentencex/languages/bn.py +0 -39
- sentencex-0.6.0/sentencex/languages/ca.py +0 -5
- sentencex-0.6.0/sentencex/languages/da.py +0 -488
- sentencex-0.6.0/sentencex/languages/de.py +0 -190
- sentencex-0.6.0/sentencex/languages/el.py +0 -10
- sentencex-0.6.0/sentencex/languages/en.py +0 -225
- sentencex-0.6.0/sentencex/languages/es.py +0 -261
- sentencex-0.6.0/sentencex/languages/fi.py +0 -164
- sentencex-0.6.0/sentencex/languages/fr.py +0 -103
- sentencex-0.6.0/sentencex/languages/gu.py +0 -39
- sentencex-0.6.0/sentencex/languages/hi.py +0 -39
- sentencex-0.6.0/sentencex/languages/hy.py +0 -13
- sentencex-0.6.0/sentencex/languages/it.py +0 -2242
- sentencex-0.6.0/sentencex/languages/kk.py +0 -303
- sentencex-0.6.0/sentencex/languages/kn.py +0 -38
- sentencex-0.6.0/sentencex/languages/ml.py +0 -51
- sentencex-0.6.0/sentencex/languages/mr.py +0 -5
- sentencex-0.6.0/sentencex/languages/my.py +0 -12
- sentencex-0.6.0/sentencex/languages/nl.py +0 -1593
- sentencex-0.6.0/sentencex/languages/or_.py +0 -39
- sentencex-0.6.0/sentencex/languages/pa.py +0 -38
- sentencex-0.6.0/sentencex/languages/pl.py +0 -142
- sentencex-0.6.0/sentencex/languages/pt.py +0 -107
- sentencex-0.6.0/sentencex/languages/ru.py +0 -82
- sentencex-0.6.0/sentencex/languages/sk.py +0 -252
- sentencex-0.6.0/sentencex/languages/ta.py +0 -72
- sentencex-0.6.0/sentencex/languages/te.py +0 -39
- sentencex-0.6.0/sentencex/terminators.py +0 -165
- sentencex-0.6.0/test/unit/test_am.py +0 -19
- sentencex-0.6.0/test/unit/test_ar.py +0 -57
- sentencex-0.6.0/test/unit/test_bg.py +0 -35
- sentencex-0.6.0/test/unit/test_da.py +0 -30
- sentencex-0.6.0/test/unit/test_de.py +0 -173
- sentencex-0.6.0/test/unit/test_el.py +0 -20
- sentencex-0.6.0/test/unit/test_en.py +0 -263
- sentencex-0.6.0/test/unit/test_es.py +0 -166
- sentencex-0.6.0/test/unit/test_fa.py +0 -16
- sentencex-0.6.0/test/unit/test_fallbacks.py +0 -10
- sentencex-0.6.0/test/unit/test_fi.py +0 -43
- sentencex-0.6.0/test/unit/test_fr.py +0 -43
- sentencex-0.6.0/test/unit/test_gu.py +0 -20
- sentencex-0.6.0/test/unit/test_hi.py +0 -19
- sentencex-0.6.0/test/unit/test_hy.py +0 -148
- sentencex-0.6.0/test/unit/test_it.py +0 -109
- sentencex-0.6.0/test/unit/test_ja.py +0 -45
- sentencex-0.6.0/test/unit/test_kk.py +0 -76
- sentencex-0.6.0/test/unit/test_ml.py +0 -15
- sentencex-0.6.0/test/unit/test_mr.py +0 -23
- sentencex-0.6.0/test/unit/test_my.py +0 -12
- sentencex-0.6.0/test/unit/test_nl.py +0 -25
- sentencex-0.6.0/test/unit/test_pa.py +0 -19
- sentencex-0.6.0/test/unit/test_pl.py +0 -13
- sentencex-0.6.0/test/unit/test_pt.py +0 -31
- sentencex-0.6.0/test/unit/test_ru.py +0 -137
- sentencex-0.6.0/test/unit/test_sk.py +0 -36
- sentencex-0.6.0/test/unit/test_ur.py +0 -16
- sentencex-0.6.0/test/unit/test_zh.py +0 -23
- sentencex-0.6.0/tox.ini +0 -25
- {sentencex-0.6.0 → sentencex-1.0.0}/.github/workflows/publish.yaml +0 -0
- {sentencex-0.6.0 → sentencex-1.0.0}/.github/workflows/tests.yaml +0 -0
- /sentencex-0.6.0/benchmarks/__init__.py → /sentencex-1.0.0/src/languages/abbrev/el.txt +0 -0
- /sentencex-0.6.0/test/__init__.py → /sentencex-1.0.0/tests/bn.txt +0 -0
- /sentencex-0.6.0/test/pytest.ini → /sentencex-1.0.0/tests/ca.txt +0 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# This file is autogenerated by maturin v1.9.6
|
|
2
|
+
# To update, run
|
|
3
|
+
#
|
|
4
|
+
# maturin generate-ci github -o ../../.github/workflows/python.yaml
|
|
5
|
+
#
|
|
6
|
+
name: CI
|
|
7
|
+
|
|
8
|
+
on:
|
|
9
|
+
push:
|
|
10
|
+
branches:
|
|
11
|
+
- main
|
|
12
|
+
- master
|
|
13
|
+
tags:
|
|
14
|
+
- '*'
|
|
15
|
+
pull_request:
|
|
16
|
+
workflow_dispatch:
|
|
17
|
+
|
|
18
|
+
permissions:
|
|
19
|
+
contents: read
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
linux:
|
|
23
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
24
|
+
strategy:
|
|
25
|
+
matrix:
|
|
26
|
+
platform:
|
|
27
|
+
- runner: ubuntu-22.04
|
|
28
|
+
target: x86_64
|
|
29
|
+
- runner: ubuntu-22.04
|
|
30
|
+
target: x86
|
|
31
|
+
- runner: ubuntu-22.04
|
|
32
|
+
target: aarch64
|
|
33
|
+
- runner: ubuntu-22.04
|
|
34
|
+
target: armv7
|
|
35
|
+
- runner: ubuntu-22.04
|
|
36
|
+
target: s390x
|
|
37
|
+
- runner: ubuntu-22.04
|
|
38
|
+
target: ppc64le
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
- uses: actions/setup-python@v5
|
|
42
|
+
with:
|
|
43
|
+
python-version: 3.x
|
|
44
|
+
- name: Build wheels
|
|
45
|
+
uses: PyO3/maturin-action@v1
|
|
46
|
+
with:
|
|
47
|
+
target: ${{ matrix.platform.target }}
|
|
48
|
+
args: --release --out dist --find-interpreter
|
|
49
|
+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
50
|
+
manylinux: auto
|
|
51
|
+
- name: Upload wheels
|
|
52
|
+
uses: actions/upload-artifact@v4
|
|
53
|
+
with:
|
|
54
|
+
name: wheels-linux-${{ matrix.platform.target }}
|
|
55
|
+
path: dist
|
|
56
|
+
|
|
57
|
+
musllinux:
|
|
58
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
59
|
+
strategy:
|
|
60
|
+
matrix:
|
|
61
|
+
platform:
|
|
62
|
+
- runner: ubuntu-22.04
|
|
63
|
+
target: x86_64
|
|
64
|
+
- runner: ubuntu-22.04
|
|
65
|
+
target: x86
|
|
66
|
+
- runner: ubuntu-22.04
|
|
67
|
+
target: aarch64
|
|
68
|
+
- runner: ubuntu-22.04
|
|
69
|
+
target: armv7
|
|
70
|
+
steps:
|
|
71
|
+
- uses: actions/checkout@v4
|
|
72
|
+
- uses: actions/setup-python@v5
|
|
73
|
+
with:
|
|
74
|
+
python-version: 3.x
|
|
75
|
+
- name: Build wheels
|
|
76
|
+
uses: PyO3/maturin-action@v1
|
|
77
|
+
with:
|
|
78
|
+
target: ${{ matrix.platform.target }}
|
|
79
|
+
args: --release --out dist --find-interpreter
|
|
80
|
+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
81
|
+
manylinux: musllinux_1_2
|
|
82
|
+
- name: Upload wheels
|
|
83
|
+
uses: actions/upload-artifact@v4
|
|
84
|
+
with:
|
|
85
|
+
name: wheels-musllinux-${{ matrix.platform.target }}
|
|
86
|
+
path: dist
|
|
87
|
+
|
|
88
|
+
windows:
|
|
89
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
90
|
+
strategy:
|
|
91
|
+
matrix:
|
|
92
|
+
platform:
|
|
93
|
+
- runner: windows-latest
|
|
94
|
+
target: x64
|
|
95
|
+
- runner: windows-latest
|
|
96
|
+
target: x86
|
|
97
|
+
steps:
|
|
98
|
+
- uses: actions/checkout@v4
|
|
99
|
+
- uses: actions/setup-python@v5
|
|
100
|
+
with:
|
|
101
|
+
python-version: 3.x
|
|
102
|
+
architecture: ${{ matrix.platform.target }}
|
|
103
|
+
- name: Build wheels
|
|
104
|
+
uses: PyO3/maturin-action@v1
|
|
105
|
+
with:
|
|
106
|
+
target: ${{ matrix.platform.target }}
|
|
107
|
+
args: --release --out dist --find-interpreter
|
|
108
|
+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
109
|
+
- name: Upload wheels
|
|
110
|
+
uses: actions/upload-artifact@v4
|
|
111
|
+
with:
|
|
112
|
+
name: wheels-windows-${{ matrix.platform.target }}
|
|
113
|
+
path: dist
|
|
114
|
+
|
|
115
|
+
macos:
|
|
116
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
117
|
+
strategy:
|
|
118
|
+
matrix:
|
|
119
|
+
platform:
|
|
120
|
+
- runner: macos-13
|
|
121
|
+
target: x86_64
|
|
122
|
+
- runner: macos-14
|
|
123
|
+
target: aarch64
|
|
124
|
+
steps:
|
|
125
|
+
- uses: actions/checkout@v4
|
|
126
|
+
- uses: actions/setup-python@v5
|
|
127
|
+
with:
|
|
128
|
+
python-version: 3.x
|
|
129
|
+
- name: Build wheels
|
|
130
|
+
uses: PyO3/maturin-action@v1
|
|
131
|
+
with:
|
|
132
|
+
target: ${{ matrix.platform.target }}
|
|
133
|
+
args: --release --out dist --find-interpreter
|
|
134
|
+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
135
|
+
- name: Upload wheels
|
|
136
|
+
uses: actions/upload-artifact@v4
|
|
137
|
+
with:
|
|
138
|
+
name: wheels-macos-${{ matrix.platform.target }}
|
|
139
|
+
path: dist
|
|
140
|
+
|
|
141
|
+
sdist:
|
|
142
|
+
runs-on: ubuntu-latest
|
|
143
|
+
steps:
|
|
144
|
+
- uses: actions/checkout@v4
|
|
145
|
+
- name: Build sdist
|
|
146
|
+
uses: PyO3/maturin-action@v1
|
|
147
|
+
with:
|
|
148
|
+
command: sdist
|
|
149
|
+
args: --out dist
|
|
150
|
+
- name: Upload sdist
|
|
151
|
+
uses: actions/upload-artifact@v4
|
|
152
|
+
with:
|
|
153
|
+
name: wheels-sdist
|
|
154
|
+
path: dist
|
|
155
|
+
|
|
156
|
+
release:
|
|
157
|
+
name: Release
|
|
158
|
+
runs-on: ubuntu-latest
|
|
159
|
+
if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
|
|
160
|
+
needs: [linux, musllinux, windows, macos, sdist]
|
|
161
|
+
permissions:
|
|
162
|
+
# Used to sign the release artifacts
|
|
163
|
+
id-token: write
|
|
164
|
+
# Used to upload release artifacts
|
|
165
|
+
contents: write
|
|
166
|
+
# Used to generate artifact attestation
|
|
167
|
+
attestations: write
|
|
168
|
+
steps:
|
|
169
|
+
- uses: actions/download-artifact@v4
|
|
170
|
+
- name: Generate artifact attestation
|
|
171
|
+
uses: actions/attest-build-provenance@v2
|
|
172
|
+
with:
|
|
173
|
+
subject-path: 'wheels-*/*'
|
|
174
|
+
- name: Publish to PyPI
|
|
175
|
+
if: ${{ startsWith(github.ref, 'refs/tags/') }}
|
|
176
|
+
uses: PyO3/maturin-action@v1
|
|
177
|
+
env:
|
|
178
|
+
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
|
|
179
|
+
with:
|
|
180
|
+
command: upload
|
|
181
|
+
args: --non-interactive --skip-existing wheels-*/*
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: Rust
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ "master" ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ "master" ]
|
|
8
|
+
|
|
9
|
+
env:
|
|
10
|
+
CARGO_TERM_COLOR: always
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build:
|
|
14
|
+
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
- name: Build
|
|
20
|
+
run: cargo build --verbose
|
|
21
|
+
- name: Run tests
|
|
22
|
+
run: cargo test --verbose
|