sentencex 1.0.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sentencex might be problematic. Click here for more details.
- sentencex-1.0.2/.github/workflows/node.yaml +50 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/.github/workflows/python.yaml +31 -47
- {sentencex-1.0.0 → sentencex-1.0.2}/.github/workflows/rust.yml +0 -2
- sentencex-1.0.2/.github/workflows/wasm.yaml +42 -0
- sentencex-1.0.2/100-0.txt +196022 -0
- sentencex-1.0.2/11-0.txt +3384 -0
- sentencex-1.0.2/1661-0.txt +12306 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/Cargo.lock +196 -113
- {sentencex-1.0.0 → sentencex-1.0.2}/PKG-INFO +8 -1
- {sentencex-1.0.0 → sentencex-1.0.2}/README.md +119 -3
- sentencex-1.0.2/TODO.md +3 -0
- sentencex-1.0.2/benches/segment_benchmark.rs +153 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/Cargo.toml +1 -1
- {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/src/lib.rs +1 -1
- sentencex-1.0.2/bindings/python/tests/__init__.py +1 -0
- sentencex-1.0.2/bindings/python/tests/test_sentencex.py +173 -0
- sentencex-1.0.2/bindings/python/uv.lock +306 -0
- sentencex-1.0.2/demo/index.html +151 -0
- sentencex-1.0.2/oxygen.txt +1 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/pyproject.toml +16 -1
- {sentencex-1.0.0 → sentencex-1.0.2}/src/constants.rs +5 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/am.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ar.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/bg.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/bn.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ca.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/da.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/de.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/el.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/en.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/es.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/fi.rs +3 -4
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/fr.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/gu.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/hi.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/hy.rs +1 -1
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/it.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ja.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/kk.rs +13 -6
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/kn.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/language.rs +106 -48
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ml.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/mr.rs +1 -1
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/my.rs +1 -1
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/nl.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/pa.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/pl.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/pt.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ru.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/sk.rs +9 -5
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ta.rs +3 -3
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/te.rs +2 -2
- {sentencex-1.0.0 → sentencex-1.0.2}/src/lib.rs +181 -4
- {sentencex-1.0.0 → sentencex-1.0.2}/src/main.rs +10 -1
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/en.txt +60 -6
- sentencex-1.0.0/.github/workflows/publish.yaml +0 -48
- sentencex-1.0.0/.github/workflows/tests.yaml +0 -26
- sentencex-1.0.0/TODO.md +0 -3
- sentencex-1.0.0/benches/segment_benchmark.rs +0 -14
- sentencex-1.0.0/bindings/python/uv.lock +0 -8
- {sentencex-1.0.0 → sentencex-1.0.2}/.gitignore +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/LICENSE +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/.gitignore +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/.python-version +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/Cargo.lock +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/README.md +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/example.py +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/examples/rust_example.rs +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/am.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/ar.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/bg.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/bn.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/da.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/de.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/el.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/en.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/es.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/fi.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/fr.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/gu.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/hi.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/it.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/kk.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/kn.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/ml.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/nl.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/pa.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/pl.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/pt.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/ru.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/sk.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/ta.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/te.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/fallbacks.yaml +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/mod.rs +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/am.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/ar.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/bg.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/bn.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/ca.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/da.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/de.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/el.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/es.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/fi.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/fr.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/gu.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/hi.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/hy.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/it.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/ja.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/kk.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/kn.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/ml.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/mr.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/my.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/nl.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/pa.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/pl.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/pt.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/ru.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/sk.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/ta.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/te.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/ur.txt +0 -0
- {sentencex-1.0.0 → sentencex-1.0.2}/tests/zh.txt +0 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
name: Node.js Binding Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main, master ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main, master ]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
nodejs-binding:
|
|
14
|
+
name: Test Node.js Binding
|
|
15
|
+
runs-on: ${{ matrix.os }}
|
|
16
|
+
strategy:
|
|
17
|
+
matrix:
|
|
18
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
19
|
+
node-version: [18, 20, 22]
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v4
|
|
22
|
+
|
|
23
|
+
- name: Setup Node.js ${{ matrix.node-version }}
|
|
24
|
+
uses: actions/setup-node@v4
|
|
25
|
+
with:
|
|
26
|
+
node-version: ${{ matrix.node-version }}
|
|
27
|
+
|
|
28
|
+
- name: Install Rust
|
|
29
|
+
uses: dtolnay/rust-toolchain@stable
|
|
30
|
+
|
|
31
|
+
- name: Cache cargo
|
|
32
|
+
uses: actions/cache@v4
|
|
33
|
+
with:
|
|
34
|
+
path: |
|
|
35
|
+
~/.cargo/registry
|
|
36
|
+
~/.cargo/git
|
|
37
|
+
target
|
|
38
|
+
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
|
39
|
+
|
|
40
|
+
- name: Build
|
|
41
|
+
working-directory: bindings/nodejs
|
|
42
|
+
run: npm run build
|
|
43
|
+
|
|
44
|
+
- name: Test
|
|
45
|
+
working-directory: bindings/nodejs
|
|
46
|
+
run: npm test
|
|
47
|
+
|
|
48
|
+
- name: Test example
|
|
49
|
+
working-directory: bindings/nodejs
|
|
50
|
+
run: node example.js
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#
|
|
4
4
|
# maturin generate-ci github -o ../../.github/workflows/python.yaml
|
|
5
5
|
#
|
|
6
|
-
name: CI
|
|
6
|
+
name: Python binding CI
|
|
7
7
|
|
|
8
8
|
on:
|
|
9
9
|
push:
|
|
@@ -24,18 +24,11 @@ jobs:
|
|
|
24
24
|
strategy:
|
|
25
25
|
matrix:
|
|
26
26
|
platform:
|
|
27
|
-
- runner: ubuntu-22.04
|
|
27
|
+
- runner: ubuntu-24.04
|
|
28
28
|
target: x86_64
|
|
29
|
-
- runner: ubuntu-22.04
|
|
29
|
+
- runner: ubuntu-24.04
|
|
30
30
|
target: x86
|
|
31
|
-
- runner: ubuntu-22.04
|
|
32
|
-
target: aarch64
|
|
33
|
-
- runner: ubuntu-22.04
|
|
34
|
-
target: armv7
|
|
35
|
-
- runner: ubuntu-22.04
|
|
36
|
-
target: s390x
|
|
37
|
-
- runner: ubuntu-22.04
|
|
38
|
-
target: ppc64le
|
|
31
|
+
|
|
39
32
|
steps:
|
|
40
33
|
- uses: actions/checkout@v4
|
|
41
34
|
- uses: actions/setup-python@v5
|
|
@@ -48,42 +41,18 @@ jobs:
|
|
|
48
41
|
args: --release --out dist --find-interpreter
|
|
49
42
|
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
50
43
|
manylinux: auto
|
|
44
|
+
working-directory: bindings/python
|
|
45
|
+
- name: Install pytest
|
|
46
|
+
run: pip install pytest
|
|
47
|
+
- name: Install wheel for testing
|
|
48
|
+
run: pip install --find-links bindings/python/dist sentencex --force-reinstall
|
|
49
|
+
- name: Run tests
|
|
50
|
+
run: pytest bindings/python/tests/ -v
|
|
51
51
|
- name: Upload wheels
|
|
52
52
|
uses: actions/upload-artifact@v4
|
|
53
53
|
with:
|
|
54
54
|
name: wheels-linux-${{ matrix.platform.target }}
|
|
55
|
-
path: dist
|
|
56
|
-
|
|
57
|
-
musllinux:
|
|
58
|
-
runs-on: ${{ matrix.platform.runner }}
|
|
59
|
-
strategy:
|
|
60
|
-
matrix:
|
|
61
|
-
platform:
|
|
62
|
-
- runner: ubuntu-22.04
|
|
63
|
-
target: x86_64
|
|
64
|
-
- runner: ubuntu-22.04
|
|
65
|
-
target: x86
|
|
66
|
-
- runner: ubuntu-22.04
|
|
67
|
-
target: aarch64
|
|
68
|
-
- runner: ubuntu-22.04
|
|
69
|
-
target: armv7
|
|
70
|
-
steps:
|
|
71
|
-
- uses: actions/checkout@v4
|
|
72
|
-
- uses: actions/setup-python@v5
|
|
73
|
-
with:
|
|
74
|
-
python-version: 3.x
|
|
75
|
-
- name: Build wheels
|
|
76
|
-
uses: PyO3/maturin-action@v1
|
|
77
|
-
with:
|
|
78
|
-
target: ${{ matrix.platform.target }}
|
|
79
|
-
args: --release --out dist --find-interpreter
|
|
80
|
-
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
81
|
-
manylinux: musllinux_1_2
|
|
82
|
-
- name: Upload wheels
|
|
83
|
-
uses: actions/upload-artifact@v4
|
|
84
|
-
with:
|
|
85
|
-
name: wheels-musllinux-${{ matrix.platform.target }}
|
|
86
|
-
path: dist
|
|
55
|
+
path: bindings/python/dist
|
|
87
56
|
|
|
88
57
|
windows:
|
|
89
58
|
runs-on: ${{ matrix.platform.runner }}
|
|
@@ -106,11 +75,18 @@ jobs:
|
|
|
106
75
|
target: ${{ matrix.platform.target }}
|
|
107
76
|
args: --release --out dist --find-interpreter
|
|
108
77
|
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
78
|
+
working-directory: bindings/python
|
|
79
|
+
- name: Install pytest
|
|
80
|
+
run: pip install pytest
|
|
81
|
+
- name: Install wheel for testing
|
|
82
|
+
run: pip install --find-links bindings/python/dist sentencex --force-reinstall
|
|
83
|
+
- name: Run tests
|
|
84
|
+
run: pytest bindings/python/tests/ -v
|
|
109
85
|
- name: Upload wheels
|
|
110
86
|
uses: actions/upload-artifact@v4
|
|
111
87
|
with:
|
|
112
88
|
name: wheels-windows-${{ matrix.platform.target }}
|
|
113
|
-
path: dist
|
|
89
|
+
path: bindings/python/dist
|
|
114
90
|
|
|
115
91
|
macos:
|
|
116
92
|
runs-on: ${{ matrix.platform.runner }}
|
|
@@ -132,11 +108,18 @@ jobs:
|
|
|
132
108
|
target: ${{ matrix.platform.target }}
|
|
133
109
|
args: --release --out dist --find-interpreter
|
|
134
110
|
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
111
|
+
working-directory: bindings/python
|
|
112
|
+
- name: Install pytest
|
|
113
|
+
run: pip install pytest
|
|
114
|
+
- name: Install wheel for testing
|
|
115
|
+
run: pip install --find-links bindings/python/dist sentencex --force-reinstall
|
|
116
|
+
- name: Run tests
|
|
117
|
+
run: pytest bindings/python/tests/ -v
|
|
135
118
|
- name: Upload wheels
|
|
136
119
|
uses: actions/upload-artifact@v4
|
|
137
120
|
with:
|
|
138
121
|
name: wheels-macos-${{ matrix.platform.target }}
|
|
139
|
-
path: dist
|
|
122
|
+
path: bindings/python/dist
|
|
140
123
|
|
|
141
124
|
sdist:
|
|
142
125
|
runs-on: ubuntu-latest
|
|
@@ -147,17 +130,18 @@ jobs:
|
|
|
147
130
|
with:
|
|
148
131
|
command: sdist
|
|
149
132
|
args: --out dist
|
|
133
|
+
working-directory: bindings/python
|
|
150
134
|
- name: Upload sdist
|
|
151
135
|
uses: actions/upload-artifact@v4
|
|
152
136
|
with:
|
|
153
137
|
name: wheels-sdist
|
|
154
|
-
path: dist
|
|
138
|
+
path: bindings/python/dist
|
|
155
139
|
|
|
156
140
|
release:
|
|
157
141
|
name: Release
|
|
158
142
|
runs-on: ubuntu-latest
|
|
159
143
|
if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
|
|
160
|
-
needs: [linux, musllinux, windows, macos, sdist]
|
|
144
|
+
needs: [linux, windows, macos, sdist]
|
|
161
145
|
permissions:
|
|
162
146
|
# Use to sign the release artifacts
|
|
163
147
|
id-token: write
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
name: WASM Binding Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main, master ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main, master ]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
wasm-binding:
|
|
14
|
+
name: Test WASM Binding
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install Rust
|
|
20
|
+
uses: dtolnay/rust-toolchain@stable
|
|
21
|
+
with:
|
|
22
|
+
targets: wasm32-unknown-unknown
|
|
23
|
+
|
|
24
|
+
- name: Cache cargo
|
|
25
|
+
uses: actions/cache@v4
|
|
26
|
+
with:
|
|
27
|
+
path: |
|
|
28
|
+
~/.cargo/registry
|
|
29
|
+
~/.cargo/git
|
|
30
|
+
target
|
|
31
|
+
key: wasm-cargo-${{ hashFiles('**/Cargo.lock') }}
|
|
32
|
+
|
|
33
|
+
- name: Install wasm-pack
|
|
34
|
+
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
|
|
35
|
+
|
|
36
|
+
- name: Build WASM
|
|
37
|
+
working-directory: bindings/wasm
|
|
38
|
+
run: ./build.sh
|
|
39
|
+
|
|
40
|
+
- name: Test WASM
|
|
41
|
+
working-directory: bindings/wasm
|
|
42
|
+
run: wasm-pack test --headless --firefox
|