sentencex 1.0.0__tar.gz → 1.0.2__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentencex might be problematic. Click here for more details.

Files changed (126)
  1. sentencex-1.0.2/.github/workflows/node.yaml +50 -0
  2. {sentencex-1.0.0 → sentencex-1.0.2}/.github/workflows/python.yaml +31 -47
  3. {sentencex-1.0.0 → sentencex-1.0.2}/.github/workflows/rust.yml +0 -2
  4. sentencex-1.0.2/.github/workflows/wasm.yaml +42 -0
  5. sentencex-1.0.2/100-0.txt +196022 -0
  6. sentencex-1.0.2/11-0.txt +3384 -0
  7. sentencex-1.0.2/1661-0.txt +12306 -0
  8. {sentencex-1.0.0 → sentencex-1.0.2}/Cargo.lock +196 -113
  9. {sentencex-1.0.0 → sentencex-1.0.2}/PKG-INFO +8 -1
  10. {sentencex-1.0.0 → sentencex-1.0.2}/README.md +119 -3
  11. sentencex-1.0.2/TODO.md +3 -0
  12. sentencex-1.0.2/benches/segment_benchmark.rs +153 -0
  13. {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/Cargo.toml +1 -1
  14. {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/src/lib.rs +1 -1
  15. sentencex-1.0.2/bindings/python/tests/__init__.py +1 -0
  16. sentencex-1.0.2/bindings/python/tests/test_sentencex.py +173 -0
  17. sentencex-1.0.2/bindings/python/uv.lock +306 -0
  18. sentencex-1.0.2/demo/index.html +151 -0
  19. sentencex-1.0.2/oxygen.txt +1 -0
  20. {sentencex-1.0.0 → sentencex-1.0.2}/pyproject.toml +16 -1
  21. {sentencex-1.0.0 → sentencex-1.0.2}/src/constants.rs +5 -2
  22. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/am.rs +2 -2
  23. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ar.rs +2 -2
  24. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/bg.rs +2 -2
  25. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/bn.rs +2 -2
  26. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ca.rs +2 -2
  27. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/da.rs +2 -2
  28. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/de.rs +2 -2
  29. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/el.rs +2 -2
  30. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/en.rs +2 -2
  31. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/es.rs +2 -2
  32. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/fi.rs +3 -4
  33. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/fr.rs +2 -2
  34. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/gu.rs +2 -2
  35. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/hi.rs +2 -2
  36. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/hy.rs +1 -1
  37. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/it.rs +2 -2
  38. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ja.rs +2 -2
  39. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/kk.rs +13 -6
  40. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/kn.rs +2 -2
  41. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/language.rs +106 -48
  42. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ml.rs +2 -2
  43. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/mr.rs +1 -1
  44. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/my.rs +1 -1
  45. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/nl.rs +2 -2
  46. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/pa.rs +2 -2
  47. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/pl.rs +2 -2
  48. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/pt.rs +2 -2
  49. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ru.rs +2 -2
  50. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/sk.rs +9 -5
  51. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/ta.rs +3 -3
  52. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/te.rs +2 -2
  53. {sentencex-1.0.0 → sentencex-1.0.2}/src/lib.rs +181 -4
  54. {sentencex-1.0.0 → sentencex-1.0.2}/src/main.rs +10 -1
  55. {sentencex-1.0.0 → sentencex-1.0.2}/tests/en.txt +60 -6
  56. sentencex-1.0.0/.github/workflows/publish.yaml +0 -48
  57. sentencex-1.0.0/.github/workflows/tests.yaml +0 -26
  58. sentencex-1.0.0/TODO.md +0 -3
  59. sentencex-1.0.0/benches/segment_benchmark.rs +0 -14
  60. sentencex-1.0.0/bindings/python/uv.lock +0 -8
  61. {sentencex-1.0.0 → sentencex-1.0.2}/.gitignore +0 -0
  62. {sentencex-1.0.0 → sentencex-1.0.2}/LICENSE +0 -0
  63. {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/.gitignore +0 -0
  64. {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/.python-version +0 -0
  65. {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/Cargo.lock +0 -0
  66. {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/README.md +0 -0
  67. {sentencex-1.0.0 → sentencex-1.0.2}/bindings/python/example.py +0 -0
  68. {sentencex-1.0.0 → sentencex-1.0.2}/examples/rust_example.rs +0 -0
  69. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/am.txt +0 -0
  70. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/ar.txt +0 -0
  71. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/bg.txt +0 -0
  72. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/bn.txt +0 -0
  73. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/da.txt +0 -0
  74. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/de.txt +0 -0
  75. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/el.txt +0 -0
  76. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/en.txt +0 -0
  77. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/es.txt +0 -0
  78. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/fi.txt +0 -0
  79. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/fr.txt +0 -0
  80. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/gu.txt +0 -0
  81. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/hi.txt +0 -0
  82. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/it.txt +0 -0
  83. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/kk.txt +0 -0
  84. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/kn.txt +0 -0
  85. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/ml.txt +0 -0
  86. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/nl.txt +0 -0
  87. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/pa.txt +0 -0
  88. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/pl.txt +0 -0
  89. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/pt.txt +0 -0
  90. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/ru.txt +0 -0
  91. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/sk.txt +0 -0
  92. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/ta.txt +0 -0
  93. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/abbrev/te.txt +0 -0
  94. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/fallbacks.yaml +0 -0
  95. {sentencex-1.0.0 → sentencex-1.0.2}/src/languages/mod.rs +0 -0
  96. {sentencex-1.0.0 → sentencex-1.0.2}/tests/am.txt +0 -0
  97. {sentencex-1.0.0 → sentencex-1.0.2}/tests/ar.txt +0 -0
  98. {sentencex-1.0.0 → sentencex-1.0.2}/tests/bg.txt +0 -0
  99. {sentencex-1.0.0 → sentencex-1.0.2}/tests/bn.txt +0 -0
  100. {sentencex-1.0.0 → sentencex-1.0.2}/tests/ca.txt +0 -0
  101. {sentencex-1.0.0 → sentencex-1.0.2}/tests/da.txt +0 -0
  102. {sentencex-1.0.0 → sentencex-1.0.2}/tests/de.txt +0 -0
  103. {sentencex-1.0.0 → sentencex-1.0.2}/tests/el.txt +0 -0
  104. {sentencex-1.0.0 → sentencex-1.0.2}/tests/es.txt +0 -0
  105. {sentencex-1.0.0 → sentencex-1.0.2}/tests/fi.txt +0 -0
  106. {sentencex-1.0.0 → sentencex-1.0.2}/tests/fr.txt +0 -0
  107. {sentencex-1.0.0 → sentencex-1.0.2}/tests/gu.txt +0 -0
  108. {sentencex-1.0.0 → sentencex-1.0.2}/tests/hi.txt +0 -0
  109. {sentencex-1.0.0 → sentencex-1.0.2}/tests/hy.txt +0 -0
  110. {sentencex-1.0.0 → sentencex-1.0.2}/tests/it.txt +0 -0
  111. {sentencex-1.0.0 → sentencex-1.0.2}/tests/ja.txt +0 -0
  112. {sentencex-1.0.0 → sentencex-1.0.2}/tests/kk.txt +0 -0
  113. {sentencex-1.0.0 → sentencex-1.0.2}/tests/kn.txt +0 -0
  114. {sentencex-1.0.0 → sentencex-1.0.2}/tests/ml.txt +0 -0
  115. {sentencex-1.0.0 → sentencex-1.0.2}/tests/mr.txt +0 -0
  116. {sentencex-1.0.0 → sentencex-1.0.2}/tests/my.txt +0 -0
  117. {sentencex-1.0.0 → sentencex-1.0.2}/tests/nl.txt +0 -0
  118. {sentencex-1.0.0 → sentencex-1.0.2}/tests/pa.txt +0 -0
  119. {sentencex-1.0.0 → sentencex-1.0.2}/tests/pl.txt +0 -0
  120. {sentencex-1.0.0 → sentencex-1.0.2}/tests/pt.txt +0 -0
  121. {sentencex-1.0.0 → sentencex-1.0.2}/tests/ru.txt +0 -0
  122. {sentencex-1.0.0 → sentencex-1.0.2}/tests/sk.txt +0 -0
  123. {sentencex-1.0.0 → sentencex-1.0.2}/tests/ta.txt +0 -0
  124. {sentencex-1.0.0 → sentencex-1.0.2}/tests/te.txt +0 -0
  125. {sentencex-1.0.0 → sentencex-1.0.2}/tests/ur.txt +0 -0
  126. {sentencex-1.0.0 → sentencex-1.0.2}/tests/zh.txt +0 -0
@@ -0,0 +1,50 @@
1
+ name: Node.js Binding Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ nodejs-binding:
14
+ name: Test Node.js Binding
15
+ runs-on: ${{ matrix.os }}
16
+ strategy:
17
+ matrix:
18
+ os: [ubuntu-latest, windows-latest, macos-latest]
19
+ node-version: [18, 20, 22]
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+
23
+ - name: Setup Node.js ${{ matrix.node-version }}
24
+ uses: actions/setup-node@v4
25
+ with:
26
+ node-version: ${{ matrix.node-version }}
27
+
28
+ - name: Install Rust
29
+ uses: dtolnay/rust-toolchain@stable
30
+
31
+ - name: Cache cargo
32
+ uses: actions/cache@v4
33
+ with:
34
+ path: |
35
+ ~/.cargo/registry
36
+ ~/.cargo/git
37
+ target
38
+ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
39
+
40
+ - name: Build
41
+ working-directory: bindings/nodejs
42
+ run: npm run build
43
+
44
+ - name: Test
45
+ working-directory: bindings/nodejs
46
+ run: npm test
47
+
48
+ - name: Test example
49
+ working-directory: bindings/nodejs
50
+ run: node example.js
@@ -3,7 +3,7 @@
3
3
  #
4
4
  # maturin generate-ci github -o ../../.github/workflows/python.yaml
5
5
  #
6
- name: CI
6
+ name: Python binding CI
7
7
 
8
8
  on:
9
9
  push:
@@ -24,18 +24,11 @@ jobs:
24
24
  strategy:
25
25
  matrix:
26
26
  platform:
27
- - runner: ubuntu-22.04
27
+ - runner: ubuntu-24.04
28
28
  target: x86_64
29
- - runner: ubuntu-22.04
29
+ - runner: ubuntu-24.04
30
30
  target: x86
31
- - runner: ubuntu-22.04
32
- target: aarch64
33
- - runner: ubuntu-22.04
34
- target: armv7
35
- - runner: ubuntu-22.04
36
- target: s390x
37
- - runner: ubuntu-22.04
38
- target: ppc64le
31
+
39
32
  steps:
40
33
  - uses: actions/checkout@v4
41
34
  - uses: actions/setup-python@v5
@@ -48,42 +41,18 @@ jobs:
48
41
  args: --release --out dist --find-interpreter
49
42
  sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
50
43
  manylinux: auto
44
+ working-directory: bindings/python
45
+ - name: Install pytest
46
+ run: pip install pytest
47
+ - name: Install wheel for testing
48
+ run: pip install --find-links bindings/python/dist sentencex --force-reinstall
49
+ - name: Run tests
50
+ run: pytest bindings/python/tests/ -v
51
51
  - name: Upload wheels
52
52
  uses: actions/upload-artifact@v4
53
53
  with:
54
54
  name: wheels-linux-${{ matrix.platform.target }}
55
- path: dist
56
-
57
- musllinux:
58
- runs-on: ${{ matrix.platform.runner }}
59
- strategy:
60
- matrix:
61
- platform:
62
- - runner: ubuntu-22.04
63
- target: x86_64
64
- - runner: ubuntu-22.04
65
- target: x86
66
- - runner: ubuntu-22.04
67
- target: aarch64
68
- - runner: ubuntu-22.04
69
- target: armv7
70
- steps:
71
- - uses: actions/checkout@v4
72
- - uses: actions/setup-python@v5
73
- with:
74
- python-version: 3.x
75
- - name: Build wheels
76
- uses: PyO3/maturin-action@v1
77
- with:
78
- target: ${{ matrix.platform.target }}
79
- args: --release --out dist --find-interpreter
80
- sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
81
- manylinux: musllinux_1_2
82
- - name: Upload wheels
83
- uses: actions/upload-artifact@v4
84
- with:
85
- name: wheels-musllinux-${{ matrix.platform.target }}
86
- path: dist
55
+ path: bindings/python/dist
87
56
 
88
57
  windows:
89
58
  runs-on: ${{ matrix.platform.runner }}
@@ -106,11 +75,18 @@ jobs:
106
75
  target: ${{ matrix.platform.target }}
107
76
  args: --release --out dist --find-interpreter
108
77
  sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
78
+ working-directory: bindings/python
79
+ - name: Install pytest
80
+ run: pip install pytest
81
+ - name: Install wheel for testing
82
+ run: pip install --find-links bindings/python/dist sentencex --force-reinstall
83
+ - name: Run tests
84
+ run: pytest bindings/python/tests/ -v
109
85
  - name: Upload wheels
110
86
  uses: actions/upload-artifact@v4
111
87
  with:
112
88
  name: wheels-windows-${{ matrix.platform.target }}
113
- path: dist
89
+ path: bindings/python/dist
114
90
 
115
91
  macos:
116
92
  runs-on: ${{ matrix.platform.runner }}
@@ -132,11 +108,18 @@ jobs:
132
108
  target: ${{ matrix.platform.target }}
133
109
  args: --release --out dist --find-interpreter
134
110
  sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
111
+ working-directory: bindings/python
112
+ - name: Install pytest
113
+ run: pip install pytest
114
+ - name: Install wheel for testing
115
+ run: pip install --find-links bindings/python/dist sentencex --force-reinstall
116
+ - name: Run tests
117
+ run: pytest bindings/python/tests/ -v
135
118
  - name: Upload wheels
136
119
  uses: actions/upload-artifact@v4
137
120
  with:
138
121
  name: wheels-macos-${{ matrix.platform.target }}
139
- path: dist
122
+ path: bindings/python/dist
140
123
 
141
124
  sdist:
142
125
  runs-on: ubuntu-latest
@@ -147,17 +130,18 @@ jobs:
147
130
  with:
148
131
  command: sdist
149
132
  args: --out dist
133
+ working-directory: bindings/python
150
134
  - name: Upload sdist
151
135
  uses: actions/upload-artifact@v4
152
136
  with:
153
137
  name: wheels-sdist
154
- path: dist
138
+ path: bindings/python/dist
155
139
 
156
140
  release:
157
141
  name: Release
158
142
  runs-on: ubuntu-latest
159
143
  if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
160
- needs: [linux, musllinux, windows, macos, sdist]
144
+ needs: [linux, windows, macos, sdist]
161
145
  permissions:
162
146
  # Use to sign the release artifacts
163
147
  id-token: write
@@ -11,9 +11,7 @@ env:
11
11
 
12
12
  jobs:
13
13
  build:
14
-
15
14
  runs-on: ubuntu-latest
16
-
17
15
  steps:
18
16
  - uses: actions/checkout@v4
19
17
  - name: Build
@@ -0,0 +1,42 @@
1
+ name: WASM Binding Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ wasm-binding:
14
+ name: Test WASM Binding
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install Rust
20
+ uses: dtolnay/rust-toolchain@stable
21
+ with:
22
+ targets: wasm32-unknown-unknown
23
+
24
+ - name: Cache cargo
25
+ uses: actions/cache@v4
26
+ with:
27
+ path: |
28
+ ~/.cargo/registry
29
+ ~/.cargo/git
30
+ target
31
+ key: wasm-cargo-${{ hashFiles('**/Cargo.lock') }}
32
+
33
+ - name: Install wasm-pack
34
+ run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
35
+
36
+ - name: Build WASM
37
+ working-directory: bindings/wasm
38
+ run: ./build.sh
39
+
40
+ - name: Test WASM
41
+ working-directory: bindings/wasm
42
+ run: wasm-pack test --headless --firefox