lindera-python 1.4.0__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {lindera_python-1.4.0 → lindera_python-2.0.0}/.github/workflows/periodic.yml +20 -4
  2. {lindera_python-1.4.0 → lindera_python-2.0.0}/.github/workflows/regression.yml +20 -4
  3. {lindera_python-1.4.0 → lindera_python-2.0.0}/.github/workflows/release.yml +55 -95
  4. {lindera_python-1.4.0 → lindera_python-2.0.0}/Cargo.lock +321 -87
  5. {lindera_python-1.4.0 → lindera_python-2.0.0}/Cargo.toml +16 -16
  6. {lindera_python-1.4.0 → lindera_python-2.0.0}/Makefile +6 -6
  7. {lindera_python-1.4.0 → lindera_python-2.0.0}/PKG-INFO +3 -3
  8. {lindera_python-1.4.0 → lindera_python-2.0.0}/poetry.lock +103 -97
  9. {lindera_python-1.4.0 → lindera_python-2.0.0}/pyproject.toml +7 -6
  10. {lindera_python-1.4.0 → lindera_python-2.0.0}/.github/FUNDING.yml +0 -0
  11. {lindera_python-1.4.0 → lindera_python-2.0.0}/.github/dependabot.yml +0 -0
  12. {lindera_python-1.4.0 → lindera_python-2.0.0}/.gitignore +0 -0
  13. {lindera_python-1.4.0 → lindera_python-2.0.0}/CHANGES.md +0 -0
  14. {lindera_python-1.4.0 → lindera_python-2.0.0}/LICENSE +0 -0
  15. {lindera_python-1.4.0 → lindera_python-2.0.0}/README.md +0 -0
  16. {lindera_python-1.4.0 → lindera_python-2.0.0}/examples/build_ipadic.py +0 -0
  17. {lindera_python-1.4.0 → lindera_python-2.0.0}/examples/tokenize.py +0 -0
  18. {lindera_python-1.4.0 → lindera_python-2.0.0}/examples/tokenize_with_decompose.py +0 -0
  19. {lindera_python-1.4.0 → lindera_python-2.0.0}/examples/tokenize_with_filters.py +0 -0
  20. {lindera_python-1.4.0 → lindera_python-2.0.0}/examples/tokenize_with_userdict.py +0 -0
  21. {lindera_python-1.4.0 → lindera_python-2.0.0}/examples/train_and_export.py +0 -0
  22. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/bocchan.txt +0 -0
  23. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/cc-cedict_metadata.json +0 -0
  24. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/cc-cedict_simple_userdic.bin +0 -0
  25. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/cc-cedict_simple_userdic.csv +0 -0
  26. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ipadic-neologd_metadata.json +0 -0
  27. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ipadic_detailed_userdic.csv +0 -0
  28. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ipadic_metadata.json +0 -0
  29. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ipadic_mixed_userdic.csv +0 -0
  30. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ipadic_simple_userdic.bin +0 -0
  31. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ipadic_simple_userdic.csv +0 -0
  32. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ipadic_userdic_insufficient_number_of_fields.csv +0 -0
  33. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ipadic_userdic_invalid_word_cost.csv +0 -0
  34. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ko-dic_metadata.json +0 -0
  35. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ko-dic_simple_userdic.bin +0 -0
  36. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/ko-dic_simple_userdic.csv +0 -0
  37. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/lindera.yml +0 -0
  38. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/unidic_metadata.json +0 -0
  39. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/unidic_simple_userdic.bin +0 -0
  40. {lindera_python-1.4.0 → lindera_python-2.0.0}/resources/unidic_simple_userdic.csv +0 -0
  41. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/dictionary.rs +0 -0
  42. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/error.rs +0 -0
  43. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/lib.rs +0 -0
  44. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/metadata.rs +0 -0
  45. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/mode.rs +0 -0
  46. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/schema.rs +0 -0
  47. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/tokenizer.rs +0 -0
  48. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/trainer.rs +0 -0
  49. {lindera_python-1.4.0 → lindera_python-2.0.0}/src/util.rs +0 -0
  50. {lindera_python-1.4.0 → lindera_python-2.0.0}/tests/__init__py +0 -0
  51. {lindera_python-1.4.0 → lindera_python-2.0.0}/tests/test_tokenize_ipadic.py +0 -0
  52. {lindera_python-1.4.0 → lindera_python-2.0.0}/tests/test_trainer.py +0 -0
@@ -37,12 +37,17 @@ jobs:
37
37
  platform:
38
38
  - runner: ubuntu-latest
39
39
  target: x86_64-unknown-linux-gnu
40
- - runner: macOS-latest
41
- target: x86_64-apple-darwin
42
- - runner: macOS-latest
40
+ - runner: ubuntu-24.04-arm
41
+ target: aarch64-unknown-linux-gnu
42
+ - runner: macos-latest
43
43
  target: aarch64-apple-darwin
44
+ - runner: macos-15-intel
45
+ target: x86_64-apple-darwin
44
46
  - runner: windows-latest
45
47
  target: x86_64-pc-windows-msvc
48
+ - runner: windows-latest
49
+ target: aarch64-pc-windows-msvc
50
+ skip_test_run: true
46
51
  toolchain: [stable]
47
52
  runs-on: ${{ matrix.platform.runner }}
48
53
  steps:
@@ -56,5 +61,16 @@ jobs:
56
61
  target: ${{ matrix.platform.target }}
57
62
  components: rustfmt, clippy
58
63
 
64
+ - name: Setup Python
65
+ uses: actions/setup-python@v6
66
+ with:
67
+ python-version: 3.x
68
+
59
69
  - name: Run test
60
- run: cargo test --target "${{ matrix.platform.target }}" --all-features
70
+ shell: bash
71
+ run: |
72
+ if [ "${{ matrix.platform.skip_test_run }}" == "true" ]; then
73
+ cargo check --target "${{ matrix.platform.target }}" --all-features
74
+ else
75
+ cargo test --target "${{ matrix.platform.target }}" --all-features
76
+ fi
@@ -40,12 +40,17 @@ jobs:
40
40
  platform:
41
41
  - runner: ubuntu-latest
42
42
  target: x86_64-unknown-linux-gnu
43
- - runner: macOS-latest
44
- target: x86_64-apple-darwin
45
- - runner: macOS-latest
43
+ - runner: ubuntu-24.04-arm
44
+ target: aarch64-unknown-linux-gnu
45
+ - runner: macos-latest
46
46
  target: aarch64-apple-darwin
47
+ - runner: macos-15-intel
48
+ target: x86_64-apple-darwin
47
49
  - runner: windows-latest
48
50
  target: x86_64-pc-windows-msvc
51
+ - runner: windows-latest
52
+ target: aarch64-pc-windows-msvc
53
+ skip_test_run: true
49
54
  toolchain: [stable]
50
55
  runs-on: ${{ matrix.platform.runner }}
51
56
  steps:
@@ -59,5 +64,16 @@ jobs:
59
64
  target: ${{ matrix.platform.target }}
60
65
  components: rustfmt, clippy
61
66
 
67
+ - name: Setup Python
68
+ uses: actions/setup-python@v6
69
+ with:
70
+ python-version: 3.x
71
+
62
72
  - name: Run test
63
- run: cargo test --target "${{ matrix.platform.target }}" --all-features
73
+ shell: bash
74
+ run: |
75
+ if [ "${{ matrix.platform.skip_test_run }}" == "true" ]; then
76
+ cargo check --target "${{ matrix.platform.target }}" --all-features
77
+ else
78
+ cargo test --target "${{ matrix.platform.target }}" --all-features
79
+ fi
@@ -42,15 +42,20 @@ jobs:
42
42
  platform:
43
43
  - runner: ubuntu-latest
44
44
  target: x86_64-unknown-linux-gnu
45
- # - runner: macOS-latest
46
- # target: x86_64-apple-darwin
47
- - runner: macOS-latest
45
+ - runner: ubuntu-24.04-arm
46
+ target: aarch64-unknown-linux-gnu
47
+ - runner: macos-latest
48
48
  target: aarch64-apple-darwin
49
+ - runner: macos-15-intel
50
+ target: x86_64-apple-darwin
49
51
  - runner: windows-latest
50
52
  target: x86_64-pc-windows-msvc
53
+ - runner: windows-latest
54
+ target: aarch64-pc-windows-msvc
55
+ skip_test_run: true
51
56
  toolchain: [stable]
52
57
  features:
53
- - value: "embedded-ipadic"
58
+ - value: "embed-ipadic"
54
59
  package_name: "lindera-python-ipadic"
55
60
  package_description: "Python binding for Lindera with IPADIC dictionary"
56
61
 
@@ -66,98 +71,48 @@ jobs:
66
71
  target: ${{ matrix.platform.target }}
67
72
  components: rustfmt, clippy
68
73
 
69
- - name: Run test
70
- run: cargo test --target "${{ matrix.platform.target }}" --features="${{ matrix.features.value }}"
71
-
72
- linux:
73
- name: Linux
74
- needs: [test]
75
- strategy:
76
- max-parallel: 1
77
- matrix:
78
- platform:
79
- - runner: ubuntu-latest
80
- target: x86_64
81
- toolchain: [stable]
82
- features:
83
- - value: "default"
84
- package_name: "lindera-python"
85
- package_description: "Python binding for Lindera (no embedded dictionaries)"
86
- - value: "embedded-cjk"
87
- package_name: "lindera-python-cjk"
88
- package_description: "Python binding for Lindera with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT)"
89
- - value: "embedded-ipadic"
90
- package_name: "lindera-python-ipadic"
91
- package_description: "Python binding for Lindera with IPADIC dictionary"
92
- - value: "embedded-unidic"
93
- package_name: "lindera-python-unidic"
94
- package_description: "Python binding for Lindera with UniDic dictionary"
95
- - value: "embedded-ko-dic"
96
- package_name: "lindera-python-ko-dic"
97
- package_description: "Python binding for Lindera with ko-dic Korean dictionary"
98
- - value: "embedded-cc-cedict"
99
- package_name: "lindera-python-cc-cedict"
100
- package_description: "Python binding for Lindera with CC-CEDICT Chinese dictionary"
101
- runs-on: ${{ matrix.platform.runner }}
102
- steps:
103
- - name: Run checkout
104
- uses: actions/checkout@v6
105
-
106
74
  - name: Setup Python
107
75
  uses: actions/setup-python@v6
108
76
  with:
109
77
  python-version: 3.x
110
78
 
111
- - name: Create package-specific pyproject.toml
79
+ - name: Run test
80
+ shell: bash
112
81
  run: |
113
- # Modify pyproject.toml in place
114
- sed -i "s/^name = \"lindera-python\"/name = \"${{ matrix.features.package_name }}\"/" pyproject.toml
115
- sed -i "s/^description = \".*\"/description = \"${{ matrix.features.package_description }}\"/" pyproject.toml
116
-
117
- - name: Build wheels
118
- uses: PyO3/maturin-action@v1
119
- env:
120
- NODE_OPTIONS: "--max-old-space-size=8192"
121
- with:
122
- target: ${{ matrix.platform.target }}
123
- args: --release --out dist --find-interpreter --features=${{ matrix.features.value }}
124
- sccache: "true"
125
- manylinux: auto
126
- before-script-linux: "yum install openssl-devel devtoolset-10-libatomic-devel perl-IPC-Cmd -y"
127
-
128
- - name: Upload wheels
129
- uses: actions/upload-artifact@v6
130
- with:
131
- name: wheels-${{ matrix.features.package_name }}-linux-${{ matrix.platform.target }}-${{ github.ref_name }}
132
- path: dist
82
+ if [ "${{ matrix.platform.skip_test_run }}" == "true" ]; then
83
+ cargo check --target "${{ matrix.platform.target }}" --all-features
84
+ else
85
+ cargo test --target "${{ matrix.platform.target }}" --all-features
86
+ fi
133
87
 
134
- linux2:
135
- name: Linux2
88
+ linux:
89
+ name: Linux
136
90
  needs: [test]
137
91
  strategy:
138
- max-parallel: 1
139
92
  matrix:
140
93
  platform:
141
94
  - runner: ubuntu-latest
142
- target: aarch64
95
+ target: x86_64-unknown-linux-gnu
96
+ - runner: ubuntu-24.04-arm
97
+ target: aarch64-unknown-linux-gnu
143
98
  toolchain: [stable]
144
99
  features:
145
100
  - value: "default"
146
101
  package_name: "lindera-python"
147
102
  package_description: "Python binding for Lindera (no embedded dictionaries)"
148
- - value: "embedded-cjk"
103
+ - value: "embed-cjk"
149
104
  package_name: "lindera-python-cjk"
150
105
  package_description: "Python binding for Lindera with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT)"
151
- - value: "embedded-ipadic"
106
+ - value: "embed-ipadic"
152
107
  package_name: "lindera-python-ipadic"
153
108
  package_description: "Python binding for Lindera with IPADIC dictionary"
154
- - value: "embedded-unidic"
109
+ - value: "embed-unidic"
155
110
  package_name: "lindera-python-unidic"
156
111
  package_description: "Python binding for Lindera with UniDic dictionary"
157
- - value: "embedded-ko-dic"
112
+ - value: "embed-ko-dic"
158
113
  package_name: "lindera-python-ko-dic"
159
114
  package_description: "Python binding for Lindera with ko-dic Korean dictionary"
160
- - value: "embedded-cc-cedict"
115
+ - value: "embed-cc-cedict"
161
116
  package_name: "lindera-python-cc-cedict"
162
117
  package_description: "Python binding for Lindera with CC-CEDICT Chinese dictionary"
163
118
  runs-on: ${{ matrix.platform.runner }}
@@ -185,7 +140,12 @@ jobs:
185
140
  args: --release --out dist --find-interpreter --features=${{ matrix.features.value }}
186
141
  sccache: "true"
187
142
  manylinux: auto
188
- before-script-linux: "apt-get update && apt-get install libssl-dev pkg-config -y"
143
+ before-script-linux: |
144
+ if command -v yum >/dev/null 2>&1; then
145
+ yum install openssl-devel devtoolset-10-libatomic-devel perl-IPC-Cmd -y
146
+ elif command -v apt-get >/dev/null 2>&1; then
147
+ apt-get update && apt-get install libssl-dev pkg-config -y
148
+ fi
189
149
 
190
150
  - name: Upload wheels
191
151
  uses: actions/upload-artifact@v6
@@ -201,25 +161,27 @@ jobs:
201
161
  matrix:
202
162
  platform:
203
163
  - runner: windows-latest
204
- target: x64
164
+ target: x86_64-pc-windows-msvc
165
+ - runner: windows-latest
166
+ target: aarch64-pc-windows-msvc
205
167
  toolchain: [stable]
206
168
  features:
207
169
  - value: "default"
208
170
  package_name: "lindera-python"
209
171
  package_description: "Python binding for Lindera (no embedded dictionaries)"
210
- - value: "embedded-cjk"
172
+ - value: "embed-cjk"
211
173
  package_name: "lindera-python-cjk"
212
174
  package_description: "Python binding for Lindera with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT)"
213
- - value: "embedded-ipadic"
175
+ - value: "embed-ipadic"
214
176
  package_name: "lindera-python-ipadic"
215
177
  package_description: "Python binding for Lindera with IPADIC dictionary"
216
- - value: "embedded-unidic"
178
+ - value: "embed-unidic"
217
179
  package_name: "lindera-python-unidic"
218
180
  package_description: "Python binding for Lindera with UniDic dictionary"
219
- - value: "embedded-ko-dic"
181
+ - value: "embed-ko-dic"
220
182
  package_name: "lindera-python-ko-dic"
221
183
  package_description: "Python binding for Lindera with ko-dic Korean dictionary"
222
- - value: "embedded-cc-cedict"
184
+ - value: "embed-cc-cedict"
223
185
  package_name: "lindera-python-cc-cedict"
224
186
  package_description: "Python binding for Lindera with CC-CEDICT Chinese dictionary"
225
187
  runs-on: ${{ matrix.platform.runner }}
@@ -231,7 +193,6 @@ jobs:
231
193
  uses: actions/setup-python@v6
232
194
  with:
233
195
  python-version: 3.x
234
- architecture: ${{ matrix.platform.target }}
235
196
 
236
197
  - name: Create package-specific pyproject.toml
237
198
  shell: pwsh
@@ -258,34 +219,34 @@ jobs:
258
219
  path: dist
259
220
 
260
221
  macos:
261
- name: MacOS
222
+ name: macOS
262
223
  needs: [test]
263
224
  strategy:
264
225
  max-parallel: 1
265
226
  matrix:
266
227
  platform:
267
- # - runner: macos-latest
268
- # target: x86_64
269
228
  - runner: macos-latest
270
- target: aarch64
229
+ target: aarch64-apple-darwin
230
+ - runner: macos-15-intel
231
+ target: x86_64-apple-darwin
271
232
  toolchain: [stable]
272
233
  features:
273
234
  - value: "default"
274
235
  package_name: "lindera-python"
275
236
  package_description: "Python binding for Lindera (no embedded dictionaries)"
276
- - value: "embedded-cjk"
237
+ - value: "embed-cjk"
277
238
  package_name: "lindera-python-cjk"
278
239
  package_description: "Python binding for Lindera with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT)"
279
- - value: "embedded-ipadic"
240
+ - value: "embed-ipadic"
280
241
  package_name: "lindera-python-ipadic"
281
242
  package_description: "Python binding for Lindera with IPADIC dictionary"
282
- - value: "embedded-unidic"
243
+ - value: "embed-unidic"
283
244
  package_name: "lindera-python-unidic"
284
245
  package_description: "Python binding for Lindera with UniDic dictionary"
285
- - value: "embedded-ko-dic"
246
+ - value: "embed-ko-dic"
286
247
  package_name: "lindera-python-ko-dic"
287
248
  package_description: "Python binding for Lindera with ko-dic Korean dictionary"
288
- - value: "embedded-cc-cedict"
249
+ - value: "embed-cc-cedict"
289
250
  package_name: "lindera-python-cc-cedict"
290
251
  package_description: "Python binding for Lindera with CC-CEDICT Chinese dictionary"
291
252
  runs-on: ${{ matrix.platform.runner }}
@@ -339,8 +300,7 @@ jobs:
339
300
 
340
301
  release:
341
302
  name: GitHub Release
342
- # needs: [linux, linux2, musllinux, windows, macos, sdist]
343
- needs: [linux, linux2, windows, macos, sdist]
303
+ needs: [linux, windows, macos, sdist]
344
304
  runs-on: ubuntu-latest
345
305
  if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
346
306
  permissions:
@@ -374,7 +334,7 @@ jobs:
374
334
 
375
335
  publish-pypi:
376
336
  name: Publish to PyPI
377
- needs: [linux, linux2, windows, macos, sdist]
337
+ needs: [linux, windows, macos, sdist]
378
338
  runs-on: ubuntu-latest
379
339
  if: ${{ startsWith(github.ref, 'refs/tags/') }}
380
340
  permissions:
@@ -413,10 +373,10 @@ jobs:
413
373
 
414
374
  - name: Publish lindera-python
415
375
  run: |
416
- LINDERA_PY_VERSION=$(cargo metadata --no-deps --format-version=1 | jq -r '.packages[] | select(.name=="lindera-python") | .version')
417
- LINDERA_PY_VERSIONS=$(curl -s -XGET https://crates.io/api/v1/crates/lindera-python | jq -r 'select(.versions != null) | .versions[].num')
418
- if echo ${LINDERA_PY_VERSIONS} | grep ${LINDERA_PY_VERSION} >/dev/null; then
419
- echo "lindera-python ${LINDERA_PY_VERSION} has already published"
376
+ LINDERA_PYTHON_VERSION=$(cargo metadata --no-deps --format-version=1 | jq -r '.packages[] | select(.name=="lindera-python") | .version')
377
+ LINDERA_PYTHON_VERSIONS=$(curl -s -XGET https://crates.io/api/v1/crates/lindera-python | jq -r 'select(.versions != null) | .versions[].num')
378
+ if echo ${LINDERA_PYTHON_VERSIONS} | grep ${LINDERA_PYTHON_VERSION} >/dev/null; then
379
+ echo "lindera-python ${LINDERA_PYTHON_VERSION} has already published"
420
380
  else
421
381
  cargo publish
422
382
  fi