lindera-python 1.3.0__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {lindera_python-1.3.0 → lindera_python-1.3.1}/Cargo.lock +41 -91
  2. {lindera_python-1.3.0 → lindera_python-1.3.1}/Cargo.toml +3 -3
  3. {lindera_python-1.3.0 → lindera_python-1.3.1}/PKG-INFO +1 -1
  4. {lindera_python-1.3.0 → lindera_python-1.3.1}/poetry.lock +8 -8
  5. {lindera_python-1.3.0 → lindera_python-1.3.1}/pyproject.toml +1 -1
  6. lindera_python-1.3.1/src/dictionary.rs +302 -0
  7. {lindera_python-1.3.0 → lindera_python-1.3.1}/src/error.rs +8 -0
  8. {lindera_python-1.3.0 → lindera_python-1.3.1}/src/lib.rs +39 -0
  9. {lindera_python-1.3.0 → lindera_python-1.3.1}/src/metadata.rs +47 -0
  10. {lindera_python-1.3.0 → lindera_python-1.3.1}/src/mode.rs +46 -0
  11. {lindera_python-1.3.0 → lindera_python-1.3.1}/src/schema.rs +50 -0
  12. {lindera_python-1.3.0 → lindera_python-1.3.1}/src/tokenizer.rs +160 -1
  13. {lindera_python-1.3.0 → lindera_python-1.3.1}/src/trainer.rs +29 -1
  14. {lindera_python-1.3.0 → lindera_python-1.3.1}/src/util.rs +37 -0
  15. lindera_python-1.3.0/src/dictionary.rs +0 -149
  16. {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/FUNDING.yml +0 -0
  17. {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/dependabot.yml +0 -0
  18. {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/workflows/periodic.yml +0 -0
  19. {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/workflows/regression.yml +0 -0
  20. {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/workflows/release.yml +0 -0
  21. {lindera_python-1.3.0 → lindera_python-1.3.1}/.gitignore +0 -0
  22. {lindera_python-1.3.0 → lindera_python-1.3.1}/CHANGES.md +0 -0
  23. {lindera_python-1.3.0 → lindera_python-1.3.1}/LICENSE +0 -0
  24. {lindera_python-1.3.0 → lindera_python-1.3.1}/Makefile +0 -0
  25. {lindera_python-1.3.0 → lindera_python-1.3.1}/README.md +0 -0
  26. {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/build_ipadic.py +0 -0
  27. {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/tokenize.py +0 -0
  28. {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/tokenize_with_decompose.py +0 -0
  29. {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/tokenize_with_filters.py +0 -0
  30. {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/tokenize_with_userdict.py +0 -0
  31. {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/train_and_export.py +0 -0
  32. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/bocchan.txt +0 -0
  33. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/cc-cedict_metadata.json +0 -0
  34. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/cc-cedict_simple_userdic.bin +0 -0
  35. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/cc-cedict_simple_userdic.csv +0 -0
  36. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic-neologd_metadata.json +0 -0
  37. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_detailed_userdic.csv +0 -0
  38. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_metadata.json +0 -0
  39. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_mixed_userdic.csv +0 -0
  40. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_simple_userdic.bin +0 -0
  41. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_simple_userdic.csv +0 -0
  42. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_userdic_insufficient_number_of_fields.csv +0 -0
  43. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_userdic_invalid_word_cost.csv +0 -0
  44. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ko-dic_metadata.json +0 -0
  45. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ko-dic_simple_userdic.bin +0 -0
  46. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ko-dic_simple_userdic.csv +0 -0
  47. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/lindera.yml +0 -0
  48. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/unidic_metadata.json +0 -0
  49. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/unidic_simple_userdic.bin +0 -0
  50. {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/unidic_simple_userdic.csv +0 -0
  51. {lindera_python-1.3.0 → lindera_python-1.3.1}/tests/__init__py +0 -0
  52. {lindera_python-1.3.0 → lindera_python-1.3.1}/tests/test_tokenize_ipadic.py +0 -0
  53. {lindera_python-1.3.0 → lindera_python-1.3.1}/tests/test_trainer.py +0 -0
@@ -2,15 +2,6 @@
2
2
  # It is not intended for manual editing.
3
3
  version = 4
4
4
 
5
- [[package]]
6
- name = "addr2line"
7
- version = "0.24.2"
8
- source = "registry+https://github.com/rust-lang/crates.io-index"
9
- checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
10
- dependencies = [
11
- "gimli",
12
- ]
13
-
14
5
  [[package]]
15
6
  name = "adler2"
16
7
  version = "2.0.0"
@@ -97,21 +88,6 @@ version = "1.1.0"
97
88
  source = "registry+https://github.com/rust-lang/crates.io-index"
98
89
  checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
99
90
 
100
- [[package]]
101
- name = "backtrace"
102
- version = "0.3.74"
103
- source = "registry+https://github.com/rust-lang/crates.io-index"
104
- checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
105
- dependencies = [
106
- "addr2line",
107
- "cfg-if",
108
- "libc",
109
- "miniz_oxide",
110
- "object",
111
- "rustc-demangle",
112
- "windows-targets",
113
- ]
114
-
115
91
  [[package]]
116
92
  name = "base64"
117
93
  version = "0.22.1"
@@ -540,12 +516,6 @@ dependencies = [
540
516
  "wasm-bindgen",
541
517
  ]
542
518
 
543
- [[package]]
544
- name = "gimli"
545
- version = "0.31.1"
546
- source = "registry+https://github.com/rust-lang/crates.io-index"
547
- checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
548
-
549
519
  [[package]]
550
520
  name = "glob"
551
521
  version = "0.3.3"
@@ -876,17 +846,6 @@ dependencies = [
876
846
  "cfg-if",
877
847
  ]
878
848
 
879
- [[package]]
880
- name = "io-uring"
881
- version = "0.7.9"
882
- source = "registry+https://github.com/rust-lang/crates.io-index"
883
- checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
884
- dependencies = [
885
- "bitflags 2.6.0",
886
- "cfg-if",
887
- "libc",
888
- ]
889
-
890
849
  [[package]]
891
850
  name = "ipnet"
892
851
  version = "2.10.1"
@@ -947,9 +906,9 @@ checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
947
906
 
948
907
  [[package]]
949
908
  name = "lindera"
950
- version = "1.4.0"
909
+ version = "1.4.1"
951
910
  source = "registry+https://github.com/rust-lang/crates.io-index"
952
- checksum = "737cdbd7df815d8cf5982bc9bbd30344c4964d041f8067c0ab6fbc3db0c924e4"
911
+ checksum = "f0e109baf46c2494539663466a49d54bd62f655a4356757269b72d1e1019ff2c"
953
912
  dependencies = [
954
913
  "anyhow",
955
914
  "bincode",
@@ -979,9 +938,9 @@ dependencies = [
979
938
 
980
939
  [[package]]
981
940
  name = "lindera-cc-cedict"
982
- version = "1.4.0"
941
+ version = "1.4.1"
983
942
  source = "registry+https://github.com/rust-lang/crates.io-index"
984
- checksum = "84620141298ea1060835800f116db105596f39ba5fe10bce397133e6cdd981d9"
943
+ checksum = "965ef21c68d3f5453076366a58d05c154e639a5d1e9b0d86a048a6a3486fb1cc"
985
944
  dependencies = [
986
945
  "anyhow",
987
946
  "bincode",
@@ -995,9 +954,9 @@ dependencies = [
995
954
 
996
955
  [[package]]
997
956
  name = "lindera-dictionary"
998
- version = "1.4.0"
957
+ version = "1.4.1"
999
958
  source = "registry+https://github.com/rust-lang/crates.io-index"
1000
- checksum = "609fd8f72a2ffc1b2a71e9bdb73f723d3c0f676255c37a14b3f4f02981c59437"
959
+ checksum = "d493e407b18951da7852d2b021b1f7e001a2963accb17a321c8fd48b60a9e343"
1001
960
  dependencies = [
1002
961
  "anyhow",
1003
962
  "bincode",
@@ -1030,9 +989,9 @@ dependencies = [
1030
989
 
1031
990
  [[package]]
1032
991
  name = "lindera-ipadic"
1033
- version = "1.4.0"
992
+ version = "1.4.1"
1034
993
  source = "registry+https://github.com/rust-lang/crates.io-index"
1035
- checksum = "292173f5cea3aa09af01f1b8e163f66e6be3fef7b81466e0efaae3314df56c58"
994
+ checksum = "567746f82c01052e05539f2731ce2ad62eddebe5ae96453e9534a87bc9cfafc9"
1036
995
  dependencies = [
1037
996
  "anyhow",
1038
997
  "bincode",
@@ -1046,9 +1005,9 @@ dependencies = [
1046
1005
 
1047
1006
  [[package]]
1048
1007
  name = "lindera-ipadic-neologd"
1049
- version = "1.4.0"
1008
+ version = "1.4.1"
1050
1009
  source = "registry+https://github.com/rust-lang/crates.io-index"
1051
- checksum = "7bac14263d5d3b0ac7674c0d2321b987230df96f5b3df59d2015491598d6102d"
1010
+ checksum = "350f17ced5cbf2964ee29ddef1094703fd1df7c20d464dec6106b7b7ee038030"
1052
1011
  dependencies = [
1053
1012
  "anyhow",
1054
1013
  "bincode",
@@ -1062,9 +1021,9 @@ dependencies = [
1062
1021
 
1063
1022
  [[package]]
1064
1023
  name = "lindera-ko-dic"
1065
- version = "1.4.0"
1024
+ version = "1.4.1"
1066
1025
  source = "registry+https://github.com/rust-lang/crates.io-index"
1067
- checksum = "5c1e5bde25a381e689466669f3a6e14e05b55a3fa6b60b98701ae12ca6fba330"
1026
+ checksum = "d2dda2995055967b409c4d1f311b7db796a3279c78607838325f8892a1542960"
1068
1027
  dependencies = [
1069
1028
  "anyhow",
1070
1029
  "bincode",
@@ -1078,7 +1037,7 @@ dependencies = [
1078
1037
 
1079
1038
  [[package]]
1080
1039
  name = "lindera-python"
1081
- version = "1.3.0"
1040
+ version = "1.3.1"
1082
1041
  dependencies = [
1083
1042
  "lindera",
1084
1043
  "num_cpus",
@@ -1089,9 +1048,9 @@ dependencies = [
1089
1048
 
1090
1049
  [[package]]
1091
1050
  name = "lindera-unidic"
1092
- version = "1.4.0"
1051
+ version = "1.4.1"
1093
1052
  source = "registry+https://github.com/rust-lang/crates.io-index"
1094
- checksum = "139acd137f64722a1f754f4ed8e824c59ea9b4a0547633444ec94720ebd1efc9"
1053
+ checksum = "e6a03bdf3acbd493e95ac182fef6194c09556a04dcbebbaeddca47d5ae6ec5f6"
1095
1054
  dependencies = [
1096
1055
  "anyhow",
1097
1056
  "bincode",
@@ -1222,15 +1181,6 @@ dependencies = [
1222
1181
  "libc",
1223
1182
  ]
1224
1183
 
1225
- [[package]]
1226
- name = "object"
1227
- version = "0.36.5"
1228
- source = "registry+https://github.com/rust-lang/crates.io-index"
1229
- checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e"
1230
- dependencies = [
1231
- "memchr",
1232
- ]
1233
-
1234
1184
  [[package]]
1235
1185
  name = "once_cell"
1236
1186
  version = "1.21.3"
@@ -1293,9 +1243,9 @@ dependencies = [
1293
1243
 
1294
1244
  [[package]]
1295
1245
  name = "pyo3"
1296
- version = "0.26.0"
1246
+ version = "0.27.1"
1297
1247
  source = "registry+https://github.com/rust-lang/crates.io-index"
1298
- checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383"
1248
+ checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf"
1299
1249
  dependencies = [
1300
1250
  "indoc",
1301
1251
  "libc",
@@ -1310,18 +1260,18 @@ dependencies = [
1310
1260
 
1311
1261
  [[package]]
1312
1262
  name = "pyo3-build-config"
1313
- version = "0.26.0"
1263
+ version = "0.27.1"
1314
1264
  source = "registry+https://github.com/rust-lang/crates.io-index"
1315
- checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f"
1265
+ checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb"
1316
1266
  dependencies = [
1317
1267
  "target-lexicon",
1318
1268
  ]
1319
1269
 
1320
1270
  [[package]]
1321
1271
  name = "pyo3-ffi"
1322
- version = "0.26.0"
1272
+ version = "0.27.1"
1323
1273
  source = "registry+https://github.com/rust-lang/crates.io-index"
1324
- checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105"
1274
+ checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be"
1325
1275
  dependencies = [
1326
1276
  "libc",
1327
1277
  "pyo3-build-config",
@@ -1329,9 +1279,9 @@ dependencies = [
1329
1279
 
1330
1280
  [[package]]
1331
1281
  name = "pyo3-macros"
1332
- version = "0.26.0"
1282
+ version = "0.27.1"
1333
1283
  source = "registry+https://github.com/rust-lang/crates.io-index"
1334
- checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded"
1284
+ checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71"
1335
1285
  dependencies = [
1336
1286
  "proc-macro2",
1337
1287
  "pyo3-macros-backend",
@@ -1341,9 +1291,9 @@ dependencies = [
1341
1291
 
1342
1292
  [[package]]
1343
1293
  name = "pyo3-macros-backend"
1344
- version = "0.26.0"
1294
+ version = "0.27.1"
1345
1295
  source = "registry+https://github.com/rust-lang/crates.io-index"
1346
- checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf"
1296
+ checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b"
1347
1297
  dependencies = [
1348
1298
  "heck",
1349
1299
  "proc-macro2",
@@ -1595,12 +1545,6 @@ dependencies = [
1595
1545
  "hashbrown",
1596
1546
  ]
1597
1547
 
1598
- [[package]]
1599
- name = "rustc-demangle"
1600
- version = "0.1.24"
1601
- source = "registry+https://github.com/rust-lang/crates.io-index"
1602
- checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
1603
-
1604
1548
  [[package]]
1605
1549
  name = "rustc-hash"
1606
1550
  version = "2.1.1"
@@ -2028,27 +1972,24 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
2028
1972
 
2029
1973
  [[package]]
2030
1974
  name = "tokio"
2031
- version = "1.47.1"
1975
+ version = "1.48.0"
2032
1976
  source = "registry+https://github.com/rust-lang/crates.io-index"
2033
- checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
1977
+ checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
2034
1978
  dependencies = [
2035
- "backtrace",
2036
1979
  "bytes",
2037
- "io-uring",
2038
1980
  "libc",
2039
1981
  "mio",
2040
1982
  "pin-project-lite",
2041
- "slab",
2042
1983
  "socket2 0.6.0",
2043
1984
  "tokio-macros",
2044
- "windows-sys 0.59.0",
1985
+ "windows-sys 0.61.2",
2045
1986
  ]
2046
1987
 
2047
1988
  [[package]]
2048
1989
  name = "tokio-macros"
2049
- version = "2.5.0"
1990
+ version = "2.6.0"
2050
1991
  source = "registry+https://github.com/rust-lang/crates.io-index"
2051
- checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
1992
+ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
2052
1993
  dependencies = [
2053
1994
  "proc-macro2",
2054
1995
  "quote",
@@ -2387,9 +2328,9 @@ dependencies = [
2387
2328
 
2388
2329
  [[package]]
2389
2330
  name = "windows-link"
2390
- version = "0.2.0"
2331
+ version = "0.2.1"
2391
2332
  source = "registry+https://github.com/rust-lang/crates.io-index"
2392
- checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
2333
+ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
2393
2334
 
2394
2335
  [[package]]
2395
2336
  name = "windows-result"
@@ -2442,6 +2383,15 @@ dependencies = [
2442
2383
  "windows-targets",
2443
2384
  ]
2444
2385
 
2386
+ [[package]]
2387
+ name = "windows-sys"
2388
+ version = "0.61.2"
2389
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2390
+ checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
2391
+ dependencies = [
2392
+ "windows-link",
2393
+ ]
2394
+
2445
2395
  [[package]]
2446
2396
  name = "windows-targets"
2447
2397
  version = "0.52.6"
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "lindera-python"
3
- version = "1.3.0"
3
+ version = "1.3.1"
4
4
  edition = "2024"
5
5
  description = "Python binding for Lindera."
6
6
  documentation = "https://docs.rs/lindera-python"
@@ -39,9 +39,9 @@ train = ["lindera/train"] # Enable training functionality
39
39
  default = ["train"] # No directories included
40
40
 
41
41
  [dependencies]
42
- pyo3 = { version = "0.26.0", features = ["extension-module"] }
42
+ pyo3 = { version = "0.27.1", features = ["extension-module"] }
43
43
  serde = { version = "1.0.228", features = ["derive"] }
44
44
  serde_json = "1.0.145"
45
45
  num_cpus = "1.17.0"
46
46
 
47
- lindera = "1.4.0"
47
+ lindera = "1.4.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lindera-python
3
- Version: 1.3.0
3
+ Version: 1.3.1
4
4
  Classifier: Programming Language :: Python :: 3
5
5
  Classifier: Programming Language :: Python :: 3.8
6
6
  Classifier: Programming Language :: Python :: 3.9
@@ -125,14 +125,14 @@ dev = ["pyTest", "pyTest-cov"]
125
125
 
126
126
  [[package]]
127
127
  name = "iniconfig"
128
- version = "2.1.0"
128
+ version = "2.3.0"
129
129
  description = "brain-dead simple config-ini parsing"
130
130
  optional = false
131
- python-versions = ">=3.8"
131
+ python-versions = ">=3.10"
132
132
  groups = ["dev"]
133
133
  files = [
134
- {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"},
135
- {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"},
134
+ {file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"},
135
+ {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"},
136
136
  ]
137
137
 
138
138
  [[package]]
@@ -405,14 +405,14 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests
405
405
 
406
406
  [[package]]
407
407
  name = "pytokens"
408
- version = "0.1.10"
409
- description = "A Fast, spec compliant Python 3.12+ tokenizer that runs on older Pythons."
408
+ version = "0.2.0"
409
+ description = "A Fast, spec compliant Python 3.13+ tokenizer that runs on older Pythons."
410
410
  optional = false
411
411
  python-versions = ">=3.8"
412
412
  groups = ["dev"]
413
413
  files = [
414
- {file = "pytokens-0.1.10-py3-none-any.whl", hash = "sha256:db7b72284e480e69fb085d9f251f66b3d2df8b7166059261258ff35f50fb711b"},
415
- {file = "pytokens-0.1.10.tar.gz", hash = "sha256:c9a4bfa0be1d26aebce03e6884ba454e842f186a59ea43a6d3b25af58223c044"},
414
+ {file = "pytokens-0.2.0-py3-none-any.whl", hash = "sha256:74d4b318c67f4295c13782ddd9abcb7e297ec5630ad060eb90abf7ebbefe59f8"},
415
+ {file = "pytokens-0.2.0.tar.gz", hash = "sha256:532d6421364e5869ea57a9523bf385f02586d4662acbcc0342afd69511b4dd43"},
416
416
  ]
417
417
 
418
418
  [package.extras]
@@ -3,7 +3,7 @@ module-name = "lindera"
3
3
 
4
4
  [project]
5
5
  name = "lindera-python"
6
- version = "1.3.0"
6
+ version = "1.3.1"
7
7
  description = "Python binding for Lindera (no embedded dictionaries)"
8
8
  authors = [{ name = "Minoru Osuka", email = "minoru.osuka@gmail.com" }]
9
9
  license = { text = "MIT" }
@@ -0,0 +1,302 @@
1
+ //! Dictionary management for morphological analysis.
2
+ //!
3
+ //! This module provides functionality for building, loading, and managing dictionaries
4
+ //! used in morphological analysis.
5
+ //!
6
+ //! # Dictionary Types
7
+ //!
8
+ //! - **Dictionary**: Main dictionary for morphological analysis
9
+ //! - **UserDictionary**: Custom user-defined dictionary for additional words
10
+ //!
11
+ //! # Examples
12
+ //!
13
+ //! ```python
14
+ //! import lindera
15
+ //!
16
+ //! # Load a pre-built dictionary
17
+ //! dictionary = lindera.load_dictionary("ipadic")
18
+ //!
19
+ //! # Build a custom dictionary
20
+ //! metadata = lindera.Metadata()
21
+ //! lindera.build_dictionary("/path/to/input", "/path/to/output", metadata)
22
+ //!
23
+ //! # Build a user dictionary
24
+ //! lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
25
+ //! ```
26
+
27
+ use std::path::Path;
28
+
29
+ use pyo3::{exceptions::PyValueError, prelude::*};
30
+
31
+ use lindera::dictionary::{
32
+ Dictionary, DictionaryBuilder, Metadata, UserDictionary,
33
+ load_dictionary as lindera_load_dictionary,
34
+ load_user_dictionary as lindera_load_user_dictionary,
35
+ };
36
+
37
+ use crate::metadata::PyMetadata;
38
+
39
+ /// A morphological analysis dictionary.
40
+ ///
41
+ /// Contains the data structures needed for tokenization and morphological analysis.
42
+ ///
43
+ /// # Examples
44
+ ///
45
+ /// ```python
46
+ /// # Load a dictionary
47
+ /// dictionary = lindera.load_dictionary("ipadic")
48
+ ///
49
+ /// # Access metadata
50
+ /// print(dictionary.metadata_name())
51
+ /// print(dictionary.metadata_encoding())
52
+ /// ```
53
+ #[pyclass(name = "Dictionary")]
54
+ #[derive(Clone)]
55
+ pub struct PyDictionary {
56
+ pub inner: Dictionary,
57
+ }
58
+
59
+ #[pymethods]
60
+ impl PyDictionary {
61
+ /// Returns the name of the dictionary metadata.
62
+ pub fn metadata_name(&self) -> String {
63
+ self.inner.metadata.name.clone()
64
+ }
65
+
66
+ /// Returns the character encoding of the dictionary.
67
+ pub fn metadata_encoding(&self) -> String {
68
+ self.inner.metadata.encoding.clone()
69
+ }
70
+
71
+ /// Returns the full metadata object of the dictionary.
72
+ pub fn metadata(&self) -> PyMetadata {
73
+ PyMetadata::from(self.inner.metadata.clone())
74
+ }
75
+
76
+ fn __str__(&self) -> String {
77
+ "Dictionary".to_string()
78
+ }
79
+
80
+ fn __repr__(&self) -> String {
81
+ "Dictionary()".to_string()
82
+ }
83
+ }
84
+
85
+ impl PyDictionary {
86
+ // Internal helper function to create PyDictionary from Lindera Dictionary
87
+ pub fn new(dictionary: Dictionary) -> Self {
88
+ Self { inner: dictionary }
89
+ }
90
+ }
91
+
92
+ /// A user-defined dictionary for custom words.
93
+ ///
94
+ /// User dictionaries allow you to add custom words and their morphological features
95
+ /// that are not present in the main dictionary.
96
+ ///
97
+ /// # Examples
98
+ ///
99
+ /// ```python
100
+ /// # Build a user dictionary
101
+ /// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
102
+ ///
103
+ /// # Load it
104
+ /// metadata = lindera.Metadata()
105
+ /// user_dict = lindera.load_user_dictionary("/path/to/output", metadata)
106
+ /// ```
107
+ #[pyclass(name = "UserDictionary")]
108
+ #[derive(Clone)]
109
+ pub struct PyUserDictionary {
110
+ pub inner: UserDictionary,
111
+ }
112
+
113
+ #[pymethods]
114
+ impl PyUserDictionary {
115
+ fn __str__(&self) -> String {
116
+ "UserDictionary".to_string()
117
+ }
118
+
119
+ fn __repr__(&self) -> String {
120
+ "UserDictionary()".to_string()
121
+ }
122
+ }
123
+
124
+ impl PyUserDictionary {
125
+ // Internal helper function to create PyUserDictionary from Lindera UserDictionary
126
+ pub fn new(user_dictionary: UserDictionary) -> Self {
127
+ Self {
128
+ inner: user_dictionary,
129
+ }
130
+ }
131
+ }
132
+
133
+ /// Builds a dictionary from source files.
134
+ ///
135
+ /// # Arguments
136
+ ///
137
+ /// * `input_dir` - Directory containing dictionary source files.
138
+ /// * `output_dir` - Directory where the built dictionary will be saved.
139
+ /// * `metadata` - Metadata configuration for the dictionary.
140
+ ///
141
+ /// # Errors
142
+ ///
143
+ /// Returns an error if the input directory doesn't exist or if the build fails.
144
+ ///
145
+ /// # Examples
146
+ ///
147
+ /// ```python
148
+ /// metadata = lindera.Metadata(name="custom", encoding="UTF-8")
149
+ /// lindera.build_dictionary("/path/to/input", "/path/to/output", metadata)
150
+ /// ```
151
+ #[pyfunction]
152
+ #[pyo3(signature = (input_dir, output_dir, metadata))]
153
+ pub fn build_dictionary(input_dir: &str, output_dir: &str, metadata: PyMetadata) -> PyResult<()> {
154
+ let input_path = Path::new(input_dir);
155
+ let output_path = Path::new(output_dir);
156
+
157
+ if !input_path.exists() {
158
+ return Err(PyValueError::new_err(format!(
159
+ "Input directory does not exist: {input_dir}"
160
+ )));
161
+ }
162
+
163
+ let builder = DictionaryBuilder::new(metadata.into());
164
+
165
+ builder
166
+ .build_dictionary(input_path, output_path)
167
+ .map_err(|e| PyValueError::new_err(format!("Failed to build dictionary: {e}")))?;
168
+
169
+ Ok(())
170
+ }
171
+
172
+ /// Builds a user dictionary from a CSV file.
173
+ ///
174
+ /// # Arguments
175
+ ///
176
+ /// * `_kind` - Dictionary kind (currently unused, reserved for future use).
177
+ /// * `input_file` - Path to the CSV file containing user dictionary entries.
178
+ /// * `output_dir` - Directory where the built user dictionary will be saved.
179
+ /// * `metadata` - Optional metadata configuration. If None, default values are used.
180
+ ///
181
+ /// # CSV Format
182
+ ///
183
+ /// The CSV file should contain entries in the format specified by the dictionary schema.
184
+ /// Typically: surface,reading,pronunciation
185
+ ///
186
+ /// # Errors
187
+ ///
188
+ /// Returns an error if the input file doesn't exist or if the build fails.
189
+ ///
190
+ /// # Examples
191
+ ///
192
+ /// ```python
193
+ /// # Build with default metadata
194
+ /// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
195
+ ///
196
+ /// # Build with custom metadata
197
+ /// metadata = lindera.Metadata()
198
+ /// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output", metadata)
199
+ /// ```
200
+ #[pyfunction]
201
+ #[pyo3(signature = (_kind, input_file, output_dir, metadata=None))]
202
+ pub fn build_user_dictionary(
203
+ _kind: &str,
204
+ input_file: &str,
205
+ output_dir: &str,
206
+ metadata: Option<crate::metadata::PyMetadata>,
207
+ ) -> PyResult<()> {
208
+ let input_path = Path::new(input_file);
209
+ let output_path = Path::new(output_dir);
210
+
211
+ if !input_path.exists() {
212
+ return Err(PyValueError::new_err(format!(
213
+ "Input file does not exist: {input_file}"
214
+ )));
215
+ }
216
+
217
+ // Use provided metadata or create default
218
+ let meta = match metadata {
219
+ Some(py_metadata) => {
220
+ let lindera_meta: Metadata = py_metadata.into();
221
+ lindera_meta
222
+ }
223
+ None => Metadata::default(),
224
+ };
225
+
226
+ let builder = DictionaryBuilder::new(meta);
227
+
228
+ // Build user dictionary from CSV
229
+ builder
230
+ .build_user_dictionary(input_path, output_path)
231
+ .map_err(|e| PyValueError::new_err(format!("Failed to build user dictionary: {e}")))?;
232
+
233
+ Ok(())
234
+ }
235
+
236
+ /// Loads a dictionary from the specified URI.
237
+ ///
238
+ /// # Arguments
239
+ ///
240
+ /// * `uri` - URI to the dictionary. Can be a file path or embedded dictionary name.
241
+ ///
242
+ /// # Supported URIs
243
+ ///
244
+ /// - File paths: `/path/to/dictionary`
245
+ /// - Embedded dictionaries: `ipadic`, `unidic`, `ko-dic`, `cc-cedict`
246
+ ///
247
+ /// # Returns
248
+ ///
249
+ /// A loaded `Dictionary` object.
250
+ ///
251
+ /// # Errors
252
+ ///
253
+ /// Returns an error if the dictionary cannot be loaded from the specified URI.
254
+ ///
255
+ /// # Examples
256
+ ///
257
+ /// ```python
258
+ /// # Load an embedded dictionary
259
+ /// dict = lindera.load_dictionary("ipadic")
260
+ ///
261
+ /// # Load from file path
262
+ /// dict = lindera.load_dictionary("/path/to/dictionary")
263
+ /// ```
264
+ #[pyfunction]
265
+ #[pyo3(signature = (uri))]
266
+ pub fn load_dictionary(uri: &str) -> PyResult<PyDictionary> {
267
+ lindera_load_dictionary(uri)
268
+ .map_err(|e| PyValueError::new_err(format!("Failed to load dictionary from '{uri}': {e}")))
269
+ .map(PyDictionary::new)
270
+ }
271
+
272
+ /// Loads a user dictionary from the specified URI.
273
+ ///
274
+ /// # Arguments
275
+ ///
276
+ /// * `uri` - URI to the user dictionary directory.
277
+ /// * `metadata` - Metadata configuration for the user dictionary.
278
+ ///
279
+ /// # Returns
280
+ ///
281
+ /// A loaded `UserDictionary` object.
282
+ ///
283
+ /// # Errors
284
+ ///
285
+ /// Returns an error if the user dictionary cannot be loaded.
286
+ ///
287
+ /// # Examples
288
+ ///
289
+ /// ```python
290
+ /// metadata = lindera.Metadata()
291
+ /// user_dict = lindera.load_user_dictionary("/path/to/user_dict", metadata)
292
+ /// ```
293
+ #[pyfunction]
294
+ #[pyo3(signature = (uri, metadata))]
295
+ pub fn load_user_dictionary(uri: &str, metadata: PyMetadata) -> PyResult<PyUserDictionary> {
296
+ let meta: Metadata = metadata.into();
297
+ lindera_load_user_dictionary(uri, &meta)
298
+ .map_err(|e| {
299
+ PyValueError::new_err(format!("Failed to load user dictionary from '{uri}': {e}"))
300
+ })
301
+ .map(PyUserDictionary::new)
302
+ }