lindera-python 1.3.0__tar.gz → 1.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lindera_python-1.3.0 → lindera_python-1.3.1}/Cargo.lock +41 -91
- {lindera_python-1.3.0 → lindera_python-1.3.1}/Cargo.toml +3 -3
- {lindera_python-1.3.0 → lindera_python-1.3.1}/PKG-INFO +1 -1
- {lindera_python-1.3.0 → lindera_python-1.3.1}/poetry.lock +8 -8
- {lindera_python-1.3.0 → lindera_python-1.3.1}/pyproject.toml +1 -1
- lindera_python-1.3.1/src/dictionary.rs +302 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/src/error.rs +8 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/src/lib.rs +39 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/src/metadata.rs +47 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/src/mode.rs +46 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/src/schema.rs +50 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/src/tokenizer.rs +160 -1
- {lindera_python-1.3.0 → lindera_python-1.3.1}/src/trainer.rs +29 -1
- {lindera_python-1.3.0 → lindera_python-1.3.1}/src/util.rs +37 -0
- lindera_python-1.3.0/src/dictionary.rs +0 -149
- {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/FUNDING.yml +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/dependabot.yml +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/workflows/periodic.yml +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/workflows/regression.yml +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/.github/workflows/release.yml +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/.gitignore +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/CHANGES.md +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/LICENSE +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/Makefile +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/README.md +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/build_ipadic.py +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/tokenize.py +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/tokenize_with_decompose.py +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/tokenize_with_filters.py +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/tokenize_with_userdict.py +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/examples/train_and_export.py +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/bocchan.txt +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/cc-cedict_metadata.json +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/cc-cedict_simple_userdic.bin +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/cc-cedict_simple_userdic.csv +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic-neologd_metadata.json +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_detailed_userdic.csv +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_metadata.json +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_mixed_userdic.csv +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_simple_userdic.bin +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_simple_userdic.csv +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_userdic_insufficient_number_of_fields.csv +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ipadic_userdic_invalid_word_cost.csv +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ko-dic_metadata.json +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ko-dic_simple_userdic.bin +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/ko-dic_simple_userdic.csv +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/lindera.yml +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/unidic_metadata.json +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/unidic_simple_userdic.bin +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/resources/unidic_simple_userdic.csv +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/tests/__init__py +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/tests/test_tokenize_ipadic.py +0 -0
- {lindera_python-1.3.0 → lindera_python-1.3.1}/tests/test_trainer.py +0 -0
|
@@ -2,15 +2,6 @@
|
|
|
2
2
|
# It is not intended for manual editing.
|
|
3
3
|
version = 4
|
|
4
4
|
|
|
5
|
-
[[package]]
|
|
6
|
-
name = "addr2line"
|
|
7
|
-
version = "0.24.2"
|
|
8
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
-
checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
|
|
10
|
-
dependencies = [
|
|
11
|
-
"gimli",
|
|
12
|
-
]
|
|
13
|
-
|
|
14
5
|
[[package]]
|
|
15
6
|
name = "adler2"
|
|
16
7
|
version = "2.0.0"
|
|
@@ -97,21 +88,6 @@ version = "1.1.0"
|
|
|
97
88
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
98
89
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
|
99
90
|
|
|
100
|
-
[[package]]
|
|
101
|
-
name = "backtrace"
|
|
102
|
-
version = "0.3.74"
|
|
103
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
104
|
-
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
|
|
105
|
-
dependencies = [
|
|
106
|
-
"addr2line",
|
|
107
|
-
"cfg-if",
|
|
108
|
-
"libc",
|
|
109
|
-
"miniz_oxide",
|
|
110
|
-
"object",
|
|
111
|
-
"rustc-demangle",
|
|
112
|
-
"windows-targets",
|
|
113
|
-
]
|
|
114
|
-
|
|
115
91
|
[[package]]
|
|
116
92
|
name = "base64"
|
|
117
93
|
version = "0.22.1"
|
|
@@ -540,12 +516,6 @@ dependencies = [
|
|
|
540
516
|
"wasm-bindgen",
|
|
541
517
|
]
|
|
542
518
|
|
|
543
|
-
[[package]]
|
|
544
|
-
name = "gimli"
|
|
545
|
-
version = "0.31.1"
|
|
546
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
547
|
-
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
|
|
548
|
-
|
|
549
519
|
[[package]]
|
|
550
520
|
name = "glob"
|
|
551
521
|
version = "0.3.3"
|
|
@@ -876,17 +846,6 @@ dependencies = [
|
|
|
876
846
|
"cfg-if",
|
|
877
847
|
]
|
|
878
848
|
|
|
879
|
-
[[package]]
|
|
880
|
-
name = "io-uring"
|
|
881
|
-
version = "0.7.9"
|
|
882
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
883
|
-
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
|
|
884
|
-
dependencies = [
|
|
885
|
-
"bitflags 2.6.0",
|
|
886
|
-
"cfg-if",
|
|
887
|
-
"libc",
|
|
888
|
-
]
|
|
889
|
-
|
|
890
849
|
[[package]]
|
|
891
850
|
name = "ipnet"
|
|
892
851
|
version = "2.10.1"
|
|
@@ -947,9 +906,9 @@ checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
|
|
947
906
|
|
|
948
907
|
[[package]]
|
|
949
908
|
name = "lindera"
|
|
950
|
-
version = "1.4.
|
|
909
|
+
version = "1.4.1"
|
|
951
910
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
952
|
-
checksum = "
|
|
911
|
+
checksum = "f0e109baf46c2494539663466a49d54bd62f655a4356757269b72d1e1019ff2c"
|
|
953
912
|
dependencies = [
|
|
954
913
|
"anyhow",
|
|
955
914
|
"bincode",
|
|
@@ -979,9 +938,9 @@ dependencies = [
|
|
|
979
938
|
|
|
980
939
|
[[package]]
|
|
981
940
|
name = "lindera-cc-cedict"
|
|
982
|
-
version = "1.4.
|
|
941
|
+
version = "1.4.1"
|
|
983
942
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
984
|
-
checksum = "
|
|
943
|
+
checksum = "965ef21c68d3f5453076366a58d05c154e639a5d1e9b0d86a048a6a3486fb1cc"
|
|
985
944
|
dependencies = [
|
|
986
945
|
"anyhow",
|
|
987
946
|
"bincode",
|
|
@@ -995,9 +954,9 @@ dependencies = [
|
|
|
995
954
|
|
|
996
955
|
[[package]]
|
|
997
956
|
name = "lindera-dictionary"
|
|
998
|
-
version = "1.4.
|
|
957
|
+
version = "1.4.1"
|
|
999
958
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1000
|
-
checksum = "
|
|
959
|
+
checksum = "d493e407b18951da7852d2b021b1f7e001a2963accb17a321c8fd48b60a9e343"
|
|
1001
960
|
dependencies = [
|
|
1002
961
|
"anyhow",
|
|
1003
962
|
"bincode",
|
|
@@ -1030,9 +989,9 @@ dependencies = [
|
|
|
1030
989
|
|
|
1031
990
|
[[package]]
|
|
1032
991
|
name = "lindera-ipadic"
|
|
1033
|
-
version = "1.4.
|
|
992
|
+
version = "1.4.1"
|
|
1034
993
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1035
|
-
checksum = "
|
|
994
|
+
checksum = "567746f82c01052e05539f2731ce2ad62eddebe5ae96453e9534a87bc9cfafc9"
|
|
1036
995
|
dependencies = [
|
|
1037
996
|
"anyhow",
|
|
1038
997
|
"bincode",
|
|
@@ -1046,9 +1005,9 @@ dependencies = [
|
|
|
1046
1005
|
|
|
1047
1006
|
[[package]]
|
|
1048
1007
|
name = "lindera-ipadic-neologd"
|
|
1049
|
-
version = "1.4.
|
|
1008
|
+
version = "1.4.1"
|
|
1050
1009
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1051
|
-
checksum = "
|
|
1010
|
+
checksum = "350f17ced5cbf2964ee29ddef1094703fd1df7c20d464dec6106b7b7ee038030"
|
|
1052
1011
|
dependencies = [
|
|
1053
1012
|
"anyhow",
|
|
1054
1013
|
"bincode",
|
|
@@ -1062,9 +1021,9 @@ dependencies = [
|
|
|
1062
1021
|
|
|
1063
1022
|
[[package]]
|
|
1064
1023
|
name = "lindera-ko-dic"
|
|
1065
|
-
version = "1.4.
|
|
1024
|
+
version = "1.4.1"
|
|
1066
1025
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1067
|
-
checksum = "
|
|
1026
|
+
checksum = "d2dda2995055967b409c4d1f311b7db796a3279c78607838325f8892a1542960"
|
|
1068
1027
|
dependencies = [
|
|
1069
1028
|
"anyhow",
|
|
1070
1029
|
"bincode",
|
|
@@ -1078,7 +1037,7 @@ dependencies = [
|
|
|
1078
1037
|
|
|
1079
1038
|
[[package]]
|
|
1080
1039
|
name = "lindera-python"
|
|
1081
|
-
version = "1.3.0"
|
|
1040
|
+
version = "1.3.1"
|
|
1082
1041
|
dependencies = [
|
|
1083
1042
|
"lindera",
|
|
1084
1043
|
"num_cpus",
|
|
@@ -1089,9 +1048,9 @@ dependencies = [
|
|
|
1089
1048
|
|
|
1090
1049
|
[[package]]
|
|
1091
1050
|
name = "lindera-unidic"
|
|
1092
|
-
version = "1.4.
|
|
1051
|
+
version = "1.4.1"
|
|
1093
1052
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1094
|
-
checksum = "
|
|
1053
|
+
checksum = "e6a03bdf3acbd493e95ac182fef6194c09556a04dcbebbaeddca47d5ae6ec5f6"
|
|
1095
1054
|
dependencies = [
|
|
1096
1055
|
"anyhow",
|
|
1097
1056
|
"bincode",
|
|
@@ -1222,15 +1181,6 @@ dependencies = [
|
|
|
1222
1181
|
"libc",
|
|
1223
1182
|
]
|
|
1224
1183
|
|
|
1225
|
-
[[package]]
|
|
1226
|
-
name = "object"
|
|
1227
|
-
version = "0.36.5"
|
|
1228
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1229
|
-
checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e"
|
|
1230
|
-
dependencies = [
|
|
1231
|
-
"memchr",
|
|
1232
|
-
]
|
|
1233
|
-
|
|
1234
1184
|
[[package]]
|
|
1235
1185
|
name = "once_cell"
|
|
1236
1186
|
version = "1.21.3"
|
|
@@ -1293,9 +1243,9 @@ dependencies = [
|
|
|
1293
1243
|
|
|
1294
1244
|
[[package]]
|
|
1295
1245
|
name = "pyo3"
|
|
1296
|
-
version = "0.
|
|
1246
|
+
version = "0.27.1"
|
|
1297
1247
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1298
|
-
checksum = "
|
|
1248
|
+
checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf"
|
|
1299
1249
|
dependencies = [
|
|
1300
1250
|
"indoc",
|
|
1301
1251
|
"libc",
|
|
@@ -1310,18 +1260,18 @@ dependencies = [
|
|
|
1310
1260
|
|
|
1311
1261
|
[[package]]
|
|
1312
1262
|
name = "pyo3-build-config"
|
|
1313
|
-
version = "0.
|
|
1263
|
+
version = "0.27.1"
|
|
1314
1264
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1315
|
-
checksum = "
|
|
1265
|
+
checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb"
|
|
1316
1266
|
dependencies = [
|
|
1317
1267
|
"target-lexicon",
|
|
1318
1268
|
]
|
|
1319
1269
|
|
|
1320
1270
|
[[package]]
|
|
1321
1271
|
name = "pyo3-ffi"
|
|
1322
|
-
version = "0.
|
|
1272
|
+
version = "0.27.1"
|
|
1323
1273
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1324
|
-
checksum = "
|
|
1274
|
+
checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be"
|
|
1325
1275
|
dependencies = [
|
|
1326
1276
|
"libc",
|
|
1327
1277
|
"pyo3-build-config",
|
|
@@ -1329,9 +1279,9 @@ dependencies = [
|
|
|
1329
1279
|
|
|
1330
1280
|
[[package]]
|
|
1331
1281
|
name = "pyo3-macros"
|
|
1332
|
-
version = "0.
|
|
1282
|
+
version = "0.27.1"
|
|
1333
1283
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1334
|
-
checksum = "
|
|
1284
|
+
checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71"
|
|
1335
1285
|
dependencies = [
|
|
1336
1286
|
"proc-macro2",
|
|
1337
1287
|
"pyo3-macros-backend",
|
|
@@ -1341,9 +1291,9 @@ dependencies = [
|
|
|
1341
1291
|
|
|
1342
1292
|
[[package]]
|
|
1343
1293
|
name = "pyo3-macros-backend"
|
|
1344
|
-
version = "0.
|
|
1294
|
+
version = "0.27.1"
|
|
1345
1295
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1346
|
-
checksum = "
|
|
1296
|
+
checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b"
|
|
1347
1297
|
dependencies = [
|
|
1348
1298
|
"heck",
|
|
1349
1299
|
"proc-macro2",
|
|
@@ -1595,12 +1545,6 @@ dependencies = [
|
|
|
1595
1545
|
"hashbrown",
|
|
1596
1546
|
]
|
|
1597
1547
|
|
|
1598
|
-
[[package]]
|
|
1599
|
-
name = "rustc-demangle"
|
|
1600
|
-
version = "0.1.24"
|
|
1601
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1602
|
-
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
|
1603
|
-
|
|
1604
1548
|
[[package]]
|
|
1605
1549
|
name = "rustc-hash"
|
|
1606
1550
|
version = "2.1.1"
|
|
@@ -2028,27 +1972,24 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
|
|
2028
1972
|
|
|
2029
1973
|
[[package]]
|
|
2030
1974
|
name = "tokio"
|
|
2031
|
-
version = "1.
|
|
1975
|
+
version = "1.48.0"
|
|
2032
1976
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2033
|
-
checksum = "
|
|
1977
|
+
checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
|
|
2034
1978
|
dependencies = [
|
|
2035
|
-
"backtrace",
|
|
2036
1979
|
"bytes",
|
|
2037
|
-
"io-uring",
|
|
2038
1980
|
"libc",
|
|
2039
1981
|
"mio",
|
|
2040
1982
|
"pin-project-lite",
|
|
2041
|
-
"slab",
|
|
2042
1983
|
"socket2 0.6.0",
|
|
2043
1984
|
"tokio-macros",
|
|
2044
|
-
"windows-sys 0.
|
|
1985
|
+
"windows-sys 0.61.2",
|
|
2045
1986
|
]
|
|
2046
1987
|
|
|
2047
1988
|
[[package]]
|
|
2048
1989
|
name = "tokio-macros"
|
|
2049
|
-
version = "2.
|
|
1990
|
+
version = "2.6.0"
|
|
2050
1991
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2051
|
-
checksum = "
|
|
1992
|
+
checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
|
|
2052
1993
|
dependencies = [
|
|
2053
1994
|
"proc-macro2",
|
|
2054
1995
|
"quote",
|
|
@@ -2387,9 +2328,9 @@ dependencies = [
|
|
|
2387
2328
|
|
|
2388
2329
|
[[package]]
|
|
2389
2330
|
name = "windows-link"
|
|
2390
|
-
version = "0.2.
|
|
2331
|
+
version = "0.2.1"
|
|
2391
2332
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2392
|
-
checksum = "
|
|
2333
|
+
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
|
2393
2334
|
|
|
2394
2335
|
[[package]]
|
|
2395
2336
|
name = "windows-result"
|
|
@@ -2442,6 +2383,15 @@ dependencies = [
|
|
|
2442
2383
|
"windows-targets",
|
|
2443
2384
|
]
|
|
2444
2385
|
|
|
2386
|
+
[[package]]
|
|
2387
|
+
name = "windows-sys"
|
|
2388
|
+
version = "0.61.2"
|
|
2389
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2390
|
+
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
|
2391
|
+
dependencies = [
|
|
2392
|
+
"windows-link",
|
|
2393
|
+
]
|
|
2394
|
+
|
|
2445
2395
|
[[package]]
|
|
2446
2396
|
name = "windows-targets"
|
|
2447
2397
|
version = "0.52.6"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "lindera-python"
|
|
3
|
-
version = "1.3.0"
|
|
3
|
+
version = "1.3.1"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
description = "Python binding for Lindera."
|
|
6
6
|
documentation = "https://docs.rs/lindera-python"
|
|
@@ -39,9 +39,9 @@ train = ["lindera/train"] # Enable training functionality
|
|
|
39
39
|
default = ["train"] # No directories included
|
|
40
40
|
|
|
41
41
|
[dependencies]
|
|
42
|
-
pyo3 = { version = "0.
|
|
42
|
+
pyo3 = { version = "0.27.1", features = ["extension-module"] }
|
|
43
43
|
serde = { version = "1.0.228", features = ["derive"] }
|
|
44
44
|
serde_json = "1.0.145"
|
|
45
45
|
num_cpus = "1.17.0"
|
|
46
46
|
|
|
47
|
-
lindera = "1.4.
|
|
47
|
+
lindera = "1.4.1"
|
|
@@ -125,14 +125,14 @@ dev = ["pyTest", "pyTest-cov"]
|
|
|
125
125
|
|
|
126
126
|
[[package]]
|
|
127
127
|
name = "iniconfig"
|
|
128
|
-
version = "2.
|
|
128
|
+
version = "2.3.0"
|
|
129
129
|
description = "brain-dead simple config-ini parsing"
|
|
130
130
|
optional = false
|
|
131
|
-
python-versions = ">=3.
|
|
131
|
+
python-versions = ">=3.10"
|
|
132
132
|
groups = ["dev"]
|
|
133
133
|
files = [
|
|
134
|
-
{file = "iniconfig-2.
|
|
135
|
-
{file = "iniconfig-2.
|
|
134
|
+
{file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"},
|
|
135
|
+
{file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"},
|
|
136
136
|
]
|
|
137
137
|
|
|
138
138
|
[[package]]
|
|
@@ -405,14 +405,14 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests
|
|
|
405
405
|
|
|
406
406
|
[[package]]
|
|
407
407
|
name = "pytokens"
|
|
408
|
-
version = "0.
|
|
409
|
-
description = "A Fast, spec compliant Python 3.
|
|
408
|
+
version = "0.2.0"
|
|
409
|
+
description = "A Fast, spec compliant Python 3.13+ tokenizer that runs on older Pythons."
|
|
410
410
|
optional = false
|
|
411
411
|
python-versions = ">=3.8"
|
|
412
412
|
groups = ["dev"]
|
|
413
413
|
files = [
|
|
414
|
-
{file = "pytokens-0.
|
|
415
|
-
{file = "pytokens-0.
|
|
414
|
+
{file = "pytokens-0.2.0-py3-none-any.whl", hash = "sha256:74d4b318c67f4295c13782ddd9abcb7e297ec5630ad060eb90abf7ebbefe59f8"},
|
|
415
|
+
{file = "pytokens-0.2.0.tar.gz", hash = "sha256:532d6421364e5869ea57a9523bf385f02586d4662acbcc0342afd69511b4dd43"},
|
|
416
416
|
]
|
|
417
417
|
|
|
418
418
|
[package.extras]
|
|
@@ -3,7 +3,7 @@ module-name = "lindera"
|
|
|
3
3
|
|
|
4
4
|
[project]
|
|
5
5
|
name = "lindera-python"
|
|
6
|
-
version = "1.3.0"
|
|
6
|
+
version = "1.3.1"
|
|
7
7
|
description = "Python binding for Lindera (no embedded dictionaries)"
|
|
8
8
|
authors = [{ name = "Minoru Osuka", email = "minoru.osuka@gmail.com" }]
|
|
9
9
|
license = { text = "MIT" }
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
//! Dictionary management for morphological analysis.
|
|
2
|
+
//!
|
|
3
|
+
//! This module provides functionality for building, loading, and managing dictionaries
|
|
4
|
+
//! used in morphological analysis.
|
|
5
|
+
//!
|
|
6
|
+
//! # Dictionary Types
|
|
7
|
+
//!
|
|
8
|
+
//! - **Dictionary**: Main dictionary for morphological analysis
|
|
9
|
+
//! - **UserDictionary**: Custom user-defined dictionary for additional words
|
|
10
|
+
//!
|
|
11
|
+
//! # Examples
|
|
12
|
+
//!
|
|
13
|
+
//! ```python
|
|
14
|
+
//! import lindera
|
|
15
|
+
//!
|
|
16
|
+
//! # Load a pre-built dictionary
|
|
17
|
+
//! dictionary = lindera.load_dictionary("ipadic")
|
|
18
|
+
//!
|
|
19
|
+
//! # Build a custom dictionary
|
|
20
|
+
//! metadata = lindera.Metadata()
|
|
21
|
+
//! lindera.build_dictionary("/path/to/input", "/path/to/output", metadata)
|
|
22
|
+
//!
|
|
23
|
+
//! # Build a user dictionary
|
|
24
|
+
//! lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
|
|
25
|
+
//! ```
|
|
26
|
+
|
|
27
|
+
use std::path::Path;
|
|
28
|
+
|
|
29
|
+
use pyo3::{exceptions::PyValueError, prelude::*};
|
|
30
|
+
|
|
31
|
+
use lindera::dictionary::{
|
|
32
|
+
Dictionary, DictionaryBuilder, Metadata, UserDictionary,
|
|
33
|
+
load_dictionary as lindera_load_dictionary,
|
|
34
|
+
load_user_dictionary as lindera_load_user_dictionary,
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
use crate::metadata::PyMetadata;
|
|
38
|
+
|
|
39
|
+
/// A morphological analysis dictionary.
|
|
40
|
+
///
|
|
41
|
+
/// Contains the data structures needed for tokenization and morphological analysis.
|
|
42
|
+
///
|
|
43
|
+
/// # Examples
|
|
44
|
+
///
|
|
45
|
+
/// ```python
|
|
46
|
+
/// # Load a dictionary
|
|
47
|
+
/// dictionary = lindera.load_dictionary("ipadic")
|
|
48
|
+
///
|
|
49
|
+
/// # Access metadata
|
|
50
|
+
/// print(dictionary.metadata_name())
|
|
51
|
+
/// print(dictionary.metadata_encoding())
|
|
52
|
+
/// ```
|
|
53
|
+
#[pyclass(name = "Dictionary")]
|
|
54
|
+
#[derive(Clone)]
|
|
55
|
+
pub struct PyDictionary {
|
|
56
|
+
pub inner: Dictionary,
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
#[pymethods]
|
|
60
|
+
impl PyDictionary {
|
|
61
|
+
/// Returns the name of the dictionary metadata.
|
|
62
|
+
pub fn metadata_name(&self) -> String {
|
|
63
|
+
self.inner.metadata.name.clone()
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/// Returns the character encoding of the dictionary.
|
|
67
|
+
pub fn metadata_encoding(&self) -> String {
|
|
68
|
+
self.inner.metadata.encoding.clone()
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/// Returns the full metadata object of the dictionary.
|
|
72
|
+
pub fn metadata(&self) -> PyMetadata {
|
|
73
|
+
PyMetadata::from(self.inner.metadata.clone())
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
fn __str__(&self) -> String {
|
|
77
|
+
"Dictionary".to_string()
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
fn __repr__(&self) -> String {
|
|
81
|
+
"Dictionary()".to_string()
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
impl PyDictionary {
|
|
86
|
+
// Internal helper function to create PyDictionary from Lindera Dictionary
|
|
87
|
+
pub fn new(dictionary: Dictionary) -> Self {
|
|
88
|
+
Self { inner: dictionary }
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/// A user-defined dictionary for custom words.
|
|
93
|
+
///
|
|
94
|
+
/// User dictionaries allow you to add custom words and their morphological features
|
|
95
|
+
/// that are not present in the main dictionary.
|
|
96
|
+
///
|
|
97
|
+
/// # Examples
|
|
98
|
+
///
|
|
99
|
+
/// ```python
|
|
100
|
+
/// # Build a user dictionary
|
|
101
|
+
/// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
|
|
102
|
+
///
|
|
103
|
+
/// # Load it
|
|
104
|
+
/// metadata = lindera.Metadata()
|
|
105
|
+
/// user_dict = lindera.load_user_dictionary("/path/to/output", metadata)
|
|
106
|
+
/// ```
|
|
107
|
+
#[pyclass(name = "UserDictionary")]
|
|
108
|
+
#[derive(Clone)]
|
|
109
|
+
pub struct PyUserDictionary {
|
|
110
|
+
pub inner: UserDictionary,
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
#[pymethods]
|
|
114
|
+
impl PyUserDictionary {
|
|
115
|
+
fn __str__(&self) -> String {
|
|
116
|
+
"UserDictionary".to_string()
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
fn __repr__(&self) -> String {
|
|
120
|
+
"UserDictionary()".to_string()
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
impl PyUserDictionary {
|
|
125
|
+
// Internal helper function to create PyUserDictionary from Lindera UserDictionary
|
|
126
|
+
pub fn new(user_dictionary: UserDictionary) -> Self {
|
|
127
|
+
Self {
|
|
128
|
+
inner: user_dictionary,
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/// Builds a dictionary from source files.
|
|
134
|
+
///
|
|
135
|
+
/// # Arguments
|
|
136
|
+
///
|
|
137
|
+
/// * `input_dir` - Directory containing dictionary source files.
|
|
138
|
+
/// * `output_dir` - Directory where the built dictionary will be saved.
|
|
139
|
+
/// * `metadata` - Metadata configuration for the dictionary.
|
|
140
|
+
///
|
|
141
|
+
/// # Errors
|
|
142
|
+
///
|
|
143
|
+
/// Returns an error if the input directory doesn't exist or if the build fails.
|
|
144
|
+
///
|
|
145
|
+
/// # Examples
|
|
146
|
+
///
|
|
147
|
+
/// ```python
|
|
148
|
+
/// metadata = lindera.Metadata(name="custom", encoding="UTF-8")
|
|
149
|
+
/// lindera.build_dictionary("/path/to/input", "/path/to/output", metadata)
|
|
150
|
+
/// ```
|
|
151
|
+
#[pyfunction]
|
|
152
|
+
#[pyo3(signature = (input_dir, output_dir, metadata))]
|
|
153
|
+
pub fn build_dictionary(input_dir: &str, output_dir: &str, metadata: PyMetadata) -> PyResult<()> {
|
|
154
|
+
let input_path = Path::new(input_dir);
|
|
155
|
+
let output_path = Path::new(output_dir);
|
|
156
|
+
|
|
157
|
+
if !input_path.exists() {
|
|
158
|
+
return Err(PyValueError::new_err(format!(
|
|
159
|
+
"Input directory does not exist: {input_dir}"
|
|
160
|
+
)));
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
let builder = DictionaryBuilder::new(metadata.into());
|
|
164
|
+
|
|
165
|
+
builder
|
|
166
|
+
.build_dictionary(input_path, output_path)
|
|
167
|
+
.map_err(|e| PyValueError::new_err(format!("Failed to build dictionary: {e}")))?;
|
|
168
|
+
|
|
169
|
+
Ok(())
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/// Builds a user dictionary from a CSV file.
|
|
173
|
+
///
|
|
174
|
+
/// # Arguments
|
|
175
|
+
///
|
|
176
|
+
/// * `_kind` - Dictionary kind (currently unused, reserved for future use).
|
|
177
|
+
/// * `input_file` - Path to the CSV file containing user dictionary entries.
|
|
178
|
+
/// * `output_dir` - Directory where the built user dictionary will be saved.
|
|
179
|
+
/// * `metadata` - Optional metadata configuration. If None, default values are used.
|
|
180
|
+
///
|
|
181
|
+
/// # CSV Format
|
|
182
|
+
///
|
|
183
|
+
/// The CSV file should contain entries in the format specified by the dictionary schema.
|
|
184
|
+
/// Typically: surface,reading,pronunciation
|
|
185
|
+
///
|
|
186
|
+
/// # Errors
|
|
187
|
+
///
|
|
188
|
+
/// Returns an error if the input file doesn't exist or if the build fails.
|
|
189
|
+
///
|
|
190
|
+
/// # Examples
|
|
191
|
+
///
|
|
192
|
+
/// ```python
|
|
193
|
+
/// # Build with default metadata
|
|
194
|
+
/// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
|
|
195
|
+
///
|
|
196
|
+
/// # Build with custom metadata
|
|
197
|
+
/// metadata = lindera.Metadata()
|
|
198
|
+
/// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output", metadata)
|
|
199
|
+
/// ```
|
|
200
|
+
#[pyfunction]
|
|
201
|
+
#[pyo3(signature = (_kind, input_file, output_dir, metadata=None))]
|
|
202
|
+
pub fn build_user_dictionary(
|
|
203
|
+
_kind: &str,
|
|
204
|
+
input_file: &str,
|
|
205
|
+
output_dir: &str,
|
|
206
|
+
metadata: Option<crate::metadata::PyMetadata>,
|
|
207
|
+
) -> PyResult<()> {
|
|
208
|
+
let input_path = Path::new(input_file);
|
|
209
|
+
let output_path = Path::new(output_dir);
|
|
210
|
+
|
|
211
|
+
if !input_path.exists() {
|
|
212
|
+
return Err(PyValueError::new_err(format!(
|
|
213
|
+
"Input file does not exist: {input_file}"
|
|
214
|
+
)));
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
// Use provided metadata or create default
|
|
218
|
+
let meta = match metadata {
|
|
219
|
+
Some(py_metadata) => {
|
|
220
|
+
let lindera_meta: Metadata = py_metadata.into();
|
|
221
|
+
lindera_meta
|
|
222
|
+
}
|
|
223
|
+
None => Metadata::default(),
|
|
224
|
+
};
|
|
225
|
+
|
|
226
|
+
let builder = DictionaryBuilder::new(meta);
|
|
227
|
+
|
|
228
|
+
// Build user dictionary from CSV
|
|
229
|
+
builder
|
|
230
|
+
.build_user_dictionary(input_path, output_path)
|
|
231
|
+
.map_err(|e| PyValueError::new_err(format!("Failed to build user dictionary: {e}")))?;
|
|
232
|
+
|
|
233
|
+
Ok(())
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/// Loads a dictionary from the specified URI.
|
|
237
|
+
///
|
|
238
|
+
/// # Arguments
|
|
239
|
+
///
|
|
240
|
+
/// * `uri` - URI to the dictionary. Can be a file path or embedded dictionary name.
|
|
241
|
+
///
|
|
242
|
+
/// # Supported URIs
|
|
243
|
+
///
|
|
244
|
+
/// - File paths: `/path/to/dictionary`
|
|
245
|
+
/// - Embedded dictionaries: `ipadic`, `unidic`, `ko-dic`, `cc-cedict`
|
|
246
|
+
///
|
|
247
|
+
/// # Returns
|
|
248
|
+
///
|
|
249
|
+
/// A loaded `Dictionary` object.
|
|
250
|
+
///
|
|
251
|
+
/// # Errors
|
|
252
|
+
///
|
|
253
|
+
/// Returns an error if the dictionary cannot be loaded from the specified URI.
|
|
254
|
+
///
|
|
255
|
+
/// # Examples
|
|
256
|
+
///
|
|
257
|
+
/// ```python
|
|
258
|
+
/// # Load an embedded dictionary
|
|
259
|
+
/// dict = lindera.load_dictionary("ipadic")
|
|
260
|
+
///
|
|
261
|
+
/// # Load from file path
|
|
262
|
+
/// dict = lindera.load_dictionary("/path/to/dictionary")
|
|
263
|
+
/// ```
|
|
264
|
+
#[pyfunction]
|
|
265
|
+
#[pyo3(signature = (uri))]
|
|
266
|
+
pub fn load_dictionary(uri: &str) -> PyResult<PyDictionary> {
|
|
267
|
+
lindera_load_dictionary(uri)
|
|
268
|
+
.map_err(|e| PyValueError::new_err(format!("Failed to load dictionary from '{uri}': {e}")))
|
|
269
|
+
.map(PyDictionary::new)
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/// Loads a user dictionary from the specified URI.
|
|
273
|
+
///
|
|
274
|
+
/// # Arguments
|
|
275
|
+
///
|
|
276
|
+
/// * `uri` - URI to the user dictionary directory.
|
|
277
|
+
/// * `metadata` - Metadata configuration for the user dictionary.
|
|
278
|
+
///
|
|
279
|
+
/// # Returns
|
|
280
|
+
///
|
|
281
|
+
/// A loaded `UserDictionary` object.
|
|
282
|
+
///
|
|
283
|
+
/// # Errors
|
|
284
|
+
///
|
|
285
|
+
/// Returns an error if the user dictionary cannot be loaded.
|
|
286
|
+
///
|
|
287
|
+
/// # Examples
|
|
288
|
+
///
|
|
289
|
+
/// ```python
|
|
290
|
+
/// metadata = lindera.Metadata()
|
|
291
|
+
/// user_dict = lindera.load_user_dictionary("/path/to/user_dict", metadata)
|
|
292
|
+
/// ```
|
|
293
|
+
#[pyfunction]
|
|
294
|
+
#[pyo3(signature = (uri, metadata))]
|
|
295
|
+
pub fn load_user_dictionary(uri: &str, metadata: PyMetadata) -> PyResult<PyUserDictionary> {
|
|
296
|
+
let meta: Metadata = metadata.into();
|
|
297
|
+
lindera_load_user_dictionary(uri, &meta)
|
|
298
|
+
.map_err(|e| {
|
|
299
|
+
PyValueError::new_err(format!("Failed to load user dictionary from '{uri}': {e}"))
|
|
300
|
+
})
|
|
301
|
+
.map(PyUserDictionary::new)
|
|
302
|
+
}
|