jpreprocess 0.1.6__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ import os
2
+ from .jpreprocess import (
3
+ __version__,
4
+ JPreprocess,
5
+ build_dictionary,
6
+ JPREPROCESS_VERSION,
7
+ )
8
+ from .dictionary import download_dictionary, dictionary_path
9
+
10
+
11
+ __all__ = [
12
+ "JPreprocess",
13
+ "build_dictionary",
14
+ "download_dictionary",
15
+ "JPREPROCESS_VERSION",
16
+ ]
17
+
18
+
19
def jpreprocess(
    dictionary_version: str = f"v{JPREPROCESS_VERSION}",
    # Quoted on purpose: a bare ``str | None`` is evaluated when the function
    # is defined and raises TypeError on Python 3.8/3.9, which the package
    # metadata still declares as supported.
    user_dictionary: "str | None" = None,
) -> JPreprocess:
    """
    Create jpreprocess instance with naist-jdic dictionary.

    If the system dictionary is not present, this function will download it.

    Arguments:
    - dictionary_version (str): Version of dictionary to download.
      We don't recommend specifying this argument unless you are aware of what you are doing.
    - user_dictionary (str | None): Path to user dictionary. The extension must be ".csv" or ".bin".

    Returns:
    - JPreprocess: Instance constructed from the system dictionary path and the optional user dictionary.
    """
    dict_path = dictionary_path(dictionary_version)
    # Download only when nothing exists at the expected location yet.
    if not os.path.exists(dict_path):
        download_dictionary(dictionary_version)
    return JPreprocess(dict_path, user_dictionary)
@@ -0,0 +1,51 @@
1
+ from contextlib import ExitStack
2
+ import sys
3
+ import atexit
4
+
5
+ # from https://github.com/r9y9/pyopenjtalk/pull/74
6
+
7
+ if sys.version_info >= (3, 9):
8
+ from importlib.resources import as_file, files
9
+ else:
10
+ from importlib_resources import as_file, files
11
+
12
+ _file_manager = ExitStack()
13
+ atexit.register(_file_manager.close)
14
+ _file_ref = files(__package__)
15
+
16
+
17
def dictionary_path(version: str) -> str:
    """
    Return the path of the "naist-jdic" directory for the given dictionary version.

    The path is only computed here; nothing checks that the directory exists.

    Arguments:
    - version (str): Name of the version sub-directory inside the package resources.

    Returns:
    - str: Path of ``<package resources>/<version>/naist-jdic``.
    """
    resource = _file_ref / version / "naist-jdic"
    # Entering the as_file() context on the module-level ExitStack keeps the
    # returned path usable after this function returns.
    concrete_path = _file_manager.enter_context(as_file(resource))
    return str(concrete_path)
20
+
21
+
22
def download_dictionary(version: str) -> str:
    """
    Download the naist-jdic archive of a jpreprocess release and unpack it into the package.

    The archive is spooled into a temporary file and then extracted into the
    ``version`` sub-directory of the package resources, which is where
    ``dictionary_path`` looks for "naist-jdic". Progress is shown with tqdm when
    it is installed. Network and archive errors are not caught here and
    propagate to the caller.

    Arguments:
    - version (str): GitHub release tag to download from, or "latest" for the latest release.

    Returns:
    - str: Directory the archive was extracted into.
    """
    from urllib.request import urlopen
    import tarfile
    import tempfile

    # tqdm is optional. Only the import is guarded, so an ImportError can never
    # be confused with something raised while the download is in progress.
    try:
        from tqdm.auto import tqdm
    except ImportError:
        tqdm = None

    if version == "latest":
        url = "https://github.com/jpreprocess/jpreprocess/releases/latest/download/naist-jdic-jpreprocess.tar.gz"
    else:
        url = f"https://github.com/jpreprocess/jpreprocess/releases/download/{version}/naist-jdic-jpreprocess.tar.gz"

    target_dir = _file_ref / version

    # Iterating the response object directly splits the binary stream on
    # newline bytes, which yields arbitrary (mostly tiny) chunks for gzip data.
    # Fixed-size reads keep the number of writes predictable.
    chunk_size = 64 * 1024

    with tempfile.TemporaryFile() as file:
        print('Downloading: "{}"'.format(url))
        with urlopen(url) as response:
            chunks = iter(lambda: response.read(chunk_size), b"")
            if tqdm is None:
                for chunk in chunks:
                    file.write(chunk)
            else:
                with tqdm.wrapattr(file, "write", total=getattr(response, "length", None)) as sink:
                    for chunk in chunks:
                        sink.write(chunk)
        file.seek(0)
        print("Extracting tar file")
        # The archive comes from the network: use the "data" extraction filter
        # (rejects absolute paths, path traversal and special files) whenever
        # the running interpreter provides it.
        extract_options = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
        with tarfile.open(mode="r|gz", fileobj=file) as archive, as_file(target_dir) as extract_root:
            archive.extractall(path=extract_root, **extract_options)
        print("done")

    return str(target_dir)
@@ -0,0 +1,94 @@
1
+ from typing import TypedDict, Literal, overload
2
+
3
+ __version__: str
4
+ JPREPROCESS_VERSION: str
5
+
6
+
7
class NjdObject(TypedDict):
    """
    Dictionary form of a single NJD node.

    Lists of these are returned by ``JPreprocess.run_frontend`` and accepted by
    ``JPreprocess.make_label``.
    """

    # String-valued fields.
    string: str
    pos: str
    pos_group1: str
    pos_group2: str
    pos_group3: str
    ctype: str
    cform: str
    orig: str
    read: str
    pron: str

    # Integer-valued fields.
    acc: int
    mora_size: int

    # NOTE(review): chain_rule / chain_flag presumably carry the accent-chaining
    # information of the node -- confirm against the jpreprocess documentation.
    chain_rule: str
    chain_flag: int
22
+
23
+
24
class JPreprocess:
    """
    Text-processing frontend: produces NJD nodes, full-context labels and G2P output.
    """

    def __init__(self, dictionary: str, user_dictionary: str | None = None) -> None:
        """
        Create a JPreprocess instance.

        Arguments:
        - dictionary (str): Path to the system dictionary.
        - user_dictionary (str | None): Optional path to a user dictionary.
        """

    def run_frontend(self, text: str) -> list[NjdObject]:
        """
        Run text-processing frontend.

        Arguments:
        - text (str): Input Japanese text.

        Returns:
        - list[NjdObject]: list of NJDNode(s).
        """

    def make_label(self, njd_features: list[NjdObject]) -> list[str]:
        """
        Make full-context label using NjdObject.

        Arguments:
        - njd_features (list[NjdObject]): list of NJDNode(s).

        Returns:
        - list[str]: list of full-context labels.
        """

    def extract_fullcontext(self, text: str) -> list[str]:
        """
        Extract full-context label from the input text.

        Arguments:
        - text (str): Input Japanese text.

        Returns:
        - list[str]: list of full-context labels.
        """

    # join omitted or literally True -> a single joined string.
    @overload
    def g2p(self, text: str, kana: bool = False,
            join: Literal[True] = True) -> str: ...

    # join=False passed by keyword -> list of items.
    @overload
    def g2p(self, text: str, kana: bool = False, *,
            join: Literal[False]) -> list[str]: ...

    # join=False passed positionally (kana must then be given as well).
    @overload
    def g2p(self, text: str, kana: bool,
            join: Literal[False]) -> list[str]: ...

    # Fallback for callers whose `join` is a plain bool only known at runtime;
    # without it such calls match none of the literal overloads above.
    @overload
    def g2p(self, text: str, kana: bool = False,
            join: bool = True) -> str | list[str]:
        """
        Grapheme-to-phoneme (G2P) conversion.

        Arguments:
        - text (str): Input Japanese text.
        - kana (bool): Whether to generate alphabetical phoneme (False) or kana (True).
        - join (bool): Whether to generate a list of phoneme or kana (False) or join the output by delimiter (True).

        Returns:
        - list[str] (when join = False): list of phoneme (when kana = False) or kana (when kana = True).
        - str (when join = True): list of phoneme joined with space (when kana = False) or kana joined with empty string (when kana = True).
        """
82
+
83
+
84
def build_dictionary(
    input: str,
    output: str,
    user: bool = False,
    serializer: Literal["jpreprocess", "lindera"] = "jpreprocess",
) -> None:
    """
    Build dictionary binary file(s).

    Arguments:
    - input (str): Source path: a directory for a system dictionary, a file for a user dictionary.
    - output (str): Destination path: a directory for a system dictionary, a file for a user dictionary.
    - user (bool): False builds a system dictionary, True builds a user dictionary. Defaults to False.
    - serializer (str): Name of the serializer to use, "jpreprocess" or "lindera". Defaults to "jpreprocess".
    """
jpreprocess/py.typed ADDED
File without changes
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: jpreprocess
3
+ Version: 0.1.6
4
+ Classifier: Programming Language :: Rust
5
+ Classifier: Programming Language :: Python :: Implementation :: CPython
6
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
7
+ Classifier: License :: OSI Approved :: BSD License
8
+ Classifier: Topic :: Scientific/Engineering
9
+ Classifier: Topic :: Software Development
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Intended Audience :: Developers
12
+ Requires-Dist: importlib-resources ; python_full_version < '3.9'
13
+ Requires-Dist: pytest>=8.0.1 ; extra == 'dev'
14
+ Requires-Dist: tqdm ; extra == 'progress'
15
+ Provides-Extra: dev
16
+ Provides-Extra: progress
17
+ Requires-Python: >=3.8, <3.15
@@ -0,0 +1,9 @@
1
+ jpreprocess/__init__.py,sha256=9M2JDa43Z4iddAPp9hvL-zNNlM21mm3gPIe_VpRuPik,1039
2
+ jpreprocess/dictionary.py,sha256=NcyypwcL3yX-h6-KmiAqd00E-0D5Hc2snlXJJ8NegRM,1637
3
+ jpreprocess/jpreprocess.cp313-win_amd64.pyd,sha256=1B7QVGTmdywHDvKeG44CXFZ7XGa2F3_f6AHQm5S5NP4,3188224
4
+ jpreprocess/jpreprocess.pyi,sha256=Lx6vCk-x7x3B7cjhLZH6-oBdYnyni6o00i-NLxoMLOE,2867
5
+ jpreprocess/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ jpreprocess-0.1.6.dist-info/METADATA,sha256=yiUCuGx0zxZDlrLSKQUV6iJ0DrrYVV1ULqFDiRcSSLA,696
7
+ jpreprocess-0.1.6.dist-info/WHEEL,sha256=z3sDn4xNPtieBDo9mUKkT1e80gbhCuRsQQi1_g6mdQM,97
8
+ jpreprocess-0.1.6.dist-info/sboms/jpreprocess-python.cyclonedx.json,sha256=br7Ex7ZUtRjHlJWJ2d3XF-FPuA15P0zAl3N1owMqNTQ,203162
9
+ jpreprocess-0.1.6.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.14.1)
3
+ Root-Is-Purelib: false
4
+ Tag: cp313-cp313-win_amd64