pysinrom 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pysinrom-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Anonymous Authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,84 @@
1
+ Metadata-Version: 2.4
2
+ Name: pysinrom
3
+ Version: 0.1.0
4
+ Summary: Python tools for Sinitic romanization, currently only as research utilities for conversion between Jyutping and CantRomZJ1
5
+ License-Expression: MIT
6
+ Classifier: Development Status :: 3 - Alpha
7
+ Classifier: Operating System :: OS Independent
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Topic :: Text Processing :: Linguistic
14
+ Requires-Python: >=3.10
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: pycantonese<6,>=5.0
18
+ Provides-Extra: test
19
+ Requires-Dist: pytest>=8; extra == "test"
20
+ Dynamic: license-file
21
+
22
+ # PySinRom
23
+
24
+ ## Installation
25
+
26
+ After publication on PyPI:
27
+
28
+ ```bash
29
+ pip install pysinrom
30
+ ```
31
+
32
+ For local installation from the project directory:
33
+
34
+ ```bash
35
+ python -m pip install .
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ ```python
41
+ from pysinrom import (
42
+ jyutping_to_cantromzj1,
43
+ cantromzj1_to_jyutping,
44
+ )
45
+
46
+ cantromzj1, syllables = jyutping_to_cantromzj1("hoeng1gong2")
47
+ print(cantromzj1)
48
+ # heong1A|55gong2A|35
49
+
50
+ jyutping, syllables = cantromzj1_to_jyutping("heong1A|55gong2A|35")
51
+ print(jyutping)
52
+ # hoeng1gong2
53
+ ```
54
+
55
+ Space-separated input and output are also supported:
56
+
57
+ ```python
58
+ result = jyutping_to_cantromzj1(
59
+ "hoeng1 gong2",
60
+ output_separator=" ",
61
+ return_mode="string",
62
+ )
63
+ print(result)
64
+ # heong1A|55 gong2A|35
65
+ ```
66
+
67
+ ## Main functions
68
+
69
+ - `jyutping_to_cantromzj1()`
70
+ - `jyutping_syllable_to_cantromzj1()`
71
+ - `cantromzj1_to_jyutping()`
72
+ - `cantromzj1_syllable_to_jyutping()`
73
+ - `parse_cantromzj1()`
74
+ - `parse_cantromzj1_syllable()`
75
+
76
+ The default return mode for full-string conversion is a tuple containing the joined output string and a list of converted syllables. Set `return_mode="string"` or `return_mode="list"` to request only one representation.
77
+
78
+ ## Development status
79
+
80
+ This is research software released as an alpha version. The current implementation focuses on the romanization correspondences described in the accompanying anonymous manuscript.
81
+
82
+ ## License
83
+
84
+ MIT License.
@@ -0,0 +1,63 @@
1
+ # PySinRom
2
+
3
+ ## Installation
4
+
5
+ Install the released package from PyPI:
6
+
7
+ ```bash
8
+ pip install pysinrom
9
+ ```
10
+
11
+ For local installation from the project directory:
12
+
13
+ ```bash
14
+ python -m pip install .
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ ```python
20
+ from pysinrom import (
21
+ jyutping_to_cantromzj1,
22
+ cantromzj1_to_jyutping,
23
+ )
24
+
25
+ cantromzj1, syllables = jyutping_to_cantromzj1("hoeng1gong2")
26
+ print(cantromzj1)
27
+ # heong1A|55gong2A|35
28
+
29
+ jyutping, syllables = cantromzj1_to_jyutping("heong1A|55gong2A|35")
30
+ print(jyutping)
31
+ # hoeng1gong2
32
+ ```
33
+
34
+ Space-separated input and output are also supported:
35
+
36
+ ```python
37
+ result = jyutping_to_cantromzj1(
38
+ "hoeng1 gong2",
39
+ output_separator=" ",
40
+ return_mode="string",
41
+ )
42
+ print(result)
43
+ # heong1A|55 gong2A|35
44
+ ```
45
+
46
+ ## Main functions
47
+
48
+ - `jyutping_to_cantromzj1()`
49
+ - `jyutping_syllable_to_cantromzj1()`
50
+ - `cantromzj1_to_jyutping()`
51
+ - `cantromzj1_syllable_to_jyutping()`
52
+ - `parse_cantromzj1()`
53
+ - `parse_cantromzj1_syllable()`
54
+
55
+ The default return mode for full-string conversion is a tuple containing the joined output string and a list of converted syllables. Set `return_mode="string"` or `return_mode="list"` to request only one representation.
56
+
57
+ ## Development status
58
+
59
+ This is research software released as an alpha version. The current implementation focuses on the romanization correspondences described in the accompanying anonymous manuscript.
60
+
61
+ ## License
62
+
63
+ MIT License.
@@ -0,0 +1,34 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pysinrom"
7
+ version = "0.1.0"
8
+ description = "Python tools for Sinitic romanization, currently only as research utilities for conversion between Jyutping and CantRomZJ1"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ dependencies = [
14
+ "pycantonese>=5.0,<6",
15
+ ]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Programming Language :: Python :: 3.13",
24
+ "Topic :: Text Processing :: Linguistic",
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ test = ["pytest>=8"]
29
+
30
+ [tool.setuptools.packages.find]
31
+ where = ["src"]
32
+
33
+ [tool.pytest.ini_options]
34
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,28 @@
1
+ """Bidirectional conversion between Jyutping and CantRomZJ1."""
2
+
3
+ from .jyutping_to_cantromzj1 import (
4
+ jyutping_syllable_to_cantromzj1,
5
+ jyutping_to_cantromzj1,
6
+ parse_jyutping_to_cantromzj1_objects,
7
+ )
8
+ from .cantromzj1_parse import (
9
+ parse_cantromzj1,
10
+ parse_cantromzj1_syllable,
11
+ )
12
+ from .cantromzj1_to_jyutping import (
13
+ cantromzj1_syllable_to_jyutping,
14
+ cantromzj1_to_jyutping,
15
+ )
16
+
17
# Package version string.
__version__ = "0.1.0"

# Names exported by ``from pysinrom import *``; every entry other than
# ``__version__`` is imported from a submodule at the top of this file.
__all__ = [
    "__version__",
    # from .jyutping_to_cantromzj1
    "jyutping_syllable_to_cantromzj1",
    "jyutping_to_cantromzj1",
    "parse_jyutping_to_cantromzj1_objects",
    # from .cantromzj1_parse
    "parse_cantromzj1",
    "parse_cantromzj1_syllable",
    # from .cantromzj1_to_jyutping
    "cantromzj1_syllable_to_jyutping",
    "cantromzj1_to_jyutping",
]
@@ -0,0 +1,100 @@
1
+ """Shared maps for Jyutping <-> CantRomZJ1 conversion."""
2
+
3
+ from __future__ import annotations
4
+
5
# Stop codas; a syllable ending in one of these takes an entering tone.
CHECKED_CODAS = set("ptk")

# Jyutping nucleus -> CantRomZJ1 nucleus (only the spellings that differ).
JYUTPING_NUCLEUS_TO_CANTROMZJ1 = {"aa": "a", "a": "e", "e": "ea", "oe": "eo", "eo": "oe"}

# Inverse of the table above: CantRomZJ1 nucleus -> Jyutping nucleus.
CANTROMZJ1_NUCLEUS_TO_JYUTPING = dict(
    (zj1_nucleus, jp_nucleus)
    for jp_nucleus, zj1_nucleus in JYUTPING_NUCLEUS_TO_CANTROMZJ1.items()
)

# Non-entering tones
JYUTPING_TONE_TO_CANTROMZJ1_OPEN = {
    "1": "1A|55", "4": "1B|21",
    "2": "2A|35", "5": "2B|13",
    "3": "3A|33", "6": "3B|22",
}

# Entering tones, identified by checked codas -p, -t, -k
JYUTPING_TONE_TO_CANTROMZJ1_CHECKED = {"1": "4Aa|5", "3": "4Ab|3", "6": "4B|2"}

# CantRomZJ1 tone notation -> Jyutping tone number, built by inverting the
# non-entering table first and the entering table second.
CANTROMZJ1_TONE_TO_JYUTPING = {
    zj1_tone: jp_tone
    for table in (JYUTPING_TONE_TO_CANTROMZJ1_OPEN, JYUTPING_TONE_TO_CANTROMZJ1_CHECKED)
    for jp_tone, zj1_tone in table.items()
}

# All CantRomZJ1 tone notations, longest first.
CANTROMZJ1_TONES = tuple(sorted(CANTROMZJ1_TONE_TO_JYUTPING, key=len, reverse=True))

# PyCantonese/Jyutping onset inventory: two-letter onsets, then single-letter
# onsets, then the empty onset.
# Keep "onset" in the returned PyCantonese Jyutping object because that is the library field name.
JYUTPING_ONSETS = ("gw", "kw", "ng", *"bpmfdtnlgkhwzcsj", "")

# Coda inventory, ending with the empty coda.
JYUTPING_CODAS = ("ng", *"ptkmniu", "")

# Nuclei after Jyutping -> CantRomZJ1 conversion.
VALID_CANTROMZJ1_NUCLEI = set("aeiou") | {"ea", "eo", "oe", "yu", "m", "ng"}
70
+
71
+
72
def convert_nucleus_jyutping_to_cantromzj1(nucleus: str) -> str:
    """
    Map one Jyutping nucleus to its CantRomZJ1 spelling.

    A nucleus with no entry in JYUTPING_NUCLEUS_TO_CANTROMZJ1 is returned unchanged.
    """
    if nucleus in JYUTPING_NUCLEUS_TO_CANTROMZJ1:
        return JYUTPING_NUCLEUS_TO_CANTROMZJ1[nucleus]
    return nucleus
75
+
76
+
77
def convert_nucleus_cantromzj1_to_jyutping(nucleus: str) -> str:
    """
    Map one CantRomZJ1 nucleus back to its Jyutping spelling.

    A nucleus with no entry in CANTROMZJ1_NUCLEUS_TO_JYUTPING is returned unchanged.
    """
    if nucleus in CANTROMZJ1_NUCLEUS_TO_JYUTPING:
        return CANTROMZJ1_NUCLEUS_TO_JYUTPING[nucleus]
    return nucleus
80
+
81
+
82
def convert_tone_jyutping_to_cantromzj1(tone: str, coda: str) -> str:
    """
    Translate one Jyutping tone number into CantRomZJ1 tone notation.

    Args:
        tone:
            Jyutping tone number as a string.
        coda:
            Coda of the same syllable. A coda in CHECKED_CODAS selects the
            entering-tone table; any other coda selects the non-entering table.

    Raises:
        ValueError: If the selected table has no entry for ``tone``.
    """
    is_checked = coda in CHECKED_CODAS
    table = (
        JYUTPING_TONE_TO_CANTROMZJ1_CHECKED
        if is_checked
        else JYUTPING_TONE_TO_CANTROMZJ1_OPEN
    )
    try:
        converted = table[tone]
    except KeyError as exc:
        raise ValueError(f"Unsupported Jyutping tone {tone!r} for coda {coda!r}") from exc
    return converted
93
+
94
+
95
def convert_tone_cantromzj1_to_jyutping(tone: str) -> str:
    """
    Translate one CantRomZJ1 tone notation back into a Jyutping tone number.

    Raises:
        ValueError: If ``tone`` is not a key of CANTROMZJ1_TONE_TO_JYUTPING.
    """
    try:
        jyutping_tone = CANTROMZJ1_TONE_TO_JYUTPING[tone]
    except KeyError as exc:
        raise ValueError(f"Unsupported CantRomZJ1 tone {tone!r}") from exc
    else:
        return jyutping_tone
@@ -0,0 +1,121 @@
1
+ """Parse CantRomZJ1 strings into PyCantonese-style Jyutping objects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pycantonese.jyutping import Jyutping
6
+
7
+ from .cantromzj1_maps import (
8
+ CANTROMZJ1_TONES,
9
+ JYUTPING_CODAS,
10
+ JYUTPING_ONSETS,
11
+ VALID_CANTROMZJ1_NUCLEI,
12
+ )
13
+
14
+
15
def _split_cantromzj1_tone(syllable: str) -> tuple[str, str]:
    """
    Separate one CantRomZJ1 syllable into (segmental body, tone notation).

    The tone is the first entry of CANTROMZJ1_TONES that the syllable ends with.

    Raises:
        ValueError: If no known tone suffix is present, or if nothing precedes it.
    """
    suffix = next(
        (candidate for candidate in CANTROMZJ1_TONES if syllable.endswith(candidate)),
        None,
    )
    if suffix is None:
        raise ValueError(f"Cannot find a valid CantRomZJ1 tone suffix in {syllable!r}")

    segments = syllable.removesuffix(suffix)
    if not segments:
        raise ValueError(f"Missing segmental body before tone in {syllable!r}")
    return segments, suffix
24
+
25
+
26
def _split_body(body: str) -> tuple[str, str, str]:
    """
    Break a CantRomZJ1 segmental body into (onset, nucleus, coda).

    Onsets are tried in JYUTPING_ONSETS order and codas in JYUTPING_CODAS order;
    the first combination whose remaining middle part is a member of
    VALID_CANTROMZJ1_NUCLEI is returned.

    The first element is called 'onset' because PyCantonese uses
    Jyutping(onset=..., nucleus=..., coda=..., tone=...).

    Raises:
        ValueError: If no onset/coda combination leaves a valid nucleus.
    """
    # A bare syllabic nasal is all nucleus.
    if body in ("m", "ng"):
        return "", body, ""

    for onset in JYUTPING_ONSETS:
        # The empty onset is a prefix of every body, so it always passes here.
        if not body.startswith(onset):
            continue
        rime = body.removeprefix(onset)

        for coda in JYUTPING_CODAS:
            if rime.endswith(coda):
                nucleus = rime.removesuffix(coda)
                if nucleus in VALID_CANTROMZJ1_NUCLEI:
                    return onset, nucleus, coda

    raise ValueError(f"Cannot split CantRomZJ1 syllable body {body!r}")
55
+
56
+
57
def parse_cantromzj1_syllable(syllable: str) -> Jyutping:
    """
    Turn one CantRomZJ1 syllable into a PyCantonese-style Jyutping object.

    Surrounding whitespace is stripped before parsing. The returned object
    holds the CantRomZJ1 nucleus and tone, not their Jyutping equivalents.

    Example:
        parse_cantromzj1_syllable("heong1A|55")
        -> Jyutping(onset='h', nucleus='eo', coda='ng', tone='1A|55')
    """
    trimmed = syllable.strip()
    segments, tone = _split_cantromzj1_tone(trimmed)
    fields = dict(zip(("onset", "nucleus", "coda"), _split_body(segments)))
    return Jyutping(**fields, tone=tone)
68
+
69
+
70
def _split_cantromzj1_syllables(text: str) -> list[str]:
    """
    Split a CantRomZJ1 string into syllables by tone suffixes.

    The text is first cut on any run of whitespace (spaces, tabs, newlines);
    each resulting chunk is then scanned for tone notations, and a syllable ends
    wherever one is found. Concatenated, whitespace-separated and mixed input
    are therefore all handled the same way.

    Example:
        "heong1A|55gong2A|35"  -> ["heong1A|55", "gong2A|35"]
        "heong1A|55 gong2A|35" -> ["heong1A|55", "gong2A|35"]

    Args:
        text:
            CantRomZJ1 text. Empty or whitespace-only text yields an empty list.

    Returns:
        The syllable substrings in input order, each ending in a tone notation
        and containing no whitespace.

    Raises:
        ValueError: If a chunk has content after its last tone notation
            (including a chunk that contains no tone notation at all).
    """
    syllables: list[str] = []

    # str.split() with no argument also discards leading/trailing whitespace,
    # so no separate strip() or emptiness check is needed.
    for chunk in text.split():
        start = 0  # index where the syllable being collected begins
        pos = 0    # current scan position

        while pos < len(chunk):
            # CANTROMZJ1_TONES is ordered longest-first, so the first hit is
            # the longest notation starting at this position.
            tone = next(
                (candidate for candidate in CANTROMZJ1_TONES if chunk.startswith(candidate, pos)),
                None,
            )
            if tone is None:
                pos += 1
                continue

            pos += len(tone)
            syllables.append(chunk[start:pos])
            start = pos

        if start != len(chunk):
            raise ValueError(
                f"Unparsed trailing content in CantRomZJ1 string: {chunk[start:]!r}"
            )

    return syllables
108
+
109
+
110
def parse_cantromzj1(text: str) -> list[Jyutping]:
    """
    Turn a CantRomZJ1 string into a list of PyCantonese-style Jyutping objects.

    The text may be concatenated or space-separated; it is split into
    syllables first and each syllable is then parsed on its own.

    Example:
        parse_cantromzj1("heong1A|55gong2A|35")
        -> [Jyutping(onset='h', nucleus='eo', coda='ng', tone='1A|55'),
            Jyutping(onset='g', nucleus='o', coda='ng', tone='2A|35')]
    """
    pieces = _split_cantromzj1_syllables(text)
    return list(map(parse_cantromzj1_syllable, pieces))
@@ -0,0 +1,71 @@
1
+ """Convert CantRomZJ1 strings back to Jyutping strings."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal
6
+
7
+ from .cantromzj1_maps import (
8
+ convert_nucleus_cantromzj1_to_jyutping,
9
+ convert_tone_cantromzj1_to_jyutping,
10
+ )
11
+ from .cantromzj1_parse import parse_cantromzj1, parse_cantromzj1_syllable
12
+
13
+
14
# Accepted values for the ``return_mode`` keyword of cantromzj1_to_jyutping().
ReturnMode = Literal["tuple", "string", "list"]
15
+
16
+
17
def cantromzj1_syllable_to_jyutping(cantromzj1_syllable: str) -> str:
    """
    Rewrite a single CantRomZJ1 syllable as a single Jyutping syllable.

    Onset and coda are copied as parsed; only the nucleus and the tone are
    mapped back to their Jyutping forms.

    Example:
        cantromzj1_syllable_to_jyutping("heong1A|55") -> "hoeng1"
    """
    syl = parse_cantromzj1_syllable(cantromzj1_syllable)
    jp_nucleus = convert_nucleus_cantromzj1_to_jyutping(syl.nucleus)
    jp_tone = convert_tone_cantromzj1_to_jyutping(syl.tone)
    onset, coda = syl.onset, syl.coda
    return f"{onset}{jp_nucleus}{coda}{jp_tone}"
28
+
29
+
30
def cantromzj1_to_jyutping(
    cantromzj1: str,
    *,
    output_separator: str = "",
    return_mode: ReturnMode = "tuple",
) -> str | list[str] | tuple[str, list[str]]:
    """
    Rewrite a CantRomZJ1 string as Jyutping.

    Args:
        cantromzj1:
            CantRomZJ1 text, either concatenated or space-separated.
        output_separator:
            String placed between converted syllables in the joined result:
            "" gives concatenated output, " " gives space-separated output.
        return_mode:
            "tuple": return (joined_string, syllable_list).
            "string": return only the joined string.
            "list": return only the list of converted syllables.

    Raises:
        ValueError: If ``return_mode`` is not one of the three values above.
            The check happens after conversion, so parse errors surface first.

    Example:
        cantromzj1_to_jyutping("heong1A|55gong2A|35")
        -> ("hoeng1gong2", ["hoeng1", "gong2"])
    """
    syllables = [
        f"{syl.onset}{convert_nucleus_cantromzj1_to_jyutping(syl.nucleus)}"
        f"{syl.coda}{convert_tone_cantromzj1_to_jyutping(syl.tone)}"
        for syl in parse_cantromzj1(cantromzj1)
    ]
    text = output_separator.join(syllables)

    match return_mode:
        case "tuple":
            return text, syllables
        case "string":
            return text
        case "list":
            return syllables
        case _:
            raise ValueError("return_mode must be one of: 'tuple', 'string', 'list'")
@@ -0,0 +1,97 @@
1
+ """Convert Jyutping strings to CantRomZJ1 strings or PyCantonese-style objects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal
6
+
7
+ import pycantonese
8
+ from pycantonese.jyutping import Jyutping
9
+
10
+ from .cantromzj1_maps import (
11
+ convert_nucleus_jyutping_to_cantromzj1,
12
+ convert_tone_jyutping_to_cantromzj1,
13
+ )
14
+
15
+
16
# Accepted values for the ``return_mode`` keyword of jyutping_to_cantromzj1().
ReturnMode = Literal["tuple", "string", "list"]
17
+
18
+
19
def _convert_parsed_jyutping_to_cantromzj1(parsed: Jyutping) -> tuple[str, Jyutping]:
    """
    Convert one parsed PyCantonese Jyutping object to CantRomZJ1.

    Returns:
        A pair (syllable string, Jyutping object). Both carry the original
        onset and coda together with the CantRomZJ1 nucleus and tone.
    """
    onset, coda = parsed.onset, parsed.coda
    zj1_nucleus = convert_nucleus_jyutping_to_cantromzj1(parsed.nucleus)
    # The coda decides whether the entering-tone table is used.
    zj1_tone = convert_tone_jyutping_to_cantromzj1(parsed.tone, coda)

    text = f"{onset}{zj1_nucleus}{coda}{zj1_tone}"
    return text, Jyutping(onset=onset, nucleus=zj1_nucleus, coda=coda, tone=zj1_tone)
29
+
30
+
31
def parse_jyutping_to_cantromzj1_objects(jyutping: str) -> list[Jyutping]:
    """
    Convert a Jyutping string into PyCantonese-style Jyutping objects,
    but with CantRomZJ1-style nucleus and tone fields.

    Args:
        jyutping:
            A Jyutping string. Syllables may be concatenated or separated by
            any whitespace (spaces, tabs, newlines); all whitespace is removed
            before parsing.

    Returns:
        One Jyutping object per parsed syllable, in input order.

    Raises:
        ValueError: If a syllable's tone has no CantRomZJ1 counterpart for its
            coda. Errors raised by pycantonese.parse_jyutping propagate unchanged.

    Example:
        parse_jyutping_to_cantromzj1_objects("hoeng1gong2")
        -> [Jyutping(onset='h', nucleus='eo', coda='ng', tone='1A|55'), ...]
    """
    # Remove every whitespace character, not only U+0020, so that input such
    # as a line read from a file (trailing newline) or tab-separated syllables
    # reaches the parser as one concatenated string.
    compact = "".join(jyutping.split())
    parsed_items = pycantonese.parse_jyutping(compact)
    return [_convert_parsed_jyutping_to_cantromzj1(item)[1] for item in parsed_items]
42
+
43
+
44
def jyutping_syllable_to_cantromzj1(jyutping_syllable: str) -> str:
    """
    Rewrite a single Jyutping syllable as a single CantRomZJ1 syllable string.

    Surrounding whitespace is stripped before parsing.

    Raises:
        ValueError: If the input does not parse to exactly one syllable.
    """
    candidates = pycantonese.parse_jyutping(jyutping_syllable.strip())
    count = len(candidates)
    if count == 1:
        (only,) = candidates
        return _convert_parsed_jyutping_to_cantromzj1(only)[0]

    raise ValueError(
        f"Expected exactly one Jyutping syllable, got {count}: {jyutping_syllable!r}"
    )
53
+
54
+
55
def jyutping_to_cantromzj1(
    jyutping: str,
    *,
    output_separator: str = "",
    return_mode: ReturnMode = "tuple",
) -> str | list[str] | tuple[str, list[str]]:
    """
    Convert a Jyutping string to CantRomZJ1.

    Args:
        jyutping:
            A Jyutping string. Syllables may be concatenated or separated by
            any whitespace (spaces, tabs, newlines), e.g. "hoeng1gong2" or
            "hoeng1 gong2"; all whitespace is removed before parsing.
        output_separator:
            Separator used when joining converted syllables. Use "" for concatenated
            output or " " for space-separated output.
        return_mode:
            "tuple": return (joined_string, syllable_list); this is the default.
            "string": return only the joined string.
            "list": return only the list of converted syllables.

    Returns:
        The joined string, the syllable list, or both, as selected by
        ``return_mode``.

    Raises:
        ValueError: If ``return_mode`` is not one of the three values above
            (checked after conversion), or if a syllable's tone has no
            CantRomZJ1 counterpart for its coda. Errors raised by
            pycantonese.parse_jyutping propagate unchanged.

    Example:
        jyutping_to_cantromzj1("hoeng1gong2")
        -> ("heong1A|55gong2A|35", ["heong1A|55", "gong2A|35"])

        jyutping_to_cantromzj1("hoeng1gong2", output_separator=" ", return_mode="string")
        -> "heong1A|55 gong2A|35"
    """
    # Remove every whitespace character, not only U+0020, so tab- or
    # newline-separated input (e.g. a line read from a file) is accepted too.
    compact = "".join(jyutping.split())
    parsed_items = pycantonese.parse_jyutping(compact)
    converted = [
        _convert_parsed_jyutping_to_cantromzj1(item)[0]
        for item in parsed_items
    ]
    joined = output_separator.join(converted)

    if return_mode == "tuple":
        return joined, converted
    if return_mode == "string":
        return joined
    if return_mode == "list":
        return converted

    raise ValueError("return_mode must be one of: 'tuple', 'string', 'list'")
@@ -0,0 +1,84 @@
1
+ Metadata-Version: 2.4
2
+ Name: pysinrom
3
+ Version: 0.1.0
4
+ Summary: Python tools for Sinitic romanization, currently only as research utilities for conversion between Jyutping and CantRomZJ1
5
+ License-Expression: MIT
6
+ Classifier: Development Status :: 3 - Alpha
7
+ Classifier: Operating System :: OS Independent
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Topic :: Text Processing :: Linguistic
14
+ Requires-Python: >=3.10
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: pycantonese<6,>=5.0
18
+ Provides-Extra: test
19
+ Requires-Dist: pytest>=8; extra == "test"
20
+ Dynamic: license-file
21
+
22
+ # PySinRom
23
+
24
+ ## Installation
25
+
26
+ After publication on PyPI:
27
+
28
+ ```bash
29
+ pip install pysinrom
30
+ ```
31
+
32
+ For local installation from the project directory:
33
+
34
+ ```bash
35
+ python -m pip install .
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ ```python
41
+ from pysinrom import (
42
+ jyutping_to_cantromzj1,
43
+ cantromzj1_to_jyutping,
44
+ )
45
+
46
+ cantromzj1, syllables = jyutping_to_cantromzj1("hoeng1gong2")
47
+ print(cantromzj1)
48
+ # heong1A|55gong2A|35
49
+
50
+ jyutping, syllables = cantromzj1_to_jyutping("heong1A|55gong2A|35")
51
+ print(jyutping)
52
+ # hoeng1gong2
53
+ ```
54
+
55
+ Space-separated input and output are also supported:
56
+
57
+ ```python
58
+ result = jyutping_to_cantromzj1(
59
+ "hoeng1 gong2",
60
+ output_separator=" ",
61
+ return_mode="string",
62
+ )
63
+ print(result)
64
+ # heong1A|55 gong2A|35
65
+ ```
66
+
67
+ ## Main functions
68
+
69
+ - `jyutping_to_cantromzj1()`
70
+ - `jyutping_syllable_to_cantromzj1()`
71
+ - `cantromzj1_to_jyutping()`
72
+ - `cantromzj1_syllable_to_jyutping()`
73
+ - `parse_cantromzj1()`
74
+ - `parse_cantromzj1_syllable()`
75
+
76
+ The default return mode for full-string conversion is a tuple containing the joined output string and a list of converted syllables. Set `return_mode="string"` or `return_mode="list"` to request only one representation.
77
+
78
+ ## Development status
79
+
80
+ This is research software released as an alpha version. The current implementation focuses on the romanization correspondences described in the accompanying anonymous manuscript.
81
+
82
+ ## License
83
+
84
+ MIT License.
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/pysinrom/__init__.py
5
+ src/pysinrom/cantromzj1_maps.py
6
+ src/pysinrom/cantromzj1_parse.py
7
+ src/pysinrom/cantromzj1_to_jyutping.py
8
+ src/pysinrom/jyutping_to_cantromzj1.py
9
+ src/pysinrom.egg-info/PKG-INFO
10
+ src/pysinrom.egg-info/SOURCES.txt
11
+ src/pysinrom.egg-info/dependency_links.txt
12
+ src/pysinrom.egg-info/requires.txt
13
+ src/pysinrom.egg-info/top_level.txt
14
+ tests/test_conversion.py
@@ -0,0 +1,4 @@
1
+ pycantonese<6,>=5.0
2
+
3
+ [test]
4
+ pytest>=8
@@ -0,0 +1 @@
1
+ pysinrom
@@ -0,0 +1,37 @@
1
+ import pytest
2
+
3
+ from pysinrom import (
4
+ cantromzj1_syllable_to_jyutping,
5
+ cantromzj1_to_jyutping,
6
+ jyutping_syllable_to_cantromzj1,
7
+ jyutping_to_cantromzj1,
8
+ )
9
+
10
+
11
def test_jyutping_to_cantromzj1_default_tuple():
    """Default return mode gives the joined string plus the syllable list."""
    text, parts = jyutping_to_cantromzj1("hoeng1gong2")
    expected_parts = ["heong1A|55", "gong2A|35"]
    assert text == "heong1A|55gong2A|35"
    assert parts == expected_parts
15
+
16
+
17
def test_cantromzj1_to_jyutping_default_tuple():
    """Default return mode gives the joined string plus the syllable list."""
    text, parts = cantromzj1_to_jyutping("heong1A|55gong2A|35")
    expected_parts = ["hoeng1", "gong2"]
    assert text == "hoeng1gong2"
    assert parts == expected_parts
21
+
22
+
23
def test_space_separated_conversion():
    """Space-separated input with a space separator yields a spaced string."""
    options = {"output_separator": " ", "return_mode": "string"}
    converted = jyutping_to_cantromzj1("hoeng1 gong2", **options)
    assert converted == "heong1A|55 gong2A|35"
28
+
29
+
30
def test_single_syllable_conversion():
    """One syllable converts correctly in each direction."""
    jyutping, cantromzj1 = "hoeng1", "heong1A|55"
    assert jyutping_syllable_to_cantromzj1(jyutping) == cantromzj1
    assert cantromzj1_syllable_to_jyutping(cantromzj1) == jyutping
33
+
34
+
35
def test_invalid_return_mode():
    """An unrecognised return_mode is rejected with ValueError."""
    bad_mode = "invalid"
    with pytest.raises(ValueError):
        jyutping_to_cantromzj1("hoeng1", return_mode=bad_mode)