romanization 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Joumaico Maulas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.1
2
+ Name: romanization
3
+ Version: 1.0.0
4
+ Summary: Revised Romanization of Korean
5
+ Home-page: https://github.com/joumaico/romanization
6
+ Author: Joumaico Maulas
7
+ Classifier: Development Status :: 5 - Production/Stable
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3.7
11
+ Classifier: Programming Language :: Python :: 3.8
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.7
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: numpy
19
+
20
+ # Revised Romanization of Korean
21
+
22
+ This software converts Korean Hangul text into Latin alphabet, following the guidelines of the Revised Romanization of Korean which ensures accurate and consistent transliteration. It's particularly useful for applications in language learning, text processing, and international communication, making it easier for non-Korean speakers to read and pronounce Korean words.
23
+
24
+ ## Installation
25
+
26
+ ```
27
+ Dependencies:
28
+
29
+ - Python 3.7, 3.8, 3.9, 3.11, 3.12
30
+
31
+ Installation:
32
+
33
+ $ pip install romanization
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ ```python
39
+ >>> from romanization import romanize
40
+
41
+ >>> romanize("좋아 첫 눈에 반해 버린")
42
+ "joha cheot nune banhae beorin"
43
+ ```
44
+
45
+ ## References
46
+
47
+ - https://en.wikipedia.org/w/index.php?title=Revised_Romanization_of_Korean&oldid=1064463473
@@ -0,0 +1,28 @@
1
+ # Revised Romanization of Korean
2
+
3
+ This software converts Korean Hangul text into the Latin alphabet, following the guidelines of the Revised Romanization of Korean, which ensure accurate and consistent transliteration. It's particularly useful for applications in language learning, text processing, and international communication, making it easier for non-Korean speakers to read and pronounce Korean words.
4
+
5
+ ## Installation
6
+
7
+ ```
8
+ Dependencies:
9
+
10
+ - Python 3.7, 3.8, 3.9, 3.11, 3.12
11
+
12
+ Installation:
13
+
14
+ $ pip install romanization
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ ```python
20
+ >>> from romanization import romanize
21
+
22
+ >>> romanize("좋아 첫 눈에 반해 버린")
23
+ "joha cheot nune banhae beorin"
24
+ ```
25
+
26
+ ## References
27
+
28
+ - https://en.wikipedia.org/w/index.php?title=Revised_Romanization_of_Korean&oldid=1064463473
@@ -0,0 +1,6 @@
1
+ [build-system]
2
+ requires = [
3
+ "setuptools >= 40.9.0",
4
+ "wheel",
5
+ ]
6
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.1
2
+ Name: romanization
3
+ Version: 1.0.0
4
+ Summary: Revised Romanization of Korean
5
+ Home-page: https://github.com/joumaico/romanization
6
+ Author: Joumaico Maulas
7
+ Classifier: Development Status :: 5 - Production/Stable
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3.7
11
+ Classifier: Programming Language :: Python :: 3.8
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.7
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: numpy
19
+
20
+ # Revised Romanization of Korean
21
+
22
+ This software converts Korean Hangul text into Latin alphabet, following the guidelines of the Revised Romanization of Korean which ensures accurate and consistent transliteration. It's particularly useful for applications in language learning, text processing, and international communication, making it easier for non-Korean speakers to read and pronounce Korean words.
23
+
24
+ ## Installation
25
+
26
+ ```
27
+ Dependencies:
28
+
29
+ - Python 3.7, 3.8, 3.9, 3.11, 3.12
30
+
31
+ Installation:
32
+
33
+ $ pip install romanization
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ ```python
39
+ >>> from romanization import romanize
40
+
41
+ >>> romanize("좋아 첫 눈에 반해 버린")
42
+ "joha cheot nune banhae beorin"
43
+ ```
44
+
45
+ ## References
46
+
47
+ - https://en.wikipedia.org/w/index.php?title=Revised_Romanization_of_Korean&oldid=1064463473
@@ -0,0 +1,22 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ romanization.egg-info/PKG-INFO
6
+ romanization.egg-info/SOURCES.txt
7
+ romanization.egg-info/dependency_links.txt
8
+ romanization.egg-info/requires.txt
9
+ romanization.egg-info/top_level.txt
10
+ src/romanization/__init__.py
11
+ src/romanization/const.py
12
+ src/romanization/romanize.py
13
+ src/romanization/utils.py
14
+ src/romanization/convert/data/provisions
15
+ src/romanization/convert/data/latin/JUNGSUNG
16
+ src/romanization/convert/data/raw/CHOSUNG
17
+ src/romanization/convert/data/raw/JONGSUNG
18
+ src/romanization/convert/data/raw/JUNGSUNG
19
+ src/romanization/convert/output/latin/jungsung.json
20
+ src/romanization/convert/output/raw/chosung.json
21
+ src/romanization/convert/output/raw/jongsung.json
22
+ tests/test_romanize.py
@@ -0,0 +1 @@
1
+ romanization
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,46 @@
1
+ from setuptools import setup
2
+
3
+
4
def readme():
    """
    Return the text of ``README.md`` for use as the package long description.

    The file is decoded explicitly as UTF-8 because the README contains Hangul
    text; relying on the platform's default encoding can fail or produce
    garbled metadata on systems whose locale is not UTF-8.

    Returns
    -------
    str
        The full contents of ``README.md`` (resolved against the current
        working directory).
    """
    with open("README.md", encoding="utf-8") as f:
        return f.read()
7
+
8
+
9
# All packaging metadata is collected in one mapping so it can be read
# top-to-bottom before being handed to setuptools.
_metadata = dict(
    # Identity and descriptive fields shown on the package index.
    name="romanization",
    version="1.0.0",
    author="Joumaico Maulas",
    description="Revised Romanization of Korean",
    long_description=readme(),
    long_description_content_type="text/markdown",
    url="https://github.com/joumaico/romanization",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
    ],
    # The importable package lives under src/ rather than the project root.
    packages=[
        "romanization",
    ],
    package_dir={
        "romanization": "src/romanization",
    },
    # Lookup tables that are read from disk when the package is imported.
    package_data={
        "romanization": [
            "convert/data/latin/*",
            "convert/data/raw/*",
            "convert/data/provisions",
            "convert/output/latin/*",
            "convert/output/raw/*",
        ]
    },
    python_requires=">=3.7",
    install_requires=[
        "numpy",
    ],
)

setup(**_metadata)
@@ -0,0 +1 @@
1
+ from .romanize import romanize
@@ -0,0 +1,57 @@
1
+ import json
2
+ import pathlib
3
+
4
# Directory of this package; bundled data files are resolved relative to it.
PATH = pathlib.Path(__file__).parent

# Latin spellings of single compatibility jamo, split by syllable position.
# NOTE: key order is significant. ``utils.Locator`` builds its column labels
# from the "CHOSUNG" keys and its row labels from the "JONGSUNG" keys, so the
# order here must match the column/row order of "convert/data/provisions".
LATIN = {
    "CHOSUNG": {
        "": "",
        "ㅇ": "",
        "ㄱ": "g",
        "ㄴ": "n",
        "ㄷ": "d",
        "ㄹ": "r",
        "ㅁ": "m",
        "ㅂ": "b",
        "ㅅ": "s",
        "ㅈ": "j",
        "ㅊ": "ch",
        "ㅋ": "k",
        "ㅌ": "t",
        "ㅍ": "p",
        "ㅎ": "h",
    },
    "JONGSUNG": {
        "ㄱ": "k",
        "ㄴ": "n",
        "ㄷ": "t",
        "ㄹ": "l",
        "ㅁ": "m",
        "ㅂ": "p",
        "ㅅ": "t",
        "ㅇ": "ng",
        "ㅈ": "t",
        "ㅊ": "t",
        "ㅌ": "t",
        "ㅎ": "t",
        "ㅋ": "k",
        "ㅍ": "p",
    }
}

# Latin spellings used for doubled (twin) consonants.
TWIN_CASE_PROVISION = {
    "ㅂ": "pp",
    "ㅈ": "jj",
    "ㄷ": "tt",
    "ㄱ": "kk",
    "ㅅ": "ss"
}


def _load_json(relative_path):
    """Parse a bundled JSON table located relative to this package."""
    # Explicit UTF-8 keeps loading independent of the platform's locale.
    with open(PATH / relative_path, "r", encoding="utf-8") as f:
        return json.load(f)


# Conjoining initial jamo (quoted, e.g. "'\u1100'") -> list of compatibility jamo.
CHOSUNG = _load_json("convert/output/raw/chosung.json")

# Conjoining medial jamo (quoted) -> upper-case Latin spelling ("" if unmapped).
JUNGSUNG = _load_json("convert/output/latin/jungsung.json")

# Conjoining final jamo (quoted) -> list of compatibility jamo.
JONGSUNG = _load_json("convert/output/raw/jongsung.json")
@@ -0,0 +1,71 @@
1
+ 4449 'ᅡ' A
2
+ 4450 'ᅢ' AE
3
+ 4451 'ᅣ' YA
4
+ 4452 'ᅤ' YAE
5
+ 4453 'ᅥ' EO
6
+ 4454 'ᅦ' E
7
+ 4455 'ᅧ' YEO
8
+ 4456 'ᅨ' YE
9
+ 4457 'ᅩ' O
10
+ 4458 'ᅪ' WA
11
+ 4459 'ᅫ' WAE
12
+ 4460 'ᅬ' OE
13
+ 4461 'ᅭ' YO
14
+ 4462 'ᅮ' U
15
+ 4463 'ᅯ' WO
16
+ 4464 'ᅰ' WE
17
+ 4465 'ᅱ' WI
18
+ 4466 'ᅲ' YU
19
+ 4467 'ᅳ' EU
20
+ 4468 'ᅴ' UI
21
+ 4469 'ᅵ' I
22
+ 4470 'ᅶ'
23
+ 4471 'ᅷ'
24
+ 4472 'ᅸ'
25
+ 4473 'ᅹ'
26
+ 4474 'ᅺ'
27
+ 4475 'ᅻ'
28
+ 4476 'ᅼ'
29
+ 4477 'ᅽ'
30
+ 4478 'ᅾ'
31
+ 4479 'ᅿ'
32
+ 4480 'ᆀ'
33
+ 4481 'ᆁ'
34
+ 4482 'ᆂ'
35
+ 4483 'ᆃ'
36
+ 4484 'ᆄ'
37
+ 4485 'ᆅ'
38
+ 4486 'ᆆ'
39
+ 4487 'ᆇ'
40
+ 4488 'ᆈ'
41
+ 4489 'ᆉ'
42
+ 4490 'ᆊ'
43
+ 4491 'ᆋ'
44
+ 4492 'ᆌ'
45
+ 4493 'ᆍ'
46
+ 4494 'ᆎ'
47
+ 4495 'ᆏ'
48
+ 4496 'ᆐ'
49
+ 4497 'ᆑ'
50
+ 4498 'ᆒ'
51
+ 4499 'ᆓ'
52
+ 4500 'ᆔ'
53
+ 4501 'ᆕ'
54
+ 4502 'ᆖ'
55
+ 4503 'ᆗ'
56
+ 4504 'ᆘ'
57
+ 4505 'ᆙ'
58
+ 4506 'ᆚ'
59
+ 4507 'ᆛ'
60
+ 4508 'ᆜ'
61
+ 4509 'ᆝ'
62
+ 4510 'ᆞ'
63
+ 4511 'ᆟ'
64
+ 4512 'ᆠ'
65
+ 4513 'ᆡ'
66
+ 4514 'ᆢ'
67
+ 4515 'ᆣ'
68
+ 4516 'ᆤ'
69
+ 4517 'ᆥ'
70
+ 4518 'ᆦ'
71
+ 4519 'ᆧ'
@@ -0,0 +1,14 @@
1
+ k g kg ngn kd ngn ngm kb ks kj kch kk kt kp kh
2
+ n n ng nn nd ll nm nb ns nj nch nk nt np nh
3
+ t d tg nn td nn nm tb ts tj tch tk tt tp th
4
+ l r lg ll ld ll lm lb ls lj lch lk lt lp lh
5
+ m m mg mn md mn mm mb ms mj mch mk mt mp mh
6
+ p b pg mn pd mn mm pb ps pj pch pk pt pp ph
7
+ t s tg nn td nn nm tb ts tj tch tk tt tp th
8
+ ng ng ngg ngn ngd ngn ngm ngb ngs ngj ngch ngk ngt ngp ngh
9
+ t j tg nn td nn nm tb ts tj tch tk tt tp th
10
+ t ch tg nn td nn nm tb ts tj tch tk tt tp th
11
+ t t tg nn td nn nm tb ts tj tch tk tt tp th
12
+ t h k nn t nn nm p hs ch tch tk t tp t
13
+ k k kg ngn kd ngn ngm kb ks kj kch kk kt kp kh
14
+ p p pg mn pd mn mm pb ps pj pch pk pt pp ph
@@ -0,0 +1,95 @@
1
+ 4352 'ᄀ' ㄱ
2
+ 4353 'ᄁ' ㄱㄱ
3
+ 4354 'ᄂ' ㄴ
4
+ 4355 'ᄃ' ㄷ
5
+ 4356 'ᄄ' ㄷㄷ
6
+ 4357 'ᄅ' ㄹ
7
+ 4358 'ᄆ' ㅁ
8
+ 4359 'ᄇ' ㅂ
9
+ 4360 'ᄈ' ㅂㅂ
10
+ 4361 'ᄉ' ㅅ
11
+ 4362 'ᄊ' ㅅㅅ
12
+ 4363 'ᄋ' ㅇ
13
+ 4364 'ᄌ' ㅈ
14
+ 4365 'ᄍ' ㅈㅈ
15
+ 4366 'ᄎ' ㅊ
16
+ 4367 'ᄏ' ㅋ
17
+ 4368 'ᄐ' ㅌ
18
+ 4369 'ᄑ' ㅍ
19
+ 4370 'ᄒ' ㅎ
20
+ 4371 'ᄓ' ㄴㄱ
21
+ 4372 'ᄔ' ㄴㄴ
22
+ 4373 'ᄕ' ㄴㄷ
23
+ 4374 'ᄖ' ㄴㅂ
24
+ 4375 'ᄗ' ㄷㄱ
25
+ 4376 'ᄘ' ㄹㄴ
26
+ 4377 'ᄙ' ㄹㄹ
27
+ 4378 'ᄚ' ㄹㅎ
28
+ 4379 'ᄛ'
29
+ 4380 'ᄜ' ㅁㅂ
30
+ 4381 'ᄝ'
31
+ 4382 'ᄞ' ㅂㄱ
32
+ 4383 'ᄟ' ㅂㄴ
33
+ 4384 'ᄠ' ㅂㄷ
34
+ 4385 'ᄡ' ㅂㅅ
35
+ 4386 'ᄢ' ㅂㅅㄱ
36
+ 4387 'ᄣ' ㅂㅅㄷ
37
+ 4388 'ᄤ' ㅂㅅㅂ
38
+ 4389 'ᄥ' ㅂㅅㅅ
39
+ 4390 'ᄦ' ㅂㅅㅈ
40
+ 4391 'ᄧ' ㅂㅈ
41
+ 4392 'ᄨ' ㅂㅊ
42
+ 4393 'ᄩ' ㅂㅌ
43
+ 4394 'ᄪ' ㅂㅍ
44
+ 4395 'ᄫ'
45
+ 4396 'ᄬ'
46
+ 4397 'ᄭ' ㅅㄱ
47
+ 4398 'ᄮ' ㅅㄴ
48
+ 4399 'ᄯ' ㅅㄷ
49
+ 4400 'ᄰ' ㅅㄹ
50
+ 4401 'ᄱ' ㅅㅁ
51
+ 4402 'ᄲ' ㅅㅂ
52
+ 4403 'ᄳ' ㅅㅂㄱ
53
+ 4404 'ᄴ' ㅅㅅㅅ
54
+ 4405 'ᄵ'
55
+ 4406 'ᄶ' ㅅㅈ
56
+ 4407 'ᄷ' ㅅㅊ
57
+ 4408 'ᄸ' ㅅㅋ
58
+ 4409 'ᄹ' ㅅㅌ
59
+ 4410 'ᄺ' ㅅㅍ
60
+ 4411 'ᄻ' ㅅㅎ
61
+ 4412 'ᄼ'
62
+ 4413 'ᄽ'
63
+ 4414 'ᄾ'
64
+ 4415 'ᄿ'
65
+ 4416 'ᅀ'
66
+ 4417 'ᅁ'
67
+ 4418 'ᅂ'
68
+ 4419 'ᅃ'
69
+ 4420 'ᅄ'
70
+ 4421 'ᅅ'
71
+ 4422 'ᅆ'
72
+ 4423 'ᅇ'
73
+ 4424 'ᅈ'
74
+ 4425 'ᅉ'
75
+ 4426 'ᅊ'
76
+ 4427 'ᅋ'
77
+ 4428 'ᅌ'
78
+ 4429 'ᅍ'
79
+ 4430 'ᅎ'
80
+ 4431 'ᅏ'
81
+ 4432 'ᅐ'
82
+ 4433 'ᅑ'
83
+ 4434 'ᅒ' ㅊㅋ
84
+ 4435 'ᅓ' ㅊㅎ
85
+ 4436 'ᅔ'
86
+ 4437 'ᅕ'
87
+ 4438 'ᅖ' ㅍㅂ
88
+ 4439 'ᅗ'
89
+ 4440 'ᅘ' ㅎㅎ
90
+ 4441 'ᅙ'
91
+ 4442 'ᅚ' ㄱㄷ
92
+ 4443 'ᅛ' ㄴㅅ
93
+ 4444 'ᅜ' ㄴㅈ
94
+ 4445 'ᅝ' ㄴㅎ
95
+ 4446 'ᅞ' ㄷㄹ
@@ -0,0 +1,88 @@
1
+ 4520 'ᆨ' ㄱ
2
+ 4521 'ᆩ' ㄱㄱ
3
+ 4522 'ᆪ' ㄱㅅ
4
+ 4523 'ᆫ' ㄴ
5
+ 4524 'ᆬ' ㄴㅈ
6
+ 4525 'ᆭ' ㄴㅎ
7
+ 4526 'ᆮ' ㄷ
8
+ 4527 'ᆯ' ㄹ
9
+ 4528 'ᆰ' ㄹㄱ
10
+ 4529 'ᆱ' ㄹㅁ
11
+ 4530 'ᆲ' ㄹㅂ
12
+ 4531 'ᆳ' ㄹㅅ
13
+ 4532 'ᆴ' ㄹㅌ
14
+ 4533 'ᆵ' ㄹㅍ
15
+ 4534 'ᆶ' ㄹㅎ
16
+ 4535 'ᆷ' ㅁ
17
+ 4536 'ᆸ' ㅂ
18
+ 4537 'ᆹ' ㅂㅅ
19
+ 4538 'ᆺ' ㅅ
20
+ 4539 'ᆻ' ㅅㅅ
21
+ 4540 'ᆼ' ㅇ
22
+ 4541 'ᆽ' ㅈ
23
+ 4542 'ᆾ' ㅊ
24
+ 4543 'ᆿ' ㅋ
25
+ 4544 'ᇀ' ㅌ
26
+ 4545 'ᇁ' ㅍ
27
+ 4546 'ᇂ' ㅎ
28
+ 4547 'ᇃ' ㄱㄹ
29
+ 4548 'ᇄ' ㄱㅅㄱ
30
+ 4549 'ᇅ' ㄴㄱ
31
+ 4550 'ᇆ' ㄴㄷ
32
+ 4551 'ᇇ' ㄴㅅ
33
+ 4552 'ᇈ'
34
+ 4553 'ᇉ' ㄴㅌ
35
+ 4554 'ᇊ' ㄷㄱ
36
+ 4555 'ᇋ' ㄷㄹ
37
+ 4556 'ᇌ' ㄹㄱㅅ
38
+ 4557 'ᇍ' ㄹㄴ
39
+ 4558 'ᇎ' ㄹㄷ
40
+ 4559 'ᇏ' ㄹㄷㅎ
41
+ 4560 'ᇐ' ㄹㄹ
42
+ 4561 'ᇑ' ㄹㅁㄱ
43
+ 4562 'ᇒ' ㄹㅁㅅ
44
+ 4563 'ᇓ' ㄹㅂㅅ
45
+ 4564 'ᇔ' ㄹㅂㅎ
46
+ 4565 'ᇕ'
47
+ 4566 'ᇖ' ㄹㅅㅅ
48
+ 4567 'ᇗ'
49
+ 4568 'ᇘ' ㄹㅋ
50
+ 4569 'ᇙ' ㄹㅎ
51
+ 4570 'ᇚ' ㅁㄱ
52
+ 4571 'ᇛ' ㅁㄹ
53
+ 4572 'ᇜ' ㅁㅂ
54
+ 4573 'ᇝ' ㅁㅅ
55
+ 4574 'ᇞ' ㅁㅅㅅ
56
+ 4575 'ᇟ'
57
+ 4576 'ᇠ' ㅁㅊ
58
+ 4577 'ᇡ' ㅁㅎ
59
+ 4578 'ᇢ'
60
+ 4579 'ᇣ' ㅂㄹ
61
+ 4580 'ᇤ' ㅂㅍ
62
+ 4581 'ᇥ' ㅂㅎ
63
+ 4582 'ᇦ'
64
+ 4583 'ᇧ' ㅅㄱ
65
+ 4584 'ᇨ' ㅅㄷ
66
+ 4585 'ᇩ' ㅅㄹ
67
+ 4586 'ᇪ' ㅅㅂ
68
+ 4587 'ᇫ'
69
+ 4588 'ᇬ'
70
+ 4589 'ᇭ'
71
+ 4590 'ᇮ'
72
+ 4591 'ᇯ'
73
+ 4592 'ᇰ'
74
+ 4593 'ᇱ'
75
+ 4594 'ᇲ'
76
+ 4595 'ᇳ' ㅍㅂ
77
+ 4596 'ᇴ'
78
+ 4597 'ᇵ' ㅎㄴ
79
+ 4598 'ᇶ' ㅎㄹ
80
+ 4599 'ᇷ' ㅎㅁ
81
+ 4600 'ᇸ' ㅎㅂ
82
+ 4601 'ᇹ'
83
+ 4602 'ᇺ' ㄱㄴ
84
+ 4603 'ᇻ' ㄱㅂ
85
+ 4604 'ᇼ' ㄱㅊ
86
+ 4605 'ᇽ' ㄱㅋ
87
+ 4606 'ᇾ' ㄱㅎ
88
+ 4607 'ᇿ' ㄴㄴ
@@ -0,0 +1,71 @@
1
+ 4449 'ᅡ'
2
+ 4450 'ᅢ'
3
+ 4451 'ᅣ'
4
+ 4452 'ᅤ'
5
+ 4453 'ᅥ'
6
+ 4454 'ᅦ'
7
+ 4455 'ᅧ'
8
+ 4456 'ᅨ'
9
+ 4457 'ᅩ'
10
+ 4458 'ᅪ'
11
+ 4459 'ᅫ'
12
+ 4460 'ᅬ'
13
+ 4461 'ᅭ'
14
+ 4462 'ᅮ'
15
+ 4463 'ᅯ'
16
+ 4464 'ᅰ'
17
+ 4465 'ᅱ'
18
+ 4466 'ᅲ'
19
+ 4467 'ᅳ'
20
+ 4468 'ᅴ'
21
+ 4469 'ᅵ'
22
+ 4470 'ᅶ'
23
+ 4471 'ᅷ'
24
+ 4472 'ᅸ'
25
+ 4473 'ᅹ'
26
+ 4474 'ᅺ'
27
+ 4475 'ᅻ'
28
+ 4476 'ᅼ'
29
+ 4477 'ᅽ'
30
+ 4478 'ᅾ'
31
+ 4479 'ᅿ'
32
+ 4480 'ᆀ'
33
+ 4481 'ᆁ'
34
+ 4482 'ᆂ'
35
+ 4483 'ᆃ'
36
+ 4484 'ᆄ'
37
+ 4485 'ᆅ'
38
+ 4486 'ᆆ'
39
+ 4487 'ᆇ'
40
+ 4488 'ᆈ'
41
+ 4489 'ᆉ'
42
+ 4490 'ᆊ'
43
+ 4491 'ᆋ'
44
+ 4492 'ᆌ'
45
+ 4493 'ᆍ'
46
+ 4494 'ᆎ'
47
+ 4495 'ᆏ'
48
+ 4496 'ᆐ'
49
+ 4497 'ᆑ'
50
+ 4498 'ᆒ'
51
+ 4499 'ᆓ'
52
+ 4500 'ᆔ'
53
+ 4501 'ᆕ'
54
+ 4502 'ᆖ'
55
+ 4503 'ᆗ'
56
+ 4504 'ᆘ'
57
+ 4505 'ᆙ'
58
+ 4506 'ᆚ'
59
+ 4507 'ᆛ'
60
+ 4508 'ᆜ'
61
+ 4509 'ᆝ'
62
+ 4510 'ᆞ'
63
+ 4511 'ᆟ'
64
+ 4512 'ᆠ'
65
+ 4513 'ᆡ'
66
+ 4514 'ᆢ'
67
+ 4515 'ᆣ'
68
+ 4516 'ᆤ'
69
+ 4517 'ᆥ'
70
+ 4518 'ᆦ'
71
+ 4519 'ᆧ'
@@ -0,0 +1 @@
1
+ {"'\u1161'": "A", "'\u1162'": "AE", "'\u1163'": "YA", "'\u1164'": "YAE", "'\u1165'": "EO", "'\u1166'": "E", "'\u1167'": "YEO", "'\u1168'": "YE", "'\u1169'": "O", "'\u116a'": "WA", "'\u116b'": "WAE", "'\u116c'": "OE", "'\u116d'": "YO", "'\u116e'": "U", "'\u116f'": "WO", "'\u1170'": "WE", "'\u1171'": "WI", "'\u1172'": "YU", "'\u1173'": "EU", "'\u1174'": "UI", "'\u1175'": "I", "'\u1176'": "", "'\u1177'": "", "'\u1178'": "", "'\u1179'": "", "'\u117a'": "", "'\u117b'": "", "'\u117c'": "", "'\u117d'": "", "'\u117e'": "", "'\u117f'": "", "'\u1180'": "", "'\u1181'": "", "'\u1182'": "", "'\u1183'": "", "'\u1184'": "", "'\u1185'": "", "'\u1186'": "", "'\u1187'": "", "'\u1188'": "", "'\u1189'": "", "'\u118a'": "", "'\u118b'": "", "'\u118c'": "", "'\u118d'": "", "'\u118e'": "", "'\u118f'": "", "'\u1190'": "", "'\u1191'": "", "'\u1192'": "", "'\u1193'": "", "'\u1194'": "", "'\u1195'": "", "'\u1196'": "", "'\u1197'": "", "'\u1198'": "", "'\u1199'": "", "'\u119a'": "", "'\u119b'": "", "'\u119c'": "", "'\u119d'": "", "'\u119e'": "", "'\u119f'": "", "'\u11a0'": "", "'\u11a1'": "", "'\u11a2'": "", "'\u11a3'": "", "'\u11a4'": "", "'\u11a5'": "", "'\u11a6'": "", "'\u11a7'": ""}
@@ -0,0 +1 @@
1
+ {"'\u1100'": ["\u3131"], "'\u1101'": ["\u3131", "\u3131"], "'\u1102'": ["\u3134"], "'\u1103'": ["\u3137"], "'\u1104'": ["\u3137", "\u3137"], "'\u1105'": ["\u3139"], "'\u1106'": ["\u3141"], "'\u1107'": ["\u3142"], "'\u1108'": ["\u3142", "\u3142"], "'\u1109'": ["\u3145"], "'\u110a'": ["\u3145", "\u3145"], "'\u110b'": ["\u3147"], "'\u110c'": ["\u3148"], "'\u110d'": ["\u3148", "\u3148"], "'\u110e'": ["\u314a"], "'\u110f'": ["\u314b"], "'\u1110'": ["\u314c"], "'\u1111'": ["\u314d"], "'\u1112'": ["\u314e"], "'\u1113'": ["\u3134", "\u3131"], "'\u1114'": ["\u3134", "\u3134"], "'\u1115'": ["\u3134", "\u3137"], "'\u1116'": ["\u3134", "\u3142"], "'\u1117'": ["\u3137", "\u3131"], "'\u1118'": ["\u3139", "\u3134"], "'\u1119'": ["\u3139", "\u3139"], "'\u111a'": ["\u3139", "\u314e"], "'\u111b'": [], "'\u111c'": ["\u3141", "\u3142"], "'\u111d'": [], "'\u111e'": ["\u3142", "\u3131"], "'\u111f'": ["\u3142", "\u3134"], "'\u1120'": ["\u3142", "\u3137"], "'\u1121'": ["\u3142", "\u3145"], "'\u1122'": ["\u3142", "\u3145", "\u3131"], "'\u1123'": ["\u3142", "\u3145", "\u3137"], "'\u1124'": ["\u3142", "\u3145", "\u3142"], "'\u1125'": ["\u3142", "\u3145", "\u3145"], "'\u1126'": ["\u3142", "\u3145", "\u3148"], "'\u1127'": ["\u3142", "\u3148"], "'\u1128'": ["\u3142", "\u314a"], "'\u1129'": ["\u3142", "\u314c"], "'\u112a'": ["\u3142", "\u314d"], "'\u112b'": [], "'\u112c'": [], "'\u112d'": ["\u3145", "\u3131"], "'\u112e'": ["\u3145", "\u3134"], "'\u112f'": ["\u3145", "\u3137"], "'\u1130'": ["\u3145", "\u3139"], "'\u1131'": ["\u3145", "\u3141"], "'\u1132'": ["\u3145", "\u3142"], "'\u1133'": ["\u3145", "\u3142", "\u3131"], "'\u1134'": ["\u3145", "\u3145", "\u3145"], "'\u1135'": [], "'\u1136'": ["\u3145", "\u3148"], "'\u1137'": ["\u3145", "\u314a"], "'\u1138'": ["\u3145", "\u314b"], "'\u1139'": ["\u3145", "\u314c"], "'\u113a'": ["\u3145", "\u314d"], "'\u113b'": ["\u3145", "\u314e"], "'\u113c'": [], "'\u113d'": [], "'\u113e'": [], "'\u113f'": [], "'\u1140'": [], "'\u1141'": [], "'\u1142'": [], 
"'\u1143'": [], "'\u1144'": [], "'\u1145'": [], "'\u1146'": [], "'\u1147'": [], "'\u1148'": [], "'\u1149'": [], "'\u114a'": [], "'\u114b'": [], "'\u114c'": [], "'\u114d'": [], "'\u114e'": [], "'\u114f'": [], "'\u1150'": [], "'\u1151'": [], "'\u1152'": ["\u314a", "\u314b"], "'\u1153'": ["\u314a", "\u314e"], "'\u1154'": [], "'\u1155'": [], "'\u1156'": ["\u314d", "\u3142"], "'\u1157'": [], "'\u1158'": ["\u314e", "\u314e"], "'\u1159'": [], "'\u115a'": ["\u3131", "\u3137"], "'\u115b'": ["\u3134", "\u3145"], "'\u115c'": ["\u3134", "\u3148"], "'\u115d'": ["\u3134", "\u314e"], "'\u115e'": ["\u3137", "\u3139"]}
@@ -0,0 +1 @@
1
+ {"'\u11a8'": ["\u3131"], "'\u11a9'": ["\u3131", "\u3131"], "'\u11aa'": ["\u3131", "\u3145"], "'\u11ab'": ["\u3134"], "'\u11ac'": ["\u3134", "\u3148"], "'\u11ad'": ["\u3134", "\u314e"], "'\u11ae'": ["\u3137"], "'\u11af'": ["\u3139"], "'\u11b0'": ["\u3139", "\u3131"], "'\u11b1'": ["\u3139", "\u3141"], "'\u11b2'": ["\u3139", "\u3142"], "'\u11b3'": ["\u3139", "\u3145"], "'\u11b4'": ["\u3139", "\u314c"], "'\u11b5'": ["\u3139", "\u314d"], "'\u11b6'": ["\u3139", "\u314e"], "'\u11b7'": ["\u3141"], "'\u11b8'": ["\u3142"], "'\u11b9'": ["\u3142", "\u3145"], "'\u11ba'": ["\u3145"], "'\u11bb'": ["\u3145", "\u3145"], "'\u11bc'": ["\u3147"], "'\u11bd'": ["\u3148"], "'\u11be'": ["\u314a"], "'\u11bf'": ["\u314b"], "'\u11c0'": ["\u314c"], "'\u11c1'": ["\u314d"], "'\u11c2'": ["\u314e"], "'\u11c3'": ["\u3131", "\u3139"], "'\u11c4'": ["\u3131", "\u3145", "\u3131"], "'\u11c5'": ["\u3134", "\u3131"], "'\u11c6'": ["\u3134", "\u3137"], "'\u11c7'": ["\u3134", "\u3145"], "'\u11c8'": [], "'\u11c9'": ["\u3134", "\u314c"], "'\u11ca'": ["\u3137", "\u3131"], "'\u11cb'": ["\u3137", "\u3139"], "'\u11cc'": ["\u3139", "\u3131", "\u3145"], "'\u11cd'": ["\u3139", "\u3134"], "'\u11ce'": ["\u3139", "\u3137"], "'\u11cf'": ["\u3139", "\u3137", "\u314e"], "'\u11d0'": ["\u3139", "\u3139"], "'\u11d1'": ["\u3139", "\u3141", "\u3131"], "'\u11d2'": ["\u3139", "\u3141", "\u3145"], "'\u11d3'": ["\u3139", "\u3142", "\u3145"], "'\u11d4'": ["\u3139", "\u3142", "\u314e"], "'\u11d5'": [], "'\u11d6'": ["\u3139", "\u3145", "\u3145"], "'\u11d7'": [], "'\u11d8'": ["\u3139", "\u314b"], "'\u11d9'": ["\u3139", "\u314e"], "'\u11da'": ["\u3141", "\u3131"], "'\u11db'": ["\u3141", "\u3139"], "'\u11dc'": ["\u3141", "\u3142"], "'\u11dd'": ["\u3141", "\u3145"], "'\u11de'": ["\u3141", "\u3145", "\u3145"], "'\u11df'": [], "'\u11e0'": ["\u3141", "\u314a"], "'\u11e1'": ["\u3141", "\u314e"], "'\u11e2'": [], "'\u11e3'": ["\u3142", "\u3139"], "'\u11e4'": ["\u3142", "\u314d"], "'\u11e5'": ["\u3142", "\u314e"], "'\u11e6'": [], "'\u11e7'": 
["\u3145", "\u3131"], "'\u11e8'": ["\u3145", "\u3137"], "'\u11e9'": ["\u3145", "\u3139"], "'\u11ea'": ["\u3145", "\u3142"], "'\u11eb'": [], "'\u11ec'": [], "'\u11ed'": [], "'\u11ee'": [], "'\u11ef'": [], "'\u11f0'": [], "'\u11f1'": [], "'\u11f2'": [], "'\u11f3'": ["\u314d", "\u3142"], "'\u11f4'": [], "'\u11f5'": ["\u314e", "\u3134"], "'\u11f6'": ["\u314e", "\u3139"], "'\u11f7'": ["\u314e", "\u3141"], "'\u11f8'": ["\u314e", "\u3142"], "'\u11f9'": [], "'\u11fa'": ["\u3131", "\u3134"], "'\u11fb'": ["\u3131", "\u3142"], "'\u11fc'": ["\u3131", "\u314a"], "'\u11fd'": ["\u3131", "\u314b"], "'\u11fe'": ["\u3131", "\u314e"], "'\u11ff'": ["\u3134", "\u3134"]}
@@ -0,0 +1,96 @@
1
+ import numpy as np
2
+ import unicodedata
3
+
4
+ from .const import LATIN
5
+ from .const import TWIN_CASE_PROVISION
6
+
7
+ from .utils import Locator
8
+ from .utils import custom_split
9
+ from .utils import get_chosung
10
+ from .utils import get_jongsung
11
+ from .utils import get_jungsung
12
+ from .utils import split_into_chunks
13
+ from .utils import split_jamo
14
+
15
+
16
locator = Locator()


def romanize(text: str) -> str:
    """
    Convert Korean Hangul text to Latin letters following the Revised Romanization of Korean.

    The text is tokenised with ``custom_split`` (whitespace is kept as separate
    tokens). Tokens that contain no character of Unicode category "Lo" are
    copied through unchanged. Every other token is decomposed into jamo and
    walked in groups of ``(previous final, initial, vowel)`` so that a final
    consonant is always spelled together with the initial that follows it.

    Parameters
    ----------
    text : str
        The input string containing Korean Hangul text to be romanized.

    Returns
    -------
    str
        The romanized string.

    Examples
    --------
    >>> romanize("좋아 첫 눈에 반해 버린")
    "joha cheot nune banhae beorin"

    References
    ----------
    https://en.wikipedia.org/w/index.php?title=Revised_Romanization_of_Korean&oldid=1064463473
    """
    initial_latin = LATIN["CHOSUNG"]
    final_latin = LATIN["JONGSUNG"]
    pieces = []

    for token in custom_split(text):
        # Only tokens holding at least one "Lo" (Letter, other) character
        # are transliterated; everything else is emitted verbatim.
        if not any(unicodedata.category(ch) == "Lo" for ch in token):
            pieces.append(token)
            continue

        flat_jamo = (jamo for group in split_jamo(token) for jamo in group)
        out = []
        for index, block in enumerate(split_into_chunks(flat_jamo, 3)):
            if len(block) == 1:  # trailing final of a standalone syllable, e.g. "책"
                block = (block[0], "", "")

            jongsung = get_jongsung(block[0])  # final consonant of the previous syllable
            chosung = get_chosung(block[1])  # initial consonant of this syllable
            jungsung = get_jungsung(block[2])  # Latin spelling of this syllable's vowel

            if not chosung:
                # No initial follows: spell the pending final on its own.
                if jongsung:
                    out.append(final_latin[jongsung[0]])
            elif len(chosung) > 1 and len(set(chosung)) == 1:
                # Doubled initial; any pending final is spelled first ("올까").
                out.extend(final_latin[jamo] for jamo in jongsung)
                if ord(chosung[0]) == 12593 or index != 0:  # ㄲ ("깐다") or non-leading ("오빠")
                    out.append(TWIN_CASE_PROVISION[chosung[0]])
                else:  # leading doubled initial other than ㄲ ("뚜두")
                    out.append(initial_latin[chosung[0]] * 2)
            elif not jongsung:
                out.append(initial_latin[chosung[0]])
            elif len(jongsung) > 1 and len(set(jongsung)) == 1 and ord(chosung[0]) == 12615:
                # Doubled final carried over onto a following ㅇ ("있을까").
                out.append(TWIN_CASE_PROVISION[jongsung[0]])
            else:
                if len(jongsung) > 1 and len(set(jongsung)) >= 2:  # mixed cluster ("없어요")
                    out.extend(final_latin[jamo] for jamo in jongsung[:-1])
                # The provisions table spells the (final, initial) pair jointly.
                col_index = np.where(locator.COL_LABELS == chosung[0])[0][0]
                row_index = np.where(locator.ROW_LABELS == jongsung[-1])[0][0]
                out.append(locator.TABLE[row_index, col_index])

            # Vowel spelling, or the raw character when it has no Latin mapping.
            out.append(jungsung.lower() if jungsung else block[2])

        pieces.append("".join(out))

    return "".join(pieces)
@@ -0,0 +1,264 @@
1
+ import itertools
2
+ import numpy as np
3
+ import re
4
+ import typing as t
5
+
6
+ from .const import CHOSUNG
7
+ from .const import JONGSUNG
8
+ from .const import JUNGSUNG
9
+ from .const import LATIN
10
+ from .const import PATH
11
+
12
+
13
class Locator:
    """
    Holds the final/initial consonant provision table together with its labels.

    Everything is loaded once, when the class body is executed at import time.
    A spelling is looked up as ``TABLE[row, col]`` where ``row`` is the position
    of a final consonant in ``ROW_LABELS`` and ``col`` is the position of the
    following initial consonant in ``COL_LABELS``.

    Attributes
    ----------
    TABLE : np.ndarray
        A 2D numpy array of strings read from the file "convert/data/provisions"
        (one row per line, cells separated by single spaces).
    COL_LABELS : np.ndarray
        A 1D numpy array containing column labels derived from the keys of the "CHOSUNG" dictionary in LATIN.
    ROW_LABELS : np.ndarray
        A 1D numpy array containing row labels derived from the keys of the "JONGSUNG" dictionary in LATIN.
    """

    # Explicit UTF-8 keeps loading independent of the platform's locale.
    with open(PATH / "convert/data/provisions", "r", encoding="utf-8") as f:
        TABLE = np.array([line.strip().split(" ") for line in f])
    # The file handle is only needed while loading; do not keep it as a class attribute.
    del f

    COL_LABELS = np.array(list(LATIN["CHOSUNG"].keys()))
    ROW_LABELS = np.array(list(LATIN["JONGSUNG"].keys()))
37
+
38
+
39
def custom_split(text: str) -> t.List[str]:
    r"""
    Tokenise text into non-whitespace runs and individual whitespace characters.

    Runs of non-whitespace characters are kept whole, while every whitespace
    character becomes its own token, so joining the result with "" gives back
    the original text.

    Parameters
    ----------
    text : str
        The input text to be split.

    Returns
    -------
    List[str]
        The tokens in their original order; each one is either a run of
        non-whitespace characters or exactly one whitespace character.

    Examples
    --------
    >>> custom_split("Hello   world")
    ["Hello", " ", " ", " ", "world"]

    >>> custom_split("a\nb\tc")
    ["a", "\n", "b", "\t", "c"]
    """
    tokens = []
    for piece in re.findall(r"(\s+|[^\s]+)", text):
        if piece.isspace() and len(piece) > 1:
            # A multi-character whitespace run is exploded into single characters.
            tokens += piece
        else:
            tokens.append(piece)
    return tokens
72
+
73
+
74
def decompose_hangul(char: str) -> t.Tuple[str, str, str]:
    """
    Decomposes a precomposed Hangul syllable into its conjoining Jamo components.

    Parameters
    ----------
    char : str
        A single character.

    Returns
    -------
    Tuple[str, str, str]
        For a Hangul syllable (U+AC00..U+D7A3), a tuple containing three strings:
        - The leading consonant (choseong)
        - The vowel (jungseong)
        - The trailing consonant (jongseong), or an empty string if there is no trailing consonant.

        Any other character is not an error: it is returned unchanged in the
        middle slot, as ``("", char, "")``.

    Examples
    --------
    >>> decompose_hangul("가")
    ("ᄀ", "ᅡ", "")

    >>> decompose_hangul("각")
    ("ᄀ", "ᅡ", "ᆨ")

    >>> decompose_hangul("a")
    ("", "a", "")
    """
    # Position of the syllable inside the precomposed block starting at U+AC00.
    offset = ord(char) - 0xAC00
    if not 0 <= offset <= 0xD7A3 - 0xAC00:
        return ("", char, "")

    # Syllables are ordered as lead * 588 + vowel * 28 + tail (588 = 21 * 28).
    lead, remainder = divmod(offset, 588)
    vowel, tail = divmod(remainder, 28)

    # tail == 0 means "no final consonant"; real finals start at U+11A8.
    final = chr(0x11A7 + tail) if tail else ""
    return (chr(0x1100 + lead), chr(0x1161 + vowel), final)
115
+
116
+
117
def get_chosung(char: str) -> t.List[str]:
    """
    Breaks a conjoining Chosung (initial consonant) jamo into compatibility jamo.

    Parameters
    ----------
    char : str
        A conjoining initial-consonant jamo (e.g. U+1100) to look up in the
        CHOSUNG dictionary.

    Returns
    -------
    List[str]
        The compatibility jamo that make up the initial consonant; a doubled
        consonant yields the same letter twice.
        Returns an empty list if the character is not found in the CHOSUNG dictionary.

    Examples
    --------
    >>> get_chosung("ᄀ")  # U+1100
    ["ㄱ"]

    >>> get_chosung("ᄁ")  # U+1101
    ["ㄱ", "ㄱ"]

    >>> get_chosung("x")
    []
    """
    # Keys of the bundled table are stored wrapped in single quotes.
    return CHOSUNG.get(f"'{char}'", [])
144
+
145
+
146
def get_jongsung(char: str) -> t.List[str]:
    """
    Breaks a conjoining Jongsung (final consonant) jamo into compatibility jamo.

    Parameters
    ----------
    char : str
        A conjoining final-consonant jamo (e.g. U+11A8) to look up in the
        JONGSUNG dictionary.

    Returns
    -------
    List[str]
        The compatibility jamo that make up the final consonant; a cluster
        yields one entry per component.
        Returns an empty list if the character is not found in the JONGSUNG dictionary.

    Examples
    --------
    >>> get_jongsung("ᆨ")  # U+11A8
    ["ㄱ"]

    >>> get_jongsung("ᆹ")  # U+11B9
    ["ㅂ", "ㅅ"]

    >>> get_jongsung("x")
    []
    """
    # Keys of the bundled table are stored wrapped in single quotes.
    return JONGSUNG.get(f"'{char}'", [])
173
+
174
+
175
def get_jungsung(char: str) -> str:
    """
    Retrieves the Latin spelling of a conjoining Jungsung (medial vowel) jamo.

    Parameters
    ----------
    char : str
        A conjoining vowel jamo (e.g. U+1161) to look up in the JUNGSUNG dictionary.

    Returns
    -------
    str
        The upper-case Latin spelling stored for the vowel.
        Returns an empty string if the character is not found in the JUNGSUNG
        dictionary; vowels stored without a spelling also yield an empty string.

    Examples
    --------
    >>> get_jungsung("ᅡ")  # U+1161
    "A"

    >>> get_jungsung("x")
    ""
    """
    # Keys of the bundled table are stored wrapped in single quotes.
    return JUNGSUNG.get(f"'{char}'", "")
202
+
203
+
204
def split_into_chunks(data: t.Iterable[t.Any], size: int) -> t.Iterator[t.Tuple[t.Any, ...]]:
    """
    Splits an iterable into consecutive chunks of a specified size.

    Parameters
    ----------
    data : iterable
        The iterable to be split into chunks. It is consumed lazily.
    size : int
        The maximum size of each chunk.

    Returns
    -------
    Iterator[Tuple[Any, ...]]
        An iterator where each item is a tuple containing a chunk of the original data.
        The last chunk is shorter when the data does not divide evenly.

    Examples
    --------
    >>> list(split_into_chunks([1, 2, 3, 4, 5], 2))
    [(1, 2), (3, 4), (5,)]

    >>> list(split_into_chunks("abcdef", 3))
    [("a", "b", "c"), ("d", "e", "f")]
    """
    source = iter(data)

    def take_chunk():
        return tuple(itertools.islice(source, size))

    # Two-argument iter(): keep calling take_chunk until it returns the empty
    # tuple, which happens once the source is exhausted.
    return iter(take_chunk, ())
231
+
232
+
233
def split_jamo(text: str) -> t.List[t.Tuple[str, ...]]:
    """
    Splits a string into per-character Jamo tuples, padded for grouping in threes.

    Each character is decomposed with ``decompose_hangul``. The first tuple gets
    one leading "" and, when the text has more than one character, the last
    tuple gets two trailing "". Flattening the result and cutting it into groups
    of three therefore yields ``(previous final, initial, vowel)`` triples, which
    lets a final consonant be handled together with the initial that follows it.
    A one-character text only receives the leading pad, leaving its final
    consonant alone in the last group.

    Parameters
    ----------
    text : str
        The string of Hangul characters to be split.

    Returns
    -------
    List[Tuple[str, ...]]
        One tuple per input character: four items for the first character,
        five for the last character of a longer text, three otherwise.

    Examples
    --------
    >>> split_jamo("한글")
    [("", "ᄒ", "ᅡ", "ᆫ"), ("ᄀ", "ᅳ", "ᆯ", "", "")]

    >>> split_jamo("가")
    [("", "ᄀ", "ᅡ", "")]
    """
    last_index = len(text) - 1
    result = []
    for i, char in enumerate(text):
        jamo_components = decompose_hangul(char)
        if i == 0:
            # Leading pad: there is no previous final before the first syllable.
            result.append(("", *jamo_components))
        elif i == last_index:
            # Trailing pads complete the group that carries the last final.
            result.append((*jamo_components, "", ""))
        else:
            result.append(jamo_components)
    return result
@@ -0,0 +1,5 @@
1
+ from romanization import romanize
2
+
3
+
4
def test_romanize():
    """Check romanize() on a single word, the documented sentence and pass-through input."""
    # A final consonant followed by an initial consonant inside one word.
    assert romanize("안녕") == "annyeong"
    # Example documented in the README and in romanize()'s docstring;
    # the spaces between words must be preserved.
    assert romanize("좋아 첫 눈에 반해 버린") == "joha cheot nune banhae beorin"
    # Text without Hangul is returned unchanged.
    assert romanize("hello world") == "hello world"
    assert romanize("") == ""