interscript 0.1.1 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  6. data/lib/g2pwrapper.py +34 -0
  7. data/lib/interscript-opal.rb +2 -0
  8. data/lib/interscript.rb +138 -20
  9. data/lib/interscript/command.rb +28 -0
  10. data/lib/interscript/fs.rb +71 -0
  11. data/lib/interscript/mapping.rb +142 -0
  12. data/lib/interscript/opal.rb +27 -0
  13. data/lib/interscript/opal/maps.js.erb +10 -0
  14. data/lib/interscript/opal_map_translate.rb +12 -0
  15. data/lib/interscript/version.rb +1 -1
  16. data/lib/model-7 +0 -0
  17. data/lib/tha-pt-b-7 +0 -0
  18. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  19. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  20. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  21. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  22. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  23. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  24. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +125 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  27. data/maps/alalc-ell-Grek-Latn-1997.yaml +624 -0
  28. data/maps/alalc-ell-Grek-Latn-2010.yaml +627 -0
  29. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  30. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  31. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  32. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  33. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  34. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  35. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  36. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  37. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +221 -0
  38. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  39. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  40. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  41. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  42. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  43. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  44. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +174 -0
  45. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  46. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  47. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  48. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  49. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  50. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  51. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  52. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  53. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  54. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  55. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  56. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +285 -0
  57. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  58. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  59. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +701 -0
  60. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +19 -0
  61. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  62. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  63. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  64. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  65. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  66. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  67. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  68. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  69. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  70. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +92 -0
  71. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  72. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  73. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +162 -0
  74. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  75. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  76. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  77. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  78. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  79. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  80. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  81. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  82. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  83. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  84. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  85. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  86. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  87. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  88. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  89. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  90. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  91. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  92. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  93. data/maps/ggg-kat-Geor-Latn-2002.yaml +88 -0
  94. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  95. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  96. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +186 -0
  97. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  98. data/maps/icao-bel-Cyrl-Latn-9303.yaml +136 -0
  99. data/maps/icao-bul-Cyrl-Latn-9303.yaml +118 -0
  100. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  101. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  102. data/maps/icao-per-Arab-Latn-9303.yaml +103 -0
  103. data/maps/icao-rus-Cyrl-Latn-9303.yaml +117 -0
  104. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  105. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +119 -0
  106. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  107. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +609 -0
  108. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +40 -0
  109. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  110. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +271 -0
  111. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  112. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  113. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  114. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  115. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  116. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  117. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  118. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  119. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  120. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  121. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  122. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  123. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  124. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  125. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  126. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  127. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  128. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  129. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  130. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  131. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  132. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  133. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  134. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  135. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  136. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  137. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  138. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  139. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  140. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  141. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  142. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  143. data/maps/ses-ara-Arab-Latn-1930.yaml +279 -0
  144. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  145. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  146. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  147. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  148. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  149. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  150. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  151. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  152. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  153. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  154. data/maps/un-mon-Mong-Latn-2013.yaml +99 -0
  155. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  156. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  157. data/maps/un-ukr-Cyrl-Latn-1998.yaml +30 -0
  158. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  159. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  160. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  161. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  162. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  163. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  164. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  165. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  166. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  167. data/spec/interscript/mapping_spec.rb +42 -0
  168. data/spec/interscript_spec.rb +26 -0
  169. data/spec/spec_helper.rb +3 -0
  170. metadata +298 -18
@@ -0,0 +1,19 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1996
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: BGN/PCGN 1996 System
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693694/ROMANIZATION_OF_GREEK.pdf
9
+ creation_date: 1996
10
+ description: |
11
+ BGN/PCGN Romanization table for Greek
12
+
13
+ notes:
14
+ - Identical to ELOT 743:1982, which is also adopted as ISO 843:1997 and by UNGEGN
15
+
16
+ map:
17
+ character_separator: ""
18
+ word_separator: " "
19
+ inherit: "elot-ell-Grek-Latn-743-1982-ts"
@@ -0,0 +1,257 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1930
4
+ language: jpn
5
+ source_script: Hrkt
6
+ destination_script: Latn
7
+ name: Japanese Kana Modified Hepburn 1930 System
8
+ url:
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+ - Segmentation needs to be done before using this map
15
+ - Note 5 in the specification states that when identical repeating vowels
16
+ belong to different kanji characters, they shall be romanized
17
+ individually and an apostrophe (’) shall be placed between the vowels.
18
+ However since this is a map from Kana to Hepburn, there is no way to
19
+ implement this feature.
20
+ - The documentation did not specify how the hyphen should be used.
21
+
22
+ tests:
23
+
24
+ - source: てがた-からみでん
25
+ expected: "Tegata-karamiden"
26
+ - source: てがた-すみよしちょう
27
+ expected: "Tegata-sumiyoshichō"
28
+ - source: さいのはま
29
+ expected: "Sainohama"
30
+ - source: てがた-たなか
31
+ expected: "Tegata-tanaka"
32
+ - source: ほりおでん
33
+ expected: "Horioden"
34
+ - source: そえがわ
35
+ expected: "Soegawa"
36
+ - source: ふねがさわ
37
+ expected: "Funegasawa"
38
+ - source: とくまんだて
39
+ expected: "Tokumandate"
40
+ - source: たてない
41
+ expected: "Tatenai"
42
+ - source: つるがさき
43
+ expected: "Tsurugasaki"
44
+ - source: しもやつせ
45
+ expected: "Shimoyatsuse"
46
+ - source: かみやつせ
47
+ expected: "Kamiyatsuse"
48
+ - source: しんとうだ
49
+ expected: "Shintōda"
50
+ - source: かじのめ
51
+ expected: "Kajinome"
52
+ - source: まえぎ
53
+ expected: "Maegi"
54
+ - source: くろさわ やま
55
+ expected: "Kurosawa Yama"
56
+ - source: いちのさわ がわ
57
+ expected: "Ichinosawa Gawa"
58
+ - source: はちやまえ
59
+ expected: "Hachiyamae"
60
+ - source: やち
61
+ expected: "Yachi"
62
+ - source: たてぬま
63
+ expected: "Tatenuma"
64
+ - source: しらはま
65
+ expected: "Shirahama"
66
+ - source: けせんまち
67
+ expected: "Kesenmachi"
68
+ - source: けいだい-かわら
69
+ expected: "Keidai-kawara"
70
+ - source: いしやました
71
+ expected: "Ishiyamashita"
72
+ - source: なえひら-やち
73
+ expected: "Naehira-yachi"
74
+ - source: とみの
75
+ expected: "Tomino"
76
+ - source: あらや-たかみまち
77
+ expected: "Araya-takamimachi"
78
+ - source: ながた
79
+ expected: "Nagata"
80
+ - source: とどろき おんせん
81
+ expected: "Todoroki Onsen"
82
+ - source: かしわぎはら
83
+ expected: "Kashiwagihara"
84
+ - source: とやけもり やま
85
+ expected: "Toyakemori Yama"
86
+ - source: なかさい
87
+ expected: "Nakasai"
88
+ - source: たけした
89
+ expected: "Takeshita"
90
+ - source: みと
91
+ expected: "Mito"
92
+ - source: みなみなかさと
93
+ expected: "Minaminakasato"
94
+ - source: みずおし
95
+ expected: "Mizuoshi"
96
+ - source: なかさと
97
+ expected: "Nakasato"
98
+ - source: しんかりば
99
+ expected: "Shinkariba"
100
+ - source: しんかみぬま
101
+ expected: "Shinkaminuma"
102
+ - source: しんばし
103
+ expected: "Shinbashi"
104
+ - source: りくぜんやました えき
105
+ expected: "Rikuzen’yamashita Eki"
106
+ - source: うしじまにし
107
+ expected: "Ushijimanishi"
108
+ - source: はまえば
109
+ expected: "Hamaeba"
110
+ - source: ぬまむかい
111
+ expected: "Numamukai"
112
+ - source: さんげんやち
113
+ expected: "Sangen’yachi"
114
+ - source: にけんやち
115
+ expected: "Niken’yachi"
116
+ - source: やちなか
117
+ expected: "Yachinaka"
118
+ - source: なす がわ
119
+ expected: "Nasu Gawa"
120
+ - source: おおはらはま
121
+ expected: "Ōharahama"
122
+ - source: うるご がわ
123
+ expected: "Urugo Gawa"
124
+ - source: なかばせ
125
+ expected: "Nakabase"
126
+ - source: うと えき
127
+ expected: "Uto Eki"
128
+ - source: みずまち
129
+ expected: "Mizumachi"
130
+ - source: ごんげんどう
131
+ expected: "Gongendō"
132
+ - source: いとひさ
133
+ expected: "Itohisa"
134
+ - source: あらおい
135
+ expected: "Araoi"
136
+ - source: わんめ
137
+ expected: "Wanme"
138
+ - source: かじろ
139
+ expected: "Kajiro"
140
+ - source: みやばら
141
+ expected: "Miyabara"
142
+ - source: いまどみ
143
+ expected: "Imadomi"
144
+ - source: かいほ
145
+ expected: "Kaiho"
146
+ - source: かいほ ぼえん
147
+ expected: "Kaiho Boen"
148
+ - source: ひきだ
149
+ expected: "Hikida"
150
+ - source: あさい-こむかい
151
+ expected: "Asai-komukai"
152
+ - source: こうざか
153
+ expected: "Kōzaka"
154
+ - source: こうふうだい
155
+ expected: "Kōfūdai"
156
+ - source: たての
157
+ expected: "Tateno"
158
+ - source: センター
159
+ expected: "Sentā"
160
+ - source: フィリピン
161
+ expected: "Firipin"
162
+ - source: ヴィオリン
163
+ expected: "Viorin"
164
+ - source: クォーター
165
+ expected: "Kwōtā"
166
+ - source: パッチリ
167
+ expected: "Patchiri"
168
+ - source: ぽっぽっや
169
+ expected: "Poppoyya"
170
+
171
+ map:
172
+ character_separator: ""
173
+ word_separator: " "
174
+ title_case: True
175
+ inherit: var-jpn-Hrkt-Latn-hepburn-1954
176
+
177
+ characters:
178
+ # Rare sounds, Table 2 & 4
179
+
180
+ "くぁ": "kwa"
181
+ "クァ": "kwa"
182
+ "ぐぁ": "gwa"
183
+ "グァ": "gwa"
184
+ "くぃ": "kwi"
185
+ "クィ": "kwi"
186
+ "ぐぃ": "gwi"
187
+ "グィ": "gwi"
188
+ "きぇ": "kye"
189
+ "キェ": "kye"
190
+ "ぎぇ": "gye"
191
+ "ギェ": "gye"
192
+ "くぇ": "kwe"
193
+ "クェ": "kwe"
194
+ "ぐぇ": "gwe"
195
+ "グェ": "gwe"
196
+ "くぉ": "kwo"
197
+ "クォ": "kwo"
198
+ "ぐぉ": "gwo"
199
+ "グォ": "gwo"
200
+ "しぇ": "she"
201
+ "シェ": "she"
202
+ "じぇ": "je"
203
+ "ジェ": "je"
204
+ "つぁ": "tsa"
205
+ "ツァ": "tsa"
206
+ "てぃ": "ti"
207
+ "ティ": "ti"
208
+ "でぃ": "di"
209
+ "ディ": "di"
210
+ "てゅ": "tyu"
211
+ "テュ": "tyu"
212
+ "でゅ": "dyu"
213
+ "デュ": "dyu"
214
+ "とぅ": "tu"
215
+ "トゥ": "tu"
216
+ "どぅ": "du"
217
+ "ドゥ": "du"
218
+ "ちぇ": "che"
219
+ "チェ": "che"
220
+ "ぢぇ": "je"
221
+ "ヂェ": "je"
222
+ "つぇ": "tse"
223
+ "ツェ": "tse"
224
+ "つぉ": "tso"
225
+ "ツォ": "tso"
226
+ "にぇ": "nye"
227
+ "ニェ": "nye"
228
+ "ふぁ": "fa"
229
+ "ファ": "fa"
230
+ "ふぃ": "fi"
231
+ "フィ": "fi"
232
+ "ふぇ": "fe"
233
+ "フェ": "fe"
234
+ "ふぉ": "fo"
235
+ "フォ": "fo"
236
+ "みぇ": "mye"
237
+ "ミェ": "mye"
238
+ "ぃぇ": "ye"
239
+ "ィェ": "ye"
240
+ "りぇ": "rye"
241
+ "リェ": "rye"
242
+ "ゔぁ": "va"
243
+ "ヴァ": "va"
244
+ "うぃ": "wi"
245
+ "ウィ": "wi"
246
+ "ゔぃ": "vi"
247
+ "ヴィ": "vi"
248
+ "うぇ": "we"
249
+ "ウェ": "we"
250
+ "ゔぇ": "ve"
251
+ "ヴェ": "ve"
252
+ "うぉ": "wo"
253
+ "ウォ": "wo"
254
+ "ゔぉ": "vo"
255
+ "ヴォ": "vo"
256
+ "ゔ": "vu"
257
+ "ヴ": "vu"
@@ -0,0 +1,127 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1981
4
+ language: kat
5
+ source_script: Geor
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF GEORGIAN; BGN/PCGN 1981 System
8
+ url: https://transliteration.eki.ee/pdf/Georgian.pdf
9
+ creation_date: 1981
10
+ confirmation_date: 1981
11
+ description: |
12
+ BGN/PCGN system of 1981.
13
+
14
+ notes:
15
+
16
+ tests:
17
+ - source: ჰებუდი
18
+ expected: hebudi
19
+
20
+ - source: ჯვრის წყალსაცავი
21
+ expected: jvris tsqalsats’avi
22
+
23
+ - source: ჯვავიაკვარა
24
+ expected: jvaviak’vara
25
+
26
+ - source: ჯობრია
27
+ expected: jobria
28
+
29
+ - source: ძულუხირა
30
+ expected: dzulukhira
31
+
32
+ - source: ლეკუხონა
33
+ expected: lek’ukhona
34
+
35
+ - source: აბაშა
36
+ expected: abasha
37
+
38
+ - source: ააცი
39
+ expected: aats’i
40
+
41
+ # TODO: Which system does this example belong to?
42
+ # - source: აბააჟახვუ
43
+ # expected: abaazhvakhu
44
+
45
+ # TODO: These examples from GNDB are clearly using the BGNPCGN 2009 system
46
+ #
47
+ # - source: ხობის მუნიციპალიტეტი
48
+ # expected: khobis munitsip’alit’et’i
49
+ #
50
+ # - source: მყინვარი ჩრდილოეთი ლეადაშატი
51
+ # expected: mq’invari chrdiloeti leadashat’i
52
+ #
53
+ # - source: ხეწკვარა
54
+ # expected: khets’k’vara
55
+ #
56
+ # - source: ჯამპალი
57
+ # expected: jamp’ali
58
+ #
59
+ # - source: ჯავის მუნიციპალიტეტი
60
+ # expected: javis munitsip’alit’et’i
61
+ #
62
+ # - source: ხოიჯგეთა
63
+ # expected: khoijgeta
64
+ #
65
+ # - source: ხობის მუნიციპალიტეტი
66
+ # expected: khobis munitsip’alit’et’i
67
+ #
68
+ # - source: წვიშარხუ
69
+ # expected: ts’visharkhu
70
+ # - source: აღმოსავლეთი გუმისთა
71
+ # expected: aghmosavleti gumista
72
+ #
73
+ # - source: ქვემო ბირცხა
74
+ # expected: kvemo birtskha
75
+ #
76
+ # - source: ზემო ბირცხა
77
+ # expected: zemo birtskha
78
+ #
79
+ # - source: აბჟაყვა
80
+ # expected: abzhaq’va
81
+
82
+
83
+
84
+ map:
85
+ characters:
86
+ '\u10d0' : 'a' # ა
87
+ '\u10d1' : 'b' # ბ
88
+ '\u10d2' : 'g' # გ
89
+ '\u10d3' : 'd' # დ
90
+ '\u10d4' : 'e' # ე
91
+ '\u10d5' : 'v' # ვ
92
+ '\u10d6' : 'z' # ზ
93
+
94
+ '\u10f1' : 'ey' # ჱ
95
+
96
+ '\u10d7' : 't’' # თ
97
+ '\u10d8' : 'i' # ი
98
+ '\u10d9' : 'k’' # კ
99
+ '\u10da' : 'l' # ლ
100
+ '\u10db' : 'm' # მ
101
+ '\u10dc' : 'n' # ნ
102
+
103
+ '\u10f2' : 'j' # ჲ
104
+
105
+ '\u10dd' : 'o' # ო
106
+ '\u10de' : 'p' # პ
107
+ '\u10df' : 'zh' # ჟ
108
+ '\u10e0' : 'r' # რ
109
+ '\u10e1' : 's' # ს
110
+ '\u10e2' : 't' # ტ
111
+ '\u10e3' : 'u' # უ
112
+ '\u10e4' : 'p’' # ფ
113
+ '\u10e5' : 'k’' # ქ
114
+ '\u10e6' : 'gh' # ღ
115
+ '\u10e7' : 'q' # ყ
116
+ '\u10e8' : 'sh' # შ
117
+ '\u10e9' : 'ch’' # ჩ
118
+ '\u10ea' : 'ts’' # ც
119
+ '\u10eb' : 'dz' # ძ
120
+ '\u10ec' : 'ts' # წ
121
+ '\u10ed' : 'ch' # ჭ
122
+ '\u10ee' : 'kh' # ხ
123
+
124
+ '\u10f4' : 'q’' # ჴ
125
+
126
+ '\u10ef' : 'j' # ჯ
127
+ '\u10f0' : 'h' # ჰ
@@ -0,0 +1,42 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2009
4
+ language: kat
5
+ source_script: Geor
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF GEORGIAN; Georgia 2011 national system; BGN/PCGN 2009 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/810202/ROMANIZATION_OF_GEORGIAN.pdf
9
+ creation_date: 2009
10
+ confirmation_date: 2011
11
+ description: |
12
+ This romanization system corresponds to that devised in 2002 by the
13
+ State Department of Geodesy and Cartography of Georgia and the
14
+ Institute of Linguistics of the Georgian Academy of Sciences, and
15
+ approved by Presidential Decree 109 of 24 February 2011. It represents
16
+ the Mkhedruli alphabet, as presently used in Georgia. This system was
17
+ adopted by BGN and PCGN in 2009, superseding the BGN/PCGN system of
18
+ 1981.
19
+
20
+ notes:
21
+ - This system denotes ejective (glottalised) consonants by means of
22
+ an apostrophe. The BGN/PCGN 1981 system instead used the apostrophe to
23
+ denote aspirated consonants (letters 8, 21, 22, 26 & 27).
24
+ - The Romanization columns show only lowercase forms but, when
25
+ romanizing, uppercase and lowercase Roman letters as appropriate should
26
+ be used.
27
+
28
+ tests:
29
+ - source: თბილისი
30
+ expected: tbilisi
31
+
32
+ - source: მეღვინეთუხუცესი
33
+ expected: meghvinetukhutsesi
34
+
35
+ - source: ჭიანჭველა
36
+ expected: ch’ianch’vela
37
+
38
+ - source: ბაყაყი
39
+ expected: baq’aq’i
40
+
41
+ map:
42
+ inherit: "ggg-kat-Geor-Latn-2002"
@@ -0,0 +1,253 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: kn-1945
4
+ language: kor
5
+ source_script: Hang
6
+ destination_script: Latn
7
+ name: BGN/PCGN 1945 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
9
+ creation_date: 1945
10
+ adoption_date:
11
+ description:
12
+
13
+ notes: "
14
+
15
+ 1. At the end of a syllable, the character ᄋ should be romanized ng,
16
+ as in the following example:
17
+
18
+ 평양 → P’yŏngyang
19
+
20
+ At the beginning of a syllable, the character ᄋ is silent and
21
+ should not be romanized. An example follows:
22
+
23
+ 용화 → Yonghwa
24
+
25
+ 2. Syllable boundaries within words are not reflected in romanization.
26
+ In the different types of syllables shown in the table below, C
27
+ represents any consonant character, V represents any vowel character
28
+ and / represents a syllable boundary.
29
+
30
+ Han’gŭl 개성 남포 안양
31
+ Syllable boundaries CV/CVC CVC/CV VC/VC
32
+ Romanization Kaesŏng Namp’o Anyang
33
+
34
+ 3. Euphonic changes occurring within a word, including between the
35
+ specific and generic of a geographical name, should be reflected in
36
+ romanization. Generic terms are usually seen separated from the name
37
+ by a hyphen and with a lower case initial letter rather than as a
38
+ separate word:
39
+
40
+ 영진리 → Yŏngjil-li
41
+ 덕흥리 → Tŏkhŭng-ni
42
+ 압록강 → Amnok-kang
43
+ 대동강 → Taedong-gang
44
+
45
+ 4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
46
+ published in North Korea in 1966), unlike the Korean spoken in the
47
+ Republic of Korea, the language spoken in the Democratic People’s
48
+ Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
49
+ The use of the word-initial ᄅ ('r') can be seen in official news
50
+ reports as well as native mapping. Since such examples exist, the
51
+ word initial ᄅ ('r') is reflected as an option in the tables given above.
52
+
53
+ 5. The Romanization column shows only lowercase forms but, when romanizing,
54
+ uppercase and lowercase Roman letters as appropriate should be used.
55
+ "
56
+
57
+ tests:
58
+ - source: "평양"
59
+ expected: "P’yŏngyang"
60
+ - source: "용화"
61
+ expected: "Yonghwa"
62
+ - source: "개성"
63
+ expected: "Kaesŏng"
64
+ - source: "남포"
65
+ expected: "Namp’o"
66
+ - source: "안양"
67
+ expected: "Anyang"
68
+ - source: "영진-리"
69
+ expected: "Yŏngjil-li"
70
+ - source: "덕흥-리"
71
+ expected: "Tŏkhŭng-ni"
72
+ - source: "압록-강"
73
+ expected: "Amnok-kang"
74
+ - source: "대동-강"
75
+ expected: "Taedong-gang"
76
+ - source: "라선특별시"
77
+ expected: "Rasŏnt’ŭkpyŏlsi"
78
+ - source: 은하-리
79
+ expected: "Ŭnha-ri"
80
+ - source: 은중-리
81
+ expected: "Ŭnjung-ni"
82
+ - source: 은장-령
83
+ expected: "Ŭnjang-nyŏng"
84
+ - source: 은혜-동
85
+ expected: "Ŭnhye-dong"
86
+ - source: 은호-리
87
+ expected: "Ŭnho-ri"
88
+ - source: 은행정
89
+ expected: "Ŭnhaengjŏng"
90
+ - source: 은행-동
91
+ expected: "Ŭnhaeng-dong"
92
+ - source: 은행-촌
93
+ expected: "Ŭnhaeng-ch’on"
94
+ - source: 원수
95
+ expected: "Wŏnsu"
96
+ - source: 원소리-고개
97
+ expected: "Wŏnsori-gogae"
98
+ - source: 원소참
99
+ expected: "Wŏnsoch’am"
100
+ - source: 원소-리
101
+ expected: "Wŏnso-ri"
102
+ - source: 원신-리
103
+ expected: "Wŏnsil-li"
104
+ - source: 난곡
105
+ expected: "Nan’gok"
106
+ - source: 난산-리
107
+ expected: "Nansal-li"
108
+ - source: 난직
109
+ expected: "Nanjik"
110
+ - source: 영곡
111
+ expected: "Yŏnggok"
112
+ - source: 윗두밀
113
+ expected: "Wittumil"
114
+ - source: 윗도심이
115
+ expected: "Wittosimi"
116
+ - source: 둔지
117
+ expected: "Tunji"
118
+ - source: 서승
119
+ expected: "Sŏsŭng"
120
+ - source: 신촌
121
+ expected: "Sinch’on"
122
+ - source: 비암덕
123
+ expected: "Piamdŏk"
124
+ - source: 바위안
125
+ expected: "Pawian"
126
+ - source: 오송평
127
+ expected: "Osongp’yŏng"
128
+ - source: 그물목
129
+ expected: "Kŭmulmok"
130
+ - source: 구원정
131
+ expected: "Kuwŏnjŏng"
132
+ - source: 일하
133
+ expected: "Irha"
134
+ - source: 황우
135
+ expected: "Hwangu"
136
+ - source: 자작보
137
+ expected: "Chajakpo"
138
+ - source: 비파1-동
139
+ expected: "Pip’a Il-tong"
140
+ - source: 문암 오-동
141
+ expected: "Munam O-dong"
142
+
143
+ map:
144
+ character_separator: ""
145
+ word_separator: " "
146
+ title_case: True
147
+ inherit: [var-kor-Hang-Latn-mr-1939]
148
+
149
+ rules:
150
+ # Insert a ZERO WIDTH SPACE (U+200B) at the beginning and after every space
151
+ # (i.e. at each word start) so that the word-initial conversion rules will be blocked.
152
+ - pattern: "^"
153
+ result: "\u200B"
154
+ - pattern: "(?<= )"
155
+ result: "\u200B"
156
+
157
+ # convert numbers to space + Hangul
158
+ - pattern: "([^0-9 ])(?=[0-9])"
159
+ result: "\\1 "
160
+ - pattern: "1"
161
+ result: "일"
162
+ - pattern: "2"
163
+ result: "이"
164
+ - pattern: "3"
165
+ result: "삼"
166
+ - pattern: "4"
167
+ result: "사"
168
+ - pattern: "5"
169
+ result: "오"
170
+ - pattern: "6"
171
+ result: "육"
172
+ - pattern: "7"
173
+ result: "칠"
174
+ - pattern: "8"
175
+ result: "팔"
176
+ - pattern: "9"
177
+ result: "구"
178
+
179
+ # Disabled: logic to add a hyphen in front of generic terms
180
+ # - pattern: "(?<=.)(구역|동|리|도|고개|골|로동자구|사무소|초등학교|중학교|고등학교|강|포|령|역|봉|사|천|교|제|저수지|소류지|재|못|말|면|암|교회|촌|병원|바위|공원|섬|우체국|대학교|보건소|굴|치|대교|지구|폭포|해수욕장|휴게소|중고교|읍|보건진료소|마을|톨게이트|대학|시장|경찰서|학교)$" #to be expanded
181
+ # result: "-\\1"
182
+
183
+ postrules:
184
+
185
+ # Add space to the two ends of the string for easier word boundary handling
186
+ - pattern: "^"
187
+ result: " "
188
+ - pattern: "$"
189
+ result: " "
190
+
191
+ # Initial rules in the inherited map were blocked, so that
192
+ # this set of updated rules (with the onset rules removed) will be used instead.
193
+ - pattern: "\u200B"
194
+ result: ""
195
+
196
+ - pattern: "(?<= )ᄀ"
197
+ result: "k" # HANGUL CHOSEONG KIYEOK
198
+ - pattern: "(?<= )ᄂ"
199
+ result: "n" # HANGUL CHOSEONG NIEUN
200
+ - pattern: "(?<= )ᄃ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
201
+ result: "ch" # HANGUL CHOSEONG TIEUT # T -> Ch before yotized vowels
202
+ - pattern: "(?<= )ᄃ"
203
+ result: "t" # HANGUL CHOSEONG TIEUT
204
+ - pattern: "(?<= )ᄅ"
205
+ result: "r" # HANGUL CHOSEONG RIEUL
206
+ - pattern: "(?<= )ᄆ"
207
+ result: "m" # HANGUL CHOSEONG MIEUM
208
+ - pattern: "(?<= )ᄇ"
209
+ result: "p" # HANGUL CHOSEONG PIEUP
210
+ - pattern: "(?<= )ᄉ(?=ᅱ)"
211
+ result: "sh" # HANGUL CHOSEONG SIOS
212
+ - pattern: "(?<= )ᄉ"
213
+ result: "s" # HANGUL CHOSEONG SIOS
214
+ - pattern: "(?<= )ᄋ"
215
+ result: "" # HANGUL CHOSEONG IEUNG
216
+ - pattern: "(?<= )ᄌ"
217
+ result: "ch" # HANGUL CHOSEONG CIEUC
218
+ - pattern: "(?<= )ᄎ"
219
+ result: "ch’" # HANGUL CHOSEONG CHIEUCH
220
+ - pattern: "(?<= )ᄏ"
221
+ result: "k’" # HANGUL CHOSEONG KHIEUKH
222
+ - pattern: "(?<= )ᄐ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
223
+ result: "ch’" # HANGUL CHOSEONG THIEUTH + YOTIZED VOWELS
224
+ - pattern: "(?<= )ᄐ"
225
+ result: "t’" # HANGUL CHOSEONG THIEUTH
226
+ - pattern: "(?<= )ᄑ"
227
+ result: "p’" # HANGUL CHOSEONG PHIEUPH
228
+ - pattern: "(?<= )ᄒ"
229
+ result: "h" # HANGUL CHOSEONG HIEUH
230
+ - pattern: "(?<= )ᄁ"
231
+ result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
232
+ - pattern: "(?<= )ᄭ"
233
+ result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
234
+ - pattern: "(?<= )ᄄ"
235
+ result: "tt" # HANGUL CHOSEONG SSANGTIEUT
236
+ - pattern: "(?<= )ᄯ"
237
+ result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
238
+ - pattern: "(?<= )ᄈ"
239
+ result: "pp" # HANGUL CHOSEONG SSANGPIEUP
240
+ - pattern: "(?<= )ᄲ"
241
+ result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
242
+ - pattern: "(?<= )ᄊ"
243
+ result: "ss" # HANGUL CHOSEONG SSANGSIOS
244
+ - pattern: "(?<= )ᄍ"
245
+ result: "tch" # HANGUL CHOSEONG SSANGCIEUC
246
+ - pattern: "(?<= )ᄶ"
247
+ result: "tch" # HANGUL CHOSEONG SIOS-CIEUC
248
+
249
+ # Remove the spaces that were added to both ends of the string
250
+ - pattern: "^ "
251
+ result: ""
252
+ - pattern: " $"
253
+ result: ""