interscript 0.1.5 → 2.1.0a8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -123
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -311
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -69
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -23
  71. data/lib/interscript/opal/maps.js.erb +0 -7
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  77. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  78. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  79. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  80. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  81. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  82. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  83. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  84. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  85. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  86. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  87. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  88. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  89. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  90. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  91. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  92. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  93. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  94. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  95. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  96. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  97. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  98. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  99. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  100. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  101. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  102. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  103. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  104. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  105. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  106. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  107. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  108. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  109. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  110. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  111. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  112. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  113. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  114. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  115. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  116. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  117. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  118. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  119. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  120. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  122. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  123. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  124. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  125. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  126. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  127. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  128. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  129. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  130. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  131. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  132. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  133. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  134. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  135. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  136. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  137. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  138. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  139. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  140. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  141. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  142. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  143. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  144. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  145. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  146. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  147. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  148. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  149. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  150. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  151. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  152. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  153. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  154. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  155. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  156. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  157. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  158. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  159. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  160. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  161. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  162. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  163. data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
  164. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  165. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  166. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  167. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  168. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  169. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  170. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  171. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  172. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  173. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  174. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  175. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  176. data/maps/ses-ara-arab-latn-1930.yaml +0 -275
  177. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  178. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  179. data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
  180. data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
  181. data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
  182. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  183. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  184. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  185. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  186. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  187. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  188. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  189. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  190. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  191. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  192. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  193. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  194. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  195. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  196. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  197. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  198. data/spec/interscript/mapping_spec.rb +0 -42
  199. data/spec/interscript_spec.rb +0 -26
  200. data/spec/spec_helper.rb +0 -3
@@ -1,253 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: kn-1945
4
- language: kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: BGN/PCGN 1945 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
9
- creation_date: 1945
10
- adoption_date:
11
- description:
12
-
13
- notes: "
14
-
15
- 1. At the end of a syllable, the character ᄋ should be romanized ng,
16
- as in the following example:
17
-
18
- 평양 → P’yŏngyang
19
-
20
- At the beginning of a syllable, the character ᄋ is silent and
21
- should not be romanized. An example follows:
22
-
23
- 용화 → Yonghwa
24
-
25
- 2. Syllable boundaries within words are not reflected in romanization.
26
- In the different types of syllables shown in the table below, C
27
- represents any consonant character, V represents any vowel character
28
- and / represents a syllable boundary.
29
-
30
- Han’gŭl 개성 남포 안양
31
- Syllable boundaries CV/CVC CVC/CV VC/VC
32
- Romanization Kaesŏng Namp’o Anyang
33
-
34
- 3. Euphonic changes occurring within a word, including between the
35
- specific and generic of a geographical name, should be reflected in
36
- romanization. Generic terms are usually seen separated from the name
37
- by a hyphen and with a lower case initial letter rather than as a
38
- separate word:
39
-
40
- 영진리 → Yŏngjil-li
41
- 덕흥리 → Tŏkhŭng-ni
42
- 압록강 → Amnok-kang
43
- 대동강 → Taedong-gang
44
-
45
- 4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
46
- published in North Korea in 1966), unlike the Korean spoken in the
47
- Republic of Korea, the language spoken in the Democratic People’s
48
- Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
49
- The use of the word-initial ᄅ ('r') can be seen in official news
50
- reports as well as native mapping. Since such examples exist, the
51
- word-initial ᄅ ('r') is reflected as an option in the tables given above.
52
-
53
- 5. The Romanization column shows only lowercase forms but, when romanizing,
54
- uppercase and lowercase Roman letters as appropriate should be used.
55
- "
56
-
57
- tests:
58
- - source: "평양"
59
- expected: "P’yŏngyang"
60
- - source: "용화"
61
- expected: "Yonghwa"
62
- - source: "개성"
63
- expected: "Kaesŏng"
64
- - source: "남포"
65
- expected: "Namp’o"
66
- - source: "안양"
67
- expected: "Anyang"
68
- - source: "영진-리"
69
- expected: "Yŏngjil-li"
70
- - source: "덕흥-리"
71
- expected: "Tŏkhŭng-ni"
72
- - source: "압록-강"
73
- expected: "Amnok-kang"
74
- - source: "대동-강"
75
- expected: "Taedong-gang"
76
- - source: "라선특별시"
77
- expected: "Rasŏnt’ŭkpyŏlsi"
78
- - source: 은하-리
79
- expected: "Ŭnha-ri"
80
- - source: 은중-리
81
- expected: "Ŭnjung-ni"
82
- - source: 은장-령
83
- expected: "Ŭnjang-nyŏng"
84
- - source: 은혜-동
85
- expected: "Ŭnhye-dong"
86
- - source: 은호-리
87
- expected: "Ŭnho-ri"
88
- - source: 은행정
89
- expected: "Ŭnhaengjŏng"
90
- - source: 은행-동
91
- expected: "Ŭnhaeng-dong"
92
- - source: 은행-촌
93
- expected: "Ŭnhaeng-ch’on"
94
- - source: 원수
95
- expected: "Wŏnsu"
96
- - source: 원소리-고개
97
- expected: "Wŏnsori-gogae"
98
- - source: 원소참
99
- expected: "Wŏnsoch’am"
100
- - source: 원소-리
101
- expected: "Wŏnso-ri"
102
- - source: 원신-리
103
- expected: "Wŏnsil-li"
104
- - source: 난곡
105
- expected: "Nan’gok"
106
- - source: 난산-리
107
- expected: "Nansal-li"
108
- - source: 난직
109
- expected: "Nanjik"
110
- - source: 영곡
111
- expected: "Yŏnggok"
112
- - source: 윗두밀
113
- expected: "Wittumil"
114
- - source: 윗도심이
115
- expected: "Wittosimi"
116
- - source: 둔지
117
- expected: "Tunji"
118
- - source: 서승
119
- expected: "Sŏsŭng"
120
- - source: 신촌
121
- expected: "Sinch’on"
122
- - source: 비암덕
123
- expected: "Piamdŏk"
124
- - source: 바위안
125
- expected: "Pawian"
126
- - source: 오송평
127
- expected: "Osongp’yŏng"
128
- - source: 그물목
129
- expected: "Kŭmulmok"
130
- - source: 구원정
131
- expected: "Kuwŏnjŏng"
132
- - source: 일하
133
- expected: "Irha"
134
- - source: 황우
135
- expected: "Hwangu"
136
- - source: 자작보
137
- expected: "Chajakpo"
138
- - source: 비파1-동
139
- expected: "Pip’a Il-tong"
140
- - source: 문암 오-동
141
- expected: "Munam O-dong"
142
-
143
- map:
144
- character_separator: ""
145
- word_separator: " "
146
- title_case: True
147
- inherit: [var-kor-Hang-Latn-mr-1939]
148
-
149
- rules:
150
- # Add a zero-width space (U+200B) at the start of the string and after each space (i.e. at the start of every word)
151
- # So that the word-initial conversion rules will be blocked.
152
- - pattern: "^"
153
- result: "\u200B"
154
- - pattern: "(?<= )"
155
- result: "\u200B"
156
-
157
- # convert numbers to space + Hangul
158
- - pattern: "([^0-9 ])(?=[0-9])"
159
- result: "\\1 "
160
- - pattern: "1"
161
- result: "일"
162
- - pattern: "2"
163
- result: "이"
164
- - pattern: "3"
165
- result: "삼"
166
- - pattern: "4"
167
- result: "사"
168
- - pattern: "5"
169
- result: "오"
170
- - pattern: "6"
171
- result: "육"
172
- - pattern: "7"
173
- result: "칠"
174
- - pattern: "8"
175
- result: "팔"
176
- - pattern: "9"
177
- result: "구"
178
-
179
- # Logic to add a hyphen in front of generic terms
180
- # - pattern: "(?<=.)(구역|동|리|도|고개|골|로동자구|사무소|초등학교|중학교|고등학교|강|포|령|역|봉|사|천|교|제|저수지|소류지|재|못|말|면|암|교회|촌|병원|바위|공원|섬|우체국|대학교|보건소|굴|치|대교|지구|폭포|해수욕장|휴게소|중고교|읍|보건진료소|마을|톨게이트|대학|시장|경찰서|학교)$" #to be expanded
181
- # result: "-\\1"
182
-
183
- postrules:
184
-
185
- # Add space to the two ends of the string for easier word boundary handling
186
- - pattern: "^"
187
- result: " "
188
- - pattern: "$"
189
- result: " "
190
-
191
- # Initial rules in the inherited map were blocked, so that
192
- # this set of updated rules (with the onset rules removed) will be used instead.
193
- - pattern: "\u200B"
194
- result: ""
195
-
196
- - pattern: "(?<= )ᄀ"
197
- result: "k" # HANGUL CHOSEONG KIYEOK
198
- - pattern: "(?<= )ᄂ"
199
- result: "n" # HANGUL CHOSEONG NIEUN
200
- - pattern: "(?<= )ᄃ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
201
- result: "ch" # HANGUL CHOSEONG TIEUT # T -> Ch before yotized vowels
202
- - pattern: "(?<= )ᄃ"
203
- result: "t" # HANGUL CHOSEONG TIEUT
204
- - pattern: "(?<= )ᄅ"
205
- result: "r" # HANGUL CHOSEONG RIEUL
206
- - pattern: "(?<= )ᄆ"
207
- result: "m" # HANGUL CHOSEONG MIEUM
208
- - pattern: "(?<= )ᄇ"
209
- result: "p" # HANGUL CHOSEONG PIEUP
210
- - pattern: "(?<= )ᄉ(?=ᅱ)"
211
- result: "sh" # HANGUL CHOSEONG SIOS
212
- - pattern: "(?<= )ᄉ"
213
- result: "s" # HANGUL CHOSEONG SIOS
214
- - pattern: "(?<= )ᄋ"
215
- result: "" # HANGUL CHOSEONG IEUNG
216
- - pattern: "(?<= )ᄌ"
217
- result: "ch" # HANGUL CHOSEONG CIEUC
218
- - pattern: "(?<= )ᄎ"
219
- result: "ch’" # HANGUL CHOSEONG CHIEUCH
220
- - pattern: "(?<= )ᄏ"
221
- result: "k’" # HANGUL CHOSEONG KHIEUKH
222
- - pattern: "(?<= )ᄐ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
223
- result: "ch’" # HANGUL CHOSEONG THIEUTH + YOTIZED VOWELS
224
- - pattern: "(?<= )ᄐ"
225
- result: "t’" # HANGUL CHOSEONG THIEUTH
226
- - pattern: "(?<= )ᄑ"
227
- result: "p’" # HANGUL CHOSEONG PHIEUPH
228
- - pattern: "(?<= )ᄒ"
229
- result: "h" # HANGUL CHOSEONG HIEUH
230
- - pattern: "(?<= )ᄁ"
231
- result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
232
- - pattern: "(?<= )ᄭ"
233
- result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
234
- - pattern: "(?<= )ᄄ"
235
- result: "tt" # HANGUL CHOSEONG SSANGTIEUT
236
- - pattern: "(?<= )ᄯ"
237
- result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
238
- - pattern: "(?<= )ᄈ"
239
- result: "pp" # HANGUL CHOSEONG SSANGPIEUP
240
- - pattern: "(?<= )ᄲ"
241
- result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
242
- - pattern: "(?<= )ᄊ"
243
- result: "ss" # HANGUL CHOSEONG SSANGSIOS
244
- - pattern: "(?<= )ᄍ"
245
- result: "tch" # HANGUL CHOSEONG SSANGCIEUC
246
- - pattern: "(?<= )ᄶ"
247
- result: "tch" # HANGUL CHOSEONG SIOS-CIEUC
248
-
249
- # Remove the leading and trailing spaces added earlier in the postrules
250
- - pattern: "^ "
251
- result: ""
252
- - pattern: " $"
253
- result: ""
@@ -1,48 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2011
4
- language: kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
- BGN/PCGN 2011 Agreement
15
-
16
- tests:
17
- - source: 불국사
18
- expected: "Bulguksa"
19
- - source: 묵호
20
- expected: "Mukho"
21
- - source: 울산
22
- expected: "Ulsan"
23
- - source: 독립문
24
- expected: "Dongnimmun"
25
- - source: 강남역
26
- expected: "Gangnamyeok"
27
- - source: 남산리
28
- expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
- - source: 내월리
30
- expected: "Naewol-ri"
31
- - source: 울릉군
32
- expected: "Ulleung-gun"
33
- - source: 설악산
34
- expected: "Seoraksan"
35
- - source: 삼죽면
36
- expected: "Samjuk-myeon"
37
- - source: 평리1동
38
- expected: "Pyeongni Il-dong"
39
- - source: 평리2동
40
- expected: "Pyeongni I-dong"
41
- - source: 탑안이
42
- expected: "Tabani"
43
-
44
- map:
45
- character_separator: ""
46
- word_separator: " "
47
- title_case: True
48
- inherit: moct-kor-Hang-Latn-2000
@@ -1,48 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2011
4
- language: kor
5
- source_script: Kore
6
- destination_script: Latn
7
- name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
- BGN/PCGN 2011 Agreement
15
-
16
- tests:
17
- - source: 佛國寺
18
- expected: "Bulguksa"
19
- - source: 묵호
20
- expected: "Mukho"
21
- - source: 蔚山
22
- expected: "Ulsan"
23
- - source: 獨立門
24
- expected: "Dongnimmun"
25
- - source: 江南驛
26
- expected: "Gangnamyeok"
27
- - source: 南山里
28
- expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
- - source: 내월里
30
- expected: "Naewol-ri"
31
- - source: 鬱陵郡
32
- expected: "Ulleung-gun"
33
- - source: 雪嶽山
34
- expected: "Seoraksan"
35
- - source: 三竹面
36
- expected: "Samjuk-myeon"
37
- - source: 坪里1洞
38
- expected: "Pyeongni Il-dong"
39
- - source: 坪里2洞
40
- expected: "Pyeongni I-dong"
41
- - source: 탑안이
42
- expected: "Tabani"
43
-
44
- map:
45
- character_separator: ""
46
- word_separator: " "
47
- title_case: True
48
- inherit: [var-kor-Kore-Hang-2013, moct-kor-Hang-Latn-2000]
@@ -1,159 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1981
4
- language: mkd
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Macedonian Romanization, BGN/PCGN 1981 System
8
- url: https://github.com/riboseinc/interscript/files/4247920/USBGN_romanization_Macedonian_1981.pdf
9
- creation_date: 1981
10
- description: BGN/PCGN Romanization table for Macedonian.
11
-
12
- tests:
13
- - source: Ѓол
14
- expected: Đol
15
- - source: Јусек Тепеси
16
- expected: Jusek Tepesi
17
- - source: Љуги Ќарит
18
- expected: Ljugi Ćarit
19
- - source: Ќафа Сан
20
- expected: Ćafa San
21
- - source: Агроплод Ресен
22
- expected: Agroplod Resen
23
- - source: Алта Чука
24
- expected: Alta Čuka
25
- - source: Баш Тепе
26
- expected: Baš Tepe
27
- - source: Браќам
28
- expected: Braćam
29
- - source: Винарска Визба Агропин
30
- expected: Vinarska Vizba Agropin
31
- - source: Галичица
32
- expected: Galičica
33
- - source: Дрењево
34
- expected: Drenjevo
35
- - source: Енешево
36
- expected: Eneševo
37
- - source: Иберлија
38
- expected: Iberlija
39
- - source: Крмзи Су
40
- expected: Krmzi Su
41
- - source: Лесноски Рид
42
- expected: Lesnoski Rid
43
- - source: Мала Корабска Врата
44
- expected: Mala Korabska Vrata
45
- - source: Низок Врв
46
- expected: Nizok Vrv
47
- - source: Охридско Езеро
48
- expected: Ohridsko Ezero
49
- - source: Прлиќ
50
- expected: Prlić
51
- - source: Равна Гора
52
- expected: Ravna Gora
53
- - source: Сеѓавечкиот Рид
54
- expected: Seđavečkiot Rid
55
- - source: Трновите Њиве
56
- expected: Trnovite Njive
57
- - source: Фасов Рид
58
- expected: Fasov Rid
59
- - source: Црни Камен
60
- expected: Crni Kamen
61
- - source: Чатал Чешми
62
- expected: Čatal Češmi
63
- - source: Шехово
64
- expected: Šehovo
65
-
66
- notes:
67
- - The character ѓ should be romanized g when it occurs before е and и. In other
68
- instances, it should be romanized đ (Đ).
69
- - The character ќ should be romanized k when it occurs before е and и. In other
70
- instances, it should be romanized ć.
71
-
72
- map:
73
- rules:
74
- - pattern: "Ѓ(?=[еЕиИ])"
75
- result: "G"
76
- - pattern: "ѓ(?=[еЕиИ])"
77
- result: "g"
78
- - pattern: "Ќ(?=[еЕиИ])"
79
- result: "K"
80
- - pattern: "ќ(?=[еЕиИ])"
81
- result: "k"
82
-
83
- postrules:
84
- # DZ
85
- - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
86
- result: "DZ"
87
- #LJ
88
- - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
89
- result: "LJ"
90
- #NJ
91
- - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
92
- result: "NJ"
93
- #DŽ
94
- - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
95
- result: "DŽ"
96
-
97
- characters:
98
- "\u0410": "A"
99
- "\u0411": "B"
100
- "\u0412": "V"
101
- "\u0413": "G"
102
- "\u0414": "D"
103
- "\u0403": "\u0110" # Đ
104
- "\u0415": "E"
105
- "\u0416": "\u005a\u030c" # Ž
106
- "\u0417": "Z"
107
- "\u0405": "Dz"
108
- "\u0418": "I"
109
- "\u0408": "J"
110
- "\u041A": "K"
111
- "\u041B": "L"
112
- "\u0409": "Lj"
113
- "\u041C": "M"
114
- "\u041D": "N"
115
- "\u040A": "Nj"
116
- "\u041E": "O"
117
- "\u041F": "P"
118
- "\u0420": "R"
119
- "\u0421": "S"
120
- "\u0422": "T"
121
- "\u040c": "\u0106" # Ć
122
- "\u0423": "U"
123
- "\u0424": "F"
124
- "\u0425": "H"
125
- "\u0426": "C"
126
- "\u0427": "\u0043\u030c" # Č
127
- "\u040F": "D\u007a\u030c" # Dž
128
- "\u0428": "\u0053\u030c" # Š
129
- "\u0430": "a"
130
- "\u0431": "b"
131
- "\u0432": "v"
132
- "\u0433": "g"
133
- "\u0434": "d"
134
- "\u0453": "\u0111" # đ
135
- "\u0435": "e"
136
- "\u0436": "\u007a\u030c" # ž
137
- "\u0437": "z"
138
- "\u0455": "dz"
139
- "\u0438": "i"
140
- "\u0458": "j"
141
- "\u043A": "k"
142
- "\u043B": "l"
143
- "\u0459": "lj"
144
- "\u043C": "m"
145
- "\u043D": "n"
146
- "\u045A": "nj"
147
- "\u043E": "o"
148
- "\u043F": "p"
149
- "\u0440": "r"
150
- "\u0441": "s"
151
- "\u0442": "t"
152
- "\u045c": "\u0107" # ć
153
- "\u0443": "u"
154
- "\u0444": "f"
155
- "\u0445": "h"
156
- "\u0446": "c"
157
- "\u0447": "\u0063\u030c" # č
158
- "\u045F": "d\u007a\u030c" # dž
159
- "\u0448": "\u0073\u030c" # š