interscript 0.1.6 → 2.1.0a9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -127
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +75 -339
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -71
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -27
  71. data/lib/interscript/opal/maps.js.erb +0 -10
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
  80. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  81. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
  82. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  83. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  84. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
  85. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
  86. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  87. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  88. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  89. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  90. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
  91. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  92. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  93. data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
  94. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
  95. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  96. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  97. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  98. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  99. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  100. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  101. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  102. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  103. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  104. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  105. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  106. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  107. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  108. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
  109. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
  110. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  111. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  112. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  113. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
  114. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  115. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  116. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
  117. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
  118. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  119. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  120. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  121. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  122. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  123. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  124. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  125. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  126. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
  127. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
  128. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  129. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  130. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
  131. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  132. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  133. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  134. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  135. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  136. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
  137. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  138. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  139. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  140. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  141. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  142. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  143. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  144. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  145. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
  146. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  147. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  148. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  149. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  150. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
  151. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  152. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  153. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
  154. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  155. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  156. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  157. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  158. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  159. data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
  160. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  161. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  162. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  163. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  164. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
  165. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
  166. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  167. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  172. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  173. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  174. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  175. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  176. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  177. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  178. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  179. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  180. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  181. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  182. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  183. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  184. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  185. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  186. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  200. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
  201. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  202. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  203. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  204. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  205. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  206. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  207. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  208. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  209. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  210. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  211. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
  212. data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
  213. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  214. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
  215. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
  216. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  217. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  218. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  219. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  220. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  221. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  222. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  223. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  224. data/spec/interscript/mapping_spec.rb +0 -42
  225. data/spec/interscript_spec.rb +0 -26
  226. data/spec/spec_helper.rb +0 -3
@@ -1,253 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: kn-1945
4
- language: kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: BGN/PCGN 1945 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
9
- creation_date: 1945
10
- adoption_date:
11
- description:
12
-
13
- notes: "
14
-
15
- 1. At the end of a syllable, the character ᄋ should be romanized ng,
16
- as in the following example:
17
-
18
- 평양 → P’yŏngyang
19
-
20
- At the beginning of a syllable, the character ᄋ is silent and
21
- should not be romanized. An example follows:
22
-
23
- 용화 → Yonghwa
24
-
25
- 2. Syllable boundaries within words are not reflected in romanization.
26
- In the different types of syllables shown in the table below, C
27
- represents any consonant character, V represents any vowel character
28
- and / represents a syllable boundary.
29
-
30
- Han’gŭl 개성 남포 안양
31
- Syllable boundaries CV/CVC CVC/CV VC/VC
32
- Romanization Kaesŏng Namp’o Anyang
33
-
34
- 3. Euphonic changes occurring within a word, including between the
35
- specific and generic of a geographical name, should be reflected in
36
- romanization. Generic terms are usually seen separated from the name
37
- by a hyphen and with a lower case initial letter rather than as a
38
- separate word:
39
-
40
- 영진리 → Yŏngjil-li
41
- 덕흥리 → Tŏkhŭng-ni
42
- 압록강 → Amnok-kang
43
- 대동강 → Taedong-gang
44
-
45
- 4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
46
- published in North Korea in 1966), unlike the Korean spoken in the
47
- Republic of Korea, the language spoken in the Democratic People’s
48
- Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
49
- The use of the word-initial ᄅ ('r') can be seen in official news
50
- reports as well as native mapping. Since such examples exist, the
51
- word-initial ᄅ ('r') is reflected as an option in the tables given above.
52
-
53
- 5. The Romanization column shows only lowercase forms but, when romanizing,
54
- uppercase and lowercase Roman letters as appropriate should be used.
55
- "
56
-
57
- tests:
58
- - source: "평양"
59
- expected: "P’yŏngyang"
60
- - source: "용화"
61
- expected: "Yonghwa"
62
- - source: "개성"
63
- expected: "Kaesŏng"
64
- - source: "남포"
65
- expected: "Namp’o"
66
- - source: "안양"
67
- expected: "Anyang"
68
- - source: "영진-리"
69
- expected: "Yŏngjil-li"
70
- - source: "덕흥-리"
71
- expected: "Tŏkhŭng-ni"
72
- - source: "압록-강"
73
- expected: "Amnok-kang"
74
- - source: "대동-강"
75
- expected: "Taedong-gang"
76
- - source: "라선특별시"
77
- expected: "Rasŏnt’ŭkpyŏlsi"
78
- - source: 은하-리
79
- expected: "Ŭnha-ri"
80
- - source: 은중-리
81
- expected: "Ŭnjung-ni"
82
- - source: 은장-령
83
- expected: "Ŭnjang-nyŏng"
84
- - source: 은혜-동
85
- expected: "Ŭnhye-dong"
86
- - source: 은호-리
87
- expected: "Ŭnho-ri"
88
- - source: 은행정
89
- expected: "Ŭnhaengjŏng"
90
- - source: 은행-동
91
- expected: "Ŭnhaeng-dong"
92
- - source: 은행-촌
93
- expected: "Ŭnhaeng-ch’on"
94
- - source: 원수
95
- expected: "Wŏnsu"
96
- - source: 원소리-고개
97
- expected: "Wŏnsori-gogae"
98
- - source: 원소참
99
- expected: "Wŏnsoch’am"
100
- - source: 원소-리
101
- expected: "Wŏnso-ri"
102
- - source: 원신-리
103
- expected: "Wŏnsil-li"
104
- - source: 난곡
105
- expected: "Nan’gok"
106
- - source: 난산-리
107
- expected: "Nansal-li"
108
- - source: 난직
109
- expected: "Nanjik"
110
- - source: 영곡
111
- expected: "Yŏnggok"
112
- - source: 윗두밀
113
- expected: "Wittumil"
114
- - source: 윗도심이
115
- expected: "Wittosimi"
116
- - source: 둔지
117
- expected: "Tunji"
118
- - source: 서승
119
- expected: "Sŏsŭng"
120
- - source: 신촌
121
- expected: "Sinch’on"
122
- - source: 비암덕
123
- expected: "Piamdŏk"
124
- - source: 바위안
125
- expected: "Pawian"
126
- - source: 오송평
127
- expected: "Osongp’yŏng"
128
- - source: 그물목
129
- expected: "Kŭmulmok"
130
- - source: 구원정
131
- expected: "Kuwŏnjŏng"
132
- - source: 일하
133
- expected: "Irha"
134
- - source: 황우
135
- expected: "Hwangu"
136
- - source: 자작보
137
- expected: "Chajakpo"
138
- - source: 비파1-동
139
- expected: "Pip’a Il-tong"
140
- - source: 문암 오-동
141
- expected: "Munam O-dong"
142
-
143
- map:
144
- character_separator: ""
145
- word_separator: " "
146
- title_case: True
147
- inherit: [var-kor-Hang-Latn-mr-1939]
148
-
149
- rules:
150
- # Add a zero-width space (U+200B) at the start of the string and after each space (i.e. at the start of every word)
151
- # So that the word-initial conversion rules will be blocked.
152
- - pattern: "^"
153
- result: "\u200B"
154
- - pattern: "(?<= )"
155
- result: "\u200B"
156
-
157
- # convert numbers to space + Hangul
158
- - pattern: "([^0-9 ])(?=[0-9])"
159
- result: "\\1 "
160
- - pattern: "1"
161
- result: "일"
162
- - pattern: "2"
163
- result: "이"
164
- - pattern: "3"
165
- result: "삼"
166
- - pattern: "4"
167
- result: "사"
168
- - pattern: "5"
169
- result: "오"
170
- - pattern: "6"
171
- result: "육"
172
- - pattern: "7"
173
- result: "칠"
174
- - pattern: "8"
175
- result: "팔"
176
- - pattern: "9"
177
- result: "구"
178
-
179
- # Logic to add a hyphen in front of generic terms
180
- # - pattern: "(?<=.)(구역|동|리|도|고개|골|로동자구|사무소|초등학교|중학교|고등학교|강|포|령|역|봉|사|천|교|제|저수지|소류지|재|못|말|면|암|교회|촌|병원|바위|공원|섬|우체국|대학교|보건소|굴|치|대교|지구|폭포|해수욕장|휴게소|중고교|읍|보건진료소|마을|톨게이트|대학|시장|경찰서|학교)$" #to be expanded
181
- # result: "-\\1"
182
-
183
- postrules:
184
-
185
- # Add space to the two ends of the string for easier word boundary handling
186
- - pattern: "^"
187
- result: " "
188
- - pattern: "$"
189
- result: " "
190
-
191
- # Initial rules in the inherited map were blocked, so that
192
- # this set of updated rules (with the onset rules removed) will be used instead.
193
- - pattern: "\u200B"
194
- result: ""
195
-
196
- - pattern: "(?<= )ᄀ"
197
- result: "k" # HANGUL CHOSEONG KIYEOK
198
- - pattern: "(?<= )ᄂ"
199
- result: "n" # HANGUL CHOSEONG NIEUN
200
- - pattern: "(?<= )ᄃ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
201
- result: "ch" # HANGUL CHOSEONG TIEUT # T -> Ch before yotized vowels
202
- - pattern: "(?<= )ᄃ"
203
- result: "t" # HANGUL CHOSEONG TIEUT
204
- - pattern: "(?<= )ᄅ"
205
- result: "r" # HANGUL CHOSEONG RIEUL
206
- - pattern: "(?<= )ᄆ"
207
- result: "m" # HANGUL CHOSEONG MIEUM
208
- - pattern: "(?<= )ᄇ"
209
- result: "p" # HANGUL CHOSEONG PIEUP
210
- - pattern: "(?<= )ᄉ(?=ᅱ)"
211
- result: "sh" # HANGUL CHOSEONG SIOS
212
- - pattern: "(?<= )ᄉ"
213
- result: "s" # HANGUL CHOSEONG SIOS
214
- - pattern: "(?<= )ᄋ"
215
- result: "" # HANGUL CHOSEONG IEUNG
216
- - pattern: "(?<= )ᄌ"
217
- result: "ch" # HANGUL CHOSEONG CIEUC
218
- - pattern: "(?<= )ᄎ"
219
- result: "ch’" # HANGUL CHOSEONG CHIEUCH
220
- - pattern: "(?<= )ᄏ"
221
- result: "k’" # HANGUL CHOSEONG KHIEUKH
222
- - pattern: "(?<= )ᄐ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
223
- result: "ch’" # HANGUL CHOSEONG THIEUTH + YOTIZED VOWELS
224
- - pattern: "(?<= )ᄐ"
225
- result: "t’" # HANGUL CHOSEONG THIEUTH
226
- - pattern: "(?<= )ᄑ"
227
- result: "p’" # HANGUL CHOSEONG PHIEUPH
228
- - pattern: "(?<= )ᄒ"
229
- result: "h" # HANGUL CHOSEONG HIEUH
230
- - pattern: "(?<= )ᄁ"
231
- result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
232
- - pattern: "(?<= )ᄭ"
233
- result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
234
- - pattern: "(?<= )ᄄ"
235
- result: "tt" # HANGUL CHOSEONG SSANGTIEUT
236
- - pattern: "(?<= )ᄯ"
237
- result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
238
- - pattern: "(?<= )ᄈ"
239
- result: "pp" # HANGUL CHOSEONG SSANGPIEUP
240
- - pattern: "(?<= )ᄲ"
241
- result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
242
- - pattern: "(?<= )ᄊ"
243
- result: "ss" # HANGUL CHOSEONG SSANGSIOS
244
- - pattern: "(?<= )ᄍ"
245
- result: "tch" # HANGUL CHOSEONG SSANGCIEUC
246
- - pattern: "(?<= )ᄶ"
247
- result: "tch" # HANGUL CHOSEONG SIOS-CIEUC
248
-
249
- # Remove the padding spaces added at both ends of the string
250
- - pattern: "^ "
251
- result: ""
252
- - pattern: " $"
253
- result: ""
@@ -1,48 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2011
4
- language: kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
- BGN/PCGN 2011 Agreement
15
-
16
- tests:
17
- - source: 불국사
18
- expected: "Bulguksa"
19
- - source: 묵호
20
- expected: "Mukho"
21
- - source: 울산
22
- expected: "Ulsan"
23
- - source: 독립문
24
- expected: "Dongnimmun"
25
- - source: 강남역
26
- expected: "Gangnamyeok"
27
- - source: 남산리
28
- expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
- - source: 내월리
30
- expected: "Naewol-ri"
31
- - source: 울릉군
32
- expected: "Ulleung-gun"
33
- - source: 설악산
34
- expected: "Seoraksan"
35
- - source: 삼죽면
36
- expected: "Samjuk-myeon"
37
- - source: 평리1동
38
- expected: "Pyeongni Il-dong"
39
- - source: 평리2동
40
- expected: "Pyeongni I-dong"
41
- - source: 탑안이
42
- expected: "Tabani"
43
-
44
- map:
45
- character_separator: ""
46
- word_separator: " "
47
- title_case: True
48
- inherit: moct-kor-Hang-Latn-2000
@@ -1,48 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2011
4
- language: kor
5
- source_script: Kore
6
- destination_script: Latn
7
- name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
- BGN/PCGN 2011 Agreement
15
-
16
- tests:
17
- - source: 佛國寺
18
- expected: "Bulguksa"
19
- - source: 묵호
20
- expected: "Mukho"
21
- - source: 蔚山
22
- expected: "Ulsan"
23
- - source: 獨立門
24
- expected: "Dongnimmun"
25
- - source: 江南驛
26
- expected: "Gangnamyeok"
27
- - source: 南山里
28
- expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
- - source: 내월里
30
- expected: "Naewol-ri"
31
- - source: 鬱陵郡
32
- expected: "Ulleung-gun"
33
- - source: 雪嶽山
34
- expected: "Seoraksan"
35
- - source: 三竹面
36
- expected: "Samjuk-myeon"
37
- - source: 坪里1洞
38
- expected: "Pyeongni Il-dong"
39
- - source: 坪里2洞
40
- expected: "Pyeongni I-dong"
41
- - source: 탑안이
42
- expected: "Tabani"
43
-
44
- map:
45
- character_separator: ""
46
- word_separator: " "
47
- title_case: True
48
- inherit: [var-kor-Kore-Hang-2013, moct-kor-Hang-Latn-2000]
@@ -1,159 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1981
4
- language: mkd
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Macedonian Romanization, BGN/PCGN 1981 System
8
- url: https://github.com/riboseinc/interscript/files/4247920/USBGN_romanization_Macedonian_1981.pdf
9
- creation_date: 1981
10
- description: BGN/PCGN Romanization table for Macedonian.
11
-
12
- tests:
13
- - source: Ѓол
14
- expected: Đol
15
- - source: Јусек Тепеси
16
- expected: Jusek Tepesi
17
- - source: Љуги Ќарит
18
- expected: Ljugi Ćarit
19
- - source: Ќафа Сан
20
- expected: Ćafa San
21
- - source: Агроплод Ресен
22
- expected: Agroplod Resen
23
- - source: Алта Чука
24
- expected: Alta Čuka
25
- - source: Баш Тепе
26
- expected: Baš Tepe
27
- - source: Браќам
28
- expected: Braćam
29
- - source: Винарска Визба Агропин
30
- expected: Vinarska Vizba Agropin
31
- - source: Галичица
32
- expected: Galičica
33
- - source: Дрењево
34
- expected: Drenjevo
35
- - source: Енешево
36
- expected: Eneševo
37
- - source: Иберлија
38
- expected: Iberlija
39
- - source: Крмзи Су
40
- expected: Krmzi Su
41
- - source: Лесноски Рид
42
- expected: Lesnoski Rid
43
- - source: Мала Корабска Врата
44
- expected: Mala Korabska Vrata
45
- - source: Низок Врв
46
- expected: Nizok Vrv
47
- - source: Охридско Езеро
48
- expected: Ohridsko Ezero
49
- - source: Прлиќ
50
- expected: Prlić
51
- - source: Равна Гора
52
- expected: Ravna Gora
53
- - source: Сеѓавечкиот Рид
54
- expected: Seđavečkiot Rid
55
- - source: Трновите Њиве
56
- expected: Trnovite Njive
57
- - source: Фасов Рид
58
- expected: Fasov Rid
59
- - source: Црни Камен
60
- expected: Crni Kamen
61
- - source: Чатал Чешми
62
- expected: Čatal Češmi
63
- - source: Шехово
64
- expected: Šehovo
65
-
66
- notes:
67
- - The character ѓ should be romanized g when it occurs before е and и. In other
68
- instances, it should be romanized ǵ (Ǵ).
69
- - The character ќ should be romanized k when it occurs before е and и. In other
70
- instances, it should be romanized ć.
71
-
72
- map:
73
- rules:
74
- - pattern: "Ѓ(?=[еЕиИ])"
75
- result: "G"
76
- - pattern: "ѓ(?=[еЕиИ])"
77
- result: "g"
78
- - pattern: "Ќ(?=[еЕиИ])"
79
- result: "K"
80
- - pattern: "ќ(?=[еЕиИ])"
81
- result: "k"
82
-
83
- postrules:
84
- # DZ
85
- - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
86
- result: "DZ"
87
- #LJ
88
- - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
89
- result: "LJ"
90
- #NJ
91
- - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
92
- result: "NJ"
93
- #DŽ
94
- - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
95
- result: "DŽ"
96
-
97
- characters:
98
- "\u0410": "A"
99
- "\u0411": "B"
100
- "\u0412": "V"
101
- "\u0413": "G"
102
- "\u0414": "D"
103
- "\u0403": "\u0110" # Đ
104
- "\u0415": "E"
105
- "\u0416": "\u005a\u030c" # Ž
106
- "\u0417": "Z"
107
- "\u0405": "Dz"
108
- "\u0418": "I"
109
- "\u0408": "J"
110
- "\u041A": "K"
111
- "\u041B": "L"
112
- "\u0409": "Lj"
113
- "\u041C": "M"
114
- "\u041D": "N"
115
- "\u040A": "Nj"
116
- "\u041E": "O"
117
- "\u041F": "P"
118
- "\u0420": "R"
119
- "\u0421": "S"
120
- "\u0422": "T"
121
- "\u040c": "\u0106" # Ć
122
- "\u0423": "U"
123
- "\u0424": "F"
124
- "\u0425": "H"
125
- "\u0426": "C"
126
- "\u0427": "\u0043\u030c" # Č
127
- "\u040F": "D\u007a\u030c" # Dž
128
- "\u0428": "\u0053\u030c" # Š
129
- "\u0430": "a"
130
- "\u0431": "b"
131
- "\u0432": "v"
132
- "\u0433": "g"
133
- "\u0434": "d"
134
- "\u0453": "\u0111" # đ
135
- "\u0435": "e"
136
- "\u0436": "\u007a\u030c" # ž
137
- "\u0437": "z"
138
- "\u0455": "dz"
139
- "\u0438": "i"
140
- "\u0458": "j"
141
- "\u043A": "k"
142
- "\u043B": "l"
143
- "\u0459": "lj"
144
- "\u043C": "m"
145
- "\u043D": "n"
146
- "\u045A": "nj"
147
- "\u043E": "o"
148
- "\u043F": "p"
149
- "\u0440": "r"
150
- "\u0441": "s"
151
- "\u0442": "t"
152
- "\u045c": "\u0107" # ć
153
- "\u0443": "u"
154
- "\u0444": "f"
155
- "\u0445": "h"
156
- "\u0446": "c"
157
- "\u0447": "\u0063\u030c" # č
158
- "\u045F": "d\u007a\u030c" # dž
159
- "\u0448": "\u0073\u030c" # š