interscript 0.1.9 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,36 +0,0 @@
1
- ---
2
- authority_id: bgn
3
- id: mr-1939
4
- language: iso-639-2:kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: McCune-Reischauer System
8
- url:
9
- creation_date: 1939
10
- adoption_date:
11
- description:
12
- E. O. Reischauer and G. M. McCune
13
- The Romanization of the Korean Language Based on Its Phonetic Structure. XXIX:1-55.
14
- Korea Branch of the Royal Asiatic Society. Volume XXIX (PDF)
15
- notes:
16
- This map adds Hanja resolution to var-kor-Hang-Latn-mr-1939
17
-
18
- tests:
19
- - source: "博物館"
20
- expected: "Pangmulgwan"
21
- - source: "사발"
22
- expected: "Sabal"
23
- - source: 韓國
24
- expected: Han’guk
25
- - source: "韓國 의 맛"
26
- expected: "Han’guk Ŭi Mat"
27
- - source: "金浦 國際 空港"
28
- expected: "Kŭmp’o Kukche Konghang"
29
- - source: "悠久한 歷史와 傳統에 빛나는 우리 大韓國民"
30
- expected: "Yuguhan Yŏksawa Chŏnt’onge Pinnanŭn Uri Taehan’gungmin"
31
-
32
- map:
33
- character_separator: ""
34
- word_separator: " "
35
- title_case: True
36
- inherit: [var-kor-Kore-Hang-2013, var-kor-Hang-Latn-mr-1939]
@@ -1,43 +0,0 @@
1
- ---
2
- authority_id: var
3
- id: 1872
4
- language: iso-639-2:mar
5
- source_script: Deva
6
- destination_script: Latn
7
- name: Marathi Hunterian system
8
- url: https://transliteration.eki.ee/pdf/Hindi-Marathi-Nepali.pdf
9
- creation_date: 1872
10
- description:
11
- The Hunterian system is the national system of romanization in India.
12
-
13
- notes:
14
-
15
- - a, i and u are used in word-final position. The a in gaon and the u in pur are not accented.
16
- - ज्ञ is transliterated gy.
17
- - v is used before i.
18
-
19
- tests:
20
- - source: "ठाणे - जिल्ह्यात बुधवारी एक हजार रुग्णांची वाढ, तर जणांच्या मृत्यूची नोंद"
21
- expected: "thaānae - jailhyaāta baudhawaāraī eka hajaāra raugnaānchaī waādha, tara janaānchyaā marityaūchaī naonda"
22
- - source: "एकता कपूर पुन्हा अडकली वादात, वेबसीरिजमधल्या 'त्या' सीनमुळे जमावाची घरावर दगडफेक"
23
- expected: "ekataā kapaūra paunhaā adakalaī waādaāta, waebasaīraijamadhalyaā 'tyaā' saīnamaulae jamaāwaāchaī gharaāwara dagadaphaeka"
24
- - source: "जाणून घ्या, बीएमसीच्या अधिकाऱ्यांनी कंगना राणौतच्या ऑफिसमधले नक्की काय- काय तोडलं"
25
- expected: "jaānaūna ghyaā, baīemasaīchyaā adhaikaāऱ्yaānnaī kanganaā raānaautachyaā ऑphaisamadhalae nakkaī kaāya- kaāya taodalan"
26
- - source: "कंगना मुंबईत दाखल होण्यापूर्वी 'मातोश्री'वरून फर्मान सुटले; प्रवक्त्यांना सक्त आदेश"
27
- expected: "kanganaā maunbaīta daākhala haonyaāpaūrwaī 'maātaosraī'waraūna pharmaāna sautalae; prawaktyaānnaā sakta ādaesa"
28
- - source: "मराठा आरक्षणास तात्पुरती स्थगिती; सर्वोच्च न्यायालयाचा निर्णय"
29
- expected: "maraāthaā ārakshanaāsa taātpaurataī sthagaitaī; sarwaochcha nyaāyaālayaāchaā nairnaya"
30
- - source: "भारताच्या तिन्ही लशींचा पहिला टप्पा यशस्वी, वाचा कधी येणार बाजारात"
31
- expected: "bhaārataāchyaā tainhaī lasaīnchaā pahailaā tappaā yasaswaī, waāchaā kadhaī yaenaāra baājaāraāta"
32
- - source: "रुग्णवाढीमुळे खाटांची चणचण"
33
- expected: "raugnawaādhaīmaulae khaātaānchaī chanachana"
34
- - source: "पीएम स्वनिधी कर्ज योजनेला मुंबईतून अल्प प्रतिसाद"
35
- expected: "paīema swanaidhaī karja yaojanaelaā maunbaītaūna alpa prataisaāda"
36
- - source: "सांताक्रूझ-चेंबूर लिंक रोडवरील उन्नत मार्गाला स्थगिती"
37
- expected: "saāntaākraūjha-chaenbaūra lainka raodawaraīla unnata maārgaālaā sthagaitaī"
38
- - source: "संपादक अर्णब गोस्वामी यांच्याविरूद्ध खडक पोलिस ठाण्यात तक्रार"
39
- expected: "sanpaādaka arnaba gaoswaāmaī yaānchyaāwairaūddha khadaka paolaisa thaānyaāta takraāra"
40
-
41
- map:
42
-
43
- inherit: 'var-hin-Deva-Latn-hunterian-1872'
@@ -1,102 +0,0 @@
1
- ---
2
- authority_id: var
3
- id: 1930
4
- language: iso-639-2:mon
5
- source_script: Mong
6
- destination_script: Latn
7
- name: Mongolian Latin alphabet
8
- url: https://en.m.wikipedia.org/wiki/Mongolian_Latin_alphabet
9
- creation_date:
10
- description: |
11
- Latin alphabet was using "y" as feminine "u", with additional
12
- feminine "o" ("ө") and with additional consonants "ç" for "ch", "ş"
13
- for "sh" and "ƶ" for "j", it successfully served in printing books
14
- and newspapers. A few of the letters (f, k, p, v) were rarely used,
15
- being found only in borrowings, while q, w and x were excluded
16
- altogether. Since k transcribed [h] in loans, it is unclear how loans
17
- in [kʰ] were written. "j" is used for vowel combinations of the [ja]
18
- type. Letter "c" is used for the sound [ts] and "k" is used for the
19
- sound [h]. The first version was inspired by the Yanalif script used
20
- for the Soviet Union's Turkic languages.
21
-
22
- The orthography of the Mongolian Latin is based on the orthography of
23
- the Classical Mongolian script. It preserves short final vowels. It
24
- does not drop unstressed vowels in the closing syllables when the word
25
- is conjugated. The suffixes and inflections without long or i-coupled
26
- vowels are made open syllables ending with a vowel, which is harmonized
27
- with the stressed vowel. The rule for the vowel harmony for unstressed
28
- vowels is similar to that of the Mongolian Cyrillic. It does not use
29
- consonant combinations to denote new consonant sounds. For both of the
30
- versions, letter "b" is used both in the beginning and in the middle of
31
- the word. Because it phonetically assimilates into sound [w], no
32
- ambiguity is caused.
33
-
34
- notes:
35
- - A separated final form of vowels a or e is common, and can appear at the end
36
- of a word, word stem, or suffix. This form requires a final-shaped preceding
37
- consonant and an inter-word gap in between. The vowels themselves appear as ᠎ᠠ,
38
- and with consonants as ‍ᠬ᠎ᠠ q‑a, ‍ᠷ᠎ᠠ r‑a/r‑e, etc. This gap can be transliterated
39
- with a hyphen. In digital typesetting, these forms are triggered by inserting
40
- a U+180E ᠎ MONGOLIAN VOWEL SEPARATOR (HTML ᠎ · MVS) between the consonant
41
- and vowel.
42
- - All case suffixes, as well as any plural suffixes consisting of one or two
43
- syllables are likewise separated by a preceding and hyphen-transliterated gap.
44
- In digital typesetting, this gap is represented by a U+202F   NARROW NO-BREAK SPACE
45
- (HTML   · NNBSP). A maximum of two case suffixes can be added to a stem.
46
- Single-letter vowel suffixes appear with the final-shaped forms of a/e, i, or u/ü,
47
- as in ᠭᠠᠵᠠᠷ ᠠ γaǰar‑a 'to the country' and ᠡᠳᠦᠷ ᠡ edür‑e 'on the day', or ᠤᠯᠤᠰ ᠢ ulus‑i
48
- 'the state' etc. Multi-letter suffixes most often start with an initial- (consonants),
49
- medial- (vowels), or variant-shaped form.
50
-
51
- # Note: The provided sample of "Latin 1931-1939" in https://en.m.wikipedia.org/wiki/Mongolian_Latin_alphabet is not an actual transliteration.
52
- # It's a kind of buggy transliteration that mixes Cyrillic and Traditional Mongolian script into Latin based on phonetic sound.
53
- # There are no expected transliteration sources found for the tests.
54
-
55
- # Latin 1931-1939
56
- # Manai ulasiin niislel koto Ulaanbaatar bol 80 000 şakam kyntei, ulasiin olon niitiin, aƶi akuin tөb gazaruud oroşison jikeeken oron bolno.
57
- # Tus ulasiin dotoroos garka tyykii zuiliig bolbosruulka aƶi yildberiin gazaruudiig baiguulka ni çukala.
58
-
59
- tests:
60
- - source: |
61
- ᠮᠠᠨ ᠤ ᠤᠯᠤᠰ ᠤᠨ ᠨᠡᠶᠢᠰᠯᠡᠯ ᠬᠣᠲᠠ ᠤᠯᠠᠭᠠᠨᠪᠠᠭᠠᠲᠤᠷ ᠪᠣᠯ 80 000 ᠰᠢᠬᠠᠮ ᠬᠦᠮᠦᠨ ᠲᠡᠢ᠂ ᠤᠯᠤᠰ ᠤᠨ ᠣᠯᠠᠨ ᠨᠡᠶᠢᠲᠡ ᠶᠢᠨ᠂ ᠠᠵᠤ ᠠᠬᠤᠢ ᠶᠢᠨ ᠲᠥᠪ ᠭᠠᠵᠠᠷ ᠤᠳ ᠣᠷᠣᠰᠢᠭᠰᠠᠨ ᠶᠡᠬᠡᠬᠡᠨ ᠣᠷᠣᠨ ᠪᠣᠯᠤᠨ᠎ᠠ᠃
62
- ᠲᠤᠰ ᠤᠯᠤᠰ ᠤᠨ ᠳᠣᠲᠣᠷ᠎ᠠ ᠠᠴᠠ ᠭᠠᠷᠬᠤ ᠲᠦᠭᠦᠬᠡᠢ ᠵᠤᠶᠢᠯ ᠢ ᠪᠣᠯᠪᠠᠰᠤᠷᠠᠭᠤᠯᠬᠤ ᠠᠵᠤ ᠦᠢᠯᠡᠳᠪᠦᠷᠢ ᠶᠢᠨ ᠭᠠᠵᠠᠷ ᠤᠳ ᠢ ᠪᠠᠶᠢᠭᠤᠯᠬᠤ ᠨᠢ ᠴᠢᠬᠤᠯᠠ᠃
63
- expected: |
64
- man-u ulus-un nejislel kota ulaganbagatur bol 80 000 sikam kymyn-tei, ulus-un olan nejite-jin, aƶu akui-jin tөb gaƶar-ud orosigsan jekeken oron bolun-a.
65
- tus ulus-un dotor-a-aça garku tygykei ƶujil-i bolbasuragulku aƶu yiledbyri-jin gaƶar-ud-i bajigulku ni çikula.
66
-
67
- map:
68
-
69
- characters:
70
- "ᠠ": "a" # \u1820
71
- "ᠡ": "e" # \u1821
72
- "ᠢ": "i" # \u1822
73
- "ᠣ": "o" # \u1823
74
- "ᠤ": "u" # \u1824
75
- "ᠥ": "ө" # \u1825
76
- "ᠦ": "y" # \u1826
77
- "ᠨ": "n" # \u1828
78
- "ᠮ": "m" # \u182e
79
- "ᠯ": "l" # \u182f
80
- "ᠪ": "b" # \u182a
81
- "ᠫ": "p" # \u182b
82
- "ᠹ": "f" # \u1839
83
- "ᠻ": "k" # \u183b
84
- "ᠬ": "k" # \u182c
85
- "ᠭ": "g" # \u182d
86
- "ᠰ": "s" # \u1830
87
- "ᠱ": "ş" # \u1831
88
- "ᠲ": "t" # \u1832
89
- "ᠳ": "d" # \u1833
90
- "ᠴ": "c" # \u1834
91
- "ᠴ": "ç" # \u1834
92
- "ᠵ": "z" # \u1835
93
- "ᠵ": "ƶ" # \u1835
94
- "ᠶ": "j" # \u1836
95
- "ᠷ": "r" # \u1837
96
- "ᠾ": "h" # \u183e
97
- "᠁": "..." # \u1801
98
- "᠂": "," # \u1802
99
- "᠃": "." # \u1803
100
- "᠄": ":" # \u1804
101
- "\u180e": "-" # MVS
102
- "\u202f": "-" # NNBSP
@@ -1,272 +0,0 @@
1
- ---
2
- authority_id: var
3
- id: lessing
4
- language: iso-639-2:mon
5
- source_script: Mong
6
- destination_script: Latn
7
- name: Transliteration Systems for Uyghur-Mongolian or Vertical or Old Script
8
- url: https://collab.its.virginia.edu/wiki/tibetan-script/Transliteration%20Schemes%20for%20Mongolian%20Vertical%20Script.html
9
- creation_date:
10
- description: |
11
- The only system for the Mongolian vertical script which is used commonly throughout
12
- Mongolian studies worldwide is the Vladimirtsov-Mostaert system (V-M). The only
13
- dictionary using this system is in an appendix to Antoine Mostaert’s Dictionnaire
14
- ordos. Thus the Harvard Journal of Asiatic Studies specifies, “please use the written
15
- Mongolian appendix to Antoine Mostaert’s Dictionnaire ordos.” However, the V-M system
16
- has many difficult diacriticals and non-Latin characters that constitute a significant
17
- problem for editors and thus is difficult to recommend.
18
-
19
- notes:
20
- - A separated final form of vowels a or e is common, and can appear at the end
21
- of a word, word stem, or suffix. This form requires a final-shaped preceding
22
- consonant and an inter-word gap in between. The vowels themselves appear as ᠎ᠠ,
23
- and with consonants as ‍ᠬ᠎ᠠ q‑a, ‍ᠷ᠎ᠠ r‑a/r‑e, etc. This gap can be transliterated
24
- with a hyphen. In digital typesetting, these forms are triggered by inserting
25
- a U+180E ᠎ MONGOLIAN VOWEL SEPARATOR (HTML ᠎ · MVS) between the consonant
26
- and vowel.
27
- - All case suffixes, as well as any plural suffixes consisting of one or two
28
- syllables are likewise separated by a preceding and hyphen-transliterated gap.
29
- In digital typesetting, this gap is represented by a U+202F   NARROW NO-BREAK SPACE
30
- (HTML   · NNBSP). A maximum of two case suffixes can be added to a stem.
31
- Single-letter vowel suffixes appear with the final-shaped forms of a/e, i, or u/ü,
32
- as in ᠭᠠᠵᠠᠷ ᠠ γaǰar‑a 'to the country' and ᠡᠳᠦᠷ ᠡ edür‑e 'on the day', or ᠤᠯᠤᠰ ᠢ ulus‑i
33
- 'the state' etc. Multi-letter suffixes most often start with an initial- (consonants),
34
- medial- (vowels), or variant-shaped form.
35
-
36
- tests:
37
- - source: ᠬᠥᠬᠡᠬᠣᠲᠠ # Хөх хот
38
- expected: køkexota
39
- - source: ᠣᠷᠳᠣᠰ ᠬᠣᠲᠠ # Ордос хот
40
- expected: ordos xota
41
- - source: ᠪᠠᠶᠠᠨᠨᠠᠭᠤᠷ ᠬᠣᠲᠠ # Баяннуур хот
42
- expected: bajannaγur xota
43
- - source: ᠤᠯᠠᠭᠠᠨᠬᠣᠲᠠ # Улаан хот
44
- expected: ulaγanxota
45
- - source: ᠬᠣᠣᠯᠢᠠ ᠭᠣᠤᠯ ᠬᠣᠲᠠ # Хоолингол хот
46
- expected: xoolia γoul xota
47
- - source: ᠡᠷᠢᠶᠡᠨ ᠬᠣᠲᠠ # Эрээн хот
48
- expected: erijen xota
49
- - source: ᠷᠠᠰᠢᠶᠠᠨ ᠬᠣᠲᠠ # Рашаан хот
50
- expected: rasijan xota
51
- - source: ᠪᠠᠭᠠᠷᠢᠨ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # Баарин баруун хошуу
52
- expected: baγarin baraγun xosiγu
53
- - source: ᠪᠠᠭᠠᠷᠢᠨ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # Баарин зүүн хошуу
54
- expected: baγarin zegyn xosiγu
55
- - source: ᠲᠦᠩᠯᠢᠶᠣᠤ ᠬᠣᠲᠠ # Байшинт хот
56
- expected: tynglijou xota
57
- - source: ᠰᠢᠯᠢ ᠶᠢᠨ ᠬᠣᠲᠠ # Шилийн хот
58
- expected: sili-jin xota
59
- - source: ᠬᠣᠷᠢᠨ ᠭᠡᠷ ᠰᠢᠶᠠᠨ # Horinger County
60
- expected: xorin ger sijan
61
- - source: ᠤᠳᠠ ᠲᠣᠭᠣᠷᠢᠭ # Уда тойрог
62
- expected: uda toγoriγ
63
- - source: ᠦᠬᠠᠢ ᠬᠣᠲᠠ # Үхай хот
64
- expected: yxai xota
65
- - source: ᠬᠠᠶᠢᠨᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Hainan District
66
- expected: xainan toγoriγ
67
- - source: ᠬᠠᠶᠢᠷᠤᠪ ᠤᠨ ᠲᠣᠬᠣᠢ ᠲᠣᠭᠣᠷᠢᠭ # Haibowan District
68
- expected: xairub-un toxoi toγoriγ
69
- - source: ᠤᠯᠠᠭᠠᠨᠴᠠᠪ ᠬᠣᠲᠠ # Улаанцав хот
70
- expected: ulaγancab xota
71
- - source: ᠵᠢᠨᠢᠩ ᠲᠣᠭᠣᠷᠢᠭ # Жинин тойрог
72
- expected: zining toγoriγ
73
- - source: ᠹᠸᠩᠵᠸᠡ ᠬᠣᠲᠠ # Фенжень хот
74
- expected: fvngzve xota
75
- - source: ᠱᠠᠩᠳᠤ ᠰᠢᠶᠠᠨ # Шанду шянь
76
- expected: šangdu sijan
77
- - source: ᠯᠢᠶᠠᠩᠴᠠᠩ ᠰᠢᠶᠠᠨ # Liangcheng County
78
- expected: lijangcang sijan
79
- - source: ᠴᠠᠬᠠᠷ ᠪᠠᠷᠠᠭᠤᠨ ᠭᠠᠷᠤᠨ ᠡᠮᠦᠨᠡᠳᠦ ᠬᠣᠰᠢᠭᠤ # Цахар баруун гарын өмнөд хошуу
80
- expected: caxar baraγun γarun emynedy xosiγu
81
- - source: ᠴᠠᠬᠠᠷ ᠪᠠᠷᠠᠭᠤᠨ ᠭᠠᠷᠤᠨ ᠳᠤᠮᠳᠠᠳᠤ ᠬᠣᠰᠢᠭᠤ # Цахар баруун гарын дундад хошуу
82
- expected: caxar baraγun γarun dumdadu xosiγu
83
- - source: ᠴᠠᠬᠠᠷ ᠪᠠᠷᠠᠭᠤᠨ ᠭᠠᠷᠤᠨ ᠬᠣᠶᠢᠲᠤ ᠬᠣᠰᠢᠭᠤ # Цахар баруун гарын хойд хошуу
84
- expected: caxar baraγun γarun xoitu xosiγu
85
- - source: ᠳᠥᠷᠪᠡᠳ ᠬᠣᠰᠢᠭᠤ # Дөрвөд хошуу
86
- expected: dørbed xosiγu
87
- - source: ᠪᠤᠭᠤᠲᠤ ᠬᠣᠲᠠ # Бугaт хот
88
- expected: buγutu xota
89
- - source: ᠬᠥᠨᠳᠡᠯᠡᠨ ᠲᠣᠭᠣᠷᠢᠭ # Hondlon District
90
- expected: køndelen toγoriγ
91
- - source: ᠴᠢᠩᠱᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Qingshan District
92
- expected: cingšan toγoriγ
93
- - source: ᠰᠢᠭᠤᠶᠢᠲᠤ ᠲᠣᠭᠣᠷᠢᠭ # Шигуай тойрог
94
- expected: siγuitu toγoriγ
95
- - source: ᠵᠢᠦ ᠶᠤᠸᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Jiuyuan District
96
- expected: ziy juvan toγoriγ
97
- - source: ᠭᠦᠶᠠᠩ ᠰᠢᠶᠠᠨ # Guyang County
98
- expected: gyjang sijan
99
- - source: ᠲᠦᠮᠡᠳ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # Түмэд Зүүн хошуу
100
- expected: tymed zegyn xosiγu
101
- - source: ᠲᠦᠮᠡᠳ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # Түмэд Баруун хошуу
102
- expected: tymed baraγun xosiγu
103
- - source: ᠳᠠᠷᠬᠠᠨ ᠮᠤᠤᠮᠢᠩᠭ᠋ᠠᠨ ᠬᠣᠯᠪᠣᠭᠠᠲᠤ ᠬᠣᠰᠢᠭᠤ # Darhan Muminggan United Banner
104
- expected: darxan muumingγ᠋an xolboγatu xosiγu
105
- - source: ᠬᠡᠰᠢᠭᠲᠡᠨ ᠬᠣᠰᠢᠭᠤ # Hexigten Banner
106
- expected: kesiγten xosiγu
107
- - source: ᠰᠢᠨ᠎ᠡ ᠪᠠᠷᠭᠤ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # New Barag Left Banner
108
- expected: sin-e barγu zegyn xosiγu
109
- - source: ᠰᠢᠨ᠎ᠡ ᠪᠠᠷᠭᠤ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # New Barag Right Banner
110
- expected: sin-e barγu baraγun xosiγu
111
- - source: ᠣᠲᠣᠭ ᠬᠣᠰᠢᠭᠤ # Otog Banner
112
- expected: otoγ xosiγu
113
- - source: ᠳᠠᠯᠠᠳ ᠬᠣᠰᠢᠭᠤ # Dalad Banner
114
- expected: dalad xosiγu
115
- - source: ᠵᠡᠭᠦᠨᠭᠠᠷ ᠬᠣᠰᠢᠭᠤ # Jungar Banner
116
- expected: zegynγar xosiγu
117
- - source: ᠣᠲᠣᠭ ᠤᠨ ᠡᠮᠦᠨᠡᠳᠦ ᠬᠣᠰᠢᠭᠤ # Otog Front Banner
118
- expected: otoγ-un emynedy xosiγu
119
- - source: ᠬᠠᠩᠭᠢᠨ ᠬᠣᠰᠢᠭᠤ # Hanggin Banner
120
- expected: xangγin xosiγu
121
- - source: ᠦᠦᠰᠢᠨ ᠬᠣᠰᠢᠭᠤ # Uxin Banner
122
- expected: yysin xosiγu
123
- - source: ᠡᠵᠢᠨ ᠬᠣᠷᠣᠭ᠎ᠠ ᠬᠣᠰᠢᠭᠤ # Эзэн Хороо хошуу
124
- expected: ezin xoroγ-a xosiγu
125
- - source: ᠵᠠᠯᠠᠨ ᠠᠢ᠌ᠯ ᠬᠣᠲᠠ # Жалан-Айл хот
126
- expected: zalan ai᠌l xota
127
- - source: ᠶᠠᠭᠰᠢ ᠬᠣᠲᠠ # Ягши хот
128
- expected: jaγsi xota
129
- - source: ᠮᠠᠨᠵᠤᠤᠷ ᠬᠣᠲᠠ # Манжуур хот
130
- expected: manzuur xota
131
- - source: ᠬᠠᠶᠢᠯᠠᠷ ᠲᠣᠭᠣᠷᠢᠭ # Хайлаар тойрог
132
- expected: xailar toγoriγ
133
- - source: ᠬᠣᠷᠴᠢᠨ ᠲᠣᠭᠣᠷᠢᠭ # Horqin District
134
- expected: xorcin toγoriγ
135
- - source: ᠺᠠᠶᠢᠯᠦ ᠰᠢᠶᠠᠨ # Kailu County
136
- expected: kaily sijan
137
- - source: ᠬᠦᠷᠢᠶ᠎ᠡ ᠬᠣᠰᠢᠭᠤ # Hure Banner
138
- expected: kyrij-e xosiγu
139
- - source: ᠨᠠᠢᠮᠠᠨ ᠬᠣᠰᠢᠭᠤ # Naiman Banner
140
- expected: naiman xosiγu
141
- - source: ᠵᠠᠷᠤᠳ ᠬᠣᠰᠢᠭᠤ # Jarud Banner
142
- expected: zarud xosiγu
143
- - source: ᠬᠣᠷᠴᠢᠨ ᠪᠠᠷᠠᠭᠤᠨ ᠭᠠᠷᠤᠨ ᠳᠤᠮᠳᠠᠳᠤ ᠬᠣᠰᠢᠭᠤ # Хорчин Баруун Гарын Дундад Хошуу
144
- expected: xorcin baraγun γarun dumdadu xosiγu
145
- - source: ᠬᠣᠷᠴᠢᠨ ᠵᠡᠭᠦᠨ ᠭᠠᠷᠤᠨ ᠳᠤᠮᠳᠠᠳᠤ ᠬᠣᠰᠢᠭᠤ # Horqin Left Middle Banner
146
- expected: xorcin zegyn γarun dumdadu xosiγu
147
- - source: ᠬᠣᠷᠴᠢᠨ ᠵᠡᠭᠦᠨ ᠭᠠᠷᠤᠨ ᠬᠣᠶᠢᠲᠤ ᠬᠣᠰᠢᠭᠤ # Horqin Left Rear Banner
148
- expected: xorcin zegyn γarun xoitu xosiγu
149
- - source: ᠤᠯᠠᠭᠠᠨᠬᠠᠳᠠ ᠬᠣᠲᠠ # Улаанхад хот
150
- expected: ulaγanxada xota
151
- - source: ᠰᠡᠷᠡᠴᠢ # Salaqi
152
- expected: sereci
153
- - source: ᠭᠡᠭᠡᠨ ᠭᠣᠤᠯ ᠬᠣᠲᠠ # Гэгээнгол хот
154
- expected: gegen γoul xota
155
- - source: ᠠᠯᠠᠱᠠ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # Alxa Left Banner
156
- expected: alaša zegyn xosiγu
157
- - source: ᠠᠯᠠᠱᠠ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # Alxa Right Banner
158
- expected: alaša baraγun xosiγu # Alaša Baraγun xosiγu
159
- - source: ᠡᠵᠡᠨ᠎ᠡ ᠬᠣᠰᠢᠭᠤ # Ejin Banner
160
- expected: ezen-e xosiγu
161
- - source: ᠬᠥᠪᠡᠭᠡᠲᠦ ᠰᠢᠷ᠎ᠠ ᠬᠣᠰᠢᠭᠤ # Хөвөөт Шар хошуу
162
- expected: købegety sir-a xosiγu
163
- - source: ᠦᠶᠤᠸᠠᠨ ᠰᠢᠶᠠᠨ # Үюань шянь
164
- expected: yjuvan sijan
165
- - source: ᠦᠴᠤᠸᠠᠨ ᠰᠢᠶᠠᠨ # Wuchuan County
166
- expected: ycuvan sijan
167
- - source: ᠲᠦᠴᠢᠤᠸᠠᠨ ᠰᠢᠶᠠᠨ # Тучуань шянь
168
- expected: tyciuvan sijan
169
- - source: ᠯᠢᠨᠰᠢ ᠰᠢᠶᠠᠨ # Linxi County
170
- expected: linsi sijan
171
- - source: ᠬᠠᠷᠠᠴᠢᠨ ᠬᠣᠰᠢᠭᠤ # Harqin Banner
172
- expected: xaracin xosiγu
173
- - source: ᠠᠤᠬᠠᠨ ᠬᠣᠰᠢᠭᠤ # Aohan Banner
174
- expected: auxan xosiγu
175
- - source: ᠬᠣᠳᠣᠩ ᠠᠷᠠᠳ ᠤᠨ ᠲᠣᠭᠣᠷᠢᠭ # Huimin District
176
- expected: xodong arad-un toγoriγ
177
- - source: ᠰᠠᠶᠢᠬᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Saihan District
178
- expected: saixan toγoriγ
179
- - source: ᠰᠢᠨ᠎ᠡ ᠬᠣᠲᠠ ᠲᠣᠭᠣᠷᠢᠭ # Xincheng District
180
- expected: sin-e xota toγoriγ
181
- - source: ᠬᠠᠶᠢᠷᠤᠪ ᠤᠨ ᠲᠣᠬᠣᠢ ᠲᠣᠭᠣᠷᠢᠭ # Haibowan (Hairibin Tohoi) District
182
- expected: xairub-un toxoi toγoriγ
183
- - source: ᠰᠦᠩ ᠱᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Songshan District
184
- expected: syng šan toγoriγ
185
- - source: ᠬᠦᠩ ᠱᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Hongshan District
186
- expected: kyng šan toγoriγ
187
- - source: ᠠᠷᠤᠨ ᠬᠣᠰᠢᠭᠤ # Арун хошуу
188
- expected: arun xosiγu
189
- - source: ᠶᠤᠸᠠᠨ ᠪᠣᠣ ᠱᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Юаньбаошань тойрог
190
- expected: juvan boo šan toγoriγ
191
- - source: ᠰᠢᠯᠤᠭᠤᠨ ᠬᠥᠪᠡᠭᠡᠲᠦ ᠴᠠᠭᠠᠨ ᠬᠣᠰᠢᠭᠤ # Шулуун Хөвөөт Цагаан хошуу
192
- expected: siluγun købegety caγan xosiγu
193
- - source: ᠰᠢᠯᠤᠭᠤᠨ ᠬᠥᠬᠡ ᠬᠣᠰᠢᠭᠤ # Шулуун хөх хошуу
194
- expected: siluγun køke xosiγu
195
- - source: ᠤᠷᠠᠳ ᠤᠨ ᠳᠤᠮᠳᠠᠳᠤ ᠬᠣᠰᠢᠭᠤ # Урадын Дундад Хошуу
196
- expected: urad-un dumdadu xosiγu
197
- - source: ᠤᠷᠠᠳ ᠤᠨ ᠡᠮᠦᠨᠡᠳᠦ ᠬᠣᠰᠢᠭᠤ # Урадын Өмнөд Хошуу
198
- expected: urad-un emynedy xosiγu
199
- - source: ᠲᠣᠭᠲᠠᠬᠤ ᠰᠢᠶᠠᠨ # Тогтох шянь
200
- expected: toγtaxu sijan
201
- - source: ᠰᠥᠨᠡᠳ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # Sonid Left Banner
202
- expected: søned zegyn xosiγu
203
- - source: ᠰᠥᠨᠡᠳ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # Sonid Right Banner
204
- expected: søned baraγun xosiγu
205
- - source: ᠣᠩᠨᠢᠭᠤᠳ ᠬᠣᠰᠢᠭᠤ # Ongniud Banner
206
- expected: ongniγud xosiγu
207
- - source: ᠵᠠᠯᠠᠢᠳ ᠬᠣᠰᠢᠭᠤ # Jalaid Banner
208
- expected: zalaid xosiγu
209
- - source: ᠬᠠᠩᠭᠢᠨ ᠬᠣᠶᠢᠲᠤ ᠬᠣᠰᠢᠭᠤ # Hanggin Rear Banner
210
- expected: xangγin xoitu xosiγu
211
- - source: ᠵᠡᠭᠦᠨ ᠤᠵᠤᠮᠤᠴᠢᠨ ᠬᠣᠰᠢᠭᠤ # East Ujimqin Banner
212
- expected: zegyn uzumucin xosiγu
213
- - source: ᠠᠷᠤ ᠬᠣᠷᠴᠢᠨ ᠬᠣᠰᠢᠭᠤ # Ar Horqin Banner
214
- expected: aru xorcin xosiγu
215
- - source: ᠠᠪᠠᠭ᠎ᠠ ᠬᠣᠰᠢᠭᠤ # Abag Banner
216
- expected: abaγ-a xosiγu
217
- - source: ᠪᠤᠷᠢᠶᠠᠳ
218
- expected: burijad # Буриад
219
- - source: ᠤᠯᠠᠭᠠᠨᠪᠠᠭᠠᠲᠤᠷ # Улаанбаатар
220
- expected: ulaγanbaγatur
221
- - source: ᠴᠢᠩᠭᠢᠰ ᠬᠠᠭᠠᠨ # Чингис Хаан
222
- expected: cingγis xaγan
223
-
224
- map:
225
- rules:
226
- - pattern: \u182c(\u1821|u1822|\u1825|\u1826)
227
- result: "k\\1"
228
- - pattern: \u182d(\u1821|u1822|\u1825|\u1826)
229
- result: "g\\1"
230
- - pattern: \u1820\u1836\u1822
231
- result: "ai"
232
- - pattern: \u1821\u1836\u1822
233
- result: "ei"
234
- - pattern: \u1823\u1836\u1822
235
- result: "oi"
236
- - pattern: \u1824\u1836\u1822
237
- result: "ui"
238
-
239
- characters:
240
- "ᠠ": "a" # \u1820
241
- "ᠡ": "e" # \u1821
242
- "ᠢ": "i" # \u1822
243
- "ᠣ": "o" # \u1823
244
- "ᠤ": "u" # \u1824
245
- "ᠥ": "ø" # \u1825
246
- "ᠦ": "y" # \u1826
247
- "ᠨ": "n" # \u1828
248
- "ᠩ": "ng" # \u1829
249
- "ᠬ": "x" # \u182c
250
- "ᠭ": "γ" # \u182d
251
- "ᠪ": "b" # \u182a
252
- "ᠫ": "p" # \u182b
253
- "ᠹ": "f" # \u1839
254
- "ᠰ": "s" # \u1830
255
- "ᠱ": "š" # \u1831
256
- "ᠲ": "t" # \u1832
257
- "ᠳ": "d" # \u1833
258
- "ᠯ": "l" # \u182f
259
- "ᠮ": "m" # \u182e
260
- "ᠴ": "c" # \u1834
261
- "ᠵ": "z" # \u1835
262
- "ᠶ": "j" # \u1836
263
- "ᠺ": "k" # \u183a
264
- "ᠷ": "r" # \u1837
265
- "ᠸ": "v" # \u1838
266
- "ᠾ": "h" # \u183e
267
- "᠁": "..." # \u1801
268
- "᠂": "," # \u1802
269
- "᠃": "." # \u1803
270
- "᠄": ":" # \u1804
271
- "\u180e": "-" # MVS
272
- "\u202f": "-" # NNBSP