interscript 0.1.7 → 2.1.0b1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +116 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +83 -133
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +68 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +68 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +76 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +68 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/lib/interscript/visualize.rb +61 -0
  63. data/lib/interscript/visualize/group.html.erb +59 -0
  64. data/lib/interscript/visualize/json.rb +57 -0
  65. data/lib/interscript/visualize/map.html.erb +46 -0
  66. data/lib/interscript/visualize/nodes.rb +89 -0
  67. data/requirements.txt +1 -0
  68. metadata +78 -416
  69. data/README.adoc +0 -298
  70. data/lib/g2pwrapper.py +0 -34
  71. data/lib/interscript/fs.rb +0 -69
  72. data/lib/interscript/mapping.rb +0 -142
  73. data/lib/interscript/opal.rb +0 -57
  74. data/lib/interscript/opal/entrypoint.rb +0 -12
  75. data/lib/interscript/opal/map_translate.rb +0 -7
  76. data/lib/interscript/opal/maps.js.erb +0 -10
  77. data/lib/model-7 +0 -0
  78. data/lib/tha-pt-b-7 +0 -0
  79. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  80. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  81. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  82. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  83. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -165
  84. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -40
  85. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  86. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  87. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  88. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  89. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  90. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  91. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  92. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  93. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -211
  94. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -47
  95. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  96. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  97. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  98. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  99. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  100. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  101. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  102. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  103. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  104. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  105. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  106. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  107. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  108. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  109. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  110. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  111. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  112. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -172
  113. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  114. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  115. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  116. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  117. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  118. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  119. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  120. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  121. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  122. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  123. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  124. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  125. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  126. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  127. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  128. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  129. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  130. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -596
  131. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  132. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  133. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  134. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  135. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  136. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  137. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  138. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  139. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  140. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  141. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  142. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  143. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  144. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  145. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  146. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  147. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  148. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  149. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  150. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  151. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -336
  152. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -639
  153. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  154. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  155. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  156. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  157. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  158. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  159. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  160. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  161. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  162. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  163. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  164. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  165. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  166. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  167. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  168. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  169. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  170. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  171. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  172. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  173. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  174. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  175. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  176. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  177. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  178. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  179. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  180. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  181. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  182. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  183. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  184. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  185. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  186. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  187. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  188. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  189. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  190. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  191. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  192. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  193. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  194. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  195. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  196. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  197. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  198. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  199. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  200. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  201. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  202. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  203. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  204. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  205. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  206. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  207. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  208. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  209. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  210. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  211. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  212. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  213. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +0 -220
  214. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  215. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  216. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  217. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  218. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  219. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  220. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  221. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  222. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  223. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  224. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  225. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  226. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  227. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  228. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  229. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  230. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  231. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  232. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  233. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  234. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  235. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  236. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  237. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  238. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  239. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  240. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  241. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  242. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  243. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  244. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -425
  245. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  246. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  247. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  248. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  249. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  250. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  251. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  252. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  253. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  254. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  255. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  256. data/maps/odni-per-Arab-Latn-2015.yaml +0 -228
  257. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  258. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  259. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  260. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  261. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  262. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  263. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  264. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  265. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  266. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  267. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  268. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  269. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  270. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  271. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  272. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  273. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  274. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  275. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  276. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  277. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  278. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  279. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  280. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  281. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  282. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  283. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  284. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  285. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  286. data/maps/un-hin-Deva-Latn-2016.yaml +0 -222
  287. data/maps/un-mar-Deva-Latn-2016.yaml +0 -91
  288. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  289. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  290. data/maps/un-nep-Deva-Latn-1972.yaml +0 -350
  291. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  292. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  293. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  294. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  295. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  296. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  297. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  298. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  299. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  300. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  301. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  302. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  303. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  304. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  305. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  306. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  307. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  308. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  309. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  310. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  311. data/spec/interscript/filenames_spec.rb +0 -384
  312. data/spec/interscript/mapping_spec.rb +0 -42
  313. data/spec/interscript_spec.rb +0 -29
  314. data/spec/spec_helper.rb +0 -3
@@ -1,36 +0,0 @@
1
- ---
2
- authority_id: bgn
3
- id: mr-1939
4
- language: iso-639-2:kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: McCune-Reischauer System
8
- url:
9
- creation_date: 1939
10
- adoption_date:
11
- description:
12
- E. O. Reischauer and G. M. McCune
13
- The Romanization of the Korean Language Based on Its Phonetic Structure. XXIX:1-55.
14
- Korea Branch of the Royal Asiatic Society. Volume XXIX (PDF)
15
- notes:
16
- This map adds Hanja resolution to var-kor-Hang-Latn-mr-1939
17
-
18
- tests:
19
- - source: "博物館"
20
- expected: "Pangmulgwan"
21
- - source: "사발"
22
- expected: "Sabal"
23
- - source: 韓國
24
- expected: Han’guk
25
- - source: "韓國 의 맛"
26
- expected: "Han’guk Ŭi Mat"
27
- - source: "金浦 國際 空港"
28
- expected: "Kŭmp’o Kukche Konghang"
29
- - source: "悠久한 歷史와 傳統에 빛나는 우리 大韓國民"
30
- expected: "Yuguhan Yŏksawa Chŏnt’onge Pinnanŭn Uri Taehan’gungmin"
31
-
32
- map:
33
- character_separator: ""
34
- word_separator: " "
35
- title_case: True
36
- inherit: [var-kor-Kore-Hang-2013, var-kor-Hang-Latn-mr-1939]
@@ -1,43 +0,0 @@
1
- ---
2
- authority_id: var
3
- id: 1872
4
- language: iso-639-2:mar
5
- source_script: Deva
6
- destination_script: Latn
7
- name: Marathi Hunterian system
8
- url: https://transliteration.eki.ee/pdf/Hindi-Marathi-Nepali.pdf
9
- creation_date: 1872
10
- description:
11
- The Hunterian system is the national system of romanization in India.
12
-
13
- notes:
14
-
15
- - a, i and u are used in word-final position. The a in gaon and the u in pur are not accented.
16
- - ज्ञ is transliterated gy.
17
- - v is used before i.
18
-
19
- tests:
20
- - source: "ठाणे - जिल्ह्यात बुधवारी एक हजार रुग्णांची वाढ, तर जणांच्या मृत्यूची नोंद"
21
- expected: "thaānae - jailhyaāta baudhawaāraī eka hajaāra raugnaānchaī waādha, tara janaānchyaā marityaūchaī naonda"
22
- - source: "एकता कपूर पुन्हा अडकली वादात, वेबसीरिजमधल्या 'त्या' सीनमुळे जमावाची घरावर दगडफेक"
23
- expected: "ekataā kapaūra paunhaā adakalaī waādaāta, waebasaīraijamadhalyaā 'tyaā' saīnamaulae jamaāwaāchaī gharaāwara dagadaphaeka"
24
- - source: "जाणून घ्या, बीएमसीच्या अधिकाऱ्यांनी कंगना राणौतच्या ऑफिसमधले नक्की काय- काय तोडलं"
25
- expected: "jaānaūna ghyaā, baīemasaīchyaā adhaikaāऱ्yaānnaī kanganaā raānaautachyaā ऑphaisamadhalae nakkaī kaāya- kaāya taodalan"
26
- - source: "कंगना मुंबईत दाखल होण्यापूर्वी 'मातोश्री'वरून फर्मान सुटले; प्रवक्त्यांना सक्त आदेश"
27
- expected: "kanganaā maunbaīta daākhala haonyaāpaūrwaī 'maātaosraī'waraūna pharmaāna sautalae; prawaktyaānnaā sakta ādaesa"
28
- - source: "मराठा आरक्षणास तात्पुरती स्थगिती; सर्वोच्च न्यायालयाचा निर्णय"
29
- expected: "maraāthaā ārakshanaāsa taātpaurataī sthagaitaī; sarwaochcha nyaāyaālayaāchaā nairnaya"
30
- - source: "भारताच्या तिन्ही लशींचा पहिला टप्पा यशस्वी, वाचा कधी येणार बाजारात"
31
- expected: "bhaārataāchyaā tainhaī lasaīnchaā pahailaā tappaā yasaswaī, waāchaā kadhaī yaenaāra baājaāraāta"
32
- - source: "रुग्णवाढीमुळे खाटांची चणचण"
33
- expected: "raugnawaādhaīmaulae khaātaānchaī chanachana"
34
- - source: "पीएम स्वनिधी कर्ज योजनेला मुंबईतून अल्प प्रतिसाद"
35
- expected: "paīema swanaidhaī karja yaojanaelaā maunbaītaūna alpa prataisaāda"
36
- - source: "सांताक्रूझ-चेंबूर लिंक रोडवरील उन्नत मार्गाला स्थगिती"
37
- expected: "saāntaākraūjha-chaenbaūra lainka raodawaraīla unnata maārgaālaā sthagaitaī"
38
- - source: "संपादक अर्णब गोस्वामी यांच्याविरूद्ध खडक पोलिस ठाण्यात तक्रार"
39
- expected: "sanpaādaka arnaba gaoswaāmaī yaānchyaāwairaūddha khadaka paolaisa thaānyaāta takraāra"
40
-
41
- map:
42
-
43
- inherit: 'var-hin-Deva-Latn-hunterian-1872'
@@ -1,102 +0,0 @@
1
- ---
2
- authority_id: var
3
- id: 1930
4
- language: iso-639-2:mon
5
- source_script: Mong
6
- destination_script: Latn
7
- name: Mongolian Latin alphabet
8
- url: https://en.m.wikipedia.org/wiki/Mongolian_Latin_alphabet
9
- creation_date:
10
- description: |
11
- Latin alphabet was using "y" as feminine "u", with additional
12
- feminine "o" ("ө") and with additional consonants "ç" for "ch", "ş"
13
- for "sh" and "ƶ" for "j", it successfully served in printing books
14
- and newspapers. A few of the letters (f, k, p, v) were rarely used,
15
- being found only in borrowings, while q, w and x were excluded
16
- altogether. Since k transcribed [h] in loans, it is unclear how loans
17
- in [kʰ] were written. "j" is used for vowel combinations of the [ja]
18
- type. Letter "c" is used for the sound [ts] and "k" is used for the
19
- sound [h]. The first version was inspired by the Yanalif script used
20
- for the Soviet Union's Turkic languages.
21
-
22
- The orthography of the Mongolian Latin is based on the orthography of
23
- the Classical Mongolian script. It preserves short final vowels. It
24
- does not drop unstressed vowels in the closing syllables when the word
25
- is conjugated. The suffixes and inflections without long or i-coupled
26
- vowels are made open syllables ending with a vowel, which is harmonized
27
- with the stressed vowel. The rule for the vowel harmony for unstressed
28
- vowels is similar to that of the Mongolian Cyrillic. It does not use
29
- consonant combinations to denote new consonant sounds. For both of the
30
- version, letter "b" is used both in the beginning and in the middle of
31
- the word. Because it phonetically assimilates into sound [w], no
32
- ambiguity is caused.
33
-
34
- notes:
35
- - A separated final form of vowels a or e is common, and can appear at the end
36
- of a word, word stem, or suffix. This form requires a final-shaped preceding
37
- consonant and an inter-word gap in between. The vowels themselves appear as ᠎ᠠ,
38
- and with consonants as ‍ᠬ᠎ᠠ q‑a, ‍ᠷ᠎ᠠ r‑a/r‑e, etc. This gap can be transliterated
39
- with a hyphen. In digital typesetting, these forms are triggered by inserting
40
- a U+180E ᠎ MONGOLIAN VOWEL SEPARATOR (HTML ᠎ · MVS) between the consonant
41
- and vowel.
42
- - All case suffixes, as well as any plural suffixes consisting of one or two
43
- syllables are likewise separated by a preceding and hyphen-transliterated gap.
44
- In digital typesetting, this gap is represented by a U+202F   NARROW NO-BREAK SPACE
45
- (HTML   · NNBSP). A maximum of two case suffixes can be added to a stem.
46
- Single-letter vowel suffixes appear with the final-shaped forms of a/e, i, or u/ü,
47
- as in ᠭᠠᠵᠠᠷ ᠠ γaǰar‑a 'to the country' and ᠡᠳᠦᠷ ᠡ edür‑e 'on the day', or ᠤᠯᠤᠰ ᠢ ulus‑i
48
- 'the state' etc. Multi-letter suffixes most often start with an initial- (consonants),
49
- medial- (vowels), or variant-shaped form.
50
-
51
- # Note: The provided sample of "Latin 1931-1939" in https://en.m.wikipedia.org/wiki/Mongolian_Latin_alphabet is not an actual transliteration.
52
- # It's a kind of buggy transliteration that mixes Cyrillic and Traditional Mongolian script into Latin based on phonetic sound.
53
- # There are no expected transliteration sources found for the tests.
54
-
55
- # Latin 1931-1939
56
- # Manai ulasiin niislel koto Ulaanbaatar bol 80 000 şakam kyntei, ulasiin olon niitiin, aƶi akuin tөb gazaruud oroşison jikeeken oron bolno.
57
- # Tus ulasiin dotoroos garka tyykii zuiliig bolbosruulka aƶi yildberiin gazaruudiig baiguulka ni çukala.
58
-
59
- tests:
60
- - source: |
61
- ᠮᠠᠨ ᠤ ᠤᠯᠤᠰ ᠤᠨ ᠨᠡᠶᠢᠰᠯᠡᠯ ᠬᠣᠲᠠ ᠤᠯᠠᠭᠠᠨᠪᠠᠭᠠᠲᠤᠷ ᠪᠣᠯ 80 000 ᠰᠢᠬᠠᠮ ᠬᠦᠮᠦᠨ ᠲᠡᠢ᠂ ᠤᠯᠤᠰ ᠤᠨ ᠣᠯᠠᠨ ᠨᠡᠶᠢᠲᠡ ᠶᠢᠨ᠂ ᠠᠵᠤ ᠠᠬᠤᠢ ᠶᠢᠨ ᠲᠥᠪ ᠭᠠᠵᠠᠷ ᠤᠳ ᠣᠷᠣᠰᠢᠭᠰᠠᠨ ᠶᠡᠬᠡᠬᠡᠨ ᠣᠷᠣᠨ ᠪᠣᠯᠤᠨ᠎ᠠ᠃
62
- ᠲᠤᠰ ᠤᠯᠤᠰ ᠤᠨ ᠳᠣᠲᠣᠷ᠎ᠠ ᠠᠴᠠ ᠭᠠᠷᠬᠤ ᠲᠦᠭᠦᠬᠡᠢ ᠵᠤᠶᠢᠯ ᠢ ᠪᠣᠯᠪᠠᠰᠤᠷᠠᠭᠤᠯᠬᠤ ᠠᠵᠤ ᠦᠢᠯᠡᠳᠪᠦᠷᠢ ᠶᠢᠨ ᠭᠠᠵᠠᠷ ᠤᠳ ᠢ ᠪᠠᠶᠢᠭᠤᠯᠬᠤ ᠨᠢ ᠴᠢᠬᠤᠯᠠ᠃
63
- expected: |
64
- man-u ulus-un nejislel kota ulaganbagatur bol 80 000 sikam kymyn-tei, ulus-un olan nejite-jin, aƶu akui-jin tөb gaƶar-ud orosigsan jekeken oron bolun-a.
65
- tus ulus-un dotor-a-aça garku tygykei ƶujil-i bolbasuragulku aƶu yiledbyri-jin gaƶar-ud-i bajigulku ni çikula.
66
-
67
- map:
68
-
69
- characters:
70
- "ᠠ": "a" # \u1820
71
- "ᠡ": "e" # \u1821
72
- "ᠢ": "i" # \u1822
73
- "ᠣ": "o" # \u1823
74
- "ᠤ": "u" # \u1824
75
- "ᠥ": "ө" # \u1825
76
- "ᠦ": "y" # \u1826
77
- "ᠨ": "n" # \u1828
78
- "ᠮ": "m" # \u182e
79
- "ᠯ": "l" # \u182f
80
- "ᠪ": "b" # \u182a
81
- "ᠫ": "p" # \u182b
82
- "ᠹ": "f" # \u1839
83
- "ᠻ": "k" # \u183b
84
- "ᠬ": "k" # \u182c
85
- "ᠭ": "g" # \u182d
86
- "ᠰ": "s" # \u1830
87
- "ᠱ": "ş" # \u1831
88
- "ᠲ": "t" # \u1832
89
- "ᠳ": "d" # \u1833
90
- "ᠴ": "c" # \u1834
91
- "ᠴ": "ç" # \u1834
92
- "ᠵ": "z" # \u1835
93
- "ᠵ": "ƶ" # \u1835
94
- "ᠶ": "j" # \u1836
95
- "ᠷ": "r" # \u1837
96
- "ᠾ": "h" # \u183e
97
- "᠁": "..." # \u1801
98
- "᠂": "," # \u1802
99
- "᠃": "." # \u1803
100
- "᠄": ":" # \u1804
101
- "\u180e": "-" # MVS
102
- "\u202f": "-" # NNBSP
@@ -1,272 +0,0 @@
1
- ---
2
- authority_id: var
3
- id: lessing
4
- language: iso-639-2:mon
5
- source_script: Mong
6
- destination_script: Latn
7
- name: Transliteration Systems for Uyghur-Mongolian or Vertical or Old Script
8
- url: https://collab.its.virginia.edu/wiki/tibetan-script/Transliteration%20Schemes%20for%20Mongolian%20Vertical%20Script.html
9
- creation_date:
10
- description: |
11
- The only system for the Mongolian vertical script which is used commonly throughout
12
- Mongolian studies worldwide is the Vladimirtsov-Mostaert system (V-M). The only
13
- dictionary using this system is in an appendix to Antoine Mostaert’s Dictionnaire
14
- ordos. Thus the Harvard Journal of Asiatic Studies specifies, “please use the written
15
- Mongolian appendix to Antoine Mostaert’s Dictionnaire ordos.” However, the V-M system
16
- has many difficult diacriticals and non-Latin characters that constitute a significant
17
- problem for editors and thus is difficult to recommend.
18
-
19
- notes:
20
- - A separated final form of vowels a or e is common, and can appear at the end
21
- of a word, word stem, or suffix. This form rexuires a final-shaped preceding
22
- consonant and an inter-word gap in between. The vowels themselves appear as ᠎ᠠ,
23
- and with consonants as ‍ᠬ᠎ᠠ q‑a, ‍ᠷ᠎ᠠ r‑a/r‑e, etc. This gap can be transliterated
24
- with a hyphen. In digital typesetting, these forms are triggered by inserting
25
- a U+180E ᠎ MONGOLIAN VOWEL SEPARATOR (HTML ᠎ · MVS) between the consonant
26
- and vowel.
27
- - All case suffixes, as well as any plural suffixes consisting of one or two
28
- syllables are likewise separated by a preceding and hyphen-transliterated gap.
29
- In digital typesetting, this gap is represented by a U+202F   NARROW NO-BREAK SPACE
30
- (HTML   · NNBSP). A maximum of two case suffixes can be added to a stem.
31
- Single-letter vowel suffixes appear with the final-shaped forms of a/e, i, or u/ü,
32
- as in ᠭᠠᠵᠠᠷ ᠠ γaǰar‑a 'to the country' and ᠡᠳᠦᠷ ᠡ edür‑e 'on the day', or ᠤᠯᠤᠰ ᠢ ulus‑i
33
- 'the state' etc. Multi-letter suffixes most often start with an initial- (consonants),
34
- medial- (vowels), or variant-shaped form.
35
-
36
- tests:
37
- - source: ᠬᠥᠬᠡᠬᠣᠲᠠ # Хөх хот
38
- expected: køkexota
39
- - source: ᠣᠷᠳᠣᠰ ᠬᠣᠲᠠ # Ордос хот
40
- expected: ordos xota
41
- - source: ᠪᠠᠶᠠᠨᠨᠠᠭᠤᠷ ᠬᠣᠲᠠ # Баяннуур хот
42
- expected: bajannaγur xota
43
- - source: ᠤᠯᠠᠭᠠᠨᠬᠣᠲᠠ # Улаан хот
44
- expected: ulaγanxota
45
- - source: ᠬᠣᠣᠯᠢᠠ ᠭᠣᠤᠯ ᠬᠣᠲᠠ # Хоолингол хот
46
- expected: xoolia γoul xota
47
- - source: ᠡᠷᠢᠶᠡᠨ ᠬᠣᠲᠠ # Эрээн хот
48
- expected: erijen xota
49
- - source: ᠷᠠᠰᠢᠶᠠᠨ ᠬᠣᠲᠠ # Рашаан хот
50
- expected: rasijan xota
51
- - source: ᠪᠠᠭᠠᠷᠢᠨ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # Баарин баруун хошуу
52
- expected: baγarin baraγun xosiγu
53
- - source: ᠪᠠᠭᠠᠷᠢᠨ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # Баарин зүүн хошуу
54
- expected: baγarin zegyn xosiγu
55
- - source: ᠲᠦᠩᠯᠢᠶᠣᠤ ᠬᠣᠲᠠ # Байшинт хот
56
- expected: tynglijou xota
57
- - source: ᠰᠢᠯᠢ ᠶᠢᠨ ᠬᠣᠲᠠ # Шилийн хот
58
- expected: sili-jin xota
59
- - source: ᠬᠣᠷᠢᠨ ᠭᠡᠷ ᠰᠢᠶᠠᠨ # Horinger County
60
- expected: xorin ger sijan
61
- - source: ᠤᠳᠠ ᠲᠣᠭᠣᠷᠢᠭ # Уда тойрог
62
- expected: uda toγoriγ
63
- - source: ᠦᠬᠠᠢ ᠬᠣᠲᠠ # Үхай хот
64
- expected: yxai xota
65
- - source: ᠬᠠᠶᠢᠨᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Hainan District
66
- expected: xainan toγoriγ
67
- - source: ᠬᠠᠶᠢᠷᠤᠪ ᠤᠨ ᠲᠣᠬᠣᠢ ᠲᠣᠭᠣᠷᠢᠭ # Haibowan District
68
- expected: xairub-un toxoi toγoriγ
69
- - source: ᠤᠯᠠᠭᠠᠨᠴᠠᠪ ᠬᠣᠲᠠ # Улаанцав хот
70
- expected: ulaγancab xota
71
- - source: ᠵᠢᠨᠢᠩ ᠲᠣᠭᠣᠷᠢᠭ # Жинин тойрог
72
- expected: zining toγoriγ
73
- - source: ᠹᠸᠩᠵᠸᠡ ᠬᠣᠲᠠ # Фенжень хот
74
- expected: fvngzve xota
75
- - source: ᠱᠠᠩᠳᠤ ᠰᠢᠶᠠᠨ # Шанду шянь
76
- expected: šangdu sijan
77
- - source: ᠯᠢᠶᠠᠩᠴᠠᠩ ᠰᠢᠶᠠᠨ # Liangcheng County
78
- expected: lijangcang sijan
79
- - source: ᠴᠠᠬᠠᠷ ᠪᠠᠷᠠᠭᠤᠨ ᠭᠠᠷᠤᠨ ᠡᠮᠦᠨᠡᠳᠦ ᠬᠣᠰᠢᠭᠤ # Цахар баруун гарын өмнөд хошуу
80
- expected: caxar baraγun γarun emynedy xosiγu
81
- - source: ᠴᠠᠬᠠᠷ ᠪᠠᠷᠠᠭᠤᠨ ᠭᠠᠷᠤᠨ ᠳᠤᠮᠳᠠᠳᠤ ᠬᠣᠰᠢᠭᠤ # Цахар баруун гарын дундад хошуу
82
- expected: caxar baraγun γarun dumdadu xosiγu
83
- - source: ᠴᠠᠬᠠᠷ ᠪᠠᠷᠠᠭᠤᠨ ᠭᠠᠷᠤᠨ ᠬᠣᠶᠢᠲᠤ ᠬᠣᠰᠢᠭᠤ # Цахар баруун гарын хойд хошуу
84
- expected: caxar baraγun γarun xoitu xosiγu
85
- - source: ᠳᠥᠷᠪᠡᠳ ᠬᠣᠰᠢᠭᠤ # Дөрвөд хошуу
86
- expected: dørbed xosiγu
87
- - source: ᠪᠤᠭᠤᠲᠤ ᠬᠣᠲᠠ # Бугaт хот
88
- expected: buγutu xota
89
- - source: ᠬᠥᠨᠳᠡᠯᠡᠨ ᠲᠣᠭᠣᠷᠢᠭ # Hondlon District
90
- expected: køndelen toγoriγ
91
- - source: ᠴᠢᠩᠱᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Qingshan District
92
- expected: cingšan toγoriγ
93
- - source: ᠰᠢᠭᠤᠶᠢᠲᠤ ᠲᠣᠭᠣᠷᠢᠭ # Шигуай тойрог
94
- expected: siγuitu toγoriγ
95
- - source: ᠵᠢᠦ ᠶᠤᠸᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Jiuyuan District
96
- expected: ziy juvan toγoriγ
97
- - source: ᠭᠦᠶᠠᠩ ᠰᠢᠶᠠᠨ # Guyang County
98
- expected: gyjang sijan
99
- - source: ᠲᠦᠮᠡᠳ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # Түмэд Зүүн хошуу
100
- expected: tymed zegyn xosiγu
101
- - source: ᠲᠦᠮᠡᠳ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # Түмэд Баруун хошуу
102
- expected: tymed baraγun xosiγu
103
- - source: ᠳᠠᠷᠬᠠᠨ ᠮᠤᠤᠮᠢᠩᠭ᠋ᠠᠨ ᠬᠣᠯᠪᠣᠭᠠᠲᠤ ᠬᠣᠰᠢᠭᠤ # Darhan Muminggan United Banner
104
- expected: darxan muumingγ᠋an xolboγatu xosiγu
105
- - source: ᠬᠡᠰᠢᠭᠲᠡᠨ ᠬᠣᠰᠢᠭᠤ # Hexigten Banner
106
- expected: kesiγten xosiγu
107
- - source: ᠰᠢᠨ᠎ᠡ ᠪᠠᠷᠭᠤ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # New Barag Left Banner
108
- expected: sin-e barγu zegyn xosiγu
109
- - source: ᠰᠢᠨ᠎ᠡ ᠪᠠᠷᠭᠤ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # New Barag Right Banner
110
- expected: sin-e barγu baraγun xosiγu
111
- - source: ᠣᠲᠣᠭ ᠬᠣᠰᠢᠭᠤ # Otog Banner
112
- expected: otoγ xosiγu
113
- - source: ᠳᠠᠯᠠᠳ ᠬᠣᠰᠢᠭᠤ # Dalad Banner
114
- expected: dalad xosiγu
115
- - source: ᠵᠡᠭᠦᠨᠭᠠᠷ ᠬᠣᠰᠢᠭᠤ # Jungar Banner
116
- expected: zegynγar xosiγu
117
- - source: ᠣᠲᠣᠭ ᠤᠨ ᠡᠮᠦᠨᠡᠳᠦ ᠬᠣᠰᠢᠭᠤ # Otog Front Banner
118
- expected: otoγ-un emynedy xosiγu
119
- - source: ᠬᠠᠩᠭᠢᠨ ᠬᠣᠰᠢᠭᠤ # Hanggin Banner
120
- expected: xangγin xosiγu
121
- - source: ᠦᠦᠰᠢᠨ ᠬᠣᠰᠢᠭᠤ # Uxin Banner
122
- expected: yysin xosiγu
123
- - source: ᠡᠵᠢᠨ ᠬᠣᠷᠣᠭ᠎ᠠ ᠬᠣᠰᠢᠭᠤ # Эзэн Хороо хошуу
124
- expected: ezin xoroγ-a xosiγu
125
- - source: ᠵᠠᠯᠠᠨ ᠠᠢ᠌ᠯ ᠬᠣᠲᠠ # Жалан-Айл хот
126
- expected: zalan ai᠌l xota
127
- - source: ᠶᠠᠭᠰᠢ ᠬᠣᠲᠠ # Ягши хот
128
- expected: jaγsi xota
129
- - source: ᠮᠠᠨᠵᠤᠤᠷ ᠬᠣᠲᠠ # Манжуур хот
130
- expected: manzuur xota
131
- - source: ᠬᠠᠶᠢᠯᠠᠷ ᠲᠣᠭᠣᠷᠢᠭ # Хайлаар тойрог
132
- expected: xailar toγoriγ
133
- - source: ᠬᠣᠷᠴᠢᠨ ᠲᠣᠭᠣᠷᠢᠭ # Horqin District
134
- expected: xorcin toγoriγ
135
- - source: ᠺᠠᠶᠢᠯᠦ ᠰᠢᠶᠠᠨ # Kailu County
136
- expected: kaily sijan
137
- - source: ᠬᠦᠷᠢᠶ᠎ᠡ ᠬᠣᠰᠢᠭᠤ # Hure Banner
138
- expected: kyrij-e xosiγu
139
- - source: ᠨᠠᠢᠮᠠᠨ ᠬᠣᠰᠢᠭᠤ # Naiman Banner
140
- expected: naiman xosiγu
141
- - source: ᠵᠠᠷᠤᠳ ᠬᠣᠰᠢᠭᠤ # Jarud Banner
142
- expected: zarud xosiγu
143
- - source: ᠬᠣᠷᠴᠢᠨ ᠪᠠᠷᠠᠭᠤᠨ ᠭᠠᠷᠤᠨ ᠳᠤᠮᠳᠠᠳᠤ ᠬᠣᠰᠢᠭᠤ # Хорчин Баруун Гарын Дундад Хошуу
144
- expected: xorcin baraγun γarun dumdadu xosiγu
145
- - source: ᠬᠣᠷᠴᠢᠨ ᠵᠡᠭᠦᠨ ᠭᠠᠷᠤᠨ ᠳᠤᠮᠳᠠᠳᠤ ᠬᠣᠰᠢᠭᠤ # Horqin Left Middle Banner
146
- expected: xorcin zegyn γarun dumdadu xosiγu
147
- - source: ᠬᠣᠷᠴᠢᠨ ᠵᠡᠭᠦᠨ ᠭᠠᠷᠤᠨ ᠬᠣᠶᠢᠲᠤ ᠬᠣᠰᠢᠭᠤ # Horqin Left Rear Banner
148
- expected: xorcin zegyn γarun xoitu xosiγu
149
- - source: ᠤᠯᠠᠭᠠᠨᠬᠠᠳᠠ ᠬᠣᠲᠠ # Улаанхад хот
150
- expected: ulaγanxada xota
151
- - source: ᠰᠡᠷᠡᠴᠢ # Salaqi
152
- expected: sereci
153
- - source: ᠭᠡᠭᠡᠨ ᠭᠣᠤᠯ ᠬᠣᠲᠠ # Гэгээнгол хот
154
- expected: gegen γoul xota
155
- - source: ᠠᠯᠠᠱᠠ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # Alxa Left Banner
156
- expected: alaša zegyn xosiγu
157
- - source: ᠠᠯᠠᠱᠠ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # Alxa Right Banner
158
- expected: alaša baraγun xosiγu # Alaša Baraγun xosiγu
159
- - source: ᠡᠵᠡᠨ᠎ᠡ ᠬᠣᠰᠢᠭᠤ # Ejin Banner
160
- expected: ezen-e xosiγu
161
- - source: ᠬᠥᠪᠡᠭᠡᠲᠦ ᠰᠢᠷ᠎ᠠ ᠬᠣᠰᠢᠭᠤ # Хөвөөт Шар хошуу
162
- expected: købegety sir-a xosiγu
163
- - source: ᠦᠶᠤᠸᠠᠨ ᠰᠢᠶᠠᠨ # Үюань шянь
164
- expected: yjuvan sijan
165
- - source: ᠦᠴᠤᠸᠠᠨ ᠰᠢᠶᠠᠨ # Wuchuan County
166
- expected: ycuvan sijan
167
- - source: ᠲᠦᠴᠢᠤᠸᠠᠨ ᠰᠢᠶᠠᠨ # Тучуань шянь
168
- expected: tyciuvan sijan
169
- - source: ᠯᠢᠨᠰᠢ ᠰᠢᠶᠠᠨ # Linxi County
170
- expected: linsi sijan
171
- - source: ᠬᠠᠷᠠᠴᠢᠨ ᠬᠣᠰᠢᠭᠤ # Harqin Banner
172
- expected: xaracin xosiγu
173
- - source: ᠠᠤᠬᠠᠨ ᠬᠣᠰᠢᠭᠤ # Aohan Banner
174
- expected: auxan xosiγu
175
- - source: ᠬᠣᠳᠣᠩ ᠠᠷᠠᠳ ᠤᠨ ᠲᠣᠭᠣᠷᠢᠭ # Huimin District
176
- expected: xodong arad-un toγoriγ
177
- - source: ᠰᠠᠶᠢᠬᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Saihan District
178
- expected: saixan toγoriγ
179
- - source: ᠰᠢᠨ᠎ᠡ ᠬᠣᠲᠠ ᠲᠣᠭᠣᠷᠢᠭ # Xincheng District
180
- expected: sin-e xota toγoriγ
181
- - source: ᠬᠠᠶᠢᠷᠤᠪ ᠤᠨ ᠲᠣᠬᠣᠢ ᠲᠣᠭᠣᠷᠢᠭ # Haibowan (Hairibin Tohoi) District
182
- expected: xairub-un toxoi toγoriγ
183
- - source: ᠰᠦᠩ ᠱᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Songshan District
184
- expected: syng šan toγoriγ
185
- - source: ᠬᠦᠩ ᠱᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Hongshan District
186
- expected: kyng šan toγoriγ
187
- - source: ᠠᠷᠤᠨ ᠬᠣᠰᠢᠭᠤ # Арун хошуу
188
- expected: arun xosiγu
189
- - source: ᠶᠤᠸᠠᠨ ᠪᠣᠣ ᠱᠠᠨ ᠲᠣᠭᠣᠷᠢᠭ # Юаньбаошань тойрог
190
- expected: juvan boo šan toγoriγ
191
- - source: ᠰᠢᠯᠤᠭᠤᠨ ᠬᠥᠪᠡᠭᠡᠲᠦ ᠴᠠᠭᠠᠨ ᠬᠣᠰᠢᠭᠤ # Шулуун Хөвөөт Цагаан хошуу
192
- expected: siluγun købegety caγan xosiγu
193
- - source: ᠰᠢᠯᠤᠭᠤᠨ ᠬᠥᠬᠡ ᠬᠣᠰᠢᠭᠤ # Шулуун хөх хошуу
194
- expected: siluγun køke xosiγu
195
- - source: ᠤᠷᠠᠳ ᠤᠨ ᠳᠤᠮᠳᠠᠳᠤ ᠬᠣᠰᠢᠭᠤ # Урадын Дундад Хошуу
196
- expected: urad-un dumdadu xosiγu
197
- - source: ᠤᠷᠠᠳ ᠤᠨ ᠡᠮᠦᠨᠡᠳᠦ ᠬᠣᠰᠢᠭᠤ # Урадын Өмнөд Хошуу
198
- expected: urad-un emynedy xosiγu
199
- - source: ᠲᠣᠭᠲᠠᠬᠤ ᠰᠢᠶᠠᠨ # Тогтох шянь
200
- expected: toγtaxu sijan
201
- - source: ᠰᠥᠨᠡᠳ ᠵᠡᠭᠦᠨ ᠬᠣᠰᠢᠭᠤ # Sonid Left Banner
202
- expected: søned zegyn xosiγu
203
- - source: ᠰᠥᠨᠡᠳ ᠪᠠᠷᠠᠭᠤᠨ ᠬᠣᠰᠢᠭᠤ # Sonid Right Banner
204
- expected: søned baraγun xosiγu
205
- - source: ᠣᠩᠨᠢᠭᠤᠳ ᠬᠣᠰᠢᠭᠤ # Ongniud Banner
206
- expected: ongniγud xosiγu
207
- - source: ᠵᠠᠯᠠᠢᠳ ᠬᠣᠰᠢᠭᠤ # Jalaid Banner
208
- expected: zalaid xosiγu
209
- - source: ᠬᠠᠩᠭᠢᠨ ᠬᠣᠶᠢᠲᠤ ᠬᠣᠰᠢᠭᠤ # Hanggin Rear Banner
210
- expected: xangγin xoitu xosiγu
211
- - source: ᠵᠡᠭᠦᠨ ᠤᠵᠤᠮᠤᠴᠢᠨ ᠬᠣᠰᠢᠭᠤ # East Ujimqin Banner
212
- expected: zegyn uzumucin xosiγu
213
- - source: ᠠᠷᠤ ᠬᠣᠷᠴᠢᠨ ᠬᠣᠰᠢᠭᠤ # Ar Horqin Banner
214
- expected: aru xorcin xosiγu
215
- - source: ᠠᠪᠠᠭ᠎ᠠ ᠬᠣᠰᠢᠭᠤ # Abag Banner
216
- expected: abaγ-a xosiγu
217
- - source: ᠪᠤᠷᠢᠶᠠᠳ
218
- expected: burijad # Буриад
219
- - source: ᠤᠯᠠᠭᠠᠨᠪᠠᠭᠠᠲᠤᠷ # Улаанбаатар
220
- expected: ulaγanbaγatur
221
- - source: ᠴᠢᠩᠭᠢᠰ ᠬᠠᠭᠠᠨ # Чингис Хаан
222
- expected: cingγis xaγan
223
-
224
- map:
225
- rules:
226
- - pattern: \u182c(\u1821|u1822|\u1825|\u1826)
227
- result: "k\\1"
228
- - pattern: \u182d(\u1821|u1822|\u1825|\u1826)
229
- result: "g\\1"
230
- - pattern: \u1820\u1836\u1822
231
- result: "ai"
232
- - pattern: \u1821\u1836\u1822
233
- result: "ei"
234
- - pattern: \u1823\u1836\u1822
235
- result: "oi"
236
- - pattern: \u1824\u1836\u1822
237
- result: "ui"
238
-
239
- characters:
240
- "ᠠ": "a" # \u1820
241
- "ᠡ": "e" # \u1821
242
- "ᠢ": "i" # \u1822
243
- "ᠣ": "o" # \u1823
244
- "ᠤ": "u" # \u1824
245
- "ᠥ": "ø" # \u1825
246
- "ᠦ": "y" # \u1826
247
- "ᠨ": "n" # \u1828
248
- "ᠩ": "ng" # \u1829
249
- "ᠬ": "x" # \u182c
250
- "ᠭ": "γ" # \u182d
251
- "ᠪ": "b" # \u182a
252
- "ᠫ": "p" # \u182b
253
- "ᠹ": "f" # \u1839
254
- "ᠰ": "s" # \u1830
255
- "ᠱ": "š" # \u1831
256
- "ᠲ": "t" # \u1832
257
- "ᠳ": "d" # \u1833
258
- "ᠯ": "l" # \u182f
259
- "ᠮ": "m" # \u182e
260
- "ᠴ": "c" # \u1834
261
- "ᠵ": "z" # \u1835
262
- "ᠶ": "j" # \u1836
263
- "ᠺ": "k" # \u183a
264
- "ᠷ": "r" # \u1837
265
- "ᠸ": "v" # \u1838
266
- "ᠾ": "h" # \u183e
267
- "᠁": "..." # \u1801
268
- "᠂": "," # \u1802
269
- "᠃": "." # \u1803
270
- "᠄": ":" # \u1804
271
- "\u180e": "-" # MVS
272
- "\u202f": "-" # NNBSP