interscript 0.1.7 → 2.1.0b1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (314) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +116 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +83 -133
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +68 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +68 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +76 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +68 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/lib/interscript/visualize.rb +61 -0
  63. data/lib/interscript/visualize/group.html.erb +59 -0
  64. data/lib/interscript/visualize/json.rb +57 -0
  65. data/lib/interscript/visualize/map.html.erb +46 -0
  66. data/lib/interscript/visualize/nodes.rb +89 -0
  67. data/requirements.txt +1 -0
  68. metadata +78 -416
  69. data/README.adoc +0 -298
  70. data/lib/g2pwrapper.py +0 -34
  71. data/lib/interscript/fs.rb +0 -69
  72. data/lib/interscript/mapping.rb +0 -142
  73. data/lib/interscript/opal.rb +0 -57
  74. data/lib/interscript/opal/entrypoint.rb +0 -12
  75. data/lib/interscript/opal/map_translate.rb +0 -7
  76. data/lib/interscript/opal/maps.js.erb +0 -10
  77. data/lib/model-7 +0 -0
  78. data/lib/tha-pt-b-7 +0 -0
  79. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  80. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  81. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  82. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  83. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -165
  84. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -40
  85. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  86. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  87. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  88. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  89. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  90. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  91. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  92. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  93. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -211
  94. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -47
  95. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  96. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  97. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  98. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  99. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  100. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  101. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  102. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  103. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  104. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  105. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  106. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  107. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  108. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  109. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  110. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  111. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  112. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -172
  113. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  114. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  115. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  116. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  117. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  118. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  119. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  120. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  121. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  122. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  123. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  124. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  125. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  126. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  127. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  128. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  129. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  130. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -596
  131. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  132. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  133. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  134. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  135. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  136. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  137. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  138. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  139. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  140. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  141. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  142. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  143. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  144. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  145. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  146. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  147. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  148. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  149. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  150. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  151. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -336
  152. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -639
  153. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  154. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  155. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  156. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  157. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  158. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  159. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  160. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  161. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  162. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  163. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  164. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  165. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  166. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  167. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  168. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  169. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  170. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  171. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  172. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  173. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  174. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  175. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  176. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  177. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  178. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  179. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  180. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  181. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  182. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  183. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  184. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  185. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  186. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  187. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  188. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  189. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  190. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  191. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  192. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  193. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  194. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  195. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  196. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  197. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  198. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  199. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  200. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  201. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  202. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  203. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  204. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  205. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  206. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  207. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  208. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  209. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  210. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  211. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  212. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  213. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +0 -220
  214. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  215. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  216. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  217. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  218. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  219. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  220. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  221. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  222. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  223. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  224. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  225. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  226. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  227. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  228. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  229. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  230. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  231. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  232. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  233. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  234. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  235. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  236. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  237. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  238. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  239. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  240. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  241. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  242. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  243. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  244. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -425
  245. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  246. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  247. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  248. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  249. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  250. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  251. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  252. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  253. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  254. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  255. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  256. data/maps/odni-per-Arab-Latn-2015.yaml +0 -228
  257. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  258. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  259. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  260. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  261. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  262. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  263. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  264. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  265. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  266. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  267. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  268. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  269. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  270. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  271. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  272. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  273. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  274. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  275. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  276. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  277. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  278. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  279. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  280. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  281. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  282. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  283. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  284. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  285. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  286. data/maps/un-hin-Deva-Latn-2016.yaml +0 -222
  287. data/maps/un-mar-Deva-Latn-2016.yaml +0 -91
  288. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  289. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  290. data/maps/un-nep-Deva-Latn-1972.yaml +0 -350
  291. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  292. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  293. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  294. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  295. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  296. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  297. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  298. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  299. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  300. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  301. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  302. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  303. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  304. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  305. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  306. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  307. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  308. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  309. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  310. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  311. data/spec/interscript/filenames_spec.rb +0 -384
  312. data/spec/interscript/mapping_spec.rb +0 -42
  313. data/spec/interscript_spec.rb +0 -29
  314. data/spec/spec_helper.rb +0 -3
@@ -1,23 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1996
4
- language: iso-639-2:ell
5
- source_script: Grek
6
- destination_script: Latn
7
- name: BGN/PCGN 1996 System
8
- alias:
9
- ogc11122:
10
- code: ell_Grek2Latn_ELOT743_1996
11
- description: Greek ELOT 743 System, US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1996 agreement
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693694/ROMANIZATION_OF_GREEK.pdf
13
- creation_date: 1996
14
- description: |
15
- BGN/PCGN Romanization table for Greek
16
-
17
- note:
18
- - Identical to ELOT 743:1982, which is also adopted as ISO 843:1997 and by UNGEGN
19
-
20
- map:
21
- character_separator: ""
22
- word_separator: " "
23
- inherit: "elot-ell-Grek-Latn-743-1982-ts"
@@ -1,96 +0,0 @@
1
- ---
2
- # TODO: This system is not complete/usable yet!
3
- # TODO: Add tests from PDF
4
- authority_id: bgnpcgn
5
- id: 1956
6
- language: iso-639-2:fas
7
- source_script: Arab
8
- destination_script: Latn
9
- name: BGN/PCGN 1956 System
10
- alias:
11
- ogc11122:
12
- code: fas_Arab2Latn_BGN_1958
13
- description: Persian (Afghan and Iranian) BGN/Permanent Committee on Geographical Names for British Official Use(PCGN) 1958 System
14
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/320079/Arabic_Romanization.pdf
15
- creation_date: 1947
16
- confirmation_date: 2019-06
17
- description: |
18
- This System was adopted by the BGN in 1946 and by the PCGN in 1956
19
- and is applied in the systematic romanization of geographic names in
20
- Bahrain, Egypt, Iraq, Jordan, Kuwait, Libya, Oman, Qatar, Saudi Arabia,
21
- Syria, the United Arab Emirates, and Yemen.
22
-
23
- Uniform results in the romanization of Arabic are difficult to
24
- obtain, since vowel points and diacritical marks are generally omitted
25
- from both manual and machine writing. It follows that for correct
26
- identification of the words which appear in any particular name,
27
- knowledge of its standard Arabic- script spelling including proper
28
- pointing, and recognition of dialectal and idiosyncratic deviations are
29
- essential.
30
-
31
- In order to bring about uniformity in the Roman-script spelling of
32
- geographic names in Arabic- language areas, the system is based insofar
33
- as possible on fully pointed modern standard Arabic. In the interest of
34
- clarity, vowel pointing has been applied to the examples below. Arabic
35
- is written from right to left, and does not make a distinction between
36
- upper and lower case.
37
-
38
- notes:
39
- - The symbol ◌ represents any Arabic consonant character.
40
- - "Hamzah (ء) is written in Arabic in association with most instances of initial alif, except those which belong to the definite article al or which bear a maddah (see note 11). Hamzah is written above the alif if the accompanying short vowel is a fatḩah (َأ) or ḑammah (ُأ) and below the alif if the accompanying short vowel is a kasrah (ِإ). When the purpose is to indicate the presence of a glottal stop, hamzah is written over medial and final alif (أ), wāw (ؤ) and yā’ without dots (ئ). Hamzah following kasrah (◌ِ ) is written (ئ). Almost always the yā’ is in the initial or medial form and the dots are omitted: example: (بئر). Hamzah following ḑammah (◌ُ )is written (ؤ). Hamzah following a long vowel is written without a bearer and is positioned on the line of print like a regular character. The romanization of hamzah (’) should always be carefully distinguished from that of ‘ayn (‘)."
41
- - Alif as such is not romanized when it is a bearer of hamzah, but see fatḩah alif (ا◌َ ) and alif maddah (آ) in the vowel table. See also note 2 above and note 11.
42
- - "In certain endings, an original tā’ (ت) is written (ة), i.e., like hā’ with two dots, and is known as tā marbūţah. It is romanized h, except in the construct form, where it is romanized t instead. Example: hamzah, hamzat al qaţ‘. The ending fatḩah hā’ (ه◌َ) may be romanized a·h when the character hā’ (ه) is not silent. Example: Muntaza·h. See also note 5."
43
- - Occasionally, the character sequences ـدهـ ,ـتـهـ, ـكـهـ and سهـ occur. They may be romanized k·h, t·h, d·h, and s·h in order to differentiate those romanizations from the digraphs kh, th, dh, and sh. See also note 4.
44
- - Where special considerations are paramount, the sub-dot ( ִ ) may be used in place of the cedilla.
45
- - The character yā’ (in final form but without dots) preceded by the vowel point fatḩah is a combination known as alif maqşūrah. See character 7 in the vowel table.
46
- - "The classical Arabic grammatical endings written with the nunation symbols (tanwīn) may be romanized, when necessary, by an, in, un. In modern Arabic, these endings have become silent and should not be romanized: classical alifun modern alif."
47
- - Doubled consonant sounds are represented in Arabic script by placing a shaddah (◌ّ ) over a consonant character. In romanization the letter should be doubled. However, the combination of the consonant character yā’ with a shaddah preceded by a kasrah (ــــِّيــ) is romanized īy rather than iyy. e.g., (ـــِّيـة) is romanized (īyah) and not (iyyah). When the definite article (al) precedes a word beginning with one of the “sun letters” t, th, d, dh, r, z, s, sh, ş, ḑ, ţ, z̧ , l, or n – the l is assimilated in pronunciation and romanization, thus yielding tt, thth, etc., in romanization. Example, An Nīl, not Al Nīl.
48
- - Hamzat al waşl (ٱ), which is utilized only in the pointing of classical Arabic, is romanized ’ as illustrated in the classical form of its name hamzatu’l waşli.
49
- - Since maddah (آ), which is placed over alif (ا), nearly always occurs in word-initial position, no confusion results from the use of ā for alif maddah (آ) as well as for fatḩah alif (◌َا).
50
- - The ligatures لا and لـا represent lām- alif, and should be romanized lā.
51
-
52
- special_rules:
53
- # TODO: These are not used
54
- - Initial definite articles and prepositions should be capitalized and hyphens should not be used to connect parts of names, e.g., Ash Shāriqah and Tall al Laḩm.
55
- - If any evidence is found for the use of the definite article in a name, the article should be used in the name chosen.
56
- - The Arabic word for God should be written Allāh (الله).
57
- - Names which consist of noun phrases should be written as separate words. The definite article within such names should be romanized al, not ul, e.g., ‘Abd Allāh, ‘Abd ar Raḩmān, Dhū al Faqār.
58
- - The Arabic word بِن should be romanized Bin rather than Ibn whenever written without alif, that is between two proper nouns, e.g., ‘Umar Bin al Khaţţāb.
59
- - The Turkish word Paşa should be romanized from Arabic script as Bāshā. The Turkish word Bey should be romanized as Bey in Egyptian names, no matter how it is written in Arabic-language sources, but in other Arabic areas it should be romanized as Bak where written بك and as Bayk when written بيك.
60
- - The modern colloquial word Sīdī should be given precedence over the classical form Sayyidī. This does not preclude the spelling Sayyidī if the latter is indicated by the Arabic script or other evidence – for instance, if the yā’ is written with a shaddah (◌ّ ).
61
- - The colloquial word Bū should not be changed to the standard form Abū.
62
- - The colloquial word for water, written مية on Arabic maps, should be romanized Mayyat.
63
- - Place names of Aramaic origin in Syria often contain initial consonant clusters consisting of b plus another consonant such as l or h. In romanization, the clusters bl, bh, etc., should be so represented.
64
- - In names containing the Arabic word for back, ridge, or hill, appearing as either ظهر or ضهر in Arabic sources, the word should be romanized to reflect the particular Arabic spelling shown.
65
- map:
66
- characters:
67
- '\u0627' : 'a'
68
- '\u0628' : 'b'
69
- '\u067e' : 'p'
70
- '\u062a' : 't'
71
- '\u062b' : 's'
72
- '\u062c' : 'j'
73
- '\u062d' : 'h'
74
- '\u0686' : 'ch'
75
- '\u062e' : 'kh'
76
- '\u062f' : 'd'
77
- '\u0630' : 'z'
78
- '\u0631' : 'r'
79
- '\u0632' : 'z'
80
- '\u0633' : 's'
81
- '\u0634' : 'sh'
82
- '\u0635' : 's'
83
- '\u0636' : 'z'
84
- '\u0637' : 't'
85
- '\u0638' : 'z'
86
- '\u0639' : '\u2018'
87
- '\u063a' : 'gh'
88
- '\u0641' : 'f'
89
- '\u0642' : 'q'
90
- '\u0643' : 'k'
91
- '\u0644' : 'l'
92
- '\u0645' : 'm'
93
- '\u0646' : 'n'
94
- '\u0647' : 'h'
95
- '\u0648' : 'v'
96
- '\u0649' : 'y'
@@ -1,257 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1930
4
- language: iso-639-2:jpn
5
- source_script: Hrkt
6
- destination_script: Latn
7
- name: Japanese Kana Modified Hepburn 1930 System
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
- - Segmentation needs to be done before using this map
15
- - Note 5 in the specification states that when identical repeating vowels
16
- belong to different kanji characters, they shall be romanized
17
- individually and an apostrophe (’) shall be placed between the vowels.
18
- However since this is a map from Kana to Hepburn, there is no way to
19
- implement this feature.
20
- - The documentation did not specify how the hyphen should be used.
21
-
22
- tests:
23
-
24
- - source: てがた-からみでん
25
- expected: "Tegata-karamiden"
26
- - source: てがた-すみよしちょう
27
- expected: "Tegata-sumiyoshichō"
28
- - source: さいのはま
29
- expected: "Sainohama"
30
- - source: てがた-たなか
31
- expected: "Tegata-tanaka"
32
- - source: ほりおでん
33
- expected: "Horioden"
34
- - source: そえがわ
35
- expected: "Soegawa"
36
- - source: ふねがさわ
37
- expected: "Funegasawa"
38
- - source: とくまんだて
39
- expected: "Tokumandate"
40
- - source: たてない
41
- expected: "Tatenai"
42
- - source: つるがさき
43
- expected: "Tsurugasaki"
44
- - source: しもやつせ
45
- expected: "Shimoyatsuse"
46
- - source: かみやつせ
47
- expected: "Kamiyatsuse"
48
- - source: しんとうだ
49
- expected: "Shintōda"
50
- - source: かじのめ
51
- expected: "Kajinome"
52
- - source: まえぎ
53
- expected: "Maegi"
54
- - source: くろさわ やま
55
- expected: "Kurosawa Yama"
56
- - source: いちのさわ がわ
57
- expected: "Ichinosawa Gawa"
58
- - source: はちやまえ
59
- expected: "Hachiyamae"
60
- - source: やち
61
- expected: "Yachi"
62
- - source: たてぬま
63
- expected: "Tatenuma"
64
- - source: しらはま
65
- expected: "Shirahama"
66
- - source: けせんまち
67
- expected: "Kesenmachi"
68
- - source: けいだい-かわら
69
- expected: "Keidai-kawara"
70
- - source: いしやました
71
- expected: "Ishiyamashita"
72
- - source: なえひら-やち
73
- expected: "Naehira-yachi"
74
- - source: とみの
75
- expected: "Tomino"
76
- - source: あらや-たかみまち
77
- expected: "Araya-takamimachi"
78
- - source: ながた
79
- expected: "Nagata"
80
- - source: とどろき おんせん
81
- expected: "Todoroki Onsen"
82
- - source: かしわぎはら
83
- expected: "Kashiwagihara"
84
- - source: とやけもり やま
85
- expected: "Toyakemori Yama"
86
- - source: なかさい
87
- expected: "Nakasai"
88
- - source: たけした
89
- expected: "Takeshita"
90
- - source: みと
91
- expected: "Mito"
92
- - source: みなみなかさと
93
- expected: "Minaminakasato"
94
- - source: みずおし
95
- expected: "Mizuoshi"
96
- - source: なかさと
97
- expected: "Nakasato"
98
- - source: しんかりば
99
- expected: "Shinkariba"
100
- - source: しんかみぬま
101
- expected: "Shinkaminuma"
102
- - source: しんばし
103
- expected: "Shinbashi"
104
- - source: りくぜんやました えき
105
- expected: "Rikuzen’yamashita Eki"
106
- - source: うしじまにし
107
- expected: "Ushijimanishi"
108
- - source: はまえば
109
- expected: "Hamaeba"
110
- - source: ぬまむかい
111
- expected: "Numamukai"
112
- - source: さんげんやち
113
- expected: "Sangen’yachi"
114
- - source: にけんやち
115
- expected: "Niken’yachi"
116
- - source: やちなか
117
- expected: "Yachinaka"
118
- - source: なす がわ
119
- expected: "Nasu Gawa"
120
- - source: おおはらはま
121
- expected: "Ōharahama"
122
- - source: うるご がわ
123
- expected: "Urugo Gawa"
124
- - source: なかばせ
125
- expected: "Nakabase"
126
- - source: うと えき
127
- expected: "Uto Eki"
128
- - source: みずまち
129
- expected: "Mizumachi"
130
- - source: ごんげんどう
131
- expected: "Gongendō"
132
- - source: いとひさ
133
- expected: "Itohisa"
134
- - source: あらおい
135
- expected: "Araoi"
136
- - source: わんめ
137
- expected: "Wanme"
138
- - source: かじろ
139
- expected: "Kajiro"
140
- - source: みやばら
141
- expected: "Miyabara"
142
- - source: いまどみ
143
- expected: "Imadomi"
144
- - source: かいほ
145
- expected: "Kaiho"
146
- - source: かいほ ぼえん
147
- expected: "Kaiho Boen"
148
- - source: ひきだ
149
- expected: "Hikida"
150
- - source: あさい-こむかい
151
- expected: "Asai-komukai"
152
- - source: こうざか
153
- expected: "Kōzaka"
154
- - source: こうふうだい
155
- expected: "Kōfūdai"
156
- - source: たての
157
- expected: "Tateno"
158
- - source: センター
159
- expected: "Sentā"
160
- - source: フィリピン
161
- expected: "Firipin"
162
- - source: ヴィオリン
163
- expected: "Viorin"
164
- - source: クォーター
165
- expected: "Kwōtā"
166
- - source: パッチリ
167
- expected: "Patchiri"
168
- - source: ぽっぽっや
169
- expected: "Poppoyya"
170
-
171
- map:
172
- character_separator: ""
173
- word_separator: " "
174
- title_case: True
175
- inherit: var-jpn-Hrkt-Latn-hepburn-1954
176
-
177
- characters:
178
- # Rare sounds, Table 2 & 4
179
-
180
- "くぁ": "kwa"
181
- "クァ": "kwa"
182
- "ぐぁ": "gwa"
183
- "グァ": "gwa"
184
- "くぃ": "kwi"
185
- "クィ": "kwi"
186
- "ぐぃ": "gwa"
187
- "グィ": "gwa"
188
- "きぇ": "kye"
189
- "キェ": "kye"
190
- "ぎぇ": "gye"
191
- "ギェ": "gye"
192
- "くぇ": "kwe"
193
- "クェ": "kwe"
194
- "ぐぇ": "gwe"
195
- "グェ": "gwe"
196
- "くぉ": "kwo"
197
- "クォ": "kwo"
198
- "ぐぉ": "gwo"
199
- "グォ": "gwo"
200
- "しぇ": "she"
201
- "シェ": "she"
202
- "じぇ": "je"
203
- "ジェ": "je"
204
- "つぁ": "tsa"
205
- "ツァ": "tsa"
206
- "てぃ": "ti"
207
- "ティ": "ti"
208
- "でぃ": "di"
209
- "ディ": "di"
210
- "てゅ": "tyu"
211
- "テュ": "tyu"
212
- "でゅ": "dyu"
213
- "デュ": "dyu"
214
- "とゅ": "tu"
215
- "トュ": "tu"
216
- "どゅ": "du"
217
- "ドュ": "du"
218
- "ちぇ": "che"
219
- "チェ": "che"
220
- "ぢぇ": "je"
221
- "ヂェ": "je"
222
- "つぇ": "tse"
223
- "ツェ": "tse"
224
- "つぉ": "tso"
225
- "ツォ": "tso"
226
- "にぇ": "nye"
227
- "ニェ": "nye"
228
- "ふぁ": "fa"
229
- "ファ": "fa"
230
- "ふぃ": "fi"
231
- "フィ": "fi"
232
- "ふぇ": "fe"
233
- "フェ": "fe"
234
- "ふぉ": "fo"
235
- "フォ": "fo"
236
- "みぇ": "mye"
237
- "ミェ": "mye"
238
- "ぃぇ": "ye"
239
- "ィェ": "ye"
240
- "りぇ": "rye"
241
- "リェ": "rye"
242
- "ゔぁ": "va"
243
- "ヴァ": "va"
244
- "うぃ": "wi"
245
- "ウィ": "wi"
246
- "ゔぃ": "vi"
247
- "ヴィ": "vi"
248
- "うぇ": "we"
249
- "ウェ": "we"
250
- "ゔぇ": "ve"
251
- "ヴェ": "ve"
252
- "うぉ": "wo"
253
- "ウォ": "wo"
254
- "ゔぉ": "vo"
255
- "ヴォ": "vo"
256
- "ゔ": "vu"
257
- "ヴ": "vu"
@@ -1,131 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1981
4
- language: iso-639-2:kat
5
- source_script: Geor
6
- destination_script: Latn
7
- name: ROMANIZATION OF GEORGIAN; BGN/PCGN 1981 System
8
- alias:
9
- ogc11122:
10
- code: kat_Geor2Latn_BGN_1981
11
- description: Georgian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1981 System
12
- url: https://transliteration.eki.ee/pdf/Georgian.pdf
13
- creation_date: 1981
14
- confirmation_date: 1981
15
- description: |
16
- BGN/PCGN system of 1981.
17
-
18
- notes:
19
-
20
- tests:
21
- - source: ჰებუდი
22
- expected: hebudi
23
-
24
- - source: ჯვრის წყალსაცავი
25
- expected: jvris tsqalsats’avi
26
-
27
- - source: ჯვავიაკვარა
28
- expected: jvaviak’vara
29
-
30
- - source: ჯობრია
31
- expected: jobria
32
-
33
- - source: ძულუხირა
34
- expected: dzulukhira
35
-
36
- - source: ლეკუხონა
37
- expected: lek’ukhona
38
-
39
- - source: აბაშა
40
- expected: abasha
41
-
42
- - source: ააცი
43
- expected: aats’i
44
-
45
- # TODO: This belongs to which system?!
46
- # - source: აბააჟახვუ
47
- # expected: abaazhvakhu
48
-
49
- # TODO: These examples from GNDB are clearly using the BGNPCGN 2009 system
50
- #
51
- # - source: ხობის მუნიციპალიტეტი
52
- # expected: khobis munitsip’alit’et’i
53
- #
54
- # - source: მყინვარი ჩრდილოეთი ლეადაშატი
55
- # expected: mq’invari chrdiloeti leadashat’i
56
- #
57
- # - source: ხეწკვარა
58
- # expected: khets’k’vara
59
- #
60
- # - source: ჯამპალი
61
- # expected: jamp’ali
62
- #
63
- # - source: ჯავის მუნიციპალიტეტი
64
- # expected: javis munitsip’alit’et’i
65
- #
66
- # - source: ხოიჯგეთა
67
- # expected: khoijgeta
68
- #
69
- # - source: ხობის მუნიციპალიტეტი
70
- # expected: khobis munitsip’alit’et’i
71
- #
72
- # - source: წვიშარხუ
73
- # expected: ts’visharkhu
74
- # - source: აღმოსავლეთი გუმისთა
75
- # expected: aghmosavleti gumista
76
- #
77
- # - source: ქვემო ბირცხა
78
- # expected: kvemo birtskha
79
- #
80
- # - source: ზემო ბირცხა
81
- # expected: zemo birtskha
82
- #
83
- # - source: აბჟაყვა
84
- # expected: abzhaq’va
85
-
86
-
87
-
88
- map:
89
- characters:
90
- '\u10d0' : 'a' # ა
91
- '\u10d1' : 'b' # ბ
92
- '\u10d2' : 'g' # გ
93
- '\u10d3' : 'd' # დ
94
- '\u10d4' : 'e' # ე
95
- '\u10d5' : 'v' # ვ
96
- '\u10d6' : 'z' # ზ
97
-
98
- '\u10f1' : 'ey' # ჱ
99
-
100
- '\u10d7' : 't’' # თ
101
- '\u10d8' : 'i' # ი
102
- '\u10d9' : 'k’' # კ
103
- '\u10da' : 'l' # ლ
104
- '\u10db' : 'm' # მ
105
- '\u10dc' : 'n' # ნ
106
-
107
- '\u10f2' : 'j' # ჲ
108
-
109
- '\u10dd' : 'o' # ო
110
- '\u10de' : 'p' # პ
111
- '\u10df' : 'zh' # ჟ
112
- '\u10e0' : 'r' # რ
113
- '\u10e1' : 's' # ს
114
- '\u10e2' : 't' # ტ
115
- '\u10e3' : 'u' # უ
116
- '\u10e4' : 'p’' # ფ
117
- '\u10e5' : 'k’' # ქ
118
- '\u10e6' : 'gh' # ღ
119
- '\u10e7' : 'q' # ყ
120
- '\u10e8' : 'sh' # შ
121
- '\u10e9' : 'ch’' # ჩ
122
- '\u10ea' : 'ts’' # ც
123
- '\u10eb' : 'dz' # ძ
124
- '\u10ec' : 'ts' # წ
125
- '\u10ed' : 'ch' # ჭ
126
- '\u10ee' : 'kh' # ხ
127
-
128
- '\u10f4' : 'q’' # ჴ
129
-
130
- '\u10ef' : 'j' # ჯ
131
- '\u10f0' : 'h' # ჰ