interscript 0.1.7 → 2.1.0b1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (314) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +116 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +83 -133
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +68 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +68 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +76 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +68 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/lib/interscript/visualize.rb +61 -0
  63. data/lib/interscript/visualize/group.html.erb +59 -0
  64. data/lib/interscript/visualize/json.rb +57 -0
  65. data/lib/interscript/visualize/map.html.erb +46 -0
  66. data/lib/interscript/visualize/nodes.rb +89 -0
  67. data/requirements.txt +1 -0
  68. metadata +78 -416
  69. data/README.adoc +0 -298
  70. data/lib/g2pwrapper.py +0 -34
  71. data/lib/interscript/fs.rb +0 -69
  72. data/lib/interscript/mapping.rb +0 -142
  73. data/lib/interscript/opal.rb +0 -57
  74. data/lib/interscript/opal/entrypoint.rb +0 -12
  75. data/lib/interscript/opal/map_translate.rb +0 -7
  76. data/lib/interscript/opal/maps.js.erb +0 -10
  77. data/lib/model-7 +0 -0
  78. data/lib/tha-pt-b-7 +0 -0
  79. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  80. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  81. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  82. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  83. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -165
  84. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -40
  85. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  86. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  87. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  88. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  89. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  90. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  91. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  92. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  93. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -211
  94. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -47
  95. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  96. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  97. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  98. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  99. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  100. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  101. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  102. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  103. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  104. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  105. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  106. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  107. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  108. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  109. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  110. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  111. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  112. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -172
  113. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  114. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  115. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  116. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  117. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  118. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  119. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  120. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  121. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  122. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  123. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  124. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  125. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  126. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  127. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  128. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  129. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  130. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -596
  131. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  132. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  133. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  134. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  135. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  136. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  137. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  138. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  139. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  140. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  141. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  142. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  143. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  144. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  145. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  146. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  147. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  148. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  149. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  150. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  151. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -336
  152. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -639
  153. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  154. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  155. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  156. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  157. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  158. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  159. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  160. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  161. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  162. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  163. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  164. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  165. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  166. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  167. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  168. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  169. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  170. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  171. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  172. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  173. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  174. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  175. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  176. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  177. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  178. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  179. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  180. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  181. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  182. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  183. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  184. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  185. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  186. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  187. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  188. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  189. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  190. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  191. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  192. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  193. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  194. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  195. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  196. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  197. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  198. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  199. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  200. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  201. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  202. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  203. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  204. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  205. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  206. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  207. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  208. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  209. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  210. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  211. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  212. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  213. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +0 -220
  214. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  215. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  216. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  217. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  218. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  219. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  220. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  221. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  222. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  223. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  224. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  225. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  226. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  227. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  228. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  229. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  230. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  231. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  232. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  233. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  234. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  235. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  236. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  237. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  238. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  239. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  240. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  241. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  242. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  243. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  244. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -425
  245. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  246. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  247. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  248. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  249. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  250. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  251. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  252. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  253. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  254. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  255. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  256. data/maps/odni-per-Arab-Latn-2015.yaml +0 -228
  257. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  258. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  259. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  260. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  261. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  262. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  263. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  264. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  265. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  266. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  267. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  268. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  269. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  270. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  271. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  272. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  273. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  274. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  275. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  276. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  277. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  278. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  279. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  280. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  281. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  282. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  283. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  284. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  285. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  286. data/maps/un-hin-Deva-Latn-2016.yaml +0 -222
  287. data/maps/un-mar-Deva-Latn-2016.yaml +0 -91
  288. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  289. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  290. data/maps/un-nep-Deva-Latn-1972.yaml +0 -350
  291. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  292. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  293. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  294. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  295. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  296. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  297. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  298. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  299. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  300. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  301. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  302. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  303. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  304. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  305. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  306. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  307. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  308. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  309. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  310. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  311. data/spec/interscript/filenames_spec.rb +0 -384
  312. data/spec/interscript/mapping_spec.rb +0 -42
  313. data/spec/interscript_spec.rb +0 -29
  314. data/spec/spec_helper.rb +0 -3
@@ -1,459 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: yaghoubi
4
- language: iso-639-3:prs # prs stands for Dari (https://iso639-3.sil.org/code/prs&_ga=GA1.2.2054538372.1574092823)
5
- source_script: Arab
6
- destination_script: Latn
7
- name: BGN/PCGN NATIONAL ROMANIZATION SYSTEM FOR AFGHANISTAN -- BGN/PCGN 2007 System (Yaghoubi)
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693661/ROMANIZATION_FOR_AFGHANISTAN.pdf
9
- creation_date: 2007
10
- confirmation_date: 2017-11
11
- description: |
12
- This romanization system agreed by BGN and PCGN in November 2007,
13
- accommodates the linguistic complexity of Afghanistan as manifest in
14
- its geographical names.
15
-
16
- The following tabulation shows the original Perso-Arabic script with
17
- accompanying Unicode value (columns 1a and b), the Yaghoubi
18
- romanization (column 2), the BGN/PCGN romanization with accompanying
19
- Unicode value (columns 3a and b), an English phonetic example (column
20
- 4), and an example toponym (columns 5b and c).
21
-
22
- [The Yaghoubi romanization system was developed in 1959 by
23
- Muzaffarud Din Yaqubi (commonly seen as Yaghoubi). It is a native
24
- official system designed to reflect Afghan names, both Dari and Pashto,
25
- and both pronunciation and genuine linguistic truth.]
26
-
27
- The tables function as both a romanization system for Afghanistan (i.e.
28
- with access to the original script, these tables can be applied to get
29
- a standardized Roman result - moving from columns 1 to 3) and as a
30
- means of converting the available Yaghoubi Roman-script spellings, as
31
- appear on the Fairchild Aerial Surveys map series, to standard BGN/PCGN
32
- spellings (moving from columns 2 to 3).
33
-
34
- The points used in Arabic to mark short vowels and certain other
35
- diacritical marks are infrequently written in Afghanistan.
36
- Consequently, a reference source may sometimes be required to aid
37
- correct identification of the standard spellings and proper vowels and
38
- elimination of dialectal and idiosyncratic variations. In the interests
39
- of clarity, the example columns show script with vowel pointing from
40
- Arabic to indicate the short vowels that are included alongside the
41
- unpointed form that will usually be encountered. However it should be
42
- noted that the pronunciation of short vowels will vary.
43
-
44
- Note: it is recommended that a font such as Scheherazade, available
45
- from www.sil.org, which includes the Unicode extended Arabic sub-range,
46
- be used to view this system. [Please note that the identification of a
47
- particular font does not represent an endorsement of any specific
48
- product or manufacturer.]
49
-
50
- notes:
51
- - |
52
- Alif (ا) should be romanized as follows:
53
-
54
- a. Initially, it indicates that the word begins with a vowel or
55
- diphthong; the alif itself is not romanized, but rather the short vowel
56
- it “carries” is romanized; e.g., ميړ أَسَلم ژرَندَه → Mī Aslam Zhrandah
57
- b. When it carries a maddah (آ) (see vowel table, row 6), it
58
- represents ā; e.g., آب بَند → Āb Band.
59
- c. Medially and finally it represents ā (see vowel table, row 5);
60
- e.g., ماڼۍ → Māṉêy
61
- d. Medially and finally in words of Arabic origin, alif may serve
62
- as the bearer of hamzah, e.g. رأس → ra’s.
63
-
64
- - Occasionally the letter sequences سه ,زه ,که, and گه occur without
65
- intervening vowels. They may be romanized k·h, z·h, s·h, and g·h in
66
- order to differentiate these romanizations from the digraphs kh, zh,
67
- sh, and gh, which are used to represent the letters ش ,ژ ,خ, and غ.
68
- Additionally, the Pashto letters څ and ځ, routinely romanized ts and
69
- dz, may be alternatively romanized s and z when for special reasons
70
- it is desired that confusion be avoided with the character sequences
71
- تس (ts) and دز (dz), respectively.
72
-
73
- - "The vagaries of written Afghan languages, as pertains to spacing
74
- and word division, are addressed as follows:
75
- Spaces may be added to or subtracted from Afghan words written in
76
- Arabic script, for the purposes of standardization. This is
77
- particularly relevant when the words are hand-written, are rendered
78
- “artistically”, or express other such non-standard flourishes, as long
79
- as the sense of the toponym, word, or phrase is not compromised.
80
- Romanized toponyms are typically divided into constituent words
81
- (spaces and other grammatical rules applied) when those words can stand
82
- independently, for purposes of standardization and minimization of
83
- confusion, particularly in situations where Afghan writers are
84
- inconsistent in their application of spacing and word breaks. When the
85
- Afghan word or suffix is only used in combination with other nouns or
86
- adjectives, then it should be appended to the preceding word in its
87
- romanization. This includes (but is not limited to) - ābā , -zaī, -zā
88
- ah, - ū, -wand, -gaī, -kaī, -pūr, - ēsh, -lar, -lī, -lū and ullāh, as,
89
- for example, seen in Raḩmatābād (رحمت آباد) and Raḩmatullāh (رحمت االله),
90
- but Raḩmat Khēl (رحمتخيل) and Raḩmat Shahr (رحمتشهر)."
91
-
92
- - The one-letter words د (Pashto) and و (Dari) are romanized dê and
93
- wa, respectively.
94
-
95
- - The word الله, meaning God, should always be romanized Allāh,
96
- except as specified in note 3. Note that the Unicode value FDF2 spells
97
- Allāh, but omits the alif in some common fonts, including Times New
98
- Roman. If in doubt, try in Arial Unicode MS to verify. Also note that
99
- the “dagger alif” ( ٰ ) above the second ل (lām) in the word الله, is not
100
- written but should be romanized ā, like a full-size alif.
101
-
102
- - In names of Arabic origin, the l of the definite article al is
103
- assimilated before the ‘sun letters’ t, s̄, d, z̄, r, z, s, sh, ş, ẕ, ţ, z̧, l and n.
104
- In its romanization, the article should be separated from the name it
105
- precedes and should not be capitalized except at the beginning of a
106
- name, e.g. جبل السراج→ Jabal
107
- as Sarāj
108
-
109
- - In Arabic names, a shaddah, ّ is used to denote the doubling of a
110
- particular consonant character, e.g. ُم َح َمد → Muḩammad. However, in
111
- Pashto this ‘doubling’ is frequently omitted in both Perso-Arabic script
112
- and the resulting romanization. Guidance on doubling may be taken from
113
- an authoritative names source, such as an Afghan government source or
114
- Pashto dictionary; for example, it is usual to see Ḩājī without and
115
- ‘Abbās with the doubled consonant. The doubled y consonant is almost
116
- always retained, as in Sayyid or Qayyūm.
117
-
118
- - In Afghan names which contain an iẕāfah, it should be romanized as
119
- -e or –ye according to
120
- common pronunciation, but generally, -e is used if the preceding word
121
- ends with a consonant other
122
- than silent heh, and -ye if the preceding word ends with a vowel
123
- sound e.g. غر ِحصار → Ghar-e ِ
124
- Ḩişār; َقل َع ٔه َنو → Qal‘ah-ye Now. Scholarly sources indicate that
125
- heh is silent in darah and qal‘ah (thus darah-ye, qal‘ah-ye), but
126
- lightly spoken in kōh and chāh (thus kōh-e, chāh-e).
127
-
128
- - The character sequence خو, where followed by ا or ی should be
129
- romanized khwā or khwī, although the w is either not pronounced, or
130
- only weakly so, as in خواجه → khwājah.
131
-
132
- - Plural nouns ending in -hā or -ān should always be romanized as a
133
- single word, regardless of whether a space appears in a Perso-Arabic
134
- script source.
135
-
136
- - Unicode values listed in the tables above are required to ensure
137
- standardization and to minimize confusion from competing
138
- representations of a given character. It should be noted that the
139
- Persian Unicode value 0643 or FEDA( ك Unicode value 06A9) is
140
- recommended rather than the Arabic( ک or FED9), the Persian گ (Unicode
141
- value 06AF) is recommended rather than ګ (Unicode value 06AB) or ڰ
142
- (Unicode value 06B0) or ك (Unicode value 0643 or FEDA or FED9), and the
143
- Pashto character ځ (Unicode value 0681) is recommended rather than the
144
- heh with a dot above and a dot below (no Unicode value). For the letter ی
145
- in its many variations, care must be exercised to follow this romanization
146
- guide's recommendations to eliminate confusion for search engines
147
- and software. BGN/PCGN does not use the Unicode encoding FEEF for the
148
- character ی in any Afghan word.
149
-
150
- - |
151
- An inventory of letter-diacritic combinations in addition to the
152
- unmodified letters of the basic Roman script is:
153
-
154
- ‘ (U+2018)
155
- Ā (U+0100)
156
- Á (U+00C1)
157
- Ḏ (U+0044+0331)
158
- Ē (U+0112)
159
- Ê (U+00CA)
160
- Ḩ (U+1E28)
161
- Ī (U+012A)
162
- N-bar-top (U+004E+0304)
163
- Ō (U+014C)
164
- R-bar-bottom (U+0052+0331)
165
- Ş (U+015E)
166
- S-bar-top (U+0053+0304)
167
- Ṯ (U+0054+0331)
168
- Ţ (U+0162)
169
- Ū (U+016A)
170
- Z-comma-bottom (U+005A+0327)
171
- Z-bar-top (U+005A+0304)
172
- Ẕ (U+005A+0331)
173
- ẔH (U+005A+0048+035F)
174
-
175
-
176
- ʼ (U+2019)
177
- ā (U+0101)
178
- á (U+00E1)
179
- ḏ (U+0064+0331)
180
- ē (U+0113)
181
- ê (U+00EA)
182
- ḩ (U+1E29)
183
- ī (U+012B)
184
- n-bar-top (U+006E+0304)
185
- ō (U+014D)
186
- r-bar-bottom (U+0072+0331)
187
- ş (U+015F)
188
- s-bar-top (U+0073+0304)
189
- ṯ (U+0074+0331)
190
- ţ (U+0163)
191
- ū (U+016B)
192
- z-comma-bottom (U+007A+0327)
193
- z-bar-top (U+007A+0304)
194
- ẕ (U+007A+0331)
195
- zh-under-bar (U+007A+0068+035F)
196
-
197
- - The Romanization columns show only lowercase forms but, when
198
- romanizing, uppercase and lowercase Roman letters as appropriate should
199
- be used.
200
-
201
-
202
- tests:
203
- - source: بَغْلَان
204
- expected: baghlān
205
- - source: پُوټَكَى
206
- expected: pōtakay
207
- - source: شِيرِين تَگَاب
208
- expected: šīṟīn t̄agāb
209
- - source: کُوْټ
210
- expected: kōt
211
- - source: ثَابِر
212
- expected: s̄ābiṟ
213
- - source: جَبَل السَرَاج
214
- expected: jabal as saṟāj
215
-
216
- map:
217
- characters:
218
-
219
- # Vowel, Diphthong and Diacritical Characters
220
-
221
- # Or 'ē'. The character ی should be romanized ay or ē according to
222
- # its root language or local pronunciation. In case of uncertainty a
223
- # reference source (such as the Fairchild Aerial Surveys map series, or a
224
- # BGN/PCGN approved policy document/list of recommended spellings) should
225
- # be consulted.
226
- '\u06CC': 'ay'
227
-
228
- '\u06D0': 'ē' # Or 'ay'
229
- '\u06CC': 'ay' # Or 'āy'.
230
-
231
- # Both the combination ay and aī are available to romanize this
232
- # character according to its root language or local pronunciation. In
233
- # cases where the sound is uncertain ay is the default romanization in
234
- # BGN/PCGN standardization procedures
235
- '\u06CC': 'ā'
236
-
237
- '\u06CD': 'əy' # Or 'ay'
238
- '\u0621': '’'
239
- '\u0674':
240
- - '-i-'
241
- - 'e'
242
- - 'ī'
243
-
244
- # Other Diacritical Marks and Language Conventions
245
-
246
- '\u0627': 'ay' # Or 'āy'
247
- '\u06CC': 'ya' # Or 'yā'
248
-
249
- '\u0648': 'w'
250
- '\u06C0': '. . .h-e'
251
-
252
-
253
-
254
- # special rules
255
-
256
- '\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
257
- '\ufdf2': 'Allāh' # See note 5
258
-
259
- # pointing
260
-
261
- '\u064E': # fatha
262
- - 'a'
263
- - 'â'
264
-
265
- '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
266
- '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
267
-
268
- # Both e and i are available to romanize this short vowel,
269
- # depending on local usage and/or root language. In cases where the sound
270
- # is uncertain, i is the default romanization in BGN/PCGN standardization
271
- # procedures.
272
- '\u0650':
273
- - 'i'
274
- - 'e'
275
-
276
- '\u0650\b' : '-e' # ِ kasra at the end of a word
277
-
278
- # Both o and u are available to romanize this short vowel,
279
- # depending on local usage and/or root language. In cases where the sound
280
- # is uncertain, u is the default romanization in BGN/PCGN standardization
281
- # procedures.
282
- '\u064f': # ُ damma
283
- - 'u'
284
- - 'o'
285
-
286
- # An alif with mad ( آ ) is written only in the initial position by
287
- # BGN/PCGN standardization procedures, in keeping with Persian language
288
- # family standards of use of the Arabic alphabet. The same letter written
289
- # in a medial or final position is written . . .
290
- '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
291
- '\u0622' : 'ā' # آ
292
-
293
- '\u0659': # ٙ zwarakay
294
- - 'ə'
295
- - 'ê'
296
-
297
- '\u0648': 'ō'
298
-
299
- '\u0648':
300
- - 'u'
301
- - 'ū'
302
-
303
- '\u064e\u0648': # ـَو
304
- - 'aw'
305
- - 'āw'
306
-
307
- '\u06CC': 'i' # Or 'ī'
308
-
309
- '\u0649\u0670': 'ā' # ىٰ
310
-
311
-
312
- '\u0652' : '' # ْ sokoon
313
-
314
- # special pointed letters
315
- '\u0639\u064e' : '‘a' # عَ
316
- '\u0639\u0650' : '‘i' # عِ
317
- '\u0639\u064f' : '‘ū' # عُ
318
- # handle MacOS regex difference
319
- '\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
320
-
321
- '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
322
- '\u0650\u06cc' : 'ī' # ـِي kasra followed by ي
323
- '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
324
- '\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي
325
- '\u064f\u0648' : 'ō' # ـُو damma followed by و
326
- '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
327
- '\u064e\u0648\u0652' : 'aw' # ـَوْ
328
- '\u064e\u0648' : 'ow' # ـَو
329
- '\u064e\u064a\u0652' : 'ay' # ـَيْ
330
- '\u0650\u06cc\u0651\u064e' : 'īy' # ـِيَّ
331
- '\u064e\u064a' : 'aī' # ـَي
332
- '\u064e\u06cc' : 'aī' # ـَي
333
- '\u0674': '-e' # ٴ
334
- '\u0654': '-e' # ٔ
335
- # - '-ye'
336
-
337
-
338
-
339
- # ta' marboota
340
- '\u0629' : 'at' # ة in the middle of the sentence
341
- '\u0629$' : 'ah'
342
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
343
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
344
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
345
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
346
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
347
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
348
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
349
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
350
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
351
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
352
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
353
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
354
-
355
-
356
- # shadda
357
-
358
- '\u0628\u0651' : 'bb' # ب
359
- '\u062a\u0651' : 'tt' # ت
360
- '\u062b\u0651' : 'thth' # ث
361
- '\u062c\u0651' : 'jj' # ج
362
- '\u062d\u0651' : 'ẖẖ' # ح
363
- '\u062e\u0651' : 'khkh' # خ
364
- '\u062f\u0651' : 'dd' # د
365
- '\u0630\u0651' : 'z̄z̄' # ذ
366
- '\u0631\u0651' : 'rr' # ر
367
- '\u0632\u0651' : 'zz' # ز
368
- '\u0633\u0651' : 'ss' # س
369
- '\u0634\u0651' : 'sh' # ش
370
- '\u0635\u0651' : 'şş' # ص
371
- '\u0636\u0651' : 'ḏḏ' # ض
372
- '\u0637\u0651' : 'ţţ' # ط
373
- '\u0638\u0651' : 'z̧z̧' # ظ
374
- '\u063a\u0651' : 'ghgh' # غ
375
- '\u0641\u0651' : 'ff' # ف
376
- '\u0642\u0651' : 'qq' # ق
377
- '\u0643\u0651' : 'kk' # ك
378
- '\u0644\u0651' : 'll' # ل
379
- '\u0645\u0651' : 'mm' # م
380
- '\u0646\u0651' : 'nn' # ن
381
- '\u0647\u0651' : 'hh' # ه
382
- '\u0648\u0651' : 'ww' # و
383
- '\u064a\u0651' : 'yy' # ي
384
-
385
-
386
- '\u0621' : '’' # ء
387
- '\u0626' : '’' # ئ
388
-
389
- '\u0623' : '' # أ
390
- '\u0625' : '' # إ
391
- '\u0627' : 'ā' # ا
392
-
393
- # See note B
394
- '\b\u0627\u0644' : 'al ' # ال
395
- # '\uFE8E' : '' # ﺎ
396
-
397
-
398
- # Sun letters
399
- '\b\u0627\u0644\u062a' : 'at̄ t̄' # الت
400
- '\b\u0627\u0644\u062b' : 'as̄ s̄' # الث
401
- '\b\u0627\u0644\u062f' : 'aḏ ḏ' # الد
402
- '\b\u0627\u0644\u0630' : 'az̄ z̄' # الذ
403
- '\b\u0627\u0644\u0631' : 'aṟ ṟ' # الر
404
- '\b\u0627\u0644\u0632' : 'az z' # الز
405
- '\b\u0627\u0644\u0633' : 'as s' # الس
406
- '\b\u0627\u0644\u0634' : 'aš š' # الش
407
- '\b\u0627\u0644\u0635' : 'as̱ s̱' # الص
408
- '\b\u0627\u0644\u0636' : 'ad͟z d͟z' # الض
409
- '\b\u0627\u0644\u0637' : 'aṯ ṯ' # الط
410
- '\b\u0627\u0644\u0638' : 'aẕ ẕ' # الظ
411
- '\b\u0627\u0644\u0644' : 'al l' # الل
412
- '\b\u0627\u0644\u0646' : 'an n' # الن
413
-
414
- # consonant characters
415
-
416
- '\u0628' : 'b' # ب
417
- '\u067E' : 'p' # پ
418
- '\u062A' : 't̄' # ت
419
- '\u067C' : 't' # ټ
420
- '\u062B' : 's̄' # ث
421
- '\u062C' : 'j' # ج
422
- '\u0686' : 'č' # ‫چ‬
423
-
424
- # The variant form ج is seen infrequently and does not have a single Unicode encoding.
425
- '\u0681' : 'j̄' # Note 2 # ‫ځ
426
- '\u0685' : 'c' # Note 2 # ‫څ
427
-
428
- '\u062D' : 'ẖ' # ح
429
- '\u062E' : 'kh' # خ
430
- '\u062F' : 'ḏ' # د
431
- '\u0689' : 'd' # ‫ډ‬
432
- '\u0630' : 'z̄' # ذ
433
- '\u0631' : 'ṟ' # ر
434
- '\u0693' : 'r' # ړ
435
- '\u0632' : 'z' # ز
436
- '\u0698' : 'ž' # ‫ژ‬
437
- '\u0696' : 'ž̲' # ږ
438
- '\u0633' : 's' # س
439
- '\u0634' : 'š' # ش
440
- '\u069A' : 'š̱' # ښ
441
- '\u0635' : 's̱' # ص
442
- '\u0636' : 'd͟z' # ض
443
- '\u0637' : 'ṯ' # ط
444
- '\u0638' : 'ẕ' # ظ
445
- '\u0639' : '’' # ع
446
- '\u063A' : 'gh' # غ
447
- '\u0641' : 'f' # ف
448
- '\u0642' : 'q' # ق
449
- '\u0643' : 'k' # ك
450
- '\u06A9' : 'k' # ک
451
- '\u06AF' : 'g' # گ
452
- '\u0644' : 'l' # ل‫‬
453
- '\u0645' : 'm' # م
454
- '\u0646' : 'n' # ن
455
- '\u06BC' : 'ṉ' # ڼ
456
- '\u0648' : 'w' # و
457
- '\u0647' : 'h' # ه
458
- '\u0649' : 'y' # ي
459
- '\u064a' : 'y' # ي