interscript 0.1.9 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,108 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1981
4
- language: iso-639-2:arm
5
- source_script: Armn
6
- destination_script: Latn
7
- name: BGN/PCGN 1981 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/810208/ROMANIZATION_OF_ARMENIAN.pdf
9
- creation_date: 2013
10
- confirmation date: 2019-06
11
- description: |
12
- The BGN/PCGN system for Armenian was designed for use in romanizing
13
- names written in the Armenian alphabet. The Roman letters and letter
14
- combinations shown as equivalents to the Armenian characters reflect
15
- the eastern variety of Armenian, i.e. the language spoken in the
16
- Republic of Armenia.
17
-
18
- notes:
19
- - The character ե should be romanized ye initially and after the vowel characters ա, ե, է, ը, ի, ո, ու and օ. In all other instances, it should be romanized e.
20
- - The character ո should be romanized vo initially except in the word ով, which should be romanized ov. In all other instances, it should be romanized o.
21
- - In Soviet-era sources this upper-case digraph character is found as Եի (Unicode encoding 0535+056B).
22
- - This lower-case character may be seen either in digraph form as եւ (Unicode encoding 0565+0582) or in single character form as և (Unicode encoding 0587).
23
- - The characters ԵՎ, եւ and և should be romanized yev initially, in isolation, and after the vowel characters ա, ե, է, ը, ի, ո, ու, and օ. In all other instances these characters should be romanized ev.
24
- - All apostrophes appearing in Armenian romanization are encoded Unicode 2019.
25
- - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase Roman letters as appropriate should be used.
26
-
27
- tests:
28
-
29
- map:
30
- characters:
31
- '\u0531' : 'A'
32
- '\u0532' : 'B'
33
- '\u0533' : 'G'
34
- '\u0534' : 'D'
35
- '\u0535' : 'Ye' #treated same as Russian 'ye'
36
- '\u0536' : 'Z'
37
- '\u0537' : 'E'
38
- '\u0538' : 'Y'
39
- '\u0539' : 'T\u2019'
40
- '\u053a' : 'Zh'
41
- '\u053b' : 'I'
42
- '\u053c' : 'L'
43
- '\u053d' : 'Kh'
44
- '\u053e' : 'Ts'
45
- '\u053f' : 'K'
46
- '\u0540' : 'H'
47
- '\u0541' : 'Dz'
48
- '\u0542' : 'Gh'
49
- '\u0543' : 'Ch'
50
- '\u0544' : 'M'
51
- '\u0545' : 'Y'
52
- '\u0546' : 'N'
53
- '\u0547' : 'Sh'
54
- '\u0548' : 'O' # VO initially and U when in combination with \u0552
55
- '\u0549' : u'Ch\u2019'
56
- '\u054a' : 'P'
57
- '\u054b' : 'J'
58
- '\u054c' : 'Rr'
59
- '\u054d' : 'S'
60
- '\u054e' : 'V'
61
- '\u054f' : 'T'
62
- '\u0550' : 'R'
63
- '\u0551' : 'Ts\u2019'
64
- '\u0548\u0552' : 'U'
65
- '\u0548\u0582' : 'U'
66
- '\u0553' : 'P\u2019'
67
- '\u0554' : 'K\u2019'
68
- '\u0555' : 'O'
69
- '\u0556' : 'F'
70
- '\u0561' : 'a'
71
- '\u0562' : 'b'
72
- '\u0563' : 'g'
73
- '\u0564' : 'd'
74
- '\u0565' : 'e' # ye initially
75
- '\u0566' : 'z'
76
- '\u0567' : 'e'
77
- '\u0568' : 'y'
78
- '\u0569' : u't\u2019'
79
- '\u056a' : 'zh'
80
- '\u056b' : 'i'
81
- '\u056c' : 'l'
82
- '\u056d' : 'kh'
83
- '\u056e' : 'ts'
84
- '\u056f' : 'k'
85
- '\u0570' : 'h'
86
- '\u0571' : 'dz'
87
- '\u0572' : 'gh'
88
- '\u0573' : 'ch'
89
- '\u0574' : 'm'
90
- '\u0575' : 'y'
91
- '\u0576' : 'n'
92
- '\u0577' : 'sh'
93
- '\u0578' : 'o' # vo initially and u when in combination with \u0582
94
- '\u0579' : 'ch\u2019'
95
- '\u057a' : 'p'
96
- '\u057b' : 'j'
97
- '\u057c' : 'rr'
98
- '\u057d' : 's'
99
- '\u057e' : 'v'
100
- '\u057f' : 't'
101
- '\u0580' : 'r'
102
- '\u0581' : 'ts\u2019'
103
- '\u0578\u0582' : 'u'
104
- '\u0583' : 'p\u2019'
105
- '\u0584' : 'k\u2019'
106
- '\u0585' : 'o'
107
- '\u0586' : 'f'
108
- '\u0587' : 'ev' # yev initially
@@ -1,111 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1993
4
- language: iso-639-2:aze
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: AZERBAIJANI TABLE OF CORRESPONDENCES CYRILLIC-ROMAN -- BGN/PCGN 1993 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816656/TABLE_OF_CORRESPONDENCES_FOR_AZERBAIJANI.pdf
9
- creation_date: 1993
10
- confirmation date: 2019-06
11
- description: |
12
- Azerbaijani, also known as Azeri, is the official language of the Republic of Azerbaijan. In 1991, the Azerbaijani government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Azerbaijani Roman-alphabet spellings are not available, this table can be used to convert Azerbaijani Cyrillic spellings.
13
-
14
- notes:
15
-
16
- - The special letter Ə, ə known as schwa, should be reproduced in that form whenever encountered. The characters Ə (Unicode 04D8) and ə (Unicode 04D9) should be used for schwa when writing in the Cyrillic script, but characters Ə (Unicode 018F) and ə (Unicode 0259) should be used when writing in the Roman alphabet. In those instances when it cannot be reproduced, however, the letter Ä ä may be substituted for it (see below).
17
-
18
- - The obsolete characters й, э, ю, and я should be romanized ẏ, ė, yu., and ya.
19
-
20
- - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character. It is not known whether there exists an uppercase ‘J’ specific to the Cyrillic character set.
21
-
22
- - |
23
- An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
24
- Ğ (U+011E), ğ (U+011F)
25
- Ə (U+018F), ə (U+0259)
26
- İ (U+0130), ı (U+0131)
27
- Ö (U+00D6), ö (U+00F6)
28
- Ü (U+00DC), ü (U+00FC)
29
- Ç (U+00C7), ç (U+00E7)
30
- Ş (U+015E), ş (U+015F)
31
-
32
- - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
33
-
34
- tests:
35
- - source: Азәрбајҹан әлифбасы
36
- expected: Azərbaycan əlifbası
37
- - source: |
38
- Бүтүн инсанлар ләјагәт вә һүгугларына ҝөрә азад бәрабәр доғулурлар.
39
- Онларын шүурлары вә виҹданлары вар вә бир-бирләринә мүнасибәтдә гардашлыг руһунда давранмалыдырлар.
40
- expected: |
41
- Bütün insanlar ləyaqət və hüquqlarına görə azad bərabər doğulurlar.
42
- Onların şüurları və vicdanları var və bir-birlərinə münasibətdə qardaşlıq ruhunda davranmalıdırlar.
43
-
44
-
45
- map:
46
- characters:
47
- "\u0410": "A" # А
48
- "\u0411": "B" # Б
49
- "\u0412": "V" # В
50
- "\u0413": "Q" # Г
51
- "\u0492": "\u011E" # Ғ
52
- "\u0414": "D" # Д
53
- "\u0415": "E" # Е
54
- "\u04D8": "\u018F" # Ә
55
- "\u0416": "J" # Ж
56
- "\u0417": "Z" # З
57
- "\u0418": "\u0130" # И
58
- "\u042B": "I" # Ы
59
- "\u0408": "Y" # Ј
60
- "\u041A": "K" # К
61
- "\u049C": "G" # Ҝ
62
- "\u041B": "L" # Л
63
- "\u041C": "M" # М
64
- "\u041D": "N" # Н
65
- "\u041E": "O" # О
66
- "\u04E8": "\u00D6" # Ө
67
- "\u041F": "P" # П
68
- "\u0420": "R" # Р
69
- "\u0421": "S" # С
70
- "\u0422": "T" # Т
71
- "\u0423": "U" # У
72
- "\u04AE": "\u00DC" # Ү
73
- "\u0424": "F" # Ф
74
- "\u0425": "X" # Х
75
- "\u04BA": "H" # Һ
76
- "\u0427": "\u00C7" # Ч
77
- "\u04B8": "C" # Ҹ
78
- "\u0428": "\u015E" # Ш
79
-
80
- "\u0430": "a" # а
81
- "\u0431": "b" # б
82
- "\u0432": "v" # в
83
- "\u0433": "q" # г
84
- "\u0493": "\u011F" # ғ
85
- "\u0434": "d" # д
86
- "\u0435": "e" # е
87
- "\u04D9": "\u0259" # ә
88
- "\u0436": "j" # ж
89
- "\u0437": "z" # з
90
- "\u0438": "i" # и
91
- "\u044B": "\u0131" # ы
92
- "\u0458": "y" # ј
93
- "\u043A": "k" # к
94
- "\u049D": "g" # ҝ
95
- "\u043B": "l" # л
96
- "\u043C": "m" # м
97
- "\u043D": "n" # н
98
- "\u043E": "o" # о
99
- "\u04E9": "\u00F6" # ө
100
- "\u043F": "p" # п
101
- "\u0440": "r" # р
102
- "\u0441": "s" # с
103
- "\u0442": "t" # т
104
- "\u0443": "u" # у
105
- "\u04AF": "\u00FC" # ү
106
- "\u0444": "f" # ф
107
- "\u0445": "x" # х
108
- "\u04BB": "h" # һ
109
- "\u0447": "\u00E7" # ч
110
- "\u04B9": "c" # ҹ
111
- "\u0448": "\u015F" # ш
@@ -1,188 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2007
4
- language: iso-639-2:rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BASHKIR TABLE OF CORRESPONDENCES CYRILLIC-ROMAN BGN/PCGN 2007 Agreement
8
- alias:
9
- ogc11122:
10
- code: bak_Cyrl2Latn_BGN_2007
11
- description: Bashkir 2007 BGN/PCGN Cyrillic-Latin Table of Correspondences
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
13
- creation_date: 2007
14
- confirmation_date: 2019
15
- description: |
16
- Bashkir is an official language within Respublika Bashkortostan, one of the
17
- republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
18
- which case it should be romanized by means of the Cyrillic-Roman table of
19
- correspondences given below
20
-
21
- notes:
22
- - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
23
- - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
24
- - The letter w is used between or after vowels.
25
- - The letter w is used after e, u, ö and ə.
26
- - |
27
- An inventory of letter-diacritic combinations, with their Unicode encoding,
28
- in addition to the unmodified letters of the basic Roman script is:
29
- Ğ (U+011E) ğ (U+011F)
30
- Ź (U+0179) ź (U+017A)
31
- Ë (U+00CB) ë (U+00EB)
32
- Ñ (U+00D1) ñ (U+00F1)
33
- Ö (U+00D6) ö (U+00F6)
34
- Ś (U+015A) ś (U+015B)
35
- Ü (U+00DC) ü (U+00FC)
36
- Ç (U+00C7) ç (U+00E7)
37
- Ş (U+015E) ş (U+015F)
38
- Ə (U+018F) ə (U+0259)
39
- - |
40
- The Roman-script columns show only lowercase forms but, when applying the table,
41
- uppercase and lowercase Roman letters as appropriate should be used.
42
-
43
- tests:
44
- # adopted http://www.eki.ee/knab/lat/kblba.pdf
45
- - source: Васйылға
46
- expected: Wasyılğa
47
- - source: Еҙем
48
- expected: Yeźem
49
- - source: Раевка
50
- expected: Raevka
51
- - source: Сәйетҡол
52
- expected: Səyetqol
53
- - source: Ауырғазы
54
- expected: Awırğazı
55
- - source: Бурһыҡтау
56
- expected: Burhıqtaw
57
- - source: Мәләүез
58
- expected: Mələwez
59
- - source: Ҡыҙылъяр
60
- expected: Qıźılyar
61
- # adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
62
- - source: кемдең
63
- expected: kemdeñ
64
- - source: кем
65
- expected: kem
66
- - source: был
67
- expected: bıl
68
- - source: ошо
69
- expected: oşo
70
- - source: быларҙың
71
- expected: bılarźıñ
72
- - source: һеҙҙән
73
- expected: heźźən
74
- - source: һин
75
- expected: hin
76
- - source: һеҙҙең
77
- expected: heźźeñ
78
-
79
- map:
80
- rules:
81
- # note[1]
82
- - pattern: \b\u0412(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
83
- result: "W"
84
- - pattern: \b\u0432(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
85
- result: "w"
86
- # note[2]
87
- - pattern: \b\u0415
88
- result: "Ye"
89
- - pattern: \b\u0435
90
- result: "ye"
91
- - pattern: (?=\b)\u0415(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
92
- result: "Ye"
93
- - pattern: (?=\b)\u0435(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
94
- result: "ye"
95
-
96
- # note[3] # note[4]
97
- - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0423\u04AE]
98
- result: W
99
- - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0443\u04AF]
100
- result: w
101
-
102
-
103
- characters:
104
- '\u0410': 'A' # А
105
- '\u0411': 'B' # Б note[1]
106
- '\u0412': 'V' # В
107
- '\u0413': 'G' # Г
108
- '\u0492': "\u011E" # Ғ
109
- '\u0414': 'D' # Д
110
- '\u0498': "\u0179" # Ҙ
111
- '\u0415': 'E' # Е note[2]
112
- '\u0401': 'Ë' # Ё
113
- '\u0416': 'J' # Ж
114
- '\u0417': 'Z' # З
115
- '\u0418': 'I' # И
116
- '\u0419': 'Y' # Й
117
- '\u041A': 'K' # К
118
- '\u04A0': 'Q' # Ҡ
119
- '\u041B': 'L' # Л
120
- '\u041C': 'M' # М
121
- '\u041D': 'N' # Н
122
- '\u04A2': 'Ñ' # Ң
123
- '\u041E': 'O' # О
124
- '\u04E8': "Ö" # Ө
125
- '\u041F': 'P' # П
126
- '\u0420': 'R' # Р
127
- '\u0421': 'S' # С
128
- '\u04AA': 'Ś' # Ҫ
129
- '\u0422': 'T' # Т
130
- '\u0423': 'U' # У
131
- '\u04AE': 'Ü' # Ү note[3]
132
- '\u0424': 'F' # Ф
133
- '\u0425': 'X' # Х
134
- '\u04BA': 'H' # Һ
135
- '\u0426': 'Ts' # Ц
136
- '\u0427': 'Ç' # Ч
137
- '\u0428': 'Ş' # Ш
138
- '\u0429': 'ŞÇ' # Щ
139
- '\u042A': '' # Ъ
140
- '\u042B': 'I' # Ы
141
- '\u042C': '' # Ь
142
- '\u042D': 'E' # Э
143
- '\u04D8': "\u018F" # Ә
144
- '\u042E': 'Yu' # Ю
145
- '\u042F': 'Ya' # Я
146
-
147
- '\u0430': 'a' # а
148
- '\u0431': 'b' # б
149
- '\u0432': 'v' # в note[1]
150
- '\u0433': 'g' # г
151
- '\u0493': "\u011F" # ғ
152
- '\u0434': 'd' # д
153
- '\u0499': 'ź' # ҙ
154
- '\u0435': 'e' # e note[2]
155
- '\u0451': 'yo' # ё
156
- '\u0436': 'j' # ж
157
- '\u0437': 'z' # з
158
- '\u0438': 'i' # и
159
- '\u0439': 'y' # й
160
- '\u043A': 'k' # к
161
- '\u04A1': 'q' # ҡ
162
- '\u043B': 'l' # л
163
- '\u043C': 'm' # м
164
- '\u043D': 'n' # н
165
- '\u04A3': 'ñ' # ң
166
- '\u043E': 'o' # о
167
- '\u04E9': "\u00F6" # ө
168
- '\u043F': 'p' # п
169
- '\u0440': 'r' # р
170
- '\u0441': 's' # с
171
- '\u04AB': 'ś' # ҫ
172
- '\u0442': 't' # т
173
- '\u0443': 'u' # у
174
- "\u04AF": 'ü' # ү note[3]
175
- '\u0444': 'f' # ф
176
- '\u0445': 'x' # х
177
- '\u04BB': 'h' # һ
178
- '\u0446': 'ts' # ц
179
- '\u0447': 'ç' # ч
180
- '\u0448': 'ş' # ш
181
- '\u0449': 'şç' # щ
182
- '\u044A': '' # ъ
183
- '\u044B': "\u0131" # ы
184
- '\u044C': '' # ь
185
- '\u044D': 'e' # э
186
- '\u04D9': "\u0259" # ә
187
- '\u044E': 'yu' # ю
188
- '\u044F': 'ya' # я
@@ -1,329 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2008
4
- language: bal
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ROMANIZATION OF BALUCHI -- BGN/PCGN 2008 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693687/ROMANIZATION_OF_BALUCHI.pdf
9
- creation_date: 2008
10
- confirmation_date: 2017-11
11
- description: |
12
- The following is the BGN/PCGN-approved romanization
13
- system for deriving standard spellings of Baluchi
14
- geographic names. The romanization system is based on
15
- the Hunterian system of romanization, which has been
16
- used by the Surveys of India and Pakistan for
17
- romanizing Baluchi geographic names for more than one
18
- hundred years. The romanization system is compatible
19
- with all dialects of Baluchi, including Eastern
20
- Baluchi, Western Baluchi, and Southern Baluchi.
21
-
22
- The BGN/PCGN system laid out below includes diacritical
23
- marks in order that the original script can be derived
24
- from the romanized form (i.e. it is reversible). For
25
- desk users requiring a diacritic-free form, these
26
- diacritics can simply be removed. In almost every case
27
- the same basic Roman-script characters are kept as are
28
- used in the Hunterian system. The BGN/PCGN forms have
29
- further been designed to harmonize with the BGN/PCGN
30
- Urdu romanization system. In rigorous romanization
31
- (i.e. including diacritics), retroflexion is marked by
32
- a sub-dot, and aspiration is marked by an apostrophe,
33
- where confusion with fricative digraphs could arise.
34
- For letters used only in Arabic loan words, the
35
- rigorous forms have further been designed to harmonize
36
- with the BGN/PCGN Persian romanization system.
37
-
38
- notes:
39
- - Occasionally, sequences of /z/ or /s/ plus /h/ may be
40
- encountered, i.e. z·h, s·h. These may be romanized with the
41
- Unicode 'center dot' (U+00B7) separating the two letters,
42
- to distinguish them from the digraphs /zh/ and /sh/.
43
-
44
- - The character ة is found very rarely in Baluchi, principally in certain Arabic religious terms, e.g. zakāt
45
- ('alms'). It should be romanized t.
46
-
47
- - When the letters ال are found, representing the Arabic
48
- definite article, the ل is assimilated to a following 'sun letter' ت, ث, د, ذ,
49
- ر, ز, س, ش, ص, ض, ط, ظ, ل or ن and is romanized t, t͟h, d, d͟h, r, z, s, sh, ş, ẕ, ţ, z̧, l, n accordingly.
50
-
51
- - In romanization, the suffixes ءَ (-ā, singular definite)
52
- and ءِ (-ay, possessive) are connected to the previous word
53
- by a hyphen, though they are usually written separately.
54
-
55
- - The word for 'and', written as و or ءُ, should be
56
- romanized as -u-, linked by hyphens to the two words it
57
- connects; e.g.,
58
- سند و ہند → Sind-u-Hind ('The Gangetic Plain').
59
-
60
- - Except as specified in notes 4 and 5, word division in romanization should follow word division in the Baluchi script.
61
-
62
- - Note that the short vowels in the Baluchi examples are not pointed.
63
-
64
- - Certain initial, medial and final characters are not
65
- readily available in a Unicode-encoded font in a standalone form.
66
-
67
- - The Romanization columns show only lowercase forms but,
68
- when romanizing, uppercase and lowercase Roman letters as
69
- appropriate should be used.
70
-
71
- tests:
72
- # commented tests are blocked by https://github.com/interscript/interscript/issues/620
73
- # 'cultivable patch of riverbed'
74
- - source: بےنٹَگ
75
- expected: Benṭag
76
-
77
- # 'Japan'
78
- - source: جاپان
79
- expected: Jāpān
80
-
81
- - source: اَرَبِستان
82
- expected: Arabistān
83
-
84
- - source: بُنجاه
85
- expected: Bunjāh
86
-
87
- - source: بَلوچِستان
88
- expected: Balochistān
89
-
90
- # 'village'
91
- - source: حَلق
92
- expected: Ḩalq
93
-
94
- # 'foothills or skirts of a mountain'
95
- - source: دامان
96
- expected: Dāmān
97
-
98
- - source: ڈاڈَر
99
- expected: Ḍāḍar
100
-
101
- # 'tomb'
102
- - source: گُمبُذ
103
- expected: Gumbud͟h
104
-
105
- # 'crossroads'
106
- - source: چار راہ
107
- expected: Chār Rāh
108
-
109
- # 'market'
110
- - source: بازار
111
- expected: Bāzār
112
-
113
- - source: سےبِى
114
- expected: Sebī
115
-
116
- # - source: اِيشيا
117
- # expected: Eshyā
118
-
119
-
120
- # # 'homeland'
121
- # - source: وَطَن
122
- # expected: Waţan
123
-
124
- # 'Bandar Abbas'
125
- - source: عَبّاس
126
- expected: ‘Abbās
127
-
128
- # 'Taiwan'
129
- - source: فارموسا
130
- expected: Fārmosā
131
-
132
- - source: ڈاک
133
- expected: Ḍāk
134
-
135
- # 'stream, irrigated area, pasture'
136
- - source: مَلّ
137
- expected: Mall
138
-
139
- # - source: ہ یرات
140
- # expected: Herāt
141
-
142
- # 'Philippines'
143
- - source: فِلپائِن
144
- expected: Filpā’in
145
-
146
- - source: مُرگاپ
147
- expected: Murgāp
148
-
149
- # - source: مَرو
150
- # expected: Marw
151
-
152
-
153
- map:
154
- postrules:
155
- - pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
156
- result: "upcase"
157
-
158
- characters:
159
-
160
- # consonant characters
161
-
162
- '\u0628' : 'b' # ب
163
- '\u067E' : 'p' # پ
164
- '\u062a' : 't' # ت
165
- '\u0679' : 'ṭ' # see note 8 ٹ
166
- '\u067C' : 'ṭ' # see note 8 ټ
167
- '\u062B' : 't͟h' # see note 8 ث
168
- '\u067F' : 't͟h' # see note 8 ٿ
169
- '\u062c' : 'j' # ج
170
- '\u0686' : 'ch' # ‫چ‬
171
- '\u062d' : 'ḩ' # ح
172
- '\u062e' : 'kh' # خ
173
- '\u062f' : 'd' # د
174
- '\u0688' : 'ḍ' # ڈ
175
- '\u0689' : 'ḍ' # ‫ډ‬
176
- '\u0630' : 'd͟h' # ذ
177
- '\u0631' : 'r' # ر
178
- '\u0691' : 'ṛ' # see note 8 ڑ
179
- '\u0693' : 'ṛ' # see note 8 ړ
180
- '\u0632' : 'z' # ز
181
- '\u0698' : 'zh' # ‫ژ‬
182
- '\u0633' : 's' # س
183
- '\u0634' : 'sh' # ش
184
- '\u0635' : 'ş' # ص
185
- '\u0636' : 'ẕ' # ض
186
- '\u0637' : 'ţ' # ط
187
- '\u0638' : 'z̧' # ظ
188
- '\u0639' : '‘' # ع
189
- '\u063a' : 'gh' # غ
190
- '\u0641' : 'f' # ف
191
- '\u0642' : 'q' # ق
192
- '\u0643' : 'k' # ك
193
- '\u06A9' : 'k' # ک
194
- '\u06AF' : 'g' # ‫گ‬
195
- '\u0644' : 'l' # ل
196
- '\u0645' : 'm' # م
197
- '\u0646' : 'n' # ن
198
- '\u06BA' : 'ñ' # ں
199
- '\u0648' : 'w' # و
200
- '\u0647' : 'h' # ه
201
- '\u06C1' : 'h'
202
- '\u06BE' : 'h'
203
- '\u0621' : '’' # ء
204
- '\u0626' : '’' # ئ
205
- '\u0649' : 'y' # ي
206
- '\u064A' : 'y' # ي
207
-
208
-
209
- # Aspiration is only contrastive in Eastern Baluchi
210
- '\u0628\u06BE' : 'bh'
211
-
212
- # Aspiration is only contrastive in Eastern Baluchi
213
- '\u067E\u06BE' : 'ph'
214
-
215
- # Aspiration is only contrastive in Eastern Baluchi.
216
- # Apostrophe distinguishes from fricative /th/.
217
- '\u062A\u06BE' : 'th’'
218
-
219
- # Aspiration is only contrastive in Eastern Baluchi
220
- '\u0679\u06BE' : 'ṭh'
221
-
222
- # Aspiration is only contrastive in Eastern Baluchi
223
- '\u062C\u06BE' : 'jh'
224
-
225
- # Aspiration is only contrastive in Eastern Baluchi
226
- '\u0686\u06BE' : 'chh'
227
-
228
- # Aspiration is only contrastive in Eastern Baluchi.
229
- # Apostrophe distinguishes from fricative /dh/
230
- '\u062F\u06BE' : 'dh’'
231
-
232
- # Aspiration is only contrastive in Eastern Baluchi
233
- '\u0688\u06BE' : 'ḍh'
234
-
235
- # Aspiration is only contrastive in Eastern Baluchi
236
- '\u0691\u06BE' : '\u1E5B\u0068'
237
-
238
- # Aspiration is only contrastive in Eastern Baluchi.
239
- # Apostrophe distinguishes from fricative /kh/
240
- '\u06A9\u06BE' : 'kh’'
241
-
242
- # Aspiration is only contrastive in Eastern Baluchi.
243
- # Apostrophe distinguishes from fricative /gh/
244
- '\u06AF\u06BE' : 'gh’' #
245
- '\u0644\u0627' : 'lā' #
246
- '\u06A9\u0627' : 'kā' #
247
- '\u06AF\u0627' : 'gā' #
248
- '\u06A9\u0644' : 'kl' #
249
- '\u06AF\u0644' : 'gl' #
250
-
251
- # Vowels, Diphthongs, and Diacritical Marks
252
- '\u0650\u0649' : 'ī' # ـِي
253
- '\u0650' : 'i' # ِ
254
- '\u06D2' : 'e' # ـے
255
- '\b\u0627' : '' # ا
256
- '\u0627' : 'ā' # ا
257
- '\u0622' : 'ā' # آ
258
- '\u064E' : 'a' # َ
259
- '\u0648' : 'o' # و
260
- '\u064F' : 'u' # ُ
261
- '\u064F\u0648' : 'ū' # ـُو
262
- '\u064E\u06D2' : 'ay' # ـَي
263
- '\u064E\u0648' : 'aw' # ـَو
264
- '\u0652' : '' # Not Romanized
265
- '\u0670' : 'á' #
266
-
267
- '\u0628\u0651' : 'bb' # ب
268
- '\u067E\u0651' : 'pp' # پ
269
- '\u062a\u0651' : 'tt' # ت
270
- '\u0679\u0651' : 'ṭṭ' # see note 8 ٹ
271
- '\u067C\u0651' : 'ṭṭ' # see note 8 ټ
272
- '\u062B\u0651' : 't͟ht͟h' # see note 8 ث
273
- '\u067F\u0651' : 't͟ht͟h' # see note 8 ٿ
274
- '\u062c\u0651' : 'jj' # ج
275
- '\u0686\u0651' : 'chch' # ‫چ‬
276
- '\u062d\u0651' : 'ḩḩ' # ح
277
- '\u062e\u0651' : 'khkh' # خ
278
- '\u062f\u0651' : 'dd' # د
279
- '\u0688\u0651' : 'ḍḍ' # ڈ
280
- '\u0689\u0651' : 'ḍḍ' # ‫ډ‬
281
- '\u0630\u0651' : 'd͟hd͟h' # ذ
282
- '\u0631\u0651' : 'rr' # ر
283
- '\u0691\u0651' : 'ṛṛ' # see note 8 ڑ
284
- '\u0693\u0651' : 'ṛṛ' # see note 8 ړ
285
- '\u0632\u0651' : 'zz' # ز
286
- '\u0698\u0651' : 'zhzh' # ‫ژ‬
287
- '\u0633\u0651' : 'ss' # س
288
- '\u0634\u0651' : 'shsh' # ش
289
- '\u0635\u0651' : 'şş' # ص
290
- '\u0636\u0651' : 'ẕẕ' # ض
291
- '\u0637\u0651' : 'ţţ' # ط
292
- '\u0638\u0651' : 'z̧z̧' # ظ
293
- '\u0639\u0651' : '‘‘' # ع
294
- '\u063a\u0651' : 'ghgh' # غ
295
- '\u0641\u0651' : 'ff' # ف
296
- '\u0642\u0651' : 'qq' # ق
297
- '\u0643\u0651' : 'kk' # ك
298
- '\u06A9\u0651' : 'kk' # ک
299
- '\u06AF\u0651' : 'gg' # ‫گ‬
300
- '\u0644\u0651' : 'll' # ل
301
- '\u0645\u0651' : 'mm' # م
302
- '\u0646\u0651' : 'nn' # ن
303
- '\u06BA\u0651' : 'ññ' # ں
304
- '\u0648\u0651' : 'ww' # و
305
- '\u0647\u0651' : 'hh' # ه
306
- '\u06C1\u0651' : 'hh'
307
- '\u06BE\u0651' : 'hh'
308
- '\u0621\u0651' : '’’' # ء
309
- '\u0626\u0651' : '’’' # ئ
310
- '\u0649\u0651' : 'yy' # ي
311
-
312
- '\u0621\u064E' : '-ā' # see note 4
313
- '\u0621\u0650' : '-ay' # see note 4
314
-
315
- # Numerals
316
- '۰' : '0'
317
- '۱' : '1'
318
- '۲' : '2'
319
- '۳' : '3'
320
- '۴' : '4'
321
- '۵' : '5'
322
- '۶' : '6'
323
- '۷' : '7'
324
- '۸' : '8'
325
- '۹' : '9'
326
- # Although Perso-Arabic script is written from right to
327
- # left, numerical expressions, e.g. ۱۹۶۸ → 1968, are
328
- # written from left to right. A comma is inserted into
329
- # longer sequences, either after thousands, millions, etc.