interscript 0.1.9 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,223 +0,0 @@
1
- ---
2
- authority_id: un
3
- id: 1972
4
- language: iso-639-2:ben
5
- source_script: Beng
6
- destination_script: Latn
7
- name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Assamese Romanization, Version 4.0
8
- url: https://www.eki.ee/wgrs/rom1_as.htm
9
- creation_date: 1972
10
- confirmation_date: 2016
11
- description: |
12
- The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
13
- based on a report prepared by D. N. Sharma. The tables and their corrections were published in
14
- volume II of the conference reports.
15
-
16
- There is no evidence of the use of the system either in India or in international cartographic products.
17
-
18
- Assamese (Asamīyā) uses an alphasyllabic script whereby each character represents a syllable rather
19
- than one sound. Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially)
20
- and in an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous but the user
21
- would have to recognize many ligatures not given in the original table. The system is mostly reversible but there
22
- exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters) and consonants
23
- (ligatures vs. character sequences).
24
-
25
- References
26
-
27
- Second United Nations Conference on the Standardization of Geographical Names.
28
- London, 10–31 May 1972. Vol. II. Technical papers. United Nations. New York 1974, pp. 141–142.
29
-
30
- Third United Nations Conference on the Standardization of Geographical Names. Athens,
31
- 17 August – 7 September 1977. Vol. II, Technical papers, pp. 393 etc.
32
-
33
- notes:
34
- - |
35
- ু Exceptions: গু gu; রু ru; শু shu; হু hu; ন্তু ntu; স্তু stu.
36
- - |
37
- ূ Exceptions: রূ rū.
38
- - |
39
- ৃ Exceptions: হৃ hṛ.
40
- - |
41
- ্‌ Pronunciation without a vowel; special form: ৎ t.
42
- - |
43
- Dotted variants of the characters: ড় ṙa; ঢ় ṙha; য় ya.
44
-
45
- tests:
46
- - source: "অসমীয়া কবিতা"
47
- expected: "asamīyā kabitā"
48
- - source: "কবিৰ আজি জন্মদিন"
49
- expected: "kabira āji janmadina"
50
- - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
51
- expected: "beruṭata emāhara pāchhate punara bhayaṁkara agnikāṇḍa"
52
- - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
53
- expected: "bhaṅāra biruddhe āvedana dākhila kaṁganāra"
54
- - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
55
- expected: "āpuni paṙhi bhāla pāba parā bātari"
56
- - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
57
- expected: "shrīrāmapurata garubharti ṭrāka jabda, dujanaka āṭaka"
58
- - source: "কেনে আছে প্ৰাক্তন"
59
- expected: "kene āchhe prāktana"
60
- - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
61
- expected: "kamumbāira meyarara dehata kobhiḍa pajiṭibha"
62
- - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
63
- expected: "ṭuiṭāraj̱oge khoda sadarī kare ei kathā"
64
- - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
65
- expected: "lakhimapura jilāra nārāyaṇapurara barapathārata āji prashānti dhāma nāmere ekhana bṛddhāshramara shubhārambha karā haya"
66
-
67
- map:
68
- rules:
69
- - pattern: ([ক]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
70
- result: 'k'
71
- - pattern: ([খ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
72
- result: 'kh'
73
- - pattern: ([গ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
74
- result: 'g'
75
- - pattern: ([ঘ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
76
- result: 'gh'
77
- - pattern: ([ঙ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
78
- result: 'ṅ'
79
- - pattern: ([চ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
80
- result: 'ch'
81
- - pattern: ([ছ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
82
- result: 'chh'
83
- - pattern: ([জ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
84
- result: 'j'
85
- - pattern: ([ঝ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
86
- result: 'jh'
87
- - pattern: ([ঞ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
88
- result: 'ñ'
89
- - pattern: ([ট]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
90
- result: 'ṭ'
91
- - pattern: ([ঠ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
92
- result: 'ṭh'
93
- - pattern: ([ড]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
94
- result: 'ḍ'
95
- - pattern: ([ঢ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
96
- result: 'ḍh'
97
- - pattern: ([ণ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
98
- result: 'ṇ'
99
- - pattern: ([ত]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
100
- result: 't'
101
- - pattern: ([থ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
102
- result: 'th'
103
- - pattern: ([দ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
104
- result: 'd'
105
- - pattern: ([ধ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
106
- result: 'dh'
107
- - pattern: ([ন]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
108
- result: 'n'
109
- - pattern: ([প]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
110
- result: 'p'
111
- - pattern: ([ফ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
112
- result: 'ph'
113
- - pattern: ([ব]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
114
- result: 'b'
115
- - pattern: ([ভ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
116
- result: 'bh'
117
- - pattern: ([ম]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
118
- result: 'm'
119
- - pattern: ([য]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
120
- result: 'j̱'
121
- - pattern: ([ৰ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
122
- result: 'r'
123
- - pattern: ([ল]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
124
- result: 'l'
125
- - pattern: ([ৱ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
126
- result: 'v'
127
- - pattern: ([শ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
128
- result: 'sh'
129
- - pattern: ([ষ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
130
- result: 'ṣh'
131
- - pattern: ([স]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
132
- result: 's'
133
- - pattern: ([হ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
134
- result: 'h'
135
- - pattern: ([ড়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
136
- result: 'ṙ'
137
- - pattern: ([ঢ়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
138
- result: 'ṙh'
139
- - pattern: ([য়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
140
- result: 'y'
141
- - pattern: ([ড়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
142
- result: 'ṙ'
143
- - pattern: ([ঢ়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
144
- result: 'ṙh'
145
- - pattern: ([য়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
146
- result: 'y'
147
-
148
- characters:
149
-
150
- # I. Independent vowel characters
151
- 'অ': 'a'
152
- 'আ': 'ā'
153
- 'ই': 'i'
154
- 'ঈ': 'ī'
155
- 'উ': 'u'
156
- 'ঊ': 'ū'
157
- 'ঋ': 'ṛ'
158
- 'এ': 'e'
159
- 'ঐ': 'ai'
160
- 'ও': 'o'
161
- 'ঔ': 'au'
162
-
163
- # II. Abbreviated vowel characters
164
- '\u09be': 'ā'
165
- '\u09bf': 'i'
166
- '\u09c0': 'ī'
167
- '\u09c1': 'u'
168
- '\u09c2': 'ū'
169
- '\u09c3': 'ṛ'
170
- '\u09c7': 'e'
171
- '\u09c8': 'ai'
172
- '\u09cb': 'o'
173
- '\u09cc': 'au'
174
-
175
- # III. Other symbols
176
- '\u0982': 'ṁ'
177
- '\u0981': 'm̐'
178
- '\u0983': 'ḥ'
179
- '\u09cd': ''
180
-
181
- # IV. Consonant characters
182
- 'ক': 'ka'
183
- 'খ': 'kha'
184
- 'গ': 'ga'
185
- 'ঘ': 'gha'
186
- 'ঙ': 'ṅa'
187
- 'চ': 'cha'
188
- 'ছ': 'chha'
189
- 'জ': 'ja'
190
- 'ঝ': 'jha'
191
- 'ঞ': 'ña'
192
- 'ট': 'ṭa'
193
- 'ঠ': 'ṭha'
194
- 'ড': 'ḍa'
195
- 'ঢ': 'ḍha'
196
- 'ণ': 'ṇa'
197
- 'ত': 'ta'
198
- 'থ': 'tha'
199
- 'দ': 'da'
200
- 'ধ': 'dha'
201
- 'ন': 'na'
202
- 'প': 'pa'
203
- 'ফ': 'pha'
204
- 'ব': 'ba'
205
- 'ভ': 'bha'
206
- 'ম': 'ma'
207
- 'য': 'j̱a'
208
- 'ৰ': 'ra'
209
- 'ল': 'la'
210
- 'ৱ': 'va'
211
- 'শ': 'sha'
212
- 'ষ': 'ṣha'
213
- 'স': 'sa'
214
- 'হ': 'ha'
215
- 'ৎ': 't'
216
-
217
- # Note V Dotted variants
218
- 'ড়': 'ṙa'
219
- 'ঢ়': 'ṙha'
220
- 'য়': 'ya'
221
- 'য়': 'ya'
222
- 'ড়': 'ṙa'
223
- 'ঢ়': 'ya'
@@ -1,114 +0,0 @@
1
- ---
2
- authority_id: un
3
- id: 2007
4
- language: iso-639-2:bel
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: National System of Geographic Names Transmission into Roman Alphabet in Belarus
8
- url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/9th-uncsgn-docs/crp/9th_UNCSGN_e-conf-98-crp-21.pdf
9
- creation_date: 2007
10
- description: |
11
- RESOLUTION OF THE STATE COMMITTEE
12
- ON PROPERTY OF THE REPUBLIC OF BELARUS June 11, 2007 No. 38
13
-
14
- 8/16668 (06/18/2007) On amendments and additions to the Instructions
15
- for the transliteration of geographical names of the
16
- Republic of Belarus in letters of the Latin alphabet
17
-
18
- Based on the Regulation on the State Property Committee of the Republic of Belarus,
19
- approved by the Decree of the Council of Ministers of the Republic of Belarus dated July 29, 2006
20
- No. 958 "Issues of the State Committee on Property of the Republic of Belarus"
21
- tests: # the same as "by-bel-Cyrl-Latn-2007"
22
- - source: Аршанскi
23
- expected: Aršanski
24
- - source: Бешанковічы
25
- expected: Biešankovičy
26
- - source: Віцебск
27
- expected: Viciebsk
28
- - source: Гомель
29
- expected: Homieĺ
30
- - source: Гаўя
31
- expected: Haŭja
32
- - source: Добруш
33
- expected: Dobruš
34
- - source: Ельск
35
- expected: Jeĺsk
36
- - source: Бабаедава
37
- expected: Babajedava
38
- - source: Венцавічы
39
- expected: Viencavičy
40
- - source: Ёды
41
- expected: Jody
42
- - source: Вераб'ёвічы
43
- expected: Vierabjovičy
44
- - source: Мёры
45
- expected: Miory
46
- - source: Зэльва
47
- expected: Zeĺva
48
- - source: Iванава
49
- expected: Ivanava
50
- - source: Iўе
51
- expected: Iŭje
52
- - source: Лагойск
53
- expected: Lahojsk
54
- - source: Круглае
55
- expected: Kruhlaje
56
- - source: Лошыца
57
- expected: Lošyca
58
- - source: Любань
59
- expected: Liubań
60
- - source: Магілёў
61
- expected: Mahilioŭ
62
- - source: Нясвіж
63
- expected: Niasviž
64
- - source: Орша
65
- expected: Orša
66
- - source: Паставы
67
- expected: Pastavy
68
- - source: Рагачоў
69
- expected: Rahačoŭ
70
- - source: Смаргонь
71
- expected: Smarhoń
72
- - source: Талачын
73
- expected: Talačyn
74
- - source: Узда
75
- expected: Uzda
76
- - source: Шаркаўшчына
77
- expected: Šarkaŭščyna
78
- - source: Фаніпаль
79
- expected: Fanipaĺ
80
- - source: Хоцімск
81
- expected: Chocimsk
82
- - source: Цёмны Лес
83
- expected: Ciomny Lies
84
- - source: Чавусы
85
- expected: Čavusy
86
- - source: Шумілiна
87
- expected: Šumilina
88
- - source: Чыгірынка
89
- expected: Čyhirynka
90
- - source: Чэрвень
91
- expected: Červień
92
- - source: Друць
93
- expected: Druć
94
- - source: Чачэрск
95
- expected: Čačersk
96
- - source: Юхнаўка
97
- expected: Juchnaŭka
98
- - source: Гаюціна
99
- expected: Hajucina
100
- - source: Цюрлi
101
- expected: Ciurli
102
- - source: Любонічы
103
- expected: Liuboničy
104
- - source: Ямнае
105
- expected: Jamnaje
106
- - source: Баяры
107
- expected: Bajary
108
- - source: Валяр'яны
109
- expected: Valiarjany
110
- - source: Вязынка
111
- expected: Viazynka
112
-
113
- map:
114
- inherit: "by-bel-Cyrl-Latn-2007"
@@ -1,534 +0,0 @@
1
- ---
2
- authority_id: un
3
- id: 2016
4
- language: iso-639-2:ben
5
- source_script: Beng
6
- destination_script: Latn
7
- name: Bengali Romanization, Version 4.0
8
- url: http://www.eki.ee/wgrs/rom1_bn.htm
9
- creation_date: 2016
10
- description: |
11
- The United Nations recommended system was approved in 1972 (II/11)
12
- and amended in 1977 (III/12), based on a report prepared by D. N.
13
- Sharma. The tables and their corrections were published in volume II of
14
- the conference reports [1, 2].
15
-
16
- There is no evidence of the use of the system either in Bangladesh,
17
- in India or in international cartographic products. The resolution
18
- IV/17 (1982) recommended association, inter alia, with Bangladesh, in
19
- carrying out further studies on the system.
20
-
21
- Bengali (Bānglā) uses an alphasyllabic script whereby each character
22
- represents a syllable rather than one sound. Vowels and diphthongs are
23
- marked in two ways: as independent characters (used syllable-initially)
24
- and in an abbreviated form, to denote vowels after consonants. The
25
- romanization table is unambiguous but the user would have to recognize
26
- many ligatures not given in the original table. The system is mostly
27
- reversible but there exist some ambiguities in the romanization of
28
- vowels (independent vs. abbreviated characters) and consonants
29
- (ligatures vs. character sequences).
30
-
31
- Other systems of romanization
32
-
33
- For differences between the UN system and the ISO transliteration
34
- standard ISO 15919: 2001 see the section on the romanization of Hindi.
35
-
36
- References
37
-
38
- Second United Nations Conference on the Standardization of
39
- Geographical Names. London, 10–31 May 1972. Vol. II. Technical papers.
40
- United Nations. New York 1974, pp. 139–140.
41
-
42
- Third United Nations Conference on the Standardization of
43
- Geographical Names. Athens, 17 August – 7 September 1977. Vol. II,
44
- Technical papers, pp. 393 etc.
45
-
46
- notes:
47
- - |
48
- In the romanization system below character variations and the table of ligatures have been added.
49
-
50
- I. Independent vowel characters
51
-
52
- 1 অ a
53
- 2 আ ā
54
- 3 ই i
55
- 4 ঈ ī
56
- 5 উ u
57
- 6 ঊ ū
58
- 7 ঋ ṛ
59
- 8 এ e
60
- 9 ঐ ai
61
- 10 ও o
62
- 11 ঔ au
63
-
64
-
65
-
66
-
67
- - Where two Roman equivalents are given, the second (in brackets) is
68
- used for recording the pronunciation of place-names while the first
69
- form is for general use.
70
- - In the table only word-initial character variants are shown.
71
- Depending on the position in the word many variants of the characters
72
- are used as well as some ligatures. These features are not covered here.
73
- - For technical reasons the characters of the Mongolian script are
74
- turned 90° anti-clockwise.
75
-
76
- tests:
77
- - source: "র্ক"
78
- expected: "rka"
79
- - source: "গ্র"
80
- expected: "gra"
81
- - source: "ত্য"
82
- expected: "tya"
83
-
84
- - source: |
85
- আমার সোনার বাংলা, আমি তোমায় ভালোবাসি।
86
- চিরদিন তোমার আকাশ, তোমার বাতাস, আমার প্রাণে বাজায় বাঁশি॥
87
- ও মা, ফাগুনে তোর আমের বনে ঘ্রাণে পাগল করে, মরি হায়, হায় রে—
88
- ও মা, অঘ্রাণে তোর ভরা ক্ষেতে আমি কী দেখেছি মধুর হাসি॥
89
-
90
- কী শোভা, কী ছায়া গো, কী স্নেহ, কী মায়া গো—
91
- কী আঁচল বিছায়েছ বটের মূলে, নদীর কূলে কূলে।
92
- মা, তোর মুখের বাণী আমার কানে লাগে সুধার মতো,
93
- মরি হায়, হায় রে—
94
- মা, তোর বদনখানি মলিন হলে, ও মা, আমি নয়নজলে ভাসি॥
95
-
96
- # Note: There are still a couple of improvements we can make to the
97
- # transliteration system, but for now this could work
98
- #
99
- # But please revisit this - especially the use case of `য়`, which is adding
100
- # some mixed characters to the text.
101
- #
102
- expected: |
103
- āmaāra saonaāra baāṁlaā, āmai taomaāj̱aA় bhaālaobaāsai।
104
- chairadaina taomaāra ākaāsha, taomaāra baātaāsa, āmaāra praāṇae baājaāj̱aA় baām̐shai॥
105
- o maā, phaāgaunae taora āmaera banae ghraāṇae paāgala karae, marai haāj̱aA়, haāj̱aA় rae—
106
- o maā, aghraāṇae taora bharaā kṣhaetae āmai kaī daekhaechhai madhaura haāsai॥
107
-
108
- kaī shaobhaā, kaī chhaāj̱aA়ā gao, kaī snaeha, kaī maāj̱aA়ā gao—
109
- kaī ām̐chala baichhaāj̱aA়echha baṭaera maūlae, nadaīra kaūlae kaūlae।
110
- maā, taora maukhaera baāṇaī āmaāra kaānae laāgae saudhaāra matao,
111
- marai haāj̱aA়, haāj̱aA় rae—
112
- maā, taora badanakhaānai malaina halae, o maā, āmai naj̱aA়najalae bhaāsai॥
113
-
114
- map:
115
- characters:
116
-
117
- # I. Independent vowel characters
118
-
119
- 'অ': 'a' # 1
120
- 'আ': 'ā' # 2
121
- 'ই': 'i' # 3
122
- 'ঈ': 'ī' # 4
123
- 'উ': 'u' # 5
124
- 'ঊ': 'ū' # 6
125
- 'ঋ': 'ṛ' # 7
126
- 'এ': 'e' # 8
127
- 'ঐ': 'ai' # 9
128
- 'ও': 'o' # 10
129
- 'ঔ': 'au' # 11
130
-
131
- # II. Abbreviated vowel characters (ক stands for any consonant character)
132
-
133
- # 'ক': 'a' # 1
134
- '\u09be': 'ā' # 2 কা
135
- '\u09bf': 'i' # 3 কি
136
- '\u09c0': 'ī' # 4 কী
137
- '\u09c1': 'u' # 5 কু Exceptions: গু gu; রু ru; শু shu; হু hu; ন্তু ntu; স্তু stu.
138
- '\u09c2': 'ū' # 6 কূ Exception: রূ rū.
139
- '\u09c3': 'ṛ' # 7 কৃ Exception: হৃ hṛ.
140
- '\u09c7': 'e' # 8 কে
141
- '\u09c8': 'ai' # 9 কৈ
142
- '\u09cb': 'o' # 10 কো
143
- '\u09cc': 'au' # 11 কৌ
144
-
145
- # II 5 Exceptions
146
- 'গু': 'gu'
147
- 'রু': 'ru'
148
- 'শু': 'shu'
149
- 'হু': 'hu'
150
- 'ন্তু': 'ntu'
151
- 'স্তু': 'stu'
152
- # II 6 Exceptions
153
- 'রূ': 'rū'
154
- # II 7 Exceptions
155
- 'হৃ': 'hṛ'
156
-
157
- # III. Other symbols (ক stands for any consonant character)
158
-
159
- '\u0982': 'ṁ' # 1 কং
160
- '\u0981': 'm̐' # 2 কঁ
161
- '\u0983': 'ḥ' # 3 কঃ
162
- '\u09cd\u200c': '' # 4 ক্‌ Pronunciation without a vowel; special form: ৎ t.
163
-
164
- # III 4 special form
165
- 'ৎ': 't'
166
-
167
- # IV. Consonant characters
168
-
169
- 'ক': 'ka' # 1
170
- 'খ': 'kha' # 2
171
- 'গ': 'ga' # 3
172
- 'ঘ': 'gha' # 4
173
- 'ঙ': 'ṅa' # 5
174
- 'চ': 'cha' # 6
175
- 'ছ': 'chha' # 7
176
- 'জ': 'ja' # 8
177
- 'ঝ': 'jha' # 9
178
- 'ঞ': 'ña' # 10
179
- 'ট': 'ṭa' # 11
180
- 'ঠ': 'ṭha' # 12
181
- 'ড': 'ḍa' # 13 A Dotted variants of the characters: ড় ṙa; ঢ় ṙha; য় ya.
182
- 'ঢ': 'ḍha' # 14 A Dotted variants of the characters: ড় ṙa; ঢ় ṙha; য় ya.
183
- 'ণ': 'ṇa' # 15
184
- 'ত': 'ta' # 16
185
- 'থ': 'tha' # 17
186
- 'দ': 'da' # 18
187
- 'ধ': 'dha' # 19
188
- 'ন': 'na' # 20
189
- 'প': 'pa' # 21
190
- 'ফ': 'pha' # 22
191
- 'ব': 'ba' # 23
192
- 'ভ': 'bha' # 24
193
- 'ম': 'ma' # 25
194
- 'য': 'j̱aA' # 26
195
- 'র': 'ra' # 27
196
- 'ল': 'la' # 28
197
- 'শ': 'sha' # 29
198
- 'ষ': 'ṣha' # 30
199
- 'স': 'sa' # 31
200
- 'হ': 'ha' # 32
201
-
202
- # IV 13, 14
203
- 'ড়': 'ṙa'
204
- 'ঢ়': 'ṙha'
205
- 'য়': 'ya'
206
-
207
-
208
- # V. Ligatures
209
- # Adscript forms of some consonants
210
- #
211
- # We already implemented a one-to-one mapping for the most commonly used
212
- # combined letters (Zuktabarna), so we can ignore these custom rules
213
- # for now.
214
- #
215
- # 'র্‍': 'r-:'
216
- # '‍্র': '-r:'
217
- # '‍্য': '-y:'
218
-
219
-
220
- # Other ligatures (the list is not complete)
221
-
222
- 'ক্ক': 'kka'
223
- 'ক্ট': 'kṭa'
224
- 'ক্ত': 'kta'
225
- 'ক্ন': 'kna'
226
- 'ক্ম': 'kma'
227
- 'ক্র': 'kra'
228
- 'ক্ল': 'kla'
229
- 'ক্ব': 'kva'
230
- 'ক্ষ': 'kṣha'
231
- 'ক্ষ্ন': 'kṣhna'
232
- 'ক্ষ্ম': 'kṣhma'
233
- 'ক্ষ্ব': 'kṣhva'
234
-
235
- 'ক্স': 'ksa'
236
- 'গ্গ': 'gga'
237
- 'গ্দ': 'gda'
238
- 'গ্ধ': 'gdha'
239
- 'গ্ন': 'gna'
240
- 'গ্ম': 'gma'
241
- 'গ্র': 'gra'
242
- 'গ্ল': 'gla'
243
- 'ঘ্র': 'ghra'
244
- 'ঙ্ক': 'ṅka'
245
- 'ঙ্গ': 'ṅga'
246
- 'চ্চ': 'chcha'
247
-
248
- 'চ্ছ': 'chchha'
249
- 'চ্ছ্ব': 'chchhva'
250
- 'চ্ঞ': 'chña'
251
- 'জ্জ': 'jja'
252
- 'জ্জ্ব': 'jjva'
253
- 'জ্ঝ': 'jjha'
254
- 'জ্ঞ': 'jña'
255
- 'জ্ব': 'jva'
256
- 'ঞ্চ': 'ñcha'
257
- 'ঞ্ছ': 'ñchha'
258
- 'ঞ্জ': 'ñja'
259
- 'ঞ্ঝ': 'ñjha'
260
-
261
- 'ট্ট': 'ṭṭa'
262
- 'ড্ড': 'ḍḍa'
263
- 'ণ্ট': 'ṇṭa'
264
- 'ণ্ঠ': 'ṇṭha'
265
- 'ণ্ড': 'ṇḍa'
266
- 'ত্ত': 'tta'
267
- 'ত্ত্ব': 'ttva'
268
- 'ত্থ': 'ttha'
269
- 'ত্ন': 'tna'
270
- 'ত্ম': 'tma'
271
- 'ত্র': 'tra'
272
- 'ত্ল': 'tla'
273
-
274
- 'ত্ব': 'tva'
275
- 'দ্দ': 'dda'
276
- 'দ্দ্ব': 'ddva'
277
- 'দ্ধ': 'ddha'
278
- 'দ্ধ্ব': 'ddhva'
279
- 'দ্ন': 'dna'
280
- 'দ্ব': 'dva'
281
- 'দ্ভ': 'dbha'
282
- 'দ্ম': 'dma'
283
- 'দ্র': 'dra'
284
- 'দ্ল': 'dla'
285
- 'ধ্র': 'dhra'
286
-
287
- 'ন্ঠ': 'nṭha'
288
- 'ন্ড': 'nḍa'
289
- 'ন্ক': 'nka'
290
- 'ন্ত': 'nta'
291
- 'ন্ত্র': 'ntra'
292
- 'ন্থ': 'ntha'
293
- 'ন্দ': 'nda'
294
- 'ন্দ্র': 'ndra'
295
- 'ন্ধ': 'ndha'
296
- 'ন্ন': 'nna'
297
- 'ন্ম': 'nma'
298
- 'ন্ব': 'nva'
299
-
300
- 'প্ন': 'pna'
301
- 'প্ত': 'pta'
302
- 'প্প': 'ppa'
303
- 'প্র': 'pra'
304
- 'প্ল': 'pla'
305
- 'ফ্র': 'phra'
306
- 'ব্জ': 'bja'
307
- 'ব্দ': 'bda'
308
- 'ব্ধ': 'bdha'
309
- 'ব্ব': 'bba'
310
- 'ব্র': 'bra'
311
- 'ভ্র': 'bhra'
312
- 'ম্প': 'mpa'
313
- 'ম্ব': 'mba'
314
- 'ম্ভ': 'mbha'
315
- 'ম্ভ্র': 'mbhra'
316
- 'ম্ম': 'mma'
317
- 'ম্র': 'mra'
318
- 'ম্ল': 'mla'
319
- 'ল্ক': 'lka'
320
- 'ল্ট': 'lṭa'
321
- 'ল্ড': 'lḍa'
322
- 'ল্ম': 'lma'
323
- 'ল্ল': 'lla'
324
-
325
- 'শ্চ': 'shcha'
326
- 'শ্ছ': 'shchha'
327
- 'শ্ত': 'shta'
328
- 'শ্ন': 'shna'
329
- 'শ্ম': 'shma'
330
- 'শ্র': 'shra'
331
- 'শ্ল': 'shla'
332
- 'শ্ব': 'shva'
333
- 'ষ্ক': 'ṣhka'
334
- 'ষ্ট': 'ṣhṭa'
335
- 'ষ্ট্র': 'ṣhṭra'
336
- 'ষ্ঠ': 'ṣhṭha'
337
-
338
- 'ষ্ঞ': 'ṣhña'
339
- 'ষ্প': 'ṣhpa'
340
- 'ষ্ফ': 'ṣhpha'
341
- 'স্ক': 'ska'
342
- 'স্ক্র': 'skra'
343
- 'স্খ': 'skha'
344
- 'স্ত': 'sta'
345
- 'স্ন': 'sna'
346
- 'স্ম': 'sma'
347
- 'স্র': 'sra'
348
- 'স্ব': 'sva'
349
- 'হ্ন': 'hna'
350
-
351
- 'হ্ম': 'hma'
352
- 'হ্র': 'hra'
353
- 'হ্ল': 'hla'
354
-
355
- # Zuktabarna - combined letters
356
- #
357
- # The following is not an official list, but it has been
358
- # collected and verified against a reliable source.
359
- # Source: https://www.somewhereinblog.net/blog/trivuzblog/28849694
360
- #
361
- 'ক্ট্র': 'kṭra'
362
- 'ক্ত্র': 'ktra'
363
- 'ক্য': 'kya'
364
- 'ক্ষ্ণ': 'kṣṇa'
365
- 'ক্ষ্ম': 'kṣma'
366
- 'খ্য': 'khaj̱a'
367
- 'খ্র': 'khra'
368
- 'গ্ন': 'gna'
369
- 'গ্‌ণ': 'gṇa'
370
- 'গ্ধ্য': 'gdhya'
371
- 'গ্ধ্র': 'gdhra'
372
- 'গ্ন্য': 'gnya'
373
- 'গ্ব': 'gva'
374
- 'গ্য': 'gya'
375
- 'গ্র্য': 'grya'
376
- 'ঘ্ন': 'ghna'
377
- 'ঘ্য': 'ghya'
378
- 'ঙ্‌ক্ত': 'ṅkata'
379
- 'ঙ্ক্য': 'ṅkaya'
380
- 'ঙ্ক্ষ': 'ṅkṣa'
381
- 'ঙ্খ': 'ṅkha'
382
- 'ঙ্গ্য': 'ṅgaya'
383
- 'ঙ্ঘ': 'ṅgha'
384
- 'ঙ্ঘ্য': 'ṅghya'
385
- 'ঙ্ঘ্র': 'ṅghra'
386
- 'ঙ্ম': 'ṅma'
387
- 'চ্ছ্র': 'cchra'
388
- 'চ্ব': 'cva'
389
- 'চ্য': 'cya'
390
- 'জ্য': 'jya'
391
- 'জ্র': 'jra'
392
- 'ট্ব': 'ṭva'
393
- 'ট্ম': 'ṭma'
394
- 'ট্য': 'ṭya'
395
- 'ট্র': 'ṭra'
396
- 'ড্ব': 'ḍva'
397
- 'ড্য': 'ḍya'
398
- 'ড্র': 'ḍra'
399
- 'ড়্গ': 'ḍga'
400
- 'ঢ্য': 'ḍhya'
401
- 'ঢ্র': 'ḍhra'
402
- 'ণ্ঠ্য': 'ṇṭhya'
403
- 'ণ্ড্য': 'ṇḍya'
404
- 'ণ্ড্র': 'ṇḍra'
405
- 'ণ্ঢ': 'ṇḍha'
406
- 'ণ্ণ': 'ṇṇa'
407
- 'ণ্ব': 'ṇva'
408
- 'ণ্ম': 'ṇma'
409
- 'ণ্য': 'ṇya'
410
- 'ৎক': 'tka'
411
- 'ত্ত্য': 'ttya'
412
- 'ত্ম্য': 'tmya'
413
- 'ত্য': 'tya'
414
- 'ত্র্য': 'trya'
415
- 'ৎল': 'tla'
416
- 'ৎস': 'tsa'
417
- 'থ্ব': 'thva'
418
- 'থ্য': 'thya'
419
- 'থ্র': 'thra'
420
- 'দ্গ': 'dga'
421
- 'দ্ঘ': 'dgha'
422
- 'দ্ভ্র': 'dbhra'
423
- 'দ্য': 'dya'
424
- 'দ্র্য': 'draya'
425
- 'ধ্ন': 'dhna'
426
- 'ধ্ব': 'dhva'
427
- 'ধ্ম': 'dhma'
428
- 'ধ্য': 'dya'
429
- 'ন্ট': 'nṭa'
430
- 'ন্ট্র': 'nṭra'
431
- 'ন্ড্র': 'nḍra'
432
- 'ন্ত্ব': 'ntva'
433
- 'ন্ত্য': 'ntaya'
434
- 'ন্ত্র্য': 'ntraya'
435
- 'ন্থ্র': 'nthra'
436
- 'ন্দ্য': 'ndya'
437
- 'ন্দ্ব': 'ndva'
438
- 'ন্ধ্য': 'ndhya'
439
- 'ন্ধ্র': 'ndhra'
440
- 'ন্য': 'nya'
441
- 'প্ট': 'pṭa'
442
- 'প্য': 'pya'
443
- 'প্র্য': 'praya'
444
- 'প্স': 'psa'
445
- 'ফ্ল': 'phla'
446
- 'ব্য': 'bya'
447
- 'ব্ল': 'bla'
448
- 'ভ্ব': 'bhva'
449
- 'ভ্য': 'bhya'
450
- 'ম্ন': 'mna'
451
- 'ম্প্র': 'mpra'
452
- 'ম্ফ': 'mpha'
453
- 'ম্ব্র': 'mvra'
454
- 'ম্য': 'mya'
455
- 'য্য': 'j̱aya'
456
- 'র্ক': 'rka'
457
- 'র্ক্য': 'rkya'
458
- 'র্গ্য': 'rgya'
459
- 'র্ঘ্য': 'rghya'
460
- 'র্চ্য': 'rchya'
461
- 'র্জ্য': 'rjya'
462
- 'র্ণ্য': 'rṇya'
463
- 'র্ত্য': 'rtya'
464
- 'র্থ্য': 'rthya'
465
- 'র্ব্য': 'rvya'
466
- 'র্ম্য': 'rmya'
467
- 'র্শ্য': 'rshya'
468
- 'র্ষ্য': 'rṣhya'
469
- 'র্হ্য': 'rhya'
470
- 'র্খ': 'rkha'
471
- 'র্গ': 'rga'
472
- 'র্গ্র': 'rgra'
473
- 'র্ঘ': 'rgha'
474
- 'র্চ': 'rcha'
475
- 'র্ছ': 'rchha'
476
- 'র্জ': 'rja'
477
- 'র্ঝ': 'rjha'
478
- 'র্ট': 'rṭa'
479
- 'র্ড': 'rḍa'
480
- 'র্ণ': 'rṇa'
481
- 'র্ত': 'rta'
482
- 'র্ত্র': 'rtra'
483
- 'র্থ': 'rtha'
484
- 'র্দ': 'rda'
485
- 'র্দ্ব': 'rdva'
486
- 'র্দ্র': 'rdra'
487
- 'র্ধ': 'rdha'
488
- 'র্ধ্ব': 'rdhba'
489
- 'র্ন': 'rna'
490
- 'র্প': 'rpa'
491
- 'র্ফ': 'rpha'
492
- 'র্ভ': 'rbha'
493
- 'র্ম': 'rma'
494
- 'র্য': 'rya'
495
- 'র্ল': 'rla'
496
- 'র্শ': 'rsha'
497
- 'র্শ্ব': 'rshba'
498
- 'র্ষ': 'rṣha'
499
- 'র্স': 'rsa'
500
- 'র্হ': 'rha'
501
- 'র্ঢ্য': 'rḍhya'
502
- 'ল্ক্য': 'lkaya'
503
- 'ল্গ': 'lga'
504
- 'ল্প': 'lpa'
505
- 'ল্‌ফ': 'lpha'
506
- 'ল্ফ': 'lpha'
507
- 'ল্ব': 'lba'
508
- 'ল্‌ভ': 'lbha'
509
- 'ল্য': 'lya'
510
- 'শ্য': 'sya'
511
- 'ষ্ক্র': 'ṣkra'
512
- 'ষ্ট্য': 'ṣṭya'
513
- 'ষ্ঠ্য': 'ṣṭhya'
514
- 'ষ্ণ': 'ṣṇa'
515
- 'ষ্প্র': 'ṣpra'
516
- 'ষ্ব': 'ṣva'
517
- 'ষ্ম': 'ṣma'
518
- 'ষ্য': 'ṣya'
519
- 'স্ট': 'sṭa'
520
- 'স্ট্র': 'sṭra'
521
- 'স্ত্ব': 'stva'
522
- 'স্ত্য': 'stṣya'
523
- 'স্ত্র': 'stra'
524
- 'স্থ': 'stha'
525
- 'স্থ্য': 'sthya'
526
- 'স্প': 'spa'
527
- 'স্প্র': 'spra'
528
- 'স্প্‌ল': 'spala'
529
- 'স্ফ': 'spha'
530
- 'স্য': 'sya'
531
- 'স্ল': 'sla'
532
- 'হ্ণ': 'hṇa'
533
- 'হ্ব': 'hva'
534
- 'হ্য': 'hya'