interscript 0.1.9 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,139 +0,0 @@
1
- ---
2
- authority_id: ungegn
3
- id: 1971
4
- language: iso-639-2:ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: 1971 "Beirut system"
8
- url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/2nd-uncsgn-docs/E_Conf61_4_Add1_e.pdf
9
- creation_date: 1971
10
- confirmation date: 2018-06
11
- description: |
12
- The current United Nations recommended romanization
13
- system was approved in 2017 (resolution XI/3), based on
14
- the system adopted by Arabic experts at the conference
15
- held in Beirut in 2007, the Unified Arabic
16
- Transliteration System, taking into account the
17
- practical amendments and corrections carried out and
18
- agreed upon by the representatives of the Arabic-
19
- speaking countries at the Fourth Arab Conference on
20
- Geographical Names, held in Beirut in 2008, and some
21
- clarifications and amendments agreed in Riyadh in 2017.
22
- Previously, the United Nations had approved a
23
- romanization system in 1972 (resolution II/8), based on the
24
- system adopted by Arabic experts at the conference
25
- held at Beirut in 1971 with the practical amendments carried out
26
- and agreed upon by the representatives of the Arabic-speaking
27
- countries at their conference. The table was published in volume
28
- II of the conference report.
29
- In UN resolution XI/3 it is specifically stated that the
30
- system was recommended for the “romanization of the
31
- geographical names within those Arabic-speaking countries
32
- where this system is officially adopted”. There is
33
- evidence of its partial implementation in Jordan, Oman and
34
- Saudi Arabia. The UNGEGN Working Group on Romanization
35
- Systems intends to continue monitoring the UN system’s
36
- implementation across Arabic-speaking countries.
37
- In some countries there exist local romanization schemes
38
- or practices. The geographical names of Algeria, Djibouti,
39
- Mauritania, Morocco and Tunisia are generally rendered in
40
- the traditional manner which conforms to the principles of
41
- the French orthography.
42
- The previous UN-approved system is still found in
43
- considerable international usage.
44
- Arabic is written from right to left. The Arabic script
45
- usually omits vowel points and diacritical marks from
46
- writing which makes it difficult to obtain uniform results
47
- in the romanization of Arabic. It is essential to identify
48
- correctly the words which appear in any particular name
49
- and to know the standard Arabic-script spelling including
50
- the relevant vowels. One must also take into account
51
- dialectal and idiosyncratic deviations. The romanization
52
- is generally reversible though there may be some ambiguous
53
- letter sequences (dh, kh, sh, th) which may also point to
54
- combinations of Arabic characters in addition to the
55
- respective single characters.
56
- notes:
57
- - |
58
- ث is t͟h (th with sub-macron)
59
- خ is k͟h (kh with sub-macron)
60
- ذ is d͟h (dh with sub-macron)
61
- ش is s͟h (sh with sub-macron)
62
- ظ is z͟h (zh with sub-macron)
63
- غ is g͟h (gh with sub-macron)
64
- The previous UN 1972 System had the following differences:
65
- the character (ظ) was romanized as z̧ instead of d͟h;
66
- the cedilla (¸) was used instead of sub-macron (_) in all characters with sub-macrons. - |
67
-
68
- tests:
69
-
70
- # Examples taken from:
71
- # https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/2nd-uncsgn-docs/E_Conf61_4_Add1_e.pdf
72
- # page 31 (38 digital)
73
-
74
- - source: خَيبَر
75
- expected: K͟haybar
76
-
77
- - source: ظَهران
78
- expected: Z͟hahrān
79
-
80
- - source: القُدس
81
- expected: Al Quds
82
-
83
- - source: شَرم الشَيْخ
84
- expected: S͟harm as͟h S͟hayk͟h
85
-
86
- map:
87
- inherit: "un-ara-Arab-Latn-2017"
88
- map:
89
- postrules:
90
- - pattern : ' At͟h T͟h' # الث
91
- result: ' at͟h T͟h'
92
- - pattern : ' Ad͟h D͟h' # الذ
93
- result: ' ad͟h D͟h'
94
- - pattern : ' As͟h S͟h' # الش
95
- result: ' as͟h S͟h'
96
- - pattern : ' Az͟h Z͟h' # الظ
97
- result: ' az͟h Z͟h'
98
- characters:
99
-
100
- # sun letters
101
- '\b\u0627\u0644\u062b' : 'at͟h t͟h' # الث
102
- '\b\u0627\u0644\u0630' : 'ad͟h d͟h' # الذ
103
- '\b\u0627\u0644\u0634' : 'as͟h s͟h' # الش
104
- '\b\u0627\u0644\u0638' : 'az͟h z͟h' # الظ
105
-
106
- # shadda
107
- '\u062e\u0651' : 'k͟hk͟h' # خ
108
- '\u0630\u0651' : 'd͟hd͟h' # ذ
109
- '\u0634\u0651' : 's͟h' # ش
110
- '\u0638\u0651' : 'z͟hz͟h' # ظ
111
- '\u063a\u0651' : 'g͟hg͟h' # غ
112
-
113
- '\u062b' : 't͟h' # ث
114
- '\ufe9b' : 't͟h' # ﺛ
115
- '\ufe9c' : 't͟h' # ﺜ
116
- '\ufe9a' : 't͟h' # ﺚ
117
-
118
- '\u062e' : 'k͟h' # خ
119
- '\ufea7' : 'k͟h' # ﺧ
120
- '\ufea8' : 'k͟h' # ﺨ
121
- '\ufea6' : 'k͟h' # ﺦ
122
-
123
- '\u063a' : 'g͟h' # غ
124
- '\ufecf' : 'g͟h' # ﻏ
125
- '\ufed0' : 'g͟h' # ﻐ
126
- '\ufece' : 'g͟h' # ﻎ
127
-
128
- '\u0630' : 'd͟h' # ذ
129
- '\ufeac' : 'd͟h' # ﺬ
130
-
131
- '\u0634' : 's͟h' # ش
132
- '\ufeb7' : 's͟h' # ﺷ
133
- '\ufeb8' : 's͟h' # ﺸ
134
- '\ufeb6' : 's͟h' # ﺶ
135
-
136
- '\u0638' : 'z͟h' # ظ
137
- '\ufec7' : 'z͟h' # ﻇ
138
- '\ufec8' : 'z͟h' # ﻈ
139
- '\ufec6' : 'z͟h' # ﻆ
@@ -1,159 +0,0 @@
1
- ---
2
- authority_id: ungegn
3
- id: 1972
4
- language: iso-639-2:ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ROMANIZATION OF ARABIC -- UNGEGN 1972 System
8
- url: http://www.eki.ee/wgrs/obs_rom_vers/rom1_ar_v4_0.pdf
9
- creation_date: 1972
10
- confirmation date: 2018-06
11
- description: |
12
- The United Nations recommended romanization
13
- system was approved in 1972 (resolution II/8),
14
- based on the system adopted by Arabic experts at
15
- the conference held at Beirut in 1971 with the
16
- practical amendments carried out and agreed upon
17
- by the representatives of the Arabic-speaking
18
- countries at their conference. The table was
19
- published in volume II of the conference report.
20
- In the UN resolution it was specifically
21
- pointed out that the system was recommended "for
22
- the romanization of the geographical names within
23
- those Arabic-speaking countries where this system
24
- is officially acknowledged". It cannot be
25
- definitely ascertained which of the
26
- Arabic-speaking countries have adopted this system
27
- officially, especially since 2007 when there are
28
- efforts by the Arabic Division to promote a
29
- modification of the UN system (ADEGN
30
- romanization, see the section on other
31
- romanization systems below), with varying
32
- success. Judging by the use of names in
33
- international cartographic products which rely
34
- mostly on national sources it appears that the UN
35
- system or its modification is more or less
36
- current in Iraq, Kuwait, Libya, Saudi Arabia,
37
- United Arab Emirates and Yemen, there and in some
38
- other countries the system is often used without
39
- diacritical marks. For the geographical names of
40
- the Syrian Arab Republic international maps
41
- favour the UN system while the local usage seems
42
- to prefer a French-oriented romanization. Also in
43
- Egypt and Sudan there exist local romanization
44
- schemes or practices side by side with the UN
45
- system. The geographical names of Algeria,
46
- Djibouti, Mauritania, Morocco and Tunisia are
47
- generally rendered in the traditional manner
48
- which conforms to the principles of the French
49
- orthography. Resolution 7 of the Seventh UN
50
- Conference on the Standardization of Geographical
51
- Names (1998) recommended that "the League of Arab
52
- States should, through its specialized
53
- structures, continue its efforts to organize a
54
- conference with a view to considering the
55
- difficulties encountered in applying the amended
56
- Beirut system of 1972 for the romanization of
57
- Arabic script, and submit, as soon as possible, a
58
- solution to the United Nations Group of Experts
59
- on Geographical Names". At the Eighth UN
60
- Conference on the Standardization of Geographical
61
- Names (2002), the Arabic Division of the UN Group
62
- of Experts announced that it had finalised
63
- proposed modifications to the UN recommended
64
- romanization system. These proposals would be
65
- submitted to the League of Arab States for
66
- approval. Arabic is written from right to left.
67
- The Arabic script usually omits vowel points and
68
- diacritical marks from writing which makes it
69
- difficult to obtain uniform results in the
70
- romanization of Arabic. It is essential to
71
- identify correctly the words which appear in any
72
- particular name and to know the standard Arabic-
73
- script spelling including proper pointing. One
74
- must also take into account dialectal and
75
- idiosyncratic deviations. The romanization is
76
- generally reversible though there are some
77
- ambiguous letter sequences (dh, kh, sh, th) which
78
- may also point to combinations of Arabic
79
- characters in addition to the respective single
80
- characters.
81
- notes:
82
- - |
83
- The previous UN 1972 System had the following differences:
84
- the character (ظ) was romanized as z̧ instead of d͟h;
85
- ح, ص, ض the cedilla (¸) was used instead of sub-macron (_) in all characters with sub-macrons. - |
86
- When the definite article al precedes a word beginning with one of the "sun letters" (t,
87
- th, d, dh, r, z, s, sh, ş, ḑ, ţ, z̧, l, n) the l of the definite article is assimilated with the first
88
- consonant of the word: الشارقة ash-Shāriqah.
89
-
90
-
91
- tests:
92
-
93
- # Examples taken from:
94
- # https://unstats.un.org/unsd/geoinfo/geonames/
95
-
96
- - source: مِصر
97
- expected: Mişr
98
-
99
- - source: قَطَر
100
- expected: Qaţar
101
-
102
- - source: الجُمهُورِيَّة العِراقِيَّة
103
- expected: Al Jumhūrīyah al ‘Irāqīyah
104
-
105
- - source: جُمهُورِيَّة مِصر العَرَبِيَّة
106
- expected: Jumhūrīyat Mişr al ‘Arabīyah
107
-
108
- - source: الرِيَاض
109
- expected: Ar Riyāḑ
110
-
111
- - source: الشارِقة
112
- expected: Ash Shāriqah
113
-
114
- map:
115
- inherit: "un-ara-Arab-Latn-2017"
116
- postrules:
117
- - pattern : ' Aş Ş' # الص
118
- result: ' aş Ş'
119
- - pattern : ' Aḑ Ḑ' # الض
120
- result: ' aḑ Ḑ'
121
- - pattern : ' Aţ Ţ' # الط
122
- result: ' aţ Ţ'
123
-
124
- characters:
125
-
126
- '\b\u0627\u0644\u0635' : 'aş ş' # الص
127
- '\b\u0627\u0644\u0636' : 'aḑ ḑ' # الض
128
- '\b\u0627\u0644\u0637' : 'aţ ţ' # الط
129
-
130
- '\u062d\u0651' : 'ḩḩ' # ح
131
- '\u0635\u0651' : 'şş' # ص
132
- '\u0636\u0651' : 'ḑḑ' # ض
133
- '\u0637\u0651' : 'ţţ' # ط
134
- '\u0638\u0651' : 'z̧z̧' # ظ
135
-
136
- '\u062d' : 'ḩ' # ح
137
- '\ufea3' : 'ḩ' # ﺣ
138
- '\ufea4' : 'ḩ' # ﺤ
139
- '\ufea2' : 'ḩ' # ﺢ
140
-
141
- '\u0635' : 'ş' # ص
142
- '\ufebb' : 'ş' # ﺻ
143
- '\ufebc' : 'ş' # ﺼ
144
- '\ufeba' : 'ş' # ﺺ
145
-
146
- '\u0636' : 'ḑ' # ض
147
- '\ufebf' : 'ḑ' # ﺿ
148
- '\ufec0' : 'ḑ' # ﻀ
149
- '\ufebe' : 'ḑ' # ﺾ
150
-
151
- '\u0637' : 'ţ' # ط
152
- '\ufec3' : 'ţ' # ﻃ
153
- '\ufec4' : 'ţ' # ﻄ
154
- '\ufec2' : 'ţ' # ﻂ
155
-
156
- '\u0638' : 'z̧' # ظ
157
- '\ufec7' : 'z̧' # ﻇ
158
- '\ufec8' : 'z̧' # ﻈ
159
- '\ufec6' : 'z̧' # ﻆ
@@ -1,420 +0,0 @@
1
- ---
2
- authority_id: ungegn
3
- id: 2017
4
- language: iso-639-2:ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
8
- url: http://www.eki.ee/wgrs/rom1_ar.pdf
9
- creation_date: 2017
10
- confirmation date: 2018-06
11
- description: |
12
- The current United Nations recommended romanization
13
- system was approved in 2017 (resolution XI/3), based on
14
- the system adopted by Arabic experts at the conference
15
- held in Beirut in 2007, the Unified Arabic
16
- Transliteration System, taking into account the
17
- practical amendments and corrections carried out and
18
- agreed upon by the representatives of the Arabic-
19
- speaking countries at the Fourth Arab Conference on
20
- Geographical Names, held in Beirut in 2008, and some
21
- clarifications and amendments agreed in Riyadh in 2017.
22
- Previously, the United Nations had approved a
23
- romanization system in 1972 (resolution II/8), based on the
24
- system adopted by Arabic experts at the conference
25
- held at Beirut in 1971 with the practical amendments carried out
26
- and agreed upon by the representatives of the Arabic-speaking
27
- countries at their conference. The table was published in volume
28
- II of the conference report.
29
- In UN resolution XI/3 it is specifically stated that the
30
- system was recommended for the “romanization of the
31
- geographical names within those Arabic-speaking countries
32
- where this system is officially adopted”. There is
33
- evidence of its partial implementation in Jordan, Oman and
34
- Saudi Arabia. The UNGEGN Working Group on Romanization
35
- Systems intends to continue monitoring the UN system’s
36
- implementation across Arabic-speaking countries.
37
- In some countries there exist local romanization schemes
38
- or practices. The geographical names of Algeria, Djibouti,
39
- Mauritania, Morocco and Tunisia are generally rendered in
40
- the traditional manner which conforms to the principles of
41
- the French orthography.
42
- The previous UN-approved system is still found in
43
- considerable international usage.
44
- Arabic is written from right to left. The Arabic script
45
- usually omits vowel points and diacritical marks from
46
- writing which makes it difficult to obtain uniform results
47
- in the romanization of Arabic. It is essential to identify
48
- correctly the words which appear in any particular name
49
- and to know the standard Arabic-script spelling including
50
- the relevant vowels. One must also take into account
51
- dialectal and idiosyncratic deviations. The romanization
52
- is generally reversible though there may be some ambiguous
53
- letter sequences (dh, kh, sh, th) which may also point to
54
- combinations of Arabic characters in addition to the
55
- respective single characters.
56
- notes:
57
- - |
58
- When the definite article al precedes a word beginning with
59
- one of the "sun letters" (t, th, d, dh, r, z, s, sh, s̱, ḏ, ṯ,
60
- d͟h, l, n) the l of the definite article is assimilated with
61
- the first consonant of the word: الشارقة Ash Shāriqah.
62
- - |
63
- The definite article is always written with a capital
64
- initial: الزيتون Az Zaytūn, البلد Al Balad, منية الضنية Minyat Aḏ
65
- Ḏinniyyah.
66
- - |
67
- Nunation is unlikely to be found in geographical names and
68
- the last letter remains silent: جبل = جبلٌ Jabal (not Jabalun).
69
- - |
70
- In order to disambiguate certain character sequences a
71
- middle dot (·) may be used: سهيلة S·haylah (cf. شيلة Shaylah), دهيب
72
- D·hayb (cf. ذيب Dhayb), أدهم Ad·ham (cf. أذم Adham).
73
- - |
74
- Ta' marboota is transliterated as 'ah' when the word carries
75
- the definite article or stands at the end of the sentence;
76
- otherwise it is transliterated as 'at'.
77
- Words starting with AL and ending with ta' marboota, which is
78
- pronounced "ah" rather than "at", are handled by multiple regexes
79
- because lookbehind in Ruby does not support variable length.
80
- - |
81
- مَكّة should be transliterated as Makkah: the shadda above ك
82
- doubles the consonant. The same applies to all Arabic letters.
83
-
84
- tests:
85
-
86
- # Examples taken from:
87
- # https://unstats.un.org/unsd/geoinfo/geonames/
88
-
89
- - source: مِصر
90
- expected: Mis̱r
91
-
92
- - source: قَطَر
93
- expected: Qaṯar
94
-
95
- - source: المَغرِب
96
- expected: Al Maghrib
97
-
98
- - source: الجُمهُورِيَّة العِراقِيَّة
99
- expected: Al Jumhūrīyah al ‘Irāqīyah
100
-
101
- - source: جُمهُورِيَّة العِراق
102
- expected: Jumhūrīyat al ‘Irāq
103
-
104
- - source: جُمهُورِيَّة مِصر العَرَبِيَّة
105
- expected: Jumhūrīyat Mis̱r al ‘Arabīyah
106
-
107
- - source: بَغداد
108
- expected: Baghdād
109
-
110
- - source: تُونِس
111
- expected: Tūnis
112
-
113
- - source: السُعُودِيَّة
114
- expected: As Su‘ūdīyah
115
-
116
- - source: اليَمَن
117
- expected: Al Yaman
118
-
119
- - source: السُودان
120
- expected: As Sūdān
121
-
122
- - source: الجَزائِر
123
- expected: Al Jazā'ir
124
-
125
- - source: الجُمهُورِيَّة اللُبنانِيَّة
126
- expected: Al Jumhūrīyah al Lubnānīyah
127
-
128
- - source: أسمَرة
129
- expected: Asmarah
130
-
131
- - source: جِدَّة
132
- expected: Jiddah
133
-
134
- - source: مَكَّة
135
- expected: Makkah
136
-
137
- - source: الرِيَاض
138
- expected: Ar Riyāḏ
139
-
140
- map:
141
- postrules:
142
- - pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
143
- result: "upcase"
144
- # don't capitalize the definite article in the middle of a sentence
145
- - pattern : ' At T' # الت
146
- result: ' at T'
147
- - pattern : ' Ath Th' # الث
148
- result: ' ath Th'
149
- - pattern : ' Ad D' # الد
150
- result: ' ad D'
151
- - pattern : ' Adh Dh' # الذ
152
- result: ' adh Dh'
153
- - pattern : ' Ar R' # الر
154
- result: ' ar R'
155
- - pattern : ' Az Z' # الز
156
- result: ' az Z'
157
- - pattern : ' As S' # الس
158
- result: ' as S'
159
- - pattern : ' Ash Sh' # الش
160
- result: ' ash Sh'
161
- - pattern : ' As̱ S̱' # الص
162
- result: ' as̱ S̱'
163
- - pattern : ' Aḏ Ḏ' # الض
164
- result: ' aḏ Ḏ'
165
- - pattern : ' Aṯ Ṯ' # الط
166
- result: ' aṯ Ṯ'
167
- - pattern : ' Ad͟h D͟h' # الظ
168
- result: ' ad͟h D͟h'
169
- - pattern : ' Al L' # الل
170
- result: ' al L'
171
- - pattern : ' An N' # الن
172
- result: ' an N'
173
- - pattern: " Al " # ال
174
- result: " al "
175
-
176
- characters:
177
-
178
- # Tool used for Unicode finding:
179
- # https://www.branah.com/unicode-converter
180
-
181
- # pointing
182
- '\u064e' : 'a' # َ fatha
183
- '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
184
- '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
185
- '\u0650' : 'i' # ِ kasra
186
- '\u064f' : 'u' # ُ damma
187
- '\u0652' : '' # ْ sokoon, see note A below
188
-
189
-
190
- # special pointed letters
191
- '\u0639\u064e' : '‘a' # عَ
192
- '\u0639\u0650' : '‘i' # عِ
193
- '\u0639\u064f' : '‘ū' # عُ
194
- # handle MacOS regex difference
195
- '\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
196
-
197
- '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
198
- '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
199
- '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
200
- '\u064f\u0648' : 'ū' # ـُو damma followed by و
201
- '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
202
- '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
203
- '\u064e\u0648\u0652' : 'aw' # ـَوْ
204
- '\u064e\u064a\u0652' : 'ay' # ـَيْ
205
- '\u0622' : 'ā' # آ
206
-
207
- # (A) Marks absence of the vowel.
208
- # (B) Marks doubling of the consonant.
209
-
210
- # Sun letters
211
- '\b\u0627\u0644\u062a' : 'at t' # الت
212
- '\b\u0627\u0644\u062b' : 'ath th' # الث
213
- '\b\u0627\u0644\u062f' : 'ad d' # الد
214
- '\b\u0627\u0644\u0630' : 'adh dh' # الذ
215
- '\b\u0627\u0644\u0631' : 'ar r' # الر
216
- '\b\u0627\u0644\u0632' : 'az z' # الز
217
- '\b\u0627\u0644\u0633' : 'as s' # الس
218
- '\b\u0627\u0644\u0634' : 'ash sh' # الش
219
- '\b\u0627\u0644\u0635' : 'as̱ s̱' # الص
220
- '\b\u0627\u0644\u0636' : 'aḏ ḏ' # الض
221
- '\b\u0627\u0644\u0637' : 'aṯ ṯ' # الط
222
- '\b\u0627\u0644\u0638' : 'ad͟h d͟h' # الظ
223
- '\b\u0627\u0644\u0644' : 'al l' # الل
224
- '\b\u0627\u0644\u0646' : 'an n' # الن
225
-
226
- # ta' marboota
227
- '\u0629' : 'at' # ة in the middle of the sentence
228
- '\u0629$' : 'ah'
229
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
230
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
231
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
232
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
233
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
234
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
235
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
236
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
237
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
238
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
239
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
240
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
241
-
242
- # shadda
243
-
244
- '\u0628\u0651' : 'bb' # ب
245
- '\u062a\u0651' : 'tt' # ت
246
- '\u062b\u0651' : 'thth' # ث
247
- '\u062c\u0651' : 'jj' # ج
248
- '\u062d\u0651' : 'ẖẖ' # ح
249
- '\u062e\u0651' : 'khkh' # خ
250
- '\u062f\u0651' : 'dd' # د
251
- '\u0630\u0651' : 'dhdh' # ذ
252
- '\u0631\u0651' : 'rr' # ر
253
- '\u0632\u0651' : 'zz' # ز
254
- '\u0633\u0651' : 'ss' # س
255
- '\u0634\u0651' : 'shsh' # ش
256
- '\u0635\u0651' : 's̱s̱' # ص
257
- '\u0636\u0651' : 'ḏḏ' # ض
258
- '\u0637\u0651' : 'ṯṯ' # ط
259
- '\u0638\u0651' : 'd͟hd͟h' # ظ
260
- '\u063a\u0651' : 'ghgh' # غ
261
- '\u0641\u0651' : 'ff' # ف
262
- '\u0642\u0651' : 'qq' # ق
263
- '\u0643\u0651' : 'kk' # ك
264
- '\u0644\u0651' : 'll' # ل
265
- '\u0645\u0651' : 'mm' # م
266
- '\u0646\u0651' : 'nn' # ن
267
- '\u0647\u0651' : 'hh' # ه
268
- '\u0648\u0651' : 'ww' # و
269
- '\u064a\u0651' : 'yy' # ي
270
-
271
- '\u0626' : "'" # ئ
272
-
273
-
274
- '\u0621' : # ء
275
- - '’'
276
- - '' # see note A
277
-
278
- '\u0623' : 'a' # أ
279
- '\u0627' : 'ā' # ا
280
-
281
- # See note B
282
- '\b\u0627\u0644' : 'al ' # ال
283
- # '\uFE8E' : '' # ﺎ
284
-
285
- '\u0628' : 'b' # ب
286
- '\uFE91' : 'b' # ﺑ
287
- '\uFE92' : 'b' # ﺒ
288
- '\uFE90' : 'b' # ﺐ
289
-
290
- # See note C
291
- '\u062a' : 't' # ت
292
- '\ufe97' : 't' # ﺗ
293
- '\ufe98' : 't' # ﺘ
294
- '\ufe96' : 't' # ﺖ
295
-
296
- '\u062b' : 'th' # ث
297
- '\ufe9b' : 'th' # ﺛ
298
- '\ufe9c' : 'th' # ﺜ
299
- '\ufe9a' : 'th' # ﺚ
300
-
301
- '\u062c' : 'j' # ج
302
- '\ufe9f' : 'j' # ﺟ
303
- '\ufea0' : 'j' # ﺠ
304
- '\ufe9e' : 'j' # ﺞ
305
-
306
- '\u062d' : 'ẖ' # ح
307
- '\ufea3' : 'ẖ' # ﺣ
308
- '\ufea4' : 'ẖ' # ﺤ
309
- '\ufea2' : 'ẖ' # ﺢ
310
-
311
- '\u062e' : 'kh' # خ
312
- '\ufea7' : 'kh' # ﺧ
313
- '\ufea8' : 'kh' # ﺨ
314
- '\ufea6' : 'kh' # ﺦ
315
-
316
- '\u062f' : 'd' # د
317
- '\ufeaa' : 'd' # ﺪ
318
-
319
- '\u0630' : 'dh' # ذ
320
- '\ufeac' : 'dh' # ﺬ
321
-
322
- '\u0631' : 'r' # ر
323
- '\ufeae' : 'r' # ﺮ
324
-
325
- '\u0632' : 'z' # ز
326
- '\ufeb0' : 'z' # ﺰ
327
-
328
- '\u0633' : 's' # س
329
- '\ufeb3' : 's' # ﺳ
330
- '\ufeb4' : 's' # ﺴ
331
- '\ufeb2' : 's' # ﺲ
332
-
333
- '\u0634' : 'sh' # ش
334
- '\ufeb7' : 'sh' # ﺷ
335
- '\ufeb8' : 'sh' # ﺸ
336
- '\ufeb6' : 'sh' # ﺶ
337
-
338
- '\u0635' : 's̱' # ص
339
- '\ufebb' : 's̱' # ﺻ
340
- '\ufebc' : 's̱' # ﺼ
341
- '\ufeba' : 's̱' # ﺺ
342
-
343
- '\u0636' : 'ḏ' # ض
344
- '\ufebf' : 'ḏ' # ﺿ
345
- '\ufec0' : 'ḏ' # ﻀ
346
- '\ufebe' : 'ḏ' # ﺾ
347
-
348
- '\u0637' : 'ṯ' # ط
349
- '\ufec3' : 'ṯ' # ﻃ
350
- '\ufec4' : 'ṯ' # ﻄ
351
- '\ufec2' : 'ṯ' # ﻂ
352
-
353
- '\u0638' : 'd͟h' # ظ
354
- '\ufec7' : 'd͟h' # ﻇ
355
- '\ufec8' : 'd͟h' # ﻈ
356
- '\ufec6' : 'd͟h' # ﻆ
357
-
358
- '\u0639' : '‘' # ع
359
- '\ufecb' : '‘' # ﻋ
360
- '\ufecc' : '‘' # ﻌ
361
- '\ufeca' : '‘' # ﻊ
362
-
363
- '\u063a' : 'gh' # غ
364
- '\ufecf' : 'gh' # ﻏ
365
- '\ufed0' : 'gh' # ﻐ
366
- '\ufece' : 'gh' # ﻎ
367
-
368
- '\u0641' : 'f' # ف
369
- '\ufed3' : 'f' # ﻓ
370
- '\ufed4' : 'f' # ﻔ
371
- '\ufed2' : 'f' # ﻒ
372
-
373
- '\u0642' : 'q' # ق
374
- '\ufed7' : 'q' # ﻗ
375
- '\ufed8' : 'q' # ﻘ
376
- '\ufed6' : 'q' # ﻖ
377
-
378
- '\u0643' : 'k' # ك
379
- '\ufedb' : 'k' # ﻛ
380
- '\ufedc' : 'k' # ﻜ
381
- '\ufeda' : 'k' # ﻚ
382
-
383
- '\u0644' : 'l' # ل
384
- '\ufedf' : 'l' # ﻟ
385
- '\ufee0' : 'l' # ﻠ
386
- '\ufede' : 'l' # ﻞ
387
-
388
- '\u0645' : 'm' # م
389
- '\ufee3' : 'm' # ﻣ
390
- '\ufee4' : 'm' # ﻤ
391
- '\ufee2' : 'm' # ﻢ
392
-
393
- '\u0646' : 'n' # ن
394
- '\ufee7' : 'n' # ﻧ
395
- '\ufee8' : 'n' # ﻨ
396
- '\ufee6' : 'n' # ﻦ
397
-
398
- # See note C
399
- '\u0647' : 'h' # ه
400
- '\ufeeb' : 'h' # ﻫ
401
- '\ufeec' : 'h' # ﻬ
402
- '\ufeea' : 'h' # ﻪ
403
-
404
- '\u0648' : 'w' # و
405
- '\ufeee' : 'w' # ﻮ
406
-
407
- '\u064a' : 'y' # ي
408
- '\ufef3' : 'y' # ﻳ
409
- '\ufef4' : 'y' # ﻴ
410
- '\ufef1' : 'y' # ﻱ
411
-
412
- # (A) Not romanized word-initially.
413
-
414
- # (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
415
-
416
- # (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṯah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t, instead.
417
-
418
-
419
- # Vowels, diphthongs and diacritical marks
420
- # (ـ stands for any consonant)