interscript 0.1.9 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,109 +0,0 @@
1
- ---
2
- authority_id: mvd
3
- id: 2008
4
- language: iso-639-2:rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: 8/19678 On approval of the Instructions for transliteration of surnames and proper names of citizens of the Republic of Belarus when their personal data is included in the population register
8
- url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
- creation_date: 2008
10
-
11
- notes:
12
- - check notes from mvd-bel-Cyrl-Latn-2008
13
-
14
- tests:
15
- - source: Ева
16
- expected: Eva
17
- - source: Васiльева
18
- expected: Vasiĺeva
19
- - source: Адъютантов
20
- expected: Adjutantov
21
-
22
- map:
23
- rules:
24
- # note[5]
25
- - pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
26
- result: "\\1\u0301"
27
- - pattern: (?<=[ЗзЛлНнСсЦц])\u02B9 # Ь after consonants
28
- result: "\\1\u0301"
29
- - pattern: ([’Ъъ]\u042E)
30
- result: Ju
31
- - pattern: ([’Ъъ]\u044E)
32
- result: ju
33
- - pattern: ([’Ъъ]\u042F)
34
- result: Ja
35
- - pattern: ([’Ъъ]\u044F)
36
- result: ja
37
-
38
- characters:
39
- '’' : 'j'
40
-
41
- '\u0410' : 'A' # А
42
- '\u0411' : 'B' # Б
43
- '\u0412' : 'V' # В
44
- '\u0413' : 'G' # Г
45
- '\u0414' : 'D' # Д
46
- '\u0415' : 'E' # Е
47
- '\u0401' : 'E' # Ё
48
- '\u0416' : 'Zh' # Ж
49
- '\u0417' : 'Z' # З
50
- '\u0406' : 'I' # І
51
- '\u0419' : "J" # Й
52
- '\u041A' : 'K' # К
53
- '\u041B' : 'L' # Л
54
- '\u041C' : 'M' # М
55
- '\u041D' : 'N' # Н
56
- '\u041E' : 'O' # О
57
- '\u041F' : 'P' # П
58
- '\u0420' : 'R' # Р
59
- '\u0421' : 'S' # С
60
- '\u0422' : 'T' # Т
61
- '\u0423' : 'U' # У
62
- '\U040E' : 'W' # Ў
63
- '\u0424' : 'F' # Ф
64
- '\u0425' : 'Kh' # Х
65
- '\u0426' : 'Ts' # Ц
66
- '\u0427' : 'Ch' # Ч
67
- '\u0428' : 'Sh' # Ш
68
- '\u0429' : 'Shch' # Щ
69
- '\u042A' : 'J' # Ъ
70
- '\u042B' : 'Y' # Ы
71
- '\u042C' : '' # Ь
72
- '\u042D' : 'E' # Э
73
- '\u042E' : 'Iu' # Ю
74
- '\u042F' : 'Ia' # Я
75
-
76
- '\u0430' : 'a' # а
77
- '\u0431' : 'b' # б
78
- '\u0432' : 'v' # в
79
- '\u0433' : 'g' # г
80
- '\u0434' : 'd' # д
81
- '\u0435' : 'e' # е
82
- '\u0451' : 'e' # ё
83
- '\u0436' : 'zh' # ж
84
- '\u0437' : 'z' # з
85
- '\u0456' : 'i' # і
86
- '\u0439' : 'j' # й
87
- '\u043A' : 'k' # к
88
- '\u043B' : 'l' # л
89
- '\u043C' : 'm' # м
90
- '\u043D' : 'n' # н
91
- '\u043E' : 'o' # о
92
- '\u043F' : 'p' # п
93
- '\u0440' : 'r' # р
94
- '\u0441' : 's' # с
95
- '\u0442' : 't' # т
96
- '\u0443' : 'u' # у
97
- '\u045E' : 'w' # ў
98
- '\u0444' : 'f' # ф
99
- '\u0445' : 'kh' # х
100
- '\u0446' : 'ts' # ц
101
- '\u0447' : 'ch' # ч
102
- '\u0448' : 'sh' # ш
103
- '\u0449' : 'shch' # щ
104
- '\u044A' : 'j' # ъ
105
- '\u044B' : 'y' # ы
106
- '\u044C' : '' # ь
107
- '\u044D' : 'e' # э
108
- '\u044E' : 'iu' # ю
109
- '\u044F' : 'ia' # я
@@ -1,37 +0,0 @@
1
- ---
2
- authority_id: mvd
3
- id: 2010
4
- language: iso-639-2:bel
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: |
8
- 8/22721 On approval of the Instructions on the organization of work of units of citizenship
9
- and migration of internal affairs bodies on the issuance, registration, exchange,
10
- invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
11
- url: https://pravo.by/document/?guid=3871&p0=W21022721
12
- creation_date: 2010
13
-
14
- description: |
15
- RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
16
- June 28, 2010 No. 200
17
- On approval of the Instructions on the organization of work of units of citizenship
18
- and migration of internal affairs bodies on the issuance, registration, exchange,
19
- invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
20
-
21
- notes:
22
- - check notes from mvd-rus-Cyrl-Latn-2008
23
-
24
- tests:
25
- - source: Ева
26
- expected: Eva
27
- - source: Васiльева
28
- expected: Vasileva
29
- - source: Адъютантов
30
- expected: Adjutantov
31
-
32
- map:
33
- inherit: "mvd-rus-Cyrl-Latn-2008"
34
-
35
- postrules:
36
- - pattern: \u0301 # remove diacritics
37
- result: ""
@@ -1,137 +0,0 @@
1
- ---
2
- authority_id: odni
3
- id: 2004
4
- language: ics-630-01:ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: Arabic Personal Names Office of the Director Of National Intelligence 2004 System
8
- url: https://github.com/interscript/interscript-private-references/blob/master/odni/Arabic_IC_Standard.doc
9
- creation_date: 2004
10
- confirmation date: 2004-06
11
- description:
12
- notes:
13
- - Long/Short Vowels — long and short vowels are not
14
- distinguished in this system, e.g. Samir (could be Saamir or
15
- Samiir in Arabic).
16
-
17
- - Double consonants — double consonants represented by the
18
- Arabic shaddah are shown in most cases (e.g., Hassan,
19
- Muhammad). Exceptions — ’ayn and consonants represented by
20
- digraphs are not doubled (e.g., al-Qadhafi [not
21
- al-Qadhdhafi], Mubashir [not Mubashshir]).
22
-
23
- - Hamzah (glottal stop) — the hamzah is represented by an
24
- apostrophe (’). Note that this is the same symbol used to
25
- represent another consonant, the ’ayn.
26
-
27
- - Ta’ marbutah (feminine ending marker) — in the construct
28
- form or when pronounced “t”, it is represented with a Roman
29
- t. In all other cases, it is represented with an h.
30
-
31
- - Digraphs — no distinction is made between digraphs such as
32
- sh and single contiguous letters (e.g., s followed by h).
33
-
34
- - Definite article “al” (‘the’) — follows Arabic spelling
35
- rather than pronunciation. That is, sun letter assimilation
36
- is not shown in the Romanized form (e.g., ’Abd-al-Rahman,
37
- not ’Abd-ar-Rahman).
38
-
39
- - Diphthongs — the second element of the diphthong is
40
- represented by a y or a w (rather than an i or a u), e.g.
41
- Haytham, Faysal, Tawfiq, Rawdah.
42
-
43
- - Hyphens — hyphens (-) are used to connect name elements
44
- within a name, e.g. ’Abd-al-Rahman, Abu-al-Bashar, Bin-Ladin.
45
- Exceptions — names that incorporate “Allah” as part of the
46
- name (e.g., ’Abdallah, Nasrallah) and names marked by the
47
- lineage/family marker “Al” (e.g., Al Thani) are not
48
- hyphenated.
49
-
50
- - The definite article, “al”, within name phrases, is
51
- Romanized as al and not as ul, e.g. Nur-al-Din (not Nur-ul-Din).
52
- It is not capitalized when name-initial.
53
-
54
- - Names that incorporate Allah as part of the name retain the
55
- a of Allah rather than a grammatical marker u, e.g. ’Abdallah
56
- (not ’Abdullah).
57
-
58
- - Foreign names borrowed or appearing in Arabic are spelled
59
- according to the standard Western tradition, e.g. Georges,
60
- Michel. However, names of non-Arabic origin no longer
61
- considered foreign by Arabic speakers follow the IC
62
- conventions, e.g. Butrus (not Peter).
63
-
64
- - Prefix ‫بن‬ (bin ‘son of’) is Romanized Bin unless written
65
- with an alif, in which case it is Romanized as Ibn. The
66
- colloquial form Bu (‘father’) should not be standardized as
67
- Abu. These prefixes are capitalized.
68
-
69
- - In general, Romanization follows the Modern Standard
70
- Arabic (MSA) form rather than local pronunciation
71
- standards. For example, the letter ‫ج‬ (jim) is represented
72
- as a j even when pronounced as a “g” (e.g., Egyptian Gamal
73
- is Romanized as Jamal).
74
-
75
- tests:
76
-
77
- - source: مِصر
78
- expected: Miṣr
79
-
80
- - source: قَطَر
81
- expected: Qaṭar
82
-
83
- - source: المَغرِب
84
- expected: Al Maghrib
85
-
86
- - source: الجُمهُورِيَّة العِراقِيَّة
87
- expected: Al Jumhuriyah al ’Iraqiyah
88
-
89
- - source: جُمهُورِيَّة العِراق
90
- expected: Jumhuriyat al ’Iraq
91
-
92
- - source: جُمهُورِيَّة مِصر العَرَبِيَّة
93
- expected: Jumhuriyat Miṣr al ’Arabiyah
94
-
95
- - source: بَغداد
96
- expected: Baghdad
97
-
98
- - source: تُونِس
99
- expected: Tunis
100
-
101
- - source: حَسّان
102
- expected: Hassan
103
-
104
- - source: مُحَمَّد
105
- expected: Muhammad
106
-
107
- - source: القَذَّافِي
108
- expected: Al Qadhafi
109
-
110
- - source: مُبَشِّر
111
- expected: Mubashir
112
-
113
- - source: الجَزائِر
114
- expected: Al Jaza’ir
115
-
116
- - source: عَبدالرَحمَن
117
- expected: ’Abd al Rahman
118
-
119
- - source: هَيْثَم
120
- expected: Haytham
121
-
122
- - source: فَيْصَل
123
- expected: Fayṣal
124
-
125
- - source: تَوْفِيق
126
- expected: Tawfiq
127
-
128
- - source: رَوْضَة
129
- expected: Rawḍah
130
-
131
- - source: نُورُالدِين
132
- expected: Nur al Din
133
-
134
- - source: عَبدُاللَّه
135
- expected: ’Abdallah
136
- map:
137
- inherit: odni-ara-Arab-Latn-2015
@@ -1,315 +0,0 @@
1
- ---
2
- authority_id: odni
3
- id: 2017
4
- language: ics-630-01:ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: Office of the Director Of National Intelligence Arabic Personal Names 2015 System
8
- url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20A%20-%20Arabic_Personal_Names_FLTS%20(U).pdf
9
- creation_date: 2017
10
- confirmation_date: 2018-06
11
- description: |
12
- This system, adapted from the Board on Geographic Names, is
13
- the Intelligence Community (IC) standard for the
14
- transliteration of Arabic names that will be applied to all
15
- final written reports and products for IC consumers. It is
16
- not intended to eliminate variations of a name that can
17
- contribute forensic information. Rather, it is to provide
18
- an IC standard Romanized (English) transliteration from
19
- modern standard Arabic that can then be linked to forensic
20
- information in ways that will help identify the referent of
21
- the name. Ambiguities can result from the Romanization of
22
- Arabic names because the Arabic source generally omits
23
- short vowel markings, double consonant marks, and other
24
- diacritics that would clearly distinguish the name.
25
- Linguists use their experience with the language and aids
26
- such as on-line tools and name dictionaries to determine
27
- the exact Arabic and the appropriate transliteration into
28
- the Roman alphabet. In cases where an individual's name has
29
- already been transliterated, that is to be indicated -- as
30
- found -- in parentheses immediately following its rendition
31
- in the transliteration standard (e.g., Muhammad Khulud (
32
- Mohamed Khulood)). In addition, if the original Arabic-
33
- script spelling is known, that spelling should also appear
34
- in parentheses following the name, if possible, following
35
- best practices of the issuing organization and taking into
36
- consideration information system capabilities. This
37
- convention is designed to ensure that vital forensic
38
- information is not lost. For names of persons who are known
39
- to not be part of the Arabic-speaking community, use the
40
- relevant IC transliteration standard for names from that
41
- language (e.g., Mikhail, Yitzhak). A translator’s note may
42
- be used to clarify the known origin of the person. Spell
43
- names of individuals from languages that are written in
44
- Roman letters as they are spelled in those languages (e.g.,
45
- George Clooney, Jorge Garcia, Georges Pompidou). In the
46
- case of active senior government officials in the on-line
47
- CIA World Factbook and the online directory of Chiefs of
48
- State and Cabinet Members of Foreign Governments, the
49
- spellings given in these on-line reference works should be
50
- used in place of the IC Standard. For any individual who
51
- has at one time been listed in the Factbook or Chiefs of
52
- State directory but who no longer appears in those
53
- resources (i.e. is no longer a government official), the IC
54
- Standard spelling should appear first, with the spelling,
55
- if known, as it previously appeared in those resources
56
- listed within parentheses at the first usage. The primary
57
- goal of this system is to produce a consistent Romanized
58
- transcription of the name that is readable to the non-
59
- specialist. The system uses the 26 letters of the standard (
60
- English) Roman alphabet plus the apostrophe. Some
61
- ambiguities in the Romanized form will occur without the
62
- use of diacritics. However, within the context of a report,
63
- where additional information about the individual is
64
- provided, the referent will be clearly identified. This
65
- system will be used in conjunction with on-line tools, name
66
- dictionaries, and lists containing conventional spellings
67
- of names of well-known individuals.
68
- notes: |
69
- - Long/Short Vowels: Long and short vowels are not
70
- distinguished in this system: Samir (could be Saamir or
71
- Samiir in Arabic).
72
-
73
- - Double consonants: Double consonants represented by the
74
- Arabic shaddah are shown in most cases (e.g., Hassan,
75
- Muhammad). Exceptions: ’ayn and consonants represented by
76
- digraphs are not doubled (e.g., al-Qadhafi [not
77
- al-Qadhdhafi], Mubashir [not Mubashshir]).
78
-
79
- - Hamzah (glottal stop): The hamzah is represented by an
80
- apostrophe (’). Note that this is the same symbol used to
81
- represent another consonant, the ’ayn.
82
-
83
- - Ta’ marbutah (feminine ending marker): On the construct
84
- form or when pronounced “t”, it is represented with a roman
85
- t. In all other cases, it is represented with an h.
86
-
87
- - Digraphs: No distinction is made between digraphs such as
88
- sh and single contiguous letters (e.g., s followed by h).
89
-
90
- - Definite article “al” (‘the’): Follows Arabic spelling
91
- rather than pronunciation. That is, sun letter assimilation
92
- is not shown in the Romanized form (e.g., ’Abd-al-Rahman,
93
- not ’Abd-ar-Rahman).
94
-
95
- - Diphthongs: the second element of the diphthong is
96
- represented by a y or a w (rather than an i or a u):
97
- Haytham, Faysal, Tawfiq, Rawdah.
98
-
99
- - Hyphens: Hyphens (-) are used to connect name elements
100
- within a name: ’Abd-al-Rahman, Abu-al-Bashar, Bin-Ladin.
101
- Exceptions: Names that incorporate “Allah” as part of the
102
- name (e.g., ’Abdallah, Nasrallah), names marked by the
103
- lineage/family marker “Al” (e.g., Al Thani) are not
104
- hyphenated.
105
-
106
- - The definite article, “al”, within name phrases, is
107
- Romanized as al and not as ul: Nur-al-Din (not Nur-ul-Din).
108
- It is not capitalized when name-initial.
109
-
110
- - Names that incorporate Allah as part of the name retain the
111
- a of Allah rather than a grammatical marker u: ’Abdallah (
112
- not ’Abdullah).
113
-
114
- - Foreign names borrowed or appearing in Arabic are spelled
115
- according to the standard Western tradition: Georges,
116
- Michel. However, names of non-Arabic origin no longer
117
- considered foreign by Arabic speakers follow the IC
118
- conventions: Butrus (not Peter).
119
-
120
- - Prefix ‫بن‬ (bin ‘son of’) is Romanized Bin unless written
121
- with an alif, in which case it is Romanized as Ibn. The
122
- colloquial form Bu (‘father’) should not be standardized as
123
- Abu. These prefixes are capitalized.
124
-
125
- - In general, Romanization follows the Modern Standard
126
- Arabic (MSA) form rather than local pronunciation
127
- standards. For example, the letter ‫ج‬ (jim) is represented
128
- as a j even when pronounced as a “g” (e.g., Egyptian Gamal
129
- is Romanized as Jamal).
130
-
131
- tests:
132
-
133
- - source: مِصر
134
- expected: Miṣr
135
-
136
- - source: قَطَر
137
- expected: Qaṭar
138
-
139
- - source: المَغرِب
140
- expected: Al Maghrib
141
-
142
- - source: الجُمهُورِيَّة العِراقِيَّة
143
- expected: Al Jumhuriyah al ’Iraqiyah
144
-
145
- - source: جُمهُورِيَّة العِراق
146
- expected: Jumhuriyat al ’Iraq
147
-
148
- - source: جُمهُورِيَّة مِصر العَرَبِيَّة
149
- expected: Jumhuriyat Miṣr al ’Arabiyah
150
-
151
- - source: بَغداد
152
- expected: Baghdad
153
-
154
- - source: تُونِس
155
- expected: Tunis
156
-
157
- - source: حَسّان
158
- expected: Hassan
159
-
160
- - source: مُحَمَّد
161
- expected: Muhammad
162
-
163
- - source: القَذَّافِي
164
- expected: Al Qadhafi
165
-
166
- - source: مُبَشِّر
167
- expected: Mubashir
168
-
169
- - source: الجَزائِر
170
- expected: Al Jaza’ir
171
-
172
- - source: عَبدالرَحمَن
173
- expected: ’Abd al Rahman
174
-
175
- - source: هَيْثَم
176
- expected: Haytham
177
-
178
- - source: فَيْصَل
179
- expected: Fayṣal
180
-
181
- - source: تَوْفِيق
182
- expected: Tawfiq
183
-
184
- - source: رَوْضَة
185
- expected: Rawḍah
186
-
187
- - source: نُورُالدِين
188
- expected: Nur al Din
189
-
190
- - source: عَبدُاللَّه
191
- expected: ’Abdallah
192
- map:
193
- postrules:
194
- - pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
195
- result: "upcase"
196
- - pattern: " Al " # ال
197
- result: " al "
198
-
199
- # don't capitalize the definite article in the middle of a sentence
200
-
201
- characters:
202
-
203
- # Tool used for Unicode finding:
204
- # https://www.branah.com/unicode-converter
205
-
206
- # pointing
207
- '\u064e' : 'a' # َ fatha
208
- '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
209
- '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
210
- '\u0650' : 'i' # ِ kasra
211
- '\u064f' : 'u' # ُ damma
212
- '\u0652' : '' # ْ sokoon, see note A below
213
-
214
-
215
- '\u0650\u064a' : 'i' # ـِي kasra followed by ي
216
- '\u0650\u064a\u0651\u064e' : 'iy' # ـِيَّ
217
- '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي, before fatha or damma
218
- '\u064f\u0648' : 'u' # ـُو damma followed by و
219
- '\u064e\u0627' : 'a' # ـَا fatha followed by ا
220
- '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
221
- '\u064e\u0648\u0652' : 'aw' # ـَوْ
222
- '\u064e\u064a\u0652' : 'ay' # ـَيْ
223
- '\u0622' : 'a' # آ
224
-
225
- # ta' marboota
226
- '\u0629' : 'at' # ة in the middle of the sentence
227
- '\u0629$' : 'ah'
228
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
229
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
230
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
231
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
232
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
233
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
234
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
235
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
236
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
237
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
238
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
239
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
240
-
241
- # shadda
242
-
243
- '\u0628\u0651' : 'bb' # ب
244
- '\u062a\u0651' : 'tt' # ت
245
- '\u062b\u0651' : 'th' # ث
246
- '\u062c\u0651' : 'jj' # ج
247
- '\u062d\u0651' : 'hh' # ح
248
- '\u062e\u0651' : 'kh' # خ
249
- '\u062f\u0651' : 'dd' # د
250
- '\u0630\u0651' : 'dh' # ذ
251
- '\u0631\u0651' : 'rr' # ر
252
- '\u0632\u0651' : 'zz' # ز
253
- '\u0633\u0651' : 'ss' # س
254
- '\u0634\u0651' : 'sh' # ش
255
- '\u0635\u0651' : 'ṣṣ' # ص
256
- '\u0636\u0651' : 'ḍḍ' # ض
257
- '\u0637\u0651' : 'ṭṭ' # ط
258
- '\u0638\u0651' : 'ẓẓ' # ظ
259
- '\u063a\u0651' : 'gh' # غ
260
- '\u0641\u0651' : 'ff' # ف
261
- '\u0642\u0651' : 'qq' # ق
262
- '\u0643\u0651' : 'kk' # ك
263
- '\u0644\u0651' : 'll' # ل
264
- '\u0645\u0651' : 'mm' # م
265
- '\u0646\u0651' : 'nn' # ن
266
- '\u0647\u0651' : 'hh' # ه
267
- '\u0648\u0651' : 'ww' # و
268
- '\u064a\u0651' : 'yy' # ي
269
-
270
- '\u0626' : "’" # ئ
271
-
272
- '\b\u0627\u0644\u0644\u0651\u064e\u0647': 'Allah'
273
-
274
- '\B\u064f?\u0627\u0644\u0644\u0651\u064e\u0647': 'allah'
275
-
276
- '\u0621' : # ء
277
- - '’'
278
- - ''
279
-
280
- '\b\u0627\u0644' : 'al ' # ال
281
- '\B\u064f?\u0627\u0644' : ' al ' # ال in middle of composite name
282
-
283
- # '\uFE8E' : '' # ﺎ
284
-
285
-
286
- '\u0623' : '' # أ
287
- '\b\u0627' : '' # ا
288
- '\u0627' : 'a' # ا
289
- '\u0628' : 'b' # ب
290
- '\u062a' : 't' # ت
291
- '\u062b' : 'th' # ث
292
- '\u062c' : 'j' # ج
293
- '\u062d' : 'h' # ح
294
- '\u062e' : 'kh' # خ
295
- '\u062f' : 'd' # د
296
- '\u0630' : 'dh' # ذ
297
- '\u0631' : 'r' # ر
298
- '\u0632' : 'z' # ز
299
- '\u0633' : 's' # س
300
- '\u0634' : 'sh' # ش
301
- '\u0635' : 'ṣ' # ص
302
- '\u0636' : 'ḍ' # ض
303
- '\u0637' : 'ṭ' # ط
304
- '\u0638' : 'ẓ' # ظ
305
- '\u0639' : '’' # ع
306
- '\u063a' : 'gh' # غ
307
- '\u0641' : 'f' # ف
308
- '\u0642' : 'q' # ق
309
- '\u0643' : 'k' # ك
310
- '\u0644' : 'l' # ل
311
- '\u0645' : 'm' # م
312
- '\u0646' : 'n' # ن
313
- '\u0647' : 'h' # ه
314
- '\u0648' : 'w' # و
315
- '\u064a' : 'y' # ي