interscript 0.1.7 → 2.1.0b1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (314) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +116 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +83 -133
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +68 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +68 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +76 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +68 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/lib/interscript/visualize.rb +61 -0
  63. data/lib/interscript/visualize/group.html.erb +59 -0
  64. data/lib/interscript/visualize/json.rb +57 -0
  65. data/lib/interscript/visualize/map.html.erb +46 -0
  66. data/lib/interscript/visualize/nodes.rb +89 -0
  67. data/requirements.txt +1 -0
  68. metadata +78 -416
  69. data/README.adoc +0 -298
  70. data/lib/g2pwrapper.py +0 -34
  71. data/lib/interscript/fs.rb +0 -69
  72. data/lib/interscript/mapping.rb +0 -142
  73. data/lib/interscript/opal.rb +0 -57
  74. data/lib/interscript/opal/entrypoint.rb +0 -12
  75. data/lib/interscript/opal/map_translate.rb +0 -7
  76. data/lib/interscript/opal/maps.js.erb +0 -10
  77. data/lib/model-7 +0 -0
  78. data/lib/tha-pt-b-7 +0 -0
  79. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  80. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  81. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  82. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  83. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -165
  84. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -40
  85. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  86. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  87. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  88. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  89. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  90. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  91. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  92. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  93. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -211
  94. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -47
  95. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  96. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  97. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  98. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  99. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  100. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  101. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  102. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  103. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  104. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  105. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  106. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  107. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  108. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  109. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  110. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  111. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  112. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -172
  113. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  114. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  115. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  116. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  117. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  118. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  119. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  120. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  121. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  122. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  123. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  124. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  125. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  126. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  127. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  128. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  129. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  130. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -596
  131. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  132. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  133. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  134. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  135. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  136. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  137. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  138. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  139. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  140. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  141. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  142. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  143. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  144. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  145. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  146. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  147. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  148. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  149. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  150. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  151. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -336
  152. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -639
  153. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  154. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  155. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  156. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  157. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  158. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  159. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  160. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  161. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  162. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  163. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  164. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  165. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  166. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  167. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  168. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  169. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  170. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  171. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  172. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  173. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  174. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  175. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  176. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  177. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  178. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  179. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  180. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  181. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  182. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  183. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  184. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  185. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  186. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  187. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  188. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  189. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  190. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  191. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  192. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  193. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  194. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  195. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  196. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  197. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  198. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  199. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  200. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  201. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  202. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  203. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  204. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  205. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  206. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  207. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  208. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  209. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  210. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  211. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  212. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  213. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +0 -220
  214. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  215. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  216. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  217. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  218. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  219. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  220. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  221. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  222. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  223. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  224. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  225. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  226. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  227. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  228. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  229. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  230. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  231. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  232. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  233. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  234. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  235. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  236. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  237. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  238. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  239. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  240. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  241. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  242. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  243. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  244. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -425
  245. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  246. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  247. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  248. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  249. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  250. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  251. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  252. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  253. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  254. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  255. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  256. data/maps/odni-per-Arab-Latn-2015.yaml +0 -228
  257. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  258. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  259. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  260. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  261. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  262. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  263. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  264. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  265. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  266. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  267. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  268. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  269. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  270. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  271. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  272. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  273. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  274. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  275. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  276. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  277. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  278. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  279. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  280. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  281. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  282. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  283. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  284. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  285. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  286. data/maps/un-hin-Deva-Latn-2016.yaml +0 -222
  287. data/maps/un-mar-Deva-Latn-2016.yaml +0 -91
  288. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  289. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  290. data/maps/un-nep-Deva-Latn-1972.yaml +0 -350
  291. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  292. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  293. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  294. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  295. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  296. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  297. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  298. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  299. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  300. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  301. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  302. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  303. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  304. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  305. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  306. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  307. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  308. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  309. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  310. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  311. data/spec/interscript/filenames_spec.rb +0 -384
  312. data/spec/interscript/mapping_spec.rb +0 -42
  313. data/spec/interscript_spec.rb +0 -29
  314. data/spec/spec_helper.rb +0 -3
@@ -1,596 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1956
4
- language: iso-639-2:ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ROMANIZATION OF ARABIC -- BGN/PCGN 1956 System
8
- alias:
9
- ogc11122:
10
- code: ara_Arab2Latn_BGN_1956
11
- description: Arabic US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1956 System
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/858000/ROMANIZATION_OF_ARABIC.pdf
13
- creation_date: 1956
14
- confirmation date: 2019-12
15
- description: |
16
-
17
- This System was adopted by the BGN in 1946 and by the PCGN in 1956 and is applied by BGN and PCGN in the systematic romanization of Arabic geographical names in Bahrain, Egypt, Iraq, Jordan, Kuwait, Libya, Oman, Qatar, Saudi Arabia, Syria, the United Arab Emirates, Yemen, the West Bank and Gaza Strip.
18
-
19
- Uniform results in the romanization of Arabic are difficult to obtain, since vowel points and diacritical marks are generally omitted from both handwriting and printed script. It follows that for correct identification of the words which appear in any particular name, knowledge of its standard Arabic-script spelling including proper pointing, and recognition of dialectal and idiosyncratic deviations are essential.
20
-
21
- In order to bring about uniformity in the Roman-script spelling of geographical names in Arabic-language areas, the system is based insofar as possible on fully pointed Modern Standard Arabic (MSA). In the interest of clarity, vowel pointing to indicate short vowels has been applied to the examples given below, and examples of the, more usual, unpointed script have also been provided; it should also be noted that the dots which occur on some characters of the Arabic script are not vowels but rather are an integral part of the base consonant.
22
-
23
- Arabic script is written from right to left, and does not make a distinction between upper and lower case.
24
-
25
-
26
-
27
- notes: |
28
-
29
- - (NOTE 1) The symbol ◌ is used in this system to symbolise any Arabic consonant character. It is not itself an Arabic letter.
30
-
31
- - (NOTE 2) Hamzah (ء) is written in Arabic in association with most instances of initial alif, except those which belong to the definite article al or which bear a maddah (see note 9). Hamzah is written above the alif ( أَ) if the accompanying short vowel is a fatḩah or ḑammah and usually below the alif( أ ) if the accompanying short vowel is a kasrah.
32
- When the purpose is to indicate the presence of a glottal stop, hamzah is written over medial alif ( أ ), wāw (ؤ) and yā’, typically without dots (ئ); or following final alif ( أ ء ), these characters serving only to “bear” the hamzah. Hamzah following kasrah ( ) is written (ئ); the yā’ is usually in the initial or medial form and the dots are omitted e.g. bi’r ( بئ ر ).
33
- Hamzah following ḑammah ( ) is written (ؤ). Hamzah following a long vowel is written without a bearer and is positioned on the line of print like a regular character, e.g. صنعاء Şan‘ā’. The romanization of hamzah (’ - Unicode encoding 2019) should always be carefully distinguished from that of ‘ayn (‘ - Unicode encoding 2018).
34
-
35
- - (NOTE 3) Alif (ا) occurs with the following uses
36
- a. Initially, it indicates that the word begins with a vowel or diphthong; the alif itself is not romanized, but rather “carries” the short vowel, which is romanized; e.g., ظب ي أبو → Abū Z̧aby.
37
-
38
- b. With maddah (آ – row 18 in the vowel table), it is represented ā; e.g., مُ عيط آلب و → Ālbū Mu‘ayţ. See also note 9.
39
-
40
- c. Medially and finally it is represented ā; e.g., ب ا ب → Bāb, صيدا → Şaydā.
41
-
42
- d. Medially and finally, alif may serve as the bearer of hamzah, e.g. رأس → ra’s. See also note 2.
43
-
44
- - (NOTE 4) The tā’ marbūţah character (ة), which looks like hā’ with two dots above and occurs only at the end of words, is romanized h, except in an iḑāfah noun phrase construction, where it is romanized t, in accordance with pronunciation. e.g. Muḩāfaz̧ah (as an isolated word) but Muḩāfaz̧at Baghdād. In exceptional cases, when it is necessary to distinguish it from the tā’ marbūţah, the ending fatḩah + hā’ ( ه ) may be romanized a·h when the character hā’ (ه) is pronounced as such. Example Muntaza·h. (see also special rule 13). The tā’ marbūţah is always preceded by the short vowel fatḩah ( ) and is therefore romanized as ah or at, except when it is preceded by alif when it is romanized āh (not āah), e.g. Ḩamāh (حماة ), and as āt within an iḑāfah construction.
45
-
46
- - (NOTE 5) The character yā’ (in final form but without dots) preceded by the vowel point fatḩah is known as alif maqşūrah. This character may also be pointed ى and should be romanized á. See character 7 in the vowel table.
47
-
48
- - (NOTE 6) The classical Arabic grammatical endings written with the nunation symbols (tanwīn) may be romanized, when necessary, by an, in, un. In modern spoken Arabic, these endings have become silent and should not be romanized e.g. classical alifun; modern alif.
49
-
50
- - (NOTE 7) Doubled consonant sounds are represented in Arabic script by placing a shaddah ( ) over a consonant character, although like the short vowels the shaddah may not always be written. In romanization the letter should be doubled, e.g. Quwwah, ‘Abbās. However, the combination of the consonant character yā’ with a shaddah preceded by a kasrah ( ي ) at the end of a word is romanized ī, e.g. Gharbī; a word ending kasrah + yā’ with a shaddah + tā’ marbūţah is romanized īyah (rather than iyyah), e.g. ال س ل يمانِ ية
51
- is romanized As Sulaymānīyah and not As Sulaymāniyyah; and when the kasrah + yā’ + shaddah combination is followed by the sound masculine plural ending ( يين or يون ) it should be romanized as –īyīn/īyūn, e.g. ساحة العباسيين should be romanized as Sāḩat al ‘Abbāsīyīn.
52
-
53
- - (NOTE 8) Hamzat al waşl (ٱ), which is utilized only in the pointing of classical Arabic, is romanized ’ as illustrated in the classical form of its name hamzatu’l waşli.
54
-
55
- - (NOTE 9) Since maddah ( أ ), which is placed over alif ( أ ), often occurs in word-initial position, no confusion results from the use of ā for alif maddah ( أ ) as well as for fatḩah followed by alif ( اَ ).
56
-
57
- - (NOTE 10) The ligature ل ا represents lām-alif, and should be romanized lā.
58
-
59
- - (NOTE 11) In word initial position the combination Alif +Wāw (او ) is sometimes used to render an initial long vowel sound in words of non-Arabic origin. Where this is clearly the case it should be romanized Ū. In words of Arabic or uncertain origin it should be romanized Aw. In word-medial or word-final position it should always be romanized āw. Similarly the combination Alif +Yā’ (اي ) is romanized Ī to render an initial long vowel sound but as āy in word-medial or word-final position.
60
-
61
- # SPECIAL RULES
62
-
63
- - The Arabic definite article al (ال ) should be treated as follows |
64
- a. Initial definite articles should be capitalized and hyphens should not be used to connect parts of names, e.g. Ash Shāriqah. When appearing medially in a name the initial ‘a’ should be lower case, e.g. Tall al Laḩm.
65
-
66
- b. When the definite article precedes a word beginning with one of the “sun letters” t, th, d, dh, r, z, s, sh, ş, ḑ, ţ, z̧, l, or n – the l is assimilated in pronunciation and romanization, thus yielding, for example, the romanization Ar Riyāḑ, rather than Al Riyāḑ for ال ريا ض .
67
-
68
- c. If sources contradict over the inclusion or non-inclusion of the definite article in a name, preference should be given to the form with the article.
69
-
70
- - Conjunctions and prepositions should be romanized according to their written form in Arabic script and should be lower case. In cases where the conjunction or preposition ends in a long or short vowel any assimilated pronunciation should not be shown in the romanized form. e.g. Khabb wa ash Sha‘f (خب والشعف ). |
71
-
72
- There are two exceptions to this rule
73
-
74
- a. In the case of the preposition li (ل), where the alif of the definite article is assimilated in the written form as well as pronunciation, the written form should be shown in romanization as follows Mişr liţ Ţayarān (مصر للطيران ); Ash Sharikah al ‘Āmmah lil Maghāzil (الشركة العامة للمغازل ).
75
-
76
- b. In the case of the preposition bi (ب), the alif of the definite article is assimilated in pronunciation and, although the alif remains in the written form the short vowel it carries changes from ‘a’ to ‘i’. For example Al Qaryah bid Duwayr (القرية بالدوير ) but Ad Duwayr (الدوير ); and Al Ḩarajah bil Qur’ān (الحرجة بالقرآن ) but Al Qur’ān (القرآن ).
77
-
78
-
79
- - The Arabic word for God ( لله) should be written Allāh. The alif khanjarīyah (dagger alif) ( ) above the second ل (lām) in the word لله , like the short vowels, is not usually written but should be romanized ā, like a full-size alif. This diacritical mark appears in a few other Arabic words, for instance on the alif maqşūrah as described in note 5.
80
-
81
- - Names which consist of noun phrases (see also note 4) should be written as separate words. The definite article within such names should be romanized al, not ul, e.g., ‘Abd Allāh, ‘Abd ar Raḩmān, Dhū al Faqār, and as noted in special rule 1, the medial al should be lower case.
82
-
83
- - The Arabic word ب ن should be romanized Bin rather than Ibn whenever written without alif, that is between two proper nouns, e.g., ‘Umar Bin al Khaţţāb. Where it appears with alif ( )اب ن , it should be romanized Ibn.
84
-
85
- - The Turkish word Paşa should be romanized from Arabic script as Bāshā. The Turkish word Bey should be romanized as Bey in Egyptian names, no matter how it is written in Arabic-language sources, but in other Arabic areas it should be romanized as Bak where written بك and as Bayk when written بيك .
86
-
87
- - The modern colloquial word Sīdī (سيدي ) should be given precedence over the classical form Sayyidī. This does not preclude the spelling Sayyidī if the latter is indicated by the Arabic script or other evidence – for instance, if the yā’ is written with a shaddah ( ).
88
-
89
- - The colloquial word Bū should not be changed to the standard form Abū.
90
-
91
- - The colloquial word for water, written مي ة on Arabic maps, should be romanized Mayyat.
92
-
93
- - Place names of Aramaic origin in Syria often contain initial consonant clusters consisting of b plus another consonant such as l or h. In romanization, the clusters bl, bh, etc., should be so represented.
94
-
95
- - In names containing the Arabic word for back, ridge, or hill, appearing as either ظهر (Z̧ahr) or ضه ر (Ḑahr) in Arabic sources, the word should be romanized to reflect the particular Arabic spelling shown. Where sources differ, preference should be given to the form found on the most authoritative source.
96
-
97
- - In formal Arabic, the spelling of some words ending in a long vowel character may change according to that word’s grammatical function in a sentence. For example, the personal name Abū Bakr (ابو بكر ) would become Abī Bakr (ابي بكر ) when preceded by a generic in an iḑāfah construction (used in Moroccan Arabic Script) e.g. Shāri‘ Abī Bakr (شارع ابي بكر – Abu Bakr Street). The spelling of such words as found on the most authoritative source should be used in the romanized form of the name. Other common words affected by this rule are Banū/Banī (sons of…) and Dhū/Dhī (owner of ...). Examples of names in this category include Jabal Abā aş Şabbān (جبل ابا الصبان ) and Muḩāfaz̧at Dhī Qār ( محافظة ذي قار ).
98
-
99
- - Occasionally the character sequences ك ه , ده , س ه , and ته occur. They may be romanized k·h, d·h, s·h, and t·h in order to differentiate these romanizations from the digraphs kh, dh, sh, and th, which are used to represent the characters خ, ذ, ش, and ث respectively. See also note 4.
100
-
101
-
102
- tests:
103
-
104
- - source: قُرآن
105
- expected: Qur’ān
106
-
107
- - source: أَبُو ظَبْي
108
- expected: Abū Z̧aby
109
-
110
- - source: بِئْر زَيْت
111
- expected: Bi’r Zayt
112
-
113
- - source: أُمّ العَمَد
114
- expected: Umm al ‘Amad
115
-
116
- - source: البَحرَيْن
117
- expected: Al Baḩrayn
118
-
119
- - source: الكُوت
120
- expected: Al Kūt
121
-
122
- - source: الثُّلَيْثُوَات
123
- expected: Ath Thulaythuwāt
124
-
125
- - source: الجَزِيرَة
126
- expected: Al Jazīrah
127
-
128
- - source: المَحْمُودِيَّة
129
- expected: Al Maḩmūdīyah
130
-
131
- - source: خَيْبَر
132
- expected: Khaybar
133
-
134
- - source: دَمَنْهُور
135
- expected: Damanhūr
136
-
137
- - source: ذَهَب
138
- expected: Dhahab
139
-
140
- - source: الرَّوْضة
141
- expected: Ar Rawḑah
142
-
143
- - source: زُوَارَة
144
- expected: Zuwārah
145
-
146
- - source: السُّلَيْمانِيَّة
147
- expected: As Sulaymānīyah
148
-
149
- - source: الشَّام
150
- expected: Ash Shām
151
-
152
- - source: قَيْصُومَة
153
- expected: Qayşūmah
154
-
155
- - source: ضَوْر
156
- expected: Ḑawr
157
-
158
- - source: القُنَيْطِرَة
159
- expected: Al Qunayţirah
160
-
161
- - source: ظُفَار
162
- expected: Z̧ufār
163
-
164
- - source: أَبُو عَرِيش
165
- expected: Abū ‘Arīsh
166
-
167
- - source: بَغْداد
168
- expected: Baghdād
169
-
170
- - source: الفُرات
171
- expected: Al Furāt
172
-
173
- - source: قَطَر
174
- expected: Qaţar
175
-
176
- - source: الكُوَيْت
177
- expected: Al Kuwayt
178
-
179
- - source: حَلَب
180
- expected: Ḩalab
181
-
182
- - source: مَكَّة
183
- expected: Makkah
184
-
185
- - source: نَخْل
186
- expected: Nakhl
187
-
188
- - source: جَبَل هارُون
189
- expected: Jabal Hārūn
190
-
191
- - source: وادِي غَضَا
192
- expected: Wādī Ghaḑā
193
-
194
- - source: اليَمَن
195
- expected: Al Yaman
196
-
197
- - source: القاهِرَة
198
- expected: Al Qāhirah
199
-
200
- - source: المَدِينَة المُنَوَّرَة
201
- expected: Al Madīnah al Munawwarah
202
-
203
- - source: مُحَافَظَة دِمَشْق
204
- expected: Muḩāfaz̧at Dimashq
205
-
206
- - source: البَصْرَة
207
- expected: Al Başrah
208
-
209
- - source: الرِّيَاض
210
- expected: Ar Riyāḑ
211
-
212
- - source: القُدْس
213
- expected: Al Quds
214
-
215
- - source: بَاب المَنْدَب
216
- expected: Bāb al Mandab
217
-
218
- - source: المَدِينة
219
- expected: Al Madīnah
220
-
221
- - source: صُور
222
- expected: Şūr
223
-
224
- - source: مَرْسَىٰ مَطْرُوح
225
- expected: Marsá Maţrūḩ
226
-
227
- - source: صَيْدَا
228
- expected: Şaydā
229
-
230
- - source: الدَّوحَة
231
- expected: Ad Dawḩah
232
-
233
- - source: مُحَمَّد
234
- expected: Muḩammad
235
-
236
- - source: أُوزُونْلَار
237
- expected: Ūzūnlār
238
-
239
- - source: أَوْسَط
240
- expected: Awsaţ
241
-
242
- - source: سَنَاو
243
- expected: Sanāw
244
-
245
- - source: اِيرَان
246
- expected: Īrān
247
-
248
- - source: تَلّ السَّرَاي
249
- expected: Tall as Sarāy
250
-
251
- - source: آلْبُو مُعَيْط
252
- expected: Ālbū Mu‘ayţ
253
-
254
- - source: سَلْمان پَاك
255
- expected: Salmān Pāk
256
-
257
- - source: تَلّ كُوچِك الصَّغِير
258
- expected: Tall Kūchik aş Şaghīr
259
-
260
- # - source: مَزََّة ڤِيلَّات غَرْبِيَّة
261
- # expected: Mazzah Vīllāt Gharbīyah
262
-
263
- - source: ڨَفْصَة
264
- expected: Gafşah
265
-
266
- - source: تَلّ گَمْر
267
- expected: Tall Gamr
268
-
269
- - source: زَاڴُورَة
270
- expected: Zāgūrah
271
-
272
-
273
- map:
274
- postrules:
275
- - pattern: '(?<=\b)(?<!\b[‘|’])[\u0061-\uFFFF]'
276
- result: "upcase"
277
- # don't capitalize the definite article in the middle of a sentence
278
- - pattern : ' At T' # الت
279
- result: ' at T'
280
- - pattern : ' Ath Th' # الث
281
- result: ' ath th'
282
- - pattern : ' Ad D' # الد
283
- result: ' ad D'
284
- - pattern : ' Adh Dh' # الذ
285
- result: ' adh Dh'
286
- - pattern : ' Ar R' # الر
287
- result: ' ar R'
288
- - pattern : ' Az Z' # الز
289
- result: ' az Z'
290
- - pattern : ' As S' # الس
291
- result: ' as S'
292
- - pattern : ' Ash Sh' # الش
293
- result: ' ash Sh'
294
- - pattern : ' Aş Ş' # الص
295
- result: ' aş Ş'
296
- - pattern : ' Aḑ Ḑ' # الض
297
- result: ' aḑ Ḑ'
298
- - pattern : ' Aţ Ţ' # الط
299
- result: ' aţ Ţ'
300
- - pattern : ' Az̧ Z̧' # الظ
301
- result: ' az̧ Z̧'
302
- - pattern : ' Al L' # الل
303
- result: ' al L'
304
- - pattern : ' an n' # الن
305
- result: ' an N'
306
- - pattern: " Al " # ال
307
- result: " al "
308
-
309
- characters:
310
-
311
- # Modified/Non-Standard Arabic Script Characters
312
-
313
- '\u067E': 'p'
314
- '\u0686': 'ch'
315
- '\u06A4': 'v'
316
- # Used in Tunisian Arabic Script.
317
- '\u06A8': 'g'
318
- # Used principally in Iraq, but also sometimes used in other Arabic-speaking countries to represent the ‘g’ sound.
319
- '\u06AF': 'g'
320
- # Used in Moroccan Arabic Script.
321
- '\u06B4': 'g' # ڭ
322
- '\u06AD': 'g'
323
-
324
-
325
-
326
- # pointing
327
-
328
-
329
- # Note 11
330
- '\b\u0627\u0648': 'ū' #او
331
- '\b\u0627\u0648\u0652' : 'aw' # اوْ
332
- '\u0627\u0648': 'āw' #او in word medial or final position
333
-
334
- '\b\u0627\u064A': 'ī' # اي in word initial position (see Note 11)
335
- '\u0627\u064A' : 'āy' # اي in word medial or final position
336
-
337
-
338
- '\u064e' : 'a' # َ fatha
339
- '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
340
- '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
341
- '\u0650' : 'i' # ِ kasra
342
- '\u064f' : 'u' # ُ damma
343
-
344
- '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
345
- '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
346
- '\u064f\u0648' : 'ū' # ـُو damma followed by و
347
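- '\u064f\u0648(?=\u064e|u064f)' : 'uw' # ـُو damma followed by و, before a following short vowel (NOTE: the second alternative `u064f` lacks a backslash, so it matches the literal text "u064f", not damma)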
348
- '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي Note 5
349
- '\u064e\u0649\u0670' : 'á' # Note 5
350
- '\u0649\u0670': 'á' # See Note 5
351
- '\u0652' : '' # ْ sokoon, not romanized, Indicates absence of short vowel
352
- '\u064e\u064a\u0652' : 'ay' # ـَيْ
353
- '\u064e\u064a' : 'aī' # ـَي
354
-
355
- '\u064e\u0648\u0652' : 'aw' # ـَوْ
356
- '\u064b': '' # See Note 6
357
- '\u064d': '' # See Note 6
358
- '\u064c': '' # See Note 6
359
-
360
-
361
- # special pointed letters
362
- '\u0639\u064e' : '‘a' # عَ
363
- '\u0639\u0650' : '‘i' # عِ
364
- '\u0639\u064f' : '‘ū' # عُ
365
-
366
- # Note 2
367
- '\u0623' : ''
368
- # '\u0623\u064e' : 'a' # أَ
369
- # '\u0625\u0650' : 'i' # إِ
370
- # '\u0623\u064f' : 'u' # أُ
371
- # '\u0623\u064f\u0648' : 'ū' # أُ
372
-
373
- # handle MacOS regex difference
374
- '\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
375
-
376
- '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
377
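- '\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي, before a following short vowel (NOTE: the second alternative `u064f` lacks a backslash, so it matches the literal text "u064f", not damma)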
378
-
379
- # not romanized in word-initial position (see Note 2)
380
- '\u0621': '’'
381
-
382
- '\b\u0622' : 'ā' # آ in word initial position (see Notes 3 and 9)
383
- '\u0622': '’ā' # آ in word medial position (see Notes 3 and 9)
384
- '\u0671': '’' # See Note 8
385
- '\u0626' : "’" # ئ
386
- '\u0627': 'ā' # See Notes 3 and 10
387
-
388
- '\b\u0627\u0648' : 'ū' # اُ
389
- '\b\u0627\u0650\u064a' : 'ī' # اي
390
- '\b\u0627\u0644' : 'al ' # ال
391
-
392
- # Sun letters
393
-
394
- '\b\u0627\u0644\u062a\u0651?' : 'at t' # الت
395
- '\b\u0627\u0644\u062b\u0651?' : 'ath th' # الث
396
- '\b\u0627\u0644\u062f\u0651?' : 'ad d' # الد
397
- '\b\u0627\u0644\u0630\u0651?' : 'adh dh' # الذ
398
- '\b\u0627\u0644\u0631\u0651?' : 'ar r' # الر
399
- '\b\u0627\u0644\u0632\u0651?' : 'az z' # الز
400
- '\b\u0627\u0644\u0633\u0651?' : 'as s' # الس
401
- '\b\u0627\u0644\u0634\u0651?' : 'ash sh' # الش
402
- '\b\u0627\u0644\u0635\u0651?' : 'aş ş' # الص
403
- '\b\u0627\u0644\u0636\u0651?' : 'aḑ ḑ' # الض
404
- '\b\u0627\u0644\u0637\u0651?' : 'aţ ţ' # الط
405
- '\b\u0627\u0644\u0638\u0651?' : 'az̧ z̧' # الظ
406
- '\b\u0627\u0644\u0644\u0651?' : 'al l' # الل
407
- '\b\u0627\u0644\u0646\u0651?' : 'an n' # الن
408
-
409
- # shadda Note 7
410
-
411
- '\u0628\u0651' : 'bb' # ب
412
- '\u062a\u0651' : 'tt' # ت
413
- '\u062b\u0651' : 'thth' # ث
414
- '\u062c\u0651' : 'jj' # ج
415
- '\u062d\u0651' : 'ḩḩ' # ح
416
- '\u062e\u0651' : 'khkh' # خ
417
- '\u062f\u0651' : 'dd' # د
418
- '\u0630\u0651' : 'dhdh' # ذ
419
- '\u0631\u0651' : 'rr' # ر
420
- '\u0632\u0651' : 'zz' # ز
421
- '\u0633\u0651' : 'ss' # س
422
- '\u0634\u0651' : 'sh' # ش
423
- '\u0635\u0651' : 'şş' # ص
424
- '\u0636\u0651' : 'ḑḑ' # ض
425
- '\u0637\u0651' : 'ţţ' # ط
426
- '\u0638\u0651' : 'z̧z̧' # ظ
427
- '\u063a\u0651' : 'ghgh' # غ
428
- '\u0641\u0651' : 'ff' # ف
429
- '\u0642\u0651' : 'qq' # ق
430
- '\u0643\u0651' : 'kk' # ك
431
- '\u0644\u0651' : 'll' # ل
432
- '\u0645\u0651' : 'mm' # م
433
- '\u0646\u0651' : 'nn' # ن
434
- '\u0647\u0651' : 'hh' # ه
435
- '\u0648\u0651' : 'ww' # و
436
- '\u064a\u0651' : 'yy' # ي
437
-
438
- # ta' marboota See Note 4
439
-
440
- '\u0629' : 'at' # ة in the middle of the sentence
441
- '\u0629$' : 'ah'
442
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
443
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
444
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
445
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
446
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
447
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
448
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
449
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
450
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
451
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
452
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
453
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
454
-
455
-
456
- # standard consonant characters
457
-
458
- '\u0628' : 'b' # ب
459
- '\uFE91' : 'b' # ﺑ
460
- '\uFE92' : 'b' # ﺒ
461
- '\uFE90' : 'b' # ﺐ
462
-
463
- '\u062a' : 't' # ت
464
- '\ufe97' : 't' # ﺗ
465
- '\ufe98' : 't' # ﺘ
466
- '\ufe96' : 't' # ﺖ
467
-
468
- '\u062b' : 'th' # ث
469
- '\ufe9b' : 'th' # ﺛ
470
- '\ufe9c' : 'th' # ﺜ
471
- '\ufe9a' : 'th' # ﺚ
472
-
473
- '\u062c' : 'j' # ج
474
- '\ufe9f' : 'j' # ﺟ
475
- '\ufea0' : 'j' # ﺠ
476
- '\ufe9e' : 'j' # ﺞ
477
-
478
- '\u062d' : 'ḩ' # ح
479
- '\ufea3' : 'ḩ' # ﺣ
480
- '\ufea4' : 'ḩ' # ﺤ
481
- '\ufea2' : 'ḩ' # ﺢ
482
-
483
- '\u062e' : 'kh' # خ
484
- '\ufea7' : 'kh' # ﺧ
485
- '\ufea8' : 'kh' # ﺨ
486
- '\ufea6' : 'kh' # ﺦ
487
-
488
- '\u062f' : 'd' # د
489
- '\ufeaa' : 'd' # ﺪ
490
-
491
- '\u0630' : 'dh' # ذ
492
- '\ufeac' : 'dh' # ﺬ
493
-
494
- '\u0631' : 'r' # ر
495
- '\ufeae' : 'r' # ﺮ
496
-
497
- '\u0632' : 'z' # ز
498
- '\ufeb0' : 'z' # ﺰ
499
-
500
- '\u0633' : 's' # س
501
- '\ufeb3' : 's' # ﺳ
502
- '\ufeb4' : 's' # ﺴ
503
- '\ufeb2' : 's' # ﺲ
504
-
505
- '\u0634' : 'sh' # ش
506
- '\ufeb7' : 'sh' # ﺷ
507
- '\ufeb8' : 'sh' # ﺸ
508
- '\ufeb6' : 'sh' # ﺶ
509
-
510
- '\u0635' : 'ş' # ص
511
- '\ufebb' : 'ş' # ﺻ
512
- '\ufebc' : 'ş' # ﺼ
513
- '\ufeba' : 'ş' # ﺺ
514
-
515
- '\u0636' : 'ḑ' # ض
516
- '\ufebf' : 'ḑ' # ﺿ
517
- '\ufec0' : 'ḑ' # ﻀ
518
- '\ufebe' : 'ḑ' # ﺾ
519
-
520
- '\u0637' : 'ţ' # ط
521
- '\ufec3' : 'ţ' # ﻃ
522
- '\ufec4' : 'ţ' # ﻄ
523
- '\ufec2' : 'ţ' # ﻂ
524
-
525
- '\u0638' : 'z̧' # ظ
526
- '\ufec7' : 'z̧' # ﻇ
527
- '\ufec8' : 'z̧' # ﻈ
528
- '\ufec6' : 'z̧' # ﻆ
529
-
530
- '\u0639' : '‘' # ع
531
- '\ufecb' : '‘' # ﻋ
532
- '\ufecc' : '‘' # ﻌ
533
- '\ufeca' : '‘' # ﻊ
534
-
535
- '\u063a' : 'gh' # غ
536
- '\ufecf' : 'gh' # ﻏ
537
- '\ufed0' : 'gh' # ﻐ
538
- '\ufece' : 'gh' # ﻎ
539
-
540
- '\u0641' : 'f' # ف
541
- '\ufed3' : 'f' # ﻓ
542
- '\ufed4' : 'f' # ﻔ
543
- '\ufed2' : 'f' # ﻒ
544
-
545
- '\u0642' : 'q' # ق
546
- '\ufed7' : 'q' # ﻗ
547
- '\ufed8' : 'q' # ﻘ
548
- '\ufed6' : 'q' # ﻖ
549
-
550
- '\u0643' : 'k' # ك
551
- '\ufedb' : 'k' # ﻛ
552
- '\ufedc' : 'k' # ﻜ
553
- '\ufeda' : 'k' # ﻚ
554
-
555
- '\u0644' : 'l' # ل
556
- '\ufedf' : 'l' # ﻟ
557
- '\ufee0' : 'l' # ﻠ
558
- '\ufede' : 'l' # ﻞ
559
-
560
- '\u0645' : 'm' # م
561
- '\ufee3' : 'm' # ﻣ
562
- '\ufee4' : 'm' # ﻤ
563
- '\ufee2' : 'm' # ﻢ
564
-
565
- '\u0646' : 'n' # ن
566
- '\ufee7' : 'n' # ﻧ
567
- '\ufee8' : 'n' # ﻨ
568
- '\ufee6' : 'n' # ﻦ
569
-
570
- '\u0647' : 'h' # ه
571
- '\ufeeb' : 'h' # ﻫ
572
- '\ufeec' : 'h' # ﻬ
573
- '\ufeea' : 'h' # ﻪ
574
-
575
- '\u0648' : 'w' # و
576
- '\ufeee' : 'w' # ﻮ
577
-
578
- '\u064a' : 'y' # ي
579
- '\ufef3' : 'y' # ﻳ
580
- '\ufef4' : 'y' # ﻴ
581
- '\ufef1' : 'y' # ﻱ
582
-
583
-
584
- # NUMERALS
585
-
586
- # Although Perso-Arabic script is written from right to left, numerical expressions, e.g. ۱۹٦۸ → 1968, are written from left to right.
587
- '۰': '0'
588
- '۱': '1'
589
- '۲': '2'
590
- '۳': '3'
591
- '٤': '4'
592
- '٥': '5'
593
- '٦': '6'
594
- '۷': '7'
595
- '۸': '8'
596
- '۹': '9'