interscript 0.1.9 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,166 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1965
4
- language: iso-639-2:ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN 1965 System
8
- alias:
9
- ogc11122:
10
- code: ukr_Cyrl2Latn_BGN_1965
11
- description: Ukrainian Board on Geographic Names/Permanent Committee on Geographical Names for British Official Use(PCGN) 1965 System
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
13
- creation_date: 1947
14
- confirmation_date: 2019-06
15
- description: |
16
- The BGN/PCGN system for Ukrainian was designed for use in romanizing
17
- names written in the Ukrainian alphabet. The Ukrainian alphabet
18
- contains five characters not present in the Russian alphabet: ґ, є, і,
19
- ї, and ’.
20
-
21
- notes:
22
- - The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
23
- - All apostrophes appearing in romanization are Unicode encoding 2019.
24
- - The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
25
-
26
- tests:
27
- - source: Авдіївська Міськрада
28
- expected: Avdiyivs’ka Mis’krada
29
- - source: Бабаї
30
- expected: Babayi
31
- - source: Віленька
32
- expected: Vilen’ka
33
- - source: Гагарінський Район
34
- expected: Haharins’kyy Rayon
35
- - source: Довбушева Криниця
36
- expected: Dovbusheva Krynytsya
37
- - source: Дідівщина
38
- expected: Didivshchyna
39
- - source: Економічна
40
- expected: Ekonomichna
41
- - source: Єфросинівка
42
- expected: Yefrosynivka
43
- - source: Жигуліна Роща
44
- expected: Zhyhulina Roshcha
45
- - source: Загір’я
46
- expected: Zahir”ya
47
- - source: З’єднувальний Канал
48
- expected: Z”yednuval’nyy Kanal
49
- - source: Ивахи
50
- expected: Yvakhy
51
- - source: Івано-Франківська Міськрада
52
- expected: Ivano-Frankivs’ka Mis’krada
53
- - source: Їжаківка
54
- expected: Yizhakivka
55
- - source: Йосиповичі
56
- expected: Yosypovychi
57
- - source: Кабичівка
58
- expected: Kabychivka
59
- - source: Лазуровий Провулок
60
- expected: Lazurovyy Provulok
61
- - source: Мала Сейдеминуха
62
- expected: Mala Seydemynukha
63
- - source: Нагірний
64
- expected: Nahirnyy
65
- - source: Овер’янівське Озеро
66
- expected: Over”yanivs’ke Ozero
67
- - source: Павлопільське Водосховище
68
- expected: Pavlopil’s’ke Vodoskhovyshche
69
- - source: Приґородний
70
- expected: Prygorodnyy
71
- - source: Радгосп Правда
72
- expected: Radhosp Pravda
73
- - source: Садово-Хрустальненський
74
- expected: Sadovo-Khrustal’nens’kyy
75
- - source: Таратутине
76
- expected: Taratutyne
77
- - source: Улу-Узень
78
- expected: Ulu-Uzen’
79
- - source: Христофорівка
80
- expected: Khrystoforivka
81
- - source: Центральна Вулиця
82
- expected: Tsentral’na Vulytsya
83
- - source: Чайковичі
84
- expected: Chaykovychi
85
- - source: Шалаші
86
- expected: Shalashi
87
- - source: Щербинівка
88
- expected: Shcherbynivka
89
- - source: Южноукраїнська Міськрада
90
- expected: Yuzhnoukrayins’ka Mis’krada
91
- - source: Ясениця
92
- expected: Yasenytsya
93
-
94
- map:
95
- rules:
96
- - pattern: \b\u2019\b # ’ in the middle of a word -> ”
97
- result: "\u201d"
98
-
99
- characters:
100
- "\u0430": 'a'
101
- "\u0431": 'b'
102
- "\u0432": 'v'
103
- "\u0433": 'h'
104
- "\u0434": 'd'
105
- "\u0435": 'e'
106
- "\u0436": 'zh'
107
- "\u0437": 'z'
108
- "\u0438": 'y'
109
- "\u0439": 'y'
110
- "\u043a": 'k'
111
- "\u043b": 'l'
112
- "\u043c": 'm'
113
- "\u043d": 'n'
114
- "\u043e": 'o'
115
- "\u043f": 'p'
116
- "\u0440": 'r'
117
- "\u0441": 's'
118
- "\u0442": 't'
119
- "\u0443": 'u'
120
- "\u0444": 'f'
121
- "\u0445": 'kh'
122
- "\u0446": 'ts'
123
- "\u0447": 'ch'
124
- "\u0448": 'sh'
125
- "\u0449": 'shch'
126
- "\u044c": "\u2019"
127
- "\u044e": 'yu'
128
- "\u044f": 'ya'
129
- "\u0454": 'ye'
130
- "\u0456": 'i'
131
- "\u0457": 'yi'
132
- "\u0491": 'g'
133
- "\ufeff": ' '
134
- "\u0404": 'Ye'
135
- "\u0406": 'I'
136
- "\u0407": 'Yi'
137
- "\u0410": 'A'
138
- "\u0411": 'B'
139
- "\u0412": 'V'
140
- "\u0413": 'H'
141
- "\u0414": 'D'
142
- "\u0415": 'E'
143
- "\u0416": 'Zh'
144
- "\u0417": 'Z'
145
- "\u0418": 'Y'
146
- "\u0419": 'Y'
147
- "\u041a": 'K'
148
- "\u041b": 'L'
149
- "\u041c": 'M'
150
- "\u041d": 'N'
151
- "\u041e": 'O'
152
- "\u041f": 'P'
153
- "\u0420": 'R'
154
- "\u0421": 'S'
155
- "\u0422": 'T'
156
- "\u0423": 'U'
157
- "\u0424": 'F'
158
- "\u0425": 'Kh'
159
- "\u0426": 'Ts'
160
- "\u0427": 'Ch'
161
- "\u0428": 'Sh'
162
- "\u0429": 'Shch'
163
- "\u042c": "\u2019"
164
- "\u042e": 'Yu'
165
- "\u042f": 'Ya'
166
- "\u0490": 'G'
@@ -1,119 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2019
4
- language: iso-639-2:ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN 2019 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
9
- creation_date: 2019
10
- confirmation_date: 2020-01
11
- description: |
12
- The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
13
- in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
14
- since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
15
-
16
- notes:
17
- - |
18
- The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
19
- of the national system within Ukraine. Note, however, that this system is not recommended for
20
- reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
21
- This system also lacks the methodology outlined in the 1965 System to provide additional
22
- differentiation between digraphs and individual character sequences.
23
- For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
24
- sequences зг, кг, сг, тс, and тсг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
25
- from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
26
- the characters ж, х, ш, ц and the character sequence тш.
27
- - To use the keyboard Unicode function, hold ALT and enter the sequence listed in the table.
28
- - The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
29
- - These characters differ significantly in romanization from the BGN/PCGN 1965 system.
30
-
31
- tests:
32
- - source: Алушта
33
- expected: Alushta
34
- - source: Борщагівка
35
- expected: Borshchahivka
36
- - source: Вишгород
37
- expected: Vyshhorod
38
- - source: Гадяч
39
- expected: Hadiach
40
- - source: Згорани
41
- expected: Zghorany
42
- - source: Ґалаґан
43
- expected: Galagan
44
- - source: Дон
45
- expected: Don
46
- - source: Рівне
47
- expected: Rivne
48
- - source: Єнакієве
49
- expected: Yenakiieve
50
- - source: Наєнко
51
- expected: Naienko
52
- - source: Житомир
53
- expected: Zhytomyr
54
- - source: Запоріжжя
55
- expected: Zaporizhzhia
56
- - source: Закарпаття
57
- expected: Zakarpattia
58
- - source: Медвин
59
- expected: Medvyn
60
- - source: Іршава
61
- expected: Irshava
62
- - source: Їжакевич
63
- expected: Yizhakevych
64
- - source: Кадіївка
65
- expected: Kadiivka
66
- - source: Йосипівка
67
- expected: Yosypivka
68
- - source: Стрий
69
- expected: Stryi
70
- - source: Київ
71
- expected: Kyiv
72
- - source: Лебедин
73
- expected: Lebedyn
74
- - source: Миколаїв
75
- expected: Mykolaiv
76
- - source: Ніжин
77
- expected: Nizhyn
78
- - source: Одеса
79
- expected: Odesa
80
- - source: Полтава
81
- expected: Poltava
82
- - source: Ромни
83
- expected: Romny
84
- - source: Суми
85
- expected: Sumy
86
- - source: Тетерів
87
- expected: Teteriv
88
- - source: Ужгород
89
- expected: Uzhhorod
90
- - source: Фастів
91
- expected: Fastiv
92
- - source: Харків
93
- expected: Kharkiv
94
- - source: Біла Церква
95
- expected: Bila Tserkva
96
- - source: Чернівці
97
- expected: Chernivtsi
98
- - source: Шостка
99
- expected: Shostka
100
- - source: Гоща
101
- expected: Hoshcha
102
- - source: Русь
103
- expected: Rus
104
- - source: Юрій
105
- expected: Yurii
106
- - source: Крюківка
107
- expected: Kriukivka
108
- - source: Яготин
109
- expected: Yahotyn
110
- - source: Ічня
111
- expected: Ichnia
112
- - source: Знам’янка
113
- expected: Znamianka
114
-
115
- map:
116
- inherit: un-ukr-Cyrl-Latn-2012
117
-
118
- characters:
119
- "\u0027": '' # ' ->
@@ -1,459 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2007
4
- language: iso-639-2:urd
5
- source_script: Arab
6
- destination_script: Latn
7
- name: BGN/PCGN Romanization System -- Urdu (2007)
8
- alias:
9
- ogc11122:
10
- code: uas_Arab2Latn_BGN_2007
11
- description: Unified Afghan Romanization System US Board on Geographic Names (BGN)/The Permanent Committee on Geographical Names (PCGN) 2007
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693788/ROMANIZATION_OF_URDU.pdf
13
- creation_date: 2007
14
- confirmation_date: 2017-11
15
- description: |
16
- The following is the approved romanization system for
17
- deriving standard spellings of Urdu geographical names for
18
- Pakistan. It was jointly adopted by BGN and PCGN at the
19
- 23rd BGN/PCGN Conference in Washington, DC, in 2007 and it
20
- is based on the Hunterian romanization system for Urdu,
21
- which has been used by the Surveys of India and Pakistan
22
- for romanizing Urdu geographical names for more than one
23
- hundred years. The BGN/PCGN system laid out below includes
24
- diacritical marks in order that the original script can be
25
- derived from the romanized form (i.e. it is reversible).
26
- For desk users requiring a diacritic-free form, these
27
- diacritics can simply be removed. In every case the same
28
- basic Roman-script characters are kept as are used in the
29
- Hunterian system. The BGN/PCGN forms have further been
30
- designed to harmonize with the BGN/PCGN Persian
31
- romanization system.
32
- notes:
33
- - 1. When the vowel sign zīr ( ِ) occurs word-finally in the
34
- first element of a compound, it is assumed to mark the
35
- Persian izafat
36
- morpheme, and is romanized -e, not i.
37
- - 2. The source of almost all example names is the 1951
38
- Census of Pakistan, Village List, Northwest Frontier
39
- Province, Chitral
40
- State. Office of the Provincial Superintendent of Census,
41
- North-West Frontier Province, Peshawar.
42
- - 3. No examples of aspirated dental r (rh, رھ) were found,
43
- though this phoneme is assumed to be part of the phonology
44
- of
45
- Urdu, and was therefore left out of Table 2.
46
- - 4. Note that the short vowels in the Urdu examples are not
47
- pointed.
48
- - 5. Occasionally, sequences of /z/ or /s/ plus /h/ may be
49
- encountered, i.e. z·h, s·h. These may be romanized with the
50
- Unicode
51
- 'center dot' (U+00B7) separating the two letters, to
52
- distinguish them from the digraphs /zh/ and /sh/.
53
- - Commented tests are blocked by this issue https://github.com/interscript/interscript/issues/572
54
- depends on the different ways of handling ي to y or e AND و to u or o
55
-
56
-
57
- tests:
58
- # - source: بوغدِی
59
- # expected: Boghdī
60
-
61
- - source: پَالِير
62
- expected: Pālīr
63
-
64
- # - source: بیزوت كَلے
65
- # expected: Bezot Kale
66
-
67
- # - source: عَمَل كوٹ
68
- # expected: ‘Amal Koṭ
69
-
70
- - source: ثَابِر
71
- expected: S̄ābir
72
-
73
- - source: شَاه نَثَار ميلة
74
- expected: Shāh Nas̄ār Mylah
75
-
76
- # - source: بَرجُو ميلَه
77
- # expected: Barjū Melah
78
-
79
- - source: چَپرِی
80
- expected: Chaprī
81
-
82
- - source: أَحمَد خَان كَلے
83
- expected: Aḩmad Khān Kale
84
-
85
- # - source: آكَا خيل
86
- # expected: Ākā Khel
87
-
88
- - source: دُرَانِي
89
- expected: Durānī
90
-
91
- - source: ڈَنگِیلا
92
- expected: Ḍangīlā
93
-
94
- - source: ذَرَانِی
95
- expected: Z̄arānī
96
-
97
- - source: بُركِي
98
- expected: Burkī
99
-
100
- - source: گِیدَڑَه
101
- expected: Gīdaṛah
102
-
103
- - source: عَلِي زَائِي
104
- expected: ‘Alī Zā’ī
105
-
106
- # - source: ژوب
107
- # expected: Zhob
108
-
109
- - source: بِسَاتُو
110
- expected: Bisātū
111
-
112
- - source: أَحمَدِي شَامَا
113
- expected: Aḩmadī Shāmā
114
-
115
- - source: اَصَالَت كَلے
116
- expected: Aşālat Kale
117
-
118
- - source: خَضَر خَان
119
- expected: Khaẕar Khān
120
-
121
- - source: سُلْطَان
122
- expected: Sulţān
123
-
124
- - source: عَزَم سَيِّد نُور كَلے
125
- expected: ‘Azam Sayyid Nūr Kale
126
-
127
- # - source: عَلَم شير
128
- # expected: ‘Alam Sher
129
-
130
- - source: بغَاكِي
131
- expected: Bghākī
132
-
133
- # - source: مُظَفَر كوٹ
134
- # expected: Muz̧afar Koṭ
135
-
136
- - source: حَقدَرَه
137
- expected: Ḩaqdarah
138
-
139
- - source: کَچکِینَہ
140
- expected: Kachkīnah
141
-
142
- - source: بَاگَن
143
- expected: Bāgan
144
-
145
- - source: بُلبَلَک
146
- expected: Bulbalak
147
-
148
- - source: بِلیَامِین
149
- expected: Bilyāmīn
150
-
151
- - source: نَہر
152
- expected: Nahr
153
-
154
- # - source: جوکَالِیَاں
155
- # expected: Jokālīāñ
156
-
157
- - source: اَرَوْالِی
158
- expected: Arawālī
159
-
160
- # - source: هیروشاه
161
- # expected: Heroshāh
162
-
163
- - source: مَہردِی
164
- expected: Mahrdī
165
-
166
- - source: بَڑھ
167
- expected: Baṛh
168
-
169
- # - source: شِیوَاؤ
170
- # expected: Shīwā’o
171
-
172
- - source: یَاردَا کَلے
173
- expected: Yārdā Kale
174
-
175
- - source: بهَائِي خَان
176
- expected: Bhā’ī Khān
177
-
178
- - source: پھاشک
179
- expected: Phāshk
180
-
181
- - source: تھَلّ
182
- expected: Thall
183
-
184
- - source: پَٹھان ريَا
185
- expected: Paṭhān Ryā
186
-
187
- - source: جھِیل
188
- expected: Jhīl
189
-
190
- - source: غَزْنِي سْپِين
191
- expected: Ghaznī Spīn
192
-
193
- - source: بَادشَاه چھُم
194
- expected: Bādshāh Chhum
195
-
196
- - source: سِندھ
197
- expected: Sindh
198
-
199
- - source: ڈھَنڈ
200
- expected: Ḍhanḍ
201
-
202
- # - source: غوزگَڑھِی
203
- # expected: Ghozgaṛhī
204
-
205
- # - source: دوغَل گاکھَر
206
- # expected: Doghal Gākhar
207
-
208
- - source: خَان گھَڑِی
209
- expected: Khān Ghaṛī
210
-
211
- - source: غُلَامَک كَلے
212
- expected: Ghulāmak Kale
213
-
214
- # - source: کاراخیل
215
- # expected: Kārākhel
216
-
217
- - source: خَپیَنگا
218
- expected: Khapyangā
219
-
220
- - source: گَندَه كَلے
221
- expected: Gandah Kale
222
-
223
- # - source: گُلونَا ڈھيرِي
224
- # expected: Gulonā Ḍherī
225
-
226
- # - source: خيرَه دِين
227
- # expected: Kherah Dīn
228
-
229
- - source: مَورپِتھِی
230
- expected: Maurpithī
231
-
232
- - source: درے پلارِی
233
- expected: Dre Plārī
234
-
235
- - source: آگرَہ
236
- expected: Āgrah
237
-
238
- - source: ڈَنڈَر
239
- expected: Ḍanḍar
240
-
241
- # - source: گِیدو
242
- # expected: Gīdo
243
-
244
- - source: گُبازانَہ
245
- expected: Gubāzānah
246
-
247
- # - source: اُوشو
248
- # expected: Ūsho
249
-
250
- - source: حَےدَر عَلِی كَلے
251
- expected: Ḩaidar ‘Alī Kale
252
-
253
- - source: تَودَہ چِینَہ
254
- expected: Taudah Chīnah
255
-
256
- - source: مُوسى خَان كَلے
257
- expected: Mūsá Khān Kale
258
-
259
- - source: مُلَّا بَاغ
260
- expected: Mullā Bāgh
261
-
262
- map:
263
- postrules:
264
- - pattern: (?<=\b)(?<!\b[‘|’|'|-])[\u0061-\uFFFF]
265
- result: "upcase"
266
- # don't capitalize definite article in the middle of a sentence
267
- - pattern : ' At T' # الت
268
- result: ' at T'
269
- - pattern : ' As̄ S̄' # الث
270
- result: ' as̄ S̄'
271
- - pattern : ' Ad D' # الد
272
- result: ' ad D'
273
- - pattern : ' Az̄ Z̄' # الذ
274
- result: ' az̄ Z̄'
275
- - pattern : ' Ar R' # الر
276
- result: ' ar R'
277
- - pattern : ' Az Z' # الز
278
- result: ' az Z'
279
- - pattern : ' As S' # الس
280
- result: ' as S'
281
- - pattern : ' Ash Sh' # الش
282
- result: ' ash Sh'
283
- - pattern : ' Aş Ş' # الص
284
- result: ' aş Ş'
285
- - pattern : ' Aẕ Ẕ' # الض
286
- result: ' aẕ Ẕ'
287
- - pattern : ' Aţ Ţ' # الط
288
- result: ' aţ Ţ'
289
- - pattern : ' Az̧ Z̧' # الظ
290
- result: ' az̧ Z̧'
291
- - pattern : ' Al L' # الل
292
- result: ' al L'
293
- - pattern : ' An N' # الن
294
- result: ' an N'
295
- - pattern: " Al " # ال
296
- result: " al "
297
- characters:
298
- # special rules
299
-
300
- '\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
301
- '\ufdf2': 'Allāh' # See note 5
302
-
303
- # Vowels, Diphthongs, and Diacritical Marks
304
- '\u064e' : 'a' # َ fatha
305
- '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
306
- '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
307
-
308
- '\u0652' : '' # ْ sokoon
309
- '\u0659': 'ê'
310
-
311
- '\u0650[\u064a|\u06cc]' : 'ī' # ـِي kasra followed by ي
312
- '\u0650' : 'i' # kasra
313
- '\u06d2' : 'e' # ـے
314
-
315
- '\u0622' : 'ā' # آ
316
- '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
317
- '\u0627' : 'ā' # ا
318
- '\b\u0627' : '' # ا
319
-
320
- '\u0648' : 'o' # و # suspect
321
- '\u064f' : 'u' # ُ damma
322
- '\u064f\u0648' : 'ū' # ـُو damma followed by و
323
-
324
- '\u064e\u06d2' : 'ai' # ـے
325
- '\u064e\u0648' : 'au' # ـَو
326
- '\u0670': 'á' # ىٰ
327
- '\u0649': 'á' # ىٰ
328
-
329
- # shadda
330
- '\u0628\u0651' : 'bb' # ب
331
- '\u062a\u0651' : 'tt' # ت
332
- '\u062b\u0651' : 'thth' # ث
333
- '\u062c\u0651' : 'jj' # ج
334
- '\u062d\u0651' : 'ẖẖ' # ح
335
- '\u062e\u0651' : 'khkh' # خ
336
- '\u062f\u0651' : 'dd' # د
337
- '\u0630\u0651' : 'z̄z̄' # ذ
338
- '\u0631\u0651' : 'rr' # ر
339
- '\u0632\u0651' : 'zz' # ز
340
- '\u0633\u0651' : 'ss' # س
341
- '\u0634\u0651' : 'sh' # ش
342
- '\u0635\u0651' : 'şş' # ص
343
- '\u0636\u0651' : 'ḏḏ' # ض
344
- '\u0637\u0651' : 'ţţ' # ط
345
- '\u0638\u0651' : 'z̧z̧' # ظ
346
- '\u063a\u0651' : 'ghgh' # غ
347
- '\u0641\u0651' : 'ff' # ف
348
- '\u0642\u0651' : 'qq' # ق
349
- '\u0643\u0651' : 'kk' # ك
350
- '\u0644\u0651' : 'll' # ل
351
- '\u0645\u0651' : 'mm' # م
352
- '\u0646\u0651' : 'nn' # ن
353
- '\u0647\u0651' : 'hh' # ه
354
- '\u0648\u0651' : 'ww' # و
355
- '[\u064a|\u06cc]\u0651' : 'yy' # ي
356
-
357
- # NOTE 1
358
- '\u0650\b' : '-e' # ِ kasra
359
- '\u0674' : '-e' # ٴ
360
- '\u0654' : '-e' # ٔ
361
-
362
- '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
363
- '\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي
364
- '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
365
- '\u064e\u0648\u0652' : 'aw' # ـَوْ
366
- '\u064e\u064a\u0652' : 'ay' # ـَيْ
367
- '\u0650\u06cc\u0651\u064e' : 'īy' # ـِيَّ
368
- '\u064e\u064a' : 'aī' # ـَي
369
- '\u064e\u06cc' : 'aī' # ـَي
370
- # - '-ye'
371
-
372
-
373
- # ta' marboota
374
- '\u0629' : 'at' # ة in the middle of the sentence
375
- '\u0629$' : 'ah'
376
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
377
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
378
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
379
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
380
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
381
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
382
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
383
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
384
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
385
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
386
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
387
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
388
-
389
-
390
-
391
- '\u0621' : '’' # ء
392
- '\u0624' : '’' # ؤ
393
- '\u0624\b' : '’o' # ؤ
394
- '\u0626' : '’' # ئ
395
-
396
- '\u0623' : '' # أ
397
- '\u0625' : '' # إ
398
- # See note B
399
- '\b\u0627\u0644' : 'al ' # ال
400
- # '\uFE8E' : '' # ﺎ
401
-
402
- # Sun letters
403
- '\b\u0627\u0644\u062a' : 'at t' # الت
404
- '\b\u0627\u0644\u062b' : 'as̄ s̄' # الث
405
- '\b\u0627\u0644\u062f' : 'ad d' # الد
406
- '\b\u0627\u0644\u0630' : 'az̄ z̄' # الذ
407
- '\b\u0627\u0644\u0631' : 'ar r' # الر
408
- '\b\u0627\u0644\u0632' : 'az z' # الز
409
- '\b\u0627\u0644\u0633' : 'as s' # الس
410
- '\b\u0627\u0644\u0634' : 'ash sh' # الش
411
- '\b\u0627\u0644\u0635' : 'aş ş' # الص
412
- '\b\u0627\u0644\u0636' : 'aẕ ẕ' # الض
413
- '\b\u0627\u0644\u0637' : 'aţ ţ' # الط
414
- '\b\u0627\u0644\u0638' : 'az̧ z̧' # الظ
415
- '\b\u0627\u0644\u0644' : 'al l' # الل
416
- '\b\u0627\u0644\u0646' : 'an n' # الن
417
-
418
-
419
- # consonant characters
420
-
421
- '\u0628' : 'b' # ب
422
- '\u067E' : 'p' # پ
423
- '\u062a' : 't' # ت
424
- '\u0679' : 'ṭ' # ٹ
425
- '\u062B' : 's̄' # ث
426
- '\u062c' : 'j' # ج
427
- '\u0686' : 'ch' # ‫چ‬
428
- '\u062d' : 'ḩ' # ح
429
- '\u062e' : 'kh' # خ
430
- '\u062f' : 'd' # د
431
- '\u0688' : 'ḍ' # ‫ڈ
432
- '\u0630' : 'z̄' # ذ
433
- '\u0631' : 'r' # ر
434
- '\u0691' : 'ṛ' # ڑ
435
- '\u0632' : 'z' # ز
436
- '\u0698' : 'zh' # ‫ژ‬
437
- '\u0633' : 's' # س
438
- '\u0634' : 'sh' # ش
439
- '\u0635' : 'ş' # ص
440
- '\u0636' : 'ẕ' # ض
441
- '\u0637' : 'ţ' # ط
442
- '\u0638' : 'z̧' # ظ
443
- '\u0639' : '‘' # ع
444
- '\u063a' : 'gh' # غ
445
- '\u0641' : 'f' # ف
446
- '\u0642' : 'q' # ق
447
- '\u0643' : 'k' # ك
448
- '\u06A9' : 'k' # ک
449
- '\u06AF' : 'g' # ‫گ‬
450
- '\u0644' : 'l' # ل
451
- '\u0645' : 'm' # م
452
- '\u0646' : 'n' # ن
453
- '\u06BA' : 'ñ' # ں
454
- '[\u0647|\u06c1|\u06be]' : 'h' # ه
455
- '\u0648' : 'w' # و
456
- '[\u064a|\u06cc]' : 'y' # ي
457
- # '\u0649' : 'y' # ي
458
- '\u06D0' : 'ē' # ې
459
- '\u06CD' : 'êy' # ‫ۍ‬