interscript 0.1.9 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,253 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: kn-1945
4
- language: iso-639-2:kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: BGN/PCGN 1945 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
9
- creation_date: 1945
10
- adoption_date:
11
- description:
12
-
13
- notes: "
14
-
15
- 1. At the end of a syllable, the character ᄋ should be romanized ng,
16
- as in the following example:
17
-
18
- 평양 → P’yŏngyang
19
-
20
- At the beginning of a syllable, the character ᄋ is silent and
21
- should not be romanized. An example follows:
22
-
23
- 용화 → Yonghwa
24
-
25
- 2. Syllable boundaries within words are not reflected in romanization.
26
- In the different types of syllables shown in the table below, C
27
- represents any consonant character, V represents any vowel character
28
- and / represents a syllable boundary.
29
-
30
- Han’gŭl 개성 남포 안양
31
- Syllable boundaries CV/CVC CVC/CV VC/VC
32
- Romanization Kaesŏng Namp’o Anyang
33
-
34
- 3. Euphonic changes occurring within a word, including between the
35
- specific and generic of a geographical name, should be reflected in
36
- romanization. Generic terms are usually seen separated from the name
37
- by a hyphen and with a lower case initial letter rather than as a
38
- separate word:
39
-
40
- 영진리 → Yŏngjil-li
41
- 덕흥리 → Tŏkhŭng-ni
42
- 압록강 → Amnok-kang
43
- 대동강 → Taedong-gang
44
-
45
- 4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
46
- published in North Korea in 1966), unlike the Korean spoken in the
47
- Republic of Korea, the language spoken in the Democratic People’s
48
- Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
49
- The use of the word-initial ᄅ ('r') can be seen in official news
50
- reports as well as native mapping. Since such examples exist, the
51
- word-initial ᄅ ('r') is reflected as an option in the tables given above.
52
-
53
- 5. The Romanization column shows only lowercase forms but, when romanizing,
54
- uppercase and lowercase Roman letters as appropriate should be used.
55
- "
56
-
57
- tests:
58
- - source: "평양"
59
- expected: "P’yŏngyang"
60
- - source: "용화"
61
- expected: "Yonghwa"
62
- - source: "개성"
63
- expected: "Kaesŏng"
64
- - source: "남포"
65
- expected: "Namp’o"
66
- - source: "안양"
67
- expected: "Anyang"
68
- - source: "영진-리"
69
- expected: "Yŏngjil-li"
70
- - source: "덕흥-리"
71
- expected: "Tŏkhŭng-ni"
72
- - source: "압록-강"
73
- expected: "Amnok-kang"
74
- - source: "대동-강"
75
- expected: "Taedong-gang"
76
- - source: "라선특별시"
77
- expected: "Rasŏnt’ŭkpyŏlsi"
78
- - source: 은하-리
79
- expected: "Ŭnha-ri"
80
- - source: 은중-리
81
- expected: "Ŭnjung-ni"
82
- - source: 은장-령
83
- expected: "Ŭnjang-nyŏng"
84
- - source: 은혜-동
85
- expected: "Ŭnhye-dong"
86
- - source: 은호-리
87
- expected: "Ŭnho-ri"
88
- - source: 은행정
89
- expected: "Ŭnhaengjŏng"
90
- - source: 은행-동
91
- expected: "Ŭnhaeng-dong"
92
- - source: 은행-촌
93
- expected: "Ŭnhaeng-ch’on"
94
- - source: 원수
95
- expected: "Wŏnsu"
96
- - source: 원소리-고개
97
- expected: "Wŏnsori-gogae"
98
- - source: 원소참
99
- expected: "Wŏnsoch’am"
100
- - source: 원소-리
101
- expected: "Wŏnso-ri"
102
- - source: 원신-리
103
- expected: "Wŏnsil-li"
104
- - source: 난곡
105
- expected: "Nan’gok"
106
- - source: 난산-리
107
- expected: "Nansal-li"
108
- - source: 난직
109
- expected: "Nanjik"
110
- - source: 영곡
111
- expected: "Yŏnggok"
112
- - source: 윗두밀
113
- expected: "Wittumil"
114
- - source: 윗도심이
115
- expected: "Wittosimi"
116
- - source: 둔지
117
- expected: "Tunji"
118
- - source: 서승
119
- expected: "Sŏsŭng"
120
- - source: 신촌
121
- expected: "Sinch’on"
122
- - source: 비암덕
123
- expected: "Piamdŏk"
124
- - source: 바위안
125
- expected: "Pawian"
126
- - source: 오송평
127
- expected: "Osongp’yŏng"
128
- - source: 그물목
129
- expected: "Kŭmulmok"
130
- - source: 구원정
131
- expected: "Kuwŏnjŏng"
132
- - source: 일하
133
- expected: "Irha"
134
- - source: 황우
135
- expected: "Hwangu"
136
- - source: 자작보
137
- expected: "Chajakpo"
138
- - source: 비파1-동
139
- expected: "Pip’a Il-tong"
140
- - source: 문암 오-동
141
- expected: "Munam O-dong"
142
-
143
- map:
144
- character_separator: ""
145
- word_separator: " "
146
- title_case: True
147
- inherit: [var-kor-Hang-Latn-mr-1939]
148
-
149
- rules:
150
- # Add Zero-width White-space U+200B after spaces (i.e. before word boundaries)
151
- # So that the word-initial conversion rules will be blocked.
152
- - pattern: "^"
153
- result: "\u200B"
154
- - pattern: "(?<= )"
155
- result: "\u200B"
156
-
157
- # convert numbers to space + Hangul
158
- - pattern: "([^0-9 ])(?=[0-9])"
159
- result: "\\1 "
160
- - pattern: "1"
161
- result: "일"
162
- - pattern: "2"
163
- result: "이"
164
- - pattern: "3"
165
- result: "삼"
166
- - pattern: "4"
167
- result: "사"
168
- - pattern: "5"
169
- result: "오"
170
- - pattern: "6"
171
- result: "육"
172
- - pattern: "7"
173
- result: "칠"
174
- - pattern: "8"
175
- result: "팔"
176
- - pattern: "9"
177
- result: "구"
178
-
179
- # This is a logic to add hyphen in front of generics
180
- # - pattern: "(?<=.)(구역|동|리|도|고개|골|로동자구|사무소|초등학교|중학교|고등학교|강|포|령|역|봉|사|천|교|제|저수지|소류지|재|못|말|면|암|교회|촌|병원|바위|공원|섬|우체국|대학교|보건소|굴|치|대교|지구|폭포|해수욕장|휴게소|중고교|읍|보건진료소|마을|톨게이트|대학|시장|경찰서|학교)$" #to be expanded
181
- # result: "-\\1"
182
-
183
- postrules:
184
-
185
- # Add space to the two ends of the string for easier word boundary handling
186
- - pattern: "^"
187
- result: " "
188
- - pattern: "$"
189
- result: " "
190
-
191
- # Initial rules in the inherited map were blocked, so that
192
- # this set of updated rules (with the onset rules removed) will be used instead.
193
- - pattern: "\u200B"
194
- result: ""
195
-
196
- - pattern: "(?<= )ᄀ"
197
- result: "k" # HANGUL CHOSEONG KIYEOK
198
- - pattern: "(?<= )ᄂ"
199
- result: "n" # HANGUL CHOSEONG NIEUN
200
- - pattern: "(?<= )ᄃ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
201
- result: "ch" # HANGUL CHOSEONG TIEUT # T -> Ch before yotized vowels
202
- - pattern: "(?<= )ᄃ"
203
- result: "t" # HANGUL CHOSEONG TIEUT
204
- - pattern: "(?<= )ᄅ"
205
- result: "r" # HANGUL CHOSEONG RIEUL
206
- - pattern: "(?<= )ᄆ"
207
- result: "m" # HANGUL CHOSEONG MIEUM
208
- - pattern: "(?<= )ᄇ"
209
- result: "p" # HANGUL CHOSEONG PIEUP
210
- - pattern: "(?<= )ᄉ(?=ᅱ)"
211
- result: "sh" # HANGUL CHOSEONG SIOS
212
- - pattern: "(?<= )ᄉ"
213
- result: "s" # HANGUL CHOSEONG SIOS
214
- - pattern: "(?<= )ᄋ"
215
- result: "" # HANGUL CHOSEONG IEUNG
216
- - pattern: "(?<= )ᄌ"
217
- result: "ch" # HANGUL CHOSEONG CIEUC
218
- - pattern: "(?<= )ᄎ"
219
- result: "ch’" # HANGUL CHOSEONG CHIEUCH
220
- - pattern: "(?<= )ᄏ"
221
- result: "k’" # HANGUL CHOSEONG KHIEUKH
222
- - pattern: "(?<= )ᄐ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
223
- result: "ch’" # HANGUL CHOSEONG THIEUTH + YOTIZED VOWELS
224
- - pattern: "(?<= )ᄐ"
225
- result: "t’" # HANGUL CHOSEONG THIEUTH
226
- - pattern: "(?<= )ᄑ"
227
- result: "p’" # HANGUL CHOSEONG PHIEUPH
228
- - pattern: "(?<= )ᄒ"
229
- result: "h" # HANGUL CHOSEONG HIEUH
230
- - pattern: "(?<= )ᄁ"
231
- result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
232
- - pattern: "(?<= )ᄭ"
233
- result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
234
- - pattern: "(?<= )ᄄ"
235
- result: "tt" # HANGUL CHOSEONG SSANGTIEUT
236
- - pattern: "(?<= )ᄯ"
237
- result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
238
- - pattern: "(?<= )ᄈ"
239
- result: "pp" # HANGUL CHOSEONG SSANGPIEUP
240
- - pattern: "(?<= )ᄲ"
241
- result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
242
- - pattern: "(?<= )ᄊ"
243
- result: "ss" # HANGUL CHOSEONG SSANGSIOS
244
- - pattern: "(?<= )ᄍ"
245
- result: "tch" # HANGUL CHOSEONG SSANGCIEUC
246
- - pattern: "(?<= )ᄶ"
247
- result: "tch" # HANGUL CHOSEONG SIOS-CIEUC
248
-
249
- # Remove space added
250
- - pattern: "^ "
251
- result: ""
252
- - pattern: " $"
253
- result: ""
@@ -1,48 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2011
4
- language: iso-639-2:kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
- BGN/PCGN 2011 Agreement
15
-
16
- tests:
17
- - source: 불국사
18
- expected: "Bulguksa"
19
- - source: 묵호
20
- expected: "Mukho"
21
- - source: 울산
22
- expected: "Ulsan"
23
- - source: 독립문
24
- expected: "Dongnimmun"
25
- - source: 강남역
26
- expected: "Gangnamyeok"
27
- - source: 남산리
28
- expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
- - source: 내월리
30
- expected: "Naewol-ri"
31
- - source: 울릉군
32
- expected: "Ulleung-gun"
33
- - source: 설악산
34
- expected: "Seoraksan"
35
- - source: 삼죽면
36
- expected: "Samjuk-myeon"
37
- - source: 평리1동
38
- expected: "Pyeongni Il-dong"
39
- - source: 평리2동
40
- expected: "Pyeongni I-dong"
41
- - source: 탑안이
42
- expected: "Tabani"
43
-
44
- map:
45
- character_separator: ""
46
- word_separator: " "
47
- title_case: True
48
- inherit: moct-kor-Hang-Latn-2000
@@ -1,48 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2011
4
- language: iso-639-2:kor
5
- source_script: Kore
6
- destination_script: Latn
7
- name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
- BGN/PCGN 2011 Agreement
15
-
16
- tests:
17
- - source: 佛國寺
18
- expected: "Bulguksa"
19
- - source: 묵호
20
- expected: "Mukho"
21
- - source: 蔚山
22
- expected: "Ulsan"
23
- - source: 獨立門
24
- expected: "Dongnimmun"
25
- - source: 江南驛
26
- expected: "Gangnamyeok"
27
- - source: 南山里
28
- expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
- - source: 내월里
30
- expected: "Naewol-ri"
31
- - source: 鬱陵郡
32
- expected: "Ulleung-gun"
33
- - source: 雪嶽山
34
- expected: "Seoraksan"
35
- - source: 三竹面
36
- expected: "Samjuk-myeon"
37
- - source: 坪里1洞
38
- expected: "Pyeongni Il-dong"
39
- - source: 坪里2洞
40
- expected: "Pyeongni I-dong"
41
- - source: 탑안이
42
- expected: "Tabani"
43
-
44
- map:
45
- character_separator: ""
46
- word_separator: " "
47
- title_case: True
48
- inherit: [var-kor-Kore-Hang-2013, moct-kor-Hang-Latn-2000]
@@ -1,249 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2007
4
- language: kur
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ROMANIZATION OF KURDISH -- BGN/PCGN 2007
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693727/ROMANIZATION_OF_KURDISH.pdf
9
- creation_date: 2007
10
- confirmation date: 2017-12
11
- description: |
12
- The tabulation below is applicable to the Kurdish language as a
13
- whole. It is based for the most part on the Hawar Roman alphabet used
14
- in the Library of Congress Standard Kurdish Orthography Table, but it
15
- also incorporates certain non-Hawar elements found in A Kurdish-English
16
- Dictionary (Taufiq Wahby & C J Edmonds, OUP, 1966). The tabulation
17
- covers both major varieties of the Kurdish language: Kurmanji and
18
- Sorani. Kurmanji is spoken principally in Turkey and in Iraq north of
19
- the Great Zab River (Dahūk/Dihok Governorate). It is generally written
20
- in Roman script, and usually employs the Roman orthography. Sorani is
21
- spoken principally in Iraq south of the Great Zab river (Arbīl/Hewlêr
22
- and As Sulaymānīyah/Slêmanî governorates). It is generally written in
23
- Perso-Arabic script, and usually employs the Perso-Arabic script
24
- orthography.
25
-
26
- Kurdish forms of geographical names in Turkey will usually be found
27
- in Roman script, and so no romanization process will be required. The
28
- digraph options for consonant letters '\u0686', '\u0634', and '\u063A'
29
- will not be encountered for such names. In Iraq, Syria, and Iran,
30
- Kurdish will usually be encountered in Perso-Arabic script, in which
31
- case it should be romanized into the corresponding Roman script form.
32
- Kurdish geographical names for places and features outside Turkey,
33
- found in Roman script form, should, where necessary and if possible, be
34
- tailored to fit the orthography of the Romanization shown below and
35
- should employ the digraph options for consonant letters '\u0686',
36
- '\u0634', and '\u063A'.
37
-
38
- notes:
39
-
40
- - In pure Kurdish words hamza is borne by yā’ ( ئ ) and occurs only
41
- before initial vowels; it is not romanized. Medial and final hamza in
42
- Arabic borrowings are romanized by ’ (apostrophe – Unicode encoding
43
- 2019).
44
-
45
- - The letters ث ذ ص ض ط ظ do not occur in pure Kurdish words. In Arabic
46
- borrowings some writers retain these letters, others substitute س ز س ز
47
- ت ز respectively. Only the letters ط ض and ص are catered for in the
48
- Library of Congress tabulation, as reflected in lines 16-18 of the
49
- above Consonant table. Words of obvious Arabic origin occurring in a
50
- Kurdish toponymic environment will be treated as Kurdish rather than
51
- Arabic, as will words of other non-Kurdish origins.
52
-
53
- - The digraph options appearing in rows 6, 15 and 20 of the consonants
54
- table should be used for Kurdish geographical names in Iraq, Iran, and
55
- Syria. The single character options should be used for Kurdish
56
- geographical names in Turkey.
57
-
58
- - ڨ is used to represent v in foreign words. Some southern Kurdish
59
- writers use it to represent the v in borrowings from northern Kurdish
60
- dialects. و is pronounced as a v in the north and as a w elsewhere.
61
-
62
- - Hā’ can be used as a vowel or a consonant. The initial (ه) and medial
63
- (ـهـ) forms are used for the consonant h, Consonant table, row 31, while the
64
- final (ه) and independent (ە) forms are used to represent the vowel e,
65
- Vowel table, row 1. Therefore, when used as a consonant, the final and
66
- independent forms of hā’ will be seen as ‘ه’ instead of ‘and ‘ه’,
67
- respectively. For example, مهه meh, (“month”). When used as ‘e’, the
68
- hā’ behaves like the letters alif (ا), wāw (و), dāl (د), and rā (ر), in
69
- that it never joins to the following letter (i.e., it has no medial
70
- form). Consequently, the following letter will display the initial
71
- form, e.g. هەولێر Hewlêr (unless there is only one following letter, in
72
- which case it will be written in the independent form, e.g. ماوەت
73
- Mawet). As with other vowels (see special rules 2 and 3), initial e is
74
- preceded by the kursî hamza, yielding initial ئه , e.g. ئهني enî
75
- “forehead”.
76
-
77
- - In pure Kurdish words, the vowel ى is always long î, e.g. كانى ماسێ
78
- Kanî Masê. When it represents îzafe, it is also romanized î and joined
79
- by means of a hyphen to its preceding word e.g. پارێزگاى دهۆك Parêzga-î
80
- Dihok.
81
-
82
- - |
83
- An inventory of letter-diacritic combinations, used in addition to
84
- the unmodified letters of the basic Roman script in the Romanization of
85
- Kurdish, with their Unicode encoding, is:
86
-
87
- '‘': '2018' , '’': '2019'
88
- 'Ç': '00C7' , 'ç': '00E7'
89
- 'Ḍ': '1E0C' , 'ḍ': '1E0D'
90
- 'Ê': '00CA' , 'ê': '00EA'
91
-
92
- # There is no single Unicode encoding for these letter-diacritic combinations.
93
- 'Ḧ': '0048+0308' , 'ḧ': '0068+0308'
94
- 'Î': '00CE' , 'î': '00EE'
95
- 'Ł': '0141' , 'ł': '0142'
96
- 'Ö': '00D6' , 'ö': '00F6'
97
- 'Ṟ': '1E5E' , 'ṟ': '1E5F'
98
- 'Ş': '015E' , 'ş': '015F'
99
- 'Ṣ': '1E62' , 'ṣ': '1E63'
100
- 'Ṭ': '1E6C' , 'ṭ': '1E6D'
101
- 'Û': '00DB' , 'û': '00FB'
102
- 'Ü': '00DC' , 'ü': '00FC'
103
- 'Ẍ': '1E8C' , 'ẍ': '1E8D'
104
-
105
- - The Romanization column shows only lowercase forms but, when
106
- romanizing, uppercase and lowercase Roman letters as appropriate should
107
- be used.
108
-
109
- # Special Rules
110
- - The conjunction و (and) should be rendered u if the
111
- preceding word ends in a consonant, and w if the preceding
112
- word ends in a vowel. It should be separated by spaces from
113
- the preceding and following words.
114
-
115
- - In the Perso-Arabic orthography for Kurdish, all vowels are
116
- written, with the exception of the short i, which is
117
- expressed with a kasrah under the preceding consonant (ِ).
118
- In Perso-Arabic script, the kasrah will rarely be written (
119
- e.g., كرن kirin “to do”). Like all Kurdish vowels, the
120
- short i will be preceded by a kursî hamza (ئ) if it appears
121
- at the beginning of a word (see 3 below; see row 4 of vowel
122
- table).
123
-
124
- - In the Perso-Arabic orthography for Kurdish, when a vowel
125
- comes at the beginning of a word, or when a vowel directly
126
- follows another vowel, a kursî hamza (ئ) precedes it (e.g.,
127
- ئاگر agir “fire”).
128
-
129
- - A Kurdish word will never start with alif (ا). A Kurdish
130
- word may begin with a yā’ (ي) or wāw (و), but only when
131
- they are used as a consonant, when they will be romanized
132
- as y and w, respectively.
133
-
134
- - When preceded by a consonant, yā’ (ي) and wāw (و) should be
135
- romanized î and u, respectively. When preceded by a vowel (
136
- including short i, which is not written), yā’ (ي) and wāw (
137
- و) should be romanized y and w, respectively.
138
-
139
- - The Arabic sign shaddah ( ّ ) denoting a doubled consonant
140
- is not used in Kurdish; doubled consonants, which are rare,
141
- are written twice e.g. موحەممەد Muḧemmed; موسەننا Musanna.
142
- Shaddah might be used in Arabic borrowings but, as in
143
- unpointed Arabic, would generally be omitted.
144
-
145
- - Particles such as له le (= at, in, on) and به be (= to,
146
- for, by, with) should be written separately from their
147
- following word, e.g. له كوردستانێ Le Kurdistanê “in
148
- Kurdistan”
149
-
150
- - Occasionally the character sequences چه, سه and گه occur.
151
- They may be romanized c·h, s·h, and g·h in order to
152
- differentiate those romanizations from the digraphs ch, sh,
153
- and gh.
154
-
155
- tests:
156
- - source: كاني ماسێ
157
- expected: Kanî Masê
158
-
159
- - source: كِرِن
160
- expected: Kirin
161
-
162
- - source: ئاگِر
163
- expected: Agir
164
-
165
- - source: موحەممەد
166
- expected: Muḧemmed
167
-
168
- # - source: موسەننا # issue 604
169
- # expected: Musanna
170
-
171
- - source: لەكوردِستانێ
172
- expected: Le Kurdistanê
173
-
174
- map:
175
- postrules:
176
- - pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
177
- result: "upcase"
178
-
179
- characters:
180
-
181
- '\u0650' : 'i' # ِ kasra special rule 2
182
- '\u0644\u06d5' : 'le ' # special rule 7
183
- '\u0628\u06d5' : 'be ' # special rule 7
184
- # Note 1
185
- '\u0621' : '’' # ء
186
- '\u0624' : '’' # ؤ
187
- '\u0626' : '’' # ئ
188
-
189
- "(?<=[\u0621|\u0628|\u067E|\u062A|\u062C|\u0686|\u062D|\u062E|\u062F|\u0631|\u0695|\u0632|\u0698|\u0633|\u0634|\u0635|\u0636|\u0637|\u0639|\u063A|\u0341|\u06A8|\u0642|\u06A9|\u0643|\u06AF|\u0644|\u06B5|\u0645|\u0646|\u0648|\u0647|\u064A])\u0648" : 'u' # special note 4/5
190
- "(?<=[\u0621|\u0628|\u067E|\u062A|\u062C|\u0686|\u062D|\u062E|\u062F|\u0631|\u0695|\u0632|\u0698|\u0633|\u0634|\u0635|\u0636|\u0637|\u0639|\u063A|\u0341|\u06A8|\u0642|\u06A9|\u0643|\u06AF|\u0644|\u06B5|\u0645|\u0646|\u0648|\u0647|\u064A])\u064A" : 'î' # special note 4/5
191
- '\u0621': '’' # ء (see note 1 and 7)
192
- '\u0628': 'b' # ب
193
- '\u067E': 'p' # پ
194
- '\u062A': 't' # ت (see note 2)
195
- '\u062C': 'c' # ج
196
- '\u0686': # چ (see notes 3 and 7)
197
- - 'ch'
198
- - 'ç'
199
- '\u062D': 'ḧ' # ح
200
- '\u062E': 'x' # خ
201
- '\u062F': 'd' # د
202
- '\u0631': 'r' # ر
203
- '\u0695': 'ṟ' # ڕ (Formerly written ڒ ڔ or رر according to typeface available; may vary on older sources. See note 7.)
204
- '\u0632': 'z' # ز (see note 2)
205
- '\u0698': 'j' # ژ
206
- '\u0633': 's' # س (see note 2)
207
- '\u0634': # ش (see notes 3 and 7)
208
- - 'sh'
209
- - 'ş'
210
- '\u0635': 'ṣ' # ص (see notes 2 and 7)
211
- '\u0636': 'ḍ' # ض (see notes 2 and 7)
212
- '\u0637': 'ṭ' # ط (see notes 2 and 7)
213
- '\u0639': '‘' # ع (see note 7)
214
- '\u063A': # غ (see notes 3 and 7)
215
- - 'gh'
216
- - 'ẍ'
217
- '\u0641': 'f' # ف
218
- '\u06A8': 'v' # ڨ (see note 4)
219
- '\u0642': 'q' # ق
220
- '\u06A9': 'k' # ك
221
- '\u0643': 'k' # ك
222
- '\u06AF': 'g' # گ
223
- '\u0644': 'l' # ل
224
- '\u06B5': 'ł' # ڵ (Formerly written ڶ according to type available; may vary on older sources. See note 7)
225
- '\u0645': 'm' # م
226
- '\u0646': 'n' # ن
227
- '\u0648': 'w' # و (see note 4)
228
- '\u0647': 'h' # ه (see note 5)
229
- '\u064A': 'y' # ي
230
-
231
- # VOWELS
232
- '\u0647\b': 'e' # See notes 1 and 5
233
- '\u06D5': 'e' # See notes 1 and 5
234
- '\u0626\u06D5': 'e' # See notes 1 and 5
235
- '\u0627': 'a' # See note 1
236
- '\u0626\u0627': 'a' # See note 1
237
- '\u064A': 'î' # See notes 1, 6 and 7
238
- '\u0626\u064A': 'î' # See notes 1, 6 and 7
239
- '\u0626': 'i'
240
- '\u06CE': 'ê' # See note 7
241
- '\u0626\u06CE': 'ê' # See note 7
242
- '\u0648': 'u'
243
- '\u0626\u0648': 'u'
244
- '\u0648\u0648': 'û' # See note 7
245
- '\u0626\u0648\u0648': 'û' # See note 7
246
- '\u06C6': 'o'
247
- '\u0626\u06C6': 'o'
248
- '\u0648': 'ö' # Rare; previously written وي . See note 7
249
- '\u06CA': 'ü' # Only appearing in some dialects and only in old sources. Often equated to /û/ (row 7 above). Sometimes written يو See note 7.