interscript 0.1.9 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,67 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 2011
4
- language: iso-639-2:ori
5
- source_script: Orya
6
- destination_script: Latn
7
- name: Oriya Romanization, 2011
8
- alias:
9
- ogc11122:
10
- code: ori_Orya2Latn_ALA_2011
11
- description: Oriya ALA-Library of Congress 2011 System
12
- url: https://www.loc.gov/catdir/cpso/romanization/oriya.pdf
13
- creation_date: 2011
14
- description: |
15
- ALA-LC Romanization table for Oriya
16
-
17
- notes:
18
- - |
19
- Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for vowels following a
20
- consonant can be found in grammars; no distinction between the two is made in transliteration.
21
- - |
22
- The vowel a is implicit after all consonants and consonant clusters and is supplied in
23
- transliteration, with the following exceptions:
24
- a) when another vowel is indicated by its appropriate sign; and
25
- b) when the absence of any vowel is indicated by the subscript sign ( ୍ ) called hasanta.
26
- - |
27
- ବ is used both as a labial and as a semivowel. When it occurs as the second consonant of a
28
- consonant cluster, it is transliterated va. When ବ is doubled, it is transliterated bba.
29
- - |
30
- Exception: Anusvāra is transliterated by:
31
- a) ṅ before gutturals,
32
- b) ñ before palatals,
33
- c) ṇ before cerebrals,
34
- d) n before dentals, and
35
- e) m before labials.
36
- - |
37
- Candrabindu before guttural, palatal, cerebral, and dental occlusives is transliterated n̐.
38
- Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
39
- transliterated m̐.
40
- - When doubled, abagraha is transliterated by two apostrophes ( ’’ ).
41
- tests:
42
- - source: "ବିହାର ନିର୍ବାଚନ: ସନ୍ଧ୍ୟା ୬ଟା ସୁଦ୍ଧା ୫୩.୫୪ ପ୍ରତିଶତ ମତଦାନ"
43
- expected: "bihāra nirbācana: sandhẏā 6ṭā suddhā 53.54 pratiśata matadāna"
44
- - source: "ଡିସେମ୍ବର ସୁଦ୍ଧା ପ୍ରସ୍ତୁତ ହୋଇଯିବ ଅକ୍ସଫୋର୍ଡ କରୋନାଭାଇରସ୍ ଟିକା: ଅଦର ପୁନାୱାଲା"
45
- expected: "ḍisembara suddhā prastuta heāiyiba aksapheārḍa kareānābhāiras ṭikā: adara punābālā"
46
- - source: "କରୋନା ଆକ୍ରାନ୍ତ ହେଲେ କେନ୍ଦ୍ରମନ୍ତ୍ରୀ ସ୍ମୃତି ଇରାନୀ"
47
- expected: "karonā ākrānta hele kendramantrī smṛti irānī"
48
- - source: "ଆମେରିକା ଉପରେ ଉତ୍‌କ୍ଷିପ୍ତ ହୋଇ ଚୀନ୍ କହିଲା: ଭାରତ ସହ ଆମର ସୀମା ବିବାଦ ଦ୍ବିପାକ୍ଷିକ ମାମଲା"
49
- expected: "āmerikā upare utkshipta heāi cīn kahilā: bhārata saha āmara sīmā bibāda dbipākshika māmalā"
50
- - source: "ପରଲୋକରେ ଦକ୍ଷିଣ କୋରିଆର ସବୁଠୁ ଧନୀବ୍ୟକ୍ତି ‘ସାମ୍‌ସଙ୍ଗ୍’ ଅଧ୍ୟକ୍ଷ ଲି କୁନ୍-ହି; ଛାଡ଼ିଯାଇଛନ୍ତି ୨୧ ବିଲିୟନ୍ ଡଲାର୍‌ର ସାମ୍ରାଜ୍ୟ"
51
- expected: "paraleākare dakshiṇa keāriāra sabuṭhu dhanībẏakti ‘sāmsaṅg’ adhẏaksha li kun-hi; chāṙiyāichanti 21 biliẏan ḍalārra sāmrājẏa"
52
- - source: "ଉପନିର୍ବାଚନ ପାଇଁ ବାଲେଶ୍ୱରରେ ବିଜେଡିର ସମାବେଶ। (ଫଟୋ: ମନୋଜ ବିଶ୍ୱାଳ)"
53
- expected: "upanirbācana pāim̐ bāleśwarare bijeḍira samābeśa. (phaṭo: manoja biśbāḷa)"
54
- - source: "ନୂଆପଲ୍ଲୀ ଦୁର୍ଗାପୂଜା ମଣ୍ଡପ ବାହାରେ ମା’ଙ୍କୁ ପୂଜାର୍ଚ୍ଚନା କରୁଛନ୍ତି ଭକ୍ତ। (ଫଟୋ: ବିଭୂତି)"
55
- expected: "nūāpallī durgāpūjā maṇḍapa bāhāre mā’ṅku pūjārccanā karuchanti bhakta. (phaṭo: bibhūti)"
56
- - source: "ମା ଶାରଳାଙ୍କ ପୀଠରେ ଦଶମୀ ପୂଜା। (ଫଟୋ: ସୋମନାଥ, ଜଗତସିଂହପୁର ଟାଉନ୍‌)"
57
- expected: "mā śāraḷāṅka pīṭhare daśamī pūjā. (phaṭo: somanātha, jagatasiṃhapura ṭāun)"
58
- - source: "ଆଜି ବିହାରରେ ପ୍ରଥମ ପର୍ଯ୍ୟାୟ ଭୋଟ, ମହାମେଣ୍ଟ-ଏନ୍‌ଡିଏ କଡ଼ା ଟକ୍କର"
59
- expected: "āji bihārare prathama paryẏāẏa bheāṭa, mahāmeṇṭa-enḍie kaṙā ṭakkara"
60
- - source: "ନିର୍ବାଚନ ପ୍ରଚାରରୁ ଫେରି ନିଜ ଅଭିଜ୍ଞତା ବଖାଣି ଅମିଷା କହିଲେ, ‘କୌଣସି ପରିସ୍ଥିତିରେ ମୋର ସହିତ ଦୁଷ୍କର୍ମ ହେବାର ଆଶଙ୍କା ରହିଥିଲା’"
61
- expected: "nirbācana pracāraru pheri nija abhijñatā bakhāṇi amishā kahile, ‘keୗṇasi paristhitire meāra sahita dushkarma hebāra āśaṅkā rahithilā’"
62
- - source: "ବ୍ଲୁ ଫ୍ଲାଗ ବିଚ୍‌ରେ ଅଘଟଣ: ମା’ ଆଗରେ ସମୁଦ୍ରରେ ଭାସିଗଲେ ପୁଅ, ଖୋଜିବା ପାଇଁ ଲାଇଫଗାର୍ଡ ଟିମ୍‌ ନିୟୋଜିତ"
63
- expected: "blu phlāga bicre aghaṭaṇa: mā’ āgare samudrare bhāsigale pua, kheājibā pāim̐ lāiphagārḍa ṭim niẏeājita"
64
-
65
- map:
66
-
67
- inherit: alalc-ori-Orya-Latn-1997
@@ -1,256 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 1997
4
- language: iso-639-2:pan
5
- source_script: Guru
6
- destination_script: Latn
7
- name: Panjabi Romanization, 1997
8
- alias:
9
- ogc11122:
10
- code: pan_Deva2Latn_ALA_1997
11
- description: Panjabi ALA-Library of Congress 1997 System
12
- url: http://catdir.loc.gov/catdir/cpso/romanization/panjabi.pdf
13
- creation_date: 1997
14
- description: |
15
- ALA-LC Romanization table for Panjabi
16
-
17
- notes:
18
-
19
- - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
20
- vowels following a consonant can be found in grammars; no distinction between the two is
21
- made in transliteration.
22
-
23
- - |
24
- The vowel a is implicit after consonant clusters and may be implicit after consonants except
25
- when they are final or when another vowel is indicated by its appropriate sign. The cases in
26
- which the vowel a is implicit, however, can be determined only from a knowledge of the
27
- language or from suitable reference sources. In such cases the a is supplied in transliteration.
28
-
29
- - The dotted letters (ਸ਼ ਖ਼ ਗ਼ ਜ਼ ਫ਼ ਲ਼) are used in Urdu words.
30
-
31
- - |
32
- Exception: Bindī is transliterated by:
33
-
34
- a) ṅ before gutturals,
35
- b) ñ before palatals,
36
- c) ṇ before cerebrals,
37
- d) n before dentals, and
38
- e) m before labials.
39
-
40
- - |
41
- Exception: Ṭippī is transliterated by:
42
-
43
- a) ṅ before gutturals,
44
- b) ñ before palatals,
45
- c) ṇ before cerebrals,
46
- d) n before dentals, and
47
- e) m before labials.
48
-
49
- - |
50
- Exception: When adhik implies the combination of a non-aspirated and an aspirated
51
- consonant, the combination is transliterated as a non-aspirated, followed by an aspirated
52
- consonant.
53
-
54
- tests:
55
- - source: "ਪੰਜਾਬ 'ਚ ਵਧ ਰਿਹਾ ਖ਼ੁਦਕੁਸ਼ੀਆਂ ਦਾ ਰੁਝਾਨ"
56
- expected: "pañjaāba 'ca wadha raihaā khaudakaushaīāṃ daā raujhaāna"
57
- - source: "ਲੱਖ ਤੋਂ ਪਾਰ ਪੁੱਜਾ ਸਰਗਰਮ ਕੇਸਾਂ ਦਾ ਅੰਕੜਾ, ਦਿੱਲੀ 'ਚ ਦੋ ਲੱਖ ਤੋਂ ਪਾਰ ਇਨਫੈਕਟਿਡ"
58
- expected: "lakkha taoṃ paāra paujjaā saragarama kaesaāṃ daā aṅkaṛaā, daillaī 'ca dao lakkha taoṃ paāra inaphaaikaṭaiḍa"
59
- - source: "ਪਰਿਵਾਰਕ ਸਮੱਸਿਆਵਾਂ ਅਤੇ ਵਿਆਹ ਵੀ ਹੈ ਹੋਰ ਅਹਿਮ ਕਾਰਨ"
60
- expected: "paraiwaāraka samassaiāwaāṃ atae waiāha waī haai haora ahaima kaārana"
61
- - source: "ਮਰਦਾਂ 'ਚ ਔਰਤਾਂ ਨਾਲੋਂ ਵੱਧ ਹੈ ਖ਼ੁਦਕੁਸ਼ੀ ਦਾ ਰੁਝਾਨ"
62
- expected: "maradaāṃ 'ca aurataāṃ naālaoṃ waddha haai khaudakaushaī daā raujhaāna"
63
- - source: "ਰਾਸ਼ਟਰੀ ਪੱਧਰ 'ਤੇ ਪੰਜਾਬ ਦੀ ਸਥਿਤੀ ਕਾਫ਼ੀ ਸੂਬਿਆਂ ਤੋਂ ਬਿਹਤਰ"
64
- expected: "raāshaṭaraī paddhara 'tae pañjaāba daī sathaitaī kaāfaī saūbaiāṃ taoṃ baihatara"
65
- - source: "ਚੀਨੀ ਸੈਨਾ ਨੇ ਲਾਪਤਾ ਅਰੁਣਾਚਲ ਦੇ 5 ਨੌਜਵਾਨਾਂ ਬਾਰੇ ਦੱਸਿਆ"
66
- expected: "caīnaī saainaā nae laāpataā arauṇaācala dae 5 naaujawaānaāṃ baārae dassaiā"
67
- - source: "ਸਾਖਰਤਾ ਦੇ ਮਾਮਲੇ 'ਚ ਦੇਸ਼ 'ਚ 7ਵੇਂ ਨੰਬਰ 'ਤੇ ਪੰਜਾਬ"
68
- expected: "saākharataā dae maāmalae 'ca daesha 'ca 7waeṃ nam̆̐bara 'tae pañjaāba"
69
- - source: "ਦਿੱਲੀ ਕਮੇਟੀ ਦੇ ਮੈਂਬਰ ਸ਼ੰਟੀ ਨੇ ਅਕਾਲੀ ਦਲ ਤੋਂ ਦਿੱਤਾ ਅਸਤੀਫ਼ਾ"
70
- expected: "daillaī kamaeṭaī dae maaiṃbara shaṇṭaī nae akaālaī dala taoṃ daittaā asataīfaā"
71
- - source: "੧੦੨ ਹੋਰ ਕੋਰੋਨਾ ਪਾਜ਼ੀਟਿਵ ਮਰੀਜ਼ਾਂ ਦੀ ਪੁਸ਼ਟੀ, ਇਕ ਦੀ ਮੌਤ"
72
- expected: "102 haora kaoraonaā paāzaīṭaiwa maraīzaāṃ daī paushaṭaī, ika daī maauta"
73
- - source: "ਸੜਕ ਹਾਦਸੇ ਦੌਰਾਨ ਇਕ ਦੀ ਮੌਤ"
74
- expected: "saṛaka haādasae daauraāna ika daī maauta"
75
-
76
- map:
77
-
78
- rules:
79
- # note[4]
80
- - pattern: \u0A02(?=[ਕਖਖ਼ਗਗ਼ਘਙ]) # ਂ before gutturals
81
- result: ṅ
82
- - pattern: \u0A02(?=[ਚਛਜਜ਼ਝਞ]) # ਂ before palatals
83
- result: ñ
84
- - pattern: \u0A02(?=[ਟਠਡਢਣ]) # ਂ before cerebrals
85
- result: ṇ
86
- - pattern: \u0A02(?=[ਤਥਦਧਨ]) # ਂ before dentals
87
- result: n
88
-
89
- # note[5]
90
- - pattern: \u0A70(?=[ਕਖਖ਼ਗਗ਼ਘਙ]) # ੰ before gutturals
91
- result: ṅ
92
- - pattern: \u0A70(?=[ਚਛਜਜ਼ਝਞ]) # ੰ before palatals
93
- result: ñ
94
- - pattern: \u0A70(?=[ਟਠਡਢਣ]) # ੰ before cerebrals
95
- result: ṇ
96
- - pattern: \u0A70(?=[ਤਥਦਧਨ]) # ੰ before dentals
97
- result: n
98
-
99
- characters:
100
-
101
- # I. Vowels and Diphthongs (see Note 1)
102
-
103
- 'ਅ': 'a'
104
- 'ਆ': 'ā'
105
- 'ਇ': 'i'
106
- 'ਈ': 'ī'
107
- 'ਉ': 'u'
108
- 'ਊ': 'ū'
109
- 'ਏ': 'e'
110
- 'ਐ': 'ai'
111
- 'ਓ': 'o'
112
- 'ਔ': 'au'
113
-
114
- # II. Consonants (see Note 2)
115
- # Gutturals
116
- 'ਕ': 'ka'
117
- 'ਖ': 'kha'
118
- 'ਖ਼': 'kha'
119
- 'ਗ': 'ga'
120
- 'ਗ਼': 'gha'
121
- 'ਘ': 'gha'
122
- 'ਙ': 'ṅa'
123
-
124
- # Palatals
125
- 'ਚ': 'ca'
126
- 'ਛ': 'cha'
127
- 'ਜ਼': 'za'
128
- 'ਜ': 'ja'
129
- 'ਝ': 'jha'
130
- 'ਞ': 'ña'
131
-
132
- # Cerebrals
133
- 'ਟ': 'ṭa'
134
- 'ਠ': 'ṭha'
135
- 'ਡ': 'ḍa'
136
- 'ਢ': 'ḍha'
137
- 'ਣ': 'ṇa'
138
-
139
- # Dentals
140
- 'ਤ': 'ta'
141
- 'ਥ': 'tha'
142
- 'ਦ': 'da'
143
- 'ਧ': 'dha'
144
- 'ਨ': 'na'
145
-
146
- # Labials
147
- 'ਪ': 'pa'
148
- 'ਫ': 'pha'
149
- 'ਫ਼': 'fa'
150
- 'ਬ': 'ba'
151
- 'ਭ': 'bha'
152
- 'ਮ': 'ma'
153
-
154
- # Semivowels
155
- 'ਯ': 'ya'
156
- 'ਰ': 'ra'
157
- 'ਲ': 'la'
158
- 'ਲੵ': 'ḷa'
159
- 'ਲ਼': 'ḷa'
160
- 'ਵ': 'wa'
161
- 'ੜ': 'ṛa'
162
-
163
- # Sibilants
164
- 'ਸ': 'sa'
165
- 'ਸ਼': 'sha'
166
-
167
- # Aspirate
168
- 'ਹ': 'ha'
169
-
170
- # Bindī (see Note 4)
171
- 'ਂ': 'ṃ'
172
-
173
- # Ṭippī (see Note 5)
174
- 'ੰ': 'm̆̐'
175
-
176
- # Medials # Needed for connecting consonants
177
- 'ਾ': "ā"
178
- 'ਿ': "i"
179
- 'ੀ': "ī"
180
- 'ੁ': "u"
181
- 'ੂ': "ū"
182
- 'ੇ': "e"
183
- 'ੈ': "ai"
184
- 'ੋ': "o"
185
- 'ੌ': "au"
186
-
187
- # For adhik character[Note 6]
188
-
189
- # Gutturals
190
- 'ੱਕ': 'kka'
191
- 'ੱਖ': 'kkha'
192
- 'ੱਖ਼': 'kkha'
193
- 'ੱਗ': 'gga'
194
- 'ੱਗ਼': 'ggha'
195
- 'ੱਘ': 'ggha'
196
- 'ੱਙ': 'ṅṅa'
197
-
198
- # Palatals
199
- 'ੱਚ': 'cca'
200
- 'ੱਛ': 'ccha'
201
- 'ੱਜ': 'jja'
202
- 'ੱਜ਼': 'zza'
203
- 'ੱਝ': 'jjha'
204
- 'ੱਞ': 'ñña'
205
-
206
- # Cerebrals
207
- 'ੱਟ': 'ṭṭa'
208
- 'ੱਠ': 'ṭṭha'
209
- 'ੱਡ': 'ḍḍa'
210
- 'ੱਢ': 'ḍḍha'
211
- 'ੱਣ': 'ṇṇa'
212
-
213
- # Dentals
214
- 'ੱਤ': 'tta'
215
- 'ੱਥ': 'ttha'
216
- 'ੱਦ': 'dda'
217
- 'ੱਧ': 'ddha'
218
- 'ੱਨ': 'nna'
219
-
220
- # Labials
221
- 'ੱਪ': 'ppa'
222
- 'ੱਫ': 'ppha'
223
- 'ੱਫ਼': 'ffa'
224
- 'ੱਬ': 'bba'
225
- 'ੱਭ': 'bbha'
226
- 'ੱਮ': 'mma'
227
-
228
- # Semivowels
229
- 'ੱਯ': 'yya'
230
- 'ੱਰ': 'rra'
231
- 'ੱਲ': 'lla'
232
- 'ੱਲੵ': 'ḷḷa'
233
- 'ੱਵ': 'wwa'
234
- 'ੱੜ': 'ṛṛa'
235
-
236
- # Sibilants
237
- 'ੱਸ': 'ssa'
238
- 'ੱਸ਼': 'ssha'
239
-
240
- # Aspirate
241
- 'ੱਹ': 'hha'
242
-
243
-
244
- # digits
245
-
246
- '੦': '0'
247
- '੧': '1'
248
- '੨': '2'
249
- '੩': '3'
250
- '੪': '4'
251
- '੫': '5'
252
- '੬': '6'
253
- '੭': '7'
254
- '੮': '8'
255
- '੯': '9'
256
-
@@ -1,78 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 1997
4
- language: iso-639-2:pan
5
- source_script: Guru
6
- destination_script: Latn
7
- name: Panjabi Romanization, 2011
8
- alias:
9
- ogc11122:
10
- code: pan_Deva2Latn_ALA_2011
11
- description: Panjabi ALA-Library of Congress 2011 System
12
- url: https://www.loc.gov/catdir/cpso/romanization/panjabi.pdf
13
- creation_date: 2011
14
- description: |
15
- ALA-LC Romanization table for Panjabi
16
-
17
- notes:
18
-
19
- - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
20
- vowels following a consonant can be found in grammars; no distinction between the two is
21
- made in transliteration.
22
-
23
- - |
24
- The vowel a is implicit after consonant clusters and may be implicit after consonants except
25
- when they are final or when another vowel is indicated by its appropriate sign. The cases in
26
- which the vowel a is implicit, however, can be determined only from a knowledge of the
27
- language or from suitable reference sources. In such cases the a is supplied in transliteration.
28
-
29
- - The dotted letters (ਸ਼ ਖ਼ ਗ਼ ਜ਼ ਫ਼ ਲ਼) are used in Urdu words.
30
-
31
- - |
32
- Exception: Bindī is transliterated by:
33
-
34
- a) ṅ before gutturals,
35
- b) ñ before palatals,
36
- c) ṇ before cerebrals,
37
- d) n before dentals, and
38
- e) m before labials.
39
-
40
- - |
41
- Exception: Ṭippī is transliterated by:
42
-
43
- a) ṅ before gutturals,
44
- b) ñ before palatals,
45
- c) ṇ before cerebrals,
46
- d) n before dentals, and
47
- e) m before labials.
48
-
49
- - |
50
- Exception: When adhik implies the combination of a non-aspirated and an aspirated
51
- consonant, the combination is transliterated as a non-aspirated, followed by an aspirated
52
- consonant.
53
-
54
- tests:
55
- - source: "ਸਵਾਮਿਤਵ ਯੋਜਨਾ ਤਹਿਤ ਜਾਇਦਾਦ ਕਾਰਡ ਵੰਡੇ"
56
- expected: "sawaāmaitawa yaojanaā tahaita jaāidaāda kaāraḍa waṇḍae"
57
- - source: "ਕੇਂਦਰ ਸਰਕਾਰ ਨੇ ਕਿਸਾਨ ਜਥੇਬੰਦੀਆਂ ਨੂੰ ਮੁੜ ਦਿੱਤਾ ਗੱਲਬਾਤ ਦਾ ਸੱਦਾ"
58
- expected: "kaendara sarakaāra nae kaisaāna jathaebandaīāṃ naūm̆̐ mauṛa daittaā gallabaāta daā saddaā"
59
- - source: "ਦੁਸਹਿਰੇ ਮੌਕੇ ਕਿਸਾਨਾਂ ਵਲੋਂ ਮੋਦੀ ਦੇ ਪੁਤਲੇ ਫੂਕਣ ਦਾ ਫ਼ੈਸਲਾ"
60
- expected: "dausahairae maaukae kaisaānaāṃ walaoṃ maodaī dae pautalae phaūkaṇa daā faaisalaā"
61
- - source: "ਸੁਮੇਧ ਸੈਣੀ ਕੋਟਕਪੂਰਾ ਗੋਲੀਕਾਂਡ 'ਚ ਵੀ ਨਾਮਜ਼ਦ"
62
- expected: "saumaedha saaiṇaī kaoṭakapaūraā gaolaīkaāṇḍa 'ca waī naāmazada"
63
- - source: "ਕੋਰੋਨਾ ਟੀਕੇ ਦੀ ਹੰਗਾਮੀ ਵਰਤੋਂ 'ਤੇ ਵਿਚਾਰ ਨਹੀਂ-ਹਰਸ਼ ਵਰਧਨ"
64
- expected: "kaoraonaā ṭaīkae daī haṅgaāmaī warataoṃ 'tae waicaāra nahaīṃ-harasha waradhana"
65
- - source: "ਪੂਰਬੀ ਲੱਦਾਖ ਤਣਾਅ ਸਬੰਧੀ ਭਾਰਤ ਤੇ ਚੀਨ ਵਿਚਕਾਰ ੭ ਵੇਂ ਪੱਧਰ ਦੀ ਸੈਨਿਕ ਗੱਲਬਾਤ ਅੱਜ"
66
- expected: "paūrabaī laddaākha taṇaāa sabandhaī bhaārata tae caīna waicakaāra 7 waeṃ paddhara daī saainaika gallabaāta ajja"
67
- - source: "ਸਾਖਰਤਾ ਦੇ ਮਾਮਲੇ 'ਚ ਦੇਸ਼ 'ਚ ੭ਵੇਂ ਨੰਬਰ 'ਤੇ ਪੰਜਾਬ"
68
- expected: "saākharataā dae maāmalae 'ca daesha 'ca 7waeṃ nam̆̐bara 'tae pañjaāba"
69
- - source: "ਪਾਕਿ ਡਰੋਨ ਵਲੋਂ ਘੁਸਪੈਠ ਦੀ ਕੋਸ਼ਿਸ਼"
70
- expected: "paākai ḍaraona walaoṃ ghausapaaiṭha daī kaoshaisha"
71
- - source: "ਪਾਵਰਕਾਮ ਵਲੋਂ ਵਿੱਤੀ ਘਾਟੇ ਨੂੰ ਘਟਾਉਣ ਲਈ ਸਖ਼ਤ ਪੇਸ਼ਬੰਦੀਆਂ ਲਾਗੂ"
72
- expected: "paāwarakaāma walaoṃ waittaī ghaāṭae naūm̆̐ ghaṭaāuṇa laī sakhata paeshabandaīāṃ laāgaū"
73
- - source: "ਪੰਜਾਬ 'ਚ ਕੋਰੋਨਾ ਦੇ ਐਕਟਿਵ ਕੇਸਾਂ ਦੀ ਗਿਣਤੀ ਘਟੀ"
74
- expected: "pañjaāba 'ca kaoraonaā dae aikaṭaiwa kaesaāṃ daī gaiṇataī ghaṭaī"
75
-
76
- map:
77
-
78
- inherit: 'alalc-pan-Guru-Latn-1997'
@@ -1,375 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 1997
4
- language: iso-639-3:prs
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ALA-LC Romanization Table -- Persian (1997)
8
- url: http://catdir.loc.gov/catdir/cpso/romanization/persian.pdf
9
- creation_date: 1997
10
- confirmation_date: 1997
11
- description: |
12
- ALA-LC Romanization table for Persian
13
-
14
- notes:
15
- - 1. For the use of ا (alif) to support ء (hamzah) and ~ (maddah)
16
- see rule 1(a). For the romanization of ء and ~ see rules 4 and 5,
17
- respectively. For the use of ا to represent the long vowel
18
- romanized ā see the table of vowels and diphthongs, and rule 1(b).
19
-
20
- - 2. Final ک and ‫گ‬ may have the form ك, without the distinguishing
21
- upper stroke or strokes. The two letters are always distinguished in romanization.
22
-
23
- - 3. For other values of و and ي see the table of vowels and diphthongs,
24
- and rules 2, 3, and 7.
25
-
26
- - 4. ة (dotted ه) when used as an alternative to ت is romanized t.
27
-
28
- - 5. Vowel points are not printed on Library of Congress cards.
29
-
30
- - 6. See rules 1(b) and 5.
31
-
32
- - 7. See rule 3(d).
33
-
34
- - Rule 1.(a) As a support for ء (hamzah) and ~ (maddah). In these cases it is
35
- not represented in romanization. See rules 4 and 5.
36
- (b) To indicate the long vowel romanized ā. For the use of ا in tanvin
37
- see rule 6 dānā دَانَا
38
-
39
- - Rule 2.و is used to represent
40
- (a) the consonant romanized v Silent و following خ is retained in romanization.
41
- (b) the long u-vowel (and short u-vowel in some monosyllables) is romanized ū
42
- (c) the diphthong romanized aw.
43
- When the diphthong precedes a consonantal و, the combination is romanized avv. See
44
- rule 7.
45
- و may be used as a support for ء (hamzah); in this case it is not represented in
46
- romanization. See rule 4.
47
-
48
- - Rule 3. ي is used to represent
49
- (a) the consonant romanized y.
50
- (b) the long vowel romanized ī.
51
- (c) the diphthong romanized ay.
52
- (d) the final long vowel romanized á.
53
- For the use of ى as a mark of izafah see rule 8(c)
54
- ى in the medial form ىـ without dots, may be used as a support
55
- for ء (hamzah); in this case ى is not represented in romanization.
56
- See rule 4 below.
57
-
58
- - Rule 4. ء (hamzah)
59
- (a)When initial, ء is not represented in romanization.
60
- (b) When medial or final, ء is romanized ’ (alif) except as noted
61
- in (c) and (d) below.
62
- (c)When used as a mark of izafah, ء is romanized -’i.
63
- (d)When used to mark the indefinite article i
64
-
65
- - Rule 5. ~(maddah)
66
- (a) initial آ is romanized ā
67
- (b) Medial آ, when it represents the phonetic combination ’ā,
68
- is so romanized.
69
- (c) is otherwise not represented in romanization.
70
-
71
- - Rule 6. Tanvin (written ٌ, ٍ , ً , ـًا), which occurs chiefly in Arabic words,
72
- is romanized un, in, an, and an, respectively.
73
-
74
- - Rule 7. ّ (shaddah or tashdid) is represented by doubling the letter or
75
- digraph concerned. Note the exceptional case where ّ is written over و and ي
76
- to represent the combination of long vowel plus consonant.
77
-
78
- - Rule 8. Izafah. When two words are associated in the relation known as
79
- izafah, the first (the muzaf) is followed by an additional letter or
80
- syllable in romanization. This is added according to the following rules
81
- (a) When the muzaf bears no special mark of izafah, it is followed by -i.
82
- (b) When the muzaf is marked by the addition of ء, it is followed by -’i.
83
- (c) When the muzaf is marked by the addition of ى, it is followed by -yi.
84
- (d)Izafah is represented in romanization of personal names only when
85
- expressly indicated in the Persian script.
86
-
87
- - Rule 9.(a) When the affix and the word with which it is connected
88
- grammatically are written separately in Persian, the two are separated
89
- in romanization by a single prime (’). See also 12(b) below.
90
- (b) The Arabic article al is separated by a hyphen, in romanization,
91
- from the word to which it is prefixed.
92
-
93
- - Rule 10. Compounds. When the elements of a compound (except a
94
- compound personal name) are written separately in Persian, they are
95
- separated in romanization by a single prime (’). See also 12(b) below.
96
-
97
- - Rule 11. Capitalization.
98
- (a) Rules for the capitalization of English are followed, except that
99
- the Arabic article al is lowercased in all positions.
100
- (b) Diacritics are used with both capital and lowercase letters.
101
-
102
- - Rule 12. The single prime (’) is used
103
- (a) To separate two letters representing two distinct consonantal
104
- sounds, when the combination might otherwise be read as a digraph.
105
- (b) To mark the use of a letter in its final form when it occurs in the
106
- middle of a word. See also rules 9(a) and 10 above.
107
-
108
- - Rule 13. Foreign words in a Persian context, including Arabic words,
109
- are romanized according to the rules for Persian. For short vowels
110
- not indicated in the script, the Persian vowels nearest the original
111
- pronunciation of the word are supplied in romanization.
112
-
113
- - Rule 14. Dictionaries.
114
- In romanizing Persian, the Library of Congress has found it necessary
115
- to consult dictionaries as an appendage to the romanization tables,
116
- primarily for the purpose of supplying vowels. For Persian, the
117
- principal dictionary consulted is M. Mu’in. Farhang-i Farsi-i mutavassit
118
-
119
- tests:
120
- # 1(b)
121
- - source: دَانَا
122
- expected: Dānā
123
-
124
- # 2(a)
125
- - source: وَرزِش
126
- expected: Varzish
127
-
128
- - source: دَوَا
129
- expected: Davā
130
-
131
- - source: سَرو
132
- expected: Sarv
133
-
134
- - source: خوَاستَن
135
- expected: Khvāstan
136
-
137
- - source: خوُد
138
- expected: Khvud
139
-
140
- # 2(b)
141
- - source: دُور
142
- expected: Dūr
143
-
144
- - source: چُون
145
- expected: Chūn
146
-
147
- - source: تُو
148
- expected: Tū
149
-
150
- # 2(c)
151
- - source: فِردَوْسِي
152
- expected: Firdawsī
153
-
154
- # 3(a)
155
- - source: يَار
156
- expected: Yār
157
-
158
- - source: سِيَاه
159
- expected: Siyāh
160
-
161
- - source: پَاي
162
- expected: Pāy
163
-
164
- # 3(b)
165
- - source: اِيرَان
166
- expected: Īrān
167
-
168
- - source: قَالِي
169
- expected: Qālī
170
-
171
- # 3(c)
172
- - source: اَيوَان
173
- expected: Ayvān
174
-
175
- - source: رَي
176
- expected: Ray
177
-
178
- # 3(d)
179
- - source: مُصطَفَى
180
- expected: Muṣṭafá
181
-
182
- # 4(a)
183
- - source: مُؤَثِّر
184
- expected: Mu’as̱s̱ir
185
-
186
- - source: خُلَفَاء
187
- expected: Khulafā’
188
-
189
- - source: پَائِين
190
- expected: Pā’īn
191
-
192
- # 4(c)
193
- - source: اَستَانَهٔ دَر
194
- expected: Astānah-’i Dar
195
-
196
- # 5(a)
197
- - source: آب
198
- expected: Āb
199
-
200
- - source: كُلِّيَّة الآدَاب
201
- expected: Kullīyat al-Ādāb
202
-
203
- # 5(b)
204
- - source: مَآثِر
205
- expected: Ma’ās̱ir
206
-
207
- - source: دَريَاآبَادِي
208
- expected: Daryā’ābādī
209
-
210
- # 7
211
- - source: خُرَّم
212
- expected: Khurram
213
-
214
- - source: اَوَّل
215
- expected: Avval
216
-
217
- - source: بَچّة
218
- expected: Bachchah
219
-
220
- - source: خَيَّام
221
- expected: Khayyām
222
-
223
- - source: نَشرِيَّات
224
- expected: Nashrīyāt
225
-
226
- - source: قُوَّة
227
- expected: Qūvah
228
-
229
- - source: قُوَّة
230
- expected: Qūvah
231
-
232
- map:
233
- postrules:
234
- - pattern: '(?<=\b)(?<![‘|’])[\u0061-\uFFFF]'
235
- result: "upcase"
236
- - pattern: "Al-" # ال
237
- result: "al-"
238
-
239
- characters:
240
- # special rules
241
-
242
- '\s(?=\u0622\u0628\u064E\u0627\u062F)' : '' # space followed by abad is removed
243
- '\ufdf2' : 'Allāh' # See note 5
244
-
245
- # pointing
246
- '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
247
- '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
248
-
249
- '\u064e' : 'a' # َ fatha
250
- '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
251
- '\b\u0622' : 'ā' # آ
252
- '\u0622' : '’ā' # آ
253
-
254
- '\u0650' : 'i' # ِ kasra
255
- '\u0650[\u064a|\u06cc]' : 'ī' # ـِي kasra followed by ي
256
- '\u0650\b' : '-e' # ِ kasra
257
- '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
258
- '\u064e\u064a\u0652' : 'ay' # ـَيْ
259
- '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
260
-
261
- '\u064f' : 'u' # ُ damma
262
- '\u064e\u0648\u0652' : 'aw' # ـَوْ
263
- '\u064f\u0648' : 'ū' # ـُو damma followed by و
264
-
265
- '\u0652' : '' # ْ sokoon
266
- '\u0659' : 'ê'
267
-
268
- '[\u0674|\u0654]' : '-’i' # ٴ
269
-
270
- '\u0649\u0670' : 'á' # ىٰ
271
- # - '-ye'
272
-
273
- # Tanvin Rule 6
274
- '\u064c' : 'un' # ٌ
275
- '\u064d' : 'in' # ٍ
276
- '\u064b' : 'an' # ً
277
- '\u064b\u0627' : 'an' # ً
278
-
279
- # ta' marboota
280
- '\u0629' : 'at' # ة in the middle of the sentence
281
- '\u0629$' : 'ah'
282
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
283
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
284
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
285
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
286
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
287
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
288
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
289
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
290
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
291
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
292
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
293
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
294
-
295
- # shadda
296
-
297
- '\u0628\u0651' : 'bb' # ب
298
- '\u067E\u0651' : 'pp' # پ
299
- '\u062a\u0651' : 'tt' # ت
300
- '\u062b\u0651' : 's̱s̱' # ث
301
- '\u062c\u0651' : 'jj' # ج
302
- '\u0686\u0651' : 'chch' # ‫چ‬
303
- '\u062d\u0651' : 'ḥḥ' # ح
304
- '\u062e\u0651' : 'khkh' # خ
305
- '\u062f\u0651' : 'dd' # د
306
- '\u0630\u0651' : 'ẕẕ' # ذ
307
- '\u0631\u0651' : 'rr' # ر
308
- '\u0632\u0651' : 'zz' # ز
309
- '\u0698\u0651' : 'zhzh' # ‫ژ‬
310
- '\u0633\u0651' : 'ss' # س
311
- '\u0634\u0651' : 'shsh' # ش
312
- '\u0635\u0651' : 'ṣṣ' # ص
313
- '\u0636\u0651' : 'z̤z̤' # ض
314
- '\u0637\u0651' : 'ṭṭ' # ط
315
- '\u0638\u0651' : 'ẓẓ' # ظ
316
- '\u063a\u0651' : 'ghgh' # غ
317
- '\u0641\u0651' : 'ff' # ف
318
- '\u0642\u0651' : 'qq' # ق
319
- '\u0643\u0651' : 'kk' # ك
320
- '\u06A9\u0651' : 'kk' # ک
321
- '\u06AF\u0651' : 'gg' # ‫گ‬
322
- '\u0644\u0651' : 'll' # ل
323
- '\u0645\u0651' : 'mm' # م
324
- '\u0646\u0651' : 'nn' # ن
325
- '\u0647\u0651' : 'hh' # ه
326
- '\u0648\u0651' : 'vv' # و
327
- '[\u064a|\u0649]\u0651' : 'yy' # ي
328
- '\u0650[\u064a|\u06cc]\u0651\u064e' : 'īy' # ـِيَّ
329
- '\u064f\u0648\u0651' : 'ūv' # ـُو damma followed by و
330
-
331
- '\u0621' : '’' # ء
332
- '\u0624' : '’' # ؤ
333
- '\u0626' : '’' # ئ
334
-
335
- '\u0623' : '' # أ
336
- '\u0625' : '' # إ
337
- '\u0627' : 'ā' # ا
338
- '\b\u0627' : '' # ا
339
-
340
- '\b\u0627\u0644' : 'al-' # ال
341
-
342
- # consonant characters
343
-
344
- '\u0628' : 'b' # ب
345
- '\u067E' : 'p' # پ
346
- '\u062a' : 't' # ت
347
- '\u062B' : 's̱' # ث
348
- '\u062c' : 'j' # ج
349
- '\u0686' : 'ch' # ‫چ‬
350
- '\u062d' : 'ḥ' # ح
351
- '\u062e' : 'kh' # خ
352
- '\u062f' : 'd' # د
353
- '\u0630' : 'ẕ' # ذ
354
- '\u0631' : 'r' # ر
355
- '\u0632' : 'z' # ز
356
- '\u0698' : 'zh' # ‫ژ‬
357
- '\u0633' : 's' # س
358
- '\u0634' : 'sh' # ش
359
- '\u0635' : 'ṣ' # ص
360
- '\u0636' : 'z̤' # ض
361
- '\u0637' : 'ṭ' # ط
362
- '\u0638' : 'ẓ' # ظ
363
- '\u0639' : '‘' # ع
364
- '\u063a' : 'gh' # غ
365
- '\u0641' : 'f' # ف
366
- '\u0642' : 'q' # ق
367
- '\u0643' : 'k' # ك
368
- '\u06A9' : 'k' # ک
369
- '\u06AF' : 'g' # ‫گ‬
370
- '\u0644' : 'l' # ل
371
- '\u0645' : 'm' # م
372
- '\u0646' : 'n' # ن
373
- '\u0648' : 'v' # و
374
- '\u0647' : 'h' # ه
375
- '\u064a' : 'y' # ي