interscript 0.1.7 → 2.1.0b1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +116 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +83 -133
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +68 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +68 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +76 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +68 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/lib/interscript/visualize.rb +61 -0
  63. data/lib/interscript/visualize/group.html.erb +59 -0
  64. data/lib/interscript/visualize/json.rb +57 -0
  65. data/lib/interscript/visualize/map.html.erb +46 -0
  66. data/lib/interscript/visualize/nodes.rb +89 -0
  67. data/requirements.txt +1 -0
  68. metadata +78 -416
  69. data/README.adoc +0 -298
  70. data/lib/g2pwrapper.py +0 -34
  71. data/lib/interscript/fs.rb +0 -69
  72. data/lib/interscript/mapping.rb +0 -142
  73. data/lib/interscript/opal.rb +0 -57
  74. data/lib/interscript/opal/entrypoint.rb +0 -12
  75. data/lib/interscript/opal/map_translate.rb +0 -7
  76. data/lib/interscript/opal/maps.js.erb +0 -10
  77. data/lib/model-7 +0 -0
  78. data/lib/tha-pt-b-7 +0 -0
  79. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  80. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  81. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  82. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  83. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -165
  84. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -40
  85. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  86. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  87. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  88. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  89. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  90. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  91. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  92. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  93. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -211
  94. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -47
  95. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  96. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  97. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  98. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  99. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  100. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  101. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  102. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  103. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  104. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  105. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  106. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  107. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  108. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  109. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  110. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  111. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  112. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -172
  113. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  114. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  115. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  116. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  117. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  118. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  119. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  120. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  121. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  122. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  123. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  124. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  125. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  126. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  127. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  128. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  129. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  130. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -596
  131. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  132. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  133. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  134. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  135. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  136. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  137. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  138. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  139. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  140. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  141. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  142. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  143. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  144. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  145. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  146. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  147. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  148. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  149. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  150. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  151. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -336
  152. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -639
  153. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  154. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  155. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  156. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  157. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  158. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  159. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  160. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  161. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  162. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  163. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  164. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  165. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  166. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  167. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  168. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  169. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  170. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  171. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  172. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  173. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  174. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  175. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  176. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  177. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  178. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  179. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  180. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  181. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  182. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  183. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  184. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  185. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  186. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  187. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  188. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  189. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  190. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  191. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  192. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  193. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  194. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  195. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  196. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  197. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  198. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  199. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  200. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  201. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  202. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  203. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  204. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  205. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  206. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  207. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  208. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  209. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  210. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  211. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  212. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  213. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +0 -220
  214. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  215. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  216. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  217. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  218. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  219. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  220. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  221. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  222. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  223. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  224. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  225. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  226. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  227. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  228. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  229. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  230. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  231. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  232. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  233. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  234. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  235. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  236. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  237. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  238. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  239. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  240. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  241. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  242. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  243. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  244. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -425
  245. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  246. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  247. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  248. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  249. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  250. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  251. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  252. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  253. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  254. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  255. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  256. data/maps/odni-per-Arab-Latn-2015.yaml +0 -228
  257. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  258. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  259. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  260. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  261. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  262. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  263. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  264. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  265. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  266. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  267. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  268. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  269. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  270. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  271. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  272. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  273. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  274. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  275. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  276. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  277. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  278. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  279. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  280. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  281. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  282. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  283. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  284. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  285. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  286. data/maps/un-hin-Deva-Latn-2016.yaml +0 -222
  287. data/maps/un-mar-Deva-Latn-2016.yaml +0 -91
  288. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  289. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  290. data/maps/un-nep-Deva-Latn-1972.yaml +0 -350
  291. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  292. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  293. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  294. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  295. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  296. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  297. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  298. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  299. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  300. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  301. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  302. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  303. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  304. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  305. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  306. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  307. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  308. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  309. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  310. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  311. data/spec/interscript/filenames_spec.rb +0 -384
  312. data/spec/interscript/mapping_spec.rb +0 -42
  313. data/spec/interscript_spec.rb +0 -29
  314. data/spec/spec_helper.rb +0 -3
@@ -1,366 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 233-3
4
- language: iso-233-3:prs
5
- source_script: Arab
6
- destination_script: Latn
7
- name: Persian language — Simplified transliteration
8
- url: https://web.archive.org/web/20200920064754/http://www.freeprotocols.org/content/republished/doc.public/standards/communication/iso/iso-233/iso-233-3.pdf
9
- creation_date: 1999
10
- confirmation_date: 1999-01-15
11
- description: |
12
- This part of ISO 233 is one of a series of International
13
- Standards, dealing with the conversion of systems of
14
- writing. The aim of this part of ISO 233 and others in the
15
- series is to provide a means for international
16
- communication of written messages in a form which permits
17
- the automatic transmission and reconstitution of these, by
18
- men or machines. The system of conversion, in this case,
19
- must be univocal and entirely reversible. This means that
20
- no consideration should be given to phonetic and aesthetic
21
- matters or to certain national customs: all these
22
- considerations are, indeed, ignored by the machine
23
- performing the function. The adoption of this part of ISO
24
- 233 for international communication leaves every country
25
- free to adopt for its own use a national standard which may
26
- be different, on condition that it is compatible with this
27
- part of ISO 233. The system proposed herein should make
28
- this possible and be acceptable to international use if the
29
- graphisms it creates are such that they may be converted
30
- automatically into the graphisms used in any strict
31
- national systems. This part of ISO 233 may be used by
32
- anyone who has a clear understanding of the system and is
33
- certain that it can be applied without ambiguity. The
34
- result obtained will not give a correct pronunciation of
35
- the original text in a person’s own language, but it will
36
- serve as a means of finding automatically the original
37
- graphism and thus allow anyone who has knowledge of the
38
- original language to pronounce it correctly. Similarly, one
39
- can only pronounce correctly a text written in, for
40
- example, English or Polish, if one has a knowledge of
41
- English or Polish. The adoption of national standards
42
- compatible with this part of ISO 233 will permit the
43
- representation, in an international publication, of the
44
- morphemes of each language according to the customs of the
45
- country where it is spoken. It will be possible to simplify
46
- this representation in order to take into account the
47
- number of the character sets available on different kinds
48
- of machines.
49
- 1-Scope:
50
- This part of ISO 233 establishes a simplified
51
- system for the transliteration of Persian characters into
52
- Latin characters. This simplification of the stringent
53
- rules established by ISO 233:1984 is especially intended to
54
- facilitate the processing of bibliographic information (
55
- e.g. catalogues, indices, citations, etc.)
56
- 2-Normative references:
57
- The following normative documents contain
58
- provisions which, through reference in this text, constitute
59
- provisions of this part of ISO 233. For dated references,
60
- subsequent amendments to, or revisions of, any of these
61
- publications do not apply. However, parties to agreements
62
- based on this part of ISO 233 are encouraged to investigate
63
- the possibility of applying the most recent editions of the
64
- normative documents indicated below. For undated
65
- references, the latest edition of the normative document
66
- referred to applies. Members of ISO and IEC maintain
67
- registers of currently valid International Standards. ISO 233-
68
- 2, Information and documentation — Transliteration of
69
- Arabic characters into Latin characters — Part 2: Arabic
70
- language — Simplified transliteration. ISO/IEC 10646-1,
71
- Information Technology — Universal Multiple-Octet Coded
72
- Character Set (UCS) — Part 1: Architecture and Basic
73
- Multilingual Plane.
74
-
75
- notes: |
76
- TODO
77
-
78
- tests:
79
- - source: آذَر
80
- expected: âẕar
81
-
82
- - source: سَم
83
- expected: sam
84
-
85
- - source: پُر
86
- expected: por
87
-
88
- - source: پِدَر
89
- expected: pedar
90
-
91
- - source: مَثَلاً
92
- expected: mas̱alâ´´
93
-
94
- - source: جزء
95
- expected: jz’
96
-
97
- - source: رأس
98
- expected: râ’s
99
-
100
- - source: سؤال
101
- expected: sv’âl
102
-
103
- - source: مسئلة
104
- expected: msy’lh
105
-
106
-
107
- map:
108
- characters:
109
-
110
- # word-medial or word-final form where so appearing in a word.
111
- # '\u0627': '-'
112
-
113
- # # Vowel, Diphthong and Diacritical Characters
114
-
115
- # '\u064E': 'a'
116
-
117
- # # Both e and i are available to romanize this short vowel,
118
- # # depending on local usage and/or root language. In cases where the sound
119
- # # is uncertain, i is the default romanization in BGN/PCGN standardization
120
- # # procedures.
121
- # '\u0650':
122
- # - 'e'
123
- # - 'i'
124
-
125
- # # Both o and u are available to romanize this short vowel,
126
- # # depending on local usage and/or root language. In cases where the sound
127
- # # is uncertain, u is the default romanization in BGN/PCGN standardization
128
- # # procedures.
129
- # '\u064F':
130
- # - 'o'
131
- # - 'u'
132
- # '\u0659': 'ê'
133
-
134
- # # An alif with mad ( آ ) is written only in the initial position by
135
- # # BGN/PCGN standardization procedures, in keeping with Persian language
136
- # # family standards of use of the Arabic alphabet. The same letter written
137
- # # in a medial or final position is written . . .
138
- # '\u0622': 'ā'
139
-
140
- # pending issue #442
141
- # '\u0648': 'ō'
142
- # '\u0648': 'ū'
143
- # '\u0648': 'ow'
144
- # '\u06CC': 'ī'
145
-
146
- # # Or 'ē'. The character ی should be romanized ay or ē according to
147
- # # its root language or local pronunciation. In case of uncertainty a
148
- # # reference source (such as the Fairchild Aerial Surveys map series, or a
149
- # # BGN/PCGN approved policy document/list of recommended spellings) should
150
- # # be consulted.
151
- # '\u06CC': 'ay'
152
- # '\u06D0': 'ē'
153
-
154
- # # Or 'aī'. Both the combination ay and aī are available to romanize
155
- # # this character according to its root language or local pronunciation.
156
- # # In cases where the sound is uncertain ay is the default romanization in
157
- # # BGN/PCGN standardization procedures
158
- # '\u06CC':
159
- # - 'ay'
160
- # - 'á'
161
- # '\u06CD': 'êy'
162
- # '\u0621': '’'
163
- # '\u0674':
164
- # - '-e'
165
- # - '-ye'
166
-
167
- # # Other Diacritical Marks and Language Conventions
168
-
169
- # '\u0627': 'āy'
170
-
171
- # '\u0648': 'w'
172
- # '\u0626': '’'
173
- # '\u06C0': ''
174
- # '\u0651': ''
175
-
176
-
177
- # special rules
178
-
179
- '\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
180
- '\ufdf2': 'Allāh' # See note 5
181
-
182
- # pointing
183
- '\u064e' : 'a' # َ fatha
184
-
185
- '\u0650':
186
- - 'e'
187
- - 'i'
188
- '\u0650\b' : '-e' # ِ kasra
189
-
190
- '\u064f': # ُ damma
191
- - 'o'
192
- - 'u'
193
-
194
- '\u0652' : '' # ْ sokoon
195
- '\u0659': 'ê'
196
-
197
- # special pointed letters
198
- '\u0639\u064e' : '‘a' # عَ
199
- '\u0639\u0650' : '‘i' # عِ
200
- '\u0639\u064f' : '‘ū' # عُ
201
- # handle MacOS regex difference
202
- '\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
203
-
204
- '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
205
- '\u0650\u06cc' : 'ī' # ـِي kasra followed by ي
206
- '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
207
- '\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي
208
- '\u064f\u0648' : 'ō' # ـُو damma followed by و
209
- '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
210
- '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
211
- '\u064e\u0648\u0652' : 'aw' # ـَوْ
212
- '\u064e\u0648' : 'ow' # ـَو
213
- '\u064e\u064a\u0652' : 'ay' # ـَيْ
214
- '\u0650\u06cc\u0651\u064e' : 'īy' # ـِيَّ
215
- '\u064e\u064a' : 'aī' # ـَي
216
- '\u064e\u06cc' : 'aī' # ـَي
217
- '\u0649\u0670': 'á' # ىٰ
218
- '\u0674': '-e' # ٴ
219
- '\u0654': '-e' # ٔ
220
- # - '-ye'
221
-
222
- '\u0622' : 'â' # آ
223
-
224
- # ta' marboota
225
- '\u0629' : 't' # ة in the middle of the sentence
226
- '\u0629$' : 'h'
227
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'h'
228
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'h'
229
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'h'
230
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'h'
231
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'h'
232
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'h'
233
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'h'
234
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'h'
235
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'h'
236
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'h'
237
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'h'
238
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'h'
239
-
240
- # shadda
241
- '\u0628' : 'bb' # ب
242
- '\u067E' : 'pp' # پ
243
- '\u062a' : 'tt' # ت
244
- '\u062B' : 's̱s̱' # ث
245
- '\u062c' : 'jj' # ج
246
- '\u0686' : 'č̱č̱' # ‫چ‬‬
247
- '\u062d' : 'ḥḥ' # ح
248
- '\u062e' : 'ḵḵ' # خ
249
- '\u062f' : 'dd' # د
250
- '\u0689' : 'ḏḏ' # ‫ډ‬
251
- '\u0630' : 'ẕẕ' # ذ
252
- '\u0631' : 'rr' # ر
253
- '\u0632' : 'zz' # ز
254
- '\u0698' : 'zz' # ‫ژ‬
255
- '\u0633' : 'ss' # س
256
- '\u0634' : 'šš' # ش
257
- '\u0635' : 'ṣṣ' # ص
258
- '\u0636' : 'żż' # ض
259
- '\u0637' : 'ṭṭ' # ط
260
- '\u0638' : 'zz' # ظ
261
- '\u0639' : '‘' # ع
262
- '\u063a' : 'gh' # غ
263
- '\u0641' : 'ff' # ف
264
- '\u0642' : 'qq' # ق
265
- '\u06A9' : 'kk' # ک
266
- '\u06AF' : 'gg' # ‫گ‬
267
- '\u0644' : 'll' # ل
268
- '\u0645' : 'mm' # م
269
- '\u0646' : 'nn' # ن
270
- '\u0648' : 'vv' # و
271
- '\u0647' : 'hh' # ه
272
- '\u064a' : 'yy' # ي
273
- '\u0649' : 'yy' # ي
274
- '\u06D0' : 'ēē' # ې
275
- '\u06CD' : 'êy' # ‫ۍ
276
-
277
- # Tanvin
278
- '\u064b': '´´' # ً
279
- '\u064c': '' # ٌ
280
- '\u064d': '' # ٍ
281
-
282
- # hamzeh
283
- '\u0621' : '’' # ء
284
- '\u0623' : 'â’' # أ
285
- '\u0624' : 'v’' # ؤ
286
- '\u0626' : 'y’' # ئ
287
-
288
- # punctuation
289
-
290
- '\u060c' : ',' # vavak comma
291
- '\u061b' : ';' # nogteh vavak semicolon
292
- '\u061f' : '?' # neshane-ye porsesh question mark
293
-
294
- '\u0625' : '' # إ
295
- '\u0627' : 'â' # ا
296
-
297
- # See note B
298
- '\b\u0627\u0644' : 'al ' # ال
299
- # '\uFE8E' : '' # ﺎ
300
-
301
- # Sun letters
302
- '\b\u0627\u0644\u062a' : 'at t' # الت
303
- '\b\u0627\u0644\u062b' : 'as̄ s̄' # الث
304
- '\b\u0627\u0644\u062f' : 'ad d' # الد
305
- '\b\u0627\u0644\u0630' : 'az̄ z̄' # الذ
306
- '\b\u0627\u0644\u0631' : 'ar r' # الر
307
- '\b\u0627\u0644\u0632' : 'az z' # الز
308
- '\b\u0627\u0644\u0633' : 'as s' # الس
309
- '\b\u0627\u0644\u0634' : 'ash sh' # الش
310
- '\b\u0627\u0644\u0635' : 'aş ş' # الص
311
- '\b\u0627\u0644\u0636' : 'aẕ ẕ' # الض
312
- '\b\u0627\u0644\u0637' : 'aţ ţ' # الط
313
- '\b\u0627\u0644\u0638' : 'az̧ z̧' # الظ
314
- '\b\u0627\u0644\u0644' : 'al l' # الل
315
- '\b\u0627\u0644\u0646' : 'an n' # الن
316
-
317
- # consonant characters
318
-
319
- '\u0628' : 'b' # ب
320
- '\u067E': 'p' # پ
321
- '\u062a' : 't' # ت
322
- # '\u067C': 'ṯ' # ټ
323
- '\u062B': 's̱' # ث
324
- '\u062c' : 'j' # ج
325
- '\u0686': 'c' # ‫چ‬
326
-
327
- # # The variant form ج is seen infrequently and does not have a
328
- # # single Unicode encoding.
329
- # '\u0681': 'dz' # Note 2 # ‫ځ‬
330
-
331
- # '\u0685': 'ts' # Note 2 # ‫څ
332
-
333
- '\u062d' : 'ḥ' # ح
334
- '\u062e' : 'ḵ' # خ
335
- '\u062f' : 'd' # د
336
- '\u0689' : 'ḏ' # ‫ډ‬
337
- '\u0630' : 'ẕ' # ذ
338
- '\u0631' : 'r' # ر
339
- # '\u0693' : 'ṟ' # ړ
340
- '\u0632' : 'z' # ز
341
- '\u0698' : 'z' # ‫ژ‬
342
- # '\u0696' : 'z͟h' # ږ
343
- '\u0633' : 's' # س
344
- # '\u069A' : 's͟h' # ښ
345
- '\u0634' : 'š' # ش
346
- '\u0635' : 'ṣ' # ص
347
- '\u0636' : 'ż' # ض
348
- '\u0637' : 'ṭ' # ط
349
- '\u0638' : 'z' # ظ
350
- '\u0639' : '‘' # ع
351
- '\u063a' : 'gh' # غ
352
- '\u0641' : 'f' # ف
353
- '\u0642' : 'q' # ق
354
- # '\u0643' : 'k' # ك
355
- '\u06A9' : 'k' # ک
356
- '\u06AF' : 'g' # ‫گ‬
357
- '\u0644' : 'l' # ل
358
- '\u0645' : 'm' # م
359
- '\u0646' : 'n' # ن
360
- # '\u06BC' : 'ṉ' # ڼ
361
- '\u0648' : 'v' # و
362
- '\u0647' : 'h' # ه
363
- '\u064a' : 'y' # ي
364
- '\u0649' : 'y' # ي
365
- '\u06D0' : 'ē' # ې
366
- '\u06CD' : 'êy' # ‫ۍ‬
@@ -1,271 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 9-1995
4
- language: iso-639-2:rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: "ISO 9:1995 Information and documentation — Transliteration of Cyrillic characters into Latin characters — Slavic and non-Slavic languages"
8
- url: https://www.iso.org/standard/3589.html
9
- creation_date: 1995
10
- description: |
11
- Establishes a system for the transliteration into Latin characters of
12
- Cyrillic characters constituting the alphabets of Slavic and non-Slavic
13
- languages. Table 3 includes in a single sequence, listed in the
14
- Cyrillic alphabetic order, the 118 single or diacritic-carrying
15
- characters that appear in one or another of the considered alphabets.
16
-
17
- tests:
18
-
19
- map:
20
- characters:
21
- "\u0410": "A" # А => A
22
- "\u04d2": "\u00c4" # Ӓ => Ä (a diaeresis)
23
- "\u04d2\u0304": "\u1ea0\u0308" # Ӓ̄ => Ạ̈ (a diaeresis and dot below)
24
- "\u04d0": "\u0102" # Ӑ => Ă (a breve)
25
- "\u0410\u0304": "\u0100" # А̄ => Ā (a macron)
26
- "\u04d4": "\u00c6" # Ӕ => Æ (ae ligature)
27
- "\u0410\u0301": "\u00c1" # А́ => Á (a acute)
28
- "\u0410\u030a": "\u00c5" # А̊ => Å (a ring)
29
- "\u0411": "B" # Б => B
30
- "\u0412": "V" # В => V
31
- "\u0413": "G" # Г => G
32
- "\u0403": "\u01f4" # Ѓ => Ǵ (g acute)
33
- "\u0492": "\u0120" # Ғ => Ġ (g dot)
34
- "\u0494": "\u011e" # Ҕ => Ğ (g breve)
35
- "\u04ba": "\u1e24" # Һ => Ḥ (h dot)
36
- "\u0414": "D" # Д => D
37
- "\u0402": "\u0110" # Ђ => Đ (d macron)
38
- "\u0415": "E" # Е => E
39
- "\u04d6": "\u0114" # Ӗ => Ĕ (e breve)
40
- "\u0401": "\u00cb" # Ё => Ë (e diaeresis)
41
- "\u0404": "\u00ca" # Є => Ê (e circumflex)
42
- "\u0416": "\u017d" # Ж => Ž (z caron)
43
- "\u0496": "\u017d\u0327" # Җ => Ž̧ (z caron and cedilla[4])
44
- "\u04dc": "\u005a\u0304" # Ӝ => Z̄ (z macron)
45
- "\u04c1": "\u005a\u0306" # Ӂ => Z̆ (z breve)
46
- "\u0417": "\u005a" # З => Z
47
- "\u04de": "\u005a\u0308" # Ӟ => Z̈ (z diaeresis)
48
- "\u04e0": "\u0179" # Ӡ => Ź (z acute)
49
- "\u0405": "\u1e90" # Ѕ => Ẑ (z circumflex)
50
- "\u0418": "I" # И => I
51
- "\u04e2": "\u012a" # Ӣ => Ī (i macron)
52
- "\u0418\u0301": "\u00cd" # И́ => Í (i acute)
53
- "\u04e4": "\u00ce" # Ӥ => Î (i circumflex)
54
- "\u0419": "\u004a" # Й => J
55
- "\u0406": "\u00cc" # І => Ì (i grave)
56
- "\u0407": "\u00cf" # Ї => Ï (i diaeresis)
57
- "\u0406\u0304": "\u01cf" # І̄ => Ǐ (i caron (or breve))
58
- "\u0408": "\u004a\u030c" # Ј => J̌ (j caron)
59
- "\u0408\u0335": "\u004a\u0301" # Ј̵ => J́ (j acute)
60
- "\u041a": "K" # К => K
61
- "\u040c": "\u1e30" # Ќ => Ḱ (k acute)
62
- "\u04c3": "\u1e32" # Ӄ => Ḳ (k dot below)
63
- "\u049c": "\u004b\u0302" # Ҝ => K̂ (k circumflex)
64
- "\u04a0": "\u01e8" # Ҡ => Ǩ (k caron)
65
- "\u049e": "\u004b\u0304" # Ҟ => K̄ (k macron)
66
- "\u049a": "\u0136" # Қ => Ķ (k cedilla[4])
67
- "\u041a\u0328": "\u004b\u0300" # К̨ => K̀ (k grave)
68
- "\u051a": "Q" # Ԛ => Q
69
- "\u041b": "L" # Л => L
70
- "\u0409": "\u004c\u0302" # Љ => L̂ (l circumflex)
71
- "\u0520": "\u013b" # Ԡ => Ļ (l cedilla[4])
72
- "\u041c": "M" # М => M
73
- "\u041d": "N" # Н => N
74
- "\u040a": "\u004e\u0302" # Њ => N̂ (n circumflex)
75
- "\u04a2": "\u0145" # Ң => Ņ (n cedilla[4])
76
- "\u04c9": "\u1e46" # Ӊ => Ṇ (n dot below)
77
- "\u04a4": "\u1e44" # Ҥ => Ṅ (n dot)
78
- "\u050a": "\u01f8" # Ԋ => Ǹ (n grave)
79
- "\u0522": "\u0143" # Ԣ => Ń (n acute)
80
- "\u04c7": "\u0147" # Ӈ => Ň (n caron)
81
- "\u041d\u0304": "\u004e\u0304" # Н̄ => N̄ (n macron)
82
- "\u041e": "O" # О => O
83
- "\u04e6": "\u00d6" # Ӧ => Ö (o diaeresis)
84
- "\u04e8": "\u00d4" # Ө => Ô (o circumflex)
85
- "\u04ea": "\u0150" # Ӫ => Ő (o double acute)
86
- "\u04e6\u0304": "\u1ecc\u0308" # Ӧ̄ => Ọ̈ (o diaeresis and dot below)
87
- "\u04a8": "\u00d2" # Ҩ => Ò (o grave)
88
- "\u041e\u0301": "\u00d3" # О́ => Ó (o acute)
89
- "\u041e\u0304": "\u014c" # О̄ => Ō (o macron)
90
- "\u0152": "\u0152" # Œ => Œ (oe ligature)
91
- "\u041f": "P" # П => P
92
- "\u04a6": "\u1e54" # Ҧ => Ṕ (p acute)
93
- "\u0524": "\u0050\u0300" # Ԥ => P̀ (p grave)
94
- "\u0420": "R" # Р => R
95
- "\u0421": "S" # С => S
96
- "\u04aa": "\u015e" # Ҫ => Ş (s cedilla[4])
97
- "\u0421\u0300": "\u0053\u0300" # С̀ => S̀ (s grave)
98
- "\u0422": "T" # Т => T
99
- "\u040b": "\u0106" # Ћ => Ć (c acute)
100
- "\u050e": "\u0054\u0300" # Ԏ => T̀ (t grave)
101
- "\u0422\u030c": "\u0164" # Т̌ => Ť (t caron)
102
- "\u04ac": "\u0162" # Ҭ => Ţ (t cedilla[4])
103
- "\u0423": "U" # У => U
104
- "\u04f0": "\u00dc" # Ӱ => Ü (u diaeresis)
105
- "\u04ee": "\u016a" # Ӯ => Ū (u macron)
106
- "\u040e": "\u016c" # Ў => Ŭ (u breve)
107
- "\u04f2": "\u0170" # Ӳ => Ű (u double acute)
108
- "\u0423\u0301": "\u00da" # У́ => Ú (u acute)
109
- "\u04f0\u0304": "\u1ee4\u0308" # Ӱ̄ => Ụ̈ (u diaeresis and dot below)
110
- "\u04ae": "\u00d9" # Ү => Ù (u grave)
111
- "\u04b0": "\u0055\u0307" # Ұ => U̇ (u dot)
112
- "\u051c": "W" # Ԝ => W
113
- "\u0424": "F" # Ф => F
114
- "\u0425": "H" # Х => H
115
- "\u04b2": "\u1e28" # Ҳ => Ḩ (h cedilla[4])
116
- "\u0426": "C" # Ц => C
117
- "\u04b4": "\u0043\u0304" # Ҵ => C̄ (c macron)
118
- "\u040f": "\u0044\u0302" # Џ => D̂ (d circumflex)
119
- "\u0427": "\u010c" # Ч => Č (c caron)
120
- "\u04b6": "\u00c7" # Ҷ => Ç (c cedilla[4])
121
- "\u04cb": "\u0043\u0323" # Ӌ => C̣ (c dot below)
122
- "\u04f4": "\u0043\u0308" # Ӵ => C̈ (c diaeresis)
123
- "\u04b8": "\u0108" # Ҹ => Ĉ (c circumflex)
124
- "\u0427\u0300": "\u0043\u0300" # Ч̀ => C̀ (c grave)
125
- "\u04bc": "\u0043\u0306" # Ҽ => C̆ (c breve)
126
- "\u04be": "\u0043\u0328\u0306" # Ҿ => C̨̆ (c ogonek[4] and breve)
127
- "\u0428": "\u0160" # Ш => Š (s caron)
128
- "\u0429": "\u015c" # Щ => Ŝ (s circumflex)
129
- "\u042a": "\u02ba" # Ъ => ʺ (modifier letter double prime[5])
130
- "\u042b": "Y" # Ы => Y
131
- "\u04f8": "\u0178" # Ӹ => Ÿ (y diaeresis)
132
- "\u042b\u0304": "\u0232" # Ы̄ => Ȳ (y macron)
133
- "\u042c": "\u02b9" # Ь => ʹ (modifier letter prime[5])
134
- "\u042d": "\u00c8" # Э => È (e grave)
135
- "\u04d8": "\u0041\u030b" # Ә => A̋ (a double acute)
136
- "\u04da": "\u00c0" # Ӛ => À (a grave)
137
- "\u042e": "\u00db" # Ю => Û (u circumflex)
138
- "\u042e\u0304": "\u00db\u0304" # Ю̄ => Û̄ (u circumflex with macron)
139
- "\u042f": "\u00c2" # Я => Â (a circumflex)
140
- "\u0490": "\u0047\u0300" # Ґ => G̀ (g grave)
141
- "\u0462": "\u011a" # Ѣ => Ě (e caron)
142
- "\u046a": "\u01cd" # Ѫ => Ǎ (a caron)
143
- "\u0472": "\u0046\u0300" # Ѳ => F̀ (f grave)
144
- "\u0474": "\u1ef2" # Ѵ => Ỳ (y grave)
145
- "\u0430": "a" # а => a
146
- "\u04d3": "\u00e4" # ӓ => ä
147
- "\u04d3\u0304": "\u1ea1\u0308" # ӓ̄ => ạ̈
148
- "\u04d1": "\u0103" # ӑ => ă
149
- "\u0430\u0304": "\u0101" # а̄ => ā
150
- "\u04d5": "\u00e6" # ӕ => æ
151
- "\u0430\u0301": "\u00e1" # а́ => á
152
- "\u0430\u030a": "\u00e5" # а̊ => å
153
- "\u0431": "b" # б => b
154
- "\u0432": "v" # в => v
155
- "\u0433": "g" # г => g
156
- "\u0453": "\u01f5" # ѓ => ǵ
157
- "\u0493": "\u0121" # ғ => ġ
158
- "\u0495": "\u011f" # ҕ => ğ
159
- "\u04bb": "\u1e25" # һ => ḥ
160
- "\u0434": "d" # д => d
161
- "\u0452": "\u0111" # ђ => đ
162
- "\u0435": "e" # е => e
163
- "\u04d7": "\u0115" # ӗ => ĕ
164
- "\u0451": "\u00eb" # ё => ë
165
- "\u0454": "\u00ea" # є => ê
166
- "\u0436": "\u017e" # ж => ž
167
- "\u0497": "\u017e\u0327" # җ => ž̧
168
- "\u04dd": "\u007a\u0304" # ӝ => z̄
169
- "\u04c2": "\u007a\u0306" # ӂ => z̆
170
- "\u0437": "z" # з => z
171
- "\u04df": "\u007a\u0308" # ӟ => z̈
172
- "\u04e1": "\u017a" # ӡ => ź
173
- "\u0455": "\u1e91" # ѕ => ẑ
174
- "\u0438": "i" # и => i
175
- "\u04e3": "\u012b" # ӣ => ī
176
- "\u0438\u0301": "\u00ed" # и́ => í
177
- "\u04e5": "\u00ee" # ӥ => î
178
- "\u0439": "j" # й => j
179
- "\u0456": "\u00ec" # і => ì
180
- "\u0457": "\u00ef" # ї => ï
181
- "\u0456\u0304": "\u01d0" # і̄ => ǐ
182
- "\u0458": "\u01f0" # ј => ǰ
183
- "\u0458\u0335": "\u006a\u0301" # ј̵ => j́
184
- "\u043a": "k" # к => k
185
- "\u045c": "\u1e31" # ќ => ḱ
186
- "\u04c4": "\u1e33" # ӄ => ḳ
187
- "\u049d": "\u006b\u0302" # ҝ => k̂
188
- "\u04a1": "\u01e9" # ҡ => ǩ
189
- "\u049f": "\u006b\u0304" # ҟ => k̄
190
- "\u049b": "\u0137" # қ => ķ
191
- "\u043a\u0328": "\u006b\u0300" # к̨ => k̀
192
- "\u051b": "q" # ԛ => q
193
- "\u043b": "l" # л => l
194
- "\u0459": "\u006c\u0302" # љ => l̂
195
- "\u0521": "\u013c" # ԡ => ļ
196
- "\u043c": "m" # м => m
197
- "\u043d": "n" # н => n
198
- "\u045a": "\u006e\u0302" # њ => n̂
199
- "\u04a3": "\u0146" # ң => ņ
200
- "\u04ca": "\u1e47" # ӊ => ṇ
201
- "\u04a5": "\u1e45" # ҥ => ṅ
202
- "\u050b": "\u01f9" # ԋ => ǹ
203
- "\u0523": "\u0144" # ԣ => ń
204
- "\u04c8": "\u0148" # ӈ => ň
205
- "\u043d\u0304": "\u006e\u0304" # н̄ => n̄
206
- "\u043e": "o" # о => o
207
- "\u04e7": "\u00f6" # ӧ => ö
208
- "\u04e9": "\u00f4" # ө => ô
209
- "\u04eb": "\u0151" # ӫ => ő
210
- "\u043e\u0304\u0308": "\u1ecd\u0308" # о̄̈ => ọ̈
211
- "\u04a9": "\u00f2" # ҩ => ò
212
- "\u043e\u0301": "\u00f3" # о́ => ó
213
- "\u043e\u0304": "\u014d" # о̄ => ō
214
- "\u0153": "\u0153" # œ => œ
215
- "\u043f": "p" # п => p
216
- "\u04a7": "\u1e55" # ҧ => ṕ
217
- "\u0525": "\u0070\u0300" # ԥ => p̀
218
- "\u0440": "r" # р => r
219
- "\u0441": "s" # с => s
220
- "\u04ab": "\u015f" # ҫ => ş
221
- "\u0441\u0300": "\u0073\u0300" # с̀ => s̀
222
- "\u0442": "t" # т => t
223
- "\u045b": "\u0107" # ћ => ć
224
- "\u050f": "\u0074\u0300" # ԏ => t̀
225
- "\u0442\u030c": "\u0165" # т̌ => ť
226
- "\u04ad": "\u0163" # ҭ => ţ
227
- "\u0443": "u" # у => u
228
- "\u04f1": "\u00fc" # ӱ => ü
229
- "\u04ef": "\u016b" # ӯ => ū
230
- "\u045e": "\u016d" # ў => ŭ
231
- "\u04f3": "\u0171" # ӳ => ű
232
- "\u0443\u0301": "\u00fa" # у́ => ú
233
- "\u04f1\u0304": "\u1ee5\u0308" # ӱ̄ => ụ̈
234
- "\u04af": "\u00f9" # ү => ù
235
- "\u04b1": "\u0075\u0307" # ұ => u̇
236
- "\u051d": "w" # ԝ => w
237
- "\u0444": "f" # ф => f
238
- "\u0445": "h" # х => h
239
- "\u04b3": "\u1e29" # ҳ => ḩ
240
- "\u0446": "c" # ц => c
241
- "\u04b5": "\u0063\u0304" # ҵ => c̄
242
- "\u045f": "\u0064\u0302" # џ => d̂
243
- "\u0447": "\u010d" # ч => č
244
- "\u04b7": "\u00e7" # ҷ => ç
245
- "\u04cc": "\u0063\u0323" # ӌ => c̣
246
- "\u04f5": "\u0063\u0308" # ӵ => c̈
247
- "\u04b9": "\u0109" # ҹ => ĉ
248
- "\u0447\u0300": "\u0063\u0300" # ч̀ => c̀
249
- "\u04bd": "\u0063\u0306" # ҽ => c̆
250
- "\u04bf": "\u0063\u0328\u0306" # ҿ => c̨̆
251
- "\u0448": "\u0161" # ш => š
252
- "\u0449": "\u015d" # щ => ŝ
253
- "\u044a": "\u02ba" # ъ => ʺ
254
- "\u044b": "y" # ы => y
255
- "\u04f9": "\u00ff" # ӹ => ÿ
256
- "\u044b\u0304": "\u0233" # ы̄ => ȳ
257
- "\u044c": "\u02b9" # ь => ʹ
258
- "\u044d": "\u00e8" # э => è
259
- "\u04d9": "\u0061\u030b" # ә => a̋
260
- "\u04db": "\u00e0" # ӛ => à
261
- "\u044e": "\u00fb" # ю => û
262
- "\u044e\u0304": "\u00fb\u0304" # ю̄ => û̄
263
- "\u044f": "\u00e2" # я => â
264
- "\u0491": "\u0067\u0300" # ґ => g̀
265
- "\u0463": "\u011b" # ѣ => ě
266
- "\u046b": "\u01ce" # ѫ => ǎ
267
- "\u0473": "\u0066\u0300" # ѳ => f̀
268
- "\u0475": "\u1ef3" # ѵ => ỳ
269
- "\u04c0": "\u2021" # Ӏ => ‡
270
- "\u02bc": "\u0060" # ʼ => `
271
- "\u02ee": "\u00a8" # ˮ => ¨