interscript 0.1.6 → 2.1.0a9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -127
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +75 -339
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -71
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -27
  71. data/lib/interscript/opal/maps.js.erb +0 -10
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
  80. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  81. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
  82. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  83. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  84. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
  85. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
  86. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  87. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  88. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  89. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  90. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
  91. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  92. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  93. data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
  94. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
  95. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  96. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  97. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  98. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  99. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  100. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  101. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  102. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  103. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  104. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  105. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  106. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  107. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  108. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
  109. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
  110. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  111. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  112. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  113. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
  114. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  115. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  116. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
  117. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
  118. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  119. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  120. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  121. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  122. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  123. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  124. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  125. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  126. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
  127. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
  128. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  129. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  130. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
  131. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  132. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  133. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  134. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  135. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  136. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
  137. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  138. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  139. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  140. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  141. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  142. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  143. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  144. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  145. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
  146. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  147. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  148. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  149. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  150. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
  151. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  152. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  153. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
  154. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  155. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  156. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  157. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  158. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  159. data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
  160. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  161. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  162. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  163. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  164. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
  165. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
  166. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  167. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  172. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  173. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  174. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  175. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  176. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  177. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  178. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  179. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  180. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  181. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  182. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  183. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  184. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  185. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  186. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  200. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
  201. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  202. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  203. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  204. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  205. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  206. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  207. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  208. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  209. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  210. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  211. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
  212. data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
  213. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  214. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
  215. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
  216. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  217. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  218. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  219. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  220. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  221. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  222. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  223. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  224. data/spec/interscript/mapping_spec.rb +0 -42
  225. data/spec/interscript_spec.rb +0 -26
  226. data/spec/spec_helper.rb +0 -3
@@ -1,40 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 1997
4
- language: ell
5
- source_script: Grek
6
- destination_script: Latn
7
- name: ISO 843:1997
8
- url:
9
- creation_date: 1997
10
- description: |
11
- ISO Transcription table for Greek
12
-
13
- note:
14
- - Transliteration of Greek into Latin: Type 2, Clause 3 Table 2
15
- - Introduced casing to digamma, yot, and lunate sigma. (Casing was late introduction to character sets for those characters)
16
-
17
- tests:
18
-
19
- - source: |
20
- Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
21
-
22
- Γιάννης Μακρυγιάννης.
23
-
24
- expected: |
25
- Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
26
-
27
- Giánnis Makrygiánnis.
28
-
29
- map:
30
- character_separator: ""
31
- word_separator: " "
32
- inherit: "elot-ell-Grek-Latn-743-1982-ts"
33
-
34
- characters:
35
- "\u03DC": "W" # Ϝ
36
- "\u03DD": "w" # ϝ
37
- "\u03F2": "s" # ϲ
38
- "\u03F9": "S" # Ϲ
39
- "\u03F3": "j"
40
- "\u037F": "j"
@@ -1,62 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 3602-1989
4
- language: jpn
5
- source_script: Hrkt
6
- destination_script: Latn
7
- name: ISO 3602 Romanization of Japanese (Kana Script)
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
-
15
- tests:
16
- - source: かんおう
17
- expected: kan’ô
18
- - source: かのう
19
- expected: kanô
20
- - source: きんゆう
21
- expected: kin’yû
22
- - source: とうきょう
23
- expected: tôkyô
24
- - source: がっ•こう
25
- expected: gakkô
26
- - source: かごっま
27
- expected: kagomma
28
- - source: ぽっぽっや
29
- expected: poppoyya
30
- - source: てっら
31
- expected: terra
32
- - source: にゃっほー
33
- expected: nyahhô
34
- - source: ゴッホ
35
- expected: gohho
36
- - source: おも•う
37
- expected: omou
38
- - source: こうし
39
- expected: kôsi
40
- - source: こう•し #格子
41
- expected: kôsi
42
- - source: こ•うし #子牛
43
- expected: kousi
44
- - source: ぎゃあ
45
- expected: gyâ
46
-
47
- map:
48
- inherit: mext-jpn-Hrkt-Latn-1954
49
-
50
- rules:
51
- # Remove morpheme boundary marker after sokuon っ/ッ
52
- - pattern: "([っッ])•"
53
- result: "\\1"
54
-
55
- postrules:
56
- # Remove morpheme boundary marker
57
- - pattern: "•"
58
- result: ""
59
-
60
- # Use ’ instead of '
61
- - pattern: "'"
62
- result: "’"
@@ -1,271 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 9-1995
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ISO 9
8
- url: https://www.iso.org/standard/3589.html
9
- creation_date: 1995
10
- description: |
11
- Establishes a system for the transliteration into Latin characters of
12
- Cyrillic characters constituting the alphabets of Slavic and non-Slavic
13
- languages. Table 3 includes in a single sequence, listed in the
14
- Cyrillic alphabetic order, the 118 single or diacritic-carrying
15
- characters that appear in one or another of the considered alphabets.
16
- tests:
17
-
18
-
19
- map:
20
- characters:
21
- "\u0410": "A" # А => A
22
- "\u04d2": "\u00c4" # Ӓ => Ä (a diaeresis)
23
- "\u04d2\u0304": "\u1ea0\u0308" # Ӓ̄ => Ạ̈ (a diaeresis and dot below)
24
- "\u04d0": "\u0102" # Ӑ => Ă (a breve)
25
- "\u0410\u0304": "\u0100" # А̄ => Ā (a macron)
26
- "\u04d4": "\u00c6" # Ӕ => Æ (ae ligature)
27
- "\u0410\u0301": "\u00c1" # А́ => Á (a acute)
28
- "\u0410\u030a": "\u00c5" # А̊ => Å (a ring)
29
- "\u0411": "B" # Б => B
30
- "\u0412": "V" # В => V
31
- "\u0413": "G" # Г => G
32
- "\u0403": "\u01f4" # Ѓ => Ǵ (g acute)
33
- "\u0492": "\u0120" # Ғ => Ġ (g dot)
34
- "\u0494": "\u011e" # Ҕ => Ğ (g breve)
35
- "\u04ba": "\u1e24" # Һ => Ḥ (h dot)
36
- "\u0414": "D" # Д => D
37
- "\u0402": "\u0110" # Ђ => Đ (d macron)
38
- "\u0415": "E" # Е => E
39
- "\u04d6": "\u0114" # Ӗ => Ĕ (e breve)
40
- "\u0401": "\u00cb" # Ё => Ë (e diaeresis)
41
- "\u0404": "\u00ca" # Є => Ê (e circumflex)
42
- "\u0416": "\u017d" # Ж => Ž (z caron)
43
- "\u0496": "\u017d\u0327" # Җ => Ž̧ (z caron and cedilla[4])
44
- "\u04dc": "\u005a\u0304" # Ӝ => Z̄ (z macron)
45
- "\u04c1": "\u005a\u0306" # Ӂ => Z̆ (z breve)
46
- "\u0417": "\u005a" # З => Z
47
- "\u04de": "\u005a\u0308" # Ӟ => Z̈ (z diaeresis)
48
- "\u04e0": "\u0179" # Ӡ => Ź (z acute)
49
- "\u0405": "\u1e90" # Ѕ => Ẑ (z circumflex)
50
- "\u0418": "I" # И => I
51
- "\u04e2": "\u012a" # Ӣ => Ī (i macron)
52
- "\u0418\u0301": "\u00cd" # И́ => Í (i acute)
53
- "\u04e4": "\u00ce" # Ӥ => Î (i circumflex)
54
- "\u0419": "\u004a" # Й => J
55
- "\u0406": "\u00cc" # І => Ì (i grave)
56
- "\u0407": "\u00cf" # Ї => Ï (i diaeresis)
57
- "\u0406\u0304": "\u01cf" # І̄ => Ǐ (i caron (or breve))
58
- "\u0408": "\u004a\u030c" # Ј => J̌ (j caron)
59
- "\u0408\u0335": "\u004a\u0301" # Ј̵ => J́ (j acute)
60
- "\u041a": "K" # К => K
61
- "\u040c": "\u1e30" # Ќ => Ḱ (k acute)
62
- "\u04c3": "\u1e32" # Ӄ => Ḳ (k dot below)
63
- "\u049c": "\u004b\u0302" # Ҝ => K̂ (k circumflex)
64
- "\u04a0": "\u01e8" # Ҡ => Ǩ (k caron)
65
- "\u049e": "\u004b\u0304" # Ҟ => K̄ (k macron)
66
- "\u049a": "\u0136" # Қ => Ķ (k cedilla[4])
67
- "\u041a\u0328": "\u004b\u0300" # К̨ => K̀ (k grave)
68
- "\u051a": "Q" # Ԛ => Q
69
- "\u041b": "L" # Л => L
70
- "\u0409": "\u004c\u0302" # Љ => L̂ (l circumflex)
71
- "\u0520": "\u013b" # Ԡ => Ļ (l cedilla[4])
72
- "\u041c": "M" # М => M
73
- "\u041d": "N" # Н => N
74
- "\u040a": "\u004e\u0302" # Њ => N̂ (n circumflex)
75
- "\u04a2": "\u0145" # Ң => Ņ (n cedilla[4])
76
- "\u04c9": "\u1e46" # Ӊ => Ṇ (n dot below)
77
- "\u04a4": "\u1e44" # Ҥ => Ṅ (n dot)
78
- "\u050a": "\u01f8" # Ԋ => Ǹ (n grave)
79
- "\u0522": "\u0143" # Ԣ => Ń (n acute)
80
- "\u04c7": "\u0147" # Ӈ => Ň (n caron)
81
- "\u041d\u0304": "\u004e\u0304" # Н̄ => N̄ (n macron)
82
- "\u041e": "O" # О => O
83
- "\u04e6": "\u00d6" # Ӧ => Ö (o diaeresis)
84
- "\u04e8": "\u00d4" # Ө => Ô (o circumflex)
85
- "\u04ea": "\u0150" # Ӫ => Ő (o double acute)
86
- "\u04e6\u0304": "\u1ecc\u0308" # Ӧ̄ => Ọ̈ (o diaeresis and dot below)
87
- "\u04a8": "\u00d2" # Ҩ => Ò (o grave)
88
- "\u041e\u0301": "\u00d3" # О́ => Ó (o acute)
89
- "\u041e\u0304": "\u014c" # О̄ => Ō (o macron)
90
- "\u0152": "\u0152" # Œ => Œ (oe ligature)
91
- "\u041f": "P" # П => P
92
- "\u04a6": "\u1e54" # Ҧ => Ṕ (p acute)
93
- "\u0524": "\u0050\u0300" # Ԥ => P̀ (p grave)
94
- "\u0420": "R" # Р => R
95
- "\u0421": "S" # С => S
96
- "\u04aa": "\u015e" # Ҫ => Ş (s cedilla[4])
97
- "\u0421\u0300": "\u0053\u0300" # С̀ => S̀ (s grave)
98
- "\u0422": "T" # Т => T
99
- "\u040b": "\u0106" # Ћ => Ć (c acute)
100
- "\u050e": "\u0054\u0300" # Ԏ => T̀ (t grave)
101
- "\u0422\u030c": "\u0164" # Т̌ => Ť (t caron)
102
- "\u04ac": "\u0162" # Ҭ => Ţ (t cedilla[4])
103
- "\u0423": "U" # У => U
104
- "\u04f0": "\u00dc" # Ӱ => Ü (u diaeresis)
105
- "\u04ee": "\u016a" # Ӯ => Ū (u macron)
106
- "\u040e": "\u016c" # Ў => Ŭ (u breve)
107
- "\u04f2": "\u0170" # Ӳ => Ű (u double acute)
108
- "\u0423\u0301": "\u00da" # У́ => Ú (u acute)
109
- "\u04f0\u0304": "\u1ee4\u0308" # Ӱ̄ => Ụ̈ (u diaeresis and dot below)
110
- "\u04ae": "\u00d9" # Ү => Ù (u grave)
111
- "\u04b0": "\u0055\u0307" # Ұ => U̇ (u dot)
112
- "\u051c": "W" # Ԝ => W
113
- "\u0424": "F" # Ф => F
114
- "\u0425": "H" # Х => H
115
- "\u04b2": "\u1e28" # Ҳ => Ḩ (h cedilla[4])
116
- "\u0426": "C" # Ц => C
117
- "\u04b4": "\u0043\u0304" # Ҵ => C̄ (c macron)
118
- "\u040f": "\u0044\u0302" # Џ => D̂ (d circumflex)
119
- "\u0427": "\u010c" # Ч => Č (c caron)
120
- "\u04b6": "\u00c7" # Ҷ => Ç (c cedilla[4])
121
- "\u04cb": "\u0043\u0323" # Ӌ => C̣ (c dot below)
122
- "\u04f4": "\u0043\u0308" # Ӵ => C̈ (c diaeresis)
123
- "\u04b8": "\u0108" # Ҹ => Ĉ (c circumflex)
124
- "\u0427\u0300": "\u0043\u0300" # Ч̀ => C̀ (c grave)
125
- "\u04bc": "\u0043\u0306" # Ҽ => C̆ (c breve)
126
- "\u04be": "\u0043\u0328\u0306" # Ҿ => C̨̆ (c ogonek[4] and breve)
127
- "\u0428": "\u0160" # Ш => Š (s caron)
128
- "\u0429": "\u015c" # Щ => Ŝ (s circumflex)
129
- "\u042a": "\u02ba" # Ъ => ʺ (modifier letter double prime[5])
130
- "\u042b": "Y" # Ы => Y
131
- "\u04f8": "\u0178" # Ӹ => Ÿ (y diaeresis)
132
- "\u042b\u0304": "\u0232" # Ы̄ => Ȳ (y macron)
133
- "\u042c": "\u02b9" # Ь => ʹ (modifier letter prime[5])
134
- "\u042d": "\u00c8" # Э => È (e grave)
135
- "\u04d8": "\u0041\u030b" # Ә => A̋ (a double acute)
136
- "\u04da": "\u00c0" # Ӛ => À (a grave)
137
- "\u042e": "\u00db" # Ю => Û (u circumflex)
138
- "\u042e\u0304": "\u00db\u0304" # Ю̄ => Û̄ (u circumflex with macron)
139
- "\u042f": "\u00c2" # Я => Â (a circumflex)
140
- "\u0490": "\u0047\u0300" # Ґ => G̀ (g grave)
141
- "\u0462": "\u011a" # Ѣ => Ě (e caron)
142
- "\u046a": "\u01cd" # Ѫ => Ǎ (a caron)
143
- "\u0472": "\u0046\u0300" # Ѳ => F̀ (f grave)
144
- "\u0474": "\u1ef2" # Ѵ => Ỳ (y grave)
145
- "\u0430": "a" # а => a
146
- "\u04d3": "\u00e4" # ӓ => ä
147
- "\u04d3\u0304": "\u1ea1\u0308" # ӓ̄ => ạ̈
148
- "\u04d1": "\u0103" # ӑ => ă
149
- "\u0430\u0304": "\u0101" # а̄ => ā
150
- "\u04d5": "\u00e6" # ӕ => æ
151
- "\u0430\u0301": "\u00e1" # а́ => á
152
- "\u0430\u030a": "\u00e5" # а̊ => å
153
- "\u0431": "b" # б => b
154
- "\u0432": "v" # в => v
155
- "\u0433": "g" # г => g
156
- "\u0453": "\u01f5" # ѓ => ǵ
157
- "\u0493": "\u0121" # ғ => ġ
158
- "\u0495": "\u011f" # ҕ => ğ
159
- "\u04bb": "\u1e25" # һ => ḥ
160
- "\u0434": "d" # д => d
161
- "\u0452": "\u0111" # ђ => đ
162
- "\u0435": "e" # е => e
163
- "\u04d7": "\u0115" # ӗ => ĕ
164
- "\u0451": "\u00eb" # ё => ë
165
- "\u0454": "\u00ea" # є => ê
166
- "\u0436": "\u017e" # ж => ž
167
- "\u0497": "\u017e\u0327" # җ => ž̧
168
- "\u04dd": "\u007a\u0304" # ӝ => z̄
169
- "\u04c2": "\u007a\u0306" # ӂ => z̆
170
- "\u0437": "z" # з => z
171
- "\u04df": "\u007a\u0308" # ӟ => z̈
172
- "\u04e1": "\u017a" # ӡ => ź
173
- "\u0455": "\u1e91" # ѕ => ẑ
174
- "\u0438": "i" # и => i
175
- "\u04e3": "\u012b" # ӣ => ī
176
- "\u0438\u0301": "\u00ed" # и́ => í
177
- "\u04e5": "\u00ee" # ӥ => î
178
- "\u0439": "j" # й => j
179
- "\u0456": "\u00ec" # і => ì
180
- "\u0457": "\u00ef" # ї => ï
181
- "\u0456\u0304": "\u01d0" # і̄ => ǐ
182
- "\u0458": "\u01f0" # ј => ǰ
183
- "\u0458\u0335": "\u006a\u0301" # ј̵ => j́
184
- "\u043a": "k" # к => k
185
- "\u045c": "\u1e31" # ќ => ḱ
186
- "\u04c4": "\u1e33" # ӄ => ḳ
187
- "\u049d": "\u006b\u0302" # ҝ => k̂
188
- "\u04a1": "\u01e9" # ҡ => ǩ
189
- "\u049f": "\u006b\u0304" # ҟ => k̄
190
- "\u049b": "\u0137" # қ => ķ
191
- "\u043a\u0328": "\u006b\u0300" # к̨ => k̀
192
- "\u051b": "q" # ԛ => q
193
- "\u043b": "l" # л => l
194
- "\u0459": "\u006c\u0302" # љ => l̂
195
- "\u0521": "\u013c" # ԡ => ļ
196
- "\u043c": "m" # м => m
197
- "\u043d": "n" # н => n
198
- "\u045a": "\u006e\u0302" # њ => n̂
199
- "\u04a3": "\u0146" # ң => ņ
200
- "\u04ca": "\u1e47" # ӊ => ṇ
201
- "\u04a5": "\u1e45" # ҥ => ṅ
202
- "\u050b": "\u01f9" # ԋ => ǹ
203
- "\u0523": "\u0144" # ԣ => ń
204
- "\u04c8": "\u0148" # ӈ => ň
205
- "\u043d\u0304": "\u006e\u0304" # н̄ => n̄
206
- "\u043e": "o" # о => o
207
- "\u04e7": "\u00f6" # ӧ => ö
208
- "\u04e9": "\u00f4" # ө => ô
209
- "\u04eb": "\u0151" # ӫ => ő
210
- "\u043e\u0304\u0308": "\u1ecd\u0308" # о̄̈ => ọ̈
211
- "\u04a9": "\u00f2" # ҩ => ò
212
- "\u043e\u0301": "\u00f3" # о́ => ó
213
- "\u043e\u0304": "\u014d" # о̄ => ō
214
- "\u0153": "\u0153" # œ => œ
215
- "\u043f": "p" # п => p
216
- "\u04a7": "\u1e55" # ҧ => ṕ
217
- "\u0525": "\u0070\u0300" # ԥ => p̀
218
- "\u0440": "r" # р => r
219
- "\u0441": "s" # с => s
220
- "\u04ab": "\u015f" # ҫ => ş
221
- "\u0441\u0300": "\u0073\u0300" # с̀ => s̀
222
- "\u0442": "t" # т => t
223
- "\u045b": "\u0107" # ћ => ć
224
- "\u050f": "\u0074\u0300" # ԏ => t̀
225
- "\u0442\u030c": "\u0165" # т̌ => ť
226
- "\u04ad": "\u0163" # ҭ => ţ
227
- "\u0443": "u" # у => u
228
- "\u04f1": "\u00fc" # ӱ => ü
229
- "\u04ef": "\u016b" # ӯ => ū
230
- "\u045e": "\u016d" # ў => ŭ
231
- "\u04f3": "\u0171" # ӳ => ű
232
- "\u0443\u0301": "\u00fa" # у́ => ú
233
- "\u04f1\u0304": "\u1ee5\u0308" # ӱ̄ => ụ̈
234
- "\u04af": "\u00f9" # ү => ù
235
- "\u04b1": "\u0075\u0307" # ұ => u̇
236
- "\u051d": "w" # ԝ => w
237
- "\u0444": "f" # ф => f
238
- "\u0445": "h" # х => h
239
- "\u04b3": "\u1e29" # ҳ => ḩ
240
- "\u0446": "c" # ц => c
241
- "\u04b5": "\u0063\u0304" # ҵ => c̄
242
- "\u045f": "\u0064\u0302" # џ => d̂
243
- "\u0447": "\u010d" # ч => č
244
- "\u04b7": "\u00e7" # ҷ => ç
245
- "\u04cc": "\u0063\u0323" # ӌ => c̣
246
- "\u04f5": "\u0063\u0308" # ӵ => c̈
247
- "\u04b9": "\u0109" # ҹ => ĉ
248
- "\u0447\u0300": "\u0063\u0300" # ч̀ => c̀
249
- "\u04bd": "\u0063\u0306" # ҽ => c̆
250
- "\u04bf": "\u0063\u0328\u0306" # ҿ => c̨̆
251
- "\u0448": "\u0161" # ш => š
252
- "\u0449": "\u015d" # щ => ŝ
253
- "\u044a": "\u02ba" # ъ => ʺ
254
- "\u044b": "y" # ы => y
255
- "\u04f9": "\u00ff" # ӹ => ÿ
256
- "\u044b\u0304": "\u0233" # ы̄ => ȳ
257
- "\u044c": "\u02b9" # ь => ʹ
258
- "\u044d": "\u00e8" # э => è
259
- "\u04d9": "\u0061\u030b" # ә => a̋
260
- "\u04db": "\u00e0" # ӛ => à
261
- "\u044e": "\u00fb" # ю => û
262
- "\u044e\u0304": "\u00fb\u0304" # ю̄ => û̄
263
- "\u044f": "\u00e2" # я => â
264
- "\u0491": "\u0067\u0300" # ґ => g̀
265
- "\u0463": "\u011b" # ѣ => ě
266
- "\u046b": "\u01ce" # ѫ => ǎ
267
- "\u0473": "\u0066\u0300" # ѳ => f̀
268
- "\u0475": "\u1ef3" # ѵ => ỳ
269
- "\u04c0": "\u2021" # Ӏ => ‡
270
- "\u02bc": "\u0060" # ʼ => `
271
- "\u02ee": "\u00a8" # ˮ => ¨
@@ -1,109 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 11940-1998
4
- language: tha
5
- source_script: Thai
6
- destination_script: Latn
7
- name: ISO 11940:1998 Information and documentation -- Transliteration of Thai
8
- url: https://www.iso.org/standard/20574.html
9
- creation_date: 1998
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
-
15
- tests:
16
- - source: 'ภาษาไทย'
17
- expected: 'p̣hās̛̄āịthy'
18
- - source: 'เชียงใหม่'
19
- expected: 'echīyngıh̄m̀'
20
-
21
- map:
22
-
23
- characters:
24
- '\u0e01': 'k' # ก THAI CHARACTER KO KAI
25
- '\u0e02': 'k̄h' # ข THAI CHARACTER KHO KHAI
26
- '\u0e03': 'ḳ̄h' # ฃ THAI CHARACTER KHO KHUAT
27
- '\u0e04': 'kh' # ค THAI CHARACTER KHO KHWAI
28
- '\u0e05': 'k̛h' # ฅ THAI CHARACTER KHO KHON
29
- '\u0e06': 'ḳh' # ฆ THAI CHARACTER KHO RAKHANG
30
- '\u0e07': 'ng' # ง THAI CHARACTER NGO NGU
31
- '\u0e08': 'c' # จ THAI CHARACTER CHO CHAN
32
- '\u0e09': 'c̄h' # ฉ THAI CHARACTER CHO CHING
33
- '\u0e0a': 'ch' # ช THAI CHARACTER CHO CHANG
34
- '\u0e0b': 's' # ซ THAI CHARACTER SO SO
35
- '\u0e0c': 'c̣h' # ฌ THAI CHARACTER CHO CHOE
36
- '\u0e0d': 'ỵ' # ญ THAI CHARACTER YO YING
37
- '\u0e0e': 'ḍ' # ฎ THAI CHARACTER DO CHADA
38
- '\u0e0f': 'ṭ' # ฏ THAI CHARACTER TO PATAK
39
- '\u0e10': 'ṭ̄h' # ฐ THAI CHARACTER THO THAN
40
- '\u0e11': 'ṯh' # ฑ THAI CHARACTER THO NANGMONTHO
41
- '\u0e12': 't̛h' # ฒ THAI CHARACTER THO PHUTHAO
42
- '\u0e13': 'ṇ' # ณ THAI CHARACTER NO NEN
43
- '\u0e14': 'd' # ด THAI CHARACTER DO DEK
44
- '\u0e15': 't' # ต THAI CHARACTER TO TAO
45
- '\u0e16': 't̄h' # ถ THAI CHARACTER THO THUNG
46
- '\u0e17': 'th' # ท THAI CHARACTER THO THAHAN
47
- '\u0e18': 'ṭh' # ธ THAI CHARACTER THO THONG
48
- '\u0e19': 'n' # น THAI CHARACTER NO NU
49
- '\u0e1a': 'b' # บ THAI CHARACTER BO BAIMAI
50
- '\u0e1b': 'p' # ป THAI CHARACTER PO PLA
51
- '\u0e1c': 'p̄h' # ผ THAI CHARACTER PHO PHUNG
52
- '\u0e1d': 'f̄' # ฝ THAI CHARACTER FO FA
53
- '\u0e1e': 'ph' # พ THAI CHARACTER PHO PHAN
54
- '\u0e1f': 'f' # ฟ THAI CHARACTER FO FAN
55
- '\u0e20': 'p̣h' # ภ THAI CHARACTER PHO SAMPHAO
56
- '\u0e21': 'm' # ม THAI CHARACTER MO MA
57
- '\u0e22': 'y' # ย THAI CHARACTER YO YAK
58
- '\u0e23': 'r' # ร THAI CHARACTER RO RUA
59
- '\u0e24': 'v' # ฤ THAI CHARACTER RU
60
- '\u0e25': 'l' # ล THAI CHARACTER LO LING
61
- '\u0e26': 'ł' # ฦ THAI CHARACTER LU
62
- '\u0e27': 'w' # ว THAI CHARACTER WO WAEN
63
- '\u0e28': 'ṣ̄' # ศ THAI CHARACTER SO SALA
64
- '\u0e29': 's̛̄' # ษ THAI CHARACTER SO RUSI
65
- '\u0e2a': 's̄' # ส THAI CHARACTER SO SUA
66
- '\u0e2b': 'h̄' # ห THAI CHARACTER HO HIP
67
- '\u0e2c': 'ḷ' # ฬ THAI CHARACTER LO CHULA
68
- '\u0e2d': 'x' # อ THAI CHARACTER O ANG
69
- '\u0e2e': 'ḥ' # ฮ THAI CHARACTER HO NOKHUK
70
- '\u0e2f': 'ǂ' # ฯ THAI CHARACTER PAIYANNOI
71
- '\u0e30': 'a' # ะ THAI CHARACTER SARA A
72
- '\u0e31': 'ạ' # ั THAI CHARACTER MAI HAN-AKAT
73
- '\u0e32': 'ā' # า THAI CHARACTER SARA AA
74
- '\u0e33': 'å' # ำ THAI CHARACTER SARA AM
75
- '\u0e34': 'i' # ิ THAI CHARACTER SARA I
76
- '\u0e35': 'ī' # ี THAI CHARACTER SARA II
77
- '\u0e36': 'ụ' # ึ THAI CHARACTER SARA UE
78
- '\u0e37': 'ụ̄' # ื THAI CHARACTER SARA UEE
79
- '\u0e38': 'u' # ุ THAI CHARACTER SARA U
80
- '\u0e39': 'ū' # ู THAI CHARACTER SARA UU
81
- '\u0e3a': '–̥' # ฺ THAI CHARACTER PHINTHU
82
- '\u0e40': 'e' # เ THAI CHARACTER SARA E
83
- '\u0e41': 'æ' # แ THAI CHARACTER SARA AE
84
- '\u0e42': 'o' # โ THAI CHARACTER SARA O
85
- '\u0e43': 'ı' # ใ THAI CHARACTER SARA AI MAIMUAN
86
- '\u0e44': 'ị' # ไ THAI CHARACTER SARA AI MAIMALAI
87
- '\u0e45': 'ɨ' # ๅ THAI CHARACTER LAKKHANGYAO
88
- '\u0e46': '«' # ๆ THAI CHARACTER MAIYAMOK
89
- '\u0e47': '̆' # ็ THAI CHARACTER MAITAIKHU
90
- '\u0e48': '̀' # ่ THAI CHARACTER MAI EK
91
- '\u0e49': '̂' # ้ THAI CHARACTER MAI THO
92
- '\u0e4a': '́' # ๊ THAI CHARACTER MAI TRI
93
- '\u0e4b': '̌' # ๋ THAI CHARACTER MAI CHATTAWA
94
- '\u0e4c': '̒' # ์ THAI CHARACTER THANTHAKHAT
95
- '\u0e4d': '̊' # ํ THAI CHARACTER NIKHAHIT
96
- '\u0e4e': '~' # ๎ THAI CHARACTER YAMAKKAN
97
- '\u0e4f': '§' # ๏ THAI CHARACTER FONGMAN
98
- '\u0e50': '0' # ๐ THAI DIGIT ZERO
99
- '\u0e51': '1' # ๑ THAI DIGIT ONE
100
- '\u0e52': '2' # ๒ THAI DIGIT TWO
101
- '\u0e53': '3' # ๓ THAI DIGIT THREE
102
- '\u0e54': '4' # ๔ THAI DIGIT FOUR
103
- '\u0e55': '5' # ๕ THAI DIGIT FIVE
104
- '\u0e56': '6' # ๖ THAI DIGIT SIX
105
- '\u0e57': '7' # ๗ THAI DIGIT SEVEN
106
- '\u0e58': '8' # ๘ THAI DIGIT EIGHT
107
- '\u0e59': '9' # ๙ THAI DIGIT NINE
108
- '\u0e5a': 'ǁ' # ๚ THAI CHARACTER ANGKHANKHU
109
- '\u0e5b': '»' # ๛ THAI CHARACTER KHOMUT