interscript 0.1.5 → 2.1.0a8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (200) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -123
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -311
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -69
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -23
  71. data/lib/interscript/opal/maps.js.erb +0 -7
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  77. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  78. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  79. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  80. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  81. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  82. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  83. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  84. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  85. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  86. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  87. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  88. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  89. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  90. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  91. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  92. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  93. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  94. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  95. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  96. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  97. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  98. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  99. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  100. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  101. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  102. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  103. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  104. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  105. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  106. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  107. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  108. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  109. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  110. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  111. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  112. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  113. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  114. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  115. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  116. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  117. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  118. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  119. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  120. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  122. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  123. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  124. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  125. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  126. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  127. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  128. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  129. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  130. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  131. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  132. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  133. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  134. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  135. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  136. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  137. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  138. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  139. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  140. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  141. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  142. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  143. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  144. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  145. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  146. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  147. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  148. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  149. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  150. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  151. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  152. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  153. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  154. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  155. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  156. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  157. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  158. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  159. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  160. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  161. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  162. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  163. data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
  164. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  165. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  166. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  167. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  168. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  169. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  170. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  171. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  172. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  173. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  174. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  175. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  176. data/maps/ses-ara-arab-latn-1930.yaml +0 -275
  177. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  178. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  179. data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
  180. data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
  181. data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
  182. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  183. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  184. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  185. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  186. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  187. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  188. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  189. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  190. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  191. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  192. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  193. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  194. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  195. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  196. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  197. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  198. data/spec/interscript/mapping_spec.rb +0 -42
  199. data/spec/interscript_spec.rb +0 -26
  200. data/spec/spec_helper.rb +0 -3
@@ -1,41 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 1997
4
- language: ell
5
- source_script: Grek
6
- destination_script: Latn
7
- name: ISO 843:1997
8
- url:
9
- creation_date: 1997
10
- description: |
11
- ISO Transcription table for Greek
12
-
13
- note:
14
- - Transliteration of Greek into Latin: Type 2, Clause 3 Table 2
15
- - Introduced casing to digamma, yot, and lunate sigma. (Casing was late introduction to character sets for those characters)
16
-
17
- tests:
18
-
19
- - source: |
20
- Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
21
-
22
- Γιάννης Μακρυγιάννης.
23
-
24
- expected: |
25
- Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
26
-
27
- Giánnis Makrygiánnis.
28
-
29
- map:
30
- character_separator: ""
31
- word_separator: " "
32
- inherit: "elot-ell-Grek-Latn-743-1982-ts"
33
-
34
- characters:
35
- "\u03DC": "W" # Ϝ
36
- "\u03DD": "w" # ϝ
37
- "\u03F2": "s" # ϲ
38
- "\u03F9": "S" # Ϲ
39
- "\u03F3": "j"
40
- "\u037F": "j"
41
-
@@ -1,62 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 3602-1989
4
- language: jpn
5
- source_script: Hrkt
6
- destination_script: Latn
7
- name: ISO 3602 Romanization of Japanese (Kana Script)
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
-
15
- tests:
16
- - source: かんおう
17
- expected: kan’ô
18
- - source: かのう
19
- expected: kanô
20
- - source: きんゆう
21
- expected: kin’yû
22
- - source: とうきょう
23
- expected: tôkyô
24
- - source: がっ•こう
25
- expected: gakkô
26
- - source: かごっま
27
- expected: kagomma
28
- - source: ぽっぽっや
29
- expected: poppoyya
30
- - source: てっら
31
- expected: terra
32
- - source: にゃっほー
33
- expected: nyahhô
34
- - source: ゴッホ
35
- expected: gohho
36
- - source: おも•う
37
- expected: omou
38
- - source: こうし
39
- expected: kôsi
40
- - source: こう•し #格子
41
- expected: kôsi
42
- - source: こ•うし #子牛
43
- expected: kousi
44
- - source: ぎゃあ
45
- expected: gyâ
46
-
47
- map:
48
- inherit: mext-jpn-Hrkt-Latn-1954
49
-
50
- rules:
51
- # Remove morpheme boundary marker after sokuon っ/ッ
52
- - pattern: "([っッ])•"
53
- result: "\\1"
54
-
55
- postrules:
56
- # Remove morpheme boundary marker
57
- - pattern: "•"
58
- result: ""
59
-
60
- # Use ’ instead of '
61
- - pattern: "'"
62
- result: "’"
@@ -1,272 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 9-1995
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ISO 9
8
- url: https://www.iso.org/standard/3589.html
9
- creation_date: 1995
10
- description: |
11
- Establishes a system for the transliteration into Latin characters of
12
- Cyrillic characters constituting the alphabets of Slavic and non-Slavic
13
- languages. Table 3 includes in a single sequence, listed in the
14
- Cyrillic alphabetic order, the 118 single or diacritic-carrying
15
- characters that appear in one or another of the considered alphabets.
16
- tests:
17
-
18
-
19
- map:
20
- characters:
21
- "\u0410": "A" # А => A
22
- "\u04d2": "\u00c4" # Ӓ => Ä (a diaeresis)
23
- "\u04d2\u0304": "\u1ea0\u0308" # Ӓ̄ => Ạ̈ (a diaeresis and dot below)
24
- "\u04d0": "\u0102" # Ӑ => Ă (a breve)
25
- "\u0410\u0304": "\u0100" # А̄ => Ā (a macron)
26
- "\u04d4": "\u00c6" # Ӕ => Æ (ae ligature)
27
- "\u0410\u0301": "\u00c1" # А́ => Á (a acute)
28
- "\u0410\u030a": "\u00c5" # А̊ => Å (a ring)
29
- "\u0411": "B" # Б => B
30
- "\u0412": "V" # В => V
31
- "\u0413": "G" # Г => G
32
- "\u0403": "\u01f4" # Ѓ => Ǵ (g acute)
33
- "\u0492": "\u0120" # Ғ => Ġ (g dot)
34
- "\u0494": "\u011e" # Ҕ => Ğ (g breve)
35
- "\u04ba": "\u1e24" # Һ => Ḥ (h dot)
36
- "\u0414": "D" # Д => D
37
- "\u0402": "\u0110" # Ђ => Đ (d macron)
38
- "\u0415": "E" # Е => E
39
- "\u04d6": "\u0114" # Ӗ => Ĕ (e breve)
40
- "\u0401": "\u00cb" # Ё => Ë (e diaeresis)
41
- "\u0404": "\u00ca" # Є => Ê (e circumflex)
42
- "\u0416": "\u017d" # Ж => Ž (z caron)
43
- "\u0496": "\u017d\u0327" # Җ => Ž̧ (z caron and cedilla[4])
44
- "\u04dc": "\u005a\u0304" # Ӝ => Z̄ (z macron)
45
- "\u04c1": "\u005a\u0306" # Ӂ => Z̆ (z breve)
46
- "\u0417": "\u005a" # З => Z
47
- "\u04de": "\u005a\u0308" # Ӟ => Z̈ (z diaeresis)
48
- "\u04e0": "\u0179" # Ӡ => Ź (z acute)
49
- "\u0405": "\u1e90" # Ѕ => Ẑ (z circumflex)
50
- "\u0418": "I" # И => I
51
- "\u04e2": "\u012a" # Ӣ => Ī (i macron)
52
- "\u0418\u0301": "\u00cd" # И́ => Í (i acute)
53
- "\u04e4": "\u00ce" # Ӥ => Î (i circumflex)
54
- "\u0419": "\u004a" # Й => J
55
- "\u0406": "\u00cc" # І => Ì (i grave)
56
- "\u0407": "\u00cf" # Ї => Ï (i diaeresis)
57
- "\u0406\u0304": "\u01cf" # І̄ => Ǐ (i caron (or breve))
58
- "\u0408": "\u004a\u030c" # Ј => J̌ (j caron)
59
- "\u0408\u0335": "\u004a\u0301" # Ј̵ => J́ (j acute)
60
- "\u041a": "K" # К => K
61
- "\u040c": "\u1e30" # Ќ => Ḱ (k acute)
62
- "\u04c3": "\u1e32" # Ӄ => Ḳ (k dot below)
63
- "\u049c": "\u004b\u0302" # Ҝ => K̂ (k circumflex)
64
- "\u04a0": "\u01e8" # Ҡ => Ǩ (k caron)
65
- "\u049e": "\u004b\u0304" # Ҟ => K̄ (k macron)
66
- "\u049a": "\u0136" # Қ => Ķ (k cedilla[4])
67
- "\u041a\u0328": "\u004b\u0300" # К̨ => K̀ (k grave)
68
- "\u051a": "Q" # Ԛ => Q
69
- "\u041b": "L" # Л => L
70
- "\u0409": "\u004c\u0302" # Љ => L̂ (l circumflex)
71
- "\u0520": "\u013b" # Ԡ => Ļ (l cedilla[4])
72
- "\u041c": "M" # М => M
73
- "\u041d": "N" # Н => N
74
- "\u040a": "\u004e\u0302" # Њ => N̂ (n circumflex)
75
- "\u04a2": "\u0145" # Ң => Ņ (n cedilla[4])
76
- "\u04c9": "\u1e46" # Ӊ => Ṇ (n dot below)
77
- "\u04a4": "\u1e44" # Ҥ => Ṅ (n dot)
78
- "\u050a": "\u01f8" # Ԋ => Ǹ (n grave)
79
- "\u0522": "\u0143" # Ԣ => Ń (n acute)
80
- "\u04c7": "\u0147" # Ӈ => Ň (n caron)
81
- "\u041d\u0304": "\u004e\u0304" # Н̄ => N̄ (n macron)
82
- "\u041e": "O" # О => O
83
- "\u04e6": "\u00d6" # Ӧ => Ö (o diaeresis)
84
- "\u04e8": "\u00d4" # Ө => Ô (o circumflex)
85
- "\u04ea": "\u0150" # Ӫ => Ő (o double acute)
86
- "\u04e6\u0304": "\u1ecc\u0308" # Ӧ̄ => Ọ̈ (o diaeresis and dot below)
87
- "\u04a8": "\u00d2" # Ҩ => Ò (o grave)
88
- "\u041e\u0301": "\u00d3" # О́ => Ó (o acute)
89
- "\u041e\u0304": "\u014c" # О̄ => Ō (o macron)
90
- "\u0152": "\u0152" # Œ => Œ (oe ligature)
91
- "\u041f": "P" # П => P
92
- "\u04a6": "\u1e54" # Ҧ => Ṕ (p acute)
93
- "\u0524": "\u0050\u0300" # Ԥ => P̀ (p grave)
94
- "\u0420": "R" # Р => R
95
- "\u0421": "S" # С => S
96
- "\u04aa": "\u015e" # Ҫ => Ş (s cedilla[4])
97
- "\u0421\u0300": "\u0053\u0300" # С̀ => S̀ (s grave)
98
- "\u0422": "T" # Т => T
99
- "\u040b": "\u0106" # Ћ => Ć (c acute)
100
- "\u050e": "\u0054\u0300" # Ԏ => T̀ (t grave)
101
- "\u0422\u030c": "\u0164" # Т̌ => Ť (t caron)
102
- "\u04ac": "\u0162" # Ҭ => Ţ (t cedilla[4])
103
- "\u0423": "U" # У => U
104
- "\u04f0": "\u00dc" # Ӱ => Ü (u diaeresis)
105
- "\u04ee": "\u016a" # Ӯ => Ū (u macron)
106
- "\u040e": "\u016c" # Ў => Ŭ (u breve)
107
- "\u04f2": "\u0170" # Ӳ => Ű (u double acute)
108
- "\u0423\u0301": "\u00da" # У́ => Ú (u acute)
109
- "\u04f0\u0304": "\u1ee4\u0308" # Ӱ̄ => Ụ̈ (u diaeresis and dot below)
110
- "\u04ae": "\u00d9" # Ү => Ù (u grave)
111
- "\u04b0": "\u0055\u0307" # Ұ => U̇ (u dot)
112
- "\u051c": "W" # Ԝ => W
113
- "\u0424": "F" # Ф => F
114
- "\u0425": "H" # Х => H
115
- "\u04b2": "\u1e28" # Ҳ => Ḩ (h cedilla[4])
116
- "\u0426": "C" # Ц => C
117
- "\u04b4": "\u0043\u0304" # Ҵ => C̄ (c macron)
118
- "\u040f": "\u0044\u0302" # Џ => D̂ (d circumflex)
119
- "\u0427": "\u010c" # Ч => Č (c caron)
120
- "\u04b6": "\u00c7" # Ҷ => Ç (c cedilla[4])
121
- "\u04cb": "\u0043\u0323" # Ӌ => C̣ (c dot below)
122
- "\u04f4": "\u0043\u0308" # Ӵ => C̈ (c diaeresis)
123
- "\u04b8": "\u0108" # Ҹ => Ĉ (c circumflex)
124
- "\u0427\u0300": "\u0043\u0300" # Ч̀ => C̀ (c grave)
125
- "\u04bc": "\u0043\u0306" # Ҽ => C̆ (c breve)
126
- "\u04be": "\u0043\u0328\u0306" # Ҿ => C̨̆ (c ogonek[4] and breve)
127
- "\u0428": "\u0160" # Ш => Š (s caron)
128
- "\u0429": "\u015c" # Щ => Ŝ (s circumflex)
129
- "\u042a": "\u02ba" # Ъ => ʺ (modifier letter double prime[5])
130
- "\u042b": "Y" # Ы => Y
131
- "\u04f8": "\u0178" # Ӹ => Ÿ (y diaeresis)
132
- "\u042b\u0304": "\u0232" # Ы̄ => Ȳ (y macron)
133
- "\u042c": "\u02b9" # Ь => ʹ (modifier letter prime[5])
134
- "\u042d": "\u00c8" # Э => È (e grave)
135
- "\u04d8": "\u0041\u030b" # Ә => A̋ (a double acute)
136
- "\u04da": "\u00c0" # Ӛ => À (a grave)
137
- "\u042e": "\u00db" # Ю => Û (u circumflex)
138
- "\u042e\u0304": "\u00db\u0304" # Ю̄ => Û̄ (u circumflex with macron)
139
- "\u042f": "\u00c2" # Я => Â (a circumflex)
140
- "\u0490": "\u0047\u0300" # Ґ => G̀ (g grave)
141
- "\u0462": "\u011a" # Ѣ => Ě (e caron)
142
- "\u046a": "\u01cd" # Ѫ => Ǎ (a caron)
143
- "\u0472": "\u0046\u0300" # Ѳ => F̀ (f grave)
144
- "\u0474": "\u1ef2" # Ѵ => Ỳ (y grave)
145
- "\u0430": "a" # а => a
146
- "\u04d3": "\u00e4" # ӓ => ä
147
- "\u04d3\u0304": "\u1ea1\u0308" # ӓ̄ => ạ̈
148
- "\u04d1": "\u0103" # ӑ => ă
149
- "\u0430\u0304": "\u0101" # а̄ => ā
150
- "\u04d5": "\u00e6" # ӕ => æ
151
- "\u0430\u0301": "\u00e1" # а́ => á
152
- "\u0430\u030a": "\u00e5" # а̊ => å
153
- "\u0431": "b" # б => b
154
- "\u0432": "v" # в => v
155
- "\u0433": "g" # г => g
156
- "\u0453": "\u01f5" # ѓ => ǵ
157
- "\u0493": "\u0121" # ғ => ġ
158
- "\u0495": "\u011f" # ҕ => ğ
159
- "\u04bb": "\u1e25" # һ => ḥ
160
- "\u0434": "d" # д => d
161
- "\u0452": "\u0111" # ђ => đ
162
- "\u0435": "e" # е => e
163
- "\u04d7": "\u0115" # ӗ => ĕ
164
- "\u0451": "\u00eb" # ё => ë
165
- "\u0454": "\u00ea" # є => ê
166
- "\u0436": "\u017e" # ж => ž
167
- "\u0497": "\u017e\u0327" # җ => ž̧
168
- "\u04dd": "\u007a\u0304" # ӝ => z̄
169
- "\u04c2": "\u007a\u0306" # ӂ => z̆
170
- "\u0437": "z" # з => z
171
- "\u04df": "\u007a\u0308" # ӟ => z̈
172
- "\u04e1": "\u017a" # ӡ => ź
173
- "\u0455": "\u1e91" # ѕ => ẑ
174
- "\u0438": "i" # и => i
175
- "\u04e3": "\u012b" # ӣ => ī
176
- "\u0438\u0301": "\u00ed" # и́ => í
177
- "\u04e5": "\u00ee" # ӥ => î
178
- "\u0439": "j" # й => j
179
- "\u0456": "\u00ec" # і => ì
180
- "\u0457": "\u00ef" # ї => ï
181
- "\u0456\u0304": "\u01d0" # і̄ => ǐ
182
- "\u0458": "\u01f0" # ј => ǰ
183
- "\u0458\u0335": "\u006a\u0301" # ј̵ => j́
184
- "\u043a": "k" # к => k
185
- "\u045c": "\u1e31" # ќ => ḱ
186
- "\u04c4": "\u1e33" # ӄ => ḳ
187
- "\u049d": "\u006b\u0302" # ҝ => k̂
188
- "\u04a1": "\u01e9" # ҡ => ǩ
189
- "\u049f": "\u006b\u0304" # ҟ => k̄
190
- "\u049b": "\u0137" # қ => ķ
191
- "\u043a\u0328": "\u006b\u0300" # к̨ => k̀
192
- "\u051b": "q" # ԛ => q
193
- "\u043b": "l" # л => l
194
- "\u0459": "\u006c\u0302" # љ => l̂
195
- "\u0521": "\u013c" # ԡ => ļ
196
- "\u043c": "m" # м => m
197
- "\u043d": "n" # н => n
198
- "\u045a": "\u006e\u0302" # њ => n̂
199
- "\u04a3": "\u0146" # ң => ņ
200
- "\u04ca": "\u1e47" # ӊ => ṇ
201
- "\u04a5": "\u1e45" # ҥ => ṅ
202
- "\u050b": "\u01f9" # ԋ => ǹ
203
- "\u0523": "\u0144" # ԣ => ń
204
- "\u04c8": "\u0148" # ӈ => ň
205
- "\u043d\u0304": "\u006e\u0304" # н̄ => n̄
206
- "\u043e": "o" # о => o
207
- "\u04e7": "\u00f6" # ӧ => ö
208
- "\u04e9": "\u00f4" # ө => ô
209
- "\u04eb": "\u0151" # ӫ => ő
210
- "\u043e\u0304\u0308": "\u1ecd\u0308" # о̄̈ => ọ̈
211
- "\u04a9": "\u00f2" # ҩ => ò
212
- "\u043e\u0301": "\u00f3" # о́ => ó
213
- "\u043e\u0304": "\u014d" # о̄ => ō
214
- "\u0153": "\u0153" # œ => œ
215
- "\u043f": "p" # п => p
216
- "\u04a7": "\u1e55" # ҧ => ṕ
217
- "\u0525": "\u0070\u0300" # ԥ => p̀
218
- "\u0440": "r" # р => r
219
- "\u0441": "s" # с => s
220
- "\u04ab": "\u015f" # ҫ => ş
221
- "\u0441\u0300": "\u0073\u0300" # с̀ => s̀
222
- "\u0442": "t" # т => t
223
- "\u045b": "\u0107" # ћ => ć
224
- "\u050f": "\u0074\u0300" # ԏ => t̀
225
- "\u0442\u030c": "\u0165" # т̌ => ť
226
- "\u04ad": "\u0163" # ҭ => ţ
227
- "\u0443": "u" # у => u
228
- "\u04f1": "\u00fc" # ӱ => ü
229
- "\u04ef": "\u016b" # ӯ => ū
230
- "\u045e": "\u016d" # ў => ŭ
231
- "\u04f3": "\u0171" # ӳ => ű
232
- "\u0443\u0301": "\u00fa" # у́ => ú
233
- "\u04f1\u0304": "\u1ee5\u0308" # ӱ̄ => ụ̈
234
- "\u04af": "\u00f9" # ү => ù
235
- "\u04b1": "\u0075\u0307" # ұ => u̇
236
- "\u051d": "w" # ԝ => w
237
- "\u0444": "f" # ф => f
238
- "\u0445": "h" # х => h
239
- "\u04b3": "\u1e29" # ҳ => ḩ
240
- "\u0446": "c" # ц => c
241
- "\u04b5": "\u0063\u0304" # ҵ => c̄
242
- "\u045f": "\u0064\u0302" # џ => d̂
243
- "\u0447": "\u010d" # ч => č
244
- "\u04b7": "\u00e7" # ҷ => ç
245
- "\u04cc": "\u0063\u0323" # ӌ => c̣
246
- "\u04f5": "\u0063\u0308" # ӵ => c̈
247
- "\u04b9": "\u0109" # ҹ => ĉ
248
- "\u0447\u0300": "\u0063\u0300" # ч̀ => c̀
249
- "\u04bd": "\u0063\u0306" # ҽ => c̆
250
- "\u04bf": "\u0063\u0328\u0306" # ҿ => c̨̆
251
- "\u0448": "\u0161" # ш => š
252
- "\u0449": "\u015d" # щ => ŝ
253
- "\u044a": "\u02ba" # ъ => ʺ
254
- "\u044b": "y" # ы => y
255
- "\u04f9": "\u00ff" # ӹ => ÿ
256
- "\u044b\u0304": "\u0233" # ы̄ => ȳ
257
- "\u044c": "\u02b9" # ь => ʹ
258
- "\u044d": "\u00e8" # э => è
259
- "\u04d9": "\u0061\u030b" # ә => a̋
260
- "\u04db": "\u00e0" # ӛ => à
261
- "\u044e": "\u00fb" # ю => û
262
- "\u044e\u0304": "\u00fb\u0304" # ю̄ => û̄
263
- "\u044f": "\u00e2" # я => â
264
- "\u0491": "\u0067\u0300" # ґ => g̀
265
- "\u0463": "\u011b" # ѣ => ě
266
- "\u046b": "\u01ce" # ѫ => ǎ
267
- "\u0473": "\u0066\u0300" # ѳ => f̀
268
- "\u0475": "\u1ef3" # ѵ => ỳ
269
- "\u04c0": "\u2021" # Ӏ => ‡
270
- "\u02bc": "\u0060" # ʼ => `
271
- "\u02ee": "\u00a8" # ˮ => ¨
272
-
@@ -1,109 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 11940-1998
4
- language: tha
5
- source_script: Thai
6
- destination_script: Latn
7
- name: ISO 11940:1998 Information and documentation -- Transliteration of Thai
8
- url: https://www.iso.org/standard/20574.html
9
- creation_date: 1998
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
-
15
- tests:
16
- - source: 'ภาษาไทย'
17
- expected: 'p̣hās̛̄āịthy'
18
- - source: 'เชียงใหม่'
19
- expected: 'echīyngıh̄m̀'
20
-
21
- map:
22
-
23
- characters:
24
- '\u0e01': 'k' # ก THAI CHARACTER KO KAI
25
- '\u0e02': 'k̄h' # ข THAI CHARACTER KHO KHAI
26
- '\u0e03': 'ḳ̄h' # ฃ THAI CHARACTER KHO KHUAT
27
- '\u0e04': 'kh' # ค THAI CHARACTER KHO KHWAI
28
- '\u0e05': 'k̛h' # ฅ THAI CHARACTER KHO KHON
29
- '\u0e06': 'ḳh' # ฆ THAI CHARACTER KHO RAKHANG
30
- '\u0e07': 'ng' # ง THAI CHARACTER NGO NGU
31
- '\u0e08': 'c' # จ THAI CHARACTER CHO CHAN
32
- '\u0e09': 'c̄h' # ฉ THAI CHARACTER CHO CHING
33
- '\u0e0a': 'ch' # ช THAI CHARACTER CHO CHANG
34
- '\u0e0b': 's' # ซ THAI CHARACTER SO SO
35
- '\u0e0c': 'c̣h' # ฌ THAI CHARACTER CHO CHOE
36
- '\u0e0d': 'ỵ' # ญ THAI CHARACTER YO YING
37
- '\u0e0e': 'ḍ' # ฎ THAI CHARACTER DO CHADA
38
- '\u0e0f': 'ṭ' # ฏ THAI CHARACTER TO PATAK
39
- '\u0e10': 'ṭ̄h' # ฐ THAI CHARACTER THO THAN
40
- '\u0e11': 'ṯh' # ฑ THAI CHARACTER THO NANGMONTHO
41
- '\u0e12': 't̛h' # ฒ THAI CHARACTER THO PHUTHAO
42
- '\u0e13': 'ṇ' # ณ THAI CHARACTER NO NEN
43
- '\u0e14': 'd' # ด THAI CHARACTER DO DEK
44
- '\u0e15': 't' # ต THAI CHARACTER TO TAO
45
- '\u0e16': 't̄h' # ถ THAI CHARACTER THO THUNG
46
- '\u0e17': 'th' # ท THAI CHARACTER THO THAHAN
47
- '\u0e18': 'ṭh' # ธ THAI CHARACTER THO THONG
48
- '\u0e19': 'n' # น THAI CHARACTER NO NU
49
- '\u0e1a': 'b' # บ THAI CHARACTER BO BAIMAI
50
- '\u0e1b': 'p' # ป THAI CHARACTER PO PLA
51
- '\u0e1c': 'p̄h' # ผ THAI CHARACTER PHO PHUNG
52
- '\u0e1d': 'f̄' # ฝ THAI CHARACTER FO FA
53
- '\u0e1e': 'ph' # พ THAI CHARACTER PHO PHAN
54
- '\u0e1f': 'f' # ฟ THAI CHARACTER FO FAN
55
- '\u0e20': 'p̣h' # ภ THAI CHARACTER PHO SAMPHAO
56
- '\u0e21': 'm' # ม THAI CHARACTER MO MA
57
- '\u0e22': 'y' # ย THAI CHARACTER YO YAK
58
- '\u0e23': 'r' # ร THAI CHARACTER RO RUA
59
- '\u0e24': 'v' # ฤ THAI CHARACTER RU
60
- '\u0e25': 'l' # ล THAI CHARACTER LO LING
61
- '\u0e26': 'ł' # ฦ THAI CHARACTER LU
62
- '\u0e27': 'w' # ว THAI CHARACTER WO WAEN
63
- '\u0e28': 'ṣ̄' # ศ THAI CHARACTER SO SALA
64
- '\u0e29': 's̛̄' # ษ THAI CHARACTER SO RUSI
65
- '\u0e2a': 's̄' # ส THAI CHARACTER SO SUA
66
- '\u0e2b': 'h̄' # ห THAI CHARACTER HO HIP
67
- '\u0e2c': 'ḷ' # ฬ THAI CHARACTER LO CHULA
68
- '\u0e2d': 'x' # อ THAI CHARACTER O ANG
69
- '\u0e2e': 'ḥ' # ฮ THAI CHARACTER HO NOKHUK
70
- '\u0e2f': 'ǂ' # ฯ THAI CHARACTER PAIYANNOI
71
- '\u0e30': 'a' # ะ THAI CHARACTER SARA A
72
- '\u0e31': 'ạ' # ั THAI CHARACTER MAI HAN-AKAT
73
- '\u0e32': 'ā' # า THAI CHARACTER SARA AA
74
- '\u0e33': 'å' # ำ THAI CHARACTER SARA AM
75
- '\u0e34': 'i' # ิ THAI CHARACTER SARA I
76
- '\u0e35': 'ī' # ี THAI CHARACTER SARA II
77
- '\u0e36': 'ụ' # ึ THAI CHARACTER SARA UE
78
- '\u0e37': 'ụ̄' # ื THAI CHARACTER SARA UEE
79
- '\u0e38': 'u' # ุ THAI CHARACTER SARA U
80
- '\u0e39': 'ū' # ู THAI CHARACTER SARA UU
81
- '\u0e3a': '–̥' # ฺ THAI CHARACTER PHINTHU
82
- '\u0e40': 'e' # เ THAI CHARACTER SARA E
83
- '\u0e41': 'æ' # แ THAI CHARACTER SARA AE
84
- '\u0e42': 'o' # โ THAI CHARACTER SARA O
85
- '\u0e43': 'ı' # ใ THAI CHARACTER SARA AI MAIMUAN
86
- '\u0e44': 'ị' # ไ THAI CHARACTER SARA AI MAIMALAI
87
- '\u0e45': 'ɨ' # ๅ THAI CHARACTER LAKKHANGYAO
88
- '\u0e46': '«' # ๆ THAI CHARACTER MAIYAMOK
89
- '\u0e47': '̆' # ็ THAI CHARACTER MAITAIKHU
90
- '\u0e48': '̀' # ่ THAI CHARACTER MAI EK
91
- '\u0e49': '̂' # ้ THAI CHARACTER MAI THO
92
- '\u0e4a': '́' # ๊ THAI CHARACTER MAI TRI
93
- '\u0e4b': '̌' # ๋ THAI CHARACTER MAI CHATTAWA
94
- '\u0e4c': '̒' # ์ THAI CHARACTER THANTHAKHAT
95
- '\u0e4d': '̊' # ํ THAI CHARACTER NIKHAHIT
96
- '\u0e4e': '~' # ๎ THAI CHARACTER YAMAKKAN
97
- '\u0e4f': '§' # ๏ THAI CHARACTER FONGMAN
98
- '\u0e50': '0' # ๐ THAI DIGIT ZERO
99
- '\u0e51': '1' # ๑ THAI DIGIT ONE
100
- '\u0e52': '2' # ๒ THAI DIGIT TWO
101
- '\u0e53': '3' # ๓ THAI DIGIT THREE
102
- '\u0e54': '4' # ๔ THAI DIGIT FOUR
103
- '\u0e55': '5' # ๕ THAI DIGIT FIVE
104
- '\u0e56': '6' # ๖ THAI DIGIT SIX
105
- '\u0e57': '7' # ๗ THAI DIGIT SEVEN
106
- '\u0e58': '8' # ๘ THAI DIGIT EIGHT
107
- '\u0e59': '9' # ๙ THAI DIGIT NINE
108
- '\u0e5a': 'ǁ' # ๚ THAI CHARACTER ANGKHANKHU
109
- '\u0e5b': '»' # ๛ THAI CHARACTER KHOMUT