interscript 0.1.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (183) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +76 -128
  21. data/lib/interscript/command.rb +6 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -223
  63. data/README.adoc +0 -297
  64. data/bin/rspec +0 -29
  65. data/lib/g2pwrapper.py +0 -34
  66. data/lib/interscript/mapping.rb +0 -125
  67. data/lib/model-7 +0 -0
  68. data/lib/tha-pt-b-7 +0 -0
  69. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  70. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  71. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  72. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  73. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  74. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  75. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  76. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  77. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  78. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  79. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  80. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  81. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  82. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  83. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  84. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  85. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  86. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  87. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  88. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  89. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  90. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  91. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  92. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  93. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  94. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  95. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  96. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  97. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  98. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  99. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  100. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  101. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +0 -7456
  102. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  103. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  104. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  105. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  106. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  107. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  108. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  109. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  110. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  111. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  112. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  113. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  114. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  115. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  116. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  117. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  118. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  119. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  120. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  121. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  122. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  123. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  124. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  125. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  126. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  127. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  128. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  129. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  130. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  131. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  132. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  133. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  134. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  135. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  136. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  137. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  138. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  139. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  140. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  141. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  142. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  143. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  144. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  145. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  146. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  147. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  148. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  149. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  150. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  151. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  152. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  153. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  154. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  155. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  156. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  157. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  158. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  159. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  160. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  161. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  162. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  163. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  164. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  165. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  166. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  167. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  168. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  169. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  170. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  171. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  172. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  173. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  174. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  175. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  176. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  177. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  178. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  179. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  180. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  181. data/spec/interscript/mapping_spec.rb +0 -42
  182. data/spec/interscript_spec.rb +0 -26
  183. data/spec/spec_helper.rb +0 -3
@@ -1,41 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 1997
4
- language: ell
5
- source_script: Grek
6
- destination_script: Latn
7
- name: ISO 843:1997
8
- url:
9
- creation_date: 1997
10
- description: |
11
- ISO Transcription table for Greek
12
-
13
- note:
14
- - Transliteration of Greek into Latin: Type 2, Clause 3 Table 2
15
- - Introduced casing to digamma, yot, and lunate sigma. (Casing was late introduction to character sets for those characters)
16
-
17
- tests:
18
-
19
- - source: |
20
- Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
21
-
22
- Γιάννης Μακρυγιάννης.
23
-
24
- expected: |
25
- Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
26
-
27
- Giánnis Makrygiánnis.
28
-
29
- map:
30
- character_separator: ""
31
- word_separator: " "
32
- inherit: "elot-ell-Grek-Latn-743-1982-ts"
33
-
34
- characters:
35
- "\u03DC": "W" # Ϝ
36
- "\u03DD": "w" # ϝ
37
- "\u03F2": "s" # ϲ
38
- "\u03F9": "S" # Ϲ
39
- "\u03F3": "j"
40
- "\u037F": "j"
41
-
@@ -1,62 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 3602-1989
4
- language: jpn
5
- source_script: Hrkt
6
- destination_script: Latn
7
- name: ISO 3602 Romanization of Japanese (Kana Script)
8
- url:
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
-
15
- tests:
16
- - source: かんおう
17
- expected: kan’ô
18
- - source: かのう
19
- expected: kanô
20
- - source: きんゆう
21
- expected: kin’yû
22
- - source: とうきょう
23
- expected: tôkyô
24
- - source: がっ•こう
25
- expected: gakkô
26
- - source: かごっま
27
- expected: kagomma
28
- - source: ぽっぽっや
29
- expected: poppoyya
30
- - source: てっら
31
- expected: terra
32
- - source: にゃっほー
33
- expected: nyahhô
34
- - source: ゴッホ
35
- expected: gohho
36
- - source: おも•う
37
- expected: omou
38
- - source: こうし
39
- expected: kôsi
40
- - source: こう•し #格子
41
- expected: kôsi
42
- - source: こ•うし #子牛
43
- expected: kousi
44
- - source: ぎゃあ
45
- expected: gyâ
46
-
47
- map:
48
- inherit: mext-jpn-Hrkt-Latn-1954
49
-
50
- rules:
51
- # Remove morpheme boundary marker after sokuon っ/ッ
52
- - pattern: "([っッ])•"
53
- result: "\\1"
54
-
55
- postrules:
56
- # Remove morpheme boundary marker
57
- - pattern: "•"
58
- result: ""
59
-
60
- # Use ’ instead of '
61
- - pattern: "'"
62
- result: "’"
@@ -1,272 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 9-1995
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ISO 9
8
- url: https://www.iso.org/standard/3589.html
9
- creation_date: 1995
10
- description: |
11
- Establishes a system for the transliteration into Latin characters of
12
- Cyrillic characters constituting the alphabets of Slavic and non-Slavic
13
- languages. Table 3 includes in a single sequence, listed in the
14
- Cyrillic alphabetic order, the 118 single or diacritic-carrying
15
- characters that appear in one or another of the considered alphabets.
16
- tests:
17
-
18
-
19
- map:
20
- characters:
21
- "\u0410": "A" # А => A
22
- "\u04d2": "\u00c4" # Ӓ => Ä (a diaeresis)
23
- "\u04d2\u0304": "\u1ea0\u0308" # Ӓ̄ => Ạ̈ (a diaeresis and dot below)
24
- "\u04d0": "\u0102" # Ӑ => Ă (a breve)
25
- "\u0410\u0304": "\u0100" # А̄ => Ā (a macron)
26
- "\u04d4": "\u00c6" # Ӕ => Æ (ae ligature)
27
- "\u0410\u0301": "\u00c1" # А́ => Á (a acute)
28
- "\u0410\u030a": "\u00c5" # А̊ => Å (a ring)
29
- "\u0411": "B" # Б => B
30
- "\u0412": "V" # В => V
31
- "\u0413": "G" # Г => G
32
- "\u0403": "\u01f4" # Ѓ => Ǵ (g acute)
33
- "\u0492": "\u0120" # Ғ => Ġ (g dot)
34
- "\u0494": "\u011e" # Ҕ => Ğ (g breve)
35
- "\u04ba": "\u1e24" # Һ => Ḥ (h dot)
36
- "\u0414": "D" # Д => D
37
- "\u0402": "\u0110" # Ђ => Đ (d macron)
38
- "\u0415": "E" # Е => E
39
- "\u04d6": "\u0114" # Ӗ => Ĕ (e breve)
40
- "\u0401": "\u00cb" # Ё => Ë (e diaeresis)
41
- "\u0404": "\u00ca" # Є => Ê (e circumflex)
42
- "\u0416": "\u017d" # Ж => Ž (z caron)
43
- "\u0496": "\u017d\u0327" # Җ => Ž̧ (z caron and cedilla[4])
44
- "\u04dc": "\u005a\u0304" # Ӝ => Z̄ (z macron)
45
- "\u04c1": "\u005a\u0306" # Ӂ => Z̆ (z breve)
46
- "\u0417": "\u005a" # З => Z
47
- "\u04de": "\u005a\u0308" # Ӟ => Z̈ (z diaeresis)
48
- "\u04e0": "\u0179" # Ӡ => Ź (z acute)
49
- "\u0405": "\u1e90" # Ѕ => Ẑ (z circumflex)
50
- "\u0418": "I" # И => I
51
- "\u04e2": "\u012a" # Ӣ => Ī (i macron)
52
- "\u0418\u0301": "\u00cd" # И́ => Í (i acute)
53
- "\u04e4": "\u00ce" # Ӥ => Î (i circumflex)
54
- "\u0419": "\u004a" # Й => J
55
- "\u0406": "\u00cc" # І => Ì (i grave)
56
- "\u0407": "\u00cf" # Ї => Ï (i diaeresis)
57
- "\u0406\u0304": "\u01cf" # І̄ => Ǐ (i caron (or breve))
58
- "\u0408": "\u004a\u030c" # Ј => J̌ (j caron)
59
- "\u0408\u0335": "\u004a\u0301" # Ј̵ => J́ (j acute)
60
- "\u041a": "K" # К => K
61
- "\u040c": "\u1e30" # Ќ => Ḱ (k acute)
62
- "\u04c3": "\u1e32" # Ӄ => Ḳ (k dot below)
63
- "\u049c": "\u004b\u0302" # Ҝ => K̂ (k circumflex)
64
- "\u04a0": "\u01e8" # Ҡ => Ǩ (k caron)
65
- "\u049e": "\u004b\u0304" # Ҟ => K̄ (k macron)
66
- "\u049a": "\u0136" # Қ => Ķ (k cedilla[4])
67
- "\u041a\u0328": "\u004b\u0300" # К̨ => K̀ (k grave)
68
- "\u051a": "Q" # Ԛ => Q
69
- "\u041b": "L" # Л => L
70
- "\u0409": "\u004c\u0302" # Љ => L̂ (l circumflex)
71
- "\u0520": "\u013b" # Ԡ => Ļ (l cedilla[4])
72
- "\u041c": "M" # М => M
73
- "\u041d": "N" # Н => N
74
- "\u040a": "\u004e\u0302" # Њ => N̂ (n circumflex)
75
- "\u04a2": "\u0145" # Ң => Ņ (n cedilla[4])
76
- "\u04c9": "\u1e46" # Ӊ => Ṇ (n dot below)
77
- "\u04a4": "\u1e44" # Ҥ => Ṅ (n dot)
78
- "\u050a": "\u01f8" # Ԋ => Ǹ (n grave)
79
- "\u0522": "\u0143" # Ԣ => Ń (n acute)
80
- "\u04c7": "\u0147" # Ӈ => Ň (n caron)
81
- "\u041d\u0304": "\u004e\u0304" # Н̄ => N̄ (n macron)
82
- "\u041e": "O" # О => O
83
- "\u04e6": "\u00d6" # Ӧ => Ö (o diaeresis)
84
- "\u04e8": "\u00d4" # Ө => Ô (o circumflex)
85
- "\u04ea": "\u0150" # Ӫ => Ő (o double acute)
86
- "\u04e6\u0304": "\u1ecc\u0308" # Ӧ̄ => Ọ̈ (o diaeresis and dot below)
87
- "\u04a8": "\u00d2" # Ҩ => Ò (o grave)
88
- "\u041e\u0301": "\u00d3" # О́ => Ó (o acute)
89
- "\u041e\u0304": "\u014c" # О̄ => Ō (o macron)
90
- "\u0152": "\u0152" # Œ => Œ (oe ligature)
91
- "\u041f": "P" # П => P
92
- "\u04a6": "\u1e54" # Ҧ => Ṕ (p acute)
93
- "\u0524": "\u0050\u0300" # Ԥ => P̀ (p grave)
94
- "\u0420": "R" # Р => R
95
- "\u0421": "S" # С => S
96
- "\u04aa": "\u015e" # Ҫ => Ş (s cedilla[4])
97
- "\u0421\u0300": "\u0053\u0300" # С̀ => S̀ (s grave)
98
- "\u0422": "T" # Т => T
99
- "\u040b": "\u0106" # Ћ => Ć (c acute)
100
- "\u050e": "\u0054\u0300" # Ԏ => T̀ (t grave)
101
- "\u0422\u030c": "\u0164" # Т̌ => Ť (t caron)
102
- "\u04ac": "\u0162" # Ҭ => Ţ (t cedilla[4])
103
- "\u0423": "U" # У => U
104
- "\u04f0": "\u00dc" # Ӱ => Ü (u diaeresis)
105
- "\u04ee": "\u016a" # Ӯ => Ū (u macron)
106
- "\u040e": "\u016c" # Ў => Ŭ (u breve)
107
- "\u04f2": "\u0170" # Ӳ => Ű (u double acute)
108
- "\u0423\u0301": "\u00da" # У́ => Ú (u acute)
109
- "\u04f0\u0304": "\u1ee4\u0308" # Ӱ̄ => Ụ̈ (u diaeresis and dot below)
110
- "\u04ae": "\u00d9" # Ү => Ù (u grave)
111
- "\u04b0": "\u0055\u0307" # Ұ => U̇ (u dot)
112
- "\u051c": "W" # Ԝ => W
113
- "\u0424": "F" # Ф => F
114
- "\u0425": "H" # Х => H
115
- "\u04b2": "\u1e28" # Ҳ => Ḩ (h cedilla[4])
116
- "\u0426": "C" # Ц => C
117
- "\u04b4": "\u0043\u0304" # Ҵ => C̄ (c macron)
118
- "\u040f": "\u0044\u0302" # Џ => D̂ (d circumflex)
119
- "\u0427": "\u010c" # Ч => Č (c caron)
120
- "\u04b6": "\u00c7" # Ҷ => Ç (c cedilla[4])
121
- "\u04cb": "\u0043\u0323" # Ӌ => C̣ (c dot below)
122
- "\u04f4": "\u0043\u0308" # Ӵ => C̈ (c diaeresis)
123
- "\u04b8": "\u0108" # Ҹ => Ĉ (c circumflex)
124
- "\u0427\u0300": "\u0043\u0300" # Ч̀ => C̀ (c grave)
125
- "\u04bc": "\u0043\u0306" # Ҽ => C̆ (c breve)
126
- "\u04be": "\u0043\u0328\u0306" # Ҿ => C̨̆ (c ogonek[4] and breve)
127
- "\u0428": "\u0160" # Ш => Š (s caron)
128
- "\u0429": "\u015c" # Щ => Ŝ (s circumflex)
129
- "\u042a": "\u02ba" # Ъ => ʺ (modifier letter double prime[5])
130
- "\u042b": "Y" # Ы => Y
131
- "\u04f8": "\u0178" # Ӹ => Ÿ (y diaeresis)
132
- "\u042b\u0304": "\u0232" # Ы̄ => Ȳ (y macron)
133
- "\u042c": "\u02b9" # Ь => ʹ (modifier letter prime[5])
134
- "\u042d": "\u00c8" # Э => È (e grave)
135
- "\u04d8": "\u0041\u030b" # Ә => A̋ (a double acute)
136
- "\u04da": "\u00c0" # Ӛ => À (a grave)
137
- "\u042e": "\u00db" # Ю => Û (u circumflex)
138
- "\u042e\u0304": "\u00db\u0304" # Ю̄ => Û̄ (u circumflex with macron)
139
- "\u042f": "\u00c2" # Я => Â (a circumflex)
140
- "\u0490": "\u0047\u0300" # Ґ => G̀ (g grave)
141
- "\u0462": "\u011a" # Ѣ => Ě (e caron)
142
- "\u046a": "\u01cd" # Ѫ => Ǎ (a caron)
143
- "\u0472": "\u0046\u0300" # Ѳ => F̀ (f grave)
144
- "\u0474": "\u1ef2" # Ѵ => Ỳ (y grave)
145
- "\u0430": "a" # а => a
146
- "\u04d3": "\u00e4" # ӓ => ä
147
- "\u04d3\u0304": "\u1ea1\u0308" # ӓ̄ => ạ̈
148
- "\u04d1": "\u0103" # ӑ => ă
149
- "\u0430\u0304": "\u0101" # а̄ => ā
150
- "\u04d5": "\u00e6" # ӕ => æ
151
- "\u0430\u0301": "\u00e1" # а́ => á
152
- "\u0430\u030a": "\u00e5" # а̊ => å
153
- "\u0431": "b" # б => b
154
- "\u0432": "v" # в => v
155
- "\u0433": "g" # г => g
156
- "\u0453": "\u01f5" # ѓ => ǵ
157
- "\u0493": "\u0121" # ғ => ġ
158
- "\u0495": "\u011f" # ҕ => ğ
159
- "\u04bb": "\u1e25" # һ => ḥ
160
- "\u0434": "d" # д => d
161
- "\u0452": "\u0111" # ђ => đ
162
- "\u0435": "e" # е => e
163
- "\u04d7": "\u0115" # ӗ => ĕ
164
- "\u0451": "\u00eb" # ё => ë
165
- "\u0454": "\u00ea" # є => ê
166
- "\u0436": "\u017e" # ж => ž
167
- "\u0497": "\u017e\u0327" # җ => ž̧
168
- "\u04dd": "\u007a\u0304" # ӝ => z̄
169
- "\u04c2": "\u007a\u0306" # ӂ => z̆
170
- "\u0437": "z" # з => z
171
- "\u04df": "\u007a\u0308" # ӟ => z̈
172
- "\u04e1": "\u017a" # ӡ => ź
173
- "\u0455": "\u1e91" # ѕ => ẑ
174
- "\u0438": "i" # и => i
175
- "\u04e3": "\u012b" # ӣ => ī
176
- "\u0438\u0301": "\u00ed" # и́ => í
177
- "\u04e5": "\u00ee" # ӥ => î
178
- "\u0439": "j" # й => j
179
- "\u0456": "\u00ec" # і => ì
180
- "\u0457": "\u00ef" # ї => ï
181
- "\u0456\u0304": "\u01d0" # і̄ => ǐ
182
- "\u0458": "\u01f0" # ј => ǰ
183
- "\u0458\u0335": "\u006a\u0301" # ј̵ => j́
184
- "\u043a": "k" # к => k
185
- "\u045c": "\u1e31" # ќ => ḱ
186
- "\u04c4": "\u1e33" # ӄ => ḳ
187
- "\u049d": "\u006b\u0302" # ҝ => k̂
188
- "\u04a1": "\u01e9" # ҡ => ǩ
189
- "\u049f": "\u006b\u0304" # ҟ => k̄
190
- "\u049b": "\u0137" # қ => ķ
191
- "\u043a\u0328": "\u006b\u0300" # к̨ => k̀
192
- "\u051b": "q" # ԛ => q
193
- "\u043b": "l" # л => l
194
- "\u0459": "\u006c\u0302" # љ => l̂
195
- "\u0521": "\u013c" # ԡ => ļ
196
- "\u043c": "m" # м => m
197
- "\u043d": "n" # н => n
198
- "\u045a": "\u006e\u0302" # њ => n̂
199
- "\u04a3": "\u0146" # ң => ņ
200
- "\u04ca": "\u1e47" # ӊ => ṇ
201
- "\u04a5": "\u1e45" # ҥ => ṅ
202
- "\u050b": "\u01f9" # ԋ => ǹ
203
- "\u0523": "\u0144" # ԣ => ń
204
- "\u04c8": "\u0148" # ӈ => ň
205
- "\u043d\u0304": "\u006e\u0304" # н̄ => n̄
206
- "\u043e": "o" # о => o
207
- "\u04e7": "\u00f6" # ӧ => ö
208
- "\u04e9": "\u00f4" # ө => ô
209
- "\u04eb": "\u0151" # ӫ => ő
210
- "\u043e\u0304\u0308": "\u1ecd\u0308" # о̄̈ => ọ̈
211
- "\u04a9": "\u00f2" # ҩ => ò
212
- "\u043e\u0301": "\u00f3" # о́ => ó
213
- "\u043e\u0304": "\u014d" # о̄ => ō
214
- "\u0153": "\u0153" # œ => œ
215
- "\u043f": "p" # п => p
216
- "\u04a7": "\u1e55" # ҧ => ṕ
217
- "\u0525": "\u0070\u0300" # ԥ => p̀
218
- "\u0440": "r" # р => r
219
- "\u0441": "s" # с => s
220
- "\u04ab": "\u015f" # ҫ => ş
221
- "\u0441\u0300": "\u0073\u0300" # с̀ => s̀
222
- "\u0442": "t" # т => t
223
- "\u045b": "\u0107" # ћ => ć
224
- "\u050f": "\u0074\u0300" # ԏ => t̀
225
- "\u0442\u030c": "\u0165" # т̌ => ť
226
- "\u04ad": "\u0163" # ҭ => ţ
227
- "\u0443": "u" # у => u
228
- "\u04f1": "\u00fc" # ӱ => ü
229
- "\u04ef": "\u016b" # ӯ => ū
230
- "\u045e": "\u016d" # ў => ŭ
231
- "\u04f3": "\u0171" # ӳ => ű
232
- "\u0443\u0301": "\u00fa" # у́ => ú
233
- "\u04f1\u0304": "\u1ee5\u0308" # ӱ̄ => ụ̈
234
- "\u04af": "\u00f9" # ү => ù
235
- "\u04b1": "\u0075\u0307" # ұ => u̇
236
- "\u051d": "w" # ԝ => w
237
- "\u0444": "f" # ф => f
238
- "\u0445": "h" # х => h
239
- "\u04b3": "\u1e29" # ҳ => ḩ
240
- "\u0446": "c" # ц => c
241
- "\u04b5": "\u0063\u0304" # ҵ => c̄
242
- "\u045f": "\u0064\u0302" # џ => d̂
243
- "\u0447": "\u010d" # ч => č
244
- "\u04b7": "\u00e7" # ҷ => ç
245
- "\u04cc": "\u0063\u0323" # ӌ => c̣
246
- "\u04f5": "\u0063\u0308" # ӵ => c̈
247
- "\u04b9": "\u0109" # ҹ => ĉ
248
- "\u0447\u0300": "\u0063\u0300" # ч̀ => c̀
249
- "\u04bd": "\u0063\u0306" # ҽ => c̆
250
- "\u04bf": "\u0063\u0328\u0306" # ҿ => c̨̆
251
- "\u0448": "\u0161" # ш => š
252
- "\u0449": "\u015d" # щ => ŝ
253
- "\u044a": "\u02ba" # ъ => ʺ
254
- "\u044b": "y" # ы => y
255
- "\u04f9": "\u00ff" # ӹ => ÿ
256
- "\u044b\u0304": "\u0233" # ы̄ => ȳ
257
- "\u044c": "\u02b9" # ь => ʹ
258
- "\u044d": "\u00e8" # э => è
259
- "\u04d9": "\u0061\u030b" # ә => a̋
260
- "\u04db": "\u00e0" # ӛ => à
261
- "\u044e": "\u00fb" # ю => û
262
- "\u044e\u0304": "\u00fb\u0304" # ю̄ => û̄
263
- "\u044f": "\u00e2" # я => â
264
- "\u0491": "\u0067\u0300" # ґ => g̀
265
- "\u0463": "\u011b" # ѣ => ě
266
- "\u046b": "\u01ce" # ѫ => ǎ
267
- "\u0473": "\u0066\u0300" # ѳ => f̀
268
- "\u0475": "\u1ef3" # ѵ => ỳ
269
- "\u04c0": "\u2021" # Ӏ => ‡
270
- "\u02bc": "\u0060" # ʼ => `
271
- "\u02ee": "\u00a8" # ˮ => ¨
272
-
@@ -1,109 +0,0 @@
1
- ---
2
- authority_id: iso
3
- id: 11940-1998
4
- language: tha
5
- source_script: Thai
6
- destination_script: Latn
7
- name: ISO 11940:1998 Information and documentation -- Transliteration of Thai
8
- url: https://www.iso.org/standard/20574.html
9
- creation_date: 1998
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
-
15
- tests:
16
- - source: 'ภาษาไทย'
17
- expected: 'p̣hās̛̄āịthy'
18
- - source: 'เชียงใหม่'
19
- expected: 'echīyngıh̄m̀'
20
-
21
- map:
22
-
23
- characters:
24
- '\u0e01': 'k' # ก THAI CHARACTER KO KAI
25
- '\u0e02': 'k̄h' # ข THAI CHARACTER KHO KHAI
26
- '\u0e03': 'ḳ̄h' # ฃ THAI CHARACTER KHO KHUAT
27
- '\u0e04': 'kh' # ค THAI CHARACTER KHO KHWAI
28
- '\u0e05': 'k̛h' # ฅ THAI CHARACTER KHO KHON
29
- '\u0e06': 'ḳh' # ฆ THAI CHARACTER KHO RAKHANG
30
- '\u0e07': 'ng' # ง THAI CHARACTER NGO NGU
31
- '\u0e08': 'c' # จ THAI CHARACTER CHO CHAN
32
- '\u0e09': 'c̄h' # ฉ THAI CHARACTER CHO CHING
33
- '\u0e0a': 'ch' # ช THAI CHARACTER CHO CHANG
34
- '\u0e0b': 's' # ซ THAI CHARACTER SO SO
35
- '\u0e0c': 'c̣h' # ฌ THAI CHARACTER CHO CHOE
36
- '\u0e0d': 'ỵ' # ญ THAI CHARACTER YO YING
37
- '\u0e0e': 'ḍ' # ฎ THAI CHARACTER DO CHADA
38
- '\u0e0f': 'ṭ' # ฏ THAI CHARACTER TO PATAK
39
- '\u0e10': 'ṭ̄h' # ฐ THAI CHARACTER THO THAN
40
- '\u0e11': 'ṯh' # ฑ THAI CHARACTER THO NANGMONTHO
41
- '\u0e12': 't̛h' # ฒ THAI CHARACTER THO PHUTHAO
42
- '\u0e13': 'ṇ' # ณ THAI CHARACTER NO NEN
43
- '\u0e14': 'd' # ด THAI CHARACTER DO DEK
44
- '\u0e15': 't' # ต THAI CHARACTER TO TAO
45
- '\u0e16': 't̄h' # ถ THAI CHARACTER THO THUNG
46
- '\u0e17': 'th' # ท THAI CHARACTER THO THAHAN
47
- '\u0e18': 'ṭh' # ธ THAI CHARACTER THO THONG
48
- '\u0e19': 'n' # น THAI CHARACTER NO NU
49
- '\u0e1a': 'b' # บ THAI CHARACTER BO BAIMAI
50
- '\u0e1b': 'p' # ป THAI CHARACTER PO PLA
51
- '\u0e1c': 'p̄h' # ผ THAI CHARACTER PHO PHUNG
52
- '\u0e1d': 'f̄' # ฝ THAI CHARACTER FO FA
53
- '\u0e1e': 'ph' # พ THAI CHARACTER PHO PHAN
54
- '\u0e1f': 'f' # ฟ THAI CHARACTER FO FAN
55
- '\u0e20': 'p̣h' # ภ THAI CHARACTER PHO SAMPHAO
56
- '\u0e21': 'm' # ม THAI CHARACTER MO MA
57
- '\u0e22': 'y' # ย THAI CHARACTER YO YAK
58
- '\u0e23': 'r' # ร THAI CHARACTER RO RUA
59
- '\u0e24': 'v' # ฤ THAI CHARACTER RU
60
- '\u0e25': 'l' # ล THAI CHARACTER LO LING
61
- '\u0e26': 'ł' # ฦ THAI CHARACTER LU
62
- '\u0e27': 'w' # ว THAI CHARACTER WO WAEN
63
- '\u0e28': 'ṣ̄' # ศ THAI CHARACTER SO SALA
64
- '\u0e29': 's̛̄' # ษ THAI CHARACTER SO RUSI
65
- '\u0e2a': 's̄' # ส THAI CHARACTER SO SUA
66
- '\u0e2b': 'h̄' # ห THAI CHARACTER HO HIP
67
- '\u0e2c': 'ḷ' # ฬ THAI CHARACTER LO CHULA
68
- '\u0e2d': 'x' # อ THAI CHARACTER O ANG
69
- '\u0e2e': 'ḥ' # ฮ THAI CHARACTER HO NOKHUK
70
- '\u0e2f': 'ǂ' # ฯ THAI CHARACTER PAIYANNOI
71
- '\u0e30': 'a' # ะ THAI CHARACTER SARA A
72
- '\u0e31': 'ạ' # ั THAI CHARACTER MAI HAN-AKAT
73
- '\u0e32': 'ā' # า THAI CHARACTER SARA AA
74
- '\u0e33': 'å' # ำ THAI CHARACTER SARA AM
75
- '\u0e34': 'i' # ิ THAI CHARACTER SARA I
76
- '\u0e35': 'ī' # ี THAI CHARACTER SARA II
77
- '\u0e36': 'ụ' # ึ THAI CHARACTER SARA UE
78
- '\u0e37': 'ụ̄' # ื THAI CHARACTER SARA UEE
79
- '\u0e38': 'u' # ุ THAI CHARACTER SARA U
80
- '\u0e39': 'ū' # ู THAI CHARACTER SARA UU
81
- '\u0e3a': '–̥' # ฺ THAI CHARACTER PHINTHU
82
- '\u0e40': 'e' # เ THAI CHARACTER SARA E
83
- '\u0e41': 'æ' # แ THAI CHARACTER SARA AE
84
- '\u0e42': 'o' # โ THAI CHARACTER SARA O
85
- '\u0e43': 'ı' # ใ THAI CHARACTER SARA AI MAIMUAN
86
- '\u0e44': 'ị' # ไ THAI CHARACTER SARA AI MAIMALAI
87
- '\u0e45': 'ɨ' # ๅ THAI CHARACTER LAKKHANGYAO
88
- '\u0e46': '«' # ๆ THAI CHARACTER MAIYAMOK
89
- '\u0e47': '̆' # ็ THAI CHARACTER MAITAIKHU
90
- '\u0e48': '̀' # ่ THAI CHARACTER MAI EK
91
- '\u0e49': '̂' # ้ THAI CHARACTER MAI THO
92
- '\u0e4a': '́' # ๊ THAI CHARACTER MAI TRI
93
- '\u0e4b': '̌' # ๋ THAI CHARACTER MAI CHATTAWA
94
- '\u0e4c': '̒' # ์ THAI CHARACTER THANTHAKHAT
95
- '\u0e4d': '̊' # ํ THAI CHARACTER NIKHAHIT
96
- '\u0e4e': '~' # ๎ THAI CHARACTER YAMAKKAN
97
- '\u0e4f': '§' # ๏ THAI CHARACTER FONGMAN
98
- '\u0e50': '0' # ๐ THAI DIGIT ZERO
99
- '\u0e51': '1' # ๑ THAI DIGIT ONE
100
- '\u0e52': '2' # ๒ THAI DIGIT TWO
101
- '\u0e53': '3' # ๓ THAI DIGIT THREE
102
- '\u0e54': '4' # ๔ THAI DIGIT FOUR
103
- '\u0e55': '5' # ๕ THAI DIGIT FIVE
104
- '\u0e56': '6' # ๖ THAI DIGIT SIX
105
- '\u0e57': '7' # ๗ THAI DIGIT SEVEN
106
- '\u0e58': '8' # ๘ THAI DIGIT EIGHT
107
- '\u0e59': '9' # ๙ THAI DIGIT NINE
108
- '\u0e5a': 'ǁ' # ๚ THAI CHARACTER ANGKHANKHU
109
- '\u0e5b': '»' # ๛ THAI CHARACTER KHOMUT