interscript 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +36 -17
  4. data/bin/rspec +29 -0
  5. data/bin/setup +8 -0
  6. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  7. data/lib/g2pwrapper.py +34 -0
  8. data/lib/interscript-opal.rb +2 -0
  9. data/lib/interscript.rb +138 -38
  10. data/lib/interscript/command.rb +28 -0
  11. data/lib/interscript/fs.rb +69 -0
  12. data/lib/interscript/mapping.rb +142 -0
  13. data/lib/interscript/opal.rb +23 -0
  14. data/lib/interscript/opal/maps.js.erb +7 -0
  15. data/lib/interscript/opal_map_translate.rb +12 -0
  16. data/lib/interscript/version.rb +1 -1
  17. data/lib/model-7 +0 -0
  18. data/lib/tha-pt-b-7 +0 -0
  19. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  20. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  21. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  22. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  23. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  24. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  25. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  26. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  27. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  28. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  29. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  30. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  31. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +222 -0
  32. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  33. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  34. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  35. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  36. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  37. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  38. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +175 -0
  39. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  40. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  41. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  42. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  43. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  44. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  45. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  46. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  47. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  48. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  49. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  50. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  51. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  52. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  53. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  54. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  55. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  56. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  57. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  58. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  59. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  60. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  61. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +93 -0
  62. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  63. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  64. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +163 -0
  65. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  66. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  67. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  68. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  69. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  70. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  71. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  72. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  73. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  74. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  75. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  76. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  77. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  78. data/maps/icao-bel-Cyrl-Latn-9303.yaml +141 -0
  79. data/maps/icao-bul-Cyrl-Latn-9303.yaml +122 -0
  80. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  81. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  82. data/maps/icao-per-Arab-Latn-9303.yaml +104 -0
  83. data/maps/icao-rus-Cyrl-Latn-9303.yaml +118 -0
  84. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  85. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +120 -0
  86. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  87. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  88. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  89. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +272 -0
  90. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  91. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  92. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  93. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  94. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  95. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  96. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  97. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  98. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +110 -0
  99. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  100. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  101. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  102. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  103. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  104. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  105. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  106. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  107. data/maps/odni-mkd-cyrl-latn-2015.yaml +122 -0
  108. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  109. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  110. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  111. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  112. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  113. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  114. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +167 -0
  115. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  116. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  117. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  118. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  119. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  120. data/maps/ses-ara-arab-latn-1930.yaml +275 -0
  121. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  122. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  123. data/maps/un-ara-Arab-Latn-1971.yaml +127 -0
  124. data/maps/un-ara-Arab-Latn-1972.yaml +152 -0
  125. data/maps/un-ara-Arab-Latn-2017.yaml +383 -0
  126. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  127. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  128. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  129. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  130. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  131. data/maps/un-mon-Mong-Latn-2013.yaml +93 -0
  132. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  133. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  134. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  135. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  136. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  137. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  138. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  139. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  140. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  141. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  142. data/spec/interscript/mapping_spec.rb +42 -0
  143. data/spec/interscript_spec.rb +26 -0
  144. data/spec/spec_helper.rb +3 -0
  145. metadata +295 -11
@@ -0,0 +1,41 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 1997
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: ISO 843:1997
8
+ url:
9
+ creation_date: 1997
10
+ description: |
11
+ ISO Transcription table for Greek
12
+
13
+ note:
14
+ - Transliteration of Greek into Latin: Type 2, Clause 3 Table 2
15
+ - Introduced casing to digamma, yot, and lunate sigma. (Casing was late introduction to character sets for those characters)
16
+
17
+ tests:
18
+
19
+ - source: |
20
+ Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
21
+
22
+ Γιάννης Μακρυγιάννης.
23
+
24
+ expected: |
25
+ Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
26
+
27
+ Giánnis Makrygiánnis.
28
+
29
+ map:
30
+ character_separator: ""
31
+ word_separator: " "
32
+ inherit: "elot-ell-Grek-Latn-743-1982-ts"
33
+
34
+ characters:
35
+ "\u03DC": "W" # Ϝ
36
+ "\u03DD": "w" # ϝ
37
+ "\u03F2": "s" # ϲ
38
+ "\u03F9": "S" # Ϲ
39
+ "\u03F3": "j"
40
+ "\u037F": "j"
41
+
@@ -0,0 +1,62 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 3602-1989
4
+ language: jpn
5
+ source_script: Hrkt
6
+ destination_script: Latn
7
+ name: ISO 3602 Romanization of Japanese (Kana Script)
8
+ url:
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+
15
+ tests:
16
+ - source: かんおう
17
+ expected: kan’ô
18
+ - source: かのう
19
+ expected: kanô
20
+ - source: きんゆう
21
+ expected: kin’yû
22
+ - source: とうきょう
23
+ expected: tôkyô
24
+ - source: がっ•こう
25
+ expected: gakkô
26
+ - source: かごっま
27
+ expected: kagomma
28
+ - source: ぽっぽっや
29
+ expected: poppoyya
30
+ - source: てっら
31
+ expected: terra
32
+ - source: にゃっほー
33
+ expected: nyahhô
34
+ - source: ゴッホ
35
+ expected: gohho
36
+ - source: おも•う
37
+ expected: omou
38
+ - source: こうし
39
+ expected: kôsi
40
+ - source: こう•し #格子
41
+ expected: kôsi
42
+ - source: こ•うし #子牛
43
+ expected: kousi
44
+ - source: ぎゃあ
45
+ expected: gyâ
46
+
47
+ map:
48
+ inherit: mext-jpn-Hrkt-Latn-1954
49
+
50
+ rules:
51
+ # Remove morpheme boundary marker after sokuon っ/ッ
52
+ - pattern: "([っッ])•"
53
+ result: "\\1"
54
+
55
+ postrules:
56
+ # Remove morpheme boundary marker
57
+ - pattern: "•"
58
+ result: ""
59
+
60
+ # Use ’ instead of '
61
+ - pattern: "'"
62
+ result: "’"
@@ -0,0 +1,272 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 9-1995
4
+ language: rus
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ISO 9
8
+ url: https://www.iso.org/standard/3589.html
9
+ creation_date: 1995
10
+ description: |
11
+ Establishes a system for the transliteration into Latin characters of
12
+ Cyrillic characters constituting the alphabets of Slavic and non-Slavic
13
+ languages. Table 3 includes in a single sequence, listed in the
14
+ Cyrillic alphabetic order, the 118 single or diacritic-carrying
15
+ characters that appear in one or another of the considered alphabets.
16
+ tests:
17
+
18
+
19
+ map:
20
+ characters:
21
+ "\u0410": "A" # А => A
22
+ "\u04d2": "\u00c4" # Ӓ => Ä (a diaeresis)
23
+ "\u04d2\u0304": "\u1ea0\u0308" # Ӓ̄ => Ạ̈ (a diaeresis and dot below)
24
+ "\u04d0": "\u0102" # Ӑ => Ă (a breve)
25
+ "\u0410\u0304": "\u0100" # А̄ => Ā (a macron)
26
+ "\u04d4": "\u00c6" # Ӕ => Æ (ae ligature)
27
+ "\u0410\u0301": "\u00c1" # А́ => Á (a acute)
28
+ "\u0410\u030a": "\u00c5" # А̊ => Å (a ring)
29
+ "\u0411": "B" # Б => B
30
+ "\u0412": "V" # В => V
31
+ "\u0413": "G" # Г => G
32
+ "\u0403": "\u01f4" # Ѓ => Ǵ (g acute)
33
+ "\u0492": "\u0120" # Ғ => Ġ (g dot)
34
+ "\u0494": "\u011e" # Ҕ => Ğ (g breve)
35
+ "\u04ba": "\u1e24" # Һ => Ḥ (h dot)
36
+ "\u0414": "D" # Д => D
37
+ "\u0402": "\u0110" # Ђ => Đ (d macron)
38
+ "\u0415": "E" # Е => E
39
+ "\u04d6": "\u0114" # Ӗ => Ĕ (e breve)
40
+ "\u0401": "\u00cb" # Ё => Ë (e diaeresis)
41
+ "\u0404": "\u00ca" # Є => Ê (e circumflex)
42
+ "\u0416": "\u017d" # Ж => Ž (z caron)
43
+ "\u0496": "\u017d\u0327" # Җ => Ž̧ (z caron and cedilla[4])
44
+ "\u04dc": "\u005a\u0304" # Ӝ => Z̄ (z macron)
45
+ "\u04c1": "\u005a\u0306" # Ӂ => Z̆ (z breve)
46
+ "\u0417": "\u005a" # З => Z
47
+ "\u04de": "\u005a\u0308" # Ӟ => Z̈ (z diaeresis)
48
+ "\u04e0": "\u0179" # Ӡ => Ź (z acute)
49
+ "\u0405": "\u1e90" # Ѕ => Ẑ (z circumflex)
50
+ "\u0418": "I" # И => I
51
+ "\u04e2": "\u012a" # Ӣ => Ī (i macron)
52
+ "\u0418\u0301": "\u00cd" # И́ => Í (i acute)
53
+ "\u04e4": "\u00ce" # Ӥ => Î (i circumflex)
54
+ "\u0419": "\u004a" # Й => J
55
+ "\u0406": "\u00cc" # І => Ì (i grave)
56
+ "\u0407": "\u00cf" # Ї => Ï (i diaeresis)
57
+ "\u0406\u0304": "\u01cf" # І̄ => Ǐ (i caron (or breve))
58
+ "\u0408": "\u004a\u030c" # Ј => J̌ (j caron)
59
+ "\u0408\u0335": "\u004a\u0301" # Ј̵ => J́ (j acute)
60
+ "\u041a": "K" # К => K
61
+ "\u040c": "\u1e30" # Ќ => Ḱ (k acute)
62
+ "\u04c3": "\u1e32" # Ӄ => Ḳ (k dot below)
63
+ "\u049c": "\u004b\u0302" # Ҝ => K̂ (k circumflex)
64
+ "\u04a0": "\u01e8" # Ҡ => Ǩ (k caron)
65
+ "\u049e": "\u004b\u0304" # Ҟ => K̄ (k macron)
66
+ "\u049a": "\u0136" # Қ => Ķ (k cedilla[4])
67
+ "\u041a\u0328": "\u004b\u0300" # К̨ => K̀ (k grave)
68
+ "\u051a": "Q" # Ԛ => Q
69
+ "\u041b": "L" # Л => L
70
+ "\u0409": "\u004c\u0302" # Љ => L̂ (l circumflex)
71
+ "\u0520": "\u013b" # Ԡ => Ļ (l cedilla[4])
72
+ "\u041c": "M" # М => M
73
+ "\u041d": "N" # Н => N
74
+ "\u040a": "\u004e\u0302" # Њ => N̂ (n circumflex)
75
+ "\u04a2": "\u0145" # Ң => Ņ (n cedilla[4])
76
+ "\u04c9": "\u1e46" # Ӊ => Ṇ (n dot below)
77
+ "\u04a4": "\u1e44" # Ҥ => Ṅ (n dot)
78
+ "\u050a": "\u01f8" # Ԋ => Ǹ (n grave)
79
+ "\u0522": "\u0143" # Ԣ => Ń (n acute)
80
+ "\u04c7": "\u0147" # Ӈ => Ň (n caron)
81
+ "\u041d\u0304": "\u004e\u0304" # Н̄ => N̄ (n macron)
82
+ "\u041e": "O" # О => O
83
+ "\u04e6": "\u00d6" # Ӧ => Ö (o diaeresis)
84
+ "\u04e8": "\u00d4" # Ө => Ô (o circumflex)
85
+ "\u04ea": "\u0150" # Ӫ => Ő (o double acute)
86
+ "\u04e6\u0304": "\u1ecc\u0308" # Ӧ̄ => Ọ̈ (o diaeresis and dot below)
87
+ "\u04a8": "\u00d2" # Ҩ => Ò (o grave)
88
+ "\u041e\u0301": "\u00d3" # О́ => Ó (o acute)
89
+ "\u041e\u0304": "\u014c" # О̄ => Ō (o macron)
90
+ "\u0152": "\u0152" # Œ => Œ (oe ligature)
91
+ "\u041f": "P" # П => P
92
+ "\u04a6": "\u1e54" # Ҧ => Ṕ (p acute)
93
+ "\u0524": "\u0050\u0300" # Ԥ => P̀ (p grave)
94
+ "\u0420": "R" # Р => R
95
+ "\u0421": "S" # С => S
96
+ "\u04aa": "\u015e" # Ҫ => Ş (s cedilla[4])
97
+ "\u0421\u0300": "\u0053\u0300" # С̀ => S̀ (s grave)
98
+ "\u0422": "T" # Т => T
99
+ "\u040b": "\u0106" # Ћ => Ć (c acute)
100
+ "\u050e": "\u0054\u0300" # Ԏ => T̀ (t grave)
101
+ "\u0422\u030c": "\u0164" # Т̌ => Ť (t caron)
102
+ "\u04ac": "\u0162" # Ҭ => Ţ (t cedilla[4])
103
+ "\u0423": "U" # У => U
104
+ "\u04f0": "\u00dc" # Ӱ => Ü (u diaeresis)
105
+ "\u04ee": "\u016a" # Ӯ => Ū (u macron)
106
+ "\u040e": "\u016c" # Ў => Ŭ (u breve)
107
+ "\u04f2": "\u0170" # Ӳ => Ű (u double acute)
108
+ "\u0423\u0301": "\u00da" # У́ => Ú (u acute)
109
+ "\u04f0\u0304": "\u1ee4\u0308" # Ӱ̄ => Ụ̈ (u diaeresis and dot below)
110
+ "\u04ae": "\u00d9" # Ү => Ù (u grave)
111
+ "\u04b0": "\u0055\u0307" # Ұ => U̇ (u dot)
112
+ "\u051c": "W" # Ԝ => W
113
+ "\u0424": "F" # Ф => F
114
+ "\u0425": "H" # Х => H
115
+ "\u04b2": "\u1e28" # Ҳ => Ḩ (h cedilla[4])
116
+ "\u0426": "C" # Ц => C
117
+ "\u04b4": "\u0043\u0304" # Ҵ => C̄ (c macron)
118
+ "\u040f": "\u0044\u0302" # Џ => D̂ (d circumflex)
119
+ "\u0427": "\u010c" # Ч => Č (c caron)
120
+ "\u04b6": "\u00c7" # Ҷ => Ç (c cedilla[4])
121
+ "\u04cb": "\u0043\u0323" # Ӌ => C̣ (c dot below)
122
+ "\u04f4": "\u0043\u0308" # Ӵ => C̈ (c diaeresis)
123
+ "\u04b8": "\u0108" # Ҹ => Ĉ (c circumflex)
124
+ "\u0427\u0300": "\u0043\u0300" # Ч̀ => C̀ (c grave)
125
+ "\u04bc": "\u0043\u0306" # Ҽ => C̆ (c breve)
126
+ "\u04be": "\u0043\u0328\u0306" # Ҿ => C̨̆ (c ogonek[4] and breve)
127
+ "\u0428": "\u0160" # Ш => Š (s caron)
128
+ "\u0429": "\u015c" # Щ => Ŝ (s circumflex)
129
+ "\u042a": "\u02ba" # Ъ => ʺ (modifier letter double prime[5])
130
+ "\u042b": "Y" # Ы => Y
131
+ "\u04f8": "\u0178" # Ӹ => Ÿ (y diaeresis)
132
+ "\u042b\u0304": "\u0232" # Ы̄ => Ȳ (y macron)
133
+ "\u042c": "\u02b9" # Ь => ʹ (modifier letter prime[5])
134
+ "\u042d": "\u00c8" # Э => È (e grave)
135
+ "\u04d8": "\u0041\u030b" # Ә => A̋ (a double acute)
136
+ "\u04da": "\u00c0" # Ӛ => À (a grave)
137
+ "\u042e": "\u00db" # Ю => Û (u circumflex)
138
+ "\u042e\u0304": "\u00db\u0304" # Ю̄ => Û̄ (u circumflex with macron)
139
+ "\u042f": "\u00c2" # Я => Â (a circumflex)
140
+ "\u0490": "\u0047\u0300" # Ґ => G̀ (g grave)
141
+ "\u0462": "\u011a" # Ѣ => Ě (e caron)
142
+ "\u046a": "\u01cd" # Ѫ => Ǎ (a caron)
143
+ "\u0472": "\u0046\u0300" # Ѳ => F̀ (f grave)
144
+ "\u0474": "\u1ef2" # Ѵ => Ỳ (y grave)
145
+ "\u0430": "a" # а => a
146
+ "\u04d3": "\u00e4" # ӓ => ä
147
+ "\u04d3\u0304": "\u1ea1\u0308" # ӓ̄ => ạ̈
148
+ "\u04d1": "\u0103" # ӑ => ă
149
+ "\u0430\u0304": "\u0101" # а̄ => ā
150
+ "\u04d5": "\u00e6" # ӕ => æ
151
+ "\u0430\u0301": "\u00e1" # а́ => á
152
+ "\u0430\u030a": "\u00e5" # а̊ => å
153
+ "\u0431": "b" # б => b
154
+ "\u0432": "v" # в => v
155
+ "\u0433": "g" # г => g
156
+ "\u0453": "\u01f5" # ѓ => ǵ
157
+ "\u0493": "\u0121" # ғ => ġ
158
+ "\u0495": "\u011f" # ҕ => ğ
159
+ "\u04bb": "\u1e25" # һ => ḥ
160
+ "\u0434": "d" # д => d
161
+ "\u0452": "\u0111" # ђ => đ
162
+ "\u0435": "e" # е => e
163
+ "\u04d7": "\u0115" # ӗ => ĕ
164
+ "\u0451": "\u00eb" # ё => ë
165
+ "\u0454": "\u00ea" # є => ê
166
+ "\u0436": "\u017e" # ж => ž
167
+ "\u0497": "\u017e\u0327" # җ => ž̧
168
+ "\u04dd": "\u007a\u0304" # ӝ => z̄
169
+ "\u04c2": "\u007a\u0306" # ӂ => z̆
170
+ "\u0437": "z" # з => z
171
+ "\u04df": "\u007a\u0308" # ӟ => z̈
172
+ "\u04e1": "\u017a" # ӡ => ź
173
+ "\u0455": "\u1e91" # ѕ => ẑ
174
+ "\u0438": "i" # и => i
175
+ "\u04e3": "\u012b" # ӣ => ī
176
+ "\u0438\u0301": "\u00ed" # и́ => í
177
+ "\u04e5": "\u00ee" # ӥ => î
178
+ "\u0439": "j" # й => j
179
+ "\u0456": "\u00ec" # і => ì
180
+ "\u0457": "\u00ef" # ї => ï
181
+ "\u0456\u0304": "\u01d0" # і̄ => ǐ
182
+ "\u0458": "\u01f0" # ј => ǰ
183
+ "\u0458\u0335": "\u006a\u0301" # ј̵ => j́
184
+ "\u043a": "k" # к => k
185
+ "\u045c": "\u1e31" # ќ => ḱ
186
+ "\u04c4": "\u1e33" # ӄ => ḳ
187
+ "\u049d": "\u006b\u0302" # ҝ => k̂
188
+ "\u04a1": "\u01e9" # ҡ => ǩ
189
+ "\u049f": "\u006b\u0304" # ҟ => k̄
190
+ "\u049b": "\u0137" # қ => ķ
191
+ "\u043a\u0328": "\u006b\u0300" # к̨ => k̀
192
+ "\u051b": "q" # ԛ => q
193
+ "\u043b": "l" # л => l
194
+ "\u0459": "\u006c\u0302" # љ => l̂
195
+ "\u0521": "\u013c" # ԡ => ļ
196
+ "\u043c": "m" # м => m
197
+ "\u043d": "n" # н => n
198
+ "\u045a": "\u006e\u0302" # њ => n̂
199
+ "\u04a3": "\u0146" # ң => ņ
200
+ "\u04ca": "\u1e47" # ӊ => ṇ
201
+ "\u04a5": "\u1e45" # ҥ => ṅ
202
+ "\u050b": "\u01f9" # ԋ => ǹ
203
+ "\u0523": "\u0144" # ԣ => ń
204
+ "\u04c8": "\u0148" # ӈ => ň
205
+ "\u043d\u0304": "\u006e\u0304" # н̄ => n̄
206
+ "\u043e": "o" # о => o
207
+ "\u04e7": "\u00f6" # ӧ => ö
208
+ "\u04e9": "\u00f4" # ө => ô
209
+ "\u04eb": "\u0151" # ӫ => ő
210
+ "\u043e\u0304\u0308": "\u1ecd\u0308" # о̄̈ => ọ̈
211
+ "\u04a9": "\u00f2" # ҩ => ò
212
+ "\u043e\u0301": "\u00f3" # о́ => ó
213
+ "\u043e\u0304": "\u014d" # о̄ => ō
214
+ "\u0153": "\u0153" # œ => œ
215
+ "\u043f": "p" # п => p
216
+ "\u04a7": "\u1e55" # ҧ => ṕ
217
+ "\u0525": "\u0070\u0300" # ԥ => p̀
218
+ "\u0440": "r" # р => r
219
+ "\u0441": "s" # с => s
220
+ "\u04ab": "\u015f" # ҫ => ş
221
+ "\u0441\u0300": "\u0073\u0300" # с̀ => s̀
222
+ "\u0442": "t" # т => t
223
+ "\u045b": "\u0107" # ћ => ć
224
+ "\u050f": "\u0074\u0300" # ԏ => t̀
225
+ "\u0442\u030c": "\u0165" # т̌ => ť
226
+ "\u04ad": "\u0163" # ҭ => ţ
227
+ "\u0443": "u" # у => u
228
+ "\u04f1": "\u00fc" # ӱ => ü
229
+ "\u04ef": "\u016b" # ӯ => ū
230
+ "\u045e": "\u016d" # ў => ŭ
231
+ "\u04f3": "\u0171" # ӳ => ű
232
+ "\u0443\u0301": "\u00fa" # у́ => ú
233
+ "\u04f1\u0304": "\u1ee5\u0308" # ӱ̄ => ụ̈
234
+ "\u04af": "\u00f9" # ү => ù
235
+ "\u04b1": "\u0075\u0307" # ұ => u̇
236
+ "\u051d": "w" # ԝ => w
237
+ "\u0444": "f" # ф => f
238
+ "\u0445": "h" # х => h
239
+ "\u04b3": "\u1e29" # ҳ => ḩ
240
+ "\u0446": "c" # ц => c
241
+ "\u04b5": "\u0063\u0304" # ҵ => c̄
242
+ "\u045f": "\u0064\u0302" # џ => d̂
243
+ "\u0447": "\u010d" # ч => č
244
+ "\u04b7": "\u00e7" # ҷ => ç
245
+ "\u04cc": "\u0063\u0323" # ӌ => c̣
246
+ "\u04f5": "\u0063\u0308" # ӵ => c̈
247
+ "\u04b9": "\u0109" # ҹ => ĉ
248
+ "\u0447\u0300": "\u0063\u0300" # ч̀ => c̀
249
+ "\u04bd": "\u0063\u0306" # ҽ => c̆
250
+ "\u04bf": "\u0063\u0328\u0306" # ҿ => c̨̆
251
+ "\u0448": "\u0161" # ш => š
252
+ "\u0449": "\u015d" # щ => ŝ
253
+ "\u044a": "\u02ba" # ъ => ʺ
254
+ "\u044b": "y" # ы => y
255
+ "\u04f9": "\u00ff" # ӹ => ÿ
256
+ "\u044b\u0304": "\u0233" # ы̄ => ȳ
257
+ "\u044c": "\u02b9" # ь => ʹ
258
+ "\u044d": "\u00e8" # э => è
259
+ "\u04d9": "\u0061\u030b" # ә => a̋
260
+ "\u04db": "\u00e0" # ӛ => à
261
+ "\u044e": "\u00fb" # ю => û
262
+ "\u044e\u0304": "\u00fb\u0304" # ю̄ => û̄
263
+ "\u044f": "\u00e2" # я => â
264
+ "\u0491": "\u0067\u0300" # ґ => g̀
265
+ "\u0463": "\u011b" # ѣ => ě
266
+ "\u046b": "\u01ce" # ѫ => ǎ
267
+ "\u0473": "\u0066\u0300" # ѳ => f̀
268
+ "\u0475": "\u1ef3" # ѵ => ỳ
269
+ "\u04c0": "\u2021" # Ӏ => ‡
270
+ "\u02bc": "\u0060" # ʼ => `
271
+ "\u02ee": "\u00a8" # ˮ => ¨
272
+
@@ -0,0 +1,109 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 11940-1998
4
+ language: tha
5
+ source_script: Thai
6
+ destination_script: Latn
7
+ name: ISO 11940:1998 Information and documentation -- Transliteration of Thai
8
+ url: https://www.iso.org/standard/20574.html
9
+ creation_date: 1998
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+
15
+ tests:
16
+ - source: 'ภาษาไทย'
17
+ expected: 'p̣hās̛̄āịthy'
18
+ - source: 'เชียงใหม่'
19
+ expected: 'echīyngıh̄m̀'
20
+
21
+ map:
22
+
23
+ characters:
24
+ '\u0e01': 'k' # ก THAI CHARACTER KO KAI
25
+ '\u0e02': 'k̄h' # ข THAI CHARACTER KHO KHAI
26
+ '\u0e03': 'ḳ̄h' # ฃ THAI CHARACTER KHO KHUAT
27
+ '\u0e04': 'kh' # ค THAI CHARACTER KHO KHWAI
28
+ '\u0e05': 'k̛h' # ฅ THAI CHARACTER KHO KHON
29
+ '\u0e06': 'ḳh' # ฆ THAI CHARACTER KHO RAKHANG
30
+ '\u0e07': 'ng' # ง THAI CHARACTER NGO NGU
31
+ '\u0e08': 'c' # จ THAI CHARACTER CHO CHAN
32
+ '\u0e09': 'c̄h' # ฉ THAI CHARACTER CHO CHING
33
+ '\u0e0a': 'ch' # ช THAI CHARACTER CHO CHANG
34
+ '\u0e0b': 's' # ซ THAI CHARACTER SO SO
35
+ '\u0e0c': 'c̣h' # ฌ THAI CHARACTER CHO CHOE
36
+ '\u0e0d': 'ỵ' # ญ THAI CHARACTER YO YING
37
+ '\u0e0e': 'ḍ' # ฎ THAI CHARACTER DO CHADA
38
+ '\u0e0f': 'ṭ' # ฏ THAI CHARACTER TO PATAK
39
+ '\u0e10': 'ṭ̄h' # ฐ THAI CHARACTER THO THAN
40
+ '\u0e11': 'ṯh' # ฑ THAI CHARACTER THO NANGMONTHO
41
+ '\u0e12': 't̛h' # ฒ THAI CHARACTER THO PHUTHAO
42
+ '\u0e13': 'ṇ' # ณ THAI CHARACTER NO NEN
43
+ '\u0e14': 'd' # ด THAI CHARACTER DO DEK
44
+ '\u0e15': 't' # ต THAI CHARACTER TO TAO
45
+ '\u0e16': 't̄h' # ถ THAI CHARACTER THO THUNG
46
+ '\u0e17': 'th' # ท THAI CHARACTER THO THAHAN
47
+ '\u0e18': 'ṭh' # ธ THAI CHARACTER THO THONG
48
+ '\u0e19': 'n' # น THAI CHARACTER NO NU
49
+ '\u0e1a': 'b' # บ THAI CHARACTER BO BAIMAI
50
+ '\u0e1b': 'p' # ป THAI CHARACTER PO PLA
51
+ '\u0e1c': 'p̄h' # ผ THAI CHARACTER PHO PHUNG
52
+ '\u0e1d': 'f̄' # ฝ THAI CHARACTER FO FA
53
+ '\u0e1e': 'ph' # พ THAI CHARACTER PHO PHAN
54
+ '\u0e1f': 'f' # ฟ THAI CHARACTER FO FAN
55
+ '\u0e20': 'p̣h' # ภ THAI CHARACTER PHO SAMPHAO
56
+ '\u0e21': 'm' # ม THAI CHARACTER MO MA
57
+ '\u0e22': 'y' # ย THAI CHARACTER YO YAK
58
+ '\u0e23': 'r' # ร THAI CHARACTER RO RUA
59
+ '\u0e24': 'v' # ฤ THAI CHARACTER RU
60
+ '\u0e25': 'l' # ล THAI CHARACTER LO LING
61
+ '\u0e26': 'ł' # ฦ THAI CHARACTER LU
62
+ '\u0e27': 'w' # ว THAI CHARACTER WO WAEN
63
+ '\u0e28': 'ṣ̄' # ศ THAI CHARACTER SO SALA
64
+ '\u0e29': 's̛̄' # ษ THAI CHARACTER SO RUSI
65
+ '\u0e2a': 's̄' # ส THAI CHARACTER SO SUA
66
+ '\u0e2b': 'h̄' # ห THAI CHARACTER HO HIP
67
+ '\u0e2c': 'ḷ' # ฬ THAI CHARACTER LO CHULA
68
+ '\u0e2d': 'x' # อ THAI CHARACTER O ANG
69
+ '\u0e2e': 'ḥ' # ฮ THAI CHARACTER HO NOKHUK
70
+ '\u0e2f': 'ǂ' # ฯ THAI CHARACTER PAIYANNOI
71
+ '\u0e30': 'a' # ะ THAI CHARACTER SARA A
72
+ '\u0e31': 'ạ' # ั THAI CHARACTER MAI HAN-AKAT
73
+ '\u0e32': 'ā' # า THAI CHARACTER SARA AA
74
+ '\u0e33': 'å' # ำ THAI CHARACTER SARA AM
75
+ '\u0e34': 'i' # ิ THAI CHARACTER SARA I
76
+ '\u0e35': 'ī' # ี THAI CHARACTER SARA II
77
+ '\u0e36': 'ụ' # ึ THAI CHARACTER SARA UE
78
+ '\u0e37': 'ụ̄' # ื THAI CHARACTER SARA UEE
79
+ '\u0e38': 'u' # ุ THAI CHARACTER SARA U
80
+ '\u0e39': 'ū' # ู THAI CHARACTER SARA UU
81
+ '\u0e3a': '–̥' # ฺ THAI CHARACTER PHINTHU
82
+ '\u0e40': 'e' # เ THAI CHARACTER SARA E
83
+ '\u0e41': 'æ' # แ THAI CHARACTER SARA AE
84
+ '\u0e42': 'o' # โ THAI CHARACTER SARA O
85
+ '\u0e43': 'ı' # ใ THAI CHARACTER SARA AI MAIMUAN
86
+ '\u0e44': 'ị' # ไ THAI CHARACTER SARA AI MAIMALAI
87
+ '\u0e45': 'ɨ' # ๅ THAI CHARACTER LAKKHANGYAO
88
+ '\u0e46': '«' # ๆ THAI CHARACTER MAIYAMOK
89
+ '\u0e47': '̆' # ็ THAI CHARACTER MAITAIKHU
90
+ '\u0e48': '̀' # ่ THAI CHARACTER MAI EK
91
+ '\u0e49': '̂' # ้ THAI CHARACTER MAI THO
92
+ '\u0e4a': '́' # ๊ THAI CHARACTER MAI TRI
93
+ '\u0e4b': '̌' # ๋ THAI CHARACTER MAI CHATTAWA
94
+ '\u0e4c': '̒' # ์ THAI CHARACTER THANTHAKHAT
95
+ '\u0e4d': '̊' # ํ THAI CHARACTER NIKHAHIT
96
+ '\u0e4e': '~' # ๎ THAI CHARACTER YAMAKKAN
97
+ '\u0e4f': '§' # ๏ THAI CHARACTER FONGMAN
98
+ '\u0e50': '0' # ๐ THAI DIGIT ZERO
99
+ '\u0e51': '1' # ๑ THAI DIGIT ONE
100
+ '\u0e52': '2' # ๒ THAI DIGIT TWO
101
+ '\u0e53': '3' # ๓ THAI DIGIT THREE
102
+ '\u0e54': '4' # ๔ THAI DIGIT FOUR
103
+ '\u0e55': '5' # ๕ THAI DIGIT FIVE
104
+ '\u0e56': '6' # ๖ THAI DIGIT SIX
105
+ '\u0e57': '7' # ๗ THAI DIGIT SEVEN
106
+ '\u0e58': '8' # ๘ THAI DIGIT EIGHT
107
+ '\u0e59': '9' # ๙ THAI DIGIT NINE
108
+ '\u0e5a': 'ǁ' # ๚ THAI CHARACTER ANGKHANKHU
109
+ '\u0e5b': '»' # ๛ THAI CHARACTER KHOMUT