interscript 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,88 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2015
4
+ language: kat
5
+ source_script: Geor
6
+ destination_script: Latn
7
+ name: Office of the Director Of National Intelligence Georgian Personal Names 2015
8
+ # url:
9
+ source: ICS 630-01, Annex E
10
+ creation_date: 2015
11
+ confirmation_date: 2015
12
+ description: |
13
+ This system is the Intelligence Community standard for the transliteration of Georgian names that
14
+ will be applied to all final written reports and products for IC consumers. It is not intended to
15
+ eliminate variations of a name that can contribute forensic information. Rather, it is to provide an
16
+ IC standard Romanized (English) transliteration from Georgian that can then be linked to forensic
17
+ information in ways that will help identify the referent of the name.
18
+
19
+ notes:
20
+
21
+ tests:
22
+
23
+ - source: ბაყაყი
24
+ expected: baqaqi
25
+
26
+ - source: ძროხა
27
+ expected: dzrokha
28
+
29
+ - source: ჰაერი
30
+ expected: haeri
31
+
32
+ - source: ჟოლო
33
+ expected: zholo
34
+
35
+ - source: ჯართი
36
+ expected: jarti
37
+
38
+ - source: ღრმაღელე
39
+ expected: ghrmaghele
40
+
41
+ - source: ზვიად გამსახურდია
42
+ expected: zviad gamsakhurdia
43
+
44
+ - source: ედუარდ შევარდნაძე
45
+ expected: eduard shevardnadze
46
+
47
+ - source: მიხეილ სააკაშვილი
48
+ expected: mikheil saakashvili
49
+
50
+ - source: გიორგი მარგველაშვილი
51
+ expected: giorgi margvelashvili
52
+
53
+ map:
54
+ characters:
55
+ '\u10d0' : 'a' # ა
56
+ '\u10d1' : 'b' # ბ
57
+ '\u10d2' : 'g' # გ
58
+ '\u10d3' : 'd' # დ
59
+ '\u10d4' : 'e' # ე
60
+ '\u10d5' : 'v' # ვ
61
+ '\u10d6' : 'z' # ზ
62
+ '\u10d7' : 't' # თ
63
+ '\u10d8' : 'i' # ი
64
+ '\u10d9' : 'k' # კ
65
+ '\u10da' : 'l' # ლ
66
+ '\u10db' : 'm' # მ
67
+ '\u10dc' : 'n' # ნ
68
+ '\u10dd' : 'o' # ო
69
+ '\u10de' : 'p' # პ
70
+ '\u10df' : 'zh' # ჟ
71
+ '\u10e0' : 'r' # რ
72
+ '\u10e1' : 's' # ს
73
+ '\u10e2' : 't' # ტ
74
+ '\u10e3' : 'u' # უ
75
+ '\u10e4' : 'p' # ფ
76
+ '\u10e5' : 'k' # ქ
77
+ '\u10e6' : 'gh' # ღ
78
+ '\u10e7' : 'q' # ყ
79
+ '\u10e8' : 'sh' # შ
80
+ '\u10e9' : 'ch' # ჩ
81
+ '\u10ea' : 'ts' # ც
82
+ '\u10eb' : 'dz' # ძ
83
+ '\u10ec' : 'ts' # წ
84
+ '\u10ed' : 'ch' # ჭ
85
+ '\u10ee' : 'kh' # ხ
86
+ '\u10ef' : 'j' # ჯ
87
+ '\u10f0' : 'h' # ჰ
88
+
@@ -0,0 +1,157 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2015
4
+ language: ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Office of the Director Of National Intelligence Ukrainian Personal Names 2004 System
8
+ # url:
9
+ source: ICS 630-01, Annex M
10
+ creation_date: 2015
11
+ confirmation_date: 2015
12
+ description: |
13
+ This system is the Intelligence Community (IC) standard for the
14
+ transliteration of Ukrainian names that will be applied to all final
15
+ written reports and products for IC consumers. It is not intended to
16
+ eliminate variations of a name that can contribute forensic
17
+ information. Rather, it is to provide an IC standard Romanized
18
+ (English) transliteration from Ukrainian that can then be linked to
19
+ forensic information in ways that will help identify the referent of
20
+ the name.
21
+
22
+ tests:
23
+ - source: Андрій
24
+ expected: Andriy
25
+ - source: Борисенко
26
+ expected: Borysenko
27
+ - source: Володимир
28
+ expected: Volodymyr
29
+ - source: Богдан
30
+ expected: Bohdan
31
+ - source: Згурський
32
+ expected: Zhurskyy
33
+ - source: Дмитро
34
+ expected: Dmytro
35
+ - source: Олег
36
+ expected: Oleh
37
+ - source: Гаєвич
38
+ expected: Hayevych
39
+ - source: Жанна
40
+ expected: Zhanna
41
+ - source: Казимирчук
42
+ expected: Kazymyrchuk
43
+ - source: Михайленко
44
+ expected: Mykhaylenko
45
+ - source: Іващенко
46
+ expected: Ivashchenko
47
+ - source: Олексій
48
+ expected: Oleksiy
49
+ - source: Коваленко
50
+ expected: Kovalenko
51
+ - source: Леонід
52
+ expected: Leonid
53
+ - source: Маринич
54
+ expected: Marynych
55
+ - source: Наталія
56
+ expected: Nataliya
57
+ - source: Онищенко
58
+ expected: Onyshchenko
59
+ - source: Петро
60
+ expected: Petro
61
+ - source: Рибчинський
62
+ expected: Rybchynskyy
63
+ - source: Соломія
64
+ expected: Solomiya
65
+ - source: Троць
66
+ expected: Trots
67
+ - source: Уляна
68
+ expected: Ulyana
69
+ - source: Філіпчук
70
+ expected: Filipchuk
71
+ - source: Христина
72
+ expected: Khrystyna
73
+ - source: Стеценко
74
+ expected: Stetsenko
75
+ - source: Шевченко
76
+ expected: Shevchenko
77
+ - source: Гаращенко
78
+ expected: Harashchenko
79
+ - source: Юрій
80
+ expected: Yuriy
81
+ - source: Ярошенко
82
+ expected: Yaroshenko
83
+ - source: Костянтин
84
+ expected: Kostyantyn
85
+
86
+ map:
87
+ rules:
88
+ - pattern: \b\u2019\b # remove ’
89
+ result: ""
90
+
91
+ characters:
92
+ "\u0410": "A" # А
93
+ "\u0411": "B" # Б
94
+ "\u0412": "V" # В
95
+ "\u0413": "H" # Г
96
+ "\u0490": "G" # Ґ
97
+ "\u0414": "D" # Д
98
+ "\u0415": "E" # Е
99
+ "\u0404": "Ye" # Є
100
+ "\u0416": "Zh" # Ж
101
+ "\u0417": "Z" # З
102
+ "\u0418": "Y" # И
103
+ "\u0406": "I" # І
104
+ "\u0407": "Yi" # Ї
105
+ "\u0419": "Y" # Й
106
+ "\u041a": "K" # К
107
+ "\u041b": "L" # Л
108
+ "\u041c": "M" # М
109
+ "\u041d": "N" # Н
110
+ "\u041e": "O" # О
111
+ "\u041f": "P" # П
112
+ "\u0420": "R" # Р
113
+ "\u0421": "S" # С
114
+ "\u0422": "T" # Т
115
+ "\u0423": "U" # У
116
+ "\u0424": "F" # Ф
117
+ "\u0425": "Kh" # Х
118
+ "\u0426": "Ts" # Ц
119
+ "\u0427": "Ch" # Ч
120
+ "\u0428": "Sh" # Ш
121
+ "\u0429": "Shch" # Щ
122
+ "\u042e": "Yu" # Ю
123
+ "\u042f": "Ya" # Я
124
+ "\u042c": "" # Ь
125
+ "\u0430": "a" # а
126
+ "\u0431": "b" # б
127
+ "\u0432": "v" # в
128
+ "\u0433": "h" # г
129
+ "\u0491": "g" # ґ
130
+ "\u0434": "d" # д
131
+ "\u0435": "e" # е
132
+ "\u0454": "ye" # є
133
+ "\u0436": "zh" # ж
134
+ "\u0437": "z" # з
135
+ "\u0438": "y" # и
136
+ "\u0456": "i" # і
137
+ "\u0457": "yi" # ї
138
+ "\u0439": "y" # й
139
+ "\u043a": "k" # к
140
+ "\u043b": "l" # л
141
+ "\u043c": "m" # м
142
+ "\u043d": "n" # н
143
+ "\u043e": "o" # о
144
+ "\u043f": "p" # п
145
+ "\u0440": "r" # р
146
+ "\u0441": "s" # с
147
+ "\u0442": "t" # т
148
+ "\u0443": "u" # у
149
+ "\u0444": "f" # ф
150
+ "\u0445": "kh" # х
151
+ "\u0446": "ts" # ц
152
+ "\u0447": "ch" # ч
153
+ "\u0448": "sh" # ш
154
+ "\u0449": "shch" # щ
155
+ "\u044e": "yu" # ю
156
+ "\u044f": "ya" # я
157
+ "\u044c": "" # ь
@@ -0,0 +1,90 @@
1
+ ---
2
+ authority_id: royin
3
+ id: 1939-generic
4
+ language: tha
5
+ source_script: Thai
6
+ destination_script: Latn
7
+ name: Royal Thai General System of Transcription (1939) Generic
8
+ url: http://www.siamese-heritage.org/jsspdf/1941/JSS_033_1d_RoyalInstituteTranscriptionOfThaiIntoRomanCharacters.pdf
9
+ creation_date: 1939
10
+ adoption_date:
11
+ description: |
12
+ This map loads two external maps to convert Thai text first into phonemic Thai,
13
+ and then into IPA transcription.
14
+
15
+ The IPA transcription will then be handled by this map, and converted into
16
+ Royal Thai General System of Transcription (1939).
17
+
18
+ The first two parts are done via two external maps.
19
+
20
+
21
+ notes: |
22
+ This is a draft for the map.
23
+ The conversion from Thai to Phonemic Thai is still a work in progress.
24
+
25
+ tests:
26
+ - source: "กษัตริย์"
27
+ expected: "kasat"
28
+ - source: "ประกาศ"
29
+ expected: "prakat"
30
+ # - source: "ราชบุรี่"
31
+ # expected: "ratburi"
32
+ # - source: "ปากลัด"
33
+ # expected: "pak-lat"
34
+
35
+
36
+ chain: ["var-tha-Thai-Thai-phonemic" ,"var-tha-Thai-Zsym-ipa"]
37
+
38
+ map:
39
+ title-case: false
40
+ word_separator: " "
41
+
42
+ rules:
43
+ - pattern: '[˩˨˧˦˥]'
44
+ result : ''
45
+
46
+ postrules:
47
+ - pattern: '\.'
48
+ result: ''
49
+
50
+ characters:
51
+
52
+ dictionary:
53
+
54
+ '̯': ''
55
+ '̚': ''
56
+
57
+ 'ʔ': ''
58
+ 'ː': ''
59
+
60
+ 't͡ɕʰ': 'ch'
61
+ 't͡ɕ': 'čh'
62
+ 'ŋ': 'ng'
63
+ 'j': 'y'
64
+ 'ɔ': 'o̦'
65
+ 'ɤ': 'œ'
66
+ 'ɛ': 'æ'
67
+ 'ɯ': 'ư'
68
+ 'ʰ': 'h'
69
+
70
+ 'aːw': 'ao'
71
+ 'aw': 'ao'
72
+ 'a̯w': 'ao'
73
+ 'eːw': 'eo'
74
+ 'ew': 'eo'
75
+ 'ɛːw': 'aeo'
76
+ 'ɛw': 'æo'
77
+ 'iːw': 'iu'
78
+ 'iw': 'iu'
79
+
80
+ 'aːj': 'ai'
81
+ 'aj': 'ai'
82
+ 'a̯j': 'ai'
83
+ 'ɔːj': 'o̦i'
84
+ 'ɔj': 'o̦i'
85
+ 'oːj': 'oi'
86
+ 'oj': 'oi'
87
+ 'ɤːj': 'œi'
88
+ 'ɤj': 'œi'
89
+ 'uːj': 'ui'
90
+ 'uj': 'ui'
@@ -0,0 +1,179 @@
1
+ ---
2
+ authority_id: royin
3
+ id: 1968-chained
4
+ language: tha
5
+ source_script: Thai
6
+ destination_script: Latn
7
+ name: Royal Thai General System of Transcription (1968)
8
+ url: http://www.royin.go.th/wp-content/uploads/royin-ebook/276/FileUpload/758_6484.pdf
9
+ creation_date: 1968
10
+ adoption_date:
11
+ description: |
12
+ This map loads two external maps to convert Thai text first into phonemic Thai,
13
+ and then into IPA transcription.
14
+
15
+ The IPA transcription will then be handled by this map, and converted into
16
+ Royal Thai General System of Transcription (1968).
17
+
18
+ The first two parts are done via two external maps.
19
+
20
+ notes: |
21
+ The conversion from Thai to Phonemic Thai is still a work in progress.
22
+
23
+ tests:
24
+ - source: "สะพาน"
25
+ expected: "saphan"
26
+ - source: "ลานตา"
27
+ expected: "lanta"
28
+ - source: "บาง"
29
+ expected: "bang"
30
+ - source: "สมุทร"
31
+ expected: "samut"
32
+ - source: "ลำ"
33
+ expected: "lam"
34
+ - source: "สิงห์"
35
+ expected: "sing"
36
+ - source: "บุรี"
37
+ expected: "buri"
38
+ - source: "สตึก"
39
+ expected: "satuk"
40
+ - source: "พืช"
41
+ expected: "phut"
42
+ - source: "บรบือ"
43
+ expected: "borabu"
44
+ - source: "ภู"
45
+ expected: "phu"
46
+ - source: "ปะนาเระ"
47
+ expected: "panare"
48
+ - source: "เพ็ญ"
49
+ expected: "phen"
50
+ # - source: "เขน"
51
+ # expected: "khen"
52
+ - source: "แซะ"
53
+ expected: "sae"
54
+ # - source: "สะแก"
55
+ # expected: "sakae"
56
+ - source: "พะโต๊ะ"
57
+ expected: "phato"
58
+ - source: "ลพ"
59
+ expected: "lop"
60
+ # - source: "สามโก้"
61
+ # expected: "samko"
62
+ - source: "เกาะ"
63
+ expected: "ko"
64
+ - source: "บ่อ"
65
+ expected: "bo"
66
+ - source: "เซอะ"
67
+ expected: "soe"
68
+ - source: "อำเภอ"
69
+ expected: "amphoe"
70
+ - source: "เนิน"
71
+ expected: "noen"
72
+ # - source: "เพียะ"
73
+ # expected: "phia"
74
+ - source: "เทียน"
75
+ expected: "thian"
76
+ # - source: "เกือะ"
77
+ # expected: "kua"
78
+ - source: "เมือง"
79
+ expected: "muang"
80
+ # - source: "ผัวะ"
81
+ # expected: "phua"
82
+ - source: "บัว"
83
+ expected: "bua"
84
+ # - source: "ควน"
85
+ # expected: "khuan"
86
+ - source: "ใหญ่"
87
+ expected: "yai"
88
+ # - source: "ไผ่"
89
+ # expected: "phai"
90
+ - source: "ชัย"
91
+ expected: "chai"
92
+ - source: "ไทย"
93
+ expected: "thai"
94
+ # - source: "ปาย"
95
+ # expected: "pai"
96
+ - source: "เจ้า"
97
+ expected: "chao"
98
+ - source: "ข้าว"
99
+ expected: "khao"
100
+ # - source: "กุย"
101
+ # expected: "kui"
102
+ - source: "โดย"
103
+ expected: "doi"
104
+ # - source: "ดอย"
105
+ # expected: "doi"
106
+ # - source: "งิ้ว"
107
+ # expected: "ngiu"
108
+ - source: "เร็ว"
109
+ expected: "reo"
110
+ # - source: "เลว"
111
+ # expected: "leo"
112
+ # - source: "เลย"
113
+ # expected: "loei"
114
+ # - source: "เดือย"
115
+ # expected: "duai"
116
+ # - source: "ห้วย"
117
+ # expected: "huai"
118
+ - source: "แมว"
119
+ expected: "maeo"
120
+ - source: "เขียว"
121
+ expected: "khieu"
122
+
123
+ chain: ["var-tha-Thai-Thai-phonemic" ,"var-tha-Thai-Zsym-ipa"]
124
+
125
+ map:
126
+ title-case: false
127
+ word_separator: " "
128
+
129
+ rules:
130
+ - pattern: '[˩˨˧˦˥]'
131
+ result : ''
132
+ - pattern: '^'
133
+ result: '.'
134
+
135
+ postrules:
136
+ - pattern: '\.'
137
+ result: ''
138
+
139
+ characters:
140
+
141
+ dictionary:
142
+
143
+ '̯': ''
144
+ '̚': ''
145
+
146
+ 'ʔ': ''
147
+ 'ː': ''
148
+
149
+ 't͡ɕʰ': 'ch'
150
+ 't͡ɕ': 'ch'
151
+ 'ŋ': 'ng'
152
+ 'j': 'y'
153
+ 'ɔ': 'o'
154
+ 'ɤ': 'oe'
155
+ 'ɛ': 'ae'
156
+ 'ɯ': 'u'
157
+ 'ʰ': 'h'
158
+
159
+ 'aːw': 'ao'
160
+ 'aw': 'ao'
161
+ 'a̯w': 'eu'
162
+ 'eːw': 'eo'
163
+ 'ew': 'eo'
164
+ 'ɛːw': 'aeo'
165
+ 'ɛw': 'aeo'
166
+ 'iːw': 'iu'
167
+ 'iw': 'iu'
168
+
169
+ 'aːj': 'ai'
170
+ 'aj': 'ai'
171
+ 'a̯j': 'ai'
172
+ 'ɔːj': 'oi'
173
+ 'ɔj': 'oi'
174
+ 'oːj': 'oi'
175
+ 'oj': 'oi'
176
+ 'ɤːj': 'oei'
177
+ 'ɤj': 'oei'
178
+ 'uːj': 'ui'
179
+ 'uj': 'ui'