interscript 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +246 -14
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +140 -16
- data/lib/interscript/command.rb +27 -0
- data/lib/interscript/mapping.rb +125 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
- data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +20 -5
- data/spec/spec_helper.rb +3 -1
- metadata +149 -24
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: odni
|
|
3
|
+
id: 2015
|
|
4
|
+
language: kat
|
|
5
|
+
source_script: Geor
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Office of the Director of National Intelligence Georgian Personal Names 2015
|
|
8
|
+
# url:
|
|
9
|
+
source: ICS 630-01, Annex E
|
|
10
|
+
creation_date: 2015
|
|
11
|
+
confirmation_date: 2015
|
|
12
|
+
description: |
|
|
13
|
+
This system is the Intelligence Community standard for the transliteration of Georgian names that
|
|
14
|
+
will be applied to all final written reports and products for IC consumers. It is not intended to
|
|
15
|
+
eliminate variations of a name that can contribute forensic information. Rather, it is to provide an
|
|
16
|
+
IC standard Romanized (English) transliteration from Georgian that can then be linked to forensic
|
|
17
|
+
information in ways that will help identify the referent of the name.
|
|
18
|
+
|
|
19
|
+
notes:
|
|
20
|
+
|
|
21
|
+
tests:
|
|
22
|
+
|
|
23
|
+
- source: ბაყაყი
|
|
24
|
+
expected: baqaqi
|
|
25
|
+
|
|
26
|
+
- source: ძროხა
|
|
27
|
+
expected: dzrokha
|
|
28
|
+
|
|
29
|
+
- source: ჰაერი
|
|
30
|
+
expected: haeri
|
|
31
|
+
|
|
32
|
+
- source: ჟოლო
|
|
33
|
+
expected: zholo
|
|
34
|
+
|
|
35
|
+
- source: ჯართი
|
|
36
|
+
expected: jarti
|
|
37
|
+
|
|
38
|
+
- source: ღრმაღელე
|
|
39
|
+
expected: ghrmaghele
|
|
40
|
+
|
|
41
|
+
- source: ზვიად გამსახურდია
|
|
42
|
+
expected: zviad gamsakhurdia
|
|
43
|
+
|
|
44
|
+
- source: ედუარდ შევარდნაძე
|
|
45
|
+
expected: eduard shevardnadze
|
|
46
|
+
|
|
47
|
+
- source: მიხეილ სააკაშვილი
|
|
48
|
+
expected: mikheil saakashvili
|
|
49
|
+
|
|
50
|
+
- source: გიორგი მარგველაშვილი
|
|
51
|
+
expected: giorgi margvelashvili
|
|
52
|
+
|
|
53
|
+
map:
|
|
54
|
+
characters:
|
|
55
|
+
'\u10d0' : 'a' # ა
|
|
56
|
+
'\u10d1' : 'b' # ბ
|
|
57
|
+
'\u10d2' : 'g' # გ
|
|
58
|
+
'\u10d3' : 'd' # დ
|
|
59
|
+
'\u10d4' : 'e' # ე
|
|
60
|
+
'\u10d5' : 'v' # ვ
|
|
61
|
+
'\u10d6' : 'z' # ზ
|
|
62
|
+
'\u10d7' : 't' # თ
|
|
63
|
+
'\u10d8' : 'i' # ი
|
|
64
|
+
'\u10d9' : 'k' # კ
|
|
65
|
+
'\u10da' : 'l' # ლ
|
|
66
|
+
'\u10db' : 'm' # მ
|
|
67
|
+
'\u10dc' : 'n' # ნ
|
|
68
|
+
'\u10dd' : 'o' # ო
|
|
69
|
+
'\u10de' : 'p' # პ
|
|
70
|
+
'\u10df' : 'zh' # ჟ
|
|
71
|
+
'\u10e0' : 'r' # რ
|
|
72
|
+
'\u10e1' : 's' # ს
|
|
73
|
+
'\u10e2' : 't' # ტ
|
|
74
|
+
'\u10e3' : 'u' # უ
|
|
75
|
+
'\u10e4' : 'p' # ფ
|
|
76
|
+
'\u10e5' : 'k' # ქ
|
|
77
|
+
'\u10e6' : 'gh' # ღ
|
|
78
|
+
'\u10e7' : 'q' # ყ
|
|
79
|
+
'\u10e8' : 'sh' # შ
|
|
80
|
+
'\u10e9' : 'ch' # ჩ
|
|
81
|
+
'\u10ea' : 'ts' # ც
|
|
82
|
+
'\u10eb' : 'dz' # ძ
|
|
83
|
+
'\u10ec' : 'ts' # წ
|
|
84
|
+
'\u10ed' : 'ch' # ჭ
|
|
85
|
+
'\u10ee' : 'kh' # ხ
|
|
86
|
+
'\u10ef' : 'j' # ჯ
|
|
87
|
+
'\u10f0' : 'h' # ჰ
|
|
88
|
+
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: odni
|
|
3
|
+
id: 2015
|
|
4
|
+
language: ukr
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Office of the Director of National Intelligence Ukrainian Personal Names 2004 System
|
|
8
|
+
# url:
|
|
9
|
+
source: ICS 630-01, Annex M
|
|
10
|
+
creation_date: 2015
|
|
11
|
+
confirmation_date: 2015
|
|
12
|
+
description: |
|
|
13
|
+
This system is the Intelligence Community (IC) standard for the
|
|
14
|
+
transliteration of Ukrainian names that will be applied to all final
|
|
15
|
+
written reports and products for IC consumers. It is not intended to
|
|
16
|
+
eliminate variations of a name that can contribute forensic
|
|
17
|
+
information. Rather, it is to provide an IC standard Romanized
|
|
18
|
+
(English) transliteration from Ukrainian that can then be linked to
|
|
19
|
+
forensic information in ways that will help identify the referent of
|
|
20
|
+
the name.
|
|
21
|
+
|
|
22
|
+
tests:
|
|
23
|
+
- source: Андрій
|
|
24
|
+
expected: Andriy
|
|
25
|
+
- source: Борисенко
|
|
26
|
+
expected: Borysenko
|
|
27
|
+
- source: Володимир
|
|
28
|
+
expected: Volodymyr
|
|
29
|
+
- source: Богдан
|
|
30
|
+
expected: Bohdan
|
|
31
|
+
- source: Згурський
|
|
32
|
+
expected: Zhurskyy
|
|
33
|
+
- source: Дмитро
|
|
34
|
+
expected: Dmytro
|
|
35
|
+
- source: Олег
|
|
36
|
+
expected: Oleh
|
|
37
|
+
- source: Гаєвич
|
|
38
|
+
expected: Hayevych
|
|
39
|
+
- source: Жанна
|
|
40
|
+
expected: Zhanna
|
|
41
|
+
- source: Казимирчук
|
|
42
|
+
expected: Kazymyrchuk
|
|
43
|
+
- source: Михайленко
|
|
44
|
+
expected: Mykhaylenko
|
|
45
|
+
- source: Іващенко
|
|
46
|
+
expected: Ivashchenko
|
|
47
|
+
- source: Олексій
|
|
48
|
+
expected: Oleksiy
|
|
49
|
+
- source: Коваленко
|
|
50
|
+
expected: Kovalenko
|
|
51
|
+
- source: Леонід
|
|
52
|
+
expected: Leonid
|
|
53
|
+
- source: Маринич
|
|
54
|
+
expected: Marynych
|
|
55
|
+
- source: Наталія
|
|
56
|
+
expected: Nataliya
|
|
57
|
+
- source: Онищенко
|
|
58
|
+
expected: Onyshchenko
|
|
59
|
+
- source: Петро
|
|
60
|
+
expected: Petro
|
|
61
|
+
- source: Рибчинський
|
|
62
|
+
expected: Rybchynskyy
|
|
63
|
+
- source: Соломія
|
|
64
|
+
expected: Solomiya
|
|
65
|
+
- source: Троць
|
|
66
|
+
expected: Trots
|
|
67
|
+
- source: Уляна
|
|
68
|
+
expected: Ulyana
|
|
69
|
+
- source: Філіпчук
|
|
70
|
+
expected: Filipchuk
|
|
71
|
+
- source: Христина
|
|
72
|
+
expected: Khrystyna
|
|
73
|
+
- source: Стеценко
|
|
74
|
+
expected: Stetsenko
|
|
75
|
+
- source: Шевченко
|
|
76
|
+
expected: Shevchenko
|
|
77
|
+
- source: Гаращенко
|
|
78
|
+
expected: Harashchenko
|
|
79
|
+
- source: Юрій
|
|
80
|
+
expected: Yuriy
|
|
81
|
+
- source: Ярошенко
|
|
82
|
+
expected: Yaroshenko
|
|
83
|
+
- source: Костянтин
|
|
84
|
+
expected: Kostyantyn
|
|
85
|
+
|
|
86
|
+
map:
|
|
87
|
+
rules:
|
|
88
|
+
- pattern: \b\u2019\b # remove ’
|
|
89
|
+
result: ""
|
|
90
|
+
|
|
91
|
+
characters:
|
|
92
|
+
"\u0410": "A" # А
|
|
93
|
+
"\u0411": "B" # Б
|
|
94
|
+
"\u0412": "V" # В
|
|
95
|
+
"\u0413": "H" # Г
|
|
96
|
+
"\u0490": "G" # Ґ
|
|
97
|
+
"\u0414": "D" # Д
|
|
98
|
+
"\u0415": "E" # Е
|
|
99
|
+
"\u0404": "Ye" # Є
|
|
100
|
+
"\u0416": "Zh" # Ж
|
|
101
|
+
"\u0417": "Z" # З
|
|
102
|
+
"\u0418": "Y" # И
|
|
103
|
+
"\u0406": "I" # І
|
|
104
|
+
"\u0407": "Yi" # Ї
|
|
105
|
+
"\u0419": "Y" # Й
|
|
106
|
+
"\u041a": "K" # К
|
|
107
|
+
"\u041b": "L" # Л
|
|
108
|
+
"\u041c": "M" # М
|
|
109
|
+
"\u041d": "N" # Н
|
|
110
|
+
"\u041e": "O" # О
|
|
111
|
+
"\u041f": "P" # П
|
|
112
|
+
"\u0420": "R" # Р
|
|
113
|
+
"\u0421": "S" # С
|
|
114
|
+
"\u0422": "T" # Т
|
|
115
|
+
"\u0423": "U" # У
|
|
116
|
+
"\u0424": "F" # Ф
|
|
117
|
+
"\u0425": "Kh" # Х
|
|
118
|
+
"\u0426": "Ts" # Ц
|
|
119
|
+
"\u0427": "Ch" # Ч
|
|
120
|
+
"\u0428": "Sh" # Ш
|
|
121
|
+
"\u0429": "Shch" # Щ
|
|
122
|
+
"\u042e": "Yu" # Ю
|
|
123
|
+
"\u042f": "Ya" # Я
|
|
124
|
+
"\u042c": "" # Ь
|
|
125
|
+
"\u0430": "a" # а
|
|
126
|
+
"\u0431": "b" # б
|
|
127
|
+
"\u0432": "v" # в
|
|
128
|
+
"\u0433": "h" # г
|
|
129
|
+
"\u0491": "g" # ґ
|
|
130
|
+
"\u0434": "d" # д
|
|
131
|
+
"\u0435": "e" # е
|
|
132
|
+
"\u0454": "ye" # є
|
|
133
|
+
"\u0436": "zh" # ж
|
|
134
|
+
"\u0437": "z" # з
|
|
135
|
+
"\u0438": "y" # и
|
|
136
|
+
"\u0456": "i" # і
|
|
137
|
+
"\u0457": "yi" # ї
|
|
138
|
+
"\u0439": "y" # й
|
|
139
|
+
"\u043a": "k" # к
|
|
140
|
+
"\u043b": "l" # л
|
|
141
|
+
"\u043c": "m" # м
|
|
142
|
+
"\u043d": "n" # н
|
|
143
|
+
"\u043e": "o" # о
|
|
144
|
+
"\u043f": "p" # п
|
|
145
|
+
"\u0440": "r" # р
|
|
146
|
+
"\u0441": "s" # с
|
|
147
|
+
"\u0442": "t" # т
|
|
148
|
+
"\u0443": "u" # у
|
|
149
|
+
"\u0444": "f" # ф
|
|
150
|
+
"\u0445": "kh" # х
|
|
151
|
+
"\u0446": "ts" # ц
|
|
152
|
+
"\u0447": "ch" # ч
|
|
153
|
+
"\u0448": "sh" # ш
|
|
154
|
+
"\u0449": "shch" # щ
|
|
155
|
+
"\u044e": "yu" # ю
|
|
156
|
+
"\u044f": "ya" # я
|
|
157
|
+
"\u044c": "" # ь
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: royin
|
|
3
|
+
id: 1939-generic
|
|
4
|
+
language: tha
|
|
5
|
+
source_script: Thai
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Royal Thai General System of Transcription (1939) Generic
|
|
8
|
+
url: http://www.siamese-heritage.org/jsspdf/1941/JSS_033_1d_RoyalInstituteTranscriptionOfThaiIntoRomanCharacters.pdf
|
|
9
|
+
creation_date: 1939
|
|
10
|
+
adoption_date:
|
|
11
|
+
description: |
|
|
12
|
+
This map loads two external maps to convert Thai text first into phonemic Thai,
|
|
13
|
+
and then into IPA transcription.
|
|
14
|
+
|
|
15
|
+
The IPA transcription will then be handled by this map, and converted into
|
|
16
|
+
Royal Thai General System of Transcription (1939).
|
|
17
|
+
|
|
18
|
+
The first two parts are done via two external maps.
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
notes: |
|
|
22
|
+
This is a draft for the map.
|
|
23
|
+
The conversion from Thai to Phonemic Thai is still work-in-progress.
|
|
24
|
+
|
|
25
|
+
tests:
|
|
26
|
+
- source: "กษัตริย์"
|
|
27
|
+
expected: "kasat"
|
|
28
|
+
- source: "ประกาศ"
|
|
29
|
+
expected: "prakat"
|
|
30
|
+
# - source: "ราชบุรี่"
|
|
31
|
+
# expected: "ratburi"
|
|
32
|
+
# - source: "ปากลัด"
|
|
33
|
+
# expected: "pak-lat"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
chain: ["var-tha-Thai-Thai-phonemic" ,"var-tha-Thai-Zsym-ipa"]
|
|
37
|
+
|
|
38
|
+
map:
|
|
39
|
+
title-case: false
|
|
40
|
+
word_separator: " "
|
|
41
|
+
|
|
42
|
+
rules:
|
|
43
|
+
- pattern: '[˩˨˧˦˥]'
|
|
44
|
+
result : ''
|
|
45
|
+
|
|
46
|
+
postrules:
|
|
47
|
+
- pattern: '\.'
|
|
48
|
+
result: ''
|
|
49
|
+
|
|
50
|
+
characters:
|
|
51
|
+
|
|
52
|
+
dictionary:
|
|
53
|
+
|
|
54
|
+
'̯': ''
|
|
55
|
+
'̚': ''
|
|
56
|
+
|
|
57
|
+
'ʔ': ''
|
|
58
|
+
'ː': ''
|
|
59
|
+
|
|
60
|
+
't͡ɕʰ': 'ch'
|
|
61
|
+
't͡ɕ': 'čh'
|
|
62
|
+
'ŋ': 'ng'
|
|
63
|
+
'j': 'y'
|
|
64
|
+
'ɔ': 'o̦'
|
|
65
|
+
'ɤ': 'œ'
|
|
66
|
+
'ɛ': 'æ'
|
|
67
|
+
'ɯ': 'ư'
|
|
68
|
+
'ʰ': 'h'
|
|
69
|
+
|
|
70
|
+
'aːw': 'ao'
|
|
71
|
+
'aw': 'ao'
|
|
72
|
+
'a̯w': 'ao'
|
|
73
|
+
'eːw': 'eo'
|
|
74
|
+
'ew': 'eo'
|
|
75
|
+
'ɛːw': 'æo'
|
|
76
|
+
'ɛw': 'æo'
|
|
77
|
+
'iːw': 'iu'
|
|
78
|
+
'iw': 'iu'
|
|
79
|
+
|
|
80
|
+
'aːj': 'ai'
|
|
81
|
+
'aj': 'ai'
|
|
82
|
+
'a̯j': 'ai'
|
|
83
|
+
'ɔːj': 'o̦i'
|
|
84
|
+
'ɔj': 'o̦i'
|
|
85
|
+
'oːj': 'oi'
|
|
86
|
+
'oj': 'oi'
|
|
87
|
+
'ɤːj': 'œi'
|
|
88
|
+
'ɤj': 'œi'
|
|
89
|
+
'uːj': 'ui'
|
|
90
|
+
'uj': 'ui'
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: royin
|
|
3
|
+
id: 1968
|
|
4
|
+
language: tha
|
|
5
|
+
source_script: Thai
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Royal Thai General System of Transcription (1968)
|
|
8
|
+
url: http://www.royin.go.th/wp-content/uploads/royin-ebook/276/FileUpload/758_6484.pdf
|
|
9
|
+
creation_date: 1968
|
|
10
|
+
adoption_date:
|
|
11
|
+
description: |
|
|
12
|
+
This map loads two external maps to convert Thai text first into phonemic Thai,
|
|
13
|
+
and then into IPA transcription.
|
|
14
|
+
|
|
15
|
+
The IPA transcription will then be handled by this map, and converted into
|
|
16
|
+
Royal Thai General System of Transcription (1968).
|
|
17
|
+
|
|
18
|
+
The first two parts are done via two external maps.
|
|
19
|
+
|
|
20
|
+
notes: |
|
|
21
|
+
The conversion from Thai to Phonemic Thai is still work-in-progress.
|
|
22
|
+
|
|
23
|
+
tests:
|
|
24
|
+
- source: "สะพาน"
|
|
25
|
+
expected: "saphan"
|
|
26
|
+
- source: "ลานตา"
|
|
27
|
+
expected: "lanta"
|
|
28
|
+
- source: "บาง"
|
|
29
|
+
expected: "bang"
|
|
30
|
+
- source: "สมุทร"
|
|
31
|
+
expected: "samut"
|
|
32
|
+
- source: "ลำ"
|
|
33
|
+
expected: "lam"
|
|
34
|
+
- source: "สิงห์"
|
|
35
|
+
expected: "sing"
|
|
36
|
+
- source: "บุรี"
|
|
37
|
+
expected: "buri"
|
|
38
|
+
- source: "สตึก"
|
|
39
|
+
expected: "satuk"
|
|
40
|
+
- source: "พืช"
|
|
41
|
+
expected: "phut"
|
|
42
|
+
- source: "บรบือ"
|
|
43
|
+
expected: "borabu"
|
|
44
|
+
- source: "ภู"
|
|
45
|
+
expected: "phu"
|
|
46
|
+
- source: "ปะนาเระ"
|
|
47
|
+
expected: "panare"
|
|
48
|
+
- source: "เพ็ญ"
|
|
49
|
+
expected: "phen"
|
|
50
|
+
# - source: "เขน"
|
|
51
|
+
# expected: "khen"
|
|
52
|
+
- source: "แซะ"
|
|
53
|
+
expected: "sae"
|
|
54
|
+
# - source: "สะแก"
|
|
55
|
+
# expected: "sakae"
|
|
56
|
+
- source: "พะโต๊ะ"
|
|
57
|
+
expected: "phato"
|
|
58
|
+
- source: "ลพ"
|
|
59
|
+
expected: "lop"
|
|
60
|
+
# - source: "สามโก้"
|
|
61
|
+
# expected: "samko"
|
|
62
|
+
- source: "เกาะ"
|
|
63
|
+
expected: "ko"
|
|
64
|
+
- source: "บ่อ"
|
|
65
|
+
expected: "bo"
|
|
66
|
+
- source: "เซอะ"
|
|
67
|
+
expected: "soe"
|
|
68
|
+
- source: "อำเภอ"
|
|
69
|
+
expected: "amphoe"
|
|
70
|
+
- source: "เนิน"
|
|
71
|
+
expected: "noen"
|
|
72
|
+
# - source: "เพียะ"
|
|
73
|
+
# expected: "phia"
|
|
74
|
+
- source: "เทียน"
|
|
75
|
+
expected: "thian"
|
|
76
|
+
# - source: "เกือะ"
|
|
77
|
+
# expected: "kua"
|
|
78
|
+
- source: "เมือง"
|
|
79
|
+
expected: "muang"
|
|
80
|
+
# - source: "ผัวะ"
|
|
81
|
+
# expected: "phua"
|
|
82
|
+
- source: "บัว"
|
|
83
|
+
expected: "bua"
|
|
84
|
+
# - source: "ควน"
|
|
85
|
+
# expected: "khuan"
|
|
86
|
+
- source: "ใหญ่"
|
|
87
|
+
expected: "yai"
|
|
88
|
+
# - source: "ไผ่"
|
|
89
|
+
# expected: "phai"
|
|
90
|
+
- source: "ชัย"
|
|
91
|
+
expected: "chai"
|
|
92
|
+
- source: "ไทย"
|
|
93
|
+
expected: "thai"
|
|
94
|
+
# - source: "ปาย"
|
|
95
|
+
# expected: "pai"
|
|
96
|
+
- source: "เจ้า"
|
|
97
|
+
expected: "chao"
|
|
98
|
+
- source: "ข้าว"
|
|
99
|
+
expected: "khao"
|
|
100
|
+
# - source: "กุย"
|
|
101
|
+
# expected: "kui"
|
|
102
|
+
- source: "โดย"
|
|
103
|
+
expected: "doi"
|
|
104
|
+
# - source: "ดอย"
|
|
105
|
+
# expected: "doi"
|
|
106
|
+
# - source: "งิ้ว"
|
|
107
|
+
# expected: "ngiu"
|
|
108
|
+
- source: "เร็ว"
|
|
109
|
+
expected: "reo"
|
|
110
|
+
# - source: "เลว"
|
|
111
|
+
# expected: "leo"
|
|
112
|
+
# - source: "เลย"
|
|
113
|
+
# expected: "loei"
|
|
114
|
+
# - source: "เดือย"
|
|
115
|
+
# expected: "duai"
|
|
116
|
+
# - source: "ห้วย"
|
|
117
|
+
# expected: "huai"
|
|
118
|
+
- source: "แมว"
|
|
119
|
+
expected: "maeo"
|
|
120
|
+
- source: "เขียว"
|
|
121
|
+
expected: "khieu"
|
|
122
|
+
|
|
123
|
+
chain: ["var-tha-Thai-Thai-phonemic" ,"var-tha-Thai-Zsym-ipa"]
|
|
124
|
+
|
|
125
|
+
map:
|
|
126
|
+
title-case: false
|
|
127
|
+
word_separator: " "
|
|
128
|
+
|
|
129
|
+
rules:
|
|
130
|
+
- pattern: '[˩˨˧˦˥]'
|
|
131
|
+
result : ''
|
|
132
|
+
- pattern: '^'
|
|
133
|
+
result: '.'
|
|
134
|
+
|
|
135
|
+
postrules:
|
|
136
|
+
- pattern: '\.'
|
|
137
|
+
result: ''
|
|
138
|
+
|
|
139
|
+
characters:
|
|
140
|
+
|
|
141
|
+
dictionary:
|
|
142
|
+
|
|
143
|
+
'̯': ''
|
|
144
|
+
'̚': ''
|
|
145
|
+
|
|
146
|
+
'ʔ': ''
|
|
147
|
+
'ː': ''
|
|
148
|
+
|
|
149
|
+
't͡ɕʰ': 'ch'
|
|
150
|
+
't͡ɕ': 'ch'
|
|
151
|
+
'ŋ': 'ng'
|
|
152
|
+
'j': 'y'
|
|
153
|
+
'ɔ': 'o'
|
|
154
|
+
'ɤ': 'oe'
|
|
155
|
+
'ɛ': 'ae'
|
|
156
|
+
'ɯ': 'u'
|
|
157
|
+
'ʰ': 'h'
|
|
158
|
+
|
|
159
|
+
'aːw': 'ao'
|
|
160
|
+
'aw': 'ao'
|
|
161
|
+
'a̯w': 'eu'
|
|
162
|
+
'eːw': 'eo'
|
|
163
|
+
'ew': 'eo'
|
|
164
|
+
'ɛːw': 'aeo'
|
|
165
|
+
'ɛw': 'aeo'
|
|
166
|
+
'iːw': 'iu'
|
|
167
|
+
'iw': 'iu'
|
|
168
|
+
|
|
169
|
+
'aːj': 'ai'
|
|
170
|
+
'aj': 'ai'
|
|
171
|
+
'a̯j': 'ai'
|
|
172
|
+
'ɔːj': 'oi'
|
|
173
|
+
'ɔj': 'oi'
|
|
174
|
+
'oːj': 'oi'
|
|
175
|
+
'oj': 'oi'
|
|
176
|
+
'ɤːj': 'oei'
|
|
177
|
+
'ɤj': 'oei'
|
|
178
|
+
'uːj': 'ui'
|
|
179
|
+
'uj': 'ui'
|