interscript 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +246 -14
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +140 -16
- data/lib/interscript/command.rb +27 -0
- data/lib/interscript/mapping.rb +125 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
- data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +20 -5
- data/spec/spec_helper.rb +3 -1
- metadata +149 -24
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
|
@@ -72,9 +72,22 @@ map:
|
|
|
72
72
|
"ᠱ": "x"
|
|
73
73
|
"ᠶ": "y"
|
|
74
74
|
"ᠽ": "z"
|
|
75
|
-
"ᠣ":
|
|
76
|
-
- "o" # General use
|
|
77
|
-
- "ô" # For place names
|
|
78
|
-
"ᠤ":
|
|
79
|
-
- "u" # General use
|
|
80
|
-
- "û" # For place names
|
|
75
|
+
"ᠣ": "o"
|
|
76
|
+
# - "o" # General use
|
|
77
|
+
# - "ô" # For place names
|
|
78
|
+
"ᠤ": "u"
|
|
79
|
+
# - "u" # General use
|
|
80
|
+
# - "û" # For place names
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# @TODO Exceptional
|
|
84
|
+
#
|
|
85
|
+
# This is failing the whole test suite, but as far as I understood
|
|
86
|
+
# from the comment, this city name is exceptional, so we are temporarily
|
|
87
|
+
# adding it as exceptional rules for now.
|
|
88
|
+
#
|
|
89
|
+
# But we will need a native speaker's attention to help us out here, and maybe
|
|
90
|
+
# come up with some basic rules for this exceptional pattern.
|
|
91
|
+
#
|
|
92
|
+
|
|
93
|
+
"ᠬᠥᠬᠡᠬᠣᠲᠠ": "kökeqota"
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: ungegn
|
|
3
|
+
id: 1987
|
|
4
|
+
language: rus
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Russian Romanization system
|
|
8
|
+
url: http://www.eki.ee/wgrs/rom1_ru.htm
|
|
9
|
+
creation_date: 1987
|
|
10
|
+
confirmation_date: 2016
|
|
11
|
+
description: |
|
|
12
|
+
The United Nations recommended system was approved in 1987 (V/18),
|
|
13
|
+
based on the official system of the Main Administration of Geodesy and
|
|
14
|
+
Cartography of the former Soviet Union, also known as the GOST 1983
|
|
15
|
+
system (GOST 16876-71). The table was published as an annex to the
|
|
16
|
+
resolution.
|
|
17
|
+
|
|
18
|
+
The system is used in the Russian Federation and increasingly in
|
|
19
|
+
international cartographic products.
|
|
20
|
+
|
|
21
|
+
Russian uses the Cyrillic script which is alphabetic. The
|
|
22
|
+
romanization table is unambiguous and can be applied automatically. The
|
|
23
|
+
system is reversible, although rarely there can be ambiguities.
|
|
24
|
+
|
|
25
|
+
notes:
|
|
26
|
+
- "Cursive forms of some characters might be formed differently: Аа Бб
|
|
27
|
+
Вв Гг Дд Ее Ёё Жж Зз Ии Йй Кк Лл Мм Нн Оо Пп Рр Сс Тт Уу Фф Хх Цц Чч Шш
|
|
28
|
+
Щщ Ъъ Ыы Ьь Ээ Юю Яя."
|
|
29
|
+
|
|
30
|
+
- Fifth United Nations Conference on the Standardization of
|
|
31
|
+
Geographical Names. Montreal, 18–31 August 1987. Vol. I. Report of the
|
|
32
|
+
Conference, pp. 40–41.
|
|
33
|
+
|
|
34
|
+
tests:
|
|
35
|
+
- source: Aнaпa
|
|
36
|
+
expected: Anapa
|
|
37
|
+
|
|
38
|
+
- source: Бaбушкин
|
|
39
|
+
expected: Babuškin
|
|
40
|
+
|
|
41
|
+
- source: Вaвилово
|
|
42
|
+
expected: Vavilovo
|
|
43
|
+
|
|
44
|
+
- source: Гaгaрин
|
|
45
|
+
expected: Gagarin
|
|
46
|
+
|
|
47
|
+
- source: Дудинкa
|
|
48
|
+
expected: Dudinka
|
|
49
|
+
|
|
50
|
+
- source: Елисeeвкa
|
|
51
|
+
expected: Eliseevka
|
|
52
|
+
|
|
53
|
+
- source: Ёлкино
|
|
54
|
+
expected: "\u00CBlkino"
|
|
55
|
+
|
|
56
|
+
- source: Псëл
|
|
57
|
+
expected: Psël
|
|
58
|
+
|
|
59
|
+
- source: Жужa
|
|
60
|
+
expected: Žuža
|
|
61
|
+
|
|
62
|
+
- source: Звëздный
|
|
63
|
+
expected: Zvëzdnyj
|
|
64
|
+
|
|
65
|
+
- source: Идрицa
|
|
66
|
+
expected: Idrica
|
|
67
|
+
|
|
68
|
+
- source: Зaрaйск
|
|
69
|
+
expected: Zarajsk
|
|
70
|
+
|
|
71
|
+
- source: Кокaнд
|
|
72
|
+
expected: Kokand
|
|
73
|
+
|
|
74
|
+
- source: Лaлвaр
|
|
75
|
+
expected: Lalvar
|
|
76
|
+
|
|
77
|
+
- source: Мaймaк
|
|
78
|
+
expected: Majmak
|
|
79
|
+
|
|
80
|
+
- source: Нeжин
|
|
81
|
+
expected: Nežin
|
|
82
|
+
|
|
83
|
+
- source: Обoдoвкa
|
|
84
|
+
expected: Obodovka
|
|
85
|
+
|
|
86
|
+
- source: Пaп
|
|
87
|
+
expected: Pap
|
|
88
|
+
|
|
89
|
+
- source: Рeбрихa
|
|
90
|
+
expected: Rebriha
|
|
91
|
+
|
|
92
|
+
- source: Сaсoвo
|
|
93
|
+
expected: Sasovo
|
|
94
|
+
|
|
95
|
+
- source: Тaттa
|
|
96
|
+
expected: Tatta
|
|
97
|
+
|
|
98
|
+
- source: Уржум
|
|
99
|
+
expected: Uržum
|
|
100
|
+
|
|
101
|
+
- source: Фoфaнoвo
|
|
102
|
+
expected: Fofanovo
|
|
103
|
+
|
|
104
|
+
- source: Хoхломa
|
|
105
|
+
expected: Hohloma
|
|
106
|
+
|
|
107
|
+
- source: Цвeткoвo
|
|
108
|
+
expected: Cvetkovo
|
|
109
|
+
|
|
110
|
+
- source: Чeчeльник
|
|
111
|
+
expected: Čečel’nik
|
|
112
|
+
|
|
113
|
+
- source: Шишкинo
|
|
114
|
+
expected: Šiškino
|
|
115
|
+
|
|
116
|
+
- source: Щукинo
|
|
117
|
+
expected: Ščukino
|
|
118
|
+
|
|
119
|
+
- source: Пoдъячeвo
|
|
120
|
+
expected: Pod”jačevo
|
|
121
|
+
|
|
122
|
+
- source: Ыныкчaнский
|
|
123
|
+
expected: Ynykčanskij
|
|
124
|
+
|
|
125
|
+
- source: Пaрaньгa
|
|
126
|
+
expected: Paran’ga
|
|
127
|
+
|
|
128
|
+
- source: Кaзaнь
|
|
129
|
+
expected: Kazan’
|
|
130
|
+
|
|
131
|
+
- source: Щучьe
|
|
132
|
+
expected: Ščuč’e
|
|
133
|
+
|
|
134
|
+
- source: Элистa
|
|
135
|
+
expected: Èlista
|
|
136
|
+
|
|
137
|
+
- source: Юринo
|
|
138
|
+
expected: Jurino
|
|
139
|
+
|
|
140
|
+
- source: Юхнoв
|
|
141
|
+
expected: Juhnov
|
|
142
|
+
|
|
143
|
+
- source: Юрюзaнь
|
|
144
|
+
expected: Jurjuzan’
|
|
145
|
+
|
|
146
|
+
- source: Ямaл
|
|
147
|
+
expected: Jamal
|
|
148
|
+
|
|
149
|
+
- source: Язъявaн
|
|
150
|
+
expected: Jaz”javan
|
|
151
|
+
|
|
152
|
+
- source: Яя
|
|
153
|
+
expected: Jaja
|
|
154
|
+
|
|
155
|
+
- source: Вязьмa
|
|
156
|
+
expected: Vjaz’ma
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
map:
|
|
160
|
+
inherit: gost-rus-cyrl-latn-16876-71-1983
|
|
161
|
+
|
|
162
|
+
characters:
|
|
163
|
+
'\u042A' : '”' # Ъ
|
|
164
|
+
'\u042C' : '’' # Ь
|
|
165
|
+
'\u044A' : '”' # ъ
|
|
166
|
+
'\u044C' : '’' # ь
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: un
|
|
3
|
+
id: 1998
|
|
4
|
+
language: ukr
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: ROMANIZATION SYSTEM FOR BELARUSIAN, RUSSIAN AND UKRAINIAN CYRILLIC
|
|
8
|
+
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/7th-uncsgn-docs/econf/7th_UNCSGN_econf.91_3_Add1.pdf
|
|
9
|
+
creation_date: 1998
|
|
10
|
+
|
|
11
|
+
tests:
|
|
12
|
+
|
|
13
|
+
map:
|
|
14
|
+
inherit: gost-rus-cyrl-latn-16876-71-1983
|
|
15
|
+
|
|
16
|
+
characters:
|
|
17
|
+
'\u0490' : '?' # Ґ
|
|
18
|
+
'\u0491' : '?' # ґ
|
|
19
|
+
|
|
20
|
+
'\u0404' : "Je" # Є
|
|
21
|
+
'\u0454' : "je" # є
|
|
22
|
+
|
|
23
|
+
'\u0406' : 'I' # І
|
|
24
|
+
'\u0456' : 'i' # і
|
|
25
|
+
|
|
26
|
+
'\u0407' : 'I' # Ї
|
|
27
|
+
'\u0457' : 'i' # ї
|
|
28
|
+
|
|
29
|
+
'\u2019' : '?'
|
|
30
|
+
"'" : '?'
|
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: var
|
|
3
|
+
id: hepburn-1886
|
|
4
|
+
language: jpn
|
|
5
|
+
source_script: Hrkt
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Traditional Hepburn
|
|
8
|
+
url: http://www.ab.cyberhome.ne.jp/~kaizu/roomazi/doc/hep3.html
|
|
9
|
+
creation_date: 1886
|
|
10
|
+
adoption_date:
|
|
11
|
+
description:
|
|
12
|
+
This is a traditional version of Hepburn romanization.
|
|
13
|
+
|
|
14
|
+
notes:
|
|
15
|
+
"
|
|
16
|
+
The book was published before the Japanese orthographic reform,
|
|
17
|
+
and this map takes the reformed orthography in Kana as the source
|
|
18
|
+
form.
|
|
19
|
+
https://en.wikipedia.org/wiki/Historical_kana_orthography
|
|
20
|
+
|
|
21
|
+
The distinction for long-vowel vs. repeating vowels has not been
|
|
22
|
+
implemented.
|
|
23
|
+
For example, the consecutive o's in these words are considered
|
|
24
|
+
a case of a long vowel, and are transliterated as ō:
|
|
25
|
+
|
|
26
|
+
氷 (こおり) - kōri, 大阪(おおさか)- Ōsaka
|
|
27
|
+
|
|
28
|
+
If there are two consecutive o's in a string, but they belong to
|
|
29
|
+
different morphemes, then they should be transliterated separately.
|
|
30
|
+
|
|
31
|
+
小躍り(こおどり)- koodori
|
|
32
|
+
|
|
33
|
+
The same goes for the combinations o+u, u+u as well.
|
|
34
|
+
|
|
35
|
+
However, this cannot be easily determined from the Kana.
|
|
36
|
+
Lexical knowledge is needed, and sometimes the Kanji representation
|
|
37
|
+
will give more hints about morpheme boundary.
|
|
38
|
+
|
|
39
|
+
For now, this map will assume all o+o, o+u, u+u combinations to
|
|
40
|
+
be instances of long vowels.
|
|
41
|
+
"
|
|
42
|
+
|
|
43
|
+
tests:
|
|
44
|
+
- source: "ぐんま"
|
|
45
|
+
expected: "gumma"
|
|
46
|
+
- source: "しんよう"
|
|
47
|
+
expected: "shin-yō"
|
|
48
|
+
- source: "きんようび"
|
|
49
|
+
expected: "kin-yōbi"
|
|
50
|
+
- source: "とうきょう"
|
|
51
|
+
expected: "tōkyō"
|
|
52
|
+
- source: "しんばし"
|
|
53
|
+
expected: "shimbashi"
|
|
54
|
+
|
|
55
|
+
map:
|
|
56
|
+
|
|
57
|
+
rules:
|
|
58
|
+
# Add a dash (-) between ん and a vowel sound or ya, yu, yo
|
|
59
|
+
- pattern: "([んン])(?=[あいうえおやゆよアイウエオヤユヨ])"
|
|
60
|
+
result: "\\1-"
|
|
61
|
+
|
|
62
|
+
# Convert ん into m before b, m, p
|
|
63
|
+
- pattern: "[んン](?=[ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ])"
|
|
64
|
+
result: "m"
|
|
65
|
+
|
|
66
|
+
postrules:
|
|
67
|
+
# Handling of っ/ッ
|
|
68
|
+
#
|
|
69
|
+
# The kana っ/ッ is a geminate marker.
|
|
70
|
+
# When followed by a consonant, repeat the first letter of
|
|
71
|
+
# the following syllable. Exception: the combination -cch-
|
|
72
|
+
# should be transliterated as -tch-
|
|
73
|
+
#
|
|
74
|
+
# If っ/ッ is not followed by a consonant, then it is usually
|
|
75
|
+
# phonetically realised as an abrupt stop or shortening of
|
|
76
|
+
# the previous syllable. There is no documented or commonly
|
|
77
|
+
# accepted way to transliterate this sound.
|
|
78
|
+
|
|
79
|
+
- pattern: "[っッ]([BbDdFfGgHhJjKkLlMmNnPpQqRrSsTtVvWwXxYyZz])"
|
|
80
|
+
result: "\\1\\1"
|
|
81
|
+
- pattern: "[っッ]([Cc])" # ッ followed by ch-
|
|
82
|
+
result: "t\\1"
|
|
83
|
+
- pattern: "[っッ]" # drop all other っッ.
|
|
84
|
+
result: ""
|
|
85
|
+
|
|
86
|
+
# In Traditional Hepburn, long o (which can be o+o or o+u), and long u
|
|
87
|
+
# are transliterated as ō and ū.
|
|
88
|
+
#
|
|
89
|
+
# Macron should not be used if two repeating letters split across
|
|
90
|
+
# a morpheme boundary.
|
|
91
|
+
#
|
|
92
|
+
# Long vowels in loanwords are indicated with a macron instead
|
|
93
|
+
# of letter doubling.
|
|
94
|
+
|
|
95
|
+
- pattern: "a[ー]"
|
|
96
|
+
result: "ā"
|
|
97
|
+
- pattern: "i[ー]"
|
|
98
|
+
result: "ī"
|
|
99
|
+
- pattern: "u[ーu]"
|
|
100
|
+
result: "ū"
|
|
101
|
+
- pattern: "e[ー]"
|
|
102
|
+
result: "ē"
|
|
103
|
+
- pattern: "o[ーo]"
|
|
104
|
+
result: "ō"
|
|
105
|
+
|
|
106
|
+
characters:
|
|
107
|
+
|
|
108
|
+
# Hiragana
|
|
109
|
+
|
|
110
|
+
"あ": "a"
|
|
111
|
+
"い": "i"
|
|
112
|
+
"う": "u"
|
|
113
|
+
"え": "e"
|
|
114
|
+
"お": "o"
|
|
115
|
+
"おう": "ō"
|
|
116
|
+
|
|
117
|
+
"か": "ka"
|
|
118
|
+
"き": "ki"
|
|
119
|
+
"く": "ku"
|
|
120
|
+
"け": "ke"
|
|
121
|
+
"こ": "ko"
|
|
122
|
+
"きゃ": "kya"
|
|
123
|
+
"きゅ": "kyu"
|
|
124
|
+
"きょ": "kyo"
|
|
125
|
+
"きょう": "kyō"
|
|
126
|
+
"こう": "kō"
|
|
127
|
+
|
|
128
|
+
"さ": "sa"
|
|
129
|
+
"し": "shi"
|
|
130
|
+
"す": "su"
|
|
131
|
+
"せ": "se"
|
|
132
|
+
"そ": "so"
|
|
133
|
+
"しゃ": "sha"
|
|
134
|
+
"しゅ": "shu"
|
|
135
|
+
"しょ": "sho"
|
|
136
|
+
"しょう": "shō"
|
|
137
|
+
"そう": "sō"
|
|
138
|
+
|
|
139
|
+
"た": "ta"
|
|
140
|
+
"ち": "chi"
|
|
141
|
+
"つ": "tsu"
|
|
142
|
+
"て": "te"
|
|
143
|
+
"と": "to"
|
|
144
|
+
"ちゃ": "cha"
|
|
145
|
+
"ちゅ": "chu"
|
|
146
|
+
"ちょ": "cho"
|
|
147
|
+
"とう": "tō"
|
|
148
|
+
"ちょう": "chō"
|
|
149
|
+
|
|
150
|
+
"な": "na"
|
|
151
|
+
"に": "ni"
|
|
152
|
+
"ぬ": "nu"
|
|
153
|
+
"ね": "ne"
|
|
154
|
+
"の": "no"
|
|
155
|
+
"にゃ": "nya"
|
|
156
|
+
"にゅ": "nyu"
|
|
157
|
+
"にょ": "nyo"
|
|
158
|
+
"にょう": "nyō"
|
|
159
|
+
"のう": "nō"
|
|
160
|
+
|
|
161
|
+
"は": "ha"
|
|
162
|
+
"ひ": "hi"
|
|
163
|
+
"ふ": "fu"
|
|
164
|
+
"へ": "he"
|
|
165
|
+
"ほ": "ho"
|
|
166
|
+
"ひゃ": "hya"
|
|
167
|
+
"ひゅ": "hyu"
|
|
168
|
+
"ひょ": "hyo"
|
|
169
|
+
"ひょう": "hyō"
|
|
170
|
+
"ほう": "hō"
|
|
171
|
+
|
|
172
|
+
"ま": "ma"
|
|
173
|
+
"み": "mi"
|
|
174
|
+
"む": "mu"
|
|
175
|
+
"め": "me"
|
|
176
|
+
"も": "mo"
|
|
177
|
+
"みゃ": "mya"
|
|
178
|
+
"みゅ": "myu"
|
|
179
|
+
"みょ": "myo"
|
|
180
|
+
"みょう": "myō"
|
|
181
|
+
"もう": "mō"
|
|
182
|
+
|
|
183
|
+
"や": "ya"
|
|
184
|
+
"ゆ": "yu"
|
|
185
|
+
"よ": "yo"
|
|
186
|
+
"よう": "yō"
|
|
187
|
+
|
|
188
|
+
"ら": "ra"
|
|
189
|
+
"り": "ri"
|
|
190
|
+
"る": "ru"
|
|
191
|
+
"れ": "re"
|
|
192
|
+
"ろ": "ro"
|
|
193
|
+
"りゃ": "rya"
|
|
194
|
+
"りゅ": "ryu"
|
|
195
|
+
"りょ": "ryo"
|
|
196
|
+
"りょう": "ryō"
|
|
197
|
+
"ろう": "rō"
|
|
198
|
+
|
|
199
|
+
"わ": "wa"
|
|
200
|
+
"を": "wo"
|
|
201
|
+
|
|
202
|
+
"が": "ga"
|
|
203
|
+
"ぎ": "gi"
|
|
204
|
+
"ぐ": "gu"
|
|
205
|
+
"げ": "ge"
|
|
206
|
+
"ご": "go"
|
|
207
|
+
"ぎゃ": "gya"
|
|
208
|
+
"ぎゅ": "gyu"
|
|
209
|
+
"ぎょ": "gyo"
|
|
210
|
+
"ぎょう": "gyō"
|
|
211
|
+
"ごう": "gō"
|
|
212
|
+
|
|
213
|
+
"ざ": "za"
|
|
214
|
+
"じ": "ji"
|
|
215
|
+
"ず": "zu"
|
|
216
|
+
"ぜ": "ze"
|
|
217
|
+
"ぞ": "zo"
|
|
218
|
+
"じゃ": "ja"
|
|
219
|
+
"じゅ": "ju"
|
|
220
|
+
"じょ": "jo"
|
|
221
|
+
"じょう": "jō"
|
|
222
|
+
"ぞう": "zō"
|
|
223
|
+
|
|
224
|
+
"だ": "da"
|
|
225
|
+
"ぢ": "ji"
|
|
226
|
+
"づ": "zu"
|
|
227
|
+
"で": "de"
|
|
228
|
+
"ど": "do"
|
|
229
|
+
"ぢゃ": "ja"
|
|
230
|
+
"ぢゅ": "ju"
|
|
231
|
+
"ぢょ": "jo"
|
|
232
|
+
"どう": "dō"
|
|
233
|
+
|
|
234
|
+
"ば": "ba"
|
|
235
|
+
"び": "bi"
|
|
236
|
+
"ぶ": "bu"
|
|
237
|
+
"べ": "be"
|
|
238
|
+
"ぼ": "bo"
|
|
239
|
+
"びゃ": "bya"
|
|
240
|
+
"びゅ": "byu"
|
|
241
|
+
"びょ": "byo"
|
|
242
|
+
"びょう": "byō"
|
|
243
|
+
"ぼう": "bō"
|
|
244
|
+
|
|
245
|
+
"ぱ": "pa"
|
|
246
|
+
"ぴ": "pi"
|
|
247
|
+
"ぷ": "pu"
|
|
248
|
+
"ぺ": "pe"
|
|
249
|
+
"ぽ": "po"
|
|
250
|
+
"ぴゃ": "pya"
|
|
251
|
+
"ぴゅ": "pyu"
|
|
252
|
+
"ぴょ": "pyo"
|
|
253
|
+
"ぴょう": "pyō"
|
|
254
|
+
"ぽう": "pō"
|
|
255
|
+
|
|
256
|
+
"ん": "n"
|
|
257
|
+
|
|
258
|
+
# Katakana
|
|
259
|
+
|
|
260
|
+
"ア": "a"
|
|
261
|
+
"イ": "i"
|
|
262
|
+
"ウ": "u"
|
|
263
|
+
"エ": "e"
|
|
264
|
+
"オ": "o"
|
|
265
|
+
"オウ": "ō"
|
|
266
|
+
|
|
267
|
+
"カ": "ka"
|
|
268
|
+
"キ": "ki"
|
|
269
|
+
"ク": "ku"
|
|
270
|
+
"ケ": "ke"
|
|
271
|
+
"コ": "ko"
|
|
272
|
+
"キャ": "kya"
|
|
273
|
+
"キュ": "kyu"
|
|
274
|
+
"キョ": "kyo"
|
|
275
|
+
"キョウ": "kyō"
|
|
276
|
+
"コウ": "kō"
|
|
277
|
+
|
|
278
|
+
"サ": "sa"
|
|
279
|
+
"シ": "shi"
|
|
280
|
+
"ス": "su"
|
|
281
|
+
"セ": "se"
|
|
282
|
+
"ソ": "so"
|
|
283
|
+
"シャ": "sha"
|
|
284
|
+
"シュ": "shu"
|
|
285
|
+
"ショ": "sho"
|
|
286
|
+
"ショウ": "shō"
|
|
287
|
+
"ソウ": "sō"
|
|
288
|
+
|
|
289
|
+
"タ": "ta"
|
|
290
|
+
"チ": "chi"
|
|
291
|
+
"ツ": "tsu"
|
|
292
|
+
"テ": "te"
|
|
293
|
+
"ト": "to"
|
|
294
|
+
"チャ": "cha"
|
|
295
|
+
"チュ": "chu"
|
|
296
|
+
"チョ": "cho"
|
|
297
|
+
"チョウ": "chō"
|
|
298
|
+
"トウ": "tō"
|
|
299
|
+
|
|
300
|
+
"ナ": "na"
|
|
301
|
+
"ニ": "ni"
|
|
302
|
+
"ヌ": "nu"
|
|
303
|
+
"ネ": "ne"
|
|
304
|
+
"ノ": "no"
|
|
305
|
+
"ニャ": "nya"
|
|
306
|
+
"ニュ": "nyu"
|
|
307
|
+
"ニョ": "nyo"
|
|
308
|
+
"ニョウ": "nyō"
|
|
309
|
+
"ノウ": "nō"
|
|
310
|
+
|
|
311
|
+
"ハ": "ha"
|
|
312
|
+
"ヒ": "hi"
|
|
313
|
+
"フ": "fu"
|
|
314
|
+
"ヘ": "he"
|
|
315
|
+
"ホ": "ho"
|
|
316
|
+
"ヒャ": "hya"
|
|
317
|
+
"ヒュ": "hyu"
|
|
318
|
+
"ヒョ": "hyo"
|
|
319
|
+
"ヒョウ": "hyō"
|
|
320
|
+
"ホウ": "hō"
|
|
321
|
+
|
|
322
|
+
"マ": "ma"
|
|
323
|
+
"ミ": "mi"
|
|
324
|
+
"ム": "mu"
|
|
325
|
+
"メ": "me"
|
|
326
|
+
"モ": "mo"
|
|
327
|
+
"ミャ": "mya"
|
|
328
|
+
"ミュ": "myu"
|
|
329
|
+
"ミョ": "myo"
|
|
330
|
+
"ミョウ": "myō"
|
|
331
|
+
"モウ": "mō"
|
|
332
|
+
|
|
333
|
+
"ヤ": "ya"
|
|
334
|
+
"ユ": "yu"
|
|
335
|
+
"ヨ": "yo"
|
|
336
|
+
"ヨウ": "yō"
|
|
337
|
+
|
|
338
|
+
"ラ": "ra"
|
|
339
|
+
"リ": "ri"
|
|
340
|
+
"ル": "ru"
|
|
341
|
+
"レ": "re"
|
|
342
|
+
"ロ": "ro"
|
|
343
|
+
"リャ": "rya"
|
|
344
|
+
"リュ": "ryu"
|
|
345
|
+
"リョ": "ryo"
|
|
346
|
+
"リョウ": "ryō"
|
|
347
|
+
"ロウ": "rō"
|
|
348
|
+
|
|
349
|
+
"ワ": "wa"
|
|
350
|
+
"ヲ": "wo"
|
|
351
|
+
|
|
352
|
+
"ガ": "ga"
|
|
353
|
+
"ギ": "gi"
|
|
354
|
+
"グ": "gu"
|
|
355
|
+
"ゲ": "ge"
|
|
356
|
+
"ゴ": "go"
|
|
357
|
+
"ギャ": "gya"
|
|
358
|
+
"ギュ": "gyu"
|
|
359
|
+
"ギョ": "gyo"
|
|
360
|
+
"ギョウ": "gyō"
|
|
361
|
+
"ゴウ": "gō"
|
|
362
|
+
|
|
363
|
+
"ザ": "za"
|
|
364
|
+
"ジ": "ji"
|
|
365
|
+
"ズ": "zu"
|
|
366
|
+
"ゼ": "ze"
|
|
367
|
+
"ゾ": "zo"
|
|
368
|
+
"ジャ": "ja"
|
|
369
|
+
"ジュ": "ju"
|
|
370
|
+
"ジョ": "jo"
|
|
371
|
+
"ジョウ": "jō"
|
|
372
|
+
"ゾウ": "zō"
|
|
373
|
+
|
|
374
|
+
"ダ": "da"
|
|
375
|
+
"ヂ": "ji"
|
|
376
|
+
"ヅ": "zu"
|
|
377
|
+
"デ": "de"
|
|
378
|
+
"ド": "do"
|
|
379
|
+
"ヂャ": "ja"
|
|
380
|
+
"ヂュ": "ju"
|
|
381
|
+
"ヂョ": "jo"
|
|
382
|
+
"ドウ": "dō"
|
|
383
|
+
|
|
384
|
+
"バ": "ba"
|
|
385
|
+
"ビ": "bi"
|
|
386
|
+
"ブ": "bu"
|
|
387
|
+
"ベ": "be"
|
|
388
|
+
"ボ": "bo"
|
|
389
|
+
"ビャ": "bya"
|
|
390
|
+
"ビュ": "byu"
|
|
391
|
+
"ビョ": "byo"
|
|
392
|
+
"ビョウ": "byō"
|
|
393
|
+
"ボウ": "bō"
|
|
394
|
+
|
|
395
|
+
"パ": "pa"
|
|
396
|
+
"ピ": "pi"
|
|
397
|
+
"プ": "pu"
|
|
398
|
+
"ペ": "pe"
|
|
399
|
+
"ポ": "po"
|
|
400
|
+
"ピャ": "pya"
|
|
401
|
+
"ピュ": "pyu"
|
|
402
|
+
"ピョ": "pyo"
|
|
403
|
+
"ピョウ": "pyō"
|
|
404
|
+
"ポウ": "pō"
|
|
405
|
+
|
|
406
|
+
"ン": "n"
|