interscript 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/bas-rus-Cyrl-Latn-bss.yaml +149 -0
- data/maps/bas-rus-Cyrl-Latn-oss.yaml +149 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +109 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +92 -0
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +7503 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +93 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +233 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +90 -0
- data/maps/cn-chn-Hans-Latn-pinyin.yaml +24760 -0
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +336 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +125 -0
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +123 -0
- data/maps/icao-gre-Grek-Latn-9303.yaml +101 -0
- data/maps/icao-heb-Hebr-Latn-9303.yaml +157 -0
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +118 -0
- data/maps/icao-per-Arab-Latn-9303.yaml +105 -0
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +119 -0
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +118 -0
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +121 -0
- data/maps/iso-rus-Cyrl-Latn-iso9.yaml +273 -0
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +330 -0
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +308 -0
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +313 -0
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +354 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +80 -0
- data/spec/interscript_spec.rb +11 -0
- data/spec/spec_helper.rb +1 -0
- metadata +32 -3
@@ -0,0 +1,336 @@
|
|
1
|
+
---
|
2
|
+
authority_id: mext
|
3
|
+
id: hepburn
|
4
|
+
language: jpn
|
5
|
+
source_script: Hrkt
|
6
|
+
destination_script: Latn
|
7
|
+
name: Romanization of Japanese, Modified Hepburn System
|
8
|
+
url: http://www.eki.ee/wgrs/rom2_ja.htm
|
9
|
+
creation_date: 1954
|
10
|
+
adoption_date: 1954-12-09
|
11
|
+
description: |
|
12
|
+
Widely used Romanization system in customs, e.g. in passports. In
|
13
|
+
international cartographic products the Modified Hepburn System remains
|
14
|
+
the most used system.
|
15
|
+
|
16
|
+
notes:
|
17
|
+
- A small-script tu/tsu form (ッ / っ) is inserted between kana symbols to indicate a double consonant (kk, ss, ssh, tt, tts, tch, pp in Hepburn).
|
18
|
+
- ン / ん is romanized in modified Hepburn as n’ before y or a vowel letter, and as n in all other cases; earlier, m was also used before b, p, or m.
|
19
|
+
- Long vowels are expressed in Hepburn by placing a macron (¯) over a vowel.
|
20
|
+
- The romanization in parentheses (in modified Hepburn) is used only in those cases where the kana symbol is known to be pronounced in the manner indicated.
|
21
|
+
- The combination in parentheses is used to denote the word meaning ’big, great’.
|
22
|
+
|
23
|
+
tests:
|
24
|
+
- source: おばあさん
|
25
|
+
expected: obāsan
|
26
|
+
- source: おにいさん
|
27
|
+
expected: oniisan
|
28
|
+
- source: みずうみ
|
29
|
+
expected: mizuumi
|
30
|
+
- source: とおまわり
|
31
|
+
expected: tōmawari
|
32
|
+
- source: べんきょう
|
33
|
+
expected: benkyō
|
34
|
+
- source: じゃあく
|
35
|
+
expected: jaaku
|
36
|
+
- source: バレーボール
|
37
|
+
expected: barēbōru
|
38
|
+
- source: スーパーマン
|
39
|
+
expected: sūpāman
|
40
|
+
|
41
|
+
map:
|
42
|
+
characters:
|
43
|
+
"あ": "a"
|
44
|
+
"い": "i"
|
45
|
+
"う": "u"
|
46
|
+
"え": "e"
|
47
|
+
"お": "o"
|
48
|
+
"か": "ka"
|
49
|
+
"き": "ki"
|
50
|
+
"く": "ku"
|
51
|
+
"け":
|
52
|
+
- "ke"
|
53
|
+
- "ga" # See note 4
|
54
|
+
"こ": "ko"
|
55
|
+
"さ": "sa"
|
56
|
+
"し": "shi"
|
57
|
+
"す": "su"
|
58
|
+
"せ": "se"
|
59
|
+
"そ": "so"
|
60
|
+
"た": "ta"
|
61
|
+
"ち": "chi"
|
62
|
+
"つ": "tsu" # See note 1
|
63
|
+
"て": "te"
|
64
|
+
"と": "to"
|
65
|
+
"な": "na"
|
66
|
+
"に": "ni"
|
67
|
+
"ぬ": "nu"
|
68
|
+
"ね": "ne"
|
69
|
+
"の": "no"
|
70
|
+
"は":
|
71
|
+
- "ha"
|
72
|
+
- "wa" # See note 4
|
73
|
+
"ひ": "hi"
|
74
|
+
"ふ": "fu"
|
75
|
+
"へ":
|
76
|
+
- "he"
|
77
|
+
- "e" # See note 4
|
78
|
+
"ほ": "ho"
|
79
|
+
"ま": "ma"
|
80
|
+
"み": "mi"
|
81
|
+
"む": "mu"
|
82
|
+
"め": "me"
|
83
|
+
"も": "mo"
|
84
|
+
"や": "ya"
|
85
|
+
"ゆ": "yu"
|
86
|
+
"よ": "yo"
|
87
|
+
"ら": "ra"
|
88
|
+
"り": "ri"
|
89
|
+
"る": "ru"
|
90
|
+
"れ": "re"
|
91
|
+
"ろ": "ro"
|
92
|
+
"わ": "wa"
|
93
|
+
"ん": "n" # See note 2
|
94
|
+
"が": "ga"
|
95
|
+
"ぎ": "gi"
|
96
|
+
"ぐ": "gu"
|
97
|
+
"げ": "ge"
|
98
|
+
"ご": "go"
|
99
|
+
"ざ": "za"
|
100
|
+
"じ": "ji"
|
101
|
+
"ず": "zu"
|
102
|
+
"ぜ": "ze"
|
103
|
+
"ぞ": "zo"
|
104
|
+
"だ": "da"
|
105
|
+
"ぢ": "ji"
|
106
|
+
"づ": "zu"
|
107
|
+
"で": "de"
|
108
|
+
"ど": "do"
|
109
|
+
"ば": "ba"
|
110
|
+
"び": "bi"
|
111
|
+
"ぶ": "bu"
|
112
|
+
"べ": "be"
|
113
|
+
"ぼ": "bo"
|
114
|
+
"ぱ": "pa"
|
115
|
+
"ぴ": "pi"
|
116
|
+
"ぷ": "pu"
|
117
|
+
"ぺ": "pe"
|
118
|
+
"ぽ": "po"
|
119
|
+
"おぅ": "ō"
|
120
|
+
"おお": "ō" # See note 4
|
121
|
+
"きゃ": "kya"
|
122
|
+
"きゅ": "kyu"
|
123
|
+
"きゅぅ": "kyū"
|
124
|
+
"きょ": "kyo"
|
125
|
+
"きょぅ": "kyō"
|
126
|
+
"こぅ": "kō"
|
127
|
+
"しゃ": "sha"
|
128
|
+
"しゅ": "shu"
|
129
|
+
"しゅぅ": "shū"
|
130
|
+
"しょ": "sho"
|
131
|
+
"しょぅ": "shō"
|
132
|
+
"そぅ": "sō"
|
133
|
+
"ちゃ": "cha"
|
134
|
+
"ちゅ": "chu"
|
135
|
+
"ちゅぅ": "chū"
|
136
|
+
"ちょ": "cho"
|
137
|
+
"ちょぅ": "chō"
|
138
|
+
"とぅ": "tō"
|
139
|
+
"にゃ": "nya"
|
140
|
+
"にゅ": "nyu"
|
141
|
+
"にゅぅ": "nyū"
|
142
|
+
"にょ": "nyo"
|
143
|
+
"にょぅ": "nyō"
|
144
|
+
"のぅ": "nō"
|
145
|
+
"ひゃ": "hya"
|
146
|
+
"ひゅ": "hyu"
|
147
|
+
"ひゅぅ": "hyū"
|
148
|
+
"ひょ": "hyo"
|
149
|
+
"ひょぅ": "hyō"
|
150
|
+
"ほぅ":
|
151
|
+
- "hō"
|
152
|
+
- "ō" # See note 4
|
153
|
+
"みゃ": "mya"
|
154
|
+
"みゅ": "myu"
|
155
|
+
"みゅぅ": "myū"
|
156
|
+
"みょ": "myo"
|
157
|
+
"みょぅ": "myō"
|
158
|
+
"もぅ": "mō"
|
159
|
+
"よぅ": "yō"
|
160
|
+
"りゃ": "rya"
|
161
|
+
"りゅ": "ryu"
|
162
|
+
"りゅぅ": "ryū"
|
163
|
+
"りょ": "ryo"
|
164
|
+
"りょぅ": "ryō"
|
165
|
+
"ろぅ": "rō"
|
166
|
+
"ぎゃ": "gya"
|
167
|
+
"ぎゅ": "gyu"
|
168
|
+
"ぎゅぅ": "gyū"
|
169
|
+
"ぎょ": "gyo"
|
170
|
+
"ぎょぅ": "gyō"
|
171
|
+
"ごぅ": "gō"
|
172
|
+
"じゃ": "ja"
|
173
|
+
"じゅ": "ju"
|
174
|
+
"じゅぅ": "jū"
|
175
|
+
"じょ": "jo"
|
176
|
+
"じょぅ": "jō"
|
177
|
+
"ぞぅ": "zō"
|
178
|
+
"どぅ": "dō"
|
179
|
+
"びゃ": "bya"
|
180
|
+
"びゅ": "byu"
|
181
|
+
"びゅぅ": "byū"
|
182
|
+
"びょ": "byo"
|
183
|
+
"びょぅ": "byō"
|
184
|
+
"ぼぅ": "bō"
|
185
|
+
"ぴゃ": "pya"
|
186
|
+
"ぴゅ": "pyu"
|
187
|
+
"ぴゅぅ": "pyū"
|
188
|
+
"ぴょ": "pyo"
|
189
|
+
"ぴょぅ": "pyō"
|
190
|
+
"ぽぅ": "pō"
|
191
|
+
"ア": "a"
|
192
|
+
"イ": "i"
|
193
|
+
"ウ": "u"
|
194
|
+
"エ": "e"
|
195
|
+
"オ": "o"
|
196
|
+
"カ": "ka"
|
197
|
+
"キ": "ki"
|
198
|
+
"ク": "ku"
|
199
|
+
"ケ": "ke"
|
200
|
+
"コ": "ko"
|
201
|
+
"サ": "sa"
|
202
|
+
"シ": "shi"
|
203
|
+
"ス": "su"
|
204
|
+
"セ": "se"
|
205
|
+
"ソ": "so"
|
206
|
+
"タ": "ta"
|
207
|
+
"チ": "chi"
|
208
|
+
"ツ": "tsu" # See note 1
|
209
|
+
"テ": "te"
|
210
|
+
"ト": "to"
|
211
|
+
"ナ": "na"
|
212
|
+
"ニ": "ni"
|
213
|
+
"ヌ": "nu"
|
214
|
+
"ネ": "ne"
|
215
|
+
"ノ": "no"
|
216
|
+
"ハ":
|
217
|
+
- "ha"
|
218
|
+
- "wa" # See note 4
|
219
|
+
"ヒ": "hi"
|
220
|
+
"フ": "fu"
|
221
|
+
"ヘ":
|
222
|
+
- "he"
|
223
|
+
- "e" # See note 4
|
224
|
+
"ホ": "ho"
|
225
|
+
"マ": "ma"
|
226
|
+
"ミ": "mi"
|
227
|
+
"ム": "mu"
|
228
|
+
"メ": "me"
|
229
|
+
"モ": "mo"
|
230
|
+
"ヤ": "ya"
|
231
|
+
"ユ": "yu"
|
232
|
+
"ヨ": "yo"
|
233
|
+
"ラ": "ra"
|
234
|
+
"リ": "ri"
|
235
|
+
"ル": "ru"
|
236
|
+
"レ": "re"
|
237
|
+
"ロ": "ro"
|
238
|
+
"ワ": "wa"
|
239
|
+
"ン": "n" # See note 2
|
240
|
+
"ガ": "ga"
|
241
|
+
"ギ": "gi"
|
242
|
+
"グ": "gu"
|
243
|
+
"ゲ": "ge"
|
244
|
+
"ゴ": "go"
|
245
|
+
"ザ": "za"
|
246
|
+
"ジ": "ji"
|
247
|
+
"ズ": "zu"
|
248
|
+
"ゼ": "ze"
|
249
|
+
"ゾ": "zo"
|
250
|
+
"ダ": "da"
|
251
|
+
"ヂ": "ji"
|
252
|
+
"ヅ": "zu"
|
253
|
+
"デ": "de"
|
254
|
+
"ド": "do"
|
255
|
+
"バ": "ba"
|
256
|
+
"ビ": "bi"
|
257
|
+
"ブ": "bu"
|
258
|
+
"ベ": "be"
|
259
|
+
"ボ": "bo"
|
260
|
+
"パ": "pa"
|
261
|
+
"ピ": "pi"
|
262
|
+
"プ": "pu"
|
263
|
+
"ペ": "pe"
|
264
|
+
"ポ": "po"
|
265
|
+
"オゥ": "ō"
|
266
|
+
"オオ": "ō" # See note 4
|
267
|
+
"キャ": "kya"
|
268
|
+
"キュ": "kyu"
|
269
|
+
"キュゥ": "kyū"
|
270
|
+
"キョ": "kyo"
|
271
|
+
"キョゥ": "kyō"
|
272
|
+
"コゥ": "kō"
|
273
|
+
"シャ": "sha"
|
274
|
+
"シュ": "shu"
|
275
|
+
"シュゥ": "shū"
|
276
|
+
"ショ": "sho"
|
277
|
+
"ショゥ": "shō"
|
278
|
+
"ソゥ": "sō"
|
279
|
+
"チャ": "cha"
|
280
|
+
"チュ": "chu"
|
281
|
+
"チュゥ": "chū"
|
282
|
+
"チョ": "cho"
|
283
|
+
"チョゥ": "chō"
|
284
|
+
"トゥ": "tō"
|
285
|
+
"ニャ": "nya"
|
286
|
+
"ニュ": "nyu"
|
287
|
+
"ニュゥ": "nyū"
|
288
|
+
"ニョ": "nyo"
|
289
|
+
"ニョゥ": "nyō"
|
290
|
+
"ノゥ": "nō"
|
291
|
+
"ヒャ": "hya"
|
292
|
+
"ヒュ": "hyu"
|
293
|
+
"ヒュゥ": "hyū"
|
294
|
+
"ヒョ": "hyo"
|
295
|
+
"ヒョゥ": "hyō"
|
296
|
+
"ホゥ":
|
297
|
+
- "hō"
|
298
|
+
- "ō" # See note 4
|
299
|
+
"ミャ": "mya"
|
300
|
+
"ミュ": "myu"
|
301
|
+
"ミュゥ": "myū"
|
302
|
+
"ミョ": "myo"
|
303
|
+
"ミョゥ": "myō"
|
304
|
+
"モゥ": "mō"
|
305
|
+
"ヨゥ": "yō"
|
306
|
+
"リャ": "rya"
|
307
|
+
"リュ": "ryu"
|
308
|
+
"リュゥ": "ryū"
|
309
|
+
"リョ": "ryo"
|
310
|
+
"リョゥ": "ryō"
|
311
|
+
"ロゥ": "rō"
|
312
|
+
"ギャ": "gya"
|
313
|
+
"ギュ": "gyu"
|
314
|
+
"ギュゥ": "gyū"
|
315
|
+
"ギョ": "gyo"
|
316
|
+
"ギョゥ": "gyō"
|
317
|
+
"ゴゥ": "gō"
|
318
|
+
"ジャ": "ja"
|
319
|
+
"ジュ": "ju"
|
320
|
+
"ジュゥ": "jū"
|
321
|
+
"ジョ": "jo"
|
322
|
+
"ジョゥ": "jō"
|
323
|
+
"ゾゥ": "zō"
|
324
|
+
"ドゥ": "dō"
|
325
|
+
"ビャ": "bya"
|
326
|
+
"ビュ": "byu"
|
327
|
+
"ビュゥ": "byū"
|
328
|
+
"ビョ": "byo"
|
329
|
+
"ビョゥ": "byō"
|
330
|
+
"ボゥ": "bō"
|
331
|
+
"ピャ": "pya"
|
332
|
+
"ピュ": "pyu"
|
333
|
+
"ピュゥ": "pyū"
|
334
|
+
"ピョ": "pyo"
|
335
|
+
"ピョゥ": "pyō"
|
336
|
+
"ポゥ": "pō"
|
@@ -0,0 +1,125 @@
|
|
1
|
+
---
|
2
|
+
authority_id: icao
|
3
|
+
id: 9303
|
4
|
+
language: bel
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Belorussian Cyrillic to Latin
|
8
|
+
url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
|
9
|
+
creation_date: 2015
|
10
|
+
description: |
|
11
|
+
Part 3 defines specifications that are common to TD1, TD2 and TD3
|
12
|
+
size machine readable travel documents (MRTDs) including those
|
13
|
+
necessary for global interoperability using visual inspection and
|
14
|
+
machine readable (optical character recognition) means.
|
15
|
+
|
16
|
+
Since only Latin-alphabet characters are allowed in the VIZ, if
|
17
|
+
mandatory data elements are in a national language that does not use
|
18
|
+
the Latin alphabet, a transcription or transliteration shall also be
|
19
|
+
provided.
|
20
|
+
|
21
|
+
This document defines the transliteration mappings used to produce
|
22
|
+
this transcription or transliteration.
|
23
|
+
tests:
|
24
|
+
- source:
|
25
|
+
expected:
|
26
|
+
map:
|
27
|
+
# https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf
|
28
|
+
characters:
|
29
|
+
"\u0027": "", # '
|
30
|
+
"\u0410": "A", # А
|
31
|
+
"\u0411": "B", # Б
|
32
|
+
"\u0414": "D", # Д
|
33
|
+
"\u0401": "IO", # Ё
|
34
|
+
"\u0415": "E", # Е
|
35
|
+
"\u042D": "E", # Э
|
36
|
+
"\u0424": "F", # Ф
|
37
|
+
"\u0413": "H", # Г
|
38
|
+
"\u0418": "I", # И
|
39
|
+
"\u0419": "I", # Й
|
40
|
+
"\u041A": "K", # К
|
41
|
+
"\u041B": "L", # Л
|
42
|
+
"\u041C": "M", # М
|
43
|
+
"\u041D": "N", # Н
|
44
|
+
"\u041E": "O", # О
|
45
|
+
"\u041F": "P", # П
|
46
|
+
"\u0420": "R", # Р
|
47
|
+
"\u0421": "S", # С
|
48
|
+
"\u0422": "T", # Т
|
49
|
+
"\u0423": "U", # У
|
50
|
+
"\u0412": "V", # В
|
51
|
+
"\u042B": "Y", # Ы
|
52
|
+
"\u0417": "Z", # З
|
53
|
+
"\u0427": "CH", # Ч
|
54
|
+
"\u042F": "IA", # Я
|
55
|
+
"\u042E": "IU", # Ю
|
56
|
+
"\u0425": "KH", # Х
|
57
|
+
"\u0428": "SH", # Ш
|
58
|
+
"\u0429": "SHCH", # Щ
|
59
|
+
"\u0426": "TS", # Ц
|
60
|
+
"\u0416": "ZH", # Ж
|
61
|
+
"\u0490": "G", # Ґ
|
62
|
+
"\u040E": "U", # Ў
|
63
|
+
"\u046A": "U", # Ѫ
|
64
|
+
"\u0402": "D", # Ђ
|
65
|
+
"\u0405": "DZ", # Ѕ
|
66
|
+
"\u0408": "J", # Ј
|
67
|
+
"\u0409": "LJ", # Љ
|
68
|
+
"\u040A": "NJ", # Њ
|
69
|
+
"\u04BA": "C", # Һ
|
70
|
+
"\u040F": "DZ", # Џ
|
71
|
+
"\u0404": "IE", # Є
|
72
|
+
"\u0407": "I", # Ї
|
73
|
+
"\u0403": "G", # Ѓ
|
74
|
+
"\u0406": "I", # І
|
75
|
+
|
76
|
+
"\u0430": "a", # а
|
77
|
+
"\u0431": "b", # б
|
78
|
+
"\u0434": "d", # д
|
79
|
+
"\u0451": "io", # ё
|
80
|
+
"\u0435": "e", # e
|
81
|
+
"\u044D": "e", # э
|
82
|
+
"\u0444": "f", # ф
|
83
|
+
"\u0433": "h", # г
|
84
|
+
"\u0438": "i", # и
|
85
|
+
"\u0439": "i", # й
|
86
|
+
"\u043A": "k", # к
|
87
|
+
"\u043B": "l", # л
|
88
|
+
"\u043C": "m", # м
|
89
|
+
"\u043D": "n", # н
|
90
|
+
"\u043E": "o", # о
|
91
|
+
"\u043F": "p", # п
|
92
|
+
"\u0440": "r", # р
|
93
|
+
"\u0441": "s", # с
|
94
|
+
"\u0442": "t", # т
|
95
|
+
"\u0443": "u", # у
|
96
|
+
"\u0432": "v", # в
|
97
|
+
"\u044B": "y", # ы
|
98
|
+
"\u0437": "z", # з
|
99
|
+
"\u0447": "ch", # ч
|
100
|
+
"\u044F": "ia", # я
|
101
|
+
"\u044E": "iu", # ю
|
102
|
+
"\u0445": "kh", # х
|
103
|
+
"\u0448": "sh", # ш
|
104
|
+
"\u0449": "shch", # щ
|
105
|
+
"\u0446": "ts", # ц
|
106
|
+
"\u0436": "zh", # ж
|
107
|
+
"\u0491": "g", # ґ
|
108
|
+
"\u045E": "u", # ў
|
109
|
+
"\u046B": "u", # ѫ
|
110
|
+
"\u0452": "d", # ђ
|
111
|
+
"\u0455": "dz", # ѕ
|
112
|
+
"\u0458": "j", # ј
|
113
|
+
"\u0459": "lj", # љ
|
114
|
+
"\u045A": "nj", # њ
|
115
|
+
"\u04BB": "c", # һ
|
116
|
+
"\u045F": "dz", # џ
|
117
|
+
"\u0454": "ie", # є
|
118
|
+
"\u0457": "i", # ї
|
119
|
+
"\u0453": "g", # ѓ
|
120
|
+
"\u0456": "i" # і
|
121
|
+
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
|