interscript 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,336 @@
1
+ ---
2
+ authority_id: mext
3
+ id: hepburn
4
+ language: jpn
5
+ source_script: Hrkt
6
+ destination_script: Latn
7
+ name: Romanization of Japanese, Modified Hepburn System
8
+ url: http://www.eki.ee/wgrs/rom2_ja.htm
9
+ creation_date: 1954
10
+ adoption_date: 1954-12-09
11
+ description: |
12
+ Widely used Romanization system in customs, e.g. in passports. In
13
+ international cartographic products the Modified Hepburn System remains
14
+ the most used system.
15
+
16
+ notes:
17
+ - A small-script tu/tsu form (ッ / っ) is inserted between kana symbols to indicate a double consonant (kk, ss, ssh, tt, tts, tch, pp in Hepburn).
18
+ - In modified Hepburn, ン / ん is romanized n’ before y or a vowel letter and n in all other cases; earlier, m was also used before b, p, or m.
19
+ - Long vowels are expressed in Hepburn by placing a macron (¯) over a vowel.
20
+ - The romanization in parentheses (in modified Hepburn) is used only in those cases where the kana symbol is known to be pronounced in the manner indicated.
21
+ - The combination in parentheses is used to denote the word meaning ’big, great’.
22
+
23
+ tests:
24
+ - source: おばあさん
25
+ expected: obāsan
26
+ - source: おにいさん
27
+ expected: oniisan
28
+ - source: みずうみ
29
+ expected: mizuumi
30
+ - source: とおまわり
31
+ expected: tōmawari
32
+ - source: べんきょう
33
+ expected: benkyō
34
+ - source: じゃあく
35
+ expected: jaaku
36
+ - source: バレーボール
37
+ expected: barēbōru
38
+ - source: スーパーマン
39
+ expected: sūpāman
40
+
41
+ map:
42
+ characters:
43
+ "あ": "a"
44
+ "い": "i"
45
+ "う": "u"
46
+ "え": "e"
47
+ "お": "o"
48
+ "か": "ka"
49
+ "き": "ki"
50
+ "く": "ku"
51
+ "け":
52
+ - "ke"
53
+ - "ga" # See note 4
54
+ "こ": "ko"
55
+ "さ": "sa"
56
+ "し": "shi"
57
+ "す": "su"
58
+ "せ": "se"
59
+ "そ": "so"
60
+ "た": "ta"
61
+ "ち": "chi"
62
+ "つ": "tsu" # See note 1
63
+ "て": "te"
64
+ "と": "to"
65
+ "な": "na"
66
+ "に": "ni"
67
+ "ぬ": "nu"
68
+ "ね": "ne"
69
+ "の": "no"
70
+ "は":
71
+ - "ha"
72
+ - "wa" # See note 4
73
+ "ひ": "hi"
74
+ "ふ": "fu"
75
+ "へ":
76
+ - "he"
77
+ - "e" # See note 4
78
+ "ほ": "ho"
79
+ "ま": "ma"
80
+ "み": "mi"
81
+ "む": "mu"
82
+ "め": "me"
83
+ "も": "mo"
84
+ "や": "ya"
85
+ "ゆ": "yu"
86
+ "よ": "yo"
87
+ "ら": "ra"
88
+ "り": "ri"
89
+ "る": "ru"
90
+ "れ": "re"
91
+ "ろ": "ro"
92
+ "わ": "wa"
93
+ "ん": "n" # See note 2
94
+ "が": "ga"
95
+ "ぎ": "gi"
96
+ "ぐ": "gu"
97
+ "げ": "ge"
98
+ "ご": "go"
99
+ "ざ": "za"
100
+ "じ": "ji"
101
+ "ず": "zu"
102
+ "ぜ": "ze"
103
+ "ぞ": "zo"
104
+ "だ": "da"
105
+ "ぢ": "ji"
106
+ "づ": "zu"
107
+ "で": "de"
108
+ "ど": "do"
109
+ "ば": "ba"
110
+ "び": "bi"
111
+ "ぶ": "bu"
112
+ "べ": "be"
113
+ "ぼ": "bo"
114
+ "ぱ": "pa"
115
+ "ぴ": "pi"
116
+ "ぷ": "pu"
117
+ "ぺ": "pe"
118
+ "ぽ": "po"
119
+ "おぅ": "ō"
120
+ "おお": "ō" # See note 4
121
+ "きゃ": "kya"
122
+ "きゅ": "kyu"
123
+ "きゅぅ": "kyū"
124
+ "きょ": "kyo"
125
+ "きょぅ": "kyō"
126
+ "こぅ": "kō"
127
+ "しゃ": "sha"
128
+ "しゅ": "shu"
129
+ "しゅぅ": "shū"
130
+ "しょ": "sho"
131
+ "しょぅ": "shō"
132
+ "そぅ": "sō"
133
+ "ちゃ": "cha"
134
+ "ちゅ": "chu"
135
+ "ちゅぅ": "chū"
136
+ "ちょ": "cho"
137
+ "ちょぅ": "chō"
138
+ "とぅ": "tō"
139
+ "にゃ": "nya"
140
+ "にゅ": "nyu"
141
+ "にゅぅ": "nyū"
142
+ "にょ": "nyo"
143
+ "にょぅ": "nyō"
144
+ "のぅ": "nō"
145
+ "ひゃ": "hya"
146
+ "ひゅ": "hyu"
147
+ "ひゅぅ": "hyū"
148
+ "ひょ": "hyo"
149
+ "ひょぅ": "hyō"
150
+ "ほぅ":
151
+ - "hō"
152
+ - "ō" # See note 4
153
+ "みゃ": "mya"
154
+ "みゅ": "myu"
155
+ "みゅぅ": "myū"
156
+ "みょ": "myo"
157
+ "みょぅ": "myō"
158
+ "もぅ": "mō"
159
+ "よぅ": "yō"
160
+ "りゃ": "rya"
161
+ "りゅ": "ryu"
162
+ "りゅぅ": "ryū"
163
+ "りょ": "ryo"
164
+ "りょぅ": "ryō"
165
+ "ろぅ": "rō"
166
+ "ぎゃ": "gya"
167
+ "ぎゅ": "gyu"
168
+ "ぎゅぅ": "gyū"
169
+ "ぎょ": "gyo"
170
+ "ぎょぅ": "gyō"
171
+ "ごぅ": "gō"
172
+ "じゃ": "ja"
173
+ "じゅ": "ju"
174
+ "じゅぅ": "jū"
175
+ "じょ": "jo"
176
+ "じょぅ": "jō"
177
+ "ぞぅ": "zō"
178
+ "どぅ": "dō"
179
+ "びゃ": "bya"
180
+ "びゅ": "byu"
181
+ "びゅぅ": "byū"
182
+ "びょ": "byo"
183
+ "びょぅ": "byō"
184
+ "ぼぅ": "bō"
185
+ "ぴゃ": "pya"
186
+ "ぴゅ": "pyu"
187
+ "ぴゅぅ": "pyū"
188
+ "ぴょ": "pyo"
189
+ "ぴょぅ": "pyō"
190
+ "ぽぅ": "pō"
191
+ "ア": "a"
192
+ "イ": "i"
193
+ "ウ": "u"
194
+ "エ": "e"
195
+ "オ": "o"
196
+ "カ": "ka"
197
+ "キ": "ki"
198
+ "ク": "ku"
199
+ "ケ": "ke"
200
+ "コ": "ko"
201
+ "サ": "sa"
202
+ "シ": "shi"
203
+ "ス": "su"
204
+ "セ": "se"
205
+ "ソ": "so"
206
+ "タ": "ta"
207
+ "チ": "chi"
208
+ "ツ": "tsu" # See note 1
209
+ "テ": "te"
210
+ "ト": "to"
211
+ "ナ": "na"
212
+ "ニ": "ni"
213
+ "ヌ": "nu"
214
+ "ネ": "ne"
215
+ "ノ": "no"
216
+ "ハ":
217
+ - "ha"
218
+ - "wa" # See note 4
219
+ "ヒ": "hi"
220
+ "フ": "fu"
221
+ "ヘ":
222
+ - "he"
223
+ - "e" # See note 4
224
+ "ホ": "ho"
225
+ "マ": "ma"
226
+ "ミ": "mi"
227
+ "ム": "mu"
228
+ "メ": "me"
229
+ "モ": "mo"
230
+ "ヤ": "ya"
231
+ "ユ": "yu"
232
+ "ヨ": "yo"
233
+ "ラ": "ra"
234
+ "リ": "ri"
235
+ "ル": "ru"
236
+ "レ": "re"
237
+ "ロ": "ro"
238
+ "ワ": "wa"
239
+ "ン": "n" # See note 2
240
+ "ガ": "ga"
241
+ "ギ": "gi"
242
+ "グ": "gu"
243
+ "ゲ": "ge"
244
+ "ゴ": "go"
245
+ "ザ": "za"
246
+ "ジ": "ji"
247
+ "ズ": "zu"
248
+ "ゼ": "ze"
249
+ "ゾ": "zo"
250
+ "ダ": "da"
251
+ "ヂ": "ji"
252
+ "ヅ": "zu"
253
+ "デ": "de"
254
+ "ド": "do"
255
+ "バ": "ba"
256
+ "ビ": "bi"
257
+ "ブ": "bu"
258
+ "ベ": "be"
259
+ "ボ": "bo"
260
+ "パ": "pa"
261
+ "ピ": "pi"
262
+ "プ": "pu"
263
+ "ペ": "pe"
264
+ "ポ": "po"
265
+ "オゥ": "ō"
266
+ "オオ": "ō" # See note 4
267
+ "キャ": "kya"
268
+ "キュ": "kyu"
269
+ "キュゥ": "kyū"
270
+ "キョ": "kyo"
271
+ "キョゥ": "kyō"
272
+ "コゥ": "kō"
273
+ "シャ": "sha"
274
+ "シュ": "shu"
275
+ "シュゥ": "shū"
276
+ "ショ": "sho"
277
+ "ショゥ": "shō"
278
+ "ソゥ": "sō"
279
+ "チャ": "cha"
280
+ "チュ": "chu"
281
+ "チュゥ": "chū"
282
+ "チョ": "cho"
283
+ "チョゥ": "chō"
284
+ "トゥ": "tō"
285
+ "ニャ": "nya"
286
+ "ニュ": "nyu"
287
+ "ニュゥ": "nyū"
288
+ "ニョ": "nyo"
289
+ "ニョゥ": "nyō"
290
+ "ノゥ": "nō"
291
+ "ヒャ": "hya"
292
+ "ヒュ": "hyu"
293
+ "ヒュゥ": "hyū"
294
+ "ヒョ": "hyo"
295
+ "ヒョゥ": "hyō"
296
+ "ホゥ":
297
+ - "hō"
298
+ - "ō" # See note 4
299
+ "ミャ": "mya"
300
+ "ミュ": "myu"
301
+ "ミュゥ": "myū"
302
+ "ミョ": "myo"
303
+ "ミョゥ": "myō"
304
+ "モゥ": "mō"
305
+ "ヨゥ": "yō"
306
+ "リャ": "rya"
307
+ "リュ": "ryu"
308
+ "リュゥ": "ryū"
309
+ "リョ": "ryo"
310
+ "リョゥ": "ryō"
311
+ "ロゥ": "rō"
312
+ "ギャ": "gya"
313
+ "ギュ": "gyu"
314
+ "ギュゥ": "gyū"
315
+ "ギョ": "gyo"
316
+ "ギョゥ": "gyō"
317
+ "ゴゥ": "gō"
318
+ "ジャ": "ja"
319
+ "ジュ": "ju"
320
+ "ジュゥ": "jū"
321
+ "ジョ": "jo"
322
+ "ジョゥ": "jō"
323
+ "ゾゥ": "zō"
324
+ "ドゥ": "dō"
325
+ "ビャ": "bya"
326
+ "ビュ": "byu"
327
+ "ビュゥ": "byū"
328
+ "ビョ": "byo"
329
+ "ビョゥ": "byō"
330
+ "ボゥ": "bō"
331
+ "ピャ": "pya"
332
+ "ピュ": "pyu"
333
+ "ピュゥ": "pyū"
334
+ "ピョ": "pyo"
335
+ "ピョゥ": "pyō"
336
+ "ポゥ": "pō"
@@ -0,0 +1,125 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Belorussian Cyrillic to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf
28
+ characters:
29
+ "\u0027": "", # '
30
+ "\u0410": "A", # А
31
+ "\u0411": "B", # Б
32
+ "\u0414": "D", # Д
33
+ "\u0401": "IO", # Ё
34
+ "\u0415": "E", # Е
35
+ "\u042D": "E", # Э
36
+ "\u0424": "F", # Ф
37
+ "\u0413": "H", # Г
38
+ "\u0418": "I", # И
39
+ "\u0419": "I", # Й
40
+ "\u041A": "K", # К
41
+ "\u041B": "L", # Л
42
+ "\u041C": "M", # М
43
+ "\u041D": "N", # Н
44
+ "\u041E": "O", # О
45
+ "\u041F": "P", # П
46
+ "\u0420": "R", # Р
47
+ "\u0421": "S", # С
48
+ "\u0422": "T", # Т
49
+ "\u0423": "U", # У
50
+ "\u0412": "V", # В
51
+ "\u042B": "Y", # Ы
52
+ "\u0417": "Z", # З
53
+ "\u0427": "CH", # Ч
54
+ "\u042F": "IA", # Я
55
+ "\u042E": "IU", # Ю
56
+ "\u0425": "KH", # Х
57
+ "\u0428": "SH", # Ш
58
+ "\u0429": "SHCH", # Щ
59
+ "\u0426": "TS", # Ц
60
+ "\u0416": "ZH", # Ж
61
+ "\u0490": "G", # Ґ
62
+ "\u040E": "U", # Ў
63
+ "\u046A": "U", # Ѫ
64
+ "\u0402": "D", # Ђ
65
+ "\u0405": "DZ", # Ѕ
66
+ "\u0408": "J", # Ј
67
+ "\u0409": "LJ", # Љ
68
+ "\u040A": "NJ", # Њ
69
+ "\u04BA": "C", # Һ
70
+ "\u040F": "DZ", # Џ
71
+ "\u0404": "IE", # Є
72
+ "\u0407": "I", # Ї
73
+ "\u0403": "G", # Ѓ
74
+ "\u0406": "I", # І
75
+
76
+ "\u0430": "a", # а
77
+ "\u0431": "b", # б
78
+ "\u0434": "d", # д
79
+ "\u0451": "io", # ё
80
+ "\u0435": "e", # е
81
+ "\u044D": "e", # э
82
+ "\u0444": "f", # ф
83
+ "\u0433": "h", # г
84
+ "\u0438": "i", # и
85
+ "\u0439": "i", # й
86
+ "\u043A": "k", # к
87
+ "\u043B": "l", # л
88
+ "\u043C": "m", # м
89
+ "\u043D": "n", # н
90
+ "\u043E": "o", # о
91
+ "\u043F": "p", # п
92
+ "\u0440": "r", # р
93
+ "\u0441": "s", # с
94
+ "\u0442": "t", # т
95
+ "\u0443": "u", # у
96
+ "\u0432": "v", # в
97
+ "\u044B": "y", # ы
98
+ "\u0437": "z", # з
99
+ "\u0447": "ch", # ч
100
+ "\u044F": "ia", # я
101
+ "\u044E": "iu", # ю
102
+ "\u0445": "kh", # х
103
+ "\u0448": "sh", # ш
104
+ "\u0449": "shch", # щ
105
+ "\u0446": "ts", # ц
106
+ "\u0436": "zh", # ж
107
+ "\u0491": "g", # ґ
108
+ "\u045E": "u", # ў
109
+ "\u046B": "u", # ѫ
110
+ "\u0452": "d", # ђ
111
+ "\u0455": "dz", # ѕ
112
+ "\u0458": "j", # ј
113
+ "\u0459": "lj", # љ
114
+ "\u045A": "nj", # њ
115
+ "\u04BB": "c", # һ
116
+ "\u045F": "dz", # џ
117
+ "\u0454": "ie", # є
118
+ "\u0457": "i", # ї
119
+ "\u0453": "g", # ѓ
120
+ "\u0456": "i" # і
121
+
122
+
123
+
124
+
125
+