interscript 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,336 @@
1
+ ---
2
+ authority_id: mext
3
+ id: hepburn
4
+ language: jpn
5
+ source_script: Hrkt
6
+ destination_script: Latn
7
+ name: Romanization of Japanese, Modified Hepburn System
8
+ url: http://www.eki.ee/wgrs/rom2_ja.htm
9
+ creation_date: 1954
10
+ adoption_date: 1954-12-09
11
+ description: |
12
+ Widely used Romanization system in customs, e.g. in passports. In
13
+ international cartographic products the Modified Hepburn System remains
14
+ the most used system.
15
+
16
+ notes:
17
+ - A small-script tu/tsu form (ッ / っ) is inserted between kana symbols to indicate a double consonant (kk, ss, ssh, tt, tts, tch, pp in Hepburn).
18
+ - In modified Hepburn, ン / ん is romanized n’ before y or a vowel letter and n in all other cases; earlier, m was also used before b, p, or m.
19
+ - Long vowels are expressed in Hepburn by placing a macron (¯) over a vowel.
20
+ - The romanization in parentheses (in modified Hepburn) is used only in those cases where the kana symbol is known to be pronounced in the manner indicated.
21
+ - The combination in parentheses is used to denote the word meaning ’big, great’.
22
+
23
+ tests: # Sample kana words paired with the romanization this map is expected to produce
24
+ - source: おばあさん
25
+ expected: obāsan # long a written with a macron; NOTE(review): no ああ key is visible in the map — confirm how this is produced
26
+ - source: おにいさん
27
+ expected: oniisan # ii stays doubled, no macron
28
+ - source: みずうみ
29
+ expected: mizuumi # uu stays doubled in this word
30
+ - source: とおまわり
31
+ expected: tōmawari # long o spelled とお; NOTE(review): no とお key is visible in the map — confirm
32
+ - source: べんきょう
33
+ expected: benkyō # long o spelled with full-size う after きょ
34
+ - source: じゃあく
35
+ expected: jaaku # aa stays doubled in this word
36
+ - source: バレーボール
37
+ expected: barēbōru # katakana prolonged sound mark ー; NOTE(review): no ー key is visible in the map — confirm
38
+ - source: スーパーマン
39
+ expected: sūpāman # katakana prolonged sound mark ー, see the note on the previous case
40
+
41
+ map:
42
+ characters:
43
+ "あ": "a"
44
+ "い": "i"
45
+ "う": "u"
46
+ "え": "e"
47
+ "お": "o"
48
+ "か": "ka"
49
+ "き": "ki"
50
+ "く": "ku"
51
+ "け":
52
+ - "ke"
53
+ - "ga" # See note 4
54
+ "こ": "ko"
55
+ "さ": "sa"
56
+ "し": "shi"
57
+ "す": "su"
58
+ "せ": "se"
59
+ "そ": "so"
60
+ "た": "ta"
61
+ "ち": "chi"
62
+ "つ": "tsu" # See note 1
63
+ "て": "te"
64
+ "と": "to"
65
+ "な": "na"
66
+ "に": "ni"
67
+ "ぬ": "nu"
68
+ "ね": "ne"
69
+ "の": "no"
70
+ "は":
71
+ - "ha"
72
+ - "wa" # See note 4
73
+ "ひ": "hi"
74
+ "ふ": "fu"
75
+ "へ":
76
+ - "he"
77
+ - "e" # See note 4
78
+ "ほ": "ho"
79
+ "ま": "ma"
80
+ "み": "mi"
81
+ "む": "mu"
82
+ "め": "me"
83
+ "も": "mo"
84
+ "や": "ya"
85
+ "ゆ": "yu"
86
+ "よ": "yo"
87
+ "ら": "ra"
88
+ "り": "ri"
89
+ "る": "ru"
90
+ "れ": "re"
91
+ "ろ": "ro"
92
+ "わ": "wa"
93
+ "ん": "n" # See note 2
94
+ "が": "ga"
95
+ "ぎ": "gi"
96
+ "ぐ": "gu"
97
+ "げ": "ge"
98
+ "ご": "go"
99
+ "ざ": "za"
100
+ "じ": "ji"
101
+ "ず": "zu"
102
+ "ぜ": "ze"
103
+ "ぞ": "zo"
104
+ "だ": "da"
105
+ "ぢ": "ji"
106
+ "づ": "zu"
107
+ "で": "de"
108
+ "ど": "do"
109
+ "ば": "ba"
110
+ "び": "bi"
111
+ "ぶ": "bu"
112
+ "べ": "be"
113
+ "ぼ": "bo"
114
+ "ぱ": "pa"
115
+ "ぴ": "pi"
116
+ "ぷ": "pu"
117
+ "ぺ": "pe"
118
+ "ぽ": "po"
119
+ "おう": "ō" # long-vowel keys in this run use full-size う (U+3046) so ordinary spellings such as べんきょう match
120
+ "おお": "ō" # See note 4
121
+ "きゃ": "kya"
122
+ "きゅ": "kyu"
123
+ "きゅう": "kyū"
124
+ "きょ": "kyo"
125
+ "きょう": "kyō"
126
+ "こう": "kō"
127
+ "しゃ": "sha"
128
+ "しゅ": "shu"
129
+ "しゅう": "shū"
130
+ "しょ": "sho"
131
+ "しょう": "shō"
132
+ "そう": "sō"
133
+ "ちゃ": "cha"
134
+ "ちゅ": "chu"
135
+ "ちゅう": "chū"
136
+ "ちょ": "cho"
137
+ "ちょう": "chō"
138
+ "とう": "tō"
139
+ "にゃ": "nya"
140
+ "にゅ": "nyu"
141
+ "にゅう": "nyū"
142
+ "にょ": "nyo"
143
+ "にょう": "nyō"
144
+ "のう": "nō"
145
+ "ひゃ": "hya"
146
+ "ひゅ": "hyu"
147
+ "ひゅう": "hyū"
148
+ "ひょ": "hyo"
149
+ "ひょう": "hyō"
150
+ "ほう":
151
+ - "hō"
152
+ - "ō" # See note 4
153
+ "みゃ": "mya"
154
+ "みゅ": "myu"
155
+ "みゅう": "myū"
156
+ "みょ": "myo"
157
+ "みょう": "myō"
158
+ "もう": "mō"
159
+ "よう": "yō"
160
+ "りゃ": "rya"
161
+ "りゅ": "ryu"
162
+ "りゅう": "ryū"
163
+ "りょ": "ryo"
164
+ "りょう": "ryō"
165
+ "ろう": "rō"
166
+ "ぎゃ": "gya"
167
+ "ぎゅ": "gyu"
168
+ "ぎゅう": "gyū"
169
+ "ぎょ": "gyo"
170
+ "ぎょう": "gyō"
171
+ "ごう": "gō"
172
+ "じゃ": "ja"
173
+ "じゅ": "ju"
174
+ "じゅう": "jū"
175
+ "じょ": "jo"
176
+ "じょう": "jō"
177
+ "ぞう": "zō"
178
+ "どう": "dō"
179
+ "びゃ": "bya"
180
+ "びゅ": "byu"
181
+ "びゅう": "byū"
182
+ "びょ": "byo"
183
+ "びょう": "byō"
184
+ "ぼう": "bō"
185
+ "ぴゃ": "pya"
186
+ "ぴゅ": "pyu"
187
+ "ぴゅう": "pyū"
188
+ "ぴょ": "pyo"
189
+ "ぴょう": "pyō"
190
+ "ぽう": "pō"
191
+ "ア": "a"
192
+ "イ": "i"
193
+ "ウ": "u"
194
+ "エ": "e"
195
+ "オ": "o"
196
+ "カ": "ka"
197
+ "キ": "ki"
198
+ "ク": "ku"
199
+ "ケ": "ke"
200
+ "コ": "ko"
201
+ "サ": "sa"
202
+ "シ": "shi"
203
+ "ス": "su"
204
+ "セ": "se"
205
+ "ソ": "so"
206
+ "タ": "ta"
207
+ "チ": "chi"
208
+ "ツ": "tsu" # See note 1
209
+ "テ": "te"
210
+ "ト": "to"
211
+ "ナ": "na"
212
+ "ニ": "ni"
213
+ "ヌ": "nu"
214
+ "ネ": "ne"
215
+ "ノ": "no"
216
+ "ハ":
217
+ - "ha"
218
+ - "wa" # See note 4
219
+ "ヒ": "hi"
220
+ "フ": "fu"
221
+ "ヘ":
222
+ - "he"
223
+ - "e" # See note 4
224
+ "ホ": "ho"
225
+ "マ": "ma"
226
+ "ミ": "mi"
227
+ "ム": "mu"
228
+ "メ": "me"
229
+ "モ": "mo"
230
+ "ヤ": "ya"
231
+ "ユ": "yu"
232
+ "ヨ": "yo"
233
+ "ラ": "ra"
234
+ "リ": "ri"
235
+ "ル": "ru"
236
+ "レ": "re"
237
+ "ロ": "ro"
238
+ "ワ": "wa"
239
+ "ン": "n" # See note 2
240
+ "ガ": "ga"
241
+ "ギ": "gi"
242
+ "グ": "gu"
243
+ "ゲ": "ge"
244
+ "ゴ": "go"
245
+ "ザ": "za"
246
+ "ジ": "ji"
247
+ "ズ": "zu"
248
+ "ゼ": "ze"
249
+ "ゾ": "zo"
250
+ "ダ": "da"
251
+ "ヂ": "ji"
252
+ "ヅ": "zu"
253
+ "デ": "de"
254
+ "ド": "do"
255
+ "バ": "ba"
256
+ "ビ": "bi"
257
+ "ブ": "bu"
258
+ "ベ": "be"
259
+ "ボ": "bo"
260
+ "パ": "pa"
261
+ "ピ": "pi"
262
+ "プ": "pu"
263
+ "ペ": "pe"
264
+ "ポ": "po"
265
+ "オウ": "ō" # long-vowel keys in this run use full-size ウ (U+30A6); small ゥ spells loanword sounds such as トゥ, not a long vowel
266
+ "オオ": "ō" # See note 4
267
+ "キャ": "kya"
268
+ "キュ": "kyu"
269
+ "キュウ": "kyū"
270
+ "キョ": "kyo"
271
+ "キョウ": "kyō"
272
+ "コウ": "kō"
273
+ "シャ": "sha"
274
+ "シュ": "shu"
275
+ "シュウ": "shū"
276
+ "ショ": "sho"
277
+ "ショウ": "shō"
278
+ "ソウ": "sō"
279
+ "チャ": "cha"
280
+ "チュ": "chu"
281
+ "チュウ": "chū"
282
+ "チョ": "cho"
283
+ "チョウ": "chō"
284
+ "トウ": "tō"
285
+ "ニャ": "nya"
286
+ "ニュ": "nyu"
287
+ "ニュウ": "nyū"
288
+ "ニョ": "nyo"
289
+ "ニョウ": "nyō"
290
+ "ノウ": "nō"
291
+ "ヒャ": "hya"
292
+ "ヒュ": "hyu"
293
+ "ヒュウ": "hyū"
294
+ "ヒョ": "hyo"
295
+ "ヒョウ": "hyō"
296
+ "ホウ":
297
+ - "hō"
298
+ - "ō" # See note 4
299
+ "ミャ": "mya"
300
+ "ミュ": "myu"
301
+ "ミュウ": "myū"
302
+ "ミョ": "myo"
303
+ "ミョウ": "myō"
304
+ "モウ": "mō"
305
+ "ヨウ": "yō"
306
+ "リャ": "rya"
307
+ "リュ": "ryu"
308
+ "リュウ": "ryū"
309
+ "リョ": "ryo"
310
+ "リョウ": "ryō"
311
+ "ロウ": "rō"
312
+ "ギャ": "gya"
313
+ "ギュ": "gyu"
314
+ "ギュウ": "gyū"
315
+ "ギョ": "gyo"
316
+ "ギョウ": "gyō"
317
+ "ゴウ": "gō"
318
+ "ジャ": "ja"
319
+ "ジュ": "ju"
320
+ "ジュウ": "jū"
321
+ "ジョ": "jo"
322
+ "ジョウ": "jō"
323
+ "ゾウ": "zō"
324
+ "ドウ": "dō"
325
+ "ビャ": "bya"
326
+ "ビュ": "byu"
327
+ "ビュウ": "byū"
328
+ "ビョ": "byo"
329
+ "ビョウ": "byō"
330
+ "ボウ": "bō"
331
+ "ピャ": "pya"
332
+ "ピュ": "pyu"
333
+ "ピュウ": "pyū"
334
+ "ピョ": "pyo"
335
+ "ピョウ": "pyō"
336
+ "ポウ": "pō"
@@ -0,0 +1,125 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Belorussian Cyrillic to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests: # NOTE(review): only an empty placeholder case — source and expected have no values, so nothing is exercised
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf
28
+ characters: # Cyrillic code point -> Latin replacement; upper-case rows first, then the matching lower-case rows
29
+ "\u0027": "" # '
30
+ "\u0410": "A" # А
31
+ "\u0411": "B" # Б
32
+ "\u0414": "D" # Д
33
+ "\u0401": "IO" # Ё
34
+ "\u0415": "E" # Е
35
+ "\u042D": "E" # Э
36
+ "\u0424": "F" # Ф
37
+ "\u0413": "H" # Г
38
+ "\u0418": "I" # И
39
+ "\u0419": "I" # Й
40
+ "\u041A": "K" # К
41
+ "\u041B": "L" # Л
42
+ "\u041C": "M" # М
43
+ "\u041D": "N" # Н
44
+ "\u041E": "O" # О
45
+ "\u041F": "P" # П
46
+ "\u0420": "R" # Р
47
+ "\u0421": "S" # С
48
+ "\u0422": "T" # Т
49
+ "\u0423": "U" # У
50
+ "\u0412": "V" # В
51
+ "\u042B": "Y" # Ы
52
+ "\u0417": "Z" # З
53
+ "\u0427": "CH" # Ч
54
+ "\u042F": "IA" # Я
55
+ "\u042E": "IU" # Ю
56
+ "\u0425": "KH" # Х
57
+ "\u0428": "SH" # Ш
58
+ "\u0429": "SHCH" # Щ
59
+ "\u0426": "TS" # Ц
60
+ "\u0416": "ZH" # Ж
61
+ "\u0490": "G" # Ґ
62
+ "\u040E": "U" # Ў
63
+ "\u046A": "U" # Ѫ
64
+ "\u0402": "D" # Ђ
65
+ "\u0405": "DZ" # Ѕ
66
+ "\u0408": "J" # Ј
67
+ "\u0409": "LJ" # Љ
68
+ "\u040A": "NJ" # Њ
69
+ "\u04BA": "C" # Һ
70
+ "\u040F": "DZ" # Џ
71
+ "\u0404": "IE" # Є
72
+ "\u0407": "I" # Ї
73
+ "\u0403": "G" # Ѓ
74
+ "\u0406": "I" # І
75
+
76
+ "\u0430": "a" # а
77
+ "\u0431": "b" # б
78
+ "\u0434": "d" # д
79
+ "\u0451": "io" # ё
80
+ "\u0435": "e" # е
81
+ "\u044D": "e" # э
82
+ "\u0444": "f" # ф
83
+ "\u0433": "h" # г
84
+ "\u0438": "i" # и
85
+ "\u0439": "i" # й
86
+ "\u043A": "k" # к
87
+ "\u043B": "l" # л
88
+ "\u043C": "m" # м
89
+ "\u043D": "n" # н
90
+ "\u043E": "o" # о
91
+ "\u043F": "p" # п
92
+ "\u0440": "r" # р
93
+ "\u0441": "s" # с
94
+ "\u0442": "t" # т
95
+ "\u0443": "u" # у
96
+ "\u0432": "v" # в
97
+ "\u044B": "y" # ы
98
+ "\u0437": "z" # з
99
+ "\u0447": "ch" # ч
100
+ "\u044F": "ia" # я
101
+ "\u044E": "iu" # ю
102
+ "\u0445": "kh" # х
103
+ "\u0448": "sh" # ш
104
+ "\u0449": "shch" # щ
105
+ "\u0446": "ts" # ц
106
+ "\u0436": "zh" # ж
107
+ "\u0491": "g" # ґ
108
+ "\u045E": "u" # ў
109
+ "\u046B": "u" # ѫ
110
+ "\u0452": "d" # ђ
111
+ "\u0455": "dz" # ѕ
112
+ "\u0458": "j" # ј
113
+ "\u0459": "lj" # љ
114
+ "\u045A": "nj" # њ
115
+ "\u04BB": "c" # һ
116
+ "\u045F": "dz" # џ
117
+ "\u0454": "ie" # є
118
+ "\u0457": "i" # ї
119
+ "\u0453": "g" # ѓ
120
+ "\u0456": "i" # і
121
+
122
+
123
+
124
+
125
+