interscript 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,354 @@
1
+ ---
2
+ authority_id: un
3
+ id: kunrei
4
+ language: jpn
5
+ source_script: Hrkt
6
+ destination_script: Latn
7
+ name: Romanization of Japanese, Kunrei-siki
8
+ url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/9th-uncsgn-docs/econf/9th_UNCSGN_e-conf-98-47-add1.pdf
9
+ creation_date: 2007
10
+ adoption_date: 2007-06-29
11
+ description: |
12
+ The official romanization system for Japanese is the Kunrei-siki.
13
+ It was officially adopted on 9 December, 1954 (Cabinet Notification No. 1, table 1).
14
+
15
+ notes:
16
+ - A small-script tu/tsu form (ッ / っ) is inserted between kana symbols to indicate a double consonant (kk, ss, tt, pp in Kunrei-siki).
17
+ - ン / ん is always romanized as n in Kunrei-siki; when the n must be kept distinct from a following vowel or y, an apostrophe is added after the n.
18
+ - Long vowels are expressed in Kunrei-siki by placing a circumflex (^) over a vowel.
19
+ - The combination in parentheses is used to denote the word meaning ‘big, great’.
20
+
21
+ tests:
22
+ - source: かんおう
23
+ expected: kanô # strictly kan'ô: the ン/ん note calls for an apostrophe between n and a following vowel
24
+ - source: かのう
25
+ expected: kanô
26
+ - source: きんゆう
27
+ expected: kinyû # strictly kin'yû: the ン/ん note calls for an apostrophe between n and a following y
28
+ - source: きにゅう
29
+ expected: kinyû
30
+ - source: とうきょう
31
+ expected: tôkyô
32
+
33
+ map:
34
+ characters:
35
+ "あ": "a"
36
+ "い": "i"
37
+ "う": "u"
38
+ "え": "e"
39
+ "お": "o"
40
+
41
+ "か": "ka"
42
+ "き": "ki"
43
+ "く": "ku"
44
+ "け": "ke"
45
+ "こ": "ko"
46
+
47
+ "さ": "sa"
48
+ "し": "si"
49
+ "す": "su"
50
+ "せ": "se"
51
+ "そ": "so"
52
+
53
+ "た": "ta"
54
+ "ち": "ti"
55
+ "つ": "tu" # See note 1
56
+ "て": "te"
57
+ "と": "to"
58
+
59
+ "な": "na"
60
+ "に": "ni"
61
+ "ぬ": "nu"
62
+ "ね": "ne"
63
+ "の": "no"
64
+
65
+ "は": "ha"
66
+ "ひ": "hi"
67
+ "ふ": "hu"
68
+ "へ": "he"
69
+ "ほ": "ho"
70
+
71
+ "ま": "ma"
72
+ "み": "mi"
73
+ "む": "mu"
74
+ "め": "me"
75
+ "も": "mo"
76
+
77
+ "ら": "ra"
78
+ "り": "ri"
79
+ "る": "ru"
80
+ "れ": "re"
81
+ "ろ": "ro"
82
+
83
+ "わ": "wa"
84
+ "を": "o"
85
+
86
+ "ん": "n" # See note 2
87
+
88
+ "が": "ga"
89
+ "ぎ": "gi"
90
+ "ぐ": "gu"
91
+ "げ": "ge"
92
+ "ご": "go"
93
+
94
+ "ざ": "za"
95
+ "じ": "zi"
96
+ "ず": "zu"
97
+ "ぜ": "ze"
98
+ "ぞ": "zo"
99
+
100
+ "だ": "da"
101
+ "ぢ": "di"
102
+ "づ": "du"
103
+ "で": "de"
104
+ "ど": "do"
105
+
106
+ "ば": "ba"
107
+ "び": "bi"
108
+ "ぶ": "bu"
109
+ "べ": "be"
110
+ "ぼ": "bo"
111
+
112
+ "ぱ": "pa"
113
+ "ぴ": "pi"
114
+ "ぷ": "pu"
115
+ "ぺ": "pe"
116
+ "ぽ": "po"
117
+
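118
+ "おぅ": "ô" # NOTE(review): this and the other long-vowel keys end in small ぅ (U+3045), while the tests in this file spell long vowels with full-size う (U+3046), e.g. とうきょう — confirm which is intended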
119
+ "おお": "ô" # See note 4
120
+
121
+ # Officially only kya kyu kyo??
122
+ "きゃ": "kya"
123
+ "きゅ": "kyu"
124
+ "きゅぅ": "kyû"
125
+ "きょ": "kyo"
126
+ "きょぅ": "kyô"
127
+
128
+ # Officially not exists?
129
+ "こぅ": "kô"
130
+
131
+ # Officially only sya syu syo??
132
+ "しゃ": "sya"
133
+ "しゅ": "syu"
134
+ "しゅぅ": "syû"
135
+ "しょ": "syo"
136
+ "しょぅ": "syô"
137
+
138
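+ # Supplementary (small kana). NOTE(review): full-size や / ゆ / よ have no standalone entry in this hiragana section, although katakana ヤ / ユ / ヨ are mapped further down — confirm.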
139
+ "ゃ": "ya"
140
+ "ゅ": "yu"
141
+ "ょ": "yo"
142
+ "ぅ": "u"
143
+ "ょぅ": "yô"
144
+ "ゅぅ": "yû"
145
+
146
+ # Officially not exists?
147
+ "そぅ": "sô"
148
+
149
+ # Officially only tya tyu tyo??
150
+ "ちゃ": "tya"
151
+ "ちゅ": "tyu"
152
+ "ちゅぅ": "tyû"
153
+ "ちょ": "tyo"
154
+ "ちょぅ": "tyô"
155
+
156
+ # Officially not exists?
157
+ "とぅ": "tô"
158
+
159
+ # Officially only nya nyu nyo??
160
+ "にゃ": "nya"
161
+ "にゅ": "nyu"
162
+ "にゅぅ": "nyû"
163
+ "にょ": "nyo"
164
+ "にょぅ": "nyô"
165
+
166
+ # Officially not exists?
167
+ "のぅ": "nô"
168
+
169
+ "ひゃ": "hya"
170
+ "ひゅ": "hyu"
171
+ "ひゅぅ": "hyû"
172
+ "ひょ": "hyo"
173
+ "ひょぅ": "hyô"
174
+ "ほぅ": "hô"
175
+ "みゃ": "mya"
176
+ "みゅ": "myu"
177
+ "みゅぅ": "myû"
178
+ "みょ": "myo"
179
+ "みょぅ": "myô"
180
+ "もぅ": "mô"
181
+ "よぅ": "yô"
182
+ "りゃ": "rya"
183
+ "りゅ": "ryu"
184
+ "りゅぅ": "ryû"
185
+ "りょ": "ryo"
186
+ "りょぅ": "ryô"
187
+ "ろぅ": "rô"
188
+ "ぎゃ": "gya"
189
+ "ぎゅ": "gyu"
190
+ "ぎゅぅ": "gyû"
191
+ "ぎょ": "gyo"
192
+ "ぎょぅ": "gyô"
193
+ "ごぅ": "gô"
194
+ "じゃ": "zya"
195
+ "じゅ": "zyu"
196
+ "じゅぅ": "zyû"
197
+ "じょ": "zyo"
198
+ "じょぅ": "zyô"
199
+ "ぞぅ": "zô"
200
+ "どぅ": "dô"
201
+ "びゃ": "bya"
202
+ "びゅ": "byu"
203
+ "びゅぅ": "byû"
204
+ "びょ": "byo"
205
+ "びょぅ": "byô"
206
+ "ぼぅ": "bô"
207
+ "ぴゃ": "pya"
208
+ "ぴゅ": "pyu"
209
+ "ぴゅぅ": "pyû"
210
+ "ぴょ": "pyo"
211
+ "ぴょぅ": "pyô"
212
+ "ぽぅ": "pô"
213
+
214
+
215
+ "ア": "a"
216
+ "イ": "i"
217
+ "ウ": "u"
218
+ "エ": "e"
219
+ "オ": "o"
220
+ "カ": "ka"
221
+ "キ": "ki"
222
+ "ク": "ku"
223
+ "ケ": "ke"
224
+ "コ": "ko"
225
+ "サ": "sa"
226
+ "シ": "si"
227
+ "ス": "su"
228
+ "セ": "se"
229
+ "ソ": "so"
230
+ "タ": "ta"
231
+ "チ": "ti"
232
+ "ツ": "tu" # See note 1
233
+ "テ": "te"
234
+ "ト": "to"
235
+ "ナ": "na"
236
+ "ニ": "ni"
237
+ "ヌ": "nu"
238
+ "ネ": "ne"
239
+ "ノ": "no"
240
+ "ハ": "ha"
241
+ "ヒ": "hi"
242
+ "フ": "hu"
243
+ "ヘ": "he"
244
+ "ホ": "ho"
245
+ "マ": "ma"
246
+ "ミ": "mi"
247
+ "ム": "mu"
248
+ "メ": "me"
249
+ "モ": "mo"
250
+ "ヤ": "ya"
251
+ "ユ": "yu"
252
+ "ヨ": "yo"
253
+ "ラ": "ra"
254
+ "リ": "ri"
255
+ "ル": "ru"
256
+ "レ": "re"
257
+ "ロ": "ro"
258
+ "ワ": "wa"
259
+ "ン": "n" # See note 2
260
+ "ガ": "ga"
261
+ "ギ": "gi"
262
+ "グ": "gu"
263
+ "ゲ": "ge"
264
+ "ゴ": "go"
265
+ "ザ": "za"
266
+ "ジ": "zi"
267
+ "ズ": "zu"
268
+ "ゼ": "ze"
269
+ "ゾ": "zo"
270
+ "ダ": "da"
271
+ "ヂ": "di"
272
+ "ヅ": "du"
273
+ "デ": "de"
274
+ "ド": "do"
275
+ "バ": "ba"
276
+ "ビ": "bi"
277
+ "ブ": "bu"
278
+ "ベ": "be"
279
+ "ボ": "bo"
280
+ "パ": "pa"
281
+ "ピ": "pi"
282
+ "プ": "pu"
283
+ "ペ": "pe"
284
+ "ポ": "po"
285
+ "オゥ": "ô"
286
+ "オオ": "ô" # See note 4
287
+ "キャ": "kya"
288
+ "キュ": "kyu"
289
+ "キュゥ": "kyû"
290
+ "キョ": "kyo"
291
+ "キョゥ": "kyô"
292
+ "コゥ": "kô"
293
+ "シャ": "sya"
294
+ "シュ": "syu"
295
+ "シュゥ": "syû"
296
+ "ショ": "syo"
297
+ "ショゥ": "syô"
298
+ "ソゥ": "sô"
299
+ "チャ": "tya"
300
+ "チュ": "tyu"
301
+ "チュゥ": "tyû"
302
+ "チョ": "tyo"
303
+ "チョゥ": "tyô"
304
+ "トゥ": "tô"
305
+ "ニャ": "nya"
306
+ "ニュ": "nyu"
307
+ "ニュゥ": "nyû"
308
+ "ニョ": "nyo"
309
+ "ニョゥ": "nyô"
310
+ "ノゥ": "nô"
311
+ "ヒャ": "hya"
312
+ "ヒュ": "hyu"
313
+ "ヒュゥ": "hyû"
314
+ "ヒョ": "hyo"
315
+ "ヒョゥ": "hyô"
316
+ "ホゥ": "hô"
317
+ "ミャ": "mya"
318
+ "ミュ": "myu"
319
+ "ミュゥ": "myû"
320
+ "ミョ": "myo"
321
+ "ミョゥ": "myô"
322
+ "モゥ": "mô"
323
+ "ヨゥ": "yô"
324
+ "リャ": "rya"
325
+ "リュ": "ryu"
326
+ "リュゥ": "ryû"
327
+ "リョ": "ryo"
328
+ "リョゥ": "ryô"
329
+ "ロゥ": "rô"
330
+ "ギャ": "gya"
331
+ "ギュ": "gyu"
332
+ "ギュゥ": "gyû"
333
+ "ギョ": "gyo"
334
+ "ギョゥ": "gyô"
335
+ "ゴゥ": "gô"
336
+ "ジャ": "zya"
337
+ "ジュ": "zyu"
338
+ "ジュゥ": "zyû"
339
+ "ジョ": "zyo"
340
+ "ジョゥ": "zyô"
341
+ "ゾゥ": "zô"
342
+ "ドゥ": "dô"
343
+ "ビャ": "bya"
344
+ "ビュ": "byu"
345
+ "ビュゥ": "byû"
346
+ "ビョ": "byo"
347
+ "ビョゥ": "byô"
348
+ "ボゥ": "bô"
349
+ "ピャ": "pya"
350
+ "ピュ": "pyu"
351
+ "ピュゥ": "pyû"
352
+ "ピョ": "pyo"
353
+ "ピョゥ": "pyô"
354
+ "ポゥ": "pô"
@@ -0,0 +1,80 @@
1
+ ---
2
+ authority_id: un
3
+ id: 2013
4
+ language: mon
5
+ source_script: Mong
6
+ destination_script: Latn
7
+ name: Mongolian Romanization in China, Version 4.0
8
+ url: http://www.eki.ee/wgrs/rom1_mnc.htm
9
+ creation_date: 2013-09
10
+ description: |
11
+ The United Nations resolution III/8 in 1977 recognized the Scheme for
12
+ a Chinese Phonetic Alphabet (Pinyin) as China’s official Roman alphabet
13
+ scheme and recommended the alphabet as the international system for the
14
+ romanization of Chinese geographical names. In China Mongolian
15
+ geographical names are transcribed directly from the Mongolian script
16
+ into Pinyin. The scheme was published in Toponymic Guidelines for Map
17
+ and Other Editors: China, 1982.
18
+
19
+ The system is used in China and in international cartographic
20
+ products.
21
+
22
+ Mongolian uses a vertical script. Transcription of Mongolian names is
23
+ made from their proper pronunciation based on the written form of the
24
+ Mongolian language and Qahar vernacular, with Zhenglan as its
25
+ representative pronunciation. Due to the complex nature of the script
26
+ the romanization scheme is not reversible, e.g. the name of the city
27
+ Hohhot is written ᠬᠥᠬᠡᠬᠣᠲᠠ but may be transliterated as kökeqota.
28
+
29
+ notes:
30
+ - The long and short Mongolian vowels are not distinguished in the
31
+ spelling for general use, but in recording the pronunciation of
32
+ place-names, the long vowel is represented by duplication.
33
+ - Where two Roman equivalents are given, the second (in brackets) is
34
+ used for recording the pronunciation of place-names while the first
35
+ form is for general use.
36
+ - In the table only word-initial character variants are shown.
37
+ Depending on the position in the word many variants of the characters
38
+ are used as well as some ligatures. These features are not covered here.
39
+ - For technical reasons the characters of the Mongolian script are
40
+ turned 90° anti-clockwise.
41
+
42
+ tests:
43
+ - source: "ᠬᠥᠬᠡᠬᠣᠲᠠ"
44
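+ expected: "kökeqota" # NOTE(review): the map in this file gives "h" for ᠬ, so it cannot yield a result starting with "k" — confirm the expected value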
45
+ map:
46
+ characters:
47
+ "ᠠ": "a"
48
+ "ᠪ": "b"
49
+ "ᠼ": "c"
50
+ "ᠲ": "d"
51
+ "ᠳ": "d"
52
+ "ᠡ": "e"
53
+ "ᠹ": "f"
54
+ "ᠭ": "g"
55
+ "ᠺ": "g"
56
+ "ᠬ": "h"
57
+ "ᠾ": "h"
58
+ "ᠢ": "i"
59
+ "ᠵ": "j"
60
+ "ᠺ": "k"
61
+ "ᠯ": "l"
62
+ "ᠮ": "m"
63
+ "ᠨ": "n"
64
+ "ᠥ": "o"
65
+ "ᠫ": "p"
66
+ "ᠴ": "q"
67
+ "ᠷ": "r"
68
+ "ᠰ": "s"
69
+ "ᠲ": "t"
70
+ "ᠦ": "u"
71
+ "ᠸ": "w"
72
+ "ᠱ": "x"
73
+ "ᠶ": "y"
74
+ "ᠽ": "z"
75
+ "ᠣ":
76
+ - "o" # General use
77
+ - "ô" # For place names
78
+ "ᠤ":
79
+ - "u" # General use
80
+ - "û" # For place names
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Interscript do # Data-driven check: runs one transliteration system against the test cases stored in its own map file.
4
+ it "converts rus using bgnpcgn-rus-Cyrl-Latn-1947" do
5
+ system = YAML.load_file "maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml" # relative path, so it resolves against the process's current working directory
6
+ system["tests"].each do |test| # each entry is read through its "source" and "expected" keys
7
+ result = Interscript.transliterate "bgnpcgn-rus-Cyrl-Latn-1947", test["source"] # system name passed here matches the map file's base name
8
+ expect(result).to eq test["expected"] # exact string equality; the first mismatching case fails the example
9
+ end
10
+ end
11
+ end
@@ -0,0 +1 @@
1
+ require 'interscript'