interscript 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/bas-rus-Cyrl-Latn-bss.yaml +149 -0
- data/maps/bas-rus-Cyrl-Latn-oss.yaml +149 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +109 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +92 -0
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +7503 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +93 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +233 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +90 -0
- data/maps/cn-chn-Hans-Latn-pinyin.yaml +24760 -0
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +336 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +125 -0
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +123 -0
- data/maps/icao-gre-Grek-Latn-9303.yaml +101 -0
- data/maps/icao-heb-Hebr-Latn-9303.yaml +157 -0
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +118 -0
- data/maps/icao-per-Arab-Latn-9303.yaml +105 -0
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +119 -0
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +118 -0
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +121 -0
- data/maps/iso-rus-Cyrl-Latn-iso9.yaml +273 -0
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +330 -0
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +308 -0
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +313 -0
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +354 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +80 -0
- data/spec/interscript_spec.rb +11 -0
- data/spec/spec_helper.rb +1 -0
- metadata +32 -3
@@ -0,0 +1,354 @@
|
|
1
|
+
---
|
2
|
+
authority_id: un
|
3
|
+
id: kunrei
|
4
|
+
language: jpn
|
5
|
+
source_script: Hrkt
|
6
|
+
destination_script: Latn
|
7
|
+
name: Romanization of Japanese, Kunrei-siki
|
8
|
+
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/9th-uncsgn-docs/econf/9th_UNCSGN_e-conf-98-47-add1.pdf
|
9
|
+
creation_date: 2007
|
10
|
+
adoption_date: 2007-06-29
|
11
|
+
description: |
|
12
|
+
The official romanization system for Japanese is the Kunrei-siki.
|
13
|
+
It was officially adopted on 9 December, 1954 (Cabinet Notification No. 1, table 1).
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- A small-script tu/tsu form (ッ / っ) is inserted between kana symbols to indicate a double consonant (kk, ss, tt, pp in Kunrei-siki).
|
17
|
+
- ン / ん is always romanized as n in Kunrei-siki; when it is necessary to separate the sound n from a following vowel or y, an apostrophe is added after the n.
|
18
|
+
- Long vowels are expressed in Kunrei-siki by placing a circumflex (^) over a vowel.
|
19
|
+
- The combination in parentheses is used to denote the word meaning ‘big, great’.
|
20
|
+
|
21
|
+
tests:
|
22
|
+
- source: かんおう
|
23
|
+
expected: kanô #kan'ô
|
24
|
+
- source: かのう
|
25
|
+
expected: kanô
|
26
|
+
- source: きんゆう
|
27
|
+
expected: kinyû # kin'yû
|
28
|
+
- source: きにゅう
|
29
|
+
expected: kinyû
|
30
|
+
- source: とうきょう
|
31
|
+
expected: tôkyô
|
32
|
+
|
33
|
+
map:
|
34
|
+
characters:
|
35
|
+
"あ": "a"
|
36
|
+
"い": "i"
|
37
|
+
"う": "u"
|
38
|
+
"え": "e"
|
39
|
+
"お": "o"
|
40
|
+
|
41
|
+
"か": "ka"
|
42
|
+
"き": "ki"
|
43
|
+
"く": "ku"
|
44
|
+
"け": "ke"
|
45
|
+
"こ": "ko"
|
46
|
+
|
47
|
+
"さ": "sa"
|
48
|
+
"し": "si"
|
49
|
+
"す": "su"
|
50
|
+
"せ": "se"
|
51
|
+
"そ": "so"
|
52
|
+
|
53
|
+
"た": "ta"
|
54
|
+
"ち": "ti"
|
55
|
+
"つ": "tu" # See note 1
|
56
|
+
"て": "te"
|
57
|
+
"と": "to"
|
58
|
+
|
59
|
+
"な": "na"
|
60
|
+
"に": "ni"
|
61
|
+
"ぬ": "nu"
|
62
|
+
"ね": "ne"
|
63
|
+
"の": "no"
|
64
|
+
|
65
|
+
"は": "ha"
|
66
|
+
"ひ": "hi"
|
67
|
+
"ふ": "hu"
|
68
|
+
"へ": "he"
|
69
|
+
"ほ": "ho"
|
70
|
+
|
71
|
+
"ま": "ma"
|
72
|
+
"み": "mi"
|
73
|
+
"む": "mu"
|
74
|
+
"め": "me"
|
75
|
+
"も": "mo"
|
76
|
+
|
77
|
+
"ら": "ra"
|
78
|
+
"り": "ri"
|
79
|
+
"る": "ru"
|
80
|
+
"れ": "re"
|
81
|
+
"ろ": "ro"
|
82
|
+
|
83
|
+
"わ": "wa"
|
84
|
+
"を": "o"
|
85
|
+
|
86
|
+
"ん": "n" # See note 2
|
87
|
+
|
88
|
+
"が": "ga"
|
89
|
+
"ぎ": "gi"
|
90
|
+
"ぐ": "gu"
|
91
|
+
"げ": "ge"
|
92
|
+
"ご": "go"
|
93
|
+
|
94
|
+
"ざ": "za"
|
95
|
+
"じ": "zi"
|
96
|
+
"ず": "zu"
|
97
|
+
"ぜ": "ze"
|
98
|
+
"ぞ": "zo"
|
99
|
+
|
100
|
+
"だ": "da"
|
101
|
+
"ぢ": "di"
|
102
|
+
"づ": "du"
|
103
|
+
"で": "de"
|
104
|
+
"ど": "do"
|
105
|
+
|
106
|
+
"ば": "ba"
|
107
|
+
"び": "bi"
|
108
|
+
"ぶ": "bu"
|
109
|
+
"べ": "be"
|
110
|
+
"ぼ": "bo"
|
111
|
+
|
112
|
+
"ぱ": "pa"
|
113
|
+
"ぴ": "pi"
|
114
|
+
"ぷ": "pu"
|
115
|
+
"ぺ": "pe"
|
116
|
+
"ぽ": "po"
|
117
|
+
|
118
|
+
"おぅ": "ô"
|
119
|
+
"おお": "ô" # See note 4
|
120
|
+
|
121
|
+
# Officially only kya kyu kyo??
|
122
|
+
"きゃ": "kya"
|
123
|
+
"きゅ": "kyu"
|
124
|
+
"きゅぅ": "kyû"
|
125
|
+
"きょ": "kyo"
|
126
|
+
"きょぅ": "kyô"
|
127
|
+
|
128
|
+
# Does not officially exist?
|
129
|
+
"こぅ": "kô"
|
130
|
+
|
131
|
+
# Officially only sya syu syo??
|
132
|
+
"しゃ": "sya"
|
133
|
+
"しゅ": "syu"
|
134
|
+
"しゅぅ": "syû"
|
135
|
+
"しょ": "syo"
|
136
|
+
"しょぅ": "syô"
|
137
|
+
|
138
|
+
# Supplementary
|
139
|
+
"ゃ": "ya"
|
140
|
+
"ゅ": "yu"
|
141
|
+
"ょ": "yo"
|
142
|
+
"ぅ": "u"
|
143
|
+
"ょぅ": "yô"
|
144
|
+
"ゅぅ": "yû"
|
145
|
+
|
146
|
+
# Does not officially exist?
|
147
|
+
"そぅ": "sô"
|
148
|
+
|
149
|
+
# Officially only tya tyu tyo??
|
150
|
+
"ちゃ": "tya"
|
151
|
+
"ちゅ": "tyu"
|
152
|
+
"ちゅぅ": "tyû"
|
153
|
+
"ちょ": "tyo"
|
154
|
+
"ちょぅ": "tyô"
|
155
|
+
|
156
|
+
# Does not officially exist?
|
157
|
+
"とぅ": "tô"
|
158
|
+
|
159
|
+
# Officially only nya nyu nyo??
|
160
|
+
"にゃ": "nya"
|
161
|
+
"にゅ": "nyu"
|
162
|
+
"にゅぅ": "nyû"
|
163
|
+
"にょ": "nyo"
|
164
|
+
"にょぅ": "nyô"
|
165
|
+
|
166
|
+
# Does not officially exist?
|
167
|
+
"のぅ": "nô"
|
168
|
+
|
169
|
+
"ひゃ": "hya"
|
170
|
+
"ひゅ": "hyu"
|
171
|
+
"ひゅぅ": "hyû"
|
172
|
+
"ひょ": "hyo"
|
173
|
+
"ひょぅ": "hyô"
|
174
|
+
"ほぅ": "hô"
|
175
|
+
"みゃ": "mya"
|
176
|
+
"みゅ": "myu"
|
177
|
+
"みゅぅ": "myû"
|
178
|
+
"みょ": "myo"
|
179
|
+
"みょぅ": "myô"
|
180
|
+
"もぅ": "mô"
|
181
|
+
"よぅ": "yô"
|
182
|
+
"りゃ": "rya"
|
183
|
+
"りゅ": "ryu"
|
184
|
+
"りゅぅ": "ryû"
|
185
|
+
"りょ": "ryo"
|
186
|
+
"りょぅ": "ryô"
|
187
|
+
"ろぅ": "rô"
|
188
|
+
"ぎゃ": "gya"
|
189
|
+
"ぎゅ": "gyu"
|
190
|
+
"ぎゅぅ": "gyû"
|
191
|
+
"ぎょ": "gyo"
|
192
|
+
"ぎょぅ": "gyô"
|
193
|
+
"ごぅ": "gô"
|
194
|
+
"じゃ": "zya"
|
195
|
+
"じゅ": "zyu"
|
196
|
+
"じゅぅ": "zyû"
|
197
|
+
"じょ": "zyo"
|
198
|
+
"じょぅ": "zyô"
|
199
|
+
"ぞぅ": "zô"
|
200
|
+
"どぅ": "dô"
|
201
|
+
"びゃ": "bya"
|
202
|
+
"びゅ": "byu"
|
203
|
+
"びゅぅ": "byû"
|
204
|
+
"びょ": "byo"
|
205
|
+
"びょぅ": "byô"
|
206
|
+
"ぼぅ": "bô"
|
207
|
+
"ぴゃ": "pya"
|
208
|
+
"ぴゅ": "pyu"
|
209
|
+
"ぴゅぅ": "pyû"
|
210
|
+
"ぴょ": "pyo"
|
211
|
+
"ぴょぅ": "pyô"
|
212
|
+
"ぽぅ": "pô"
|
213
|
+
|
214
|
+
|
215
|
+
"ア": "a"
|
216
|
+
"イ": "i"
|
217
|
+
"ウ": "u"
|
218
|
+
"エ": "e"
|
219
|
+
"オ": "o"
|
220
|
+
"カ": "ka"
|
221
|
+
"キ": "ki"
|
222
|
+
"ク": "ku"
|
223
|
+
"ケ": "ke"
|
224
|
+
"コ": "ko"
|
225
|
+
"サ": "sa"
|
226
|
+
"シ": "si"
|
227
|
+
"ス": "su"
|
228
|
+
"セ": "se"
|
229
|
+
"ソ": "so"
|
230
|
+
"タ": "ta"
|
231
|
+
"チ": "ti"
|
232
|
+
"ツ": "tu" # See note 1
|
233
|
+
"テ": "te"
|
234
|
+
"ト": "to"
|
235
|
+
"ナ": "na"
|
236
|
+
"ニ": "ni"
|
237
|
+
"ヌ": "nu"
|
238
|
+
"ネ": "ne"
|
239
|
+
"ノ": "no"
|
240
|
+
"ハ": "ha"
|
241
|
+
"ヒ": "hi"
|
242
|
+
"フ": "hu"
|
243
|
+
"ヘ": "he"
|
244
|
+
"ホ": "ho"
|
245
|
+
"マ": "ma"
|
246
|
+
"ミ": "mi"
|
247
|
+
"ム": "mu"
|
248
|
+
"メ": "me"
|
249
|
+
"モ": "mo"
|
250
|
+
"ヤ": "ya"
|
251
|
+
"ユ": "yu"
|
252
|
+
"ヨ": "yo"
|
253
|
+
"ラ": "ra"
|
254
|
+
"リ": "ri"
|
255
|
+
"ル": "ru"
|
256
|
+
"レ": "re"
|
257
|
+
"ロ": "ro"
|
258
|
+
"ワ": "wa"
|
259
|
+
"ン": "n" # See note 2
|
260
|
+
"ガ": "ga"
|
261
|
+
"ギ": "gi"
|
262
|
+
"グ": "gu"
|
263
|
+
"ゲ": "ge"
|
264
|
+
"ゴ": "go"
|
265
|
+
"ザ": "za"
|
266
|
+
"ジ": "zi"
|
267
|
+
"ズ": "zu"
|
268
|
+
"ゼ": "ze"
|
269
|
+
"ゾ": "zo"
|
270
|
+
"ダ": "da"
|
271
|
+
"ヂ": "di"
|
272
|
+
"ヅ": "du"
|
273
|
+
"デ": "de"
|
274
|
+
"ド": "do"
|
275
|
+
"バ": "ba"
|
276
|
+
"ビ": "bi"
|
277
|
+
"ブ": "bu"
|
278
|
+
"ベ": "be"
|
279
|
+
"ボ": "bo"
|
280
|
+
"パ": "pa"
|
281
|
+
"ピ": "pi"
|
282
|
+
"プ": "pu"
|
283
|
+
"ペ": "pe"
|
284
|
+
"ポ": "po"
|
285
|
+
"オゥ": "ô"
|
286
|
+
"オオ": "ô" # See note 4
|
287
|
+
"キャ": "kya"
|
288
|
+
"キュ": "kyu"
|
289
|
+
"キュゥ": "kyû"
|
290
|
+
"キョ": "kyo"
|
291
|
+
"キョゥ": "kyô"
|
292
|
+
"コゥ": "kô"
|
293
|
+
"シャ": "sya"
|
294
|
+
"シュ": "syu"
|
295
|
+
"シュゥ": "syû"
|
296
|
+
"ショ": "syo"
|
297
|
+
"ショゥ": "syô"
|
298
|
+
"ソゥ": "sô"
|
299
|
+
"チャ": "tya"
|
300
|
+
"チュ": "tyu"
|
301
|
+
"チュゥ": "tyû"
|
302
|
+
"チョ": "tyo"
|
303
|
+
"チョゥ": "tyô"
|
304
|
+
"トゥ": "tô"
|
305
|
+
"ニャ": "nya"
|
306
|
+
"ニュ": "nyu"
|
307
|
+
"ニュゥ": "nyû"
|
308
|
+
"ニョ": "nyo"
|
309
|
+
"ニョゥ": "nyô"
|
310
|
+
"ノゥ": "nô"
|
311
|
+
"ヒャ": "hya"
|
312
|
+
"ヒュ": "hyu"
|
313
|
+
"ヒュゥ": "hyû"
|
314
|
+
"ヒョ": "hyo"
|
315
|
+
"ヒョゥ": "hyô"
|
316
|
+
"ホゥ": "hô"
|
317
|
+
"ミャ": "mya"
|
318
|
+
"ミュ": "myu"
|
319
|
+
"ミュゥ": "myû"
|
320
|
+
"ミョ": "myo"
|
321
|
+
"ミョゥ": "myô"
|
322
|
+
"モゥ": "mô"
|
323
|
+
"ヨゥ": "yô"
|
324
|
+
"リャ": "rya"
|
325
|
+
"リュ": "ryu"
|
326
|
+
"リュゥ": "ryû"
|
327
|
+
"リョ": "ryo"
|
328
|
+
"リョゥ": "ryô"
|
329
|
+
"ロゥ": "rô"
|
330
|
+
"ギャ": "gya"
|
331
|
+
"ギュ": "gyu"
|
332
|
+
"ギュゥ": "gyû"
|
333
|
+
"ギョ": "gyo"
|
334
|
+
"ギョゥ": "gyô"
|
335
|
+
"ゴゥ": "gô"
|
336
|
+
"ジャ": "zya"
|
337
|
+
"ジュ": "zyu"
|
338
|
+
"ジュゥ": "zyû"
|
339
|
+
"ジョ": "zyo"
|
340
|
+
"ジョゥ": "zyô"
|
341
|
+
"ゾゥ": "zô"
|
342
|
+
"ドゥ": "dô"
|
343
|
+
"ビャ": "bya"
|
344
|
+
"ビュ": "byu"
|
345
|
+
"ビュゥ": "byû"
|
346
|
+
"ビョ": "byo"
|
347
|
+
"ビョゥ": "byô"
|
348
|
+
"ボゥ": "bô"
|
349
|
+
"ピャ": "pya"
|
350
|
+
"ピュ": "pyu"
|
351
|
+
"ピュゥ": "pyû"
|
352
|
+
"ピョ": "pyo"
|
353
|
+
"ピョゥ": "pyô"
|
354
|
+
"ポゥ": "pô"
|
@@ -0,0 +1,80 @@
|
|
1
|
+
---
|
2
|
+
authority_id: un
|
3
|
+
id: 2013
|
4
|
+
language: mon
|
5
|
+
source_script: Mong
|
6
|
+
destination_script: Latn
|
7
|
+
name: Mongolian Romanization in China, Version 4.0
|
8
|
+
url: http://www.eki.ee/wgrs/rom1_mnc.htm
|
9
|
+
creation_date: 2013-09
|
10
|
+
description: |
|
11
|
+
The United Nations resolution III/8 in 1977 recognized the Scheme for
|
12
|
+
a Chinese Phonetic Alphabet (Pinyin) as China’s official Roman alphabet
|
13
|
+
scheme and recommended the alphabet as the international system for the
|
14
|
+
romanization of Chinese geographical names. In China Mongolian
|
15
|
+
geographical names are transcribed directly from the Mongolian script
|
16
|
+
into Pinyin. The scheme was published in Toponymic Guidelines for Map
|
17
|
+
and Other Editors: China, 1982.
|
18
|
+
|
19
|
+
The system is used in China and in international cartographic
|
20
|
+
products.
|
21
|
+
|
22
|
+
Mongolian uses a vertical script. Transcription of Mongolian names is
|
23
|
+
made from their proper pronunciation based on the written form of the
|
24
|
+
Mongolian language and Qahar vernacular, with Zhenglan as its
|
25
|
+
representative pronunciation. Due to the complex nature of the script
|
26
|
+
the romanization scheme is not reversible, e.g. the name of the city
|
27
|
+
Hohhot is written ᠬᠥᠬᠡᠬᠣᠲᠠ but may be transliterated as kökeqota.
|
28
|
+
|
29
|
+
notes:
|
30
|
+
- The long and short Mongolian vowels are not distinguished in the
|
31
|
+
spelling for general use, but in recording the pronunciation of
|
32
|
+
place-names, the long vowel is represented by duplication.
|
33
|
+
- Where two Roman equivalents are given, the second (in brackets) is
|
34
|
+
used for recording the pronunciation of place-names while the first
|
35
|
+
form is for general use.
|
36
|
+
- In the table only word-initial character variants are shown.
|
37
|
+
Depending on the position in the word many variants of the characters
|
38
|
+
are used as well as some ligatures. These features are not covered here.
|
39
|
+
- For technical reasons the characters of the Mongolian script are
|
40
|
+
turned 90° anti-clockwise.
|
41
|
+
|
42
|
+
tests:
|
43
|
+
- source: "ᠬᠥᠬᠡᠬᠣᠲᠠ"
|
44
|
+
expected: "kökeqota"
|
45
|
+
map:
|
46
|
+
characters:
|
47
|
+
"ᠠ": "a"
|
48
|
+
"ᠪ": "b"
|
49
|
+
"ᠼ": "c"
|
50
|
+
"ᠲ": "d"
|
51
|
+
"ᠳ": "d"
|
52
|
+
"ᠡ": "e"
|
53
|
+
"ᠹ": "f"
|
54
|
+
"ᠭ": "g"
|
55
|
+
"ᠺ": "g"
|
56
|
+
"ᠬ": "h"
|
57
|
+
"ᠾ": "h"
|
58
|
+
"ᠢ": "i"
|
59
|
+
"ᠵ": "j"
|
60
|
+
"ᠺ": "k"
|
61
|
+
"ᠯ": "l"
|
62
|
+
"ᠮ": "m"
|
63
|
+
"ᠨ": "n"
|
64
|
+
"ᠥ": "o"
|
65
|
+
"ᠫ": "p"
|
66
|
+
"ᠴ": "q"
|
67
|
+
"ᠷ": "r"
|
68
|
+
"ᠰ": "s"
|
69
|
+
"ᠲ": "t"
|
70
|
+
"ᠦ": "u"
|
71
|
+
"ᠸ": "w"
|
72
|
+
"ᠱ": "x"
|
73
|
+
"ᠶ": "y"
|
74
|
+
"ᠽ": "z"
|
75
|
+
"ᠣ":
|
76
|
+
- "o" # General use
|
77
|
+
- "ô" # For place names
|
78
|
+
"ᠤ":
|
79
|
+
- "u" # General use
|
80
|
+
- "û" # For place names
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true

# Drives Interscript with every sample listed under the "tests" key of the
# bgnpcgn-rus-Cyrl-Latn-1947 map file and checks each transliteration result
# against the sample's "expected" value.
RSpec.describe Interscript do
  it "converts rus using bgnpcgn-rus-Cyrl-Latn-1947" do
    # Relative path: resolved against the directory the specs are run from.
    definition = YAML.load_file("maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml")

    definition["tests"].each do |sample|
      romanized = Interscript.transliterate("bgnpcgn-rus-Cyrl-Latn-1947", sample["source"])
      expect(romanized).to eq(sample["expected"])
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'interscript'
|