interscript 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,184 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2007
4
+ language: bak
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BASHKIR TABLE OF CORRESPONDENCES CYRILLIC-ROMAN BGN/PCGN 2007 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
9
+ creation_date: 2007
10
+ confirmation_date: 2019
11
+ description: |
12
+ Bashkir is an official language within Respublika Bashkortostan, one of the
13
+ republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
14
+ which case it should be romanized by means of the Cyrillic-Roman table of
15
+ correspondences given below.
16
+
17
+ notes:
18
+ - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
19
+ - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
20
+ - The letter w is used between or after vowels.
21
+ - The letter w is used after e, u, ö and ə.
22
+ - |
23
+ An inventory of letter-diacritic combinations, with their Unicode encoding,
24
+ in addition to the unmodified letters of the basic Roman script is:
25
+ Ğ (U+011E) ğ (U+011F)
26
+ Ź (U+0179) ź (U+017A)
27
+ Ë (U+00CB) ë (U+00EB)
28
+ Ñ (U+00D1) ñ (U+00F1)
29
+ Ö (U+00D6) ö (U+00F6)
30
+ Ś (U+015A) ś (U+015B)
31
+ Ü (U+00DC) ü (U+00FC)
32
+ Ç (U+00C7) ç (U+00E7)
33
+ Ş (U+015E) ş (U+015F)
34
+ Ə (U+018F) ə (U+0259)
35
+ - |
36
+ The Roman-script columns show only lowercase forms but, when applying the table,
37
+ uppercase and lowercase Roman letters as appropriate should be used.
38
+
39
+ tests:
40
+ # adopted http://www.eki.ee/knab/lat/kblba.pdf
41
+ - source: Васйылға
42
+ expected: Wasyılğa
43
+ - source: Еҙем
44
+ expected: Yeźem
45
+ - source: Раевка
46
+ expected: Raevka
47
+ - source: Сәйетҡол
48
+ expected: Səyetqol
49
+ - source: Ауырғазы
50
+ expected: Awırğazı
51
+ - source: Бурһыҡтау
52
+ expected: Burhıqtaw
53
+ - source: Мәләүез
54
+ expected: Mələwez
55
+ - source: Ҡыҙылъяр
56
+ expected: Qıźılyar
57
+ # adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
58
+ - source: кемдең
59
+ expected: kemdeñ
60
+ - source: кем
61
+ expected: kem
62
+ - source: был
63
+ expected: bıl
64
+ - source: ошо
65
+ expected: oşo
66
+ - source: быларҙың
67
+ expected: bılarźıñ
68
+ - source: һеҙҙән
69
+ expected: heźźən
70
+ - source: һин
71
+ expected: hin
72
+ - source: һеҙҙең
73
+ expected: heźźeñ
74
+
75
+ map:
76
+ rules:
77
+ # note[1]
78
+ - pattern: \b\u0412(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
79
+ result: "W"
80
+ - pattern: \b\u0432(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
81
+ result: "w"
82
+ # note[2]
83
+ - pattern: \b\u0415
84
+ result: "Ye"
85
+ - pattern: \b\u0435
86
+ result: "ye"
87
+ - pattern: (?=\b)\u0415(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
88
+ result: "Ye"
89
+ - pattern: (?=\b)\u0435(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
90
+ result: "ye"
91
+
92
+ # note[3] # note[4]
93
+ - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0423\u04AE]
94
+ result: W
95
+ - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0443\u04AF]
96
+ result: w
97
+
98
+
99
+ characters:
100
+ '\u0410': 'A' # А
101
+ '\u0411': 'B' # Б
102
+ '\u0412': 'V' # В note[1]
103
+ '\u0413': 'G' # Г
104
+ '\u0492': "\u011E" # Ғ
105
+ '\u0414': 'D' # Д
106
+ '\u0498': "\u0179" # Ҙ
107
+ '\u0415': 'E' # Е note[2]
108
+ '\u0401': 'Ë' # Ё
109
+ '\u0416': 'J' # Ж
110
+ '\u0417': 'Z' # З
111
+ '\u0418': 'I' # И
112
+ '\u0419': 'Y' # Й
113
+ '\u041A': 'K' # К
114
+ '\u04A0': 'Q' # Ҡ
115
+ '\u041B': 'L' # Л
116
+ '\u041C': 'M' # М
117
+ '\u041D': 'N' # Н
118
+ '\u04A2': 'Ñ' # Ң
119
+ '\u041E': 'O' # О
120
+ '\u04E8': "\u00D6" # Ө
121
+ '\u041F': 'P' # П
122
+ '\u0420': 'R' # Р
123
+ '\u0421': 'S' # С
124
+ '\u04AA': 'Ś' # Ҫ
125
+ '\u0422': 'T' # Т
126
+ '\u0423': 'U' # У
127
+ '\u04AE': 'Ü' # Ү note[3]
128
+ '\u0424': 'F' # Ф
129
+ '\u0425': 'X' # Х
130
+ '\u04BA': 'H' # Һ
131
+ '\u0426': 'Ts' # Ц
132
+ '\u0427': 'Ç' # Ч
133
+ '\u0428': 'Ş' # Ш
134
+ '\u0429': 'Şç' # Щ
135
+ '\u042A': '' # Ъ
136
+ '\u042B': 'I' # Ы
137
+ '\u042C': '' # Ь
138
+ '\u042D': 'E' # Э
139
+ '\u04D8': "\u018F" # Ә
140
+ '\u042E': 'Yu' # Ю
141
+ '\u042F': 'Ya' # Я
142
+
143
+ '\u0430': 'a' # а
144
+ '\u0431': 'b' # б
145
+ '\u0432': 'v' # в note[1]
146
+ '\u0433': 'g' # г
147
+ '\u0493': "\u011F" # ғ
148
+ '\u0434': 'd' # д
149
+ '\u0499': 'ź' # ҙ
150
+ '\u0435': 'e' # e note[2]
151
+ '\u0451': 'ë' # ё
152
+ '\u0436': 'j' # ж
153
+ '\u0437': 'z' # з
154
+ '\u0438': 'i' # и
155
+ '\u0439': 'y' # й
156
+ '\u043A': 'k' # к
157
+ '\u04A1': 'q' # ҡ
158
+ '\u043B': 'l' # л
159
+ '\u043C': 'm' # м
160
+ '\u043D': 'n' # н
161
+ '\u04A3': 'ñ' # ң
162
+ '\u043E': 'o' # о
163
+ '\u04E9': "\u00F6" # ө
164
+ '\u043F': 'p' # п
165
+ '\u0440': 'r' # р
166
+ '\u0441': 's' # с
167
+ '\u04AB': 'ś' # ҫ
168
+ '\u0442': 't' # т
169
+ '\u0443': 'u' # у
170
+ "\u04AF": 'ü' # ү note[3]
171
+ '\u0444': 'f' # ф
172
+ '\u0445': 'x' # х
173
+ '\u04BB': 'h' # һ
174
+ '\u0446': 'ts' # ц
175
+ '\u0447': 'ç' # ч
176
+ '\u0448': 'ş' # ш
177
+ '\u0449': 'şç' # щ
178
+ '\u044A': '' # ъ
179
+ '\u044B': "\u0131" # ы
180
+ '\u044C': '' # ь
181
+ '\u044D': 'e' # э
182
+ '\u04D9': "\u0259" # ә
183
+ '\u044E': 'yu' # ю
184
+ '\u044F': 'ya' # я
@@ -51,7 +51,7 @@ map:
51
51
  '\u0413': 'G'
52
52
  '\u0414': 'D'
53
53
  '\u0415': 'E'
54
- '\u0416': 'ZH'
54
+ '\u0416': 'Zh'
55
55
  '\u0417': 'Z'
56
56
  '\u0418': 'I'
57
57
  '\u0419': 'Y'
@@ -66,15 +66,15 @@ map:
66
66
  '\u0422': 'T'
67
67
  '\u0423': 'U'
68
68
  '\u0424': 'F'
69
- '\u0425': 'KH'
70
- '\u0426': 'TS'
71
- '\u0427': 'CH'
72
- '\u0428': 'SH'
73
- '\u0429': 'SHT'
69
+ '\u0425': 'Kh'
70
+ '\u0426': 'Ts'
71
+ '\u0427': 'Ch'
72
+ '\u0428': 'Sh'
73
+ '\u0429': 'Sht'
74
74
  '\u042a': "U\u0306"
75
75
  '\u042c': "\\'"
76
- '\u042e': 'YU'
77
- '\u042f': 'YA'
76
+ '\u042e': 'Yu'
77
+ '\u042f': 'Ya'
78
78
  '\u0430': 'a'
79
79
  '\u0431': 'b'
80
80
  '\u0432': 'v'
@@ -111,5 +111,5 @@ map:
111
111
  '\u046B': "u\u0306" # ѫ
112
112
 
113
113
  # note[3]
114
- '\u0462': "YE" # Ѣ
114
+ '\u0462': "Ye" # Ѣ
115
115
  '\u0463': "ye" # ѣ
@@ -0,0 +1,225 @@
1
+ ---
2
+ authority_id: mvd
3
+ id: 19678
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: About approval of the Instructions for transliteration of surnames and proper names of citizens of the Republic of Belarus when their personal data is included in the population register
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2008
10
+
11
+ description: |
12
+ RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
13
+ October 9, 2008, No. 288
14
+ 8/19678 (10.23.2008)
15
+ About approval of the Instructions for transliteration of surnames and proper names of citizens of
16
+ the Republic of Belarus when their personal data is included in the population register
17
+
18
+ notes:
19
+ # Original notes
20
+ # - |
21
+ # Инструкция по транслитерации фамилий и собственных имен граждан Республики Беларусь при включении
22
+ # их персональных данных в регистр населения устанавливает правила передачи с белорусской либо русской формы
23
+ # написания на латиницу при включении фамилий и собственных имен граждан Республики Беларусь в регистр населения.
24
+ # - |
25
+ # Передача фамилий и собственных имен граждан Республики Беларусь на латиницу осуществляется с их правильного
26
+ # написания на белорусском либо русском языке, засвидетельствованного документами, удостоверяющими личность.
27
+ # - Передача фамилий и собственных имен граждан Республики Беларусь осуществляется путем транслитерации
28
+ # литер (букв, знаков) белорусского либо русского написания соответствующими литерами латиницы.
29
+ # - Транслитерацией достигается общность и унифицированность системы латинизированного написания,
30
+ # позволяющей пользоваться ею во всех латинопишущих государствах.
31
+ # - Смягчение согласной буквы, обозначенное мягким знаком, в белорусской латинице следует показывать
32
+ # диакритическим знаком (́), который располагается над соответствующей буквой: дзь – dź, зь – ź, ль – ĺ,
33
+ # нь – ń, сь – ś, ць – ć.
34
+ # - Правила транслитерации букв белорусского и русского алфавитов соответствующими буквами латиницы
35
+ # приведены в таблице транслитерации букв белорусского и русского алфавитов буквами латиницы соглас
36
+ # но приложению к настоящей Инструкции.
37
+ # - Сложные и составные фамилии и собственные имена, пишущиеся слитно, раздельно или через дефис,
38
+ # сохраняют слитное, раздельное или дефисное написание и в латинице.
39
+ - | # 1
40
+ Instructions for transliterating the names and first names of citizens of the Republic of Belarus
41
+ when including their personal data in the population register sets the rules for transferring
42
+ from the Belarusian or Russian form of writing in Latin when including the names and first names
43
+ of citizens of the Republic of Belarus in the population register.
44
+ - | # 2
45
+ Transfer of surnames and proper names of citizens of the Republic of Belarus to the Latin alphabet
46
+ is carried out from their correct spelling in Belarusian or Russian, as attested by identity documents.
47
+ - | # 3
48
+ The transfer of surnames and proper names of citizens of the Republic of Belarus is carried out by
49
+ transliteration of the letters (letters, signs) of the Belarusian or Russian spelling in the corresponding
50
+ Latin letters.
51
+ - | # 4
52
+ Transliteration achieves the generality and unification of the system of Latinized writing,
53
+ which allows it to be used in all Latin-writing countries.
54
+ - | # 5
55
+ The softening of the consonant, indicated by a soft sign, in the Belarusian Latin should be shown
56
+ with a diacritic mark (́), which is located above the corresponding letter:
57
+ дзь - dź,
58
+ зь - ź,
59
+ ль - ĺ,
60
+ нь - ń,
61
+ сь - ś,
62
+ ць - ć.
63
+ # 6
64
+ - The rules for transliterating letters of the Belarusian and Russian alphabets with the corresponding
65
+ letters of the Latin alphabet are given in the table of transliteration of letters of the Belarusian
66
+ and Russian alphabets with the Latin letters according to the appendix to this Instruction.
67
+ # 7
68
+ - Complex and compound surnames and proper names, spelled together, separately or through a hyphen,
69
+ keep their joined, separate or hyphenated spelling in Latin script.
70
+
71
+ tests:
72
+ - source: Ева
73
+ expected: Jeva
74
+ - source: Васiльева
75
+ expected: Vasiĺjeva
76
+ - source: Васiлёнак
77
+ expected: Vasilionak
78
+ - source: Ёрш
79
+ expected: Jorsh
80
+ - source: Вераб’ёў
81
+ expected: Vierabjow
82
+ - source: Салаўёва
83
+ expected: Salawjova
84
+ - source: Любоў
85
+ expected: Liubow
86
+ - source: В’юноў
87
+ expected: Vjunow
88
+ - source: Чарняк
89
+ expected: Charniak
90
+ - source: Лябецкая
91
+ expected: Liabietskaja # in reference doc it's Liabetskaja CAMOBAP waiting confirmation from officials
92
+ - source: Дар’я
93
+ expected: Darja
94
+
95
+ map:
96
+ rules:
97
+ - pattern: (\u2019\u0415) # Е
98
+ result: Je
99
+ - pattern: (\u2019\u0435) # е
100
+ result: je
101
+ - pattern: (\u2019\u0401) # Ё
102
+ result: Jo
103
+ - pattern: (\u2019\u0451) # ё
104
+ result: jo
105
+ - pattern: (\u2019\u042E) # Ю
106
+ result: Ju
107
+ - pattern: (\u2019\u044E) # ю
108
+ result: ju
109
+ - pattern: (\u2019\u042F) # Я
110
+ result: Ja
111
+ - pattern: (\u2019\u044F) # я
112
+ result: ja
113
+
114
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0415 # Е after vowels
115
+ result: Je
116
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0435 # е after vowels
117
+ result: je
118
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0401 # Ё after vowels
119
+ result: Jo
120
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0451 # ё after vowels
121
+ result: jo
122
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u042E # Ю after vowels
123
+ result: Ju
124
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u044E # ю after vowels
125
+ result: ju
126
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u042F # Я after vowels
127
+ result: Ja
128
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u044F # я after vowels
129
+ result: ja
130
+
131
+ # note[5]
132
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
133
+ result: "\\1\u0301"
134
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u042C # Ь after consonants
135
+ result: "\\1\u0301"
136
+
137
+ # vowels initially
138
+ - pattern: \b\u0415 # Е
139
+ result: Je
140
+ - pattern: \b\u0435 # е
141
+ result: je
142
+ - pattern: \b\u0401 # Ё
143
+ result: Jo
144
+ - pattern: \b\u0451 # ё
145
+ result: jo
146
+ - pattern: \b\u042E # Ю
147
+ result: Ju
148
+ - pattern: \b\u044E # ю
149
+ result: ju
150
+ - pattern: \b\u042F # Я
151
+ result: Ja
152
+ - pattern: \b\u044F # я
153
+ result: ja
154
+
155
+ postrules:
156
+ - pattern: \u2019
157
+ result: j
158
+
159
+ characters:
160
+
161
+ '\u0410' : 'A' # А
162
+ '\u0411' : 'B' # Б
163
+ '\u0412' : 'V' # B
164
+ '\u0413' : 'G' # Г
165
+ '\u0414' : 'D' # Д
166
+ '\u0415' : 'Ie' # Е or JE TODO add rule
167
+ '\u0401' : 'Io' # Ё or JO TODO add rule
168
+ '\u0416' : 'Zh' # Ж
169
+ '\u0417' : 'Z' # З
170
+ '\u0406' : 'I' # І
171
+ '\u0419' : "J" # Й
172
+ '\u041A' : 'K' # К
173
+ '\u041B' : 'L' # Л
174
+ '\u041C' : 'M' # М
175
+ '\u041D' : 'N' # Н
176
+ '\u041E' : 'O' # О
177
+ '\u041F' : 'P' # П
178
+ '\u0420' : 'R' # Р
179
+ '\u0421' : 'S' # С
180
+ '\u0422' : 'T' # Т
181
+ '\u0423' : 'U' # У
182
+ '\u040E' : 'W' # Ў
183
+ '\u0424' : 'F' # Ф
184
+ '\u0425' : 'Kh' # Х
185
+ '\u0426' : 'Ts' # Ц
186
+ '\u0427' : 'Ch' # Ч
187
+ '\u0428' : 'Sh' # Ш
188
+ '\u0429' : 'Shch' # Щ
189
+ '\u042B' : 'Y' # Ы
190
+ '\u042D' : 'E' # Э
191
+ '\u042E' : "Iu" # Ю
192
+ '\u042F' : "Ia" # Я
193
+
194
+ '\u0430' : 'a' # а
195
+ '\u0431' : 'b' # б
196
+ '\u0432' : 'v' # в
197
+ '\u0433' : 'g' # г
198
+ '\u0434' : 'd' # д
199
+ '\u0435' : 'ie' # е
200
+ '\u0451' : 'io' # ё
201
+ '\u0436' : 'zh' # ж
202
+ '\u0437' : 'z' # з
203
+ '\u0456' : 'i' # і
204
+ '\u0439' : 'j' # й
205
+ '\u043A' : 'k' # к
206
+ '\u043B' : 'l' # л
207
+ '\u043C' : 'm' # м
208
+ '\u043D' : 'n' # н
209
+ '\u043E' : 'o' # о
210
+ '\u043F' : 'p' # п
211
+ '\u0440' : 'r' # р
212
+ '\u0441' : 's' # с
213
+ '\u0442' : 't' # т
214
+ '\u0443' : 'u' # у
215
+ '\u045E' : 'w' # ў
216
+ '\u0444' : 'f' # ф
217
+ '\u0445' : 'kh' # х
218
+ '\u0446' : 'ts' # ц
219
+ '\u0447' : 'ch' # ч
220
+ '\u0448' : 'sh' # ш
221
+ '\u0449' : 'shch' # щ
222
+ '\u044B' : 'y' # ы
223
+ '\u044D' : 'e' # э
224
+ '\u044E' : "iu" # ю
225
+ '\u044F' : "ia" # я
@@ -0,0 +1,63 @@
1
+ ---
2
+ authority_id: mvd
3
+ id: 22721
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: |
8
+ About approval of the Instructions on the organization of work of units of citizenship
9
+ and migration of internal affairs bodies on the issuance, registration, exchange,
10
+ invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
11
+ url: https://pravo.by/document/?guid=3871&p0=W21022721
12
+ creation_date: 2010
13
+
14
+ description: |
15
+ RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
16
+ June 28, 2010 No. 200
17
+ On approval of the Instructions on the organization of work of units of citizenship
18
+ and migration of internal affairs bodies on the issuance, registration, exchange,
19
+ invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
20
+
21
+ notes:
22
+ - |
23
+ It is not allowed to use in the spelling of the surname, own name superscripts, punctuation, except
24
+ for the use of the apostrophe in Belarusian and Latin spelling.
25
+ - The Belarusian letter "Г" is written as the Latin "H"
26
+
27
+ tests:
28
+ - source: Бабрыковіч Аляксандр
29
+ expected: Babrykovich Aliaksandr
30
+ - source: Міховіч Марыя
31
+ expected: Mikhovich Maryia
32
+ - source: Максім
33
+ expected: Maksim
34
+ - source: Іван
35
+ expected: Ivan
36
+ - source: СВЯТЛАНА
37
+ expected: SVIATLANA
38
+ - source: Ігар
39
+ expected: Ihar
40
+ - source: МІХАІЛ
41
+ expected: MIKHAIL
42
+
43
+ map:
44
+ inherit: "mvd-bel-Cyrl-Latn-2008"
45
+
46
+ rules:
47
+ # note[5]
48
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
49
+ result: "\\1"
50
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u042C # Ь after consonants
51
+ result: "\\1"
52
+ # Й at end
53
+ - pattern: (?<=[ЕеЁёЫыЮюЯя])\u0419$ # Й at end of string, after vowels
54
+ result: ""
55
+ - pattern: (?<=[ЕеЁёЫыЮюЯя])\u0439$ # й at end of string, after vowels
56
+ result: ""
57
+
58
+ characters:
59
+ '\u0413' : 'H' # Г
60
+ '\u0433' : 'h' # г
61
+
62
+ '\u042C' : '' # Ь
63
+ '\u044C' : '' # ь