interscript 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,16 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2011
4
+ language: ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization System 2011
8
+ url: https://www.loc.gov/catdir/cpso/romanization/ukrainia.pdf
9
+ creation_date: 2011
10
+
11
+ notes:
12
+ - The z͡h ligature is necessary to distinguish ж from the combination зг.
13
+ - The t͡s ligature is necessary to distinguish ц from the combination тс.
14
+
15
+ map:
16
+ inherit: alalc-ukr-Cyrl-Latn-1997
@@ -0,0 +1,283 @@
1
+ ---
2
+ authority_id: apcbg
3
+ id: 1995
4
+ language: bul
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Toponymic Guidelines for Antarctica, Lyubomir Ivanov. Antarctic Place-names Commission of Bulgaria, Sofia, 1995.
8
+ url: http://www.apcbg.org/Toponymic-Guidelines.htm
9
+ creation_date: 1995
10
+ description: |
11
+ In Antarctica geographical names are important elements of identification, orientation, localization and navigation,
12
+ providing an essential reference system for logistic operations, including search and rescue measures, and for
13
+ international scientific research. They facilitate information exchange in the field, in scientific publications and in
14
+ administrative measures under the Antarctic Treaty System. Geographical names also reflect the history of Antarctic
15
+ exploration.
16
+
17
+ tests:
18
+ - source: нунатак Абрит
19
+ expected: nunatak Abrit
20
+ - source: връх Академия
21
+ expected: vrah Akademiya
22
+ - source: връх Ами Буе
23
+ expected: vrah Ami Bue
24
+ - source: нос Айтос
25
+ expected: nos Aytos
26
+ - source: залив Баба Тонка
27
+ expected: zaliv Baba Tonka
28
+ - source: Балабански камък
29
+ expected: Balabanski kamak
30
+ - source: Бедечки поток
31
+ expected: Bedechki potok
32
+ - source: нос Бяга
33
+ expected: nos Byaga
34
+ - source: остров Качо
35
+ expected: ostrov Kacho # wiki ostrov Cacho
36
+ - source: Чакъров остров
37
+ expected: Chakarov ostrov
38
+ - source: връх Дъбник
39
+ expected: vrah Dabnik
40
+ - source: залив Десислава
41
+ expected: zaliv Desislava
42
+ - source: ледник Джераси
43
+ expected: lednik Dzherasi
44
+ - source: Джегова скала
45
+ expected: Dzhegova skala
46
+ - source: Нунатак Едуард
47
+ expected: Nunatak Eduard
48
+ - source: Елховска седловина
49
+ expected: Elhovska sedlovina
50
+ - source: ледник Етър
51
+ expected: lednik Etar
52
+ - source: нунатак Филип Тотю
53
+ expected: nunatak Filip Totyu
54
+ - source: ледник Габаре
55
+ expected: lednik Gabare
56
+ - source: риф Гергини
57
+ expected: rif Gergini
58
+ - source: Гяуров връх
59
+ expected: Gyaurov vrah
60
+ - source: Гуцалски рид
61
+ expected: Gutsalski rid # not found
62
+ - source: Хараламбиев остров
63
+ expected: Haralambiev ostrov
64
+ - source: връх Ичера
65
+ expected: vrah Ichera
66
+ - source: полуостров Йоан Павел II
67
+ expected: poluostrov Yoan Pavel II
68
+ - source: нос Иван Александър
69
+ expected: nos Ivan Aleksandar
70
+ - source: нос Иречек
71
+ expected: nos Irechek
72
+ - source: нос Кърджали
73
+ expected: nos Kardzhali
74
+ - source: седловина Кърнаре
75
+ expected: sedlovina Karnare
76
+ - source: нунатак Керсеблепт
77
+ expected: nunatak Kerseblept
78
+ - source: Кондофрейски възвишения
79
+ expected: Kondofreyski vazvisheniya
80
+ - source: Костинбродски проход
81
+ expected: Kostinbrodski prohod
82
+ - source: връх Кожух
83
+ expected: vrah Kozhuh
84
+ - source: Кукерски нунатаци
85
+ expected: Kukerski nunatatsi
86
+ - source: залив Лазурен бряг
87
+ expected: zaliv Lazuren bryag
88
+ - source: връх Лудогорие
89
+ expected: vrah Ludogorie
90
+ - source: Лютибродски скали
91
+ expected: Lyutibrodski skali
92
+ - source: Масларов нунатак
93
+ expected: Maslarov nunatak
94
+ - source: Михневски връх
95
+ expected: Mihnevski vrah
96
+ - source: рид Митино
97
+ expected: rid Mitino
98
+ - source: езеро Наяда
99
+ expected: ezero Nayada
100
+ - source: нос Никюп
101
+ expected: nos Nikyup
102
+ - source: рид Оборище
103
+ expected: rid Oborishte
104
+ - source: залив Олуша
105
+ expected: zaliv Olusha
106
+ - source: Оряховски възвишения
107
+ expected: Oryahovski vazvisheniya
108
+ - source: нунатак Памидово
109
+ expected: nunatak Pamidovo
110
+ - source: връх Парангалица
111
+ expected: vrah Parangalitsa
112
+ - source: Първомайски провлак
113
+ expected: Parvomayski provlak
114
+ - source: ледник Патлейна
115
+ expected: lednik Patleyna
116
+ - source: полуостров Перник
117
+ expected: poluostrov Pernik
118
+ - source: връх Петко Войвода
119
+ expected: vrah Petko Voyvoda
120
+ - source: остров Фанагория
121
+ expected: ostrov Fanagoriya
122
+ - source: нос Плас
123
+ expected: nos Plas
124
+ - source: Пресиянов рид
125
+ expected: Presiyanov rid
126
+ - source: връх Принсипе де Астуриас
127
+ expected: vrah Prinsipe de Asturias # wiki: vrah Príncipe de Asturias
128
+ - source: нунатак Ръченица
129
+ expected: nunatak Rachenitsa
130
+ - source: връх Райна Княгиня
131
+ expected: vrah Rayna Knyaginya
132
+ - source: Рид Ръжана
133
+ expected: Rid Razhana
134
+ - source: връх Ригс
135
+ expected: vrah Rigs
136
+ - source: остров Рогулят
137
+ expected: ostrov Rogulyat
138
+ - source: ледник Сабазий
139
+ expected: lednik Sabaziy
140
+ - source: ледник Съединение
141
+ expected: lednik Saedinenie
142
+ - source: нунатак Сенокос
143
+ expected: nunatak Senokos
144
+ - source: Сейдолски камък
145
+ expected: Seydolski kamak
146
+ - source: ледник Щерна
147
+ expected: lednik Shterna
148
+ - source: връх Шишман
149
+ expected: vrah Shishman
150
+ - source: ледник Сигмен
151
+ expected: lednik Sigmen
152
+ - source: Седловина Синитово
153
+ expected: Sedlovina Sinitovo
154
+ - source: Ледник Скаплизо
155
+ expected: Lednik Skaplizo
156
+ - source: залив Слънчев бряг
157
+ expected: zaliv Slanchev bryag
158
+ - source: остров Соатрис
159
+ expected: ostrov Soatris
160
+ - source: планина Софийски Университет
161
+ expected: planina Sofiyski Universitet
162
+ - source: ледник Сребърна
163
+ expected: lednik Srebarna
164
+ - source: Средногорски възвишения
165
+ expected: Srednogorski vazvisheniya
166
+ - source: Св. Евтимиев камък
167
+ expected: Sv. Evtimiev kamak
168
+ - source: база Св. Климент Охридски
169
+ expected: baza Sv. Kliment Ohridski
170
+ - source: връх Стъргел
171
+ expected: vrah Stargel
172
+ - source: нунатак Сторгозия
173
+ expected: nunatak Storgoziya # nunatak Storgozia according to wiki
174
+ - source: нунатак Сурвакари
175
+ expected: nunatak Survakari
176
+ - source: ледник Световрачене
177
+ expected: lednik Svetovrachene
178
+ - source: остров Теменуга
179
+ expected: ostrov Temenuga
180
+ - source: Тракийски възвишения
181
+ expected: Trakiyski vazvisheniya
182
+ - source: хълм Цамблак
183
+ expected: halm Tsamblak
184
+ - source: ледник Урдовиза
185
+ expected: lednik Urdoviza
186
+ - source: остров Вълчедръм
187
+ expected: ostrov Valchedram
188
+ - source: острови Вардим
189
+ expected: ostrovi Vardim
190
+ - source: Владигеров проток
191
+ expected: Vladigerov protok
192
+ - source: ледник Ябланица
193
+ expected: lednik Yablanitsa
194
+ - source: залив Ямфорина
195
+ expected: zaliv Yamforina
196
+ - source: Йовков нос
197
+ expected: Yovkov nos
198
+ - source: рид Заберново
199
+ expected: rid Zabernovo
200
+ - source: ледник Збелсурд
201
+ expected: lednik Zbelsurd
202
+ - source: Жефарович камък
203
+ expected: Zhefarovich kamak
204
+ - source: връх Зиези
205
+ expected: vrah Ziezi
206
+ - source: залив Златни пясъци
207
+ expected: zaliv Zlatni pyasatsi
208
+ - source: ледник Злокучене
209
+ expected: lednik Zlokuchene
210
+ - source: проток Злогош
211
+ expected: protok Zlogosh
212
+
213
+ map:
214
+ characters:
215
+ '\u0410': 'A'
216
+ '\u0411': 'B'
217
+ '\u0412': 'V'
218
+ '\u0413': 'G'
219
+ '\u0414': 'D'
220
+ '\u0415': 'E'
221
+ '\u0416': 'Zh'
222
+ '\u0417': 'Z'
223
+ '\u0418': 'I'
224
+ '\u0419': 'Y'
225
+ '\u041a': 'K'
226
+ '\u041b': 'L'
227
+ '\u041c': 'M'
228
+ '\u041d': 'N'
229
+ '\u041e': 'O'
230
+ '\u041f': 'P'
231
+ '\u0420': 'R'
232
+ '\u0421': 'S'
233
+ '\u0422': 'T'
234
+ '\u0423': 'U'
235
+ '\u0424': 'F'
236
+ '\u0425': 'H'
237
+ '\u0426': 'Ts'
238
+ '\u0427': 'Ch'
239
+ '\u0428': 'Sh'
240
+ '\u0429': 'Sht'
241
+ '\u042a': 'A'
242
+ '\u042c': 'Y'
243
+ '\u042e': 'Yu'
244
+ '\u042f': 'Ya'
245
+
246
+ '\u0430': 'a'
247
+ '\u0431': 'b'
248
+ '\u0432': 'v'
249
+ '\u0433': 'g'
250
+ '\u0434': 'd'
251
+ '\u0435': 'e'
252
+ '\u0436': 'zh'
253
+ '\u0437': 'z'
254
+ '\u0438': 'i'
255
+ '\u0439': 'y'
256
+ '\u043a': 'k'
257
+ '\u043b': 'l'
258
+ '\u043c': 'm'
259
+ '\u043d': 'n'
260
+ '\u043e': 'o'
261
+ '\u043f': 'p'
262
+ '\u0440': 'r'
263
+ '\u0441': 's'
264
+ '\u0442': 't'
265
+ '\u0443': 'u'
266
+ '\u0444': 'f'
267
+ '\u0445': 'h'
268
+ '\u0446': 'ts'
269
+ '\u0447': 'ch'
270
+ '\u0448': 'sh'
271
+ '\u0449': 'sht'
272
+ '\u044a': 'a'
273
+ '\u044c': 'y'
274
+ '\u044e': 'yu'
275
+ '\u044f': 'ya'
276
+
277
+ # note 2
278
+ '\u046A': "U\u0306" # Ѫ
279
+ '\u046B': "u\u0306" # ѫ
280
+
281
+ # note[3]
282
+ '\u0462': "YE" # Ѣ
283
+ '\u0463': "ye" # ѣ
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  authority_id: bas
3
- id: bss
3
+ id: 2017-bss
4
4
  language: rus
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
@@ -41,23 +41,48 @@ tests:
41
41
 
42
42
  Н.В. Гоголь
43
43
  expected: |
44
- Eh, troyka! ptitsa troyka, kto tebya vidumal? znat, u boykogo
45
- naroda ti mogla tolko roditsya, v toy zemle, chto ne lyubit shutit, a
46
- rovnem-gladnem razmetnulas na polsveta, da i stupay schitay versti,
47
- poka ne zaryabit tebe v ochi. I ne hitriy, kazhis, dorozhniy snaryad,
48
- ne zheleznim shvachen vintom, a naskoro zhivyem s odnim toporom da
49
- dolotom sobral tebya yaroslavskiy rastoropniy muzhik. Ne v nemetskih
50
- botfortah yamshchik: boroda da rukavitsi, i sidit chert znaet na chem;
51
- a privstal, da zamahnulsya, da zatyanul pesnyu — koni vihrem, spitsi v
52
- kolesah smeshalis v odin gladkiy krug, tolko drognula doroga, da
53
- vskriknul v ispuge ostanovivshiysya peshehod — i von ona poneslas,
54
- poneslas, poneslas!
44
+ Eh, troyka! ptitsa troyka, kto tebya vidumal? znat, u boykogo naroda
45
+ ti mogla tolko roditsya, v toy zemle, chto ne lyubit shutit, a
46
+ rovnem-gladnem razmetnulas na polsveta, da i stupay schitat versti, poka
47
+ ne zaryabit tebe v ochi. I ne hitriy, kazhis, dorozhniy snaryad, ne
48
+ zheleznim shvachen vintom, a naskoro zhivyem s odnim toporom da dolotom
49
+ snaryadil i sobral tebya yaroslavskiy rastoropniy muzhik. Ne v nemetskih
50
+ botfortah yamshchik: boroda da rukavitsi, i sidit chert znaet na chem; a
51
+ privstal, da zamahnulsya, da zatyanul pesnyu — koni vihrem, spitsi v
52
+ kolesah smeshalis v odin gladkiy krug, tolko drognula doroga, da vskriknul
53
+ v ispuge ostanovivshiysya peshehod — i von ona poneslas, poneslas,
54
+ poneslas!
55
55
 
56
56
  N.V. Gogol
57
57
 
58
+ - source: ЁЖ Ёж ёж
59
+ expected: EZH Ezh ezh
60
+ - source: Цветущий сад
61
+ expected: Tsvetushchiy sad
62
+ - source: Чувство юмора
63
+ expected: Chuvstvo yumora
64
+ - source: Широкий выбор
65
+ expected: Shirokiy vibor
66
+ - source: Все подъезды заблокированны
67
+ expected: Vse podezdi zablokirovanni
68
+ - source: Ожерелье
69
+ expected: Ozherelye
70
+ - source: Ручьи
71
+ expected: Ruchyi
72
+ - source: Каньон
73
+ expected: Kanyon
74
+ - source: Бельэтаж
75
+ expected: Belyetazh
76
+
58
77
  map:
78
+ rules:
79
+ - pattern: \u042c(?=[ЕеЁёИиОоЭэ]) # Ь (before Е, Ё, И, O, Э)
80
+ result: Y
81
+ - pattern: \u044c(?=[ЕеЁёИиОоЭэ]) # ь (before Е, Ё, И, O, Э)
82
+ result: y
83
+
59
84
  characters:
60
- "\u0027": "" # '
85
+ # "\u0027": "" # '
61
86
  "\u0410": "A" # А
62
87
  "\u0411": "B" # Б
63
88
  "\u0412": "V" # В
@@ -65,7 +90,7 @@ map:
65
90
  "\u0414": "D" # Д
66
91
  "\u0401": "E" # Ё
67
92
  "\u0415": "E" # Е
68
- "\u0416": "ZH" # Ж
93
+ "\u0416": "Zh" # Ж
69
94
  "\u0417": "Z" # З
70
95
  "\u042D": "E" # Э
71
96
  "\u0418": "I" # И
@@ -82,23 +107,24 @@ map:
82
107
  "\u0423": "U" # У
83
108
  "\u0424": "F" # Ф
84
109
  "\u0425": "H" # Х
85
- "\u0426": "TS" # Ц
86
- "\u0427": "CH" # Ч
87
- "\u0428": "SH" # Ш
88
- "\u0429": "SHCH" # Щ
110
+ "\u0426": "Ts" # Ц
111
+ "\u0427": "Ch" # Ч
112
+ "\u0428": "Sh" # Ш
113
+ "\u0429": "Shch" # Щ
89
114
  "\u042B": "I" # Ы
90
- "\u042F": "YA" # Я
91
- "\u042E": "YU" # Ю
115
+ "\u042F": "Ya" # Я
116
+ "\u042E": "Yu" # Ю
92
117
 
93
118
  # Ь (before Е, Ё, И, O, Э)
94
- "\u042c\u0401": "YE" # Ё
95
- "\u042c\u0415": "YE" # Е
96
- "\u042c\u0418": "YI" # И
97
- "\u042c\u041E": "YO" # O
98
- "\u042c\u0417": "YE" # Э
119
+ # "\u042c\u0401": "YE" # Ё
120
+ # "\u042c\u0415": "YE" # Е
121
+ # "\u042c\u0418": "YI" # И
122
+ # "\u042c\u041E": "YO" # O
123
+ # "\u042c\u0417": "YE" # Э
99
124
 
100
- # Ъ (otherwise) -> (none)
125
+ # Ь (otherwise) -> (none)
101
126
  "\u042c": ""
127
+
102
128
  # Ъ -> (none)
103
129
  "\u042a": ""
104
130
 
@@ -135,11 +161,11 @@ map:
135
161
  "\u044E": "yu" # ю
136
162
 
137
163
  # ь (before е, ё, и, o, э)
138
- "\u044c\u0435": "ye" # ё
139
- "\u044c\u0451": "ye" # е
140
- "\u044c\u0438": "yi" # и
141
- "\u044c\u006f": "yo" # o
142
- "\u044c\u044d": "ye" # э
164
+ # "\u044c\u0435": "ye" # е
165
+ # "\u044c\u0451": "ye" # ё
166
+ # "\u044c\u0438": "yi" # и
167
+ # "\u044c\u006f": "yo" # o
168
+ # "\u044c\u044d": "ye" # э
143
169
 
144
170
  # ь (otherwise) -> (none)
145
171
  "\u044c": ""
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  authority_id: bas
3
- id: oss
3
+ id: 2017-oss
4
4
  language: rus
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
@@ -26,8 +26,7 @@ notes:
26
26
  for Cyrillic х.
27
27
 
28
28
  tests:
29
- - source: |
30
- Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа
29
+ - source: "Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа
31
30
  ты могла только родиться, в той земле, что не любит шутить, а
32
31
  ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, пока
33
32
  не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не
@@ -37,27 +36,48 @@ tests:
37
36
  привстал, да замахнулся, да затянул песню — кони вихрем, спицы в
38
37
  колесах смешались в один гладкий круг, только дрогнула дорога, да вскрикнул
39
38
  в испуге остановившийся пешеход — и вон она понеслась, понеслась,
40
- понеслась!
39
+ понеслась!\nН.В. Гоголь"
41
40
 
42
- Н.В. Гоголь
43
- expected: |
44
- `Eh, troyka! ptitsa troyka, kto tebya v`idumal? znat', u boykogo
41
+ expected: "`Eh, troyka! ptitsa troyka, kto tebya v`idumal? znat', u boykogo
45
42
  naroda t`i mogla tol'ko rodit'sya, v toy zemle, chto ne lyubit shutit',
46
- a rovnem-gladnem razmetnulas' na polsveta, da i stupay schitay verst`i,
43
+ a rovnem-gladnem razmetnulas' na polsveta, da i stupay schitat' verst`i,
47
44
  poka ne zaryabit tebe v ochi. I ne hitr`iy, kazhis', dorozhn`iy
48
45
  snaryad, ne zhelezn`im shvachen vintom, a naskoro zhivy``em s odnim
49
- toporom da dolotom sobral tebya yaroslavskiy rastoropn`iy muzhik. Ne v
46
+ toporom da dolotom snaryadil i sobral tebya yaroslavskiy rastoropn`iy muzhik. Ne v
50
47
  nemetskih botfortah yamshchik: boroda da rukavits`i, i sidit ch``ert
51
48
  znaet na ch``em; a privstal, da zamahnulsya, da zatyanul pesnyu — koni
52
49
  vihrem, spits`i v kolesah smeshalis' v odin gladkiy krug, tol'ko
53
50
  drognula doroga, da vskriknul v ispuge ostanovivshiysya peshehod — i
54
- von ona poneslas', poneslas', poneslas'!
51
+ von ona poneslas', poneslas', poneslas'!\nN.V. Gogol'"
55
52
 
56
- N.V. Gogol'
53
+ - source: ЁЖ Ёж ёж
54
+ expected: "``EZH ``Ezh ``ezh"
55
+ - source: Цветущий сад
56
+ expected: Tsvetushchiy sad
57
+ - source: Чувство юмора
58
+ expected: Chuvstvo yumora
59
+ - source: Широкий выбор
60
+ expected: Shirokiy v`ibor
61
+ - source: Все подъезды заблокированны
62
+ expected: Vse pod"ezd`i zablokirovann`i
63
+ - source: Ожерелье
64
+ expected: Ozherelye
65
+ - source: Ручьи
66
+ expected: Ruchyi
67
+ - source: Каньон
68
+ expected: Kanyon
69
+ - source: Бельэтаж
70
+ expected: Bely`etazh
57
71
 
58
72
  map:
73
+ rules:
74
+ - pattern: \u042c(?=[ЕеЁёИиОоЭэ]) # Ь (before Е, Ё, И, O, Э)
75
+ result: Y
76
+ - pattern: \u044c(?=[ЕеЁёИиОоЭэ]) # ь (before Е, Ё, И, O, Э)
77
+ result: y
78
+
59
79
  characters:
60
- "\u0027": "" # '
80
+ # "\u0027": "" # '
61
81
  "\u0410": "A" # А
62
82
  "\u0411": "B" # Б
63
83
  "\u0412": "V" # В
@@ -65,7 +85,7 @@ map:
65
85
  "\u0414": "D" # Д
66
86
  "\u0401": "``E" # Ё
67
87
  "\u0415": "E" # Е
68
- "\u0416": "ZH" # Ж
88
+ "\u0416": "Zh" # Ж
69
89
  "\u0417": "Z" # З
70
90
  "\u042D": "`E" # Э
71
91
  "\u0418": "I" # И
@@ -82,25 +102,25 @@ map:
82
102
  "\u0423": "U" # У
83
103
  "\u0424": "F" # Ф
84
104
  "\u0425": "H" # Х
85
- "\u0426": "TS" # Ц
86
- "\u0427": "CH" # Ч
87
- "\u0428": "SH" # Ш
88
- "\u0429": "SHCH" # Щ
105
+ "\u0426": "Ts" # Ц
106
+ "\u0427": "Ch" # Ч
107
+ "\u0428": "Sh" # Ш
108
+ "\u0429": "Shch" # Щ
89
109
  "\u042B": "`I" # Ы
90
- "\u042F": "YA" # Я
91
- "\u042E": "YU" # Ю
110
+ "\u042F": "Ya" # Я
111
+ "\u042E": "Yu" # Ю
92
112
 
93
113
  # Ь (before Е, Ё, И, O, Э)
94
- "\u042c\u0401": "Y``E" # Ё
95
- "\u042c\u0415": "YE" # Е
96
- "\u042c\u0418": "YI" # И
97
- "\u042c\u041E": "YO" # O
98
- "\u042c\u0417": "Y`E" # Э
99
-
114
+ # "\u042c\u0401": "Y``e" # Ё
115
+ # "\u042c\u0415": "Ye" # Е
116
+ # "\u042c\u0418": "Yi" # И
117
+ # "\u042c\u041E": "Yo" # O
118
+ # "\u042c\u0417": "Y`e" # Э
100
119
 
101
- # Ъ (otherwise) -> " (or none)
120
+ # Ь (otherwise) -> ' (or none)
102
121
  "\u042c": "'"
103
- # Ъ -> ' (or none)
122
+
123
+ # Ъ -> " (or none)
104
124
  "\u042a": '"'
105
125
 
106
126
  "\u0430": "a" # а
@@ -136,14 +156,14 @@ map:
136
156
  "\u044E": "yu" # ю
137
157
 
138
158
  # ь (before е, ё, и, o, э)
139
- "\u044c\u0435": "ye" # ё
140
- "\u044c\u0451": "y``e" # e
141
- "\u044c\u0438": "yi" # и
142
- "\u044c\u006f": "yo" # o
143
- "\u044c\u044d": "y`e" # э
159
+ # "\u044c\u0435": "ye" # е
160
+ # "\u044c\u0451": "y``e" # ё
161
+ # "\u044c\u0438": "yi" # и
162
+ # "\u044c\u006f": "yo" # o
163
+ # "\u044c\u044d": "y`e" # э
144
164
 
145
- # Ъ (otherwise) -> " (or none)
165
+ # ь (otherwise) -> ' (or none)
146
166
  "\u044c": "'"
147
167
 
148
- # Ъ -> ' (or none)
168
+ # ъ -> " (or none)
149
169
  "\u044a": '"'