interscript 0.1.1 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  6. data/lib/g2pwrapper.py +34 -0
  7. data/lib/interscript-opal.rb +2 -0
  8. data/lib/interscript.rb +138 -20
  9. data/lib/interscript/command.rb +28 -0
  10. data/lib/interscript/fs.rb +71 -0
  11. data/lib/interscript/mapping.rb +142 -0
  12. data/lib/interscript/opal.rb +27 -0
  13. data/lib/interscript/opal/maps.js.erb +10 -0
  14. data/lib/interscript/opal_map_translate.rb +12 -0
  15. data/lib/interscript/version.rb +1 -1
  16. data/lib/model-7 +0 -0
  17. data/lib/tha-pt-b-7 +0 -0
  18. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  19. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  20. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  21. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  22. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  23. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  24. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +125 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  27. data/maps/alalc-ell-Grek-Latn-1997.yaml +624 -0
  28. data/maps/alalc-ell-Grek-Latn-2010.yaml +627 -0
  29. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  30. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  31. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  32. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  33. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  34. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  35. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  36. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  37. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +221 -0
  38. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  39. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  40. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  41. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  42. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  43. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  44. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +174 -0
  45. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  46. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  47. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  48. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  49. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  50. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  51. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  52. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  53. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  54. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  55. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  56. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +285 -0
  57. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  58. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  59. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +701 -0
  60. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +19 -0
  61. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  62. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  63. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  64. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  65. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  66. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  67. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  68. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  69. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  70. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +92 -0
  71. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  72. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  73. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +162 -0
  74. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  75. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  76. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  77. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  78. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  79. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  80. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  81. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  82. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  83. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  84. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  85. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  86. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  87. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  88. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  89. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  90. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  91. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  92. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  93. data/maps/ggg-kat-Geor-Latn-2002.yaml +88 -0
  94. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  95. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  96. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +186 -0
  97. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  98. data/maps/icao-bel-Cyrl-Latn-9303.yaml +136 -0
  99. data/maps/icao-bul-Cyrl-Latn-9303.yaml +118 -0
  100. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  101. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  102. data/maps/icao-per-Arab-Latn-9303.yaml +103 -0
  103. data/maps/icao-rus-Cyrl-Latn-9303.yaml +117 -0
  104. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  105. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +119 -0
  106. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  107. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +609 -0
  108. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +40 -0
  109. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  110. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +271 -0
  111. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  112. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  113. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  114. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  115. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  116. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  117. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  118. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  119. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  120. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  121. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  122. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  123. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  124. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  125. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  126. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  127. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  128. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  129. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  130. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  131. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  132. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  133. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  134. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  135. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  136. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  137. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  138. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  139. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  140. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  141. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  142. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  143. data/maps/ses-ara-Arab-Latn-1930.yaml +279 -0
  144. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  145. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  146. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  147. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  148. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  149. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  150. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  151. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  152. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  153. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  154. data/maps/un-mon-Mong-Latn-2013.yaml +99 -0
  155. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  156. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  157. data/maps/un-ukr-Cyrl-Latn-1998.yaml +30 -0
  158. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  159. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  160. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  161. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  162. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  163. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  164. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  165. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  166. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  167. data/spec/interscript/mapping_spec.rb +42 -0
  168. data/spec/interscript_spec.rb +26 -0
  169. data/spec/spec_helper.rb +3 -0
  170. metadata +298 -18
@@ -0,0 +1,221 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: rus
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization System 1997
8
+ url: https://en.wikipedia.org/wiki/ALA-LC_romanization_for_Russian
9
+ creation_date: 1997
10
+ description: |
11
+ The ALA-LC Romanization tables for Slavic alphabets are a set of standards for romanization of texts
12
+ in various writing systems used in North American libraries and publications.
13
+ This version was published by the American Library Association and the Library of Congress in 1997.
14
+ This article is about the ALA-LC method of transliteration of Russian-language text from Cyrillic script to Latin script.
15
+
16
+ The formal, unambiguous version of the system requires some diacritics and two-letter tie characters which are often omitted in practice.
17
+
18
+ notes:
19
+ - Pre-1918 letters skipped
20
+ - Pre-18th century letters skipped
21
+
22
+ tests:
23
+ - source: Азов
24
+ expected: Azov
25
+ - source: Тамбов
26
+ expected: Tambov
27
+ - source: Барнаул
28
+ expected: Barnaul
29
+ - source: Кубань
30
+ expected: Kubanʹ
31
+ - source: Владимир
32
+ expected: Vladimir
33
+ - source: Ульяновск
34
+ expected: Ulʹi͡anovsk
35
+ - source: Грозный
36
+ expected: Groznyǐ
37
+ - source: Волгодонск
38
+ expected: Volgodonsk
39
+ - source: Дзержинский
40
+ expected: Dzerzhinskiǐ
41
+ - source: Нелидово
42
+ expected: Nelidovo
43
+ - source: Елизово
44
+ expected: Elizovo
45
+ - source: Чебоксары
46
+ expected: Cheboksary
47
+ - source: Ёлкин
48
+ expected: Ëlkin
49
+ - source: Озёрный
50
+ expected: Ozërnyǐ
51
+ - source: Жуков
52
+ expected: Zhukov
53
+ - source: Лужники
54
+ expected: Luzhniki
55
+ - source: Звенигород
56
+ expected: Zvenigorod
57
+ - source: Вязьма
58
+ expected: Vi͡azʹma
59
+ - source: Иркутск
60
+ expected: Irkutsk
61
+ - source: Апатиты
62
+ expected: Apatity
63
+ - source: Йошкар-Ола
64
+ expected: Ǐoshkar-Ola
65
+ - source: Бийск
66
+ expected: Biǐsk
67
+ - source: Киров
68
+ expected: Kirov
69
+ - source: Енисейск
70
+ expected: Eniseǐsk
71
+ - source: Ломоносов
72
+ expected: Lomonosov
73
+ - source: Нелидово
74
+ expected: Nelidovo
75
+ - source: Менделеев
76
+ expected: Mendeleev
77
+ - source: Каменка
78
+ expected: Kamenka
79
+ - source: Новосибирск
80
+ expected: Novosibirsk
81
+ - source: Кандалакша
82
+ expected: Kandalaksha
83
+ - source: Омск
84
+ expected: Omsk
85
+ - source: Красноярск
86
+ expected: Krasnoi͡arsk
87
+ - source: Петрозаводск
88
+ expected: Petrozavodsk
89
+ - source: Серпухов
90
+ expected: Serpukhov
91
+ - source: Ростов
92
+ expected: Rostov
93
+ - source: Северобайкальск
94
+ expected: Severobaǐkalʹsk
95
+ - source: Сковородино
96
+ expected: Skovorodino
97
+ - source: Чайковский
98
+ expected: Chaǐkovskiǐ
99
+ - source: Тамбов
100
+ expected: Tambov
101
+ - source: Мытищи
102
+ expected: Mytishchi
103
+ - source: Углич
104
+ expected: Uglich
105
+ - source: Дудинка
106
+ expected: Dudinka
107
+ - source: Фурманов
108
+ expected: Furmanov
109
+ - source: Уфа
110
+ expected: Ufa
111
+ - source: Хабаровск
112
+ expected: Khabarovsk
113
+ - source: Прохладный
114
+ expected: Prokhladnyǐ
115
+ - source: Цимлянск
116
+ expected: T͡Simli͡ansk
117
+ - source: Ельцин
118
+ expected: Elʹt͡sin
119
+ - source: Чебоксары
120
+ expected: Cheboksary
121
+ - source: Печора
122
+ expected: Pechora
123
+ - source: Шахтёрск
124
+ expected: Shakhtërsk
125
+ - source: Мышкин
126
+ expected: Myshkin
127
+ - source: Щёлково
128
+ expected: Shchëlkovo
129
+ - source: Ртищево
130
+ expected: Rtishchevo
131
+ - source: Подъездной
132
+ expected: Podʺezdnoǐ
133
+ - source: Ыттык-Кёль
134
+ expected: Yttyk-Këlʹ
135
+ - source: Тында
136
+ expected: Tynda
137
+ - source: Тюмень
138
+ expected: Ti͡umenʹ
139
+ - source: Электрогорск
140
+ expected: Ėlektrogorsk
141
+ - source: Радиоэлектроника
142
+ expected: Radioėlektronika
143
+ - source: Юбилейный
144
+ expected: I͡Ubileǐnyǐ
145
+ - source: Ключевская
146
+ expected: Kli͡uchevskai͡a
147
+ - source: Якутск
148
+ expected: I͡Akutsk
149
+ - source: Брянск
150
+ expected: Bri͡ansk
151
+
152
+ map:
153
+ characters:
154
+ "\u0027": "" # '
155
+ "\u0410": "A" # А
156
+ "\u0411": "B" # Б
157
+ "\u0412": "V" # В
158
+ "\u0413": "G" # Г
159
+ "\u0414": "D" # Д
160
+ "\u0415": "E" # Е
161
+ "\u0401": "Ë" # Ё
162
+ "\u0416": "Zh" # Ж
163
+ "\u0417": "Z" # З
164
+ "\u0418": "I" # И
165
+ "\u0419": "\u01CF" # Й
166
+ "\u041A": "K" # К
167
+ "\u041B": "L" # Л
168
+ "\u041C": "M" # М
169
+ "\u041D": "N" # Н
170
+ "\u041E": "O" # О
171
+ "\u041F": "P" # П
172
+ "\u0420": "R" # Р
173
+ "\u0421": "S" # С
174
+ "\u0422": "T" # Т
175
+ "\u0423": "U" # У
176
+ "\u0424": "F" # Ф
177
+ "\u0425": "Kh" # Х
178
+ "\u0426": "T\u0361S" # Ц
179
+ "\u0427": "Ch" # Ч
180
+ "\u0428": "Sh" # Ш
181
+ "\u0429": "Shch" # Щ
182
+ "\u042A": "ʺ" # Ъ
183
+ "\u042B": "Y" # Ы
184
+ "\u042C": "ʹ" # Ь
185
+ "\u042D": "E\u0307" # Э
186
+ "\u042E": "I\u0361U" # Ю
187
+ "\u042F": "I\u0361A" # Я
188
+
189
+ "\u0430": "a" # а
190
+ "\u0431": "b" # б
191
+ "\u0432": "v" # в
192
+ "\u0433": "g" # г
193
+ "\u0434": "d" # д
194
+ "\u0435": "e" # e
195
+ "\u0451": "ë" # ё
196
+ "\u0436": "zh" # ж
197
+ "\u0437": "z" # з
198
+ "\u0438": "i" # и
199
+ "\u0439": "\u01d0" # й
200
+ "\u043A": "k" # к
201
+ "\u043B": "l" # л
202
+ "\u043C": "m" # м
203
+ "\u043D": "n" # н
204
+ "\u043E": "o" # о
205
+ "\u043F": "p" # п
206
+ "\u0440": "r" # р
207
+ "\u0441": "s" # с
208
+ "\u0442": "t" # т
209
+ "\u0443": "u" # у
210
+ "\u0444": "f" # ф
211
+ "\u0445": "kh" # х
212
+ "\u0446": "t\u0361s" # ц
213
+ "\u0447": "ch" # ч
214
+ "\u0448": "sh" # ш
215
+ "\u0449": "shch" # щ
216
+ "\u044A": "ʺ" # ъ
217
+ "\u044B": "y" # ы
218
+ "\u044C": "ʹ" # ь
219
+ "\u044D": "e\u0307" # э
220
+ "\u044E": "i\u0361u" # ю
221
+ "\u044F": "i\u0361a" # я
@@ -0,0 +1,162 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2012
4
+ language: rus
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization System 2012
8
+ url: https://www.loc.gov/catdir/cpso/romanization/russian.pdf
9
+ creation_date: 2012
10
+ description: |
11
+ The ALA-LC Romanization tables for Slavic alphabets are a set of standards for romanization of texts
12
+ in various writing systems used in North American libraries and publications.
13
+ The latest version was published by the American Library Association and the Library of Congress in 2012.
14
+
15
+ notes:
16
+ - The letters І, Ѣ, Ѳ and Ѵ were eliminated in the orthographic reform of 1918. For other obsolete letters appearing in Russian texts, consult the Church Slavic table.
17
+
18
+ tests:
19
+ - source: Азов
20
+ expected: Azov
21
+ - source: Тамбов
22
+ expected: Tambov
23
+ - source: Барнаул
24
+ expected: Barnaul
25
+ - source: Кубань
26
+ expected: Kubanʹ
27
+ - source: Владимир
28
+ expected: Vladimir
29
+ - source: Ульяновск
30
+ expected: Ulʹi͡anovsk
31
+ - source: Грозный
32
+ expected: Groznyǐ
33
+ - source: Волгодонск
34
+ expected: Volgodonsk
35
+ - source: Дзержинский
36
+ expected: Dzerzhinskiǐ
37
+ - source: Нелидово
38
+ expected: Nelidovo
39
+ - source: Елизово
40
+ expected: Elizovo
41
+ - source: Чебоксары
42
+ expected: Cheboksary
43
+ - source: Ёлкин
44
+ expected: Ëlkin
45
+ - source: Озёрный
46
+ expected: Ozërnyǐ
47
+ - source: Жуков
48
+ expected: Zhukov
49
+ - source: Лужники
50
+ expected: Luzhniki
51
+ - source: Звенигород
52
+ expected: Zvenigorod
53
+ - source: Вязьма
54
+ expected: Vi͡azʹma
55
+ - source: Иркутск
56
+ expected: Irkutsk
57
+ - source: Апатиты
58
+ expected: Apatity
59
+ - source: Йошкар-Ола
60
+ expected: Ǐoshkar-Ola
61
+ - source: Бийск
62
+ expected: Biǐsk
63
+ - source: Киров
64
+ expected: Kirov
65
+ - source: Енисейск
66
+ expected: Eniseǐsk
67
+ - source: Ломоносов
68
+ expected: Lomonosov
69
+ - source: Нелидово
70
+ expected: Nelidovo
71
+ - source: Менделеев
72
+ expected: Mendeleev
73
+ - source: Каменка
74
+ expected: Kamenka
75
+ - source: Новосибирск
76
+ expected: Novosibirsk
77
+ - source: Кандалакша
78
+ expected: Kandalaksha
79
+ - source: Омск
80
+ expected: Omsk
81
+ - source: Красноярск
82
+ expected: Krasnoi͡arsk
83
+ - source: Петрозаводск
84
+ expected: Petrozavodsk
85
+ - source: Серпухов
86
+ expected: Serpukhov
87
+ - source: Ростов
88
+ expected: Rostov
89
+ - source: Северобайкальск
90
+ expected: Severobaǐkalʹsk
91
+ - source: Сковородино
92
+ expected: Skovorodino
93
+ - source: Чайковский
94
+ expected: Chaǐkovskiǐ
95
+ - source: Тамбов
96
+ expected: Tambov
97
+ - source: Мытищи
98
+ expected: Mytishchi
99
+ - source: Углич
100
+ expected: Uglich
101
+ - source: Дудинка
102
+ expected: Dudinka
103
+ - source: Фурманов
104
+ expected: Furmanov
105
+ - source: Уфа
106
+ expected: Ufa
107
+ - source: Хабаровск
108
+ expected: Khabarovsk
109
+ - source: Прохладный
110
+ expected: Prokhladnyǐ
111
+ - source: Цимлянск
112
+ expected: T͡Simli͡ansk
113
+ - source: Ельцин
114
+ expected: Elʹt͡sin
115
+ - source: Чебоксары
116
+ expected: Cheboksary
117
+ - source: Печора
118
+ expected: Pechora
119
+ - source: Шахтёрск
120
+ expected: Shakhtërsk
121
+ - source: Мышкин
122
+ expected: Myshkin
123
+ - source: Щёлково
124
+ expected: Shchëlkovo
125
+ - source: Ртищево
126
+ expected: Rtishchevo
127
+ - source: Подъездной
128
+ expected: Podʺezdnoǐ
129
+ - source: Ыттык-Кёль
130
+ expected: Yttyk-Këlʹ
131
+ - source: Тында
132
+ expected: Tynda
133
+ - source: Тюмень
134
+ expected: Ti͡umenʹ
135
+ - source: Электрогорск
136
+ expected: Ėlektrogorsk
137
+ - source: Радиоэлектроника
138
+ expected: Radioėlektronika
139
+ - source: Юбилейный
140
+ expected: I͡Ubileǐnyǐ
141
+ - source: Ключевская
142
+ expected: Kli͡uchevskai͡a
143
+ - source: Якутск
144
+ expected: I͡Akutsk
145
+ - source: Брянск
146
+ expected: Bri͡ansk
147
+
148
+ map:
149
+ inherit: alalc-rus-Cyrl-Latn-1997
150
+
151
+ characters:
152
+ "\u0406": "\u012A" # І
153
+ "\u0456": "\u012B" # і
154
+
155
+ "\u0462": "I\u0361E" # Ѣ
156
+ "\u0463": "i\u0361e" # ѣ
157
+
158
+ "\u0472": "F\u0307" # Ѳ
159
+ "\u0473": "f\u0307" # ѳ
160
+
161
+ "\u0474": "Y\u0307" # Ѵ
162
+ "\u0475": "y\u0307" # ѵ
@@ -0,0 +1,114 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: srp
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Serbian and Macedonian Romanization, ALA-LC 1997 System
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/serbian.pdf
9
+ creation_date: 1997
10
+ description: ALA-LC Romanization table for Serbian and Macedonian.
11
+
12
+ notes:
13
+ - Special characters in romanization
14
+ Đ - D with crossbar (upper case). USMARC hexadecimal code A3.
15
+ đ - d with crossbar (lower case). USMARC hexadecimal code B3.
16
+
17
+ - Character modifiers in romanization
18
+ ´ - acute. USMARC hexadecimal code E2.
19
+ ˇ - hachek. USMARC hexadecimal code E9.
20
+
21
+ tests:
22
+ - source: Општина Ердут
23
+ expected: Opština Erdut
24
+ - source: Општина Двор
25
+ expected: Opština Dvor
26
+ - source: ЛУЃЕ луѓе
27
+ expected: LUǴE luǵe
28
+ - source: ЅВЕЗДА ѕвезда Ѕвезда
29
+ expected: DZVEZDA dzvezda Dzvezda
30
+ - source: ЌАРУВАЊЕ ќарување
31
+ expected: ḰARUVANJE ḱaruvanje
32
+
33
+ map:
34
+ postrules:
35
+ # DZ
36
+ - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
37
+ result: "DZ"
38
+ #LJ
39
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
40
+ result: "LJ"
41
+ #NJ
42
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
43
+ result: "NJ"
44
+ #DŽ
45
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
46
+ result: "DŽ"
47
+
48
+ characters:
49
+ "\u0410": "A"
50
+ "\u0411": "B"
51
+ "\u0412": "V"
52
+ "\u0413": "G"
53
+ "\u0403": "\u01F4" # Ǵ
54
+ "\u0414": "D"
55
+ "\u0402": "\u0110" # Đ
56
+ "\u0415": "E"
57
+ "\u0416": "\u005a\u030c" # Ž
58
+ "\u0417": "Z"
59
+ "\u0405": "Dz"
60
+ "\u0418": "I"
61
+ "\u0408": "J"
62
+ "\u041A": "K"
63
+ "\u040C": "\u1E30" # Ḱ
64
+ "\u041B": "L"
65
+ "\u0409": "Lj"
66
+ "\u041C": "M"
67
+ "\u041D": "N"
68
+ "\u040A": "Nj"
69
+ "\u041E": "O"
70
+ "\u041F": "P"
71
+ "\u0420": "R"
72
+ "\u0421": "S"
73
+ "\u0422": "T"
74
+ "\u040B": "\u0043\u0301" # Ć
75
+ "\u0423": "U"
76
+ "\u0424": "F"
77
+ "\u0425": "H"
78
+ "\u0426": "C"
79
+ "\u0427": "\u0043\u030c" # Č
80
+ "\u040F": "D\u007a\u030c" # Dž
81
+ "\u0428": "\u0053\u030c" # Š
82
+ "\u0430": "a"
83
+ "\u0431": "b"
84
+ "\u0432": "v"
85
+ "\u0433": "g"
86
+ "\u0453": "\u01F5" # ǵ
87
+ "\u0434": "d"
88
+ "\u0452": "\u0111" # đ
89
+ "\u0435": "e"
90
+ "\u0436": "\u007a\u030c" # ž
91
+ "\u0437": "z"
92
+ "\u0455": "dz"
93
+ "\u0438": "i"
94
+ "\u0458": "j"
95
+ "\u043A": "k"
96
+ "\u045C": "\u1E31" # ḱ
97
+ "\u043B": "l"
98
+ "\u0459": "lj"
99
+ "\u043C": "m"
100
+ "\u043D": "n"
101
+ "\u045A": "nj"
102
+ "\u043E": "o"
103
+ "\u043F": "p"
104
+ "\u0440": "r"
105
+ "\u0441": "s"
106
+ "\u0442": "t"
107
+ "\u045B": "\u0063\u0301" # ć
108
+ "\u0443": "u"
109
+ "\u0444": "f"
110
+ "\u0445": "h"
111
+ "\u0446": "c"
112
+ "\u0447": "\u0063\u030c" # č
113
+ "\u045F": "d\u007a\u030c" # dž
114
+ "\u0448": "\u0073\u030c" # š