interscript 0.1.6 → 2.1.0a9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -127
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +75 -339
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -71
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -27
  71. data/lib/interscript/opal/maps.js.erb +0 -10
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
  80. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  81. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
  82. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  83. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  84. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
  85. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
  86. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  87. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  88. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  89. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  90. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
  91. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  92. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  93. data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
  94. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
  95. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  96. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  97. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  98. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  99. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  100. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  101. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  102. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  103. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  104. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  105. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  106. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  107. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  108. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
  109. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
  110. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  111. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  112. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  113. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
  114. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  115. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  116. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
  117. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
  118. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  119. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  120. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  121. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  122. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  123. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  124. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  125. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  126. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
  127. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
  128. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  129. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  130. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
  131. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  132. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  133. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  134. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  135. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  136. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
  137. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  138. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  139. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  140. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  141. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  142. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  143. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  144. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  145. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
  146. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  147. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  148. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  149. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  150. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
  151. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  152. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  153. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
  154. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  155. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  156. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  157. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  158. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  159. data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
  160. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  161. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  162. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  163. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  164. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
  165. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
  166. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  167. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  172. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  173. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  174. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  175. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  176. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  177. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  178. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  179. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  180. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  181. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  182. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  183. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  184. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  185. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  186. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  200. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
  201. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  202. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  203. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  204. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  205. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  206. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  207. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  208. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  209. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  210. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  211. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
  212. data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
  213. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  214. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
  215. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
  216. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  217. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  218. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  219. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  220. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  221. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  222. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  223. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  224. data/spec/interscript/mapping_spec.rb +0 -42
  225. data/spec/interscript_spec.rb +0 -26
  226. data/spec/spec_helper.rb +0 -3
@@ -1,221 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 1997
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ALA-LC Romanization System 1997
8
- url: https://en.wikipedia.org/wiki/ALA-LC_romanization_for_Russian
9
- creation_date: 1997
10
- description: |
11
- The ALA-LC Romanization tables for Slavic alphabets are a set of standards for romanization of texts
12
- in various writing systems used in North American libraries and publications.
13
- This version was published by the American Library Association and the Library of Congress in 1997.
14
- This map covers the ALA-LC method of transliteration of Russian-language text from Cyrillic script to Latin script.
15
-
16
- The formal, unambiguous version of the system requires some diacritics and two-letter tie characters which are often omitted in practice.
17
-
18
- notes:
19
- - Pre-1918 letters skipped
20
- - Pre-18th century letters skipped
21
-
22
- tests:
23
- - source: Азов
24
- expected: Azov
25
- - source: Тамбов
26
- expected: Tambov
27
- - source: Барнаул
28
- expected: Barnaul
29
- - source: Кубань
30
- expected: Kubanʹ
31
- - source: Владимир
32
- expected: Vladimir
33
- - source: Ульяновск
34
- expected: Ulʹi͡anovsk
35
- - source: Грозный
36
- expected: Groznyǐ
37
- - source: Волгодонск
38
- expected: Volgodonsk
39
- - source: Дзержинский
40
- expected: Dzerzhinskiǐ
41
- - source: Нелидово
42
- expected: Nelidovo
43
- - source: Елизово
44
- expected: Elizovo
45
- - source: Чебоксары
46
- expected: Cheboksary
47
- - source: Ёлкин
48
- expected: Ëlkin
49
- - source: Озёрный
50
- expected: Ozërnyǐ
51
- - source: Жуков
52
- expected: Zhukov
53
- - source: Лужники
54
- expected: Luzhniki
55
- - source: Звенигород
56
- expected: Zvenigorod
57
- - source: Вязьма
58
- expected: Vi͡azʹma
59
- - source: Иркутск
60
- expected: Irkutsk
61
- - source: Апатиты
62
- expected: Apatity
63
- - source: Йошкар-Ола
64
- expected: Ǐoshkar-Ola
65
- - source: Бийск
66
- expected: Biǐsk
67
- - source: Киров
68
- expected: Kirov
69
- - source: Енисейск
70
- expected: Eniseǐsk
71
- - source: Ломоносов
72
- expected: Lomonosov
73
- - source: Нелидово
74
- expected: Nelidovo
75
- - source: Менделеев
76
- expected: Mendeleev
77
- - source: Каменка
78
- expected: Kamenka
79
- - source: Новосибирск
80
- expected: Novosibirsk
81
- - source: Кандалакша
82
- expected: Kandalaksha
83
- - source: Омск
84
- expected: Omsk
85
- - source: Красноярск
86
- expected: Krasnoi͡arsk
87
- - source: Петрозаводск
88
- expected: Petrozavodsk
89
- - source: Серпухов
90
- expected: Serpukhov
91
- - source: Ростов
92
- expected: Rostov
93
- - source: Северобайкальск
94
- expected: Severobaǐkalʹsk
95
- - source: Сковородино
96
- expected: Skovorodino
97
- - source: Чайковский
98
- expected: Chaǐkovskiǐ
99
- - source: Тамбов
100
- expected: Tambov
101
- - source: Мытищи
102
- expected: Mytishchi
103
- - source: Углич
104
- expected: Uglich
105
- - source: Дудинка
106
- expected: Dudinka
107
- - source: Фурманов
108
- expected: Furmanov
109
- - source: Уфа
110
- expected: Ufa
111
- - source: Хабаровск
112
- expected: Khabarovsk
113
- - source: Прохладный
114
- expected: Prokhladnyǐ
115
- - source: Цимлянск
116
- expected: T͡Simli͡ansk
117
- - source: Ельцин
118
- expected: Elʹt͡sin
119
- - source: Чебоксары
120
- expected: Cheboksary
121
- - source: Печора
122
- expected: Pechora
123
- - source: Шахтёрск
124
- expected: Shakhtërsk
125
- - source: Мышкин
126
- expected: Myshkin
127
- - source: Щёлково
128
- expected: Shchëlkovo
129
- - source: Ртищево
130
- expected: Rtishchevo
131
- - source: Подъездной
132
- expected: Podʺezdnoǐ
133
- - source: Ыттык-Кёль
134
- expected: Yttyk-Këlʹ
135
- - source: Тында
136
- expected: Tynda
137
- - source: Тюмень
138
- expected: Ti͡umenʹ
139
- - source: Электрогорск
140
- expected: Ėlektrogorsk
141
- - source: Радиоэлектроника
142
- expected: Radioėlektronika
143
- - source: Юбилейный
144
- expected: I͡Ubileǐnyǐ
145
- - source: Ключевская
146
- expected: Kli͡uchevskai͡a
147
- - source: Якутск
148
- expected: I͡Akutsk
149
- - source: Брянск
150
- expected: Bri͡ansk
151
-
152
- map:
153
- characters:
154
- "\u0027": "" # '
155
- "\u0410": "A" # А
156
- "\u0411": "B" # Б
157
- "\u0412": "V" # В
158
- "\u0413": "G" # Г
159
- "\u0414": "D" # Д
160
- "\u0415": "E" # Е
161
- "\u0401": "Ë" # Ё
162
- "\u0416": "Zh" # Ж
163
- "\u0417": "Z" # З
164
- "\u0418": "I" # И
165
- "\u0419": "\u01CF" # Й
166
- "\u041A": "K" # К
167
- "\u041B": "L" # Л
168
- "\u041C": "M" # М
169
- "\u041D": "N" # Н
170
- "\u041E": "O" # О
171
- "\u041F": "P" # П
172
- "\u0420": "R" # Р
173
- "\u0421": "S" # С
174
- "\u0422": "T" # Т
175
- "\u0423": "U" # У
176
- "\u0424": "F" # Ф
177
- "\u0425": "Kh" # Х
178
- "\u0426": "T\u0361S" # Ц
179
- "\u0427": "Ch" # Ч
180
- "\u0428": "Sh" # Ш
181
- "\u0429": "Shch" # Щ
182
- "\u042A": "ʺ" # Ъ
183
- "\u042B": "Y" # Ы
184
- "\u042C": "ʹ" # Ь
185
- "\u042D": "E\u0307" # Э
186
- "\u042E": "I\u0361U" # Ю
187
- "\u042F": "I\u0361A" # Я
188
-
189
- "\u0430": "a" # а
190
- "\u0431": "b" # б
191
- "\u0432": "v" # в
192
- "\u0433": "g" # г
193
- "\u0434": "d" # д
194
- "\u0435": "e" # e
195
- "\u0451": "ë" # ё
196
- "\u0436": "zh" # ж
197
- "\u0437": "z" # з
198
- "\u0438": "i" # и
199
- "\u0439": "\u01d0" # й
200
- "\u043A": "k" # к
201
- "\u043B": "l" # л
202
- "\u043C": "m" # м
203
- "\u043D": "n" # н
204
- "\u043E": "o" # о
205
- "\u043F": "p" # п
206
- "\u0440": "r" # р
207
- "\u0441": "s" # с
208
- "\u0442": "t" # т
209
- "\u0443": "u" # у
210
- "\u0444": "f" # ф
211
- "\u0445": "kh" # х
212
- "\u0446": "t\u0361s" # ц
213
- "\u0447": "ch" # ч
214
- "\u0448": "sh" # ш
215
- "\u0449": "shch" # щ
216
- "\u044A": "ʺ" # ъ
217
- "\u044B": "y" # ы
218
- "\u044C": "ʹ" # ь
219
- "\u044D": "e\u0307" # э
220
- "\u044E": "i\u0361u" # ю
221
- "\u044F": "i\u0361a" # я
@@ -1,162 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 2012
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ALA-LC Romanization System 2012
8
- url: https://www.loc.gov/catdir/cpso/romanization/russian.pdf
9
- creation_date: 2012
10
- description: |
11
- The ALA-LC Romanization tables for Slavic alphabets are a set of standards for romanization of texts
12
- in various writing systems used in North American libraries and publications.
13
- The latest version was published by the American Library Association and the Library of Congress in 2012.
14
-
15
- notes:
16
- - The letters І, Ѣ, Ѳ and Ѵ were eliminated in the orthographic reform of 1918. For other obsolete letters appearing in Russian texts, consult the Church Slavic table.
17
-
18
- tests:
19
- - source: Азов
20
- expected: Azov
21
- - source: Тамбов
22
- expected: Tambov
23
- - source: Барнаул
24
- expected: Barnaul
25
- - source: Кубань
26
- expected: Kubanʹ
27
- - source: Владимир
28
- expected: Vladimir
29
- - source: Ульяновск
30
- expected: Ulʹi͡anovsk
31
- - source: Грозный
32
- expected: Groznyǐ
33
- - source: Волгодонск
34
- expected: Volgodonsk
35
- - source: Дзержинский
36
- expected: Dzerzhinskiǐ
37
- - source: Нелидово
38
- expected: Nelidovo
39
- - source: Елизово
40
- expected: Elizovo
41
- - source: Чебоксары
42
- expected: Cheboksary
43
- - source: Ёлкин
44
- expected: Ëlkin
45
- - source: Озёрный
46
- expected: Ozërnyǐ
47
- - source: Жуков
48
- expected: Zhukov
49
- - source: Лужники
50
- expected: Luzhniki
51
- - source: Звенигород
52
- expected: Zvenigorod
53
- - source: Вязьма
54
- expected: Vi͡azʹma
55
- - source: Иркутск
56
- expected: Irkutsk
57
- - source: Апатиты
58
- expected: Apatity
59
- - source: Йошкар-Ола
60
- expected: Ǐoshkar-Ola
61
- - source: Бийск
62
- expected: Biǐsk
63
- - source: Киров
64
- expected: Kirov
65
- - source: Енисейск
66
- expected: Eniseǐsk
67
- - source: Ломоносов
68
- expected: Lomonosov
69
- - source: Нелидово
70
- expected: Nelidovo
71
- - source: Менделеев
72
- expected: Mendeleev
73
- - source: Каменка
74
- expected: Kamenka
75
- - source: Новосибирск
76
- expected: Novosibirsk
77
- - source: Кандалакша
78
- expected: Kandalaksha
79
- - source: Омск
80
- expected: Omsk
81
- - source: Красноярск
82
- expected: Krasnoi͡arsk
83
- - source: Петрозаводск
84
- expected: Petrozavodsk
85
- - source: Серпухов
86
- expected: Serpukhov
87
- - source: Ростов
88
- expected: Rostov
89
- - source: Северобайкальск
90
- expected: Severobaǐkalʹsk
91
- - source: Сковородино
92
- expected: Skovorodino
93
- - source: Чайковский
94
- expected: Chaǐkovskiǐ
95
- - source: Тамбов
96
- expected: Tambov
97
- - source: Мытищи
98
- expected: Mytishchi
99
- - source: Углич
100
- expected: Uglich
101
- - source: Дудинка
102
- expected: Dudinka
103
- - source: Фурманов
104
- expected: Furmanov
105
- - source: Уфа
106
- expected: Ufa
107
- - source: Хабаровск
108
- expected: Khabarovsk
109
- - source: Прохладный
110
- expected: Prokhladnyǐ
111
- - source: Цимлянск
112
- expected: T͡Simli͡ansk
113
- - source: Ельцин
114
- expected: Elʹt͡sin
115
- - source: Чебоксары
116
- expected: Cheboksary
117
- - source: Печора
118
- expected: Pechora
119
- - source: Шахтёрск
120
- expected: Shakhtërsk
121
- - source: Мышкин
122
- expected: Myshkin
123
- - source: Щёлково
124
- expected: Shchëlkovo
125
- - source: Ртищево
126
- expected: Rtishchevo
127
- - source: Подъездной
128
- expected: Podʺezdnoǐ
129
- - source: Ыттык-Кёль
130
- expected: Yttyk-Këlʹ
131
- - source: Тында
132
- expected: Tynda
133
- - source: Тюмень
134
- expected: Ti͡umenʹ
135
- - source: Электрогорск
136
- expected: Ėlektrogorsk
137
- - source: Радиоэлектроника
138
- expected: Radioėlektronika
139
- - source: Юбилейный
140
- expected: I͡Ubileǐnyǐ
141
- - source: Ключевская
142
- expected: Kli͡uchevskai͡a
143
- - source: Якутск
144
- expected: I͡Akutsk
145
- - source: Брянск
146
- expected: Bri͡ansk
147
-
148
- map:
149
- inherit: alalc-rus-Cyrl-Latn-1997
150
-
151
- characters:
152
- '\u0406': "\u012A"
153
- '\u0456': "\u012B"
154
-
155
- '\u0462': "I\u0361E" # Ѣ
156
- '\u0463': "i\u0361e" # ѣ
157
-
158
- '\u0472': "F\u0307" # Ѳ
159
- '\u0473': "f\u0307" # ѳ
160
-
161
- '\u0474': "Y\u0307" # Ѵ
162
- '\u0475': "y\u0307" # ѵ
@@ -1,114 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 1997
4
- language: srp
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Serbian and Macedonian Romanization, ALA-LC 1997 System
8
- url: http://catdir.loc.gov/catdir/cpso/romanization/serbian.pdf
9
- creation_date: 1997
10
- description: ALA-LC Romanization table for Serbian and Macedonian.
11
-
12
- notes:
13
- - Special characters in romanization
14
- Đ - D with crossbar (upper case). USMARC hexadecimal code A3.
15
- đ - d with crossbar (lower case). USMARC hexadecimal code B3.
16
-
17
- - Character modifiers in romanization
18
- ´ - acute. USMARC hexadecimal code E2.
19
- ˇ - hachek. USMARC hexadecimal code E9.
20
-
21
- tests:
22
- - source: Општина Ердут
23
- expected: Opština Erdut
24
- - source: Општина Двор
25
- expected: Opština Dvor
26
- - source: ЛУЃЕ луѓе
27
- expected: LUǴE luǵe
28
- - source: ЅВЕЗДА ѕвезда Ѕвезда
29
- expected: DZVEZDA dzvezda Dzvezda
30
- - source: ЌАРУВАЊЕ ќарување
31
- expected: ḰARUVANJE ḱaruvanje
32
-
33
- map:
34
- postrules:
35
- # DZ
36
- - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
37
- result: "DZ"
38
- #LJ
39
- - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
40
- result: "LJ"
41
- #NJ
42
- - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
43
- result: "NJ"
44
- #DŽ
45
- - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
46
- result: "DŽ"
47
-
48
- characters:
49
- "\u0410": "A"
50
- "\u0411": "B"
51
- "\u0412": "V"
52
- "\u0413": "G"
53
- "\u0403": "\u01F4" # Ǵ
54
- "\u0414": "D"
55
- "\u0402": "\u0110" # Đ
56
- "\u0415": "E"
57
- "\u0416": "\u005a\u030c" # Ž
58
- "\u0417": "Z"
59
- "\u0405": "Dz"
60
- "\u0418": "I"
61
- "\u0408": "J"
62
- "\u041A": "K"
63
- "\u040C": "\u1E30" # Ḱ
64
- "\u041B": "L"
65
- "\u0409": "Lj"
66
- "\u041C": "M"
67
- "\u041D": "N"
68
- "\u040A": "Nj"
69
- "\u041E": "O"
70
- "\u041F": "P"
71
- "\u0420": "R"
72
- "\u0421": "S"
73
- "\u0422": "T"
74
- "\u040B": "\u0043\u0301" # Ć
75
- "\u0423": "U"
76
- "\u0424": "F"
77
- "\u0425": "H"
78
- "\u0426": "C"
79
- "\u0427": "\u0043\u030c" # Č
80
- "\u040F": "D\u007a\u030c" # Dž
81
- "\u0428": "\u0053\u030c" # Š
82
- "\u0430": "a"
83
- "\u0431": "b"
84
- "\u0432": "v"
85
- "\u0433": "g"
86
- "\u0453": "\u01F5" # ǵ
87
- "\u0434": "d"
88
- "\u0452": "\u0111" # đ
89
- "\u0435": "e"
90
- "\u0436": "\u007a\u030c" # ž
91
- "\u0437": "z"
92
- "\u0455": "dz"
93
- "\u0438": "i"
94
- "\u0458": "j"
95
- "\u043A": "k"
96
- "\u045C": "\u1E31" # ḱ
97
- "\u043B": "l"
98
- "\u0459": "lj"
99
- "\u043C": "m"
100
- "\u043D": "n"
101
- "\u045A": "nj"
102
- "\u043E": "o"
103
- "\u043F": "p"
104
- "\u0440": "r"
105
- "\u0441": "s"
106
- "\u0442": "t"
107
- "\u045B": "\u0063\u0301" # ć
108
- "\u0443": "u"
109
- "\u0444": "f"
110
- "\u0445": "h"
111
- "\u0446": "c"
112
- "\u0447": "\u0063\u030c" # č
113
- "\u045F": "d\u007a\u030c" # dž
114
- "\u0448": "\u0073\u030c" # š