interscript 0.1.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +76 -128
  21. data/lib/interscript/command.rb +6 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -223
  63. data/README.adoc +0 -297
  64. data/bin/rspec +0 -29
  65. data/lib/g2pwrapper.py +0 -34
  66. data/lib/interscript/mapping.rb +0 -125
  67. data/lib/model-7 +0 -0
  68. data/lib/tha-pt-b-7 +0 -0
  69. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  70. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  71. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  72. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  73. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  74. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  75. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  76. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  77. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  78. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  79. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  80. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  81. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  82. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  83. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  84. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  85. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  86. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  87. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  88. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  89. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  90. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  91. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  92. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  93. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  94. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  95. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  96. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  97. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  98. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  99. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  100. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  101. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +0 -7456
  102. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  103. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  104. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  105. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  106. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  107. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  108. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  109. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  110. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  111. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  112. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  113. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  114. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  115. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  116. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  117. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  118. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  119. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  120. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  121. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  122. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  123. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  124. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  125. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  126. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  127. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  128. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  129. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  130. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  131. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  132. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  133. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  134. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  135. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  136. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  137. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  138. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  139. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  140. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  141. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  142. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  143. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  144. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  145. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  146. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  147. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  148. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  149. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  150. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  151. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  152. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  153. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  154. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  155. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  156. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  157. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  158. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  159. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  160. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  161. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  162. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  163. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  164. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  165. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  166. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  167. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  168. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  169. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  170. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  171. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  172. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  173. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  174. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  175. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  176. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  177. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  178. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  179. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  180. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  181. data/spec/interscript/mapping_spec.rb +0 -42
  182. data/spec/interscript_spec.rb +0 -26
  183. data/spec/spec_helper.rb +0 -3
@@ -1,114 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 1997
4
- language: srp
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Serbian and Macedonian Romanization, ALA-LC 1997 System
8
- url: http://catdir.loc.gov/catdir/cpso/romanization/serbian.pdf
9
- creation_date: 1997
10
- description: ALA-LC Romanization table for Serbian and Macedonian.
11
-
12
- notes:
13
- - Special characters in romanization
14
- Đ - D with crossbar (upper case). USMARC hexadecimal code A3.
15
- đ - d with crossbar (lower case). USMARC hexadecimal code B3.
16
-
17
- - Character modifiers in romanization
18
- ´ - acute. USMARC hexadecimal code E2.
19
- ˇ - hachek. USMARC hexadecimal code E9.
20
-
21
- tests:
22
- - source: Општина Ердут
23
- expected: Opština Erdut
24
- - source: Општина Двор
25
- expected: Opština Dvor
26
- - source: ЛУЃЕ луѓе
27
- expected: LUǴE luǵe
28
- - source: ЅВЕЗДА ѕвезда Ѕвезда
29
- expected: DZVEZDA dzvezda Dzvezda
30
- - source: ЌАРУВАЊЕ ќарување
31
- expected: ḰARUVANJE ḱaruvanje
32
-
33
- map:
34
- postrules:
35
- # DZ
36
- - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
37
- result: "DZ"
38
- #LJ
39
- - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
40
- result: "LJ"
41
- #NJ
42
- - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
43
- result: "NJ"
44
- #DŽ
45
- - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
46
- result: "DŽ"
47
-
48
- characters:
49
- "\u0410": "A"
50
- "\u0411": "B"
51
- "\u0412": "V"
52
- "\u0413": "G"
53
- "\u0403": "\u01F4" # Ǵ
54
- "\u0414": "D"
55
- "\u0402": "\u0110" # Đ
56
- "\u0415": "E"
57
- "\u0416": "\u005a\u030c" # Ž
58
- "\u0417": "Z"
59
- "\u0405": "Dz"
60
- "\u0418": "I"
61
- "\u0408": "J"
62
- "\u041A": "K"
63
- "\u040C": "\u1E30" # Ḱ
64
- "\u041B": "L"
65
- "\u0409": "Lj"
66
- "\u041C": "M"
67
- "\u041D": "N"
68
- "\u040A": "Nj"
69
- "\u041E": "O"
70
- "\u041F": "P"
71
- "\u0420": "R"
72
- "\u0421": "S"
73
- "\u0422": "T"
74
- "\u040B": "\u0043\u0301" # Ć
75
- "\u0423": "U"
76
- "\u0424": "F"
77
- "\u0425": "H"
78
- "\u0426": "C"
79
- "\u0427": "\u0043\u030c" # Č
80
- "\u040F": "D\u007a\u030c" # Dž
81
- "\u0428": "\u0053\u030c" # Š
82
- "\u0430": "a"
83
- "\u0431": "b"
84
- "\u0432": "v"
85
- "\u0433": "g"
86
- "\u0453": "\u01F5" # ǵ
87
- "\u0434": "d"
88
- "\u0452": "\u0111" # đ
89
- "\u0435": "e"
90
- "\u0436": "\u007a\u030c" # ž
91
- "\u0437": "z"
92
- "\u0455": "dz"
93
- "\u0438": "i"
94
- "\u0458": "j"
95
- "\u043A": "k"
96
- "\u045C": "\u1E31" # ḱ
97
- "\u043B": "l"
98
- "\u0459": "lj"
99
- "\u043C": "m"
100
- "\u043D": "n"
101
- "\u045A": "nj"
102
- "\u043E": "o"
103
- "\u043F": "p"
104
- "\u0440": "r"
105
- "\u0441": "s"
106
- "\u0442": "t"
107
- "\u045B": "\u0063\u0301" # ć
108
- "\u0443": "u"
109
- "\u0444": "f"
110
- "\u0445": "h"
111
- "\u0446": "c"
112
- "\u0447": "\u0063\u030c" # č
113
- "\u045F": "d\u007a\u030c" # dž
114
- "\u0448": "\u0073\u030c" # š
@@ -1,135 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 2013
4
- language: srp
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ROMANIZATION OF SERBIAN, ALA-LC 2013 System
8
- url: https://www.loc.gov/catdir/cpso/romanization/serbian.pdf
9
- creation_date: 2013
10
- description: |
11
- Serbian uses the Cyrillic and Roman alphabets. When the Roman alphabet is used it is the same as Croatian.
12
-
13
- tests:
14
- - source: Шупља Стена
15
- expected: Šuplja Stena
16
- - source: Чукарица
17
- expected: Čukarica
18
- - source: Црна Трава
19
- expected: Crna Trava
20
- - source: Херцег Нови
21
- expected: Herceg Novi
22
- - source: Улцињ
23
- expected: Ulcinj
24
- - source: Ужице
25
- expected: Užice
26
- - source: Тресаначка Река
27
- expected: Tresanačka Reka
28
- - source: Сјеница
29
- expected: Sjenica
30
- - source: Рожаје
31
- expected: Rožaje
32
- - source: Пљевља
33
- expected: Pljevlja
34
- - source: Оџаци
35
- expected: Odžaci
36
- - source: Никшић
37
- expected: Nikšić
38
- - source: Медвеђа
39
- expected: Medveđa
40
- - source: Лозница
41
- expected: Loznica
42
- - source: Књажевац
43
- expected: Knjaževac
44
- - source: Зрењанин
45
- expected: Zrenjanin
46
- - source: Житорађа
47
- expected: Žitorađa
48
- - source: Ервеник
49
- expected: Ervenik
50
- - source: Доње Љупче
51
- expected: Donje Ljupče
52
- - source: Гусиње
53
- expected: Gusinje
54
- - source: ГУСИЊЕ
55
- expected: GUSINJE
56
- - source: Врњачка Бања
57
- expected: Vrnjačka Banja
58
- - source: Бијело Поље
59
- expected: Bijelo Polje
60
- - source: Алибунар
61
- expected: Alibunar
62
-
63
- map:
64
- postrules:
65
- #LJ
66
- - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
67
- result: "LJ"
68
- #NJ
69
- - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
70
- result: "NJ"
71
- #DŽ
72
- - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
73
- result: "DŽ"
74
-
75
- characters:
76
- "\u0410": "A"
77
- "\u0411": "B"
78
- "\u0412": "V"
79
- "\u0413": "G"
80
- "\u0414": "D"
81
- "\u0402": "\u0110" # Đ
82
- "\u0415": "E"
83
- "\u0416": "\u005a\u030c" # Ž
84
- "\u0417": "Z"
85
- "\u0418": "I"
86
- "\u0408": "J"
87
- "\u041A": "K"
88
- "\u041B": "L"
89
- "\u0409": "Lj"
90
- "\u041C": "M"
91
- "\u041D": "N"
92
- "\u040A": "Nj"
93
- "\u041E": "O"
94
- "\u041F": "P"
95
- "\u0420": "R"
96
- "\u0421": "S"
97
- "\u0422": "T"
98
- "\u040B": "\u0043\u0301" # Ć
99
- "\u0423": "U"
100
- "\u0424": "F"
101
- "\u0425": "H"
102
- "\u0426": "C"
103
- "\u0427": "\u0043\u030c" # Č
104
- "\u040F": "D\u007a\u030c" # Dž
105
- "\u0428": "\u0053\u030c" # Š
106
- "\u0430": "a"
107
- "\u0431": "b"
108
- "\u0432": "v"
109
- "\u0433": "g"
110
- "\u0434": "d"
111
- "\u0452": "\u0111" # đ
112
- "\u0435": "e"
113
- "\u0436": "\u007a\u030c" # ž
114
- "\u0437": "z"
115
- "\u0438": "i"
116
- "\u0458": "j"
117
- "\u043A": "k"
118
- "\u043B": "l"
119
- "\u0459": "lj"
120
- "\u043C": "m"
121
- "\u043D": "n"
122
- "\u045A": "nj"
123
- "\u043E": "o"
124
- "\u043F": "p"
125
- "\u0440": "r"
126
- "\u0441": "s"
127
- "\u0442": "t"
128
- "\u045B": "\u0063\u0301" # ć
129
- "\u0443": "u"
130
- "\u0444": "f"
131
- "\u0445": "h"
132
- "\u0446": "c"
133
- "\u0447": "\u0063\u030c" # č
134
- "\u045F": "d\u007a\u030c" # dž
135
- "\u0448": "\u0073\u030c" # š
@@ -1,141 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 1997
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ALA-LC Romanization System 1997
8
- url: http://catdir.loc.gov/catdir/cpso/romanization/ukrainia.pdf
9
- creation_date: 1997
10
- description: |
11
- The ALA-LC Romanization Tables, published by the American Library Association (1885)
12
- and Library of Congress (1905). Used to represent bibliographic information by US and
13
- Canadian libraries, by the British Library since 1975, and in North American publications.
14
- The latest 1997 revision is very similar to the 1905 version.
15
-
16
- notes:
17
- - The z͡h ligature is necessary to distinguish ж from the combination зг.
18
- - The t͡s ligature is necessary to distinguish ц from the combination тс.
19
-
20
- tests:
21
- - source: Автономна Республіка Крим
22
- expected: Avtonomna Respublika Krym
23
- - source: Висунь
24
- expected: Vysunʹ
25
- - source: Долинське
26
- expected: Dolynsʹke
27
- - source: Дубище
28
- expected: Dubyshche
29
- - source: Єнакієве
30
- expected: I͡enakii͡eve
31
- - source: Зупиночний Пункт Мокіївці
32
- expected: Zupynochnyĭ Punkt Mokiïvt͡si
33
- - source: Іванівщина
34
- expected: Ivanivshchyna
35
- - source: Киликиїв
36
- expected: Kylykyïv
37
- - source: Кожанка
38
- expected: Koz͡hanka
39
- - source: Краснянка
40
- expected: Krasni͡anka
41
- - source: Краснівка
42
- expected: Krasnivka
43
- - source: Мале Микільське
44
- expected: Male Mykilʹsʹke
45
- - source: Маломиколаївка
46
- expected: Malomykolaïvka
47
- - source: Нове Село
48
- expected: Nove Selo
49
- - source: Новопавлівка
50
- expected: Novopavlivka
51
- - source: Новошичі
52
- expected: Novoshychi
53
- - source: Новоєфремівка
54
- expected: Novoi͡efremivka
55
- - source: Одеська Область
56
- expected: Odesʹka Oblastʹ
57
- - source: Орлівське
58
- expected: Orlivsʹke
59
- - source: Раневичі
60
- expected: Ranevychi
61
- - source: Рокувата
62
- expected: Rokuvata
63
- - source: Рудаєве
64
- expected: Rudai͡eve
65
- - source: Сахнівці
66
- expected: Sakhnivt͡si
67
- - source: Тернівка
68
- expected: Ternivka
69
- - source: Турбівка
70
- expected: Turbivka
71
- - source: Херсонська Область
72
- expected: Khersonsʹka Oblastʹ
73
-
74
- map:
75
- characters:
76
- "\u0410": "A" # А
77
- "\u0411": "B" # Б
78
- "\u0412": "V" # В
79
- "\u0413": "H" # Г
80
- "\u0490": "G" # Ґ
81
- "\u0414": "D" # Д
82
- "\u0415": "E" # Е
83
- "\u0404": "I\u0361e" # Є -> I͡e
84
- "\u0416": "Z\u0361h" # Ж -> Z͡h
85
- "\u0417": "Z" # З
86
- "\u0418": "Y" # И
87
- "\u0406": "I" # І
88
- "\u0407": "I\u0308" # Ї -> Ï
89
- "\u0419": "I\u0306" # Й -> Ĭ
90
- "\u041a": "K" # К
91
- "\u041b": "L" # Л
92
- "\u041c": "M" # М
93
- "\u041d": "N" # Н
94
- "\u041e": "O" # О
95
- "\u041f": "P" # П
96
- "\u0420": "R" # Р
97
- "\u0421": "S" # С
98
- "\u0422": "T" # Т
99
- "\u0423": "U" # У
100
- "\u0424": "F" # Ф
101
- "\u0425": "Kh" # Х
102
- "\u0426": "T\u0361s" # Ц -> T͡s
103
- "\u0427": "Ch" # Ч
104
- "\u0428": "Sh" # Ш
105
- "\u0429": "Shch" # Щ
106
- "\u042e": "I\u0361u" # Ю -> I͡u
107
- "\u042f": "I\u0361a" # Я -> I͡a
108
- "\u042c": "\u02B9" # Ь -> ʹ
109
- "\u0430": "a" # а
110
- "\u0431": "b" # б
111
- "\u0432": "v" # в
112
- "\u0433": "h" # г
113
- "\u0491": "g" # ґ
114
- "\u0434": "d" # д
115
- "\u0435": "e" # е
116
- "\u0454": "i\u0361e" # є -> i͡e
117
- "\u0436": "z\u0361h" # ж -> z͡h
118
- "\u0437": "z" # з
119
- "\u0438": "y" # и
120
- "\u0456": "i" # і
121
- "\u0457": "i\u0308" # ї -> ï
122
- "\u0439": "i\u0306" # й -> ĭ
123
- "\u043a": "k" # к
124
- "\u043b": "l" # л
125
- "\u043c": "m" # м
126
- "\u043d": "n" # н
127
- "\u043e": "o" # о
128
- "\u043f": "p" # п
129
- "\u0440": "r" # р
130
- "\u0441": "s" # с
131
- "\u0442": "t" # т
132
- "\u0443": "u" # у
133
- "\u0444": "f" # ф
134
- "\u0445": "kh" # х
135
- "\u0446": "t\u0361s" # ц -> t͡s
136
- "\u0447": "ch" # ч
137
- "\u0448": "sh" # ш
138
- "\u0449": "shch" # щ
139
- "\u044e": "i\u0361u" # ю -> i͡u
140
- "\u044f": "i\u0361a" # я -> i͡a
141
- "\u044c": "\u02B9" # ь -> ʹ
@@ -1,16 +0,0 @@
1
- ---
2
- authority_id: alalc
3
- id: 2011
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ALA-LC Romanization System 2011
8
- url: https://www.loc.gov/catdir/cpso/romanization/ukrainia.pdf
9
- creation_date: 2011
10
-
11
- notes:
12
- - The z͡h ligature is necessary to distinguish ж from the combination зг.
13
- - The t͡s ligature is necessary to distinguish ц from the combination тс.
14
-
15
- map:
16
- inherit: alalc-ukr-Cyrl-Latn-1997
@@ -1,283 +0,0 @@
1
- ---
2
- authority_id: apcbg
3
- id: 1995
4
- language: bul
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Toponymic Guidelines for Antarctica, Lyubomir Ivanov. Antarctic Place-names Commission of Bulgaria, Sofia, 1995.
8
- url: http://www.apcbg.org/Toponymic-Guidelines.htm
9
- creation_date: 1995
10
- description: |
11
- In Antarctica geographical names are important elements of identification, orientation, localization and navigation,
12
- providing an essential reference system for logistic operations, including search and rescue measures, and for
13
- international scientific research. They facilitate information exchange in the field, in scientific publications and in
14
- administrative measures under the Antarctic Treaty System. Geographical names also reflect the history of Antarctic
15
- exploration.
16
-
17
- tests:
18
- - source: нунатак Абрит
19
- expected: nunatak Abrit
20
- - source: връх Академия
21
- expected: vrah Akademiya
22
- - source: връх Ами Буе
23
- expected: vrah Ami Bue
24
- - source: нос Айтос
25
- expected: nos Aytos
26
- - source: залив Баба Тонка
27
- expected: zaliv Baba Tonka
28
- - source: Балабански камък
29
- expected: Balabanski kamak
30
- - source: Бедечки поток
31
- expected: Bedechki potok
32
- - source: нос Бяга
33
- expected: nos Byaga
34
- - source: остров Качо
35
- expected: ostrov Kacho # wiki ostrov Cacho
36
- - source: Чакъров остров
37
- expected: Chakarov ostrov
38
- - source: връх Дъбник
39
- expected: vrah Dabnik
40
- - source: залив Десислава
41
- expected: zaliv Desislava
42
- - source: ледник Джераси
43
- expected: lednik Dzherasi
44
- - source: Джегова скала
45
- expected: Dzhegova skala
46
- - source: Нунатак Едуард
47
- expected: Nunatak Eduard
48
- - source: Елховска седловина
49
- expected: Elhovska sedlovina
50
- - source: ледник Етър
51
- expected: lednik Etar
52
- - source: нунатак Филип Тотю
53
- expected: nunatak Filip Totyu
54
- - source: ледник Габаре
55
- expected: lednik Gabare
56
- - source: риф Гергини
57
- expected: rif Gergini
58
- - source: Гяуров връх
59
- expected: Gyaurov vrah
60
- - source: Гуцалски рид
61
- expected: Gutsalski rid # not found
62
- - source: Хараламбиев остров
63
- expected: Haralambiev ostrov
64
- - source: връх Ичера
65
- expected: vrah Ichera
66
- - source: полуостров Йоан Павел II
67
- expected: poluostrov Yoan Pavel II
68
- - source: нос Иван Александър
69
- expected: nos Ivan Aleksandar
70
- - source: нос Иречек
71
- expected: nos Irechek
72
- - source: нос Кърджали
73
- expected: nos Kardzhali
74
- - source: седловина Кърнаре
75
- expected: sedlovina Karnare
76
- - source: нунатак Керсеблепт
77
- expected: nunatak Kerseblept
78
- - source: Кондофрейски възвишения
79
- expected: Kondofreyski vazvisheniya
80
- - source: Костинбродски проход
81
- expected: Kostinbrodski prohod
82
- - source: връх Кожух
83
- expected: vrah Kozhuh
84
- - source: Кукерски нунатаци
85
- expected: Kukerski nunatatsi
86
- - source: залив Лазурен бряг
87
- expected: zaliv Lazuren bryag
88
- - source: връх Лудогорие
89
- expected: vrah Ludogorie
90
- - source: Лютибродски скали
91
- expected: Lyutibrodski skali
92
- - source: Масларов нунатак
93
- expected: Maslarov nunatak
94
- - source: Михневски връх
95
- expected: Mihnevski vrah
96
- - source: рид Митино
97
- expected: rid Mitino
98
- - source: езеро Наяда
99
- expected: ezero Nayada
100
- - source: нос Никюп
101
- expected: nos Nikyup
102
- - source: рид Оборище
103
- expected: rid Oborishte
104
- - source: залив Олуша
105
- expected: zaliv Olusha
106
- - source: Оряховски възвишения
107
- expected: Oryahovski vazvisheniya
108
- - source: нунатак Памидово
109
- expected: nunatak Pamidovo
110
- - source: връх Парангалица
111
- expected: vrah Parangalitsa
112
- - source: Първомайски провлак
113
- expected: Parvomayski provlak
114
- - source: ледник Патлейна
115
- expected: lednik Patleyna
116
- - source: полуостров Перник
117
- expected: poluostrov Pernik
118
- - source: връх Петко Войвода
119
- expected: vrah Petko Voyvoda
120
- - source: остров Фанагория
121
- expected: ostrov Fanagoriya
122
- - source: нос Плас
123
- expected: nos Plas
124
- - source: Пресиянов рид
125
- expected: Presiyanov rid
126
- - source: връх Принсипе де Астуриас
127
- expected: vrah Prinsipe de Asturias # wiki: vrah Príncipe de Asturias
128
- - source: нунатак Ръченица
129
- expected: nunatak Rachenitsa
130
- - source: връх Райна Княгиня
131
- expected: vrah Rayna Knyaginya
132
- - source: Рид Ръжана
133
- expected: Rid Razhana
134
- - source: връх Ригс
135
- expected: vrah Rigs
136
- - source: остров Рогулят
137
- expected: ostrov Rogulyat
138
- - source: ледник Сабазий
139
- expected: lednik Sabaziy
140
- - source: ледник Съединение
141
- expected: lednik Saedinenie
142
- - source: нунатак Сенокос
143
- expected: nunatak Senokos
144
- - source: Сейдолски камък
145
- expected: Seydolski kamak
146
- - source: ледник Щерна
147
- expected: lednik Shterna
148
- - source: връх Шишман
149
- expected: vrah Shishman
150
- - source: ледник Сигмен
151
- expected: lednik Sigmen
152
- - source: Седловина Синитово
153
- expected: Sedlovina Sinitovo
154
- - source: Ледник Скаплизо
155
- expected: Lednik Skaplizo
156
- - source: залив Слънчев бряг
157
- expected: zaliv Slanchev bryag
158
- - source: остров Соатрис
159
- expected: ostrov Soatris
160
- - source: планина Софийски Университет
161
- expected: planina Sofiyski Universitet
162
- - source: ледник Сребърна
163
- expected: lednik Srebarna
164
- - source: Средногорски възвишения
165
- expected: Srednogorski vazvisheniya
166
- - source: Св. Евтимиев камък
167
- expected: Sv. Evtimiev kamak
168
- - source: база Св. Климент Охридски
169
- expected: baza Sv. Kliment Ohridski
170
- - source: връх Стъргел
171
- expected: vrah Stargel
172
- - source: нунатак Сторгозия
173
- expected: nunatak Storgoziya # nunatak Storgozia according to wiki
174
- - source: нунатак Сурвакари
175
- expected: nunatak Survakari
176
- - source: ледник Световрачене
177
- expected: lednik Svetovrachene
178
- - source: остров Теменуга
179
- expected: ostrov Temenuga
180
- - source: Тракийски възвишения
181
- expected: Trakiyski vazvisheniya
182
- - source: хълм Цамблак
183
- expected: halm Tsamblak
184
- - source: ледник Урдовиза
185
- expected: lednik Urdoviza
186
- - source: остров Вълчедръм
187
- expected: ostrov Valchedram
188
- - source: острови Вардим
189
- expected: ostrovi Vardim
190
- - source: Владигеров проток
191
- expected: Vladigerov protok
192
- - source: ледник Ябланица
193
- expected: lednik Yablanitsa
194
- - source: залив Ямфорина
195
- expected: zaliv Yamforina
196
- - source: Йовков нос
197
- expected: Yovkov nos
198
- - source: рид Заберново
199
- expected: rid Zabernovo
200
- - source: ледник Збелсурд
201
- expected: lednik Zbelsurd
202
- - source: Жефарович камък
203
- expected: Zhefarovich kamak
204
- - source: връх Зиези
205
- expected: vrah Ziezi
206
- - source: залив Златни пясъци
207
- expected: zaliv Zlatni pyasatsi
208
- - source: ледник Злокучене
209
- expected: lednik Zlokuchene
210
- - source: проток Злогош
211
- expected: protok Zlogosh
212
-
213
- map:
214
- characters:
215
- '\u0410': 'A'
216
- '\u0411': 'B'
217
- '\u0412': 'V'
218
- '\u0413': 'G'
219
- '\u0414': 'D'
220
- '\u0415': 'E'
221
- '\u0416': 'Zh'
222
- '\u0417': 'Z'
223
- '\u0418': 'I'
224
- '\u0419': 'Y'
225
- '\u041a': 'K'
226
- '\u041b': 'L'
227
- '\u041c': 'M'
228
- '\u041d': 'N'
229
- '\u041e': 'O'
230
- '\u041f': 'P'
231
- '\u0420': 'R'
232
- '\u0421': 'S'
233
- '\u0422': 'T'
234
- '\u0423': 'U'
235
- '\u0424': 'F'
236
- '\u0425': 'H'
237
- '\u0426': 'Ts'
238
- '\u0427': 'Ch'
239
- '\u0428': 'Sh'
240
- '\u0429': 'Sht'
241
- '\u042a': 'A'
242
- '\u042c': 'Y'
243
- '\u042e': 'Yu'
244
- '\u042f': 'Ya'
245
-
246
- '\u0430': 'a'
247
- '\u0431': 'b'
248
- '\u0432': 'v'
249
- '\u0433': 'g'
250
- '\u0434': 'd'
251
- '\u0435': 'e'
252
- '\u0436': 'zh'
253
- '\u0437': 'z'
254
- '\u0438': 'i'
255
- '\u0439': 'y'
256
- '\u043a': 'k'
257
- '\u043b': 'l'
258
- '\u043c': 'm'
259
- '\u043d': 'n'
260
- '\u043e': 'o'
261
- '\u043f': 'p'
262
- '\u0440': 'r'
263
- '\u0441': 's'
264
- '\u0442': 't'
265
- '\u0443': 'u'
266
- '\u0444': 'f'
267
- '\u0445': 'h'
268
- '\u0446': 'ts'
269
- '\u0447': 'ch'
270
- '\u0448': 'sh'
271
- '\u0449': 'sht'
272
- '\u044a': 'a'
273
- '\u044c': 'y'
274
- '\u044e': 'yu'
275
- '\u044f': 'ya'
276
-
277
- # note 2
278
- '\u046A': "U\u0306" # Ѫ
279
- '\u046B': "u\u0306" # ѫ
280
-
281
- # note[3]
282
- '\u0462': "YE" # Ѣ
283
- '\u0463': "ye" # ѣ