interscript 0.1.6 → 2.1.0a9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -127
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +75 -339
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -71
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -27
  71. data/lib/interscript/opal/maps.js.erb +0 -10
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
  80. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  81. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
  82. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  83. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  84. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
  85. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
  86. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  87. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  88. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  89. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  90. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
  91. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  92. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  93. data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
  94. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
  95. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  96. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  97. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  98. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  99. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  100. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  101. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  102. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  103. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  104. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  105. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  106. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  107. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  108. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
  109. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
  110. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  111. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  112. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  113. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
  114. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  115. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  116. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
  117. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
  118. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  119. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  120. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  121. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  122. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  123. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  124. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  125. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  126. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
  127. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
  128. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  129. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  130. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
  131. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  132. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  133. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  134. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  135. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  136. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
  137. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  138. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  139. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  140. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  141. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  142. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  143. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  144. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  145. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
  146. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  147. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  148. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  149. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  150. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
  151. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  152. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  153. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
  154. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  155. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  156. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  157. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  158. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  159. data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
  160. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  161. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  162. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  163. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  164. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
  165. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
  166. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  167. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  172. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  173. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  174. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  175. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  176. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  177. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  178. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  179. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  180. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  181. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  182. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  183. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  184. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  185. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  186. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  200. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
  201. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  202. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  203. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  204. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  205. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  206. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  207. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  208. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  209. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  210. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  211. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
  212. data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
  213. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  214. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
  215. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
  216. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  217. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  218. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  219. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  220. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  221. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  222. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  223. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  224. data/spec/interscript/mapping_spec.rb +0 -42
  225. data/spec/interscript_spec.rb +0 -26
  226. data/spec/spec_helper.rb +0 -3
@@ -1,19 +0,0 @@
1
- ---
2
- authority_id: elot
3
- id: 2001
4
- language: ell
5
- source_script: Grek
6
- destination_script: Latn
7
- name: ELOT 743:2001
8
- url: https://docplayer.gr/docview/18/849832/#file=/storage/18/849832/849832.pdf
9
- creation_date: 2001
10
- description: |
11
- Reversible transliteration standard, ELOT
12
-
13
- note:
14
- - Transliteration standard (reversible): Clause 3.1, Table 1
15
-
16
- map:
17
- character_separator: ""
18
- word_separator: " "
19
- inherit: "iso-ell-Grek-Latn-843-1997-t1"
@@ -1,31 +0,0 @@
1
- ---
2
- authority_id: elot
3
- id: 2001
4
- language: ell
5
- source_script: Grek
6
- destination_script: Latn
7
- name: ELOT 743:2001
8
- url: https://docplayer.gr/docview/18/849832/#file=/storage/18/849832/849832.pdf
9
- creation_date: 2001
10
- description: |
11
- Reversible transliteration standard, ELOT
12
-
13
- note:
14
- - Transcription standard (reversible): Clause 3.1, Table 2
15
-
16
- tests:
17
-
18
- - source: |
19
- Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
20
-
21
- Γιάννης Μακρυγιάννης.
22
-
23
- expected: |
24
- Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
25
-
26
- Giánnis Makrygiánnis.
27
-
28
- map:
29
- character_separator: ""
30
- word_separator: " "
31
- inherit: "iso-ell-Grek-Latn-843-1997-t2"
@@ -1,88 +0,0 @@
1
- ---
2
- authority_id: ggg
3
- id: 2002
4
- language: kat
5
- source_script: Geor
6
- destination_script: Latn
7
- name: Georgian State Department of Geodesy and Cartography 2002 System
8
- url: https://transliteration.eki.ee/pdf/Georgian.pdf
9
- creation_date: 1998
10
- confirmation_date: 2002
11
- description: |
12
- The national system of romanization adopted in February 2002 by the State
13
- Department of Geodesy and Cartography of Georgia and the Institute
14
- of Linguistics, Georgian Academy of Sciences.
15
-
16
- notes:
17
- - Georgian script refers in this document to the Mkhedruli alphabet.
18
-
19
- - There is no case in Georgian.
20
-
21
- tests:
22
- - source: თბილისი
23
- expected: tbilisi
24
-
25
- - source: მეღვინეთუხუცესი
26
- expected: meghvinetukhutsesi
27
-
28
- - source: ჭიანჭველა
29
- expected: ch’ianch’vela
30
-
31
- - source: ბაყაყი
32
- expected: baq’aq’i
33
-
34
- - source: ჩხალთის ქედი
35
- expected: chkhaltis kedi
36
-
37
- - source: აბჟააფთრა
38
- expected: abzhaaptra
39
-
40
- - source: ამბროლაურის მუნიციპალიტეტი
41
- expected: ambrolauris munitsip’alit’et’i
42
-
43
- - source: მარტვილის მუნიციპალიტეტი
44
- expected: mart’vilis munitsip’alit’et’i
45
-
46
- - source: ლეკუხონა
47
- expected: lek’ukhona
48
-
49
- - source: მყინვარი აღმოსავლეთი მაგუაშირხა
50
- expected: mq’invari aghmosavleti maguashirkha
51
-
52
-
53
-
54
- map:
55
- characters:
56
- '\u10d0' : 'a' # ა
57
- '\u10d1' : 'b' # ბ
58
- '\u10d2' : 'g' # გ
59
- '\u10d3' : 'd' # დ
60
- '\u10d4' : 'e' # ე
61
- '\u10d5' : 'v' # ვ
62
- '\u10d6' : 'z' # ზ
63
- '\u10d7' : 't' # თ
64
- '\u10d8' : 'i' # ი
65
- '\u10d9' : 'k’' # კ
66
- '\u10da' : 'l' # ლ
67
- '\u10db' : 'm' # მ
68
- '\u10dc' : 'n' # ნ
69
- '\u10dd' : 'o' # ო
70
- '\u10de' : 'p’' # პ
71
- '\u10df' : 'zh' # ჟ
72
- '\u10e0' : 'r' # რ
73
- '\u10e1' : 's' # ს
74
- '\u10e2' : 't’' # ტ
75
- '\u10e3' : 'u' # უ
76
- '\u10e4' : 'p' # ფ
77
- '\u10e5' : 'k' # ქ
78
- '\u10e6' : 'gh' # ღ
79
- '\u10e7' : 'q’' # ყ
80
- '\u10e8' : 'sh' # შ
81
- '\u10e9' : 'ch' # ჩ
82
- '\u10ea' : 'ts' # ც
83
- '\u10eb' : 'dz' # ძ
84
- '\u10ec' : 'ts’' # წ
85
- '\u10ed' : 'ch’' # ჭ
86
- '\u10ee' : 'kh' # ხ
87
- '\u10ef' : 'j' # ჯ
88
- '\u10f0' : 'h' # ჰ
@@ -1,33 +0,0 @@
1
- ---
2
- authority_id: gki
3
- id: 1992
4
- language: bel
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Byelorussian National Cartographic Authority 1992-3 System (based on GOST 1983)
8
- url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/7th-uncsgn-docs/econf/7th_UNCSGN_econf.91_3_Add1.pdf
9
- creation_date: 1992
10
-
11
- tests:
12
- - source: 'Сямашкі'
13
- expected: 'Sjamaški'
14
- # "Syamashki" in GNDB `bel_Cyrl2Latn_GBO_1992`, but that is clearly bgnpcgn-ukr-Cyrl-Latn-1965
15
- - source: 'Старадворцы'
16
- expected: 'Staradvorcy'
17
- # "Staradvortsy" in GNDB `bel_Cyrl2Latn_GBO_1992`, but that is clearly bgnpcgn-ukr-Cyrl-Latn-1965
18
- - source: 'Канюхі'
19
- expected: 'Kanjuhi'
20
- # "Kanyukhi" in GNDB `bel_Cyrl2Latn_GBO_1992`, but that is clearly bgnpcgn-ukr-Cyrl-Latn-1965
21
-
22
- map:
23
- inherit: gost-rus-Cyrl-Latn-16876-71-1983
24
-
25
- characters:
26
- '\u0406' : 'I' # І
27
- '\u0456' : 'i' # і
28
-
29
- '\u0413' : 'G' # Г
30
- '\u0433' : 'g' # г
31
-
32
- '\U040E' : 'Ŭ' # Ў
33
- '\u045E' : 'ŭ' # ў
@@ -1,201 +0,0 @@
1
- ---
2
- authority_id: gki
3
- id: 2000
4
- language: bel
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: On approval of the Instructions for the transliteration of geographical names of the Republic of Belarus with letters of the Latin alphabet
8
- url: https://registr.by/doc/103003
9
- creation_date: 2000
10
- description: |
11
- Act name:
12
- On approval of the Instructions for the transliteration of geographical names of
13
- the Republic of Belarus in letters of the Latin alphabet
14
- Type of act, adoption authority, date and number of adoption (publication):
15
- Resolution of the State Committee for Land Resources, Geodesy and Cartography of
16
- the Republic of Belarus of November 23, 2000 No. 15
17
- National Registry Registration Number: 8/4488
18
- Date of inclusion in the National Register: 11/30/2000
19
-   Source(s) of official publication:
20
- National Register of Legal Acts of the Republic of Belarus, 2001,
21
- No. 3, 8/4488 (published - January 11, 2001)
22
- tests:
23
- - source: Аршанскi
24
- expected: Aršanski
25
- - source: Бешанковічы
26
- expected: Biešankovičy
27
- - source: Віцебск
28
- expected: Viciebsk
29
- - source: Гомель
30
- expected: Homiel'
31
- - source: Гаўя
32
- expected: Haŭja
33
- - source: Добруш
34
- expected: Dobruš
35
- - source: Ельск
36
- expected: Jel'sk
37
- - source: Бабаедава
38
- expected: Babajedava
39
- - source: Лепель
40
- expected: Liepiel'
41
- - source: Ёды
42
- expected: Jody
43
- - source: Вераб'ёвічы
44
- expected: Vierabjovičy
45
- - source: Мёры
46
- expected: Miory
47
- - source: Жодзiшкi
48
- expected: Žodziški
49
- - source: Зэльва
50
- expected: Zel'va
51
- - source: Iванава
52
- expected: Ivanava
53
- - source: Iўе
54
- expected: Iŭje
55
- - source: Лагойск
56
- expected: Lahojsk
57
- - source: Круглае
58
- expected: Kruhlaje
59
- - source: Любань
60
- expected: Liuban'
61
- - source: Магілёў
62
- expected: Mahilioŭ
63
- - source: Нясвіж
64
- expected: Niasviž
65
- - source: Орша
66
- expected: Orša
67
- - source: Паставы
68
- expected: Pastavy
69
- - source: Рагачоў
70
- expected: Rahačoŭ
71
- - source: Светлагорск
72
- expected: Svietlahorsk # original doc Svetlahorsk but it looks like mistake https://en.wikipedia.org/wiki/Svietlahorsk
73
- - source: Талачын
74
- expected: Talačyn
75
- - source: Узда
76
- expected: Uzda
77
- - source: Шаркаўшчына
78
- expected: Šarkaŭščyna
79
- - source: Фаніпаль
80
- expected: Fanipal'
81
- - source: Хоцімск
82
- expected: Chocimsk
83
- - source: Цёмны Лес
84
- expected: Ciomny Lies
85
- - source: Чавусы
86
- expected: Čavusy
87
- - source: Шумілiна
88
- expected: Šumilina
89
- - source: Раз'езд
90
- expected: Razjezd
91
- - source: Чыгірынка
92
- expected: Čyhirynka
93
- - source: Чэрвень
94
- expected: Červien'
95
- - source: Чачэрск
96
- expected: Čačersk
97
- - source: Юхнаўка
98
- expected: Juchnaŭka
99
- - source: Гаюціна
100
- expected: Hajucina
101
- - source: Любонічы
102
- expected: Liuboničy
103
- - source: Ямнае
104
- expected: Jamnaje
105
- - source: Баяры
106
- expected: Bajary
107
- - source: Вязынка
108
- expected: Viazynka
109
- - source: Валяр'яны
110
- expected: Valiarjany
111
-
112
- map:
113
- rules:
114
- - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0415 # Е after consonants
115
- result: IE
116
- - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0435 # е after consonants
117
- result: ie
118
- - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0401 # Ё after consonants
119
- result: IO
120
- - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0451 # ё after consonants
121
- result: io
122
- - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u042E # Ю after consonants
123
- result: IU
124
- - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u044E # ю after consonants
125
- result: iu
126
- - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u042F # Я after consonants
127
- result: IA
128
- - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u044F # я after consonants
129
- result: ia
130
- postrules:
131
- - pattern: '\u042C' # Ь
132
- result: "'"
133
- - pattern: '\u044C' # ь
134
- result: "'"
135
-
136
- characters:
137
- '\u0027' : '' # '
138
-
139
- '\u0410' : 'A' # A
140
- '\u0411' : 'B' # Б
141
- '\u0412' : 'V' # B
142
- '\u0413' : 'H' # Г
143
- '\u0414' : 'D' # Д
144
- '\u0415' : 'Je' # Е
145
- '\u0401' : 'Jo' # Ё
146
- '\u0416' : 'Ž' # Ж
147
- '\u0417' : 'Z' # З
148
- '\u0406' : 'I' # І
149
- '\u0419' : 'J' # Й
150
- '\u041A' : 'K' # К
151
- '\u041B' : 'L' # Л
152
- '\u041C' : 'M' # М
153
- '\u041D' : 'N' # Н
154
- '\u041E' : 'O' # О
155
- '\u041F' : 'P' # П
156
- '\u0420' : 'R' # Р
157
- '\u0421' : 'S' # С
158
- '\u0422' : 'T' # Т
159
- '\u0423' : 'U' # У
160
- '\U040E' : 'Ŭ' # Ў
161
- '\u0424' : 'F' # Ф
162
- '\u0425' : 'Ch' # Х
163
- '\u0426' : 'C' # Ц
164
- '\u0427' : 'Č' # Ч
165
- '\u0428' : 'Š' # Ш
166
- '\u042B' : 'Y' # Ы
167
- '\u042D' : 'E' # Э
168
- '\u042E' : 'Ju' # Ю
169
- '\u042F' : 'Ja' # Я
170
-
171
- '\u0430' : 'a' # а
172
- '\u0431' : 'b' # б
173
- '\u0432' : 'v' # в
174
- '\u0433' : 'h' # г
175
- '\u0434' : 'd' # д
176
- '\u0435' : 'je' # е
177
- '\u0451' : 'jo' # ё
178
- '\u0436' : 'ž' # ж
179
- '\u0437' : 'z' # з
180
- '\u0456' : 'i' # і
181
- '\u0439' : 'j' # й
182
- '\u043A' : 'k' # к
183
- '\u043B' : 'l' # л
184
- '\u043C' : 'm' # м
185
- '\u043D' : 'n' # н
186
- '\u043E' : 'o' # о
187
- '\u043F' : 'p' # п
188
- '\u0440' : 'r' # р
189
- '\u0441' : 's' # с
190
- '\u0442' : 't' # т
191
- '\u0443' : 'u' # у
192
- '\u045E' : 'ŭ' # ў
193
- '\u0444' : 'f' # ф
194
- '\u0445' : 'ch' # х
195
- '\u0446' : 'c' # ц
196
- '\u0447' : 'č' # ч
197
- '\u0448' : 'š' # ш
198
- '\u044B' : 'y' # ы
199
- '\u044D' : 'e' # э
200
- '\u044E' : 'ju' # ю
201
- '\u044F' : 'ja' # я
@@ -1,186 +0,0 @@
1
- ---
2
- authority_id: gost
3
- id: 1983
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: LETTERS TRANSLATION RULES OF KIRILLOVSKY ALPHABET WITH LETTERS OF LATIN ALPHABET GOST 16876-71
8
- url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/7th-uncsgn-docs/econf/7th_UNCSGN_econf.91_3_Add1.pdf
9
- creation_date: 1978
10
- confirmation_date: 1982-04-16
11
- adoption_date: 1983-05-13
12
- description: |
13
- GOST 16876-71 (Russian: ГОСТ 16876-71) is a romanization system (for transliteration of Russian Cyrillic alphabet texts into the Latin alphabet)
14
- devised by the National Administration for Geodesy and Cartography of the Soviet Union.
15
- It is based on the scientific transliteration system used in linguistics.
16
- GOST was an international standard so it included provision for a number of the languages
17
- of the Soviet Union.
18
-
19
- GOST 16876-71 was used by the United Nations to develop its romanization system for geographical names,
20
- which was adopted for official use by the United Nations at the Fifth United Nations Conference
21
- on the Standardization of Geographical Names in Montreal, Quebec, Canada, in 1987.
22
-   The UN system relies on diacritics to compensate for non-Russian Cyrillic alphabets.
23
-
24
- In 1978, COMECON adopted GOST 16876-71 with minor modifications as its official transliteration standard,
25
- under the name of SEV 1362-78 (Russian: СЭВ 1362-78).
26
-
27
-   In 1982, in accordance with Order No. 169 of April 16, 1982, GOST 16876-71 / ST SEV 1362-78 was put into effect on May 1, 1982.
28
-
29
-   In 1983, in accordance with Order No. 231 of May 16, 1983, additional guidelines were released (check notes[2])
30
-
31
- In 2002, the Russian Federation along with a number of CIS countries abandoned the use of GOST 16876
32
- in favor of ISO 9:1995, which was adopted as GOST 7.79-2000.
33
-
34
- notes:
35
- - http://vsegost.com/Catalog/45/45002.shtml - original GOST
36
- - https://rosreestr.ru/upload/documenty/doc_169.doc - orders for entry into force (with changes)
37
-
38
- tests:
39
- - source: Анапа
40
- expected: Anapa
41
- - source: Бабушкин
42
- expected: Babuškin
43
- - source: Вавилово
44
- expected: Vavilovo
45
- - source: Гагарин
46
- expected: Gagarin
47
- - source: Дудинка
48
- expected: Dudinka
49
- - source: Елисеевка
50
- expected: Eliseevka
51
- - source: Ёлкино
52
- expected: Ëlkino
53
- - source: Псёл
54
- expected: Psël
55
- - source: Жужа
56
- expected: Žuža
57
- - source: Звёздный
58
- expected: Zvëzdnyj
59
- - source: Идрица
60
- expected: Idrica
61
- - source: Зарайск
62
- expected: Zarajsk
63
- - source: Коканд
64
- expected: Kokand
65
- - source: Лалвар
66
- expected: Lalvar
67
- - source: Маймак
68
- expected: Majmak
69
- - source: Нежин
70
- expected: Nežin
71
- - source: Ободовка
72
- expected: Obodovka
73
- - source: Пап
74
- expected: Pap
75
- - source: Ребриха
76
- expected: Rebriha
77
- - source: Сасово
78
- expected: Sasovo
79
- - source: Татта
80
- expected: Tatta
81
- - source: Уржум
82
- expected: Uržum
83
- - source: Фофаново
84
- expected: Fofanovo
85
- - source: Хохлома
86
- expected: Hohloma
87
- - source: Цветково
88
- expected: Cvetkovo
89
- - source: Чечельник
90
- expected: Čečel´nik
91
- - source: Шишкино
92
- expected: Šiškino
93
- - source: Щукино
94
- expected: Ščukino
95
- - source: Подъячево
96
- expected: Pod"jačevo
97
- - source: Ыныкчанский
98
- expected: Ynykčanskij
99
- - source: Параньга
100
- expected: Paran´ga
101
- - source: Щучье
102
- expected: Ščuč´e
103
- - source: Элиста
104
- expected: Èlista
105
- - source: Юрино
106
- expected: Jurino
107
- - source: Юхнов
108
- expected: Juhnov
109
- - source: Юрюзань
110
- expected: Jurjuzan´
111
- - source: Ямал
112
- expected: Jamal
113
- - source: Язъяван
114
- expected: Jaz"javan
115
- - source: Яя
116
- expected: Jaja
117
-
118
- map:
119
- characters:
120
- '\u0410' : 'A' # A
121
- '\u0411' : 'B' # Б
122
- '\u0412' : 'V' # B
123
- '\u0413' : 'G' # Г
124
- '\u0414' : 'D' # Д
125
- '\u0415' : 'E' # Е
126
- '\u0401' : "\u00CB" # Ё
127
- '\u0416' : 'Ž' # Ж
128
- '\u0417' : 'Z' # З
129
- '\u0418' : "I" # И
130
- '\u0419' : 'J' # Й
131
- '\u041A' : 'K' # К
132
- '\u041B' : 'L' # Л
133
- '\u041C' : 'M' # М
134
- '\u041D' : 'N' # Н
135
- '\u041E' : 'O' # О
136
- '\u041F' : 'P' # П
137
- '\u0420' : 'R' # Р
138
- '\u0421' : 'S' # С
139
- '\u0422' : 'T' # Т
140
- '\u0423' : 'U' # У
141
- '\u0424' : 'F' # Ф
142
- '\u0425' : 'H' # Х
143
- '\u0426' : 'C' # Ц
144
- '\u0427' : 'Č' # Ч
145
- '\u0428' : 'Š' # Ш
146
- '\u0429' : 'Šč' # Щ
147
- '\u042a' : '"' # Ъ
148
- '\u042B' : 'Y' # Ы
149
-     '\u042C' : "\u00B4" # Ь => ´
150
-     '\u042D' : "E\u0300" # Э => È
151
- '\u042E' : 'Ju' # Ю
152
- '\u042F' : 'Ja' # Я
153
-
154
- '\u0430' : 'a' # а
155
- '\u0431' : 'b' # б
156
- '\u0432' : 'v' # в
157
- '\u0433' : 'g' # г
158
- '\u0434' : 'd' # д
159
- '\u0435' : 'e' # е
160
- '\u0451' : "\u00EB" # ё
161
- '\u0436' : 'ž' # ж
162
- '\u0437' : 'z' # з
163
- '\u0438' : 'i' # и
164
- '\u0439' : 'j' # й
165
- '\u043A' : 'k' # к
166
- '\u043B' : 'l' # л
167
- '\u043C' : 'm' # м
168
- '\u043D' : 'n' # н
169
- '\u043E' : 'o' # о
170
- '\u043F' : 'p' # п
171
- '\u0440' : 'r' # р
172
- '\u0441' : 's' # с
173
- '\u0442' : 't' # т
174
- '\u0443' : 'u' # у
175
- '\u0444' : 'f' # ф
176
- '\u0445' : 'h' # х
177
- '\u0446' : 'c' # ц
178
- '\u0447' : 'č' # ч
179
- '\u0448' : 'š' # ш
180
- '\u0449' : 'šč' # щ
181
- '\u044A' : '"' # ъ
182
- '\u044B' : 'y' # ы
183
-     '\u044C' : "\u00B4" # ь => ´
184
-     '\u044D' : "e\u0300" # э => è
185
- '\u044E' : 'ju' # ю
186
- '\u044F' : 'ja' # я