interscript 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,20 @@
1
+ ---
2
+ authority_id: elot
3
+ id: 2001
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: ELOT 743:2001
8
+ url: https://docplayer.gr/docview/18/849832/#file=/storage/18/849832/849832.pdf
9
+ creation_date: 2001
10
+ description: |
11
+ Reversible transliteration standard, ELOT
12
+
13
+ note:
14
+ - Transliteration standard (reversible): Clause 3.1, Table 1
15
+
16
+ map:
17
+ character_separator: ""
18
+ word_separator: " "
19
+ inherit: "iso-ell-Grek-Latn-843-1997-t1"
20
+
@@ -0,0 +1,32 @@
1
+ ---
2
+ authority_id: elot
3
+ id: 2001
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: ELOT 743:2001
8
+ url: https://docplayer.gr/docview/18/849832/#file=/storage/18/849832/849832.pdf
9
+ creation_date: 2001
10
+ description: |
11
+ Reversible transliteration standard, ELOT
12
+
13
+ note:
14
+ - Transcription standard (reversible): Clause 3.1, Table 2
15
+
16
+ tests:
17
+
18
+ - source: |
19
+ Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
20
+
21
+ Γιάννης Μακρυγιάννης.
22
+
23
+ expected: |
24
+ Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
25
+
26
+ Giánnis Makrygiánnis.
27
+
28
+ map:
29
+ character_separator: ""
30
+ word_separator: " "
31
+ inherit: "iso-ell-Grek-Latn-843-1997-t2"
32
+
@@ -0,0 +1,89 @@
1
+ ---
2
+ authority_id: ggg
3
+ id: 2002
4
+ language: kat
5
+ source_script: Geor
6
+ destination_script: Latn
7
+ name: Georgian State Department of Geodesy and Cartography 2002 System
8
+ url: https://transliteration.eki.ee/pdf/Georgian.pdf
9
+ creation_date: 1998
10
+ confirmation_date: 2002
11
+ description: |
12
+ The national system of romanization adopted in February 2002 by the State
13
+ Department of Geodesy and Cartography of Georgia and the Institute
14
+ of Linguistics, Georgian Academy of Sciences.
15
+
16
+ notes:
17
+ - Georgian script refers in this document to the Mkhedruli alphabet.
18
+
19
+ - There is no case in Georgian.
20
+
21
+ tests:
22
+ - source: თბილისი
23
+ expected: tbilisi
24
+
25
+ - source: მეღვინეთუხუცესი
26
+ expected: meghvinetukhutsesi
27
+
28
+ - source: ჭიანჭველა
29
+ expected: ch’ianch’vela
30
+
31
+ - source: ბაყაყი
32
+ expected: baq’aq’i
33
+
34
+ - source: ჩხალთის ქედი
35
+ expected: chkhaltis kedi
36
+
37
+ - source: აბჟააფთრა
38
+ expected: abzhaaptra
39
+
40
+ - source: ამბროლაურის მუნიციპალიტეტი
41
+ expected: ambrolauris munitsip’alit’et’i
42
+
43
+ - source: მარტვილის მუნიციპალიტეტი
44
+ expected: mart’vilis munitsip’alit’et’i
45
+
46
+ - source: ლეკუხონა
47
+ expected: lek’ukhona
48
+
49
+ - source: მყინვარი აღმოსავლეთი მაგუაშირხა
50
+ expected: mq’invari aghmosavleti maguashirkha
51
+
52
+
53
+
54
+ map:
55
+ characters:
56
+ '\u10d0' : 'a' # ა
57
+ '\u10d1' : 'b' # ბ
58
+ '\u10d2' : 'g' # გ
59
+ '\u10d3' : 'd' # დ
60
+ '\u10d4' : 'e' # ე
61
+ '\u10d5' : 'v' # ვ
62
+ '\u10d6' : 'z' # ზ
63
+ '\u10d7' : 't' # თ
64
+ '\u10d8' : 'i' # ი
65
+ '\u10d9' : 'k’' # კ
66
+ '\u10da' : 'l' # ლ
67
+ '\u10db' : 'm' # მ
68
+ '\u10dc' : 'n' # ნ
69
+ '\u10dd' : 'o' # ო
70
+ '\u10de' : 'p’' # პ
71
+ '\u10df' : 'zh' # ჟ
72
+ '\u10e0' : 'r' # რ
73
+ '\u10e1' : 's' # ს
74
+ '\u10e2' : 't’' # ტ
75
+ '\u10e3' : 'u' # უ
76
+ '\u10e4' : 'p' # ფ
77
+ '\u10e5' : 'k' # ქ
78
+ '\u10e6' : 'gh' # ღ
79
+ '\u10e7' : 'q’' # ყ
80
+ '\u10e8' : 'sh' # შ
81
+ '\u10e9' : 'ch' # ჩ
82
+ '\u10ea' : 'ts' # ც
83
+ '\u10eb' : 'dz' # ძ
84
+ '\u10ec' : 'ts’' # წ
85
+ '\u10ed' : 'ch’' # ჭ
86
+ '\u10ee' : 'kh' # ხ
87
+ '\u10ef' : 'j' # ჯ
88
+ '\u10f0' : 'h' # ჰ
89
+
@@ -0,0 +1,33 @@
1
+ ---
2
+ authority_id: gki
3
+ id: 1992
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Byelorussian National Cartographic Authority 1992-3 System (based on GOST 1983)
8
+ url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/7th-uncsgn-docs/econf/7th_UNCSGN_econf.91_3_Add1.pdf
9
+ creation_date: 1992
10
+
11
+ tests:
12
+ - source: 'Сямашкі'
13
+ expected: 'Sjamaški'
14
+ # "Syamashki" in GNDB `bel_Cyrl2Latn_GBO_1992`, but that is clearly bgnpcgn-ukr-Cyrl-Latn-1965
15
+ - source: 'Старадворцы'
16
+ expected: 'Staradvorcy'
17
+ # "Staradvortsy" in GNDB `bel_Cyrl2Latn_GBO_1992`, but that is clearly bgnpcgn-ukr-Cyrl-Latn-1965
18
+ - source: 'Канюхі'
19
+ expected: 'Kanjuhi'
20
+ # "Kanyukhi" in GNDB `bel_Cyrl2Latn_GBO_1992`, but that is clearly bgnpcgn-ukr-Cyrl-Latn-1965
21
+
22
+ map:
23
+ inherit: gost-rus-cyrl-latn-16876-71-1983
24
+
25
+ characters:
26
+ '\u0406' : 'I' # І
27
+ '\u0456' : 'i' # і
28
+
29
+ '\u0413' : 'G' # Г
30
+ '\u0433' : 'g' # г
31
+
32
+ '\u040E' : 'Ŭ' # Ў
33
+ '\u045E' : 'ŭ' # ў
@@ -0,0 +1,201 @@
1
+ ---
2
+ authority_id: gki
3
+ id: 2000
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: On approval of the Instructions for the transliteration of geographical names of the Republic of Belarus with letters of the Latin alphabet
8
+ url: https://registr.by/doc/103003
9
+ creation_date: 2000
10
+ description: |
11
+ Act name:
12
+ On approval of the Instructions for the transliteration of geographical names of
13
+ the Republic of Belarus in letters of the Latin alphabet
14
+ Type of act, adoption authority, date and number of adoption (publication):
15
+ Resolution of the State Committee for Land Resources, Geodesy and Cartography of
16
+ the Republic of Belarus of November 23, 2000 No. 15
17
+ National Registry Registration Number: 8/4488
18
+ Date of inclusion in the National Register: 11/30/2000
19
+ Source (s) of official publication:
20
+ National Register of Legal Acts of the Republic of Belarus, 2001,
21
+ No. 3, 8/4488 (published - January 11, 2001)
22
+ tests:
23
+ - source: Аршанскi
24
+ expected: Aršanski
25
+ - source: Бешанковічы
26
+ expected: Biešankovičy
27
+ - source: Віцебск
28
+ expected: Viciebsk
29
+ - source: Гомель
30
+ expected: Homiel'
31
+ - source: Гаўя
32
+ expected: Haŭja
33
+ - source: Добруш
34
+ expected: Dobruš
35
+ - source: Ельск
36
+ expected: Jel'sk
37
+ - source: Бабаедава
38
+ expected: Babajedava
39
+ - source: Лепель
40
+ expected: Liepiel'
41
+ - source: Ёды
42
+ expected: Jody
43
+ - source: Вераб'ёвічы
44
+ expected: Vierabjovičy
45
+ - source: Мёры
46
+ expected: Miory
47
+ - source: Жодзiшкi
48
+ expected: Žodziški
49
+ - source: Зэльва
50
+ expected: Zel'va
51
+ - source: Iванава
52
+ expected: Ivanava
53
+ - source: Iўе
54
+ expected: Iŭje
55
+ - source: Лагойск
56
+ expected: Lahojsk
57
+ - source: Круглае
58
+ expected: Kruhlaje
59
+ - source: Любань
60
+ expected: Liuban'
61
+ - source: Магілёў
62
+ expected: Mahilioŭ
63
+ - source: Нясвіж
64
+ expected: Niasviž
65
+ - source: Орша
66
+ expected: Orša
67
+ - source: Паставы
68
+ expected: Pastavy
69
+ - source: Рагачоў
70
+ expected: Rahačoŭ
71
+ - source: Светлагорск
72
+ expected: Svietlahorsk # the original document has "Svetlahorsk", which looks like a mistake; see https://en.wikipedia.org/wiki/Svietlahorsk
73
+ - source: Талачын
74
+ expected: Talačyn
75
+ - source: Узда
76
+ expected: Uzda
77
+ - source: Шаркаўшчына
78
+ expected: Šarkaŭščyna
79
+ - source: Фаніпаль
80
+ expected: Fanipal'
81
+ - source: Хоцімск
82
+ expected: Chocimsk
83
+ - source: Цёмны Лес
84
+ expected: Ciomny Lies
85
+ - source: Чавусы
86
+ expected: Čavusy
87
+ - source: Шумілiна
88
+ expected: Šumilina
89
+ - source: Раз'езд
90
+ expected: Razjezd
91
+ - source: Чыгірынка
92
+ expected: Čyhirynka
93
+ - source: Чэрвень
94
+ expected: Červien'
95
+ - source: Чачэрск
96
+ expected: Čačersk
97
+ - source: Юхнаўка
98
+ expected: Juchnaŭka
99
+ - source: Гаюціна
100
+ expected: Hajucina
101
+ - source: Любонічы
102
+ expected: Liuboničy
103
+ - source: Ямнае
104
+ expected: Jamnaje
105
+ - source: Баяры
106
+ expected: Bajary
107
+ - source: Вязынка
108
+ expected: Viazynka
109
+ - source: Валяр'яны
110
+ expected: Valiarjany
111
+
112
+ map:
113
+ rules:
114
+ - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0415 # Е after consonants
115
+ result: IE
116
+ - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0435 # е after consonants
117
+ result: ie
118
+ - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0401 # Ё after consonants
119
+ result: IO
120
+ - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0451 # ё after consonants
121
+ result: io
122
+ - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u042E # Ю after consonants
123
+ result: IU
124
+ - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u044E # ю after consonants
125
+ result: iu
126
+ - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u042F # Я after consonants
127
+ result: IA
128
+ - pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u044F # я after consonants
129
+ result: ia
130
+ postrules:
131
+ - pattern: '\u042C' # Ь
132
+ result: "'"
133
+ - pattern: '\u044C' # ь
134
+ result: "'"
135
+
136
+ characters:
137
+ '\u0027' : '' # '
138
+
139
+ '\u0410' : 'A' # A
140
+ '\u0411' : 'B' # Б
141
+ '\u0412' : 'V' # B
142
+ '\u0413' : 'H' # Г
143
+ '\u0414' : 'D' # Д
144
+ '\u0415' : 'Je' # Е
145
+ '\u0401' : 'Jo' # Ё
146
+ '\u0416' : 'Ž' # Ж
147
+ '\u0417' : 'Z' # З
148
+ '\u0406' : 'I' # І
149
+ '\u0419' : 'J' # Й
150
+ '\u041A' : 'K' # К
151
+ '\u041B' : 'L' # Л
152
+ '\u041C' : 'M' # М
153
+ '\u041D' : 'N' # Н
154
+ '\u041E' : 'O' # О
155
+ '\u041F' : 'P' # П
156
+ '\u0420' : 'R' # Р
157
+ '\u0421' : 'S' # С
158
+ '\u0422' : 'T' # Т
159
+ '\u0423' : 'U' # У
160
+ '\u040E' : 'Ŭ' # Ў
161
+ '\u0424' : 'F' # Ф
162
+ '\u0425' : 'Ch' # Х
163
+ '\u0426' : 'C' # Ц
164
+ '\u0427' : 'Č' # Ч
165
+ '\u0428' : 'Š' # Ш
166
+ '\u042B' : 'Y' # Ы
167
+ '\u042D' : 'E' # Э
168
+ '\u042E' : 'Ju' # Ю
169
+ '\u042F' : 'Ja' # Я
170
+
171
+ '\u0430' : 'a' # а
172
+ '\u0431' : 'b' # б
173
+ '\u0432' : 'v' # в
174
+ '\u0433' : 'h' # г
175
+ '\u0434' : 'd' # д
176
+ '\u0435' : 'je' # е
177
+ '\u0451' : 'jo' # ё
178
+ '\u0436' : 'ž' # ж
179
+ '\u0437' : 'z' # з
180
+ '\u0456' : 'i' # і
181
+ '\u0439' : 'j' # й
182
+ '\u043A' : 'k' # к
183
+ '\u043B' : 'l' # л
184
+ '\u043C' : 'm' # м
185
+ '\u043D' : 'n' # н
186
+ '\u043E' : 'o' # о
187
+ '\u043F' : 'p' # п
188
+ '\u0440' : 'r' # р
189
+ '\u0441' : 's' # с
190
+ '\u0442' : 't' # т
191
+ '\u0443' : 'u' # у
192
+ '\u045E' : 'ŭ' # ў
193
+ '\u0444' : 'f' # ф
194
+ '\u0445' : 'ch' # х
195
+ '\u0446' : 'c' # ц
196
+ '\u0447' : 'č' # ч
197
+ '\u0448' : 'š' # ш
198
+ '\u044B' : 'y' # ы
199
+ '\u044D' : 'e' # э
200
+ '\u044E' : 'ju' # ю
201
+ '\u044F' : 'ja' # я
@@ -0,0 +1,186 @@
1
+ ---
2
+ authority_id: gost
3
+ id: 1983
4
+ language: rus
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: LETTERS TRANSLATION RULES OF KIRILLOVSKY ALPHABET WITH LETTERS OF LATIN ALPHABET GOST 16876-71
8
+ url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/7th-uncsgn-docs/econf/7th_UNCSGN_econf.91_3_Add1.pdf
9
+ creation_date: 1978
10
+ confirmation_date: 1982-04-16
11
+ adoption_date: 1983-05-13
12
+ description: |
13
+ GOST 16876-71 (Russian: ГОСТ 16876-71) is a romanization system (for transliteration of Russian Cyrillic alphabet texts into the Latin alphabet)
14
+ devised by the National Administration for Geodesy and Cartography of the Soviet Union.
15
+ It is based on the scientific transliteration system used in linguistics.
16
+ GOST was an international standard so it included provision for a number of the languages
17
+ of the Soviet Union.
18
+
19
+ GOST 16876-71 was used by the United Nations to develop its romanization system for geographical names,
20
+ which was adopted for official use by the United Nations at the Fifth United Nations Conference
21
+ on the Standardization of Geographical Names in Montreal, Quebec, Canada, in 1987.
22
+ The UN system relies on diacritics to compensate for non-Russian Cyrillic alphabets.
23
+
24
+ In 1978, COMECON adopted GOST 16876-71 with minor modifications as its official transliteration standard,
25
+ under the name of SEV 1362-78 (Russian: СЭВ 1362-78).
26
+
27
+ In 1982, in accordance with Order No. 169 of April 16, 1982, GOST 16876-71 / ST SEV 1362-78 was put into effect on May 1, 1982.
28
+
29
+ In 1983, in accordance with Order No. 231 of May 16, 1983, additional guidelines were released (see notes[2]).
30
+
31
+ In 2002, the Russian Federation along with a number of CIS countries abandoned the use of GOST 16876
32
+ in favor of ISO 9:1995, which was adopted as GOST 7.79-2000.
33
+
34
+ notes:
35
+ - http://vsegost.com/Catalog/45/45002.shtml - original GOST
36
+ - https://rosreestr.ru/upload/documenty/doc_169.doc - orders for entry into force (with changes)
37
+
38
+ tests:
39
+ - source: Анапа
40
+ expected: Anapa
41
+ - source: Бабушкин
42
+ expected: Babuškin
43
+ - source: Вавилово
44
+ expected: Vavilovo
45
+ - source: Гагарин
46
+ expected: Gagarin
47
+ - source: Дудинка
48
+ expected: Dudinka
49
+ - source: Елисеевка
50
+ expected: Eliseevka
51
+ - source: Ёлкино
52
+ expected: Ëlkino
53
+ - source: Псёл
54
+ expected: Psël
55
+ - source: Жужа
56
+ expected: Žuža
57
+ - source: Звёздный
58
+ expected: Zvëzdnyj
59
+ - source: Идрица
60
+ expected: Idrica
61
+ - source: Зарайск
62
+ expected: Zarajsk
63
+ - source: Коканд
64
+ expected: Kokand
65
+ - source: Лалвар
66
+ expected: Lalvar
67
+ - source: Маймак
68
+ expected: Majmak
69
+ - source: Нежин
70
+ expected: Nežin
71
+ - source: Ободовка
72
+ expected: Obodovka
73
+ - source: Пап
74
+ expected: Pap
75
+ - source: Ребриха
76
+ expected: Rebriha
77
+ - source: Сасово
78
+ expected: Sasovo
79
+ - source: Татта
80
+ expected: Tatta
81
+ - source: Уржум
82
+ expected: Uržum
83
+ - source: Фофаново
84
+ expected: Fofanovo
85
+ - source: Хохлома
86
+ expected: Hohloma
87
+ - source: Цветково
88
+ expected: Cvetkovo
89
+ - source: Чечельник
90
+ expected: Čečel´nik
91
+ - source: Шишкино
92
+ expected: Šiškino
93
+ - source: Щукино
94
+ expected: Ščukino
95
+ - source: Подъячево
96
+ expected: Pod"jačevo
97
+ - source: Ыныкчанский
98
+ expected: Ynykčanskij
99
+ - source: Параньга
100
+ expected: Paran´ga
101
+ - source: Щучье
102
+ expected: Ščuč´e
103
+ - source: Элиста
104
+ expected: Èlista
105
+ - source: Юрино
106
+ expected: Jurino
107
+ - source: Юхнов
108
+ expected: Juhnov
109
+ - source: Юрюзань
110
+ expected: Jurjuzan´
111
+ - source: Ямал
112
+ expected: Jamal
113
+ - source: Язъяван
114
+ expected: Jaz"javan
115
+ - source: Яя
116
+ expected: Jaja
117
+
118
+ map:
119
+ characters:
120
+ '\u0410' : 'A' # A
121
+ '\u0411' : 'B' # Б
122
+ '\u0412' : 'V' # B
123
+ '\u0413' : 'G' # Г
124
+ '\u0414' : 'D' # Д
125
+ '\u0415' : 'E' # Е
126
+ '\u0401' : "\u00CB" # Ё
127
+ '\u0416' : 'Ž' # Ж
128
+ '\u0417' : 'Z' # З
129
+ '\u0418' : "I" # И
130
+ '\u0419' : 'J' # Й
131
+ '\u041A' : 'K' # К
132
+ '\u041B' : 'L' # Л
133
+ '\u041C' : 'M' # М
134
+ '\u041D' : 'N' # Н
135
+ '\u041E' : 'O' # О
136
+ '\u041F' : 'P' # П
137
+ '\u0420' : 'R' # Р
138
+ '\u0421' : 'S' # С
139
+ '\u0422' : 'T' # Т
140
+ '\u0423' : 'U' # У
141
+ '\u0424' : 'F' # Ф
142
+ '\u0425' : 'H' # Х
143
+ '\u0426' : 'C' # Ц
144
+ '\u0427' : 'Č' # Ч
145
+ '\u0428' : 'Š' # Ш
146
+ '\u0429' : 'Šč' # Щ
147
+ '\u042a' : '"' # Ъ
148
+ '\u042B' : 'Y' # Ы
149
+ '\u042C' : "\u00B4" # Ь => ´ (U+00B4 ACUTE ACCENT)
150
+ '\u042D' : "E\u0300" # È
151
+ '\u042E' : 'Ju' # Ю
152
+ '\u042F' : 'Ja' # Я
153
+
154
+ '\u0430' : 'a' # а
155
+ '\u0431' : 'b' # б
156
+ '\u0432' : 'v' # в
157
+ '\u0433' : 'g' # г
158
+ '\u0434' : 'd' # д
159
+ '\u0435' : 'e' # е
160
+ '\u0451' : "\u00EB" # ё
161
+ '\u0436' : 'ž' # ж
162
+ '\u0437' : 'z' # з
163
+ '\u0438' : 'i' # и
164
+ '\u0439' : 'j' # й
165
+ '\u043A' : 'k' # к
166
+ '\u043B' : 'l' # л
167
+ '\u043C' : 'm' # м
168
+ '\u043D' : 'n' # н
169
+ '\u043E' : 'o' # о
170
+ '\u043F' : 'p' # п
171
+ '\u0440' : 'r' # р
172
+ '\u0441' : 's' # с
173
+ '\u0442' : 't' # т
174
+ '\u0443' : 'u' # у
175
+ '\u0444' : 'f' # ф
176
+ '\u0445' : 'h' # х
177
+ '\u0446' : 'c' # ц
178
+ '\u0447' : 'č' # ч
179
+ '\u0448' : 'š' # ш
180
+ '\u0449' : 'šč' # щ
181
+ '\u044A' : '"' # ъ
182
+ '\u044B' : 'y' # ы
183
+ '\u044C' : "\u00B4" # ь => ´ (U+00B4 ACUTE ACCENT)
184
+ '\u044D' : "e\u0300" # è
185
+ '\u044E' : 'ju' # ю
186
+ '\u044F' : 'ja' # я