interscript 0.1.6 → 2.1.0a9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -127
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +75 -339
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -71
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -27
  71. data/lib/interscript/opal/maps.js.erb +0 -10
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
  80. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  81. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
  82. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  83. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  84. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
  85. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
  86. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  87. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  88. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  89. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  90. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
  91. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  92. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  93. data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
  94. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
  95. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  96. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  97. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  98. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  99. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  100. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  101. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  102. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  103. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  104. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  105. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  106. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  107. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  108. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
  109. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
  110. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  111. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  112. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  113. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
  114. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  115. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  116. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
  117. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
  118. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  119. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  120. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  121. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  122. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  123. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  124. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  125. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  126. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
  127. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
  128. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  129. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  130. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
  131. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  132. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  133. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  134. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  135. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  136. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
  137. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  138. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  139. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  140. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  141. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  142. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  143. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  144. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  145. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
  146. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  147. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  148. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  149. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  150. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
  151. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  152. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  153. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
  154. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  155. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  156. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  157. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  158. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  159. data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
  160. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  161. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  162. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  163. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  164. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
  165. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
  166. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  167. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  172. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  173. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  174. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  175. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  176. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  177. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  178. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  179. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  180. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  181. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  182. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  183. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  184. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  185. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  186. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  200. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
  201. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  202. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  203. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  204. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  205. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  206. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  207. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  208. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  209. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  210. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  211. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
  212. data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
  213. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  214. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
  215. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
  216. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  217. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  218. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  219. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  220. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  221. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  222. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  223. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  224. data/spec/interscript/mapping_spec.rb +0 -42
  225. data/spec/interscript_spec.rb +0 -26
  226. data/spec/spec_helper.rb +0 -3
@@ -1,108 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1981
4
- language: arm
5
- source_script: Armn
6
- destination_script: Latn
7
- name: BGN/PCGN 1981 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/810208/ROMANIZATION_OF_ARMENIAN.pdf
9
- creation_date: 2013
10
- confirmation date: 2019-06
11
- description: |
12
- The BGN/PCGN system for Armenian was designed for use in romanizing
13
- names written in the Armenian alphabet. The Roman letters and letter
14
- combinations shown as equivalents to the Armenian characters reflect
15
- the eastern variety of Armenian, i.e. the language spoken in the
16
- Republic of Armenia.
17
-
18
- notes:
19
- - The character ե should be romanized ye initially and after the vowel characters ա, ե, է, ը, ի, ո, ու and օ. In all other instances, it should be romanized e.
20
- - The character ո should be romanized vo initially except in the word ով, which should be romanized ov. In all other instances, it should be romanized o.
21
- - In Soviet-era sources this upper-case digraph character is found as Եի (Unicode encoding 0535+056B).
22
- - This lower-case character may be seen either in digraph form as եւ (Unicode encoding 0565+0582) or in single character form as և (Unicode encoding 0587).
23
- - The characters ԵՎ, եւ and և should be romanized yev initially, in isolation, and after the vowel characters ա, ե, է, ը, ի, ո, ու, and օ. In all other instances these characters should be romanized ev.
24
- - All apostrophes appearing in Armenian romanization are encoded Unicode 2019.
25
- - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase Roman letters as appropriate should be used.
26
-
27
- tests:
28
-
29
- map:
30
- characters:
31
- '\u0531' : 'A'
32
- '\u0532' : 'B'
33
- '\u0533' : 'G'
34
- '\u0534' : 'D'
35
- '\u0535' : 'Ye' #treated same as Russian 'ye'
36
- '\u0536' : 'Z'
37
- '\u0537' : 'E'
38
- '\u0538' : 'Y'
39
- '\u0539' : 'T\u2019'
40
- '\u053a' : 'Zh'
41
- '\u053b' : 'I'
42
- '\u053c' : 'L'
43
- '\u053d' : 'Kh'
44
- '\u053e' : 'Ts'
45
- '\u053f' : 'K'
46
- '\u0540' : 'H'
47
- '\u0541' : 'Dz'
48
- '\u0542' : 'Gh'
49
- '\u0543' : 'Ch'
50
- '\u0544' : 'M'
51
- '\u0545' : 'Y'
52
- '\u0546' : 'N'
53
- '\u0547' : 'Sh'
54
- '\u0548' : 'O' # VO initially and U when in combination with \u0552
55
- '\u0549' : u'Ch\u2019'
56
- '\u054a' : 'P'
57
- '\u054b' : 'J'
58
- '\u054c' : 'Rr'
59
- '\u054d' : 'S'
60
- '\u054e' : 'V'
61
- '\u054f' : 'T'
62
- '\u0550' : 'R'
63
- '\u0551' : 'Ts\u2019'
64
- '\u0548\u0552' : 'U'
65
- '\u0548\u0582' : 'U'
66
- '\u0553' : 'P\u2019'
67
- '\u0554' : 'K\u2019'
68
- '\u0555' : 'O'
69
- '\u0556' : 'F'
70
- '\u0561' : 'a'
71
- '\u0562' : 'b'
72
- '\u0563' : 'g'
73
- '\u0564' : 'd'
74
- '\u0565' : 'e' # ye initially
75
- '\u0566' : 'z'
76
- '\u0567' : 'e'
77
- '\u0568' : 'y'
78
- '\u0569' : u't\u2019'
79
- '\u056a' : 'zh'
80
- '\u056b' : 'i'
81
- '\u056c' : 'l'
82
- '\u056d' : 'kh'
83
- '\u056e' : 'ts'
84
- '\u056f' : 'k'
85
- '\u0570' : 'h'
86
- '\u0571' : 'dz'
87
- '\u0572' : 'gh'
88
- '\u0573' : 'ch'
89
- '\u0574' : 'm'
90
- '\u0575' : 'y'
91
- '\u0576' : 'n'
92
- '\u0577' : 'sh'
93
- '\u0578' : 'o' # vo initially and u when in combination with \u0582
94
- '\u0579' : 'ch\u2019'
95
- '\u057a' : 'p'
96
- '\u057b' : 'j'
97
- '\u057c' : 'rr'
98
- '\u057d' : 's'
99
- '\u057e' : 'v'
100
- '\u057f' : 't'
101
- '\u0580' : 'r'
102
- '\u0581' : 'ts\u2019'
103
- '\u0578\u0582' : 'u'
104
- '\u0583' : 'p\u2019'
105
- '\u0584' : 'k\u2019'
106
- '\u0585' : 'o'
107
- '\u0586' : 'f'
108
- '\u0587' : 'ev' # yev initially
@@ -1,104 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1993
4
- language: aze
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: AZERBAIJANI TABLE OF CORRESPONDENCES CYRILLIC-ROMAN -- BGN/PCGN 1993 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816656/TABLE_OF_CORRESPONDENCES_FOR_AZERBAIJANI.pdf
9
- creation_date: 1993
10
- confirmation date: 2019-06
11
- description: |
12
- Azerbaijani, also known as Azeri, is the official language of the Republic of Azerbaijan. In 1991, the Azerbaijani government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Azerbaijani Roman-alphabet spellings are not available, this table can be used to convert Azerbaijani Cyrillic spellings.
13
-
14
- notes:
15
-
16
- - The special letter Ə, ə known as schwa, should be reproduced in that form whenever encountered. The characters Ə (Unicode 04D8) and ə (Unicode 04D9) should be used for schwa when writing in the Cyrillic script, but characters Ə (Unicode 018F) and ə (Unicode 0259) should be used when writing in the Roman alphabet. In those instances when it cannot be reproduced, however, the letter Ä ä may be substituted for it (see below).
17
-
18
- - The obsolete characters й, э, ю, and я should be romanized ẏ, ė, yu., and ya.
19
-
20
- - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character. It is not known whether there exists an uppercase ‘J’ specific to the Cyrillic character set.
21
-
22
- - |
23
- An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
24
- Ğ (U+011E), ğ (U+011F)
25
- Ə (U+018F), ə (U+0259)
26
- İ (U+0130), ı (U+0131)
27
- Ö (U+00D6), ö (U+00F6)
28
- Ü (U+00DC), ü (U+00FC)
29
- Ç (U+00C7), ç (U+00E7)
30
- Ş (U+015E), ş (U+015F)
31
-
32
- - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
33
-
34
- tests:
35
- - source:
36
- expected:
37
-
38
- map:
39
- characters:
40
- '\u0410' : 'A'
41
- '\u0411' : 'B'
42
- '\u0412' : 'G'
43
- '\u0413' : 'V'
44
- '\u0492' : 'Ğ'
45
- '\u0414' : 'D'
46
- '\u0415' : 'E'
47
- '\u04D8' : 'Ә'
48
- '\u0416' : 'J'
49
- '\u0417' : 'Z'
50
- '\u0418' : 'I'
51
- '\u042B' : 'İ'
52
- '\u0408' : 'Y'
53
- '\u041A' : 'K'
54
- '\u049C' : 'G'
55
- '\u041B' : 'L'
56
- '\u041C' : 'M'
57
- '\u041D' : 'N'
58
- '\u041E' : 'O'
59
- '\u04E8' : 'ö'
60
- '\u041F' : 'P'
61
- '\u0420' : 'R'
62
- '\u0421' : 'S'
63
- '\u0422' : 'T'
64
- '\u0423' : 'U'
65
- '\u04AE' : 'Ü'
66
- '\u0424' : 'F'
67
- '\u0425' : 'X'
68
- '\u04BA' : 'H'
69
- '\u0427' : 'Ç'
70
- '\u04B8' : 'C'
71
- '\u0428' : 'Ş'
72
-
73
- '\u0430' : 'a'
74
- '\u0431' : 'b'
75
- '\u0432' : 'v'
76
- '\u0433' : 'g'
77
- '\u0493' : 'ğ'
78
- '\u0434' : 'd'
79
- '\u0435' : 'e'
80
- '\u04D9' : 'ә'
81
- '\u0436' : 'j'
82
- '\u0437' : 'z'
83
- '\u0438' : 'i'
84
- '\u044B' : 'ı'
85
- '\u0458' : 'y'
86
- '\u043A' : 'k'
87
- '\u049D' : 'g'
88
- '\u043B' : 'l'
89
- '\u043C' : 'm'
90
- '\u043D' : 'n'
91
- '\u043E' : 'o'
92
- '\u04E9' : 'ö'
93
- '\u043F' : 'p'
94
- '\u0440' : 'r'
95
- '\u0441' : 's'
96
- '\u0442' : 't'
97
- '\u0443' : 'u'
98
- '\u04AF' : 'ü'
99
- '\u0444' : 'f'
100
- '\u0445' : 'x'
101
- '\u04BB' : 'h'
102
- '\u0447' : 'ç'
103
- '\u04B9' : 'c'
104
- '\u0448' : 'ş'
@@ -1,184 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2007
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BASHKIR TABLE OF CORRESPONDENCES CYRILLIC-ROMAN BGN/PCGN 2007 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
9
- creation_date: 2007
10
- confirmation_date: 2019
11
- description: |
12
- Bashkir is an official language within Respublika Bashkortostan, one of the
13
- republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
14
- which case it should be romanized by means of the Cyrillic-Roman table of
15
- correspondences given below.
16
-
17
- notes:
18
- - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
19
- - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
20
- - The letter w is used between or after vowels.
21
- - The letter w is used after e, u, ö and ə.
22
- - |
23
- An inventory of letter-diacritic combinations, with their Unicode encoding,
24
- in addition to the unmodified letters of the basic Roman script is:
25
- Ğ (U+011E) ğ (U+011F)
26
- Ź (U+0179) ź (U+017A)
27
- Ë (U+00CB) ë (U+00EB)
28
- Ñ (U+00D1) ñ (U+00F1)
29
- Ö (U+00D6) ö (U+00F6)
30
- Ś (U+015A) ś (U+015B)
31
- Ü (U+00DC) ü (U+00FC)
32
- Ç (U+00C7) ç (U+00E7)
33
- Ş (U+015E) ş (U+015F)
34
- Ə (U+018F) ə (U+0259)
35
- - |
36
- The Roman-script columns show only lowercase forms but, when applying the table,
37
- uppercase and lowercase Roman letters as appropriate should be used.
38
-
39
- tests:
40
- # adopted http://www.eki.ee/knab/lat/kblba.pdf
41
- - source: Васйылға
42
- expected: Wasyılğa
43
- - source: Еҙем
44
- expected: Yeźem
45
- - source: Раевка
46
- expected: Raevka
47
- - source: Сәйетҡол
48
- expected: Səyetqol
49
- - source: Ауырғазы
50
- expected: Awırğazı
51
- - source: Бурһыҡтау
52
- expected: Burhıqtaw
53
- - source: Мәләүез
54
- expected: Mələwez
55
- - source: Ҡыҙылъяр
56
- expected: Qıźılyar
57
- # adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
58
- - source: кемдең
59
- expected: kemdeñ
60
- - source: кем
61
- expected: kem
62
- - source: был
63
- expected: bıl
64
- - source: ошо
65
- expected: oşo
66
- - source: быларҙың
67
- expected: bılarźıñ
68
- - source: һеҙҙән
69
- expected: heźźən
70
- - source: һин
71
- expected: hin
72
- - source: һеҙҙең
73
- expected: heźźeñ
74
-
75
- map:
76
- rules:
77
- # note[1]
78
- - pattern: \b\u0412(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
79
- result: "W"
80
- - pattern: \b\u0432(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
81
- result: "w"
82
- # note[2]
83
- - pattern: \b\u0415
84
- result: "Ye"
85
- - pattern: \b\u0435
86
- result: "ye"
87
- - pattern: (?=\b)\u0415(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
88
- result: "Ye"
89
- - pattern: (?=\b)\u0435(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
90
- result: "ye"
91
-
92
- # note[3] # note[4]
93
- - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0423\u04AE]
94
- result: W
95
- - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0443\u04AF]
96
- result: w
97
-
98
-
99
- characters:
100
- '\u0410': 'A' # А
101
- '\u0411': 'B' # Б note[1]
102
- '\u0412': 'V' # В
103
- '\u0413': 'G' # Г
104
- '\u0492': "\u011E" # Ғ
105
- '\u0414': 'D' # Д
106
- '\u0498': "\u0179" # Ҙ
107
- '\u0415': 'E' # Е note[2]
108
- '\u0401': 'Ë' # Ё
109
- '\u0416': 'J' # Ж
110
- '\u0417': 'Z' # З
111
- '\u0418': 'I' # И
112
- '\u0419': 'Y' # Й
113
- '\u041A': 'K' # К
114
- '\u04A0': 'Q' # Ҡ
115
- '\u041B': 'L' # Л
116
- '\u041C': 'M' # М
117
- '\u041D': 'N' # Н
118
- '\u04A2': 'Ñ' # Ң
119
- '\u041E': 'O' # О
120
- '\u04E8': "ö" # Ө
121
- '\u041F': 'P' # П
122
- '\u0420': 'R' # Р
123
- '\u0421': 'S' # С
124
- '\u04AA': 'Ś' # Ҫ
125
- '\u0422': 'T' # Т
126
- '\u0423': 'U' # У
127
- '\u04AE': 'Ü' # Ү note[3]
128
- '\u0424': 'F' # Ф
129
- '\u0425': 'X' # Х
130
- '\u04BA': 'H' # Һ
131
- '\u0426': 'Ts' # Ц
132
- '\u0427': 'Ç' # Ч
133
- '\u0428': 'Ş' # Ш
134
- '\u0429': 'ŞÇ' # Щ
135
- '\u042A': '' # Ъ
136
- '\u042B': 'I' # Ы
137
- '\u042C': '' # Ь
138
- '\u042D': 'E' # Э
139
- '\u04D8': "\u018F" # Ә
140
- '\u042E': 'Yu' # Ю
141
- '\u042F': 'Ya' # Я
142
-
143
- '\u0430': 'a' # а
144
- '\u0431': 'b' # б
145
- '\u0432': 'v' # в note[1]
146
- '\u0433': 'g' # г
147
- '\u0493': "\u011F" # ғ
148
- '\u0434': 'd' # д
149
- '\u0499': 'ź' # ҙ
150
- '\u0435': 'e' # e note[2]
151
- '\u0451': 'yo' # ё
152
- '\u0436': 'j' # ж
153
- '\u0437': 'z' # з
154
- '\u0438': 'i' # и
155
- '\u0439': 'y' # й
156
- '\u043A': 'k' # к
157
- '\u04A1': 'q' # ҡ
158
- '\u043B': 'l' # л
159
- '\u043C': 'm' # м
160
- '\u043D': 'n' # н
161
- '\u04A3': 'ñ' # ң
162
- '\u043E': 'o' # о
163
- '\u04E9': "\u00F6" # ө
164
- '\u043F': 'p' # п
165
- '\u0440': 'r' # р
166
- '\u0441': 's' # с
167
- '\u04AB': 'ś' # ҫ
168
- '\u0442': 't' # т
169
- '\u0443': 'u' # у
170
- "\u04AF": 'ü' # ү note[3]
171
- '\u0444': 'f' # ф
172
- '\u0445': 'x' # х
173
- '\u04BB': 'h' # һ
174
- '\u0446': 'ts' # ц
175
- '\u0447': 'ç' # ч
176
- '\u0448': 'ş' # ш
177
- '\u0449': 'şç' # щ
178
- '\u044A': '' # ъ
179
- '\u044B': "\u0131" # ы
180
- '\u044C': '' # ь
181
- '\u044D': 'e' # э
182
- '\u04D9': "\u0259" # ә
183
- '\u044E': 'yu' # ю
184
- '\u044F': 'ya' # я
@@ -1,285 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1979
4
- language: bel
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: United States Board on Geographic Names Foreign Names Committee Staff, 1994. Romanization Systems and Roman-Script Spelling Conventions, p. 23.
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811510/ROMANIZATION_OF_BELARUSIAN.pdf
9
- creation_date: 1979
10
- description: |
11
- The BGN/PCGN system for Belarusian (formerly referred to as Byelorussian) was designed for use in
12
- romanizing names written in the Belarusian Cyrillic alphabet. The Belarusian alphabet contains three
13
- characters not present in the Russian alphabet: і, ў, and ’.
14
-
15
- notes:
16
- - The character sequences зг, кг, сг, тс and цг may be romanized z·h, k·h, s·h, t·s and ts·h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, x, ш, ц, and the character sequence тш.
17
- - All apostrophes appearing in romanization are Unicode encoding 2019.
18
-
19
- tests:
20
- - source: Антон
21
- expected: Anton
22
- - source: Вілейка
23
- expected: Vilyeyka
24
- - source: Брэст
25
- expected: Brest
26
- - source: Дубна
27
- expected: Dubna
28
- - source: Віцебск
29
- expected: Vitsyebsk
30
- - source: Асіповічы
31
- expected: Asipovichy
32
- - source: Гродна
33
- expected: Hrodna
34
- - source: Брагін
35
- expected: Brahin
36
- - source: Добруш
37
- expected: Dobrush
38
- - source: Ліда
39
- expected: Lida
40
- - source: Гомель
41
- expected: Homyel’
42
- - source: Беліца
43
- expected: Byelitsa
44
- - source: Ёдкавічы
45
- expected: Yodkavichy
46
- - source: Нёман
47
- expected: Nyoman
48
- - source: Жлобін
49
- expected: Zhlobin
50
- - source: Ружаны
51
- expected: Ruzhany
52
- - source: Зоя
53
- expected: Zoya
54
- - source: князь
55
- expected: knyaz’
56
- - source: Ігнат
57
- expected: Ihnat
58
- - source: Мінск
59
- expected: Minsk
60
- - source: Йосель
61
- expected: Yosyel’
62
- - source: Койданава
63
- expected: Koydanava
64
- - source: Крапіўна
65
- expected: Krapiwna
66
- - source: Менск
67
- expected: Myensk
68
- - source: Лаўна
69
- expected: Lawna
70
- - source: Лёсік
71
- expected: Lyosik
72
- - source: Купала
73
- expected: Kupala
74
- - source: Вілейка
75
- expected: Vilyeyka
76
- - source: Міхал
77
- expected: Mikhal
78
- - source: Вільня
79
- expected: Vil’nya
80
- - source: Лепель
81
- expected: Lyepyel’
82
- - source: Магілёў
83
- expected: Mahilyow
84
- - source: Няміга
85
- expected: Nyamiha
86
- - source: Наваградак
87
- expected: Navahradak
88
- - source: Баранавічы
89
- expected: Baranavichy
90
- - source: Орша
91
- expected: Orsha
92
- - source: Востраў
93
- expected: Vostraw
94
- - source: Пінск
95
- expected: Pinsk
96
- - source: Дняпро
97
- expected: Dnyapro
98
- - source: Рагачоў
99
- expected: Rahachow
100
- - source: Сураж
101
- expected: Surazh
102
- - source: Смаляны
103
- expected: Smalyany
104
- - source: Арэса
105
- expected: Aresa
106
- - source: Рось
107
- expected: Ros’
108
- - source: Талочын
109
- expected: Talochyn
110
- - source: Масты
111
- expected: Masty
112
- - source: Уладзімір
113
- expected: Uladzimir
114
- - source: Бабруйск
115
- expected: Babruysk
116
- - source: Быхаў
117
- expected: Bykhaw
118
- - source: Воўпа
119
- expected: Vowpa
120
- - source: Іўе
121
- expected: Iwye
122
- - source: Фолюш
123
- expected: Folyush
124
- - source: фортка
125
- expected: fortka
126
- - source: Хатынь
127
- expected: Khatyn’
128
- - source: Быхаў
129
- expected: Bykhaw
130
- - source: Ганцавічы
131
- expected: Hantsavichy
132
- - source: Стоўбцы
133
- expected: Stowbtsy
134
- - source: цьмяны
135
- expected: ts’myany
136
- - source: мясцовы
137
- expected: myastsovy
138
- - source: Астравец
139
- expected: Astravyets
140
- - source: Прыпяць
141
- expected: Prypyats’
142
- - source: Чэрыкаў
143
- expected: Cherykaw
144
- - source: Шчара
145
- expected: Shchara
146
- - source: Нарач
147
- expected: Narach
148
- - source: Шклоў
149
- expected: Shklow
150
- - source: Ашмяны
151
- expected: Ashmyany
152
- - source: Ыттык-Кёль
153
- expected: Yttyk-Kyol’
154
- - source: Кобрын
155
- expected: Kobryn
156
- - source: Солы
157
- expected: Soly
158
- - source: Копысь
159
- expected: Kopys’
160
- - source: рунь
161
- expected: run’
162
- - source: Эйсманты
163
- expected: Eysmanty
164
- - source: Крэва
165
- expected: Kreva
166
- - source: Юры
167
- expected: Yury
168
- - source: уюн
169
- expected: uyun
170
- - source: Язэп
171
- expected: Yazep
172
- - source: Івянец
173
- expected: Ivyanyets
174
- - source: з’езд
175
- expected: z”yezd
176
- - source: Вялiкiя Вераб’евічы
177
- expected: Vyalikiya Vyerab”yevichy
178
- - source: Дзям’янаўцы
179
- expected: Dzyam”yanawtsy
180
- - source: Задвор’е
181
- expected: Zadvor”ye
182
- - source: Гезгалы
183
- expected: Hyez·haly
184
- - source: Вадасховішча Гезгальскае
185
- expected: Vadaskhovishcha Hyez·hal’skaye
186
-
187
- map:
188
- postrules:
189
- - pattern: '\u042C' # Ь
190
- result: "\u2019"
191
- - pattern: '\u044C' # ь
192
- result: "\u2019"
193
- # Per documentation those rules are optional
194
- rules:
195
- - pattern: \u0417\u0413 # ЗГ
196
- result: "Z\u00B7H" # Z·H
197
- - pattern: \u0437\u0433 # зг
198
- result: "z\u00B7h" # z·h
199
- - pattern: \u041A\u0413 # КГ
200
- result: "K\u00B7H" # K·H
201
- - pattern: \u043A\u0433 # кг
202
- result: "k\u00B7h" # k·h
203
- - pattern: \u0421\u0413 # СГ
204
- result: "S\u00B7H" # S·H
205
- - pattern: \u0441\u0433 # сг
206
- result: "s\u00B7h" # s·h
207
- - pattern: \u0422\u0421 # ТС
208
- result: "T\u00B7S" # T·S
209
- - pattern: \u0442\u0441 # тс
210
- result: "t\u00B7s" # t·s
211
- - pattern: \u0426\u0413 # ЦГ
212
- result: "TS\u00B7H" # TS·H
213
- - pattern: \u0446\u0433 # цг
214
- result: "ts\u00B7h" # ts·h
215
-
216
- characters:
217
- '\u00B4' : "\u201D" # apostrophe according to spec
218
- '\u02BC' : "\u201D" # apostrophe according to spec
219
- '\u2019' : "\u201D" # apostrophe in actual examples
220
-
221
- '\u0410' : 'A' # A
222
- '\u0411' : 'B' # Б
223
- '\u0412' : 'V' # B
224
- '\u0413' : 'H' # Г
225
- '\u0414' : 'D' # Д
226
- '\u0415' : 'Ye' # Е
227
- '\u0401' : 'Yo' # Ё
228
- '\u0416' : 'Zh' # Ж
229
- '\u0417' : 'Z' # З
230
- '\u0406' : 'I' # І
231
- '\u0419' : 'Y' # Й
232
- '\u041A' : 'K' # К
233
- '\u041B' : 'L' # Л
234
- '\u041C' : 'M' # М
235
- '\u041D' : 'N' # Н
236
- '\u041E' : 'O' # О
237
- '\u041F' : 'P' # П
238
- '\u0420' : 'R' # Р
239
- '\u0421' : 'S' # С
240
- '\u0422' : 'T' # Т
241
- '\u0423' : 'U' # У
242
- '\U040E' : 'W' # Ў
243
- '\u0424' : 'F' # Ф
244
- '\u0425' : 'Kh' # Х
245
- '\u0426' : 'Ts' # Ц
246
- '\u0427' : 'Ch' # Ч
247
- '\u0428' : 'Sh' # Ш
248
- '\u042B' : 'Y' # Ы
249
- '\u042D' : 'E' # Э
250
- '\u042E' : 'Yu' # Ю
251
- '\u042F' : 'Ya' # Я
252
- '\u0490' : 'G' # Ґ
253
-
254
- '\u0430' : 'a' # а
255
- '\u0431' : 'b' # б
256
- '\u0432' : 'v' # в
257
- '\u0433' : 'h' # г
258
- '\u0434' : 'd' # д
259
- '\u0435' : 'ye' # е
260
- '\u0451' : 'yo' # ё
261
- '\u0436' : 'zh' # ж
262
- '\u0437' : 'z' # з
263
- '\u0456' : 'i' # і
264
- '\u0439' : 'y' # й
265
- '\u043A' : 'k' # к
266
- '\u043B' : 'l' # л
267
- '\u043C' : 'm' # м
268
- '\u043D' : 'n' # н
269
- '\u043E' : 'o' # о
270
- '\u043F' : 'p' # п
271
- '\u0440' : 'r' # р
272
- '\u0441' : 's' # с
273
- '\u0442' : 't' # т
274
- '\u0443' : 'u' # у
275
- '\u045E' : 'w' # ў
276
- '\u0444' : 'f' # ф
277
- '\u0445' : 'kh' # х
278
- '\u0446' : 'ts' # ц
279
- '\u0447' : 'ch' # ч
280
- '\u0448' : 'sh' # ш
281
- '\u044B' : 'y' # ы
282
- '\u044D' : 'e' # э
283
- '\u044E' : 'yu' # ю
284
- '\u044F' : 'ya' # я
285
- '\u0491' : 'g' # ґ