interscript 0.1.5 → 2.1.0a8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -123
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -311
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -69
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -23
  71. data/lib/interscript/opal/maps.js.erb +0 -7
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  77. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  78. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  79. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  80. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  81. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  82. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  83. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  84. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  85. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  86. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  87. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  88. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  89. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  90. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  91. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  92. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  93. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  94. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  95. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  96. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  97. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  98. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  99. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  100. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  101. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  102. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  103. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  104. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  105. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  106. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  107. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  108. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  109. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  110. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  111. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  112. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  113. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  114. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  115. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  116. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  117. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  118. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  119. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  120. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  122. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  123. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  124. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  125. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  126. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  127. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  128. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  129. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  130. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  131. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  132. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  133. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  134. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  135. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  136. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  137. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  138. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  139. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  140. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  141. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  142. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  143. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  144. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  145. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  146. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  147. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  148. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  149. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  150. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  151. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  152. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  153. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  154. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  155. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  156. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  157. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  158. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  159. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  160. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  161. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  162. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  163. data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
  164. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  165. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  166. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  167. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  168. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  169. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  170. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  171. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  172. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  173. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  174. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  175. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  176. data/maps/ses-ara-arab-latn-1930.yaml +0 -275
  177. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  178. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  179. data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
  180. data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
  181. data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
  182. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  183. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  184. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  185. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  186. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  187. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  188. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  189. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  190. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  191. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  192. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  193. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  194. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  195. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  196. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  197. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  198. data/spec/interscript/mapping_spec.rb +0 -42
  199. data/spec/interscript_spec.rb +0 -26
  200. data/spec/spec_helper.rb +0 -3
@@ -1,175 +0,0 @@
1
- ---
2
- authority_id: bas
3
- id: 2017-bss
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Streamlined Romanization of Russian Cyrillic -- Basic Streamlined System
8
- url: https://www.researchgate.net/publication/318402098
9
- creation_date: 2017-07
10
- description: |
11
- The streamlined approach to transliteration was initiated by the
12
- author with the development of the Streamlined System for the
13
- Romanization of Bulgarian, which was eventually codified by the
14
- Transliteration Act of 2009 (ДВ 2009) of the Bulgarian Parliament.
15
-
16
- The four purposes of the system below are in order of priority:
17
- 1. ensure a plausible phonetic approximation of Russian words by English speaking users, including those having no knowledge of the Russian language and no available additional explanations;
18
- 2. the system should allow for the retrieval of the original Cyrillic spellings as much as feasible;
19
- 3. transliterated Russian words should fit an English language environment i.e. not be perceived as too ‘un-English’; and
20
- 4. transliterated word forms should be streamlined and simple. (Ivanov 2003, Ivanov et al. 2010)
21
-
22
- notes:
23
- - Typical for the streamlined approach is its non-use of diacritics,
24
- its use of Latin y for rendering only Cyrillic й rather than both й and
25
- ы, its non-use of Latin j, as well as its use of Latin h rather than kh
26
- for Cyrillic х.
27
-
28
- tests:
29
- - source: |
30
- Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа
31
- ты могла только родиться, в той земле, что не любит шутить, а
32
- ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, пока
33
- не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не
34
- железным схвачен винтом, а наскоро живьём с одним топором да долотом
35
- снарядил и собрал тебя ярославский расторопный мужик. Не в немецких
36
- ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а
37
- привстал, да замахнулся, да затянул песню — кони вихрем, спицы в
38
- колесах смешались в один гладкий круг, только дрогнула дорога, да вскрикнул
39
- в испуге остановившийся пешеход — и вон она понеслась, понеслась,
40
- понеслась!
41
-
42
- Н.В. Гоголь
43
- expected: |
44
- Eh, troyka! ptitsa troyka, kto tebya vidumal? znat, u boykogo naroda
45
- ti mogla tolko roditsya, v toy zemle, chto ne lyubit shutit, a
46
- rovnem-gladnem razmetnulas na polsveta, da i stupay schitat versti, poka
47
- ne zaryabit tebe v ochi. I ne hitriy, kazhis, dorozhniy snaryad, ne
48
- zheleznim shvachen vintom, a naskoro zhivyem s odnim toporom da dolotom
49
- snaryadil i sobral tebya yaroslavskiy rastoropniy muzhik. Ne v nemetskih
50
- botfortah yamshchik: boroda da rukavitsi, i sidit chert znaet na chem; a
51
- privstal, da zamahnulsya, da zatyanul pesnyu — koni vihrem, spitsi v
52
- kolesah smeshalis v odin gladkiy krug, tolko drognula doroga, da vskriknul
53
- v ispuge ostanovivshiysya peshehod — i von ona poneslas, poneslas,
54
- poneslas!
55
-
56
- N.V. Gogol
57
-
58
- - source: ЁЖ Ёж ёж
59
- expected: EZH Ezh ezh
60
- - source: Цветущий сад
61
- expected: Tsvetushchiy sad
62
- - source: Чувство юмора
63
- expected: Chuvstvo yumora
64
- - source: Широкий выбор
65
- expected: Shirokiy vibor
66
- - source: Все подъезды заблокированны
67
- expected: Vse podezdi zablokirovanni
68
- - source: Ожерелье
69
- expected: Ozherelye
70
- - source: Ручьи
71
- expected: Ruchyi
72
- - source: Каньон
73
- expected: Kanyon
74
- - source: Бельэтаж
75
- expected: Belyetazh
76
-
77
- map:
78
- rules:
79
- - pattern: \u042c(?=[ЕеЁёИиОоЭэ]) # Ь (before Е, Ё, И, O, Э)
80
- result: Y
81
- - pattern: \u044c(?=[ЕеЁёИиОоЭэ]) # ь (before Е, Ё, И, O, Э)
82
- result: y
83
-
84
- characters:
85
- # "\u0027": "" # '
86
- "\u0410": "A" # А
87
- "\u0411": "B" # Б
88
- "\u0412": "V" # В
89
- "\u0413": "G" # Г
90
- "\u0414": "D" # Д
91
- "\u0401": "E" # Ё
92
- "\u0415": "E" # Е
93
- "\u0416": "Zh" # Ж
94
- "\u0417": "Z" # З
95
- "\u042D": "E" # Э
96
- "\u0418": "I" # И
97
- "\u0419": "Y" # Й
98
- "\u041A": "K" # К
99
- "\u041B": "L" # Л
100
- "\u041C": "M" # М
101
- "\u041D": "N" # Н
102
- "\u041E": "O" # О
103
- "\u041F": "P" # П
104
- "\u0420": "R" # Р
105
- "\u0421": "S" # С
106
- "\u0422": "T" # Т
107
- "\u0423": "U" # У
108
- "\u0424": "F" # Ф
109
- "\u0425": "H" # Х
110
- "\u0426": "Ts" # Ц
111
- "\u0427": "Ch" # Ч
112
- "\u0428": "Sh" # Ш
113
- "\u0429": "Shch" # Щ
114
- "\u042B": "I" # Ы
115
- "\u042F": "Ya" # Я
116
- "\u042E": "Yu" # Ю
117
-
118
- # Ь (before Е, Ё, И, O, Э)
119
- # "\u042c\u0401": "YE" # Ё
120
- # "\u042c\u0415": "YE" # Е
121
- # "\u042c\u0418": "YI" # И
122
- # "\u042c\u041E": "YO" # O
123
- # "\u042c\u0417": "YE" # Э
124
-
125
- # Ь (otherwise) -> (none)
126
- "\u042c": ""
127
-
128
- # Ъ -> (none)
129
- "\u042a": ""
130
-
131
- "\u0430": "a" # а
132
- "\u0431": "b" # б
133
- "\u0432": "v" # в
134
- "\u0433": "g" # г
135
- "\u0434": "d" # д
136
- "\u0451": "e" # ё
137
- "\u0435": "e" # e
138
- "\u0436": "zh" # ж
139
- "\u0437": "z" # з
140
- "\u044D": "e" # э
141
- "\u0438": "i" # и
142
- "\u0439": "y" # й
143
- "\u043A": "k" # к
144
- "\u043B": "l" # л
145
- "\u043C": "m" # м
146
- "\u043D": "n" # н
147
- "\u043E": "o" # о
148
- "\u043F": "p" # п
149
- "\u0440": "r" # р
150
- "\u0441": "s" # с
151
- "\u0442": "t" # т
152
- "\u0443": "u" # у
153
- "\u0444": "f" # ф
154
- "\u0445": "h" # х
155
- "\u0446": "ts" # ц
156
- "\u0447": "ch" # ч
157
- "\u0448": "sh" # ш
158
- "\u0449": "shch" # щ
159
- "\u044B": "i" # ы
160
- "\u044F": "ya" # я
161
- "\u044E": "yu" # ю
162
-
163
- # ь (before е, ё, и, o, э)
164
- # "\u044c\u0435": "ye" # е
165
- # "\u044c\u0451": "ye" # ё
166
- # "\u044c\u0438": "yi" # и
167
- # "\u044c\u006f": "yo" # o
168
- # "\u044c\u044d": "ye" # э
169
-
170
- # ь (otherwise) -> (none)
171
- "\u044c": ""
172
-
173
- # ъ -> (none)
174
- "\u044a": ""
175
-
@@ -1,169 +0,0 @@
1
- ---
2
- authority_id: bas
3
- id: 2017-oss
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Streamlined Romanization of Russian Cyrillic -- Optimized Streamlined System
8
- url: https://www.researchgate.net/publication/318402098
9
- creation_date: 2017-07
10
- description: |
11
- The streamlined approach to transliteration was initiated by the
12
- author with the development of the Streamlined System for the
13
- Romanization of Bulgarian, which was eventually codified by the
14
- Transliteration Act of 2009 (ДВ 2009) of the Bulgarian Parliament.
15
-
16
- The four purposes of the system below are in order of priority:
17
- 1. ensure a plausible phonetic approximation of Russian words by English speaking users, including those having no knowledge of the Russian language and no available additional explanations;
18
- 2. the system should allow for the retrieval of the original Cyrillic spellings as much as feasible;
19
- 3. transliterated Russian words should fit an English language environment i.e. not be perceived as too ‘un-English’; and
20
- 4. transliterated word forms should be streamlined and simple. (Ivanov 2003, Ivanov et al. 2010)
21
-
22
- notes:
23
- - Typical for the streamlined approach is its non-use of diacritics,
24
- its use of Latin y for rendering only Cyrillic й rather than both й and
25
- ы, its non-use of Latin j, as well as its use of Latin h rather than kh
26
- for Cyrillic х.
27
-
28
- tests:
29
- - source: "Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа
30
- ты могла только родиться, в той земле, что не любит шутить, а
31
- ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, пока
32
- не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не
33
- железным схвачен винтом, а наскоро живьём с одним топором да долотом
34
- снарядил и собрал тебя ярославский расторопный мужик. Не в немецких
35
- ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а
36
- привстал, да замахнулся, да затянул песню — кони вихрем, спицы в
37
- колесах смешались в один гладкий круг, только дрогнула дорога, да вскрикнул
38
- в испуге остановившийся пешеход — и вон она понеслась, понеслась,
39
- понеслась!\nН.В. Гоголь"
40
-
41
- expected: "`Eh, troyka! ptitsa troyka, kto tebya v`idumal? znat', u boykogo
42
- naroda t`i mogla tol'ko rodit'sya, v toy zemle, chto ne lyubit shutit',
43
- a rovnem-gladnem razmetnulas' na polsveta, da i stupay schitat' verst`i,
44
- poka ne zaryabit tebe v ochi. I ne hitr`iy, kazhis', dorozhn`iy
45
- snaryad, ne zhelezn`im shvachen vintom, a naskoro zhivy``em s odnim
46
- toporom da dolotom snaryadil i sobral tebya yaroslavskiy rastoropn`iy muzhik. Ne v
47
- nemetskih botfortah yamshchik: boroda da rukavits`i, i sidit ch``ert
48
- znaet na ch``em; a privstal, da zamahnulsya, da zatyanul pesnyu — koni
49
- vihrem, spits`i v kolesah smeshalis' v odin gladkiy krug, tol'ko
50
- drognula doroga, da vskriknul v ispuge ostanovivshiysya peshehod — i
51
- von ona poneslas', poneslas', poneslas'!\nN.V. Gogol'"
52
-
53
- - source: ЁЖ Ёж ёж
54
- expected: "``EZH ``Ezh ``ezh"
55
- - source: Цветущий сад
56
- expected: Tsvetushchiy sad
57
- - source: Чувство юмора
58
- expected: Chuvstvo yumora
59
- - source: Широкий выбор
60
- expected: Shirokiy v`ibor
61
- - source: Все подъезды заблокированны
62
- expected: Vse pod"ezd`i zablokirovann`i
63
- - source: Ожерелье
64
- expected: Ozherelye
65
- - source: Ручьи
66
- expected: Ruchyi
67
- - source: Каньон
68
- expected: Kanyon
69
- - source: Бельэтаж
70
- expected: Bely`etazh
71
-
72
- map:
73
- rules:
74
- - pattern: \u042c(?=[ЕеЁёИиОоЭэ]) # Ь (before Е, Ё, И, O, Э)
75
- result: Y
76
- - pattern: \u044c(?=[ЕеЁёИиОоЭэ]) # ь (before Е, Ё, И, O, Э)
77
- result: y
78
-
79
- characters:
80
- # "\u0027": "" # '
81
- "\u0410": "A" # А
82
- "\u0411": "B" # Б
83
- "\u0412": "V" # В
84
- "\u0413": "G" # Г
85
- "\u0414": "D" # Д
86
- "\u0401": "``E" # Ё
87
- "\u0415": "E" # Е
88
- "\u0416": "Zh" # Ж
89
- "\u0417": "Z" # З
90
- "\u042D": "`E" # Э
91
- "\u0418": "I" # И
92
- "\u0419": "Y" # Й
93
- "\u041A": "K" # К
94
- "\u041B": "L" # Л
95
- "\u041C": "M" # М
96
- "\u041D": "N" # Н
97
- "\u041E": "O" # О
98
- "\u041F": "P" # П
99
- "\u0420": "R" # Р
100
- "\u0421": "S" # С
101
- "\u0422": "T" # Т
102
- "\u0423": "U" # У
103
- "\u0424": "F" # Ф
104
- "\u0425": "H" # Х
105
- "\u0426": "Ts" # Ц
106
- "\u0427": "Ch" # Ч
107
- "\u0428": "Sh" # Ш
108
- "\u0429": "Shch" # Щ
109
- "\u042B": "`I" # Ы
110
- "\u042F": "Ya" # Я
111
- "\u042E": "Yu" # Ю
112
-
113
- # Ь (before Е, Ё, И, O, Э)
114
- # "\u042c\u0401": "Y``e" # Ё
115
- # "\u042c\u0415": "Ye" # Е
116
- # "\u042c\u0418": "Yi" # И
117
- # "\u042c\u041E": "Yo" # O
118
- # "\u042c\u0417": "Y`e" # Э
119
-
120
- # Ь (otherwise) -> ' (or none)
121
- "\u042c": "'"
122
-
123
- # Ъ -> " (or none)
124
- "\u042a": '"'
125
-
126
- "\u0430": "a" # а
127
- "\u0431": "b" # б
128
- "\u0432": "v" # в
129
- "\u0433": "g" # г
130
- "\u0434": "d" # д
131
- "\u0451": "``e" # ё
132
- "\u0435": "e" # e
133
- "\u0436": "zh" # ж
134
- "\u0437": "z" # з
135
- "\u044D": "`e" # э
136
- "\u0438": "i" # и
137
- "\u0439": "y" # й
138
- "\u043A": "k" # к
139
- "\u043B": "l" # л
140
- "\u043C": "m" # м
141
- "\u043D": "n" # н
142
- "\u043E": "o" # о
143
- "\u043F": "p" # п
144
- "\u0440": "r" # р
145
- "\u0441": "s" # с
146
- "\u0442": "t" # т
147
- "\u0443": "u" # у
148
- "\u0444": "f" # ф
149
- "\u0445": "h" # х
150
- "\u0446": "ts" # ц
151
- "\u0447": "ch" # ч
152
- "\u0448": "sh" # ш
153
- "\u0449": "shch" # щ
154
- "\u044B": "`i" # ы
155
- "\u044F": "ya" # я
156
- "\u044E": "yu" # ю
157
-
158
- # ь (before е, ё, и, o, э)
159
- # "\u044c\u0435": "ye" # е
160
- # "\u044c\u0451": "y``e" # ё
161
- # "\u044c\u0438": "yi" # и
162
- # "\u044c\u006f": "yo" # o
163
- # "\u044c\u044d": "y`e" # э
164
-
165
- # ь (otherwise) -> ' (or none)
166
- "\u044c": "'"
167
-
168
- # ъ -> " (or none)
169
- "\u044a": '"'
@@ -1,294 +0,0 @@
1
- ---
2
- authority_id: bgn
3
- id: 1962
4
- language: jpn
5
- source_script: Hrkt
6
- destination_script: Latn
7
- name: BGN (Modified Hepburn) System
8
- url:
9
- creation_date: 1930
10
- adoption_date: 1962
11
- description: |
12
- The BGN (Modified Hepburn) System for the transliteration of Japanese
13
- has been in use by the Board on Geographic Names since about 1930 and
14
- has been extensively employed in the systematic standardization of
15
- thousands of geographic names of Japan in romanized form.
16
-
17
- notes: |
18
-
19
- 1. The "tsu" forms (ツ/つ) are also used to indicate a double consonant and
20
- are generally (but not always) written in smaller script or type
21
- slightly to the right of or below the regular line. These characters
22
- are transliterated as k before k; s before s or sh; t before t, ts, or
23
- ch; and p before p. Occasionally, when a "ku" (ク/く) or "ki" (キ/き) form
24
- precedes k, the u in ku or the i in ki is dropped.
25
-
26
- 2. The transliterations in parentheses are used in specific cases when
27
- the kana symbol is known to be so pronounced.
28
-
29
- 3. The transliteration m is used before b, p, and m.
30
-
31
- 4. This letter has been added for the use in transliterating foreign
32
- words.
33
-
34
- 5. The asterisk (*) indicates standard combined forms. Those combined
35
- forms not so marked are rarely used.
36
-
37
- ----
38
-
39
- Implementation Notes:
40
-
41
- a. Despite the mention of the term "Modified Hepburn" in the
42
- specification, the handling of ん/ン in this standard is different from
43
- Modified Hepburn. It follows the Traditional Hepburn in that the
44
- letter m is used before b, m, p.
45
-
46
- b. This document includes obsolete (pre-reform) combinations.
47
- Pre-reform combinations will clash with modern Japanese transliteration.
48
-
49
- c. There is no discussion on how cross-morpheme vowel sounds should be
50
- handled.
51
-
52
- d. There is no mention of a separation mark between n and a following vowel.
53
-
54
- e. Everything not explicitly stated in the specification will be
55
- assumed to be inherited from var-jpn-Hrkt-Latn-hepburn-1954.
56
-
57
- f. Obsolete combinations can be handled by post rules, and are
58
- included for the sake of completeness only. They have been commented
59
- out, since they are rarely used and follow different rules than modern
60
- Japanese.
61
-
62
- tests:
63
- # Note: these test cases follow the pre-reform standard.
64
- # They are commented out for now.
65
- #
66
- # - source: "けふ"
67
- # expected: "kyō"
68
- # - source: "ぎうにう"
69
- # expected: "gyūnyū"
70
- # - source: "きふ" # きふ should always be kifu in Modern Japanese
71
- # expected: "kyū"
72
- # - source: "ちう"
73
- # expected: "chū"
74
- # - source: "けう"
75
- # expected: "kyō"
76
-
77
- # Modern Japanese test cases
78
- - source: "しんばし"
79
- expected: "shimbashi"
80
- - source: "とうきょう"
81
- expected: "tōkyō"
82
- - source: "しんじゅく"
83
- expected: "shinjuku"
84
- - source: かんおう
85
- expected: kan’ō
86
- - source: かのう
87
- expected: kanō
88
- - source: きんゆう
89
- expected: kin’yū
90
- - source: とうきょう
91
- expected: tōkyō
92
- - source: かごっま
93
- expected: kagomma
94
- - source: ぽっぽっや
95
- expected: poppoyya
96
- - source: てっら
97
- expected: terra
98
- - source: にゃっほー
99
- expected: nyahhō
100
-
101
-
102
- map:
103
-
104
- inherit: var-jpn-Hrkt-Latn-hepburn-1954
105
-
106
- rules:
107
- # Convert ん into m before b, m, p
108
- - pattern: "[んン](?=[ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ])"
109
- result: "m"
110
- postrules:
111
- # Handle obsolete forms
112
- # Note that these forms are present in the rules, but will break
113
- # if used with Modern Japanese. They are commented out for now.
114
- #
115
- # - pattern: "ef?[uo]|iyau"
116
- # result: "yō"
117
- # - pattern: "if?u"
118
- # result: "yū"
119
- # - pattern: "[ao]f?[uo]"
120
- # result: "ō"
121
- # - pattern: "iy"
122
- # result: "y"
123
- # - pattern: "ty"
124
- # result: "ch"
125
- # - pattern: "dy"
126
- # result: "j"
127
- # - pattern: "[jz]y"
128
- # result: "j"
129
- # - pattern: "(?<=[sc])hy"
130
- # result: "h"
131
- # - pattern: "sy"
132
- # result: "sh"
133
-
134
- characters:
135
- # ke
136
- # These are listed as alternative pronunciation, but in fact this usage of ヶ
137
- # as the archaic possessive marker is not found in Kana only texts.
138
- # Also it is always typed using the smaller form. (ヶ U+30F6)
139
- "け": ["ke", "ga", "ka", "ko"]
140
- "ケ": ["ke", "ga", "ka", "ko"]
141
- "ヶ": ["ga", "ka", "ko"]
142
-
143
-
144
- # The Ha-column
145
- # は is still pronounced as wa when used as a particle,
146
- # the alternative pronunciations for the other four kana are obsolete.
147
- "は": ["ha", "wa"]
148
- "ひ": ["hi", "i"]
149
- "ふ": ["fu", "u", "o"]
150
- "へ": ["he", "e"]
151
- "ほ": ["ho", "o"]
152
- "ハ": ["ha", "wa"]
153
- "ヒ": ["hi", "i"]
154
- "フ": ["fu", "u", "o"]
155
- "ヘ": ["he", "e"]
156
- "ホ": ["ho", "o"]
157
-
158
-
159
- # The Wa-column
160
- # These two kanas below are only used in pre-reform texts.
161
- "ゐ" : "i"
162
- "ゑ" : "e"
163
- "ヰ" : "i"
164
- "ヱ" : "e"
165
-
166
- # Combined forms
167
- # These are obsolete forms. See Note 5.
168
- # They can be handled by post-rules if ever needed.
169
- # "あう": "ō"
170
- # "あふ": "ō"
171
- # "いふ": "yū"
172
- # "えう": "yō"
173
- # "えふ": "yō"
174
- # "おふ": "ō"
175
- # "かう": "kō"
176
- # "かふ": "kō"
177
- # "がう": "gō"
178
- # "がふ": "gō"
179
- # "きう": "kyū"
180
- # "きふ": "kyū"
181
- # "きやう": "kyō"
182
- # "ぎう": "gyū"
183
- # "ぎふ": "gyū"
184
- # "ぎやう": "gyō"
185
- "くわ": "ka"
186
- "くわう": "kō"
187
- "ぐわ": "ga"
188
- "ぐわう": "gō"
189
- "クワ": "ka"
190
- "クワウ": "kō"
191
- "グワ": "ga"
192
- "グワウ": "gō"
193
- # "けう": "kyō"
194
- # "けふ": "kyō"
195
- # "げう": "gyō"
196
- # "げふ": "gyō"
197
- # "こふ": "kō"
198
- # "ごふ": "gō"
199
- # "さう": "sō"
200
- # "さふ": "sō"
201
- # "ざう": "zō"
202
- # "ざふ": "zō"
203
- # "しう": "shū"
204
- # "しふ": "shū"
205
- # "しやう": "shō"
206
- # "じう": "jū"
207
- # "じふ": "jū"
208
- # "じやう": "jō"
209
- # "せう": "shō"
210
- # "せふ": "shō"
211
- # "ぜう": "jō"
212
- # "ぜふ": "jō"
213
- # "そふ": "sō"
214
- # "ぞふ": "zō"
215
- # "たう": "tō"
216
- # "たふ": "tō"
217
- # "だう": "dō"
218
- # "だふ": "dō"
219
- # "ちう": "chū"
220
- # "ちふ": "chū"
221
- # "ちやう": "chō"
222
- # "ぢう": "jū"
223
- # "ぢふ": "jū"
224
- # "ぢや": "ja"
225
- # "ぢやう": "jō"
226
- # "ぢゆ": "ju"
227
- # "ぢよ": "jo"
228
- # "ぢよう": "jō"
229
- # "てう": "chō"
230
- # "てふ": "chō"
231
- # "でう": "jō"
232
- # "でふ": "jō"
233
- # "とふ": "tō"
234
- # "どふ": "dō"
235
- # "なう": "nō"
236
- # "なふ": "nō"
237
- # "にう": "nyū"
238
- # "にふ": "nyū"
239
- # "にやう": "nyō"
240
- # "ねう": "nyō"
241
- # "ねふ": "nyō"
242
- # "のふ": "nō"
243
- # "はう": ["hō","ō"]
244
- # "はふ": "hō"
245
- # "ばふ": "bō"
246
- # "ばう": "bō"
247
- # "ぱう": "pō"
248
- # "ぱふ": "pō"
249
- # "ひう": "hyū"
250
- # "ひふ": "hyū"
251
- # "ひやう": "hyō"
252
- # "びう": "byū"
253
- # "びふ": "byū"
254
- # "びやう": "byō"
255
- # "ぴう": "pyū"
256
- # "ぴふ": "pyū"
257
- # "ぴやう": "pyō"
258
- # "へう": "hyō"
259
- # "へふ": "hyō"
260
- # "べう": "byō"
261
- # "べふ": "byō"
262
- # "ぺう": "pyō"
263
- # "ぺふ": "pyō"
264
- # "ほふ": "hō"
265
- # "ぼふ": "bō"
266
- # "ぽふ": "pō"
267
- # "まう": "mō"
268
- # "まふ": "mō"
269
- # "まを": "mō"
270
- # "みやう": "myō"
271
- # "みう": "myū"
272
- # "みふ": "myū"
273
- # "めう": "myō"
274
- # "めふ": "myō"
275
- # "めを": "myō"
276
- # "もふ": "mō"
277
- # "やう": "yō"
278
- # "やふ": "yō"
279
- # "よふ": "yō"
280
- # "らう": "rō"
281
- # "らふ": "rō"
282
- # "りう": "ryū"
283
- # "りふ": "ryū"
284
- # "りやう": "ryō"
285
- # "れう": "ryō"
286
- # "れふ": "ryō"
287
- # "ろふ": "rō"
288
- # "わう": "wō"
289
- # "わふ": "wō"
290
- # "ゑふ": "yō"
291
- # "をう": "ō"
292
- # "をふ": "ō"
293
-
294
-