interscript 0.1.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +76 -128
  21. data/lib/interscript/command.rb +6 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -223
  63. data/README.adoc +0 -297
  64. data/bin/rspec +0 -29
  65. data/lib/g2pwrapper.py +0 -34
  66. data/lib/interscript/mapping.rb +0 -125
  67. data/lib/model-7 +0 -0
  68. data/lib/tha-pt-b-7 +0 -0
  69. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  70. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  71. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  72. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  73. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  74. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  75. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  76. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  77. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  78. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  79. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  80. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  81. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  82. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  83. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  84. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  85. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  86. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  87. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  88. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  89. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  90. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  91. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  92. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  93. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  94. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  95. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  96. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  97. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  98. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  99. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  100. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  101. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +0 -7456
  102. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  103. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  104. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  105. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  106. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  107. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  108. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  109. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  110. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  111. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  112. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  113. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  114. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  115. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  116. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  117. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  118. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  119. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  120. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  121. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  122. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  123. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  124. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  125. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  126. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  127. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  128. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  129. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  130. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  131. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  132. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  133. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  134. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  135. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  136. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  137. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  138. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  139. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  140. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  141. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  142. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  143. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  144. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  145. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  146. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  147. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  148. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  149. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  150. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  151. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  152. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  153. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  154. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  155. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  156. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  157. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  158. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  159. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  160. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  161. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  162. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  163. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  164. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  165. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  166. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  167. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  168. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  169. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  170. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  171. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  172. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  173. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  174. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  175. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  176. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  177. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  178. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  179. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  180. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  181. data/spec/interscript/mapping_spec.rb +0 -42
  182. data/spec/interscript_spec.rb +0 -26
  183. data/spec/spec_helper.rb +0 -3
@@ -1,175 +0,0 @@
1
- ---
2
- authority_id: bas
3
- id: 2017-bss
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Streamlined Romanization of Russian Cyrillic -- Basic Streamlined System
8
- url: https://www.researchgate.net/publication/318402098
9
- creation_date: 2017-07
10
- description: |
11
- The streamlined approach to transliteration was initiated by the
12
- author with the development of the Streamlined System for the
13
- Romanization of Bulgarian, which was eventually codified by the
14
- Transliteration Act of 2009 (ДВ 2009) of the Bulgarian Parliament.
15
-
16
- The four purposes of the system below are in order of priority:
17
- 1. ensure a plausible phonetic approximation of Russian words by English speaking users, including those having no knowledge of the Russian language and no available additional explanations;
18
- 2. the system should allow for the retrieval of the original Cyrillic spellings as much as feasible;
19
- 3. transliterated Russian words should fit an English language environment i.e. not be perceived as too ‘un-English’; and
20
- 4. transliterated word forms should be streamlined and simple. (Ivanov 2003, Ivanov et al. 2010)
21
-
22
- notes:
23
- - Typical for the streamlined approach is its non-use of diacritics,
24
- its use of Latin y for rendering only Cyrillic й rather than both й and
25
- ы, its non-use of Latin j, as well as its use of Latin h rather than kh
26
- for Cyrillic х.
27
-
28
- tests:
29
- - source: |
30
- Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа
31
- ты могла только родиться, в той земле, что не любит шутить, а
32
- ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, пока
33
- не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не
34
- железным схвачен винтом, а наскоро живьём с одним топором да долотом
35
- снарядил и собрал тебя ярославский расторопный мужик. Не в немецких
36
- ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а
37
- привстал, да замахнулся, да затянул песню — кони вихрем, спицы в
38
- колесах смешались в один гладкий круг, только дрогнула дорога, да вскрикнул
39
- в испуге остановившийся пешеход — и вон она понеслась, понеслась,
40
- понеслась!
41
-
42
- Н.В. Гоголь
43
- expected: |
44
- Eh, troyka! ptitsa troyka, kto tebya vidumal? znat, u boykogo naroda
45
- ti mogla tolko roditsya, v toy zemle, chto ne lyubit shutit, a
46
- rovnem-gladnem razmetnulas na polsveta, da i stupay schitat versti, poka
47
- ne zaryabit tebe v ochi. I ne hitriy, kazhis, dorozhniy snaryad, ne
48
- zheleznim shvachen vintom, a naskoro zhivyem s odnim toporom da dolotom
49
- snaryadil i sobral tebya yaroslavskiy rastoropniy muzhik. Ne v nemetskih
50
- botfortah yamshchik: boroda da rukavitsi, i sidit chert znaet na chem; a
51
- privstal, da zamahnulsya, da zatyanul pesnyu — koni vihrem, spitsi v
52
- kolesah smeshalis v odin gladkiy krug, tolko drognula doroga, da vskriknul
53
- v ispuge ostanovivshiysya peshehod — i von ona poneslas, poneslas,
54
- poneslas!
55
-
56
- N.V. Gogol
57
-
58
- - source: ЁЖ Ёж ёж
59
- expected: EZH Ezh ezh
60
- - source: Цветущий сад
61
- expected: Tsvetushchiy sad
62
- - source: Чувство юмора
63
- expected: Chuvstvo yumora
64
- - source: Широкий выбор
65
- expected: Shirokiy vibor
66
- - source: Все подъезды заблокированны
67
- expected: Vse podezdi zablokirovanni
68
- - source: Ожерелье
69
- expected: Ozherelye
70
- - source: Ручьи
71
- expected: Ruchyi
72
- - source: Каньон
73
- expected: Kanyon
74
- - source: Бельэтаж
75
- expected: Belyetazh
76
-
77
- map:
78
- rules:
79
- - pattern: \u042c(?=[ЕеЁёИиОоЭэ]) # Ь (before Е, Ё, И, O, Э)
80
- result: Y
81
- - pattern: \u044c(?=[ЕеЁёИиОоЭэ]) # ь (before Е, Ё, И, O, Э)
82
- result: y
83
-
84
- characters:
85
- # "\u0027": "" # '
86
- "\u0410": "A" # А
87
- "\u0411": "B" # Б
88
- "\u0412": "V" # В
89
- "\u0413": "G" # Г
90
- "\u0414": "D" # Д
91
- "\u0401": "E" # Ё
92
- "\u0415": "E" # Е
93
- "\u0416": "Zh" # Ж
94
- "\u0417": "Z" # З
95
- "\u042D": "E" # Э
96
- "\u0418": "I" # И
97
- "\u0419": "Y" # Й
98
- "\u041A": "K" # К
99
- "\u041B": "L" # Л
100
- "\u041C": "M" # М
101
- "\u041D": "N" # Н
102
- "\u041E": "O" # О
103
- "\u041F": "P" # П
104
- "\u0420": "R" # Р
105
- "\u0421": "S" # С
106
- "\u0422": "T" # Т
107
- "\u0423": "U" # У
108
- "\u0424": "F" # Ф
109
- "\u0425": "H" # Х
110
- "\u0426": "Ts" # Ц
111
- "\u0427": "Ch" # Ч
112
- "\u0428": "Sh" # Ш
113
- "\u0429": "Shch" # Щ
114
- "\u042B": "I" # Ы
115
- "\u042F": "Ya" # Я
116
- "\u042E": "Yu" # Ю
117
-
118
- # Ь (before Е, Ё, И, O, Э)
119
- # "\u042c\u0401": "YE" # Ё
120
- # "\u042c\u0415": "YE" # Е
121
- # "\u042c\u0418": "YI" # И
122
- # "\u042c\u041E": "YO" # O
123
- # "\u042c\u0417": "YE" # Э
124
-
125
- # Ь (otherwise) -> (none)
126
- "\u042c": ""
127
-
128
- # Ъ -> (none)
129
- "\u042a": ""
130
-
131
- "\u0430": "a" # а
132
- "\u0431": "b" # б
133
- "\u0432": "v" # в
134
- "\u0433": "g" # г
135
- "\u0434": "d" # д
136
- "\u0451": "e" # ё
137
- "\u0435": "e" # e
138
- "\u0436": "zh" # ж
139
- "\u0437": "z" # з
140
- "\u044D": "e" # э
141
- "\u0438": "i" # и
142
- "\u0439": "y" # й
143
- "\u043A": "k" # к
144
- "\u043B": "l" # л
145
- "\u043C": "m" # м
146
- "\u043D": "n" # н
147
- "\u043E": "o" # о
148
- "\u043F": "p" # п
149
- "\u0440": "r" # р
150
- "\u0441": "s" # с
151
- "\u0442": "t" # т
152
- "\u0443": "u" # у
153
- "\u0444": "f" # ф
154
- "\u0445": "h" # х
155
- "\u0446": "ts" # ц
156
- "\u0447": "ch" # ч
157
- "\u0448": "sh" # ш
158
- "\u0449": "shch" # щ
159
- "\u044B": "i" # ы
160
- "\u044F": "ya" # я
161
- "\u044E": "yu" # ю
162
-
163
- # ь (before е, ё, и, o, э)
164
- # "\u044c\u0435": "ye" # ё
165
- # "\u044c\u0451": "ye" # е
166
- # "\u044c\u0438": "yi" # и
167
- # "\u044c\u006f": "yo" # o
168
- # "\u044c\u044d": "ye" # э
169
-
170
- # ь (otherwise) -> (none)
171
- "\u044c": ""
172
-
173
- # ъ -> (none)
174
- "\u044a": ""
175
-
@@ -1,169 +0,0 @@
1
- ---
2
- authority_id: bas
3
- id: 2017-oss
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Streamlined Romanization of Russian Cyrillic -- Optimized Streamlined System
8
- url: https://www.researchgate.net/publication/318402098
9
- creation_date: 2017-07
10
- description: |
11
- The streamlined approach to transliteration was initiated by the
12
- author with the development of the Streamlined System for the
13
- Romanization of Bulgarian, which was eventually codified by the
14
- Transliteration Act of 2009 (ДВ 2009) of the Bulgarian Parliament.
15
-
16
- The four purposes of the system below are in order of priority:
17
- 1. ensure a plausible phonetic approximation of Russian words by English speaking users, including those having no knowledge of the Russian language and no available additional explanations;
18
- 2. the system should allow for the retrieval of the original Cyrillic spellings as much as feasible;
19
- 3. transliterated Russian words should fit an English language environment i.e. not be perceived as too ‘un-English’; and
20
- 4. transliterated word forms should be streamlined and simple. (Ivanov 2003, Ivanov et al. 2010)
21
-
22
- notes:
23
- - Typical for the streamlined approach is its non-use of diacritics,
24
- its use of Latin y for rendering only Cyrillic й rather than both й and
25
- ы, its non-use of Latin j, as well as its use of Latin h rather than kh
26
- for Cyrillic х.
27
-
28
- tests:
29
- - source: "Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа
30
- ты могла только родиться, в той земле, что не любит шутить, а
31
- ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, пока
32
- не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не
33
- железным схвачен винтом, а наскоро живьём с одним топором да долотом
34
- снарядил и собрал тебя ярославский расторопный мужик. Не в немецких
35
- ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а
36
- привстал, да замахнулся, да затянул песню — кони вихрем, спицы в
37
- колесах смешались в один гладкий круг, только дрогнула дорога, да вскрикнул
38
- в испуге остановившийся пешеход — и вон она понеслась, понеслась,
39
- понеслась!\nН.В. Гоголь"
40
-
41
- expected: "`Eh, troyka! ptitsa troyka, kto tebya v`idumal? znat', u boykogo
42
- naroda t`i mogla tol'ko rodit'sya, v toy zemle, chto ne lyubit shutit',
43
- a rovnem-gladnem razmetnulas' na polsveta, da i stupay schitat' verst`i,
44
- poka ne zaryabit tebe v ochi. I ne hitr`iy, kazhis', dorozhn`iy
45
- snaryad, ne zhelezn`im shvachen vintom, a naskoro zhivy``em s odnim
46
- toporom da dolotom snaryadil i sobral tebya yaroslavskiy rastoropn`iy muzhik. Ne v
47
- nemetskih botfortah yamshchik: boroda da rukavits`i, i sidit ch``ert
48
- znaet na ch``em; a privstal, da zamahnulsya, da zatyanul pesnyu — koni
49
- vihrem, spits`i v kolesah smeshalis' v odin gladkiy krug, tol'ko
50
- drognula doroga, da vskriknul v ispuge ostanovivshiysya peshehod — i
51
- von ona poneslas', poneslas', poneslas'!\nN.V. Gogol'"
52
-
53
- - source: ЁЖ Ёж ёж
54
- expected: "``EZH ``Ezh ``ezh"
55
- - source: Цветущий сад
56
- expected: Tsvetushchiy sad
57
- - source: Чувство юмора
58
- expected: Chuvstvo yumora
59
- - source: Широкий выбор
60
- expected: Shirokiy v`ibor
61
- - source: Все подъезды заблокированны
62
- expected: Vse pod"ezd`i zablokirovann`i
63
- - source: Ожерелье
64
- expected: Ozherelye
65
- - source: Ручьи
66
- expected: Ruchyi
67
- - source: Каньон
68
- expected: Kanyon
69
- - source: Бельэтаж
70
- expected: Bely`etazh
71
-
72
- map:
73
- rules:
74
- - pattern: \u042c(?=[ЕеЁёИиОоЭэ]) # Ь (before Е, Ё, И, O, Э)
75
- result: Y
76
- - pattern: \u044c(?=[ЕеЁёИиОоЭэ]) # ь (before Е, Ё, И, O, Э)
77
- result: y
78
-
79
- characters:
80
- # "\u0027": "" # '
81
- "\u0410": "A" # А
82
- "\u0411": "B" # Б
83
- "\u0412": "V" # В
84
- "\u0413": "G" # Г
85
- "\u0414": "D" # Д
86
- "\u0401": "``E" # Ё
87
- "\u0415": "E" # Е
88
- "\u0416": "Zh" # Ж
89
- "\u0417": "Z" # З
90
- "\u042D": "`E" # Э
91
- "\u0418": "I" # И
92
- "\u0419": "Y" # Й
93
- "\u041A": "K" # К
94
- "\u041B": "L" # Л
95
- "\u041C": "M" # М
96
- "\u041D": "N" # Н
97
- "\u041E": "O" # О
98
- "\u041F": "P" # П
99
- "\u0420": "R" # Р
100
- "\u0421": "S" # С
101
- "\u0422": "T" # Т
102
- "\u0423": "U" # У
103
- "\u0424": "F" # Ф
104
- "\u0425": "H" # Х
105
- "\u0426": "Ts" # Ц
106
- "\u0427": "Ch" # Ч
107
- "\u0428": "Sh" # Ш
108
- "\u0429": "Shch" # Щ
109
- "\u042B": "`I" # Ы
110
- "\u042F": "Ya" # Я
111
- "\u042E": "Yu" # Ю
112
-
113
- # Ь (before Е, Ё, И, O, Э)
114
- # "\u042c\u0401": "Y``e" # Ё
115
- # "\u042c\u0415": "Ye" # Е
116
- # "\u042c\u0418": "Yi" # И
117
- # "\u042c\u041E": "Yo" # O
118
- # "\u042c\u0417": "Y`e" # Э
119
-
120
- # Ь (otherwise) -> ' (or none)
121
- "\u042c": "'"
122
-
123
- # Ъ -> " (or none)
124
- "\u042a": '"'
125
-
126
- "\u0430": "a" # а
127
- "\u0431": "b" # б
128
- "\u0432": "v" # в
129
- "\u0433": "g" # г
130
- "\u0434": "d" # д
131
- "\u0451": "``e" # ё
132
- "\u0435": "e" # e
133
- "\u0436": "zh" # ж
134
- "\u0437": "z" # з
135
- "\u044D": "`e" # э
136
- "\u0438": "i" # и
137
- "\u0439": "y" # й
138
- "\u043A": "k" # к
139
- "\u043B": "l" # л
140
- "\u043C": "m" # м
141
- "\u043D": "n" # н
142
- "\u043E": "o" # о
143
- "\u043F": "p" # п
144
- "\u0440": "r" # р
145
- "\u0441": "s" # с
146
- "\u0442": "t" # т
147
- "\u0443": "u" # у
148
- "\u0444": "f" # ф
149
- "\u0445": "h" # х
150
- "\u0446": "ts" # ц
151
- "\u0447": "ch" # ч
152
- "\u0448": "sh" # ш
153
- "\u0449": "shch" # щ
154
- "\u044B": "`i" # ы
155
- "\u044F": "ya" # я
156
- "\u044E": "yu" # ю
157
-
158
- # ь (before е, ё, и, o, э)
159
- # "\u044c\u0435": "ye" # ё
160
- # "\u044c\u0451": "y``e" # e
161
- # "\u044c\u0438": "yi" # и
162
- # "\u044c\u006f": "yo" # o
163
- # "\u044c\u044d": "y`e" # э
164
-
165
- # ь (otherwise) -> ' (or none)
166
- "\u044c": "'"
167
-
168
- # ъ -> " (or none)
169
- "\u044a": '"'
@@ -1,294 +0,0 @@
1
- ---
2
- authority_id: bgn
3
- id: 1962
4
- language: jpn
5
- source_script: Hrkt
6
- destination_script: Latn
7
- name: BGN (Modified Hepburn) System
8
- url:
9
- creation_date: 1930
10
- adoption_date: 1962
11
- description: |
12
- The BGN (Modified Hepburn) System for the transliteration of Japanese
13
- has been in use by the Board on Geographic Names since about 1930 and
14
- has been extensively employed in the systematic standardization of
15
- thousands of geographic names of Japan in romanized form.
16
-
17
- notes: |
18
-
19
- 1. The "tsu" forms (ツ/つ) are also used to indicate a double consonant and
20
- are generally (but not always) written in smaller script or type
21
- slightly to the right of or below the regular line. These characters
22
- are transliterated as k before k; s before s or sh; t before t, ts, or
23
- ch; and p before p. Occasionally, when a "ku" (ク/く) or "ki" (キ/き) form
24
- precedes k, the u in ku or the i in ki is dropped.
25
-
26
- 2. The transliterations in parentheses are used in specific cases when
27
- the kana symbol is known to be so pronounced.
28
-
29
- 3. The transliteration m is used before b, p, and m.
30
-
31
- 4. This letter has been added for the use in transliterating foreign
32
- words.
33
-
34
- 5. The asterisk (*) indicates standard combined forms. Those combined
35
- forms not so marked are rarely used.
36
-
37
- ----
38
-
39
- Implementation Notes:
40
-
41
- a. Despite the mentioning of the term "Modified Hepburn" in the
42
- specification, the handling of ん/ン in this standard is different from
43
- Modified Hepburn. It follows the Traditional Hepburn in that the
44
- letter m is used before b, m, p.
45
-
46
- b. This document includes obsolete (pre-reform) combinations.
47
- Pre-reform combinations will clash with modern Japanese transliteration.
48
-
49
- c. There is no discussion on how cross-morpheme vowel sounds should be
50
- handled.
51
-
52
- d. There is no mention of a separation mark between n and a following vowel.
53
-
54
- e. Everything not explicitly stated in the specification will be
55
- assumed to be inherited from var-jpn-Hrkt-Latn-hepburn-1954.
56
-
57
- f. Obsolete combinations can be handled by post rules, and are
58
- included for the sake of completeness only. They have been commented
59
- out, since they are rarely used and follow different rules than modern
60
- Japanese.
61
-
62
- tests:
63
- # Note: these test cases follow the pre-reform standard.
64
- # They are commented out for now.
65
- #
66
- # - source: "けふ"
67
- # expected: "kyō"
68
- # - source: "ぎうにう"
69
- # expected: "gyūnyū"
70
- # - source: "きふ" # きふ should always be kifu in Modern Japanese
71
- # expected: "kyū"
72
- # - source: "ちう"
73
- # expected: "chū"
74
- # - source: "けう"
75
- # expected: "kyō"
76
-
77
- # Modern Japanese test cases
78
- - source: "しんばし"
79
- expected: "shimbashi"
80
- - source: "とうきょう"
81
- expected: "tōkyō"
82
- - source: "しんじゅく"
83
- expected: "shinjuku"
84
- - source: かんおう
85
- expected: kan’ō
86
- - source: かのう
87
- expected: kanō
88
- - source: きんゆう
89
- expected: kin’yū
90
- - source: とうきょう
91
- expected: tōkyō
92
- - source: かごっま
93
- expected: kagomma
94
- - source: ぽっぽっや
95
- expected: poppoyya
96
- - source: てっら
97
- expected: terra
98
- - source: にゃっほー
99
- expected: nyahhō
100
-
101
-
102
- map:
103
-
104
- inherit: var-jpn-Hrkt-Latn-hepburn-1954
105
-
106
- rules:
107
- # Convert ん into m before b, m, p
108
- - pattern: "[んン](?=[ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ])"
109
- result: "m"
110
- postrules:
111
- # Handle obsolete forms
112
- # Note that these forms are present in the rules, but will break
113
- # if used with Modern Japanese. They are commented out for now.
114
- #
115
- # - pattern: "ef?[uo]|iyau"
116
- # result: "yō"
117
- # - pattern: "if?u"
118
- # result: "yū"
119
- # - pattern: "[ao]f?[uo]"
120
- # result: "ō"
121
- # - pattern: "iy"
122
- # result: "y"
123
- # - pattern: "ty"
124
- # result: "ch"
125
- # - pattern: "dy"
126
- # result: "j"
127
- # - pattern: "[jz]y"
128
- # result: "j"
129
- # - pattern: "(?<=[sc])hy"
130
- # result: "h"
131
- # - pattern: "sy"
132
- # result: "sh"
133
-
134
- characters:
135
- # ke
136
- # These are listed as alternative pronunciation, but in fact this usage of ヶ
137
- # as the archaic possessive marker is not found in Kana only texts.
138
- # Also it is always typed using the smaller form. (ヶ U+30F6)
139
- "け": ["ke", "ga", "ka", "ko"]
140
- "ケ": ["ke", "ga", "ka", "ko"]
141
- "ヶ": ["ga", "ka", "ko"]
142
-
143
-
144
- # The Ha-column
145
- # は is still pronounced as wa when used as a particle,
146
- # the alternative pronunciations for the other four kana are obsolete.
147
- "は": ["ha", "wa"]
148
- "ひ": ["hi", "i"]
149
- "ふ": ["fu", "u", "o"]
150
- "へ": ["he", "e"]
151
- "ほ": ["ho", "o"]
152
- "ハ": ["ha", "wa"]
153
- "ヒ": ["hi", "i"]
154
- "フ": ["fu", "u", "o"]
155
- "ヘ": ["he", "e"]
156
- "ホ": ["ho", "o"]
157
-
158
-
159
- # The Wa-column
160
- # These two kanas below are only used in pre-reform texts.
161
- "ゐ" : "i"
162
- "ゑ" : "e"
163
- "ヰ" : "i"
164
- "ヱ" : "e"
165
-
166
- # Combined forms
167
- # These are obsolete forms. See Note 5.
168
- # They can be handled by post-rules if ever needed.
169
- # "あう": "ō"
170
- # "あふ": "ō"
171
- # "いふ": "yū"
172
- # "えう": "yō"
173
- # "えふ": "yō"
174
- # "おふ": "ō"
175
- # "かう": "kō"
176
- # "かふ": "kō"
177
- # "がう": "gō"
178
- # "がふ": "gō"
179
- # "きう": "kyū"
180
- # "きふ": "kyū"
181
- # "きやう": "kyō"
182
- # "ぎう": "gyū"
183
- # "ぎふ": "gyū"
184
- # "ぎやう": "gyō"
185
- "くわ": "ka"
186
- "くわう": "kō"
187
- "ぐわ": "ga"
188
- "ぐわう": "gō"
189
- "クワ": "ka"
190
- "クワウ": "kō"
191
- "グワ": "ga"
192
- "グワウ": "gō"
193
- # "けう": "kyō"
194
- # "けふ": "kyō"
195
- # "げう": "gyō"
196
- # "げふ": "gyō"
197
- # "こふ": "kō"
198
- # "ごふ": "gō"
199
- # "さう": "sō"
200
- # "さふ": "sō"
201
- # "ざう": "zō"
202
- # "ざふ": "zō"
203
- # "しう": "shū"
204
- # "しふ": "shū"
205
- # "しやう": "shō"
206
- # "じう": "jū"
207
- # "じふ": "jū"
208
- # "じやう": "jō"
209
- # "せう": "shō"
210
- # "せふ": "shō"
211
- # "ぜう": "jō"
212
- # "ぜふ": "jō"
213
- # "そふ": "sō"
214
- # "ぞふ": "zō"
215
- # "たう": "tō"
216
- # "たふ": "tō"
217
- # "だう": "dō"
218
- # "だふ": "dō"
219
- # "ちう": "chū"
220
- # "ちふ": "chū"
221
- # "ちやう": "chō"
222
- # "ぢう": "jū"
223
- # "ぢふ": "jū"
224
- # "ぢや": "ja"
225
- # "ぢやう": "jō"
226
- # "ぢゆ": "ju"
227
- # "ぢよ": "jo"
228
- # "ぢよう": "jō"
229
- # "てう": "chō"
230
- # "てふ": "chō"
231
- # "でう": "jō"
232
- # "でふ": "jō"
233
- # "とふ": "tō"
234
- # "どふ": "dō"
235
- # "なう": "nō"
236
- # "なふ": "nō"
237
- # "にう": "nyū"
238
- # "にふ": "nyū"
239
- # "にやう": "nyō"
240
- # "ねう": "nyō"
241
- # "ねふ": "nyō"
242
- # "のふ": "nō"
243
- # "はう": ["hō","ō"]
244
- # "はふ": "hō"
245
- # "ばふ": "bō"
246
- # "ばう": "bō"
247
- # "ぱう": "pō"
248
- # "ぱふ": "pō"
249
- # "ひう": "hyū"
250
- # "ひふ": "hyū"
251
- # "ひやう": "hyō"
252
- # "びう": "byū"
253
- # "びふ": "byū"
254
- # "びやう": "byō"
255
- # "ぴう": "pyū"
256
- # "ぴふ": "pyū"
257
- # "ぴやう": "pyō"
258
- # "へう": "hyō"
259
- # "へふ": "hyō"
260
- # "べう": "byō"
261
- # "べふ": "byō"
262
- # "ぺう": "pyō"
263
- # "ぺふ": "pyō"
264
- # "ほふ": "hō"
265
- # "ぼふ": "bō"
266
- # "ぽふ": "pō"
267
- # "まう": "mō"
268
- # "まふ": "mō"
269
- # "まを": "mō"
270
- # "みやう": "myō"
271
- # "みう": "myū"
272
- # "みふ": "myū"
273
- # "めう": "myō"
274
- # "めふ": "myō"
275
- # "めを": "myō"
276
- # "もふ": "mō"
277
- # "やう": "yō"
278
- # "やふ": "yō"
279
- # "よふ": "yō"
280
- # "らう": "rō"
281
- # "らふ": "rō"
282
- # "りう": "ryū"
283
- # "りふ": "ryū"
284
- # "りやう": "ryō"
285
- # "れう": "ryō"
286
- # "れふ": "ryō"
287
- # "ろふ": "rō"
288
- # "わう": "wō"
289
- # "わふ": "wō"
290
- # "ゑふ": "yō"
291
- # "をう": "ō"
292
- # "をふ": "ō"
293
-
294
-