interscript 0.1.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +76 -128
  21. data/lib/interscript/command.rb +6 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -223
  63. data/README.adoc +0 -297
  64. data/bin/rspec +0 -29
  65. data/lib/g2pwrapper.py +0 -34
  66. data/lib/interscript/mapping.rb +0 -125
  67. data/lib/model-7 +0 -0
  68. data/lib/tha-pt-b-7 +0 -0
  69. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  70. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  71. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  72. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  73. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  74. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  75. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  76. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  77. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  78. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  79. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  80. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  81. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  82. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  83. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  84. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  85. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  86. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  87. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  88. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  89. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  90. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  91. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  92. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  93. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  94. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  95. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  96. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  97. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  98. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  99. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  100. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  101. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +0 -7456
  102. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  103. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  104. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  105. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  106. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  107. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  108. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  109. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  110. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  111. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  112. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  113. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  114. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  115. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  116. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  117. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  118. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  119. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  120. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  121. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  122. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  123. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  124. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  125. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  126. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  127. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  128. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  129. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  130. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  131. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  132. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  133. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  134. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  135. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  136. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  137. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  138. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  139. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  140. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  141. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  142. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  143. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  144. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  145. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  146. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  147. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  148. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  149. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  150. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  151. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  152. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  153. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  154. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  155. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  156. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  157. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  158. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  159. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  160. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  161. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  162. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  163. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  164. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  165. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  166. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  167. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  168. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  169. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  170. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  171. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  172. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  173. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  174. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  175. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  176. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  177. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  178. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  179. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  180. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  181. data/spec/interscript/mapping_spec.rb +0 -42
  182. data/spec/interscript_spec.rb +0 -26
  183. data/spec/spec_helper.rb +0 -3
@@ -1,93 +0,0 @@
1
- ---
2
- authority_id: un
3
- id: 2013
4
- language: mon
5
- source_script: Mong
6
- destination_script: Latn
7
- name: Mongolian Romanization in China, Version 4.0
8
- url: http://www.eki.ee/wgrs/rom1_mnc.htm
9
- creation_date: 2013-09
10
- description: |
11
- The United Nations resolution III/8 in 1977 recognized the Scheme for
12
- a Chinese Phonetic Alphabet (Pinyin) as China’s official Roman alphabet
13
- scheme and recommended the alphabet as the international system for the
14
- romanization of Chinese geographical names. In China Mongolian
15
- geographical names are transcribed directly from the Mongolian script
16
- into Pinyin. The scheme was published in Toponymic Guidelines for Map
17
- and Other Editors: China, 1982.
18
-
19
- The system is used in China and in international cartographic
20
- products.
21
-
22
- Mongolian uses a vertical script. Transcription of Mongolian names is
23
- made from their proper pronunciation based on the written form of the
24
- Mongolian language and Qahar vernacular, with Zhenglan as its
25
- representative pronunciation. Due to the complex nature of the script
26
- the romanization scheme is not reversible, e.g. the name of the city
27
- Hohhot is written ᠬᠥᠬᠡᠬᠣᠲᠠ but may be transliterated as kökeqota.
28
-
29
- notes:
30
- - The long and short Mongolian vowels are not distinguished in the
31
- spelling for general use, but in recording the pronunciation of
32
- place-names, the long vowel is represented by duplication.
33
- - Where two Roman equivalents are given, the second (in brackets) is
34
- used for recording the pronunciation of place-names while the first
35
- form is for general use.
36
- - In the table only word-initial character variants are shown.
37
- Depending on the position in the word many variants of the characters
38
- are used as well as some ligatures. These features are not covered here.
39
- - For technical reasons the characters of the Mongolian script are
40
- turned 90˚ anti-clockwise.
41
-
42
- tests:
43
- - source: "ᠬᠥᠬᠡᠬᠣᠲᠠ"
44
- expected: "kökeqota"
45
- map:
46
- characters:
47
- "ᠠ": "a"
48
- "ᠪ": "b"
49
- "ᠼ": "c"
50
- "ᠲ": "d"
51
- "ᠳ": "d"
52
- "ᠡ": "e"
53
- "ᠹ": "f"
54
- "ᠭ": "g"
55
- "ᠺ": "g"
56
- "ᠬ": "h"
57
- "ᠾ": "h"
58
- "ᠢ": "i"
59
- "ᠵ": "j"
60
- "ᠺ": "k"
61
- "ᠯ": "l"
62
- "ᠮ": "m"
63
- "ᠨ": "n"
64
- "ᠥ": "o"
65
- "ᠫ": "p"
66
- "ᠴ": "q"
67
- "ᠷ": "r"
68
- "ᠰ": "s"
69
- "ᠲ": "t"
70
- "ᠦ": "u"
71
- "ᠸ": "w"
72
- "ᠱ": "x"
73
- "ᠶ": "y"
74
- "ᠽ": "z"
75
- "ᠣ": "o"
76
- # - "o" # General use
77
- # - "ô" # For place names
78
- "ᠤ": "u"
79
- # - "u" # General use
80
- # - "û" # For place names
81
-
82
-
83
- # @TODO Exceptional
84
- #
85
- # This is failing the whole test suite, but as far as I understood
86
- # from the comment, this city name is exceptional, so we are temporarily
87
- # adding it as an exceptional rule for now.
88
- #
89
- # But we will need some native speakers' attention to help us out here, and maybe
90
- # come up with some basic rules for this exceptional pattern.
91
- #
92
-
93
- "ᠬᠥᠬᠡᠬᠣᠲᠠ": "kökeqota"
@@ -1,166 +0,0 @@
1
- ---
2
- authority_id: ungegn
3
- id: 1987
4
- language: rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Russian Romanization system
8
- url: http://www.eki.ee/wgrs/rom1_ru.htm
9
- creation_date: 1987
10
- confirmation_date: 2016
11
- description: |
12
- The United Nations recommended system was approved in 1987 (V/18),
13
- based on the official system of the Main Administration of Geodesy and
14
- Cartography of the former Soviet Union, also known as the GOST 1983
15
- system (GOST 16876-71). The table was published as an annex to the
16
- resolution.
17
-
18
- The system is used in the Russian Federation and increasingly in
19
- international cartographic products.
20
-
21
- Russian uses the Cyrillic script which is alphabetic. The
22
- romanization table is unambiguous and can be applied automatically. The
23
- system is reversible, although rarely there can be ambiguities.
24
-
25
- notes:
26
- - "Cursive forms of some characters might be formed differently: Аа Бб
27
- Вв Гг Дд Ее Ёё Жж Зз Ии Йй Кк Лл Мм Нн Оо Пп Рр Сс Тт Уу Фф Хх Цц Чч Шш
28
- Щщ Ъъ Ыы Ьь Ээ Юю Яя."
29
-
30
- - Fifth United Nations Conference on the Standardization of
31
- Geographical Names. Montreal, 18–31 August 1987. Vol. I. Report of the
32
- Conference, pp. 40–41.
33
-
34
- tests:
35
- - source: Aнaпa
36
- expected: Anapa
37
-
38
- - source: Бaбушкин
39
- expected: Babuškin
40
-
41
- - source: Вaвилово
42
- expected: Vavilovo
43
-
44
- - source: Гaгaрин
45
- expected: Gagarin
46
-
47
- - source: Дудинкa
48
- expected: Dudinka
49
-
50
- - source: Елисeeвкa
51
- expected: Eliseevka
52
-
53
- - source: Ёлкино
54
- expected: "\u00CBlkino"
55
-
56
- - source: Псëл
57
- expected: Psël
58
-
59
- - source: Жужa
60
- expected: Žuža
61
-
62
- - source: Звëздный
63
- expected: Zvëzdnyj
64
-
65
- - source: Идрицa
66
- expected: Idrica
67
-
68
- - source: Зaрaйск
69
- expected: Zarajsk
70
-
71
- - source: Кокaнд
72
- expected: Kokand
73
-
74
- - source: Лaлвaр
75
- expected: Lalvar
76
-
77
- - source: Мaймaк
78
- expected: Majmak
79
-
80
- - source: Нeжин
81
- expected: Nežin
82
-
83
- - source: Обoдoвкa
84
- expected: Obodovka
85
-
86
- - source: Пaп
87
- expected: Pap
88
-
89
- - source: Рeбрихa
90
- expected: Rebriha
91
-
92
- - source: Сaсoвo
93
- expected: Sasovo
94
-
95
- - source: Тaттa
96
- expected: Tatta
97
-
98
- - source: Уржум
99
- expected: Uržum
100
-
101
- - source: Фoфaнoвo
102
- expected: Fofanovo
103
-
104
- - source: Хoхломa
105
- expected: Hohloma
106
-
107
- - source: Цвeткoвo
108
- expected: Cvetkovo
109
-
110
- - source: Чeчeльник
111
- expected: Čečel’nik
112
-
113
- - source: Шишкинo
114
- expected: Šiškino
115
-
116
- - source: Щукинo
117
- expected: Ščukino
118
-
119
- - source: Пoдъячeвo
120
- expected: Pod”jačevo
121
-
122
- - source: Ыныкчaнский
123
- expected: Ynykčanskij
124
-
125
- - source: Пaрaньгa
126
- expected: Paran’ga
127
-
128
- - source: Кaзaнь
129
- expected: Kazan’
130
-
131
- - source: Щучьe
132
- expected: Ščuč’e
133
-
134
- - source: Элистa
135
- expected: Èlista
136
-
137
- - source: Юринo
138
- expected: Jurino
139
-
140
- - source: Юхнoв
141
- expected: Juhnov
142
-
143
- - source: Юрюзaнь
144
- expected: Jurjuzan’
145
-
146
- - source: Ямaл
147
- expected: Jamal
148
-
149
- - source: Язъявaн
150
- expected: Jaz”javan
151
-
152
- - source: Яя
153
- expected: Jaja
154
-
155
- - source: Вязьмa
156
- expected: Vjaz’ma
157
-
158
-
159
- map:
160
- inherit: gost-rus-cyrl-latn-16876-71-1983
161
-
162
- characters:
163
- '\u042A' : '”' # Ъ
164
- '\u042C' : '’' # Ь
165
- '\u044A' : '”' # ъ
166
- '\u044C' : '’' # ь
@@ -1,30 +0,0 @@
1
- ---
2
- authority_id: un
3
- id: 1998
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ROMANIZATION SYSTEM FOR BELARUSIAN, RUSSIAN AND UKRAINIAN CYRILLIC
8
- url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/7th-uncsgn-docs/econf/7th_UNCSGN_econf.91_3_Add1.pdf
9
- creation_date: 1998
10
-
11
- tests:
12
-
13
- map:
14
- inherit: gost-rus-cyrl-latn-16876-71-1983
15
-
16
- characters:
17
- '\u0490' : '?' # Ґ
18
- '\u0491' : '?' # ґ
19
-
20
- '\u0404' : "Je" # Є
21
- '\u0454' : "je" # є
22
-
23
- '\u0406' : 'I' # І
24
- '\u0456' : 'i' # і
25
-
26
- '\u0407' : 'I' # Ї
27
- '\u0457' : 'i' # ї
28
-
29
- '\u2019' : '?'
30
- "'" : '?'
@@ -1,406 +0,0 @@
1
- ---
2
- authority_id: var
3
- id: hepburn-1886
4
- language: jpn
5
- source_script: Hrkt
6
- destination_script: Latn
7
- name: Traditional Hepburn
8
- url: http://www.ab.cyberhome.ne.jp/~kaizu/roomazi/doc/hep3.html
9
- creation_date: 1886
10
- adoption_date:
11
- description:
12
- This is a traditional version of Hepburn romanization.
13
-
14
- notes:
15
- "
16
- The book was published before the Japanese orthographic reform,
17
- and this map takes the reformed orthography in Kana as the source
18
- form.
19
- https://en.wikipedia.org/wiki/Historical_kana_orthography
20
-
21
- The distinction for long-vowel vs. repeating vowels has not been
22
- implemented.
23
- For example, the consecutive o's in these words are considered
24
- a case of long vowel, and is transliterated as ō:
25
-
26
- 氷 (こおり) - kōri, 大阪(おおさか)- Ōsaka
27
-
28
- If there are two consecutive o's in a string, but they belong to
29
- different morpheme, then they should be transliterated separately.
30
-
31
- 小躍り(こおどり)- koodori
32
-
33
- The same goes for the combinations o+u, u+u as well.
34
-
35
- However, this cannot be easily determined from the Kana.
36
- Lexical knowledge is needed, and sometimes the Kanji representation
37
- will give more hints about morpheme boundary.
38
-
39
- For now, this map will assume that all o+o, o+u, u+u combinations to
40
- be instances of long vowels.
41
- "
42
-
43
- tests:
44
- - source: "ぐんま"
45
- expected: "gumma"
46
- - source: "しんよう"
47
- expected: "shin-yō"
48
- - source: "きんようび"
49
- expected: "kin-yōbi"
50
- - source: "とうきょう"
51
- expected: "tōkyō"
52
- - source: "しんばし"
53
- expected: "shimbashi"
54
-
55
- map:
56
-
57
- rules:
58
- # Add a dash (-) between ん and a vowel sound or ya, yu, yo
59
- - pattern: "([んン])(?=[あいうえおやゆよアイウエオヤユヨ])"
60
- result: "\\1-"
61
-
62
- # Convert ん into m before b, m, p
63
- - pattern: "[んン](?=[ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ])"
64
- result: "m"
65
-
66
- postrules:
67
- # Handling of っ/ッ
68
- #
69
- # The kana っ/ッ is a geminate marker.
70
- # When followed by a consonant, repeat the first letter of
71
- # the following syllable. Exception: the combination -cch-
72
- # should be transliterated as -tch-
73
- #
74
- # If っ/ッ is not followed by a consonant, then it is usually
75
- # phonetically realised as an abrupt stop or shorterning of
76
- # the previous syllable. There is no documented or commonly
77
- # accepted way to transliterate this sound.
78
-
79
- - pattern: "[っッ]([BbDdFfGgHhJjKkLlMmNnPpQqRrSsTtVvWwXxYyZz])"
80
- result: "\\1\\1"
81
- - pattern: "[っッ]([Cc])" # ッ followed by ch-
82
- result: "t\\1"
83
- - pattern: "[っッ]" # drop all other っッ.
84
- result: ""
85
-
86
- # In Traditional Hepburn, long o (which can be o+o or o+u), and long u
87
- # are transliterated as ō and ū.
88
- #
89
- # Macron should not be used if two repeating letters split across
90
- # a morpheme boundary.
91
- #
92
- # Long vowels in loanwords are indicated with a macron instead
93
- # of letter doubling.
94
-
95
- - pattern: "a[ー]"
96
- result: "ā"
97
- - pattern: "i[ー]"
98
- result: "ī"
99
- - pattern: "u[ーu]"
100
- result: "ū"
101
- - pattern: "e[ー]"
102
- result: "ē"
103
- - pattern: "o[ーo]"
104
- result: "ō"
105
-
106
- characters:
107
-
108
- # Hiragana
109
-
110
- "あ": "a"
111
- "い": "i"
112
- "う": "u"
113
- "え": "e"
114
- "お": "o"
115
- "おう": "ō"
116
-
117
- "か": "ka"
118
- "き": "ki"
119
- "く": "ku"
120
- "け": "ke"
121
- "こ": "ko"
122
- "きゃ": "kya"
123
- "きゅ": "kyu"
124
- "きょ": "kyo"
125
- "きょう": "kyō"
126
- "こう": "kō"
127
-
128
- "さ": "sa"
129
- "し": "shi"
130
- "す": "su"
131
- "せ": "se"
132
- "そ": "so"
133
- "しゃ": "sha"
134
- "しゅ": "shu"
135
- "しょ": "sho"
136
- "しょう": "shō"
137
- "そう": "sō"
138
-
139
- "た": "ta"
140
- "ち": "chi"
141
- "つ": "tsu"
142
- "て": "te"
143
- "と": "to"
144
- "ちゃ": "cha"
145
- "ちゅ": "chu"
146
- "ちょ": "cho"
147
- "とう": "tō"
148
- "ちょう": "chō"
149
-
150
- "な": "na"
151
- "に": "ni"
152
- "ぬ": "nu"
153
- "ね": "ne"
154
- "の": "no"
155
- "にゃ": "nya"
156
- "にゅ": "nyu"
157
- "にょ": "nyo"
158
- "にょう": "nyō"
159
- "のう": "nō"
160
-
161
- "は": "ha"
162
- "ひ": "hi"
163
- "ふ": "fu"
164
- "へ": "he"
165
- "ほ": "ho"
166
- "ひゃ": "hya"
167
- "ひゅ": "hyu"
168
- "ひょ": "hyo"
169
- "ひょう": "hyō"
170
- "ほう": "hō"
171
-
172
- "ま": "ma"
173
- "み": "mi"
174
- "む": "mu"
175
- "め": "me"
176
- "も": "mo"
177
- "みゃ": "mya"
178
- "みゅ": "myu"
179
- "みょ": "myo"
180
- "みょう": "myō"
181
- "もう": "mō"
182
-
183
- "や": "ya"
184
- "ゆ": "yu"
185
- "よ": "yo"
186
- "よう": "yō"
187
-
188
- "ら": "ra"
189
- "り": "ri"
190
- "る": "ru"
191
- "れ": "re"
192
- "ろ": "ro"
193
- "りゃ": "rya"
194
- "りゅ": "ryu"
195
- "りょ": "ryo"
196
- "りょう": "ryō"
197
- "ろう": "rō"
198
-
199
- "わ": "wa"
200
- "を": "wo"
201
-
202
- "が": "ga"
203
- "ぎ": "gi"
204
- "ぐ": "gu"
205
- "げ": "ge"
206
- "ご": "go"
207
- "ぎゃ": "gya"
208
- "ぎゅ": "gyu"
209
- "ぎょ": "gyo"
210
- "ぎょう": "gyō"
211
- "ごう": "gō"
212
-
213
- "ざ": "za"
214
- "じ": "ji"
215
- "ず": "zu"
216
- "ぜ": "ze"
217
- "ぞ": "zo"
218
- "じゃ": "ja"
219
- "じゅ": "ju"
220
- "じょ": "jo"
221
- "じょう": "jō"
222
- "ぞう": "zō"
223
-
224
- "だ": "da"
225
- "ぢ": "ji"
226
- "づ": "zu"
227
- "で": "de"
228
- "ど": "do"
229
- "ぢゃ": "ja"
230
- "ぢゅ": "ju"
231
- "ぢょ": "jo"
232
- "どう": "dō"
233
-
234
- "ば": "ba"
235
- "び": "bi"
236
- "ぶ": "bu"
237
- "べ": "be"
238
- "ぼ": "bo"
239
- "びゃ": "bya"
240
- "びゅ": "byu"
241
- "びょ": "byo"
242
- "びょう": "byō"
243
- "ぼう": "bō"
244
-
245
- "ぱ": "pa"
246
- "ぴ": "pi"
247
- "ぷ": "pu"
248
- "ぺ": "pe"
249
- "ぽ": "po"
250
- "ぴゃ": "pya"
251
- "ぴゅ": "pyu"
252
- "ぴょ": "pyo"
253
- "ぴょう": "pyō"
254
- "ぽう": "pō"
255
-
256
- "ん": "n"
257
-
258
- # Katakana
259
-
260
- "ア": "a"
261
- "イ": "i"
262
- "ウ": "u"
263
- "エ": "e"
264
- "オ": "o"
265
- "オウ": "ō"
266
-
267
- "カ": "ka"
268
- "キ": "ki"
269
- "ク": "ku"
270
- "ケ": "ke"
271
- "コ": "ko"
272
- "キャ": "kya"
273
- "キュ": "kyu"
274
- "キョ": "kyo"
275
- "キョウ": "kyō"
276
- "コウ": "kō"
277
-
278
- "サ": "sa"
279
- "シ": "shi"
280
- "ス": "su"
281
- "セ": "se"
282
- "ソ": "so"
283
- "シャ": "sha"
284
- "シュ": "shu"
285
- "ショ": "sho"
286
- "ショウ": "shō"
287
- "ソウ": "sō"
288
-
289
- "タ": "ta"
290
- "チ": "chi"
291
- "ツ": "tsu"
292
- "テ": "te"
293
- "ト": "to"
294
- "チャ": "cha"
295
- "チュ": "chu"
296
- "チョ": "cho"
297
- "チョウ": "chō"
298
- "トウ": "tō"
299
-
300
- "ナ": "na"
301
- "ニ": "ni"
302
- "ヌ": "nu"
303
- "ネ": "ne"
304
- "ノ": "no"
305
- "ニャ": "nya"
306
- "ニュ": "nyu"
307
- "ニョ": "nyo"
308
- "ニョウ": "nyō"
309
- "ノウ": "nō"
310
-
311
- "ハ": "ha"
312
- "ヒ": "hi"
313
- "フ": "fu"
314
- "ヘ": "he"
315
- "ホ": "ho"
316
- "ヒャ": "hya"
317
- "ヒュ": "hyu"
318
- "ヒョ": "hyo"
319
- "ヒョウ": "hyō"
320
- "ホウ": "hō"
321
-
322
- "マ": "ma"
323
- "ミ": "mi"
324
- "ム": "mu"
325
- "メ": "me"
326
- "モ": "mo"
327
- "ミャ": "mya"
328
- "ミュ": "myu"
329
- "ミョ": "myo"
330
- "ミョウ": "myō"
331
- "モウ": "mō"
332
-
333
- "ヤ": "ya"
334
- "ユ": "yu"
335
- "ヨ": "yo"
336
- "ヨウ": "yō"
337
-
338
- "ラ": "ra"
339
- "リ": "ri"
340
- "ル": "ru"
341
- "レ": "re"
342
- "ロ": "ro"
343
- "リャ": "rya"
344
- "リュ": "ryu"
345
- "リョ": "ryo"
346
- "リョウ": "ryō"
347
- "ロウ": "rō"
348
-
349
- "ワ": "wa"
350
- "ヲ": "wo"
351
-
352
- "ガ": "ga"
353
- "ギ": "gi"
354
- "グ": "gu"
355
- "ゲ": "ge"
356
- "ゴ": "go"
357
- "ギャ": "gya"
358
- "ギュ": "gyu"
359
- "ギョ": "gyo"
360
- "ギョウ": "gyō"
361
- "ゴウ": "gō"
362
-
363
- "ザ": "za"
364
- "ジ": "ji"
365
- "ズ": "zu"
366
- "ゼ": "ze"
367
- "ゾ": "zo"
368
- "ジャ": "ja"
369
- "ジュ": "ju"
370
- "ジョ": "jo"
371
- "ジョウ": "jō"
372
- "ゾウ": "zō"
373
-
374
- "ダ": "da"
375
- "ヂ": "ji"
376
- "ヅ": "zu"
377
- "デ": "de"
378
- "ド": "do"
379
- "ヂャ": "ja"
380
- "ヂュ": "ju"
381
- "ヂョ": "jo"
382
- "ドウ": "dō"
383
-
384
- "バ": "ba"
385
- "ビ": "bi"
386
- "ブ": "bu"
387
- "ベ": "be"
388
- "ボ": "bo"
389
- "ビャ": "bya"
390
- "ビュ": "byu"
391
- "ビョ": "byo"
392
- "ビョウ": "byō"
393
- "ボウ": "bō"
394
-
395
- "パ": "pa"
396
- "ピ": "pi"
397
- "プ": "pu"
398
- "ペ": "pe"
399
- "ポ": "po"
400
- "ピャ": "pya"
401
- "ピュ": "pyu"
402
- "ピョ": "pyo"
403
- "ピョウ": "pyō"
404
- "ポウ": "pō"
405
-
406
- "ン": "n"