interscript 0.1.1 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  6. data/lib/g2pwrapper.py +34 -0
  7. data/lib/interscript-opal.rb +2 -0
  8. data/lib/interscript.rb +138 -20
  9. data/lib/interscript/command.rb +28 -0
  10. data/lib/interscript/fs.rb +71 -0
  11. data/lib/interscript/mapping.rb +142 -0
  12. data/lib/interscript/opal.rb +27 -0
  13. data/lib/interscript/opal/maps.js.erb +10 -0
  14. data/lib/interscript/opal_map_translate.rb +12 -0
  15. data/lib/interscript/version.rb +1 -1
  16. data/lib/model-7 +0 -0
  17. data/lib/tha-pt-b-7 +0 -0
  18. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  19. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  20. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  21. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  22. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  23. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  24. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +125 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  27. data/maps/alalc-ell-Grek-Latn-1997.yaml +624 -0
  28. data/maps/alalc-ell-Grek-Latn-2010.yaml +627 -0
  29. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  30. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  31. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  32. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  33. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  34. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  35. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  36. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  37. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +221 -0
  38. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  39. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  40. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  41. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  42. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  43. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  44. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +174 -0
  45. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  46. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  47. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  48. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  49. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  50. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  51. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  52. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  53. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  54. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  55. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  56. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +285 -0
  57. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  58. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  59. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +701 -0
  60. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +19 -0
  61. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  62. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  63. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  64. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  65. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  66. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  67. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  68. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  69. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  70. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +92 -0
  71. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  72. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  73. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +162 -0
  74. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  75. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  76. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  77. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  78. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  79. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  80. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  81. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  82. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  83. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  84. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  85. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  86. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  87. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  88. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  89. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  90. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  91. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  92. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  93. data/maps/ggg-kat-Geor-Latn-2002.yaml +88 -0
  94. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  95. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  96. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +186 -0
  97. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  98. data/maps/icao-bel-Cyrl-Latn-9303.yaml +136 -0
  99. data/maps/icao-bul-Cyrl-Latn-9303.yaml +118 -0
  100. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  101. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  102. data/maps/icao-per-Arab-Latn-9303.yaml +103 -0
  103. data/maps/icao-rus-Cyrl-Latn-9303.yaml +117 -0
  104. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  105. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +119 -0
  106. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  107. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +609 -0
  108. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +40 -0
  109. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  110. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +271 -0
  111. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  112. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  113. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  114. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  115. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  116. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  117. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  118. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  119. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  120. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  121. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  122. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  123. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  124. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  125. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  126. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  127. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  128. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  129. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  130. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  131. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  132. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  133. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  134. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  135. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  136. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  137. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  138. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  139. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  140. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  141. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  142. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  143. data/maps/ses-ara-Arab-Latn-1930.yaml +279 -0
  144. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  145. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  146. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  147. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  148. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  149. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  150. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  151. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  152. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  153. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  154. data/maps/un-mon-Mong-Latn-2013.yaml +99 -0
  155. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  156. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  157. data/maps/un-ukr-Cyrl-Latn-1998.yaml +30 -0
  158. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  159. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  160. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  161. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  162. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  163. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  164. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  165. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  166. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  167. data/spec/interscript/mapping_spec.rb +42 -0
  168. data/spec/interscript_spec.rb +26 -0
  169. data/spec/spec_helper.rb +3 -0
  170. metadata +298 -18
@@ -0,0 +1,406 @@
1
+ ---
2
+ authority_id: var
3
+ id: hepburn-1886
4
+ language: jpn
5
+ source_script: Hrkt
6
+ destination_script: Latn
7
+ name: Traditional Hepburn
8
+ url: http://www.ab.cyberhome.ne.jp/~kaizu/roomazi/doc/hep3.html
9
+ creation_date: 1886
10
+ adoption_date:
11
+ description:
12
+ This is a traditional version of Hepburn romanization.
13
+
14
+ notes:
15
+ "
16
+ The book was published before the Japanese orthographic reform,
17
+ and this map takes the reformed orthography in Kana as the source
18
+ form.
19
+ https://en.wikipedia.org/wiki/Historical_kana_orthography
20
+
21
+ The distinction for long-vowel vs. repeating vowels has not been
22
+ implemented.
23
+ For example, the consecutive o's in these words are considered
24
+ a case of a long vowel, and are transliterated as ō:
25
+
26
+ 氷 (こおり) - kōri, 大阪(おおさか)- Ōsaka
27
+
28
+ If there are two consecutive o's in a string, but they belong to
29
+ different morphemes, then they should be transliterated separately.
30
+
31
+ 小躍り(こおどり)- koodori
32
+
33
+ The same goes for the combinations o+u, u+u as well.
34
+
35
+ However, this cannot be easily determined from the Kana.
36
+ Lexical knowledge is needed, and sometimes the Kanji representation
37
+ will give more hints about the morpheme boundary.
38
+
39
+ For now, this map will assume all o+o, o+u, u+u combinations to
40
+ be instances of long vowels.
41
+ "
42
+
43
+ tests:
44
+ - source: "ぐんま"
45
+ expected: "gumma"
46
+ - source: "しんよう"
47
+ expected: "shin-yō"
48
+ - source: "きんようび"
49
+ expected: "kin-yōbi"
50
+ - source: "とうきょう"
51
+ expected: "tōkyō"
52
+ - source: "しんばし"
53
+ expected: "shimbashi"
54
+
55
+ map:
56
+
57
+ rules:
58
+ # Add a dash (-) between ん and a vowel sound or ya, yu, yo
59
+ - pattern: "([んン])(?=[あいうえおやゆよアイウエオヤユヨ])"
60
+ result: "\\1-"
61
+
62
+ # Convert ん into m before b, m, p
63
+ - pattern: "[んン](?=[ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ])"
64
+ result: "m"
65
+
66
+ postrules:
67
+ # Handling of っ/ッ
68
+ #
69
+ # The kana っ/ッ is a geminate marker.
70
+ # When followed by a consonant, repeat the first letter of
71
+ # the following syllable. Exception: the combination -cch-
72
+ # should be transliterated as -tch-
73
+ #
74
+ # If っ/ッ is not followed by a consonant, then it is usually
75
+ # phonetically realised as an abrupt stop or shorterning of
76
+ # the previous syllable. There is no documented or commonly
77
+ # accepted way to transliterate this sound.
78
+
79
+ - pattern: "[っッ]([BbDdFfGgHhJjKkLlMmNnPpQqRrSsTtVvWwXxYyZz])"
80
+ result: "\\1\\1"
81
+ - pattern: "[っッ]([Cc])" # ッ followed by ch-
82
+ result: "t\\1"
83
+ - pattern: "[っッ]" # drop all other っッ.
84
+ result: ""
85
+
86
+ # In Traditional Hepburn, long o (which can be o+o or o+u), and long u
87
+ # are transliterated as ō and ū.
88
+ #
89
+ # A macron should not be used if two repeating letters are split across
90
+ # a morpheme boundary.
91
+ #
92
+ # Long vowels in loanwords are indicated with a macron instead
93
+ # of letter doubling.
94
+
95
+ - pattern: "a[ー]"
96
+ result: "ā"
97
+ - pattern: "i[ー]"
98
+ result: "ī"
99
+ - pattern: "u[ーu]"
100
+ result: "ū"
101
+ - pattern: "e[ー]"
102
+ result: "ē"
103
+ - pattern: "o[ーo]"
104
+ result: "ō"
105
+
106
+ characters:
107
+
108
+ # Hiragana
109
+
110
+ "あ": "a"
111
+ "い": "i"
112
+ "う": "u"
113
+ "え": "e"
114
+ "お": "o"
115
+ "おう": "ō"
116
+
117
+ "か": "ka"
118
+ "き": "ki"
119
+ "く": "ku"
120
+ "け": "ke"
121
+ "こ": "ko"
122
+ "きゃ": "kya"
123
+ "きゅ": "kyu"
124
+ "きょ": "kyo"
125
+ "きょう": "kyō"
126
+ "こう": "kō"
127
+
128
+ "さ": "sa"
129
+ "し": "shi"
130
+ "す": "su"
131
+ "せ": "se"
132
+ "そ": "so"
133
+ "しゃ": "sha"
134
+ "しゅ": "shu"
135
+ "しょ": "sho"
136
+ "しょう": "shō"
137
+ "そう": "sō"
138
+
139
+ "た": "ta"
140
+ "ち": "chi"
141
+ "つ": "tsu"
142
+ "て": "te"
143
+ "と": "to"
144
+ "ちゃ": "cha"
145
+ "ちゅ": "chu"
146
+ "ちょ": "cho"
147
+ "とう": "tō"
148
+ "ちょう": "chō"
149
+
150
+ "な": "na"
151
+ "に": "ni"
152
+ "ぬ": "nu"
153
+ "ね": "ne"
154
+ "の": "no"
155
+ "にゃ": "nya"
156
+ "にゅ": "nyu"
157
+ "にょ": "nyo"
158
+ "にょう": "nyō"
159
+ "のう": "nō"
160
+
161
+ "は": "ha"
162
+ "ひ": "hi"
163
+ "ふ": "fu"
164
+ "へ": "he"
165
+ "ほ": "ho"
166
+ "ひゃ": "hya"
167
+ "ひゅ": "hyu"
168
+ "ひょ": "hyo"
169
+ "ひょう": "hyō"
170
+ "ほう": "hō"
171
+
172
+ "ま": "ma"
173
+ "み": "mi"
174
+ "む": "mu"
175
+ "め": "me"
176
+ "も": "mo"
177
+ "みゃ": "mya"
178
+ "みゅ": "myu"
179
+ "みょ": "myo"
180
+ "みょう": "myō"
181
+ "もう": "mō"
182
+
183
+ "や": "ya"
184
+ "ゆ": "yu"
185
+ "よ": "yo"
186
+ "よう": "yō"
187
+
188
+ "ら": "ra"
189
+ "り": "ri"
190
+ "る": "ru"
191
+ "れ": "re"
192
+ "ろ": "ro"
193
+ "りゃ": "rya"
194
+ "りゅ": "ryu"
195
+ "りょ": "ryo"
196
+ "りょう": "ryō"
197
+ "ろう": "rō"
198
+
199
+ "わ": "wa"
200
+ "を": "wo"
201
+
202
+ "が": "ga"
203
+ "ぎ": "gi"
204
+ "ぐ": "gu"
205
+ "げ": "ge"
206
+ "ご": "go"
207
+ "ぎゃ": "gya"
208
+ "ぎゅ": "gyu"
209
+ "ぎょ": "gyo"
210
+ "ぎょう": "gyō"
211
+ "ごう": "gō"
212
+
213
+ "ざ": "za"
214
+ "じ": "ji"
215
+ "ず": "zu"
216
+ "ぜ": "ze"
217
+ "ぞ": "zo"
218
+ "じゃ": "ja"
219
+ "じゅ": "ju"
220
+ "じょ": "jo"
221
+ "じょう": "jō"
222
+ "ぞう": "zō"
223
+
224
+ "だ": "da"
225
+ "ぢ": "ji"
226
+ "づ": "zu"
227
+ "で": "de"
228
+ "ど": "do"
229
+ "ぢゃ": "ja"
230
+ "ぢゅ": "ju"
231
+ "ぢょ": "jo"
232
+ "どう": "dō"
233
+
234
+ "ば": "ba"
235
+ "び": "bi"
236
+ "ぶ": "bu"
237
+ "べ": "be"
238
+ "ぼ": "bo"
239
+ "びゃ": "bya"
240
+ "びゅ": "byu"
241
+ "びょ": "byo"
242
+ "びょう": "byō"
243
+ "ぼう": "bō"
244
+
245
+ "ぱ": "pa"
246
+ "ぴ": "pi"
247
+ "ぷ": "pu"
248
+ "ぺ": "pe"
249
+ "ぽ": "po"
250
+ "ぴゃ": "pya"
251
+ "ぴゅ": "pyu"
252
+ "ぴょ": "pyo"
253
+ "ぴょう": "pyō"
254
+ "ぽう": "pō"
255
+
256
+ "ん": "n"
257
+
258
+ # Katakana
259
+
260
+ "ア": "a"
261
+ "イ": "i"
262
+ "ウ": "u"
263
+ "エ": "e"
264
+ "オ": "o"
265
+ "オウ": "ō"
266
+
267
+ "カ": "ka"
268
+ "キ": "ki"
269
+ "ク": "ku"
270
+ "ケ": "ke"
271
+ "コ": "ko"
272
+ "キャ": "kya"
273
+ "キュ": "kyu"
274
+ "キョ": "kyo"
275
+ "キョウ": "kyō"
276
+ "コウ": "kō"
277
+
278
+ "サ": "sa"
279
+ "シ": "shi"
280
+ "ス": "su"
281
+ "セ": "se"
282
+ "ソ": "so"
283
+ "シャ": "sha"
284
+ "シュ": "shu"
285
+ "ショ": "sho"
286
+ "ショウ": "shō"
287
+ "ソウ": "sō"
288
+
289
+ "タ": "ta"
290
+ "チ": "chi"
291
+ "ツ": "tsu"
292
+ "テ": "te"
293
+ "ト": "to"
294
+ "チャ": "cha"
295
+ "チュ": "chu"
296
+ "チョ": "cho"
297
+ "チョウ": "chō"
298
+ "トウ": "tō"
299
+
300
+ "ナ": "na"
301
+ "ニ": "ni"
302
+ "ヌ": "nu"
303
+ "ネ": "ne"
304
+ "ノ": "no"
305
+ "ニャ": "nya"
306
+ "ニュ": "nyu"
307
+ "ニョ": "nyo"
308
+ "ニョウ": "nyō"
309
+ "ノウ": "nō"
310
+
311
+ "ハ": "ha"
312
+ "ヒ": "hi"
313
+ "フ": "fu"
314
+ "ヘ": "he"
315
+ "ホ": "ho"
316
+ "ヒャ": "hya"
317
+ "ヒュ": "hyu"
318
+ "ヒョ": "hyo"
319
+ "ヒョウ": "hyō"
320
+ "ホウ": "hō"
321
+
322
+ "マ": "ma"
323
+ "ミ": "mi"
324
+ "ム": "mu"
325
+ "メ": "me"
326
+ "モ": "mo"
327
+ "ミャ": "mya"
328
+ "ミュ": "myu"
329
+ "ミョ": "myo"
330
+ "ミョウ": "myō"
331
+ "モウ": "mō"
332
+
333
+ "ヤ": "ya"
334
+ "ユ": "yu"
335
+ "ヨ": "yo"
336
+ "ヨウ": "yō"
337
+
338
+ "ラ": "ra"
339
+ "リ": "ri"
340
+ "ル": "ru"
341
+ "レ": "re"
342
+ "ロ": "ro"
343
+ "リャ": "rya"
344
+ "リュ": "ryu"
345
+ "リョ": "ryo"
346
+ "リョウ": "ryō"
347
+ "ロウ": "rō"
348
+
349
+ "ワ": "wa"
350
+ "ヲ": "wo"
351
+
352
+ "ガ": "ga"
353
+ "ギ": "gi"
354
+ "グ": "gu"
355
+ "ゲ": "ge"
356
+ "ゴ": "go"
357
+ "ギャ": "gya"
358
+ "ギュ": "gyu"
359
+ "ギョ": "gyo"
360
+ "ギョウ": "gyō"
361
+ "ゴウ": "gō"
362
+
363
+ "ザ": "za"
364
+ "ジ": "ji"
365
+ "ズ": "zu"
366
+ "ゼ": "ze"
367
+ "ゾ": "zo"
368
+ "ジャ": "ja"
369
+ "ジュ": "ju"
370
+ "ジョ": "jo"
371
+ "ジョウ": "jō"
372
+ "ゾウ": "zō"
373
+
374
+ "ダ": "da"
375
+ "ヂ": "ji"
376
+ "ヅ": "zu"
377
+ "デ": "de"
378
+ "ド": "do"
379
+ "ヂャ": "ja"
380
+ "ヂュ": "ju"
381
+ "ヂョ": "jo"
382
+ "ドウ": "dō"
383
+
384
+ "バ": "ba"
385
+ "ビ": "bi"
386
+ "ブ": "bu"
387
+ "ベ": "be"
388
+ "ボ": "bo"
389
+ "ビャ": "bya"
390
+ "ビュ": "byu"
391
+ "ビョ": "byo"
392
+ "ビョウ": "byō"
393
+ "ボウ": "bō"
394
+
395
+ "パ": "pa"
396
+ "ピ": "pi"
397
+ "プ": "pu"
398
+ "ペ": "pe"
399
+ "ポ": "po"
400
+ "ピャ": "pya"
401
+ "ピュ": "pyu"
402
+ "ピョ": "pyo"
403
+ "ピョウ": "pyō"
404
+ "ポウ": "pō"
405
+
406
+ "ン": "n"
@@ -0,0 +1,386 @@
1
+ ---
2
+ authority_id: var
3
+ id: hepburn-1954
4
+ language: jpn
5
+ source_script: Hrkt
6
+ destination_script: Latn
7
+ name: Modified Hepburn
8
+ url:
9
+ creation_date: 1954
10
+ adoption_date:
11
+ description:
12
+ This is a modified version of Hepburn romanization, which was
13
+ adopted by Kenkyusha's New Japanese-English Dictionary (3rd edition).
14
+ One crucial difference between Modified and Traditional Hepburn is
15
+ that the ん/ン sound is always transliterated as n, even before the
16
+ letters b,m,p.
17
+
18
+ notes:
19
+
20
+ tests:
21
+ # - source: かんおう
22
+ # expected: kan'ō
23
+ # - source: かのう
24
+ # expected: kanō
25
+ # - source: きんゆう
26
+ # expected: kin'yū
27
+ # - source: とうきょう
28
+ # expected: tōkyō
29
+ # - source: がっこう
30
+ # expected: gakkō
31
+ - source: かごっま
32
+ expected: kagomma
33
+ # - source: ぽっぽっや
34
+ # expected: poppoyya
35
+ # - source: てっら
36
+ # expected: terra
37
+ # - source: にゃっほー
38
+ # expected: nyahhō
39
+
40
+ map:
41
+
42
+ rules:
43
+ - pattern: "([んン])(?=[あいうえおやゆよアイウエオヤユヨ])"
44
+ result: "\\1’"
45
+
46
+ postrules:
47
+ # Handling of っ/ッ
48
+ #
49
+ # The kana っ/ッ is a geminate marker.
50
+ # When followed by a consonant, repeat the first letter of
51
+ # the following syllable. Exception: the combination -cch-
52
+ # should be transliterated as -tch-
53
+ #
54
+ # If っ/ッ is not followed by a consonant, then it is usually
55
+ # phonetically realised as an abrupt stop or shortening of
56
+ # the previous syllable. There is no documented or commonly
57
+ # accepted way to transliterate this sound.
58
+
59
+ - pattern: "[っッ]([BbDdFfGgHhJjKkLlMmNnPpQqRrSsTtVvWwXxYyZz])" # っ or ッ followed by consonant
60
+ result: "\\1\\1"
61
+ - pattern: "[っッ]([Cc])" # ッ followed by ch-
62
+ result: "t\\1"
63
+ - pattern: "[っッ]" # drop all other っッ.
64
+ result: ""
65
+
66
+ # In Modified Hepburn, long vowels within the same morpheme are
67
+ # transliterated using a macron, i.e. ā, ī, ū, ē, ō.
68
+ #
69
+ # If two repeating letters are split across a morpheme boundary
70
+ # then they should not be changed into a macron.
71
+ #
72
+ # Long vowels in loanwords are indicated with a macron instead
73
+ # of letter doubling.
74
+
75
+ - pattern: "a[ーa]"
76
+ result: "ā"
77
+ - pattern: "i[ーi]"
78
+ result: "ī"
79
+ - pattern: "u[ーu]"
80
+ result: "ū"
81
+ - pattern: "e[ーe]"
82
+ result: "ē"
83
+ - pattern: "o[ーo]"
84
+ result: "ō"
85
+
86
+ characters:
87
+
88
+ # Hiragana
89
+
90
+ "あ": "a"
91
+ "い": "i"
92
+ "う": "u"
93
+ "え": "e"
94
+ "お": "o"
95
+ "おう": "ō"
96
+
97
+ "か": "ka"
98
+ "き": "ki"
99
+ "く": "ku"
100
+ "け": "ke"
101
+ "こ": "ko"
102
+ "きゃ": "kya"
103
+ "きゅ": "kyu"
104
+ "きょ": "kyo"
105
+ "きょう": "kyō"
106
+ "こう": "kō"
107
+
108
+ "さ": "sa"
109
+ "し": "shi"
110
+ "す": "su"
111
+ "せ": "se"
112
+ "そ": "so"
113
+ "しゃ": "sha"
114
+ "しゅ": "shu"
115
+ "しょ": "sho"
116
+ "しょう": "shō"
117
+ "そう": "sō"
118
+
119
+ "た": "ta"
120
+ "ち": "chi"
121
+ "つ": "tsu"
122
+ "て": "te"
123
+ "と": "to"
124
+ "ちゃ": "cha"
125
+ "ちゅ": "chu"
126
+ "ちょ": "cho"
127
+ "とう": "tō"
128
+ "ちょう": "chō"
129
+
130
+ "な": "na"
131
+ "に": "ni"
132
+ "ぬ": "nu"
133
+ "ね": "ne"
134
+ "の": "no"
135
+ "にゃ": "nya"
136
+ "にゅ": "nyu"
137
+ "にょ": "nyo"
138
+ "にょう": "nyō"
139
+ "のう": "nō"
140
+
141
+ "は": "ha"
142
+ "ひ": "hi"
143
+ "ふ": "fu"
144
+ "へ": "he"
145
+ "ほ": "ho"
146
+ "ひゃ": "hya"
147
+ "ひゅ": "hyu"
148
+ "ひょ": "hyo"
149
+ "ひょう": "hyō"
150
+ "ほう": "hō"
151
+
152
+ "ま": "ma"
153
+ "み": "mi"
154
+ "む": "mu"
155
+ "め": "me"
156
+ "も": "mo"
157
+ "みゃ": "mya"
158
+ "みゅ": "myu"
159
+ "みょ": "myo"
160
+ "みょう": "myō"
161
+ "もう": "mō"
162
+
163
+ "や": "ya"
164
+ "ゆ": "yu"
165
+ "よ": "yo"
166
+ "よう": "yō"
167
+
168
+ "ら": "ra"
169
+ "り": "ri"
170
+ "る": "ru"
171
+ "れ": "re"
172
+ "ろ": "ro"
173
+ "りゃ": "rya"
174
+ "りゅ": "ryu"
175
+ "りょ": "ryo"
176
+ "りょう": "ryō"
177
+ "ろう": "rō"
178
+
179
+ "わ": "wa"
180
+ "を": "o"
181
+
182
+ "が": "ga"
183
+ "ぎ": "gi"
184
+ "ぐ": "gu"
185
+ "げ": "ge"
186
+ "ご": "go"
187
+ "ぎゃ": "gya"
188
+ "ぎゅ": "gyu"
189
+ "ぎょ": "gyo"
190
+ "ぎょう": "gyō"
191
+ "ごう": "gō"
192
+
193
+ "ざ": "za"
194
+ "じ": "ji"
195
+ "ず": "zu"
196
+ "ぜ": "ze"
197
+ "ぞ": "zo"
198
+ "じゃ": "ja"
199
+ "じゅ": "ju"
200
+ "じょ": "jo"
201
+ "じょう": "jō"
202
+ "ぞう": "zō"
203
+
204
+ "だ": "da"
205
+ "ぢ": "ji"
206
+ "づ": "zu"
207
+ "で": "de"
208
+ "ど": "do"
209
+ "ぢゃ": "ja"
210
+ "ぢゅ": "ju"
211
+ "ぢょ": "jo"
212
+ "どう": "dō"
213
+
214
+ "ば": "ba"
215
+ "び": "bi"
216
+ "ぶ": "bu"
217
+ "べ": "be"
218
+ "ぼ": "bo"
219
+ "びゃ": "bya"
220
+ "びゅ": "byu"
221
+ "びょ": "byo"
222
+ "びょう": "byō"
223
+ "ぼう": "bō"
224
+
225
+ "ぱ": "pa"
226
+ "ぴ": "pi"
227
+ "ぷ": "pu"
228
+ "ぺ": "pe"
229
+ "ぽ": "po"
230
+ "ぴゃ": "pya"
231
+ "ぴゅ": "pyu"
232
+ "ぴょ": "pyo"
233
+ "ぴょう": "pyō"
234
+ "ぽう": "pō"
235
+
236
+ "ん": "n"
237
+
238
+ # Katakana
239
+
240
+ "ア": "a"
241
+ "イ": "i"
242
+ "ウ": "u"
243
+ "エ": "e"
244
+ "オ": "o"
245
+ "オウ": "ō"
246
+
247
+ "カ": "ka"
248
+ "キ": "ki"
249
+ "ク": "ku"
250
+ "ケ": "ke"
251
+ "コ": "ko"
252
+ "キャ": "kya"
253
+ "キュ": "kyu"
254
+ "キョ": "kyo"
255
+ "キョウ": "kyō"
256
+ "コウ": "kō"
257
+
258
+ "サ": "sa"
259
+ "シ": "shi"
260
+ "ス": "su"
261
+ "セ": "se"
262
+ "ソ": "so"
263
+ "シャ": "sha"
264
+ "シュ": "shu"
265
+ "ショ": "sho"
266
+ "ショウ": "shō"
267
+ "ソウ": "sō"
268
+
269
+ "タ": "ta"
270
+ "チ": "chi"
271
+ "ツ": "tsu"
272
+ "テ": "te"
273
+ "ト": "to"
274
+ "チャ": "cha"
275
+ "チュ": "chu"
276
+ "チョ": "cho"
277
+ "チョウ": "chō"
278
+ "トウ": "tō"
279
+
280
+ "ナ": "na"
281
+ "ニ": "ni"
282
+ "ヌ": "nu"
283
+ "ネ": "ne"
284
+ "ノ": "no"
285
+ "ニャ": "nya"
286
+ "ニュ": "nyu"
287
+ "ニョ": "nyo"
288
+ "ニョウ": "nyō"
289
+ "ノウ": "nō"
290
+
291
+ "ハ": "ha"
292
+ "ヒ": "hi"
293
+ "フ": "fu"
294
+ "ヘ": "he"
295
+ "ホ": "ho"
296
+ "ヒャ": "hya"
297
+ "ヒュ": "hyu"
298
+ "ヒョ": "hyo"
299
+ "ヒョウ": "hyō"
300
+ "ホウ": "hō"
301
+
302
+ "マ": "ma"
303
+ "ミ": "mi"
304
+ "ム": "mu"
305
+ "メ": "me"
306
+ "モ": "mo"
307
+ "ミャ": "mya"
308
+ "ミュ": "myu"
309
+ "ミョ": "myo"
310
+ "ミョウ": "myō"
311
+ "モウ": "mō"
312
+
313
+ "ヤ": "ya"
314
+ "ユ": "yu"
315
+ "ヨ": "yo"
316
+ "ヨウ": "yō"
317
+
318
+ "ラ": "ra"
319
+ "リ": "ri"
320
+ "ル": "ru"
321
+ "レ": "re"
322
+ "ロ": "ro"
323
+ "リャ": "rya"
324
+ "リュ": "ryu"
325
+ "リョ": "ryo"
326
+ "リョウ": "ryō"
327
+ "ロウ": "rō"
328
+
329
+ "ワ": "wa"
330
+ "ヲ": "o"
331
+
332
+ "ガ": "ga"
333
+ "ギ": "gi"
334
+ "グ": "gu"
335
+ "ゲ": "ge"
336
+ "ゴ": "go"
337
+ "ギャ": "gya"
338
+ "ギュ": "gyu"
339
+ "ギョ": "gyo"
340
+ "ギョウ": "gyō"
341
+ "ゴウ": "gō"
342
+
343
+ "ザ": "za"
344
+ "ジ": "ji"
345
+ "ズ": "zu"
346
+ "ゼ": "ze"
347
+ "ゾ": "zo"
348
+ "ジャ": "ja"
349
+ "ジュ": "ju"
350
+ "ジョ": "jo"
351
+ "ジョウ": "jō"
352
+ "ゾウ": "zō"
353
+
354
+ "ダ": "da"
355
+ "ヂ": "ji"
356
+ "ヅ": "zu"
357
+ "デ": "de"
358
+ "ド": "do"
359
+ "ヂャ": "ja"
360
+ "ヂュ": "ju"
361
+ "ヂョ": "jo"
362
+ "ドウ": "dō"
363
+
364
+ "バ": "ba"
365
+ "ビ": "bi"
366
+ "ブ": "bu"
367
+ "ベ": "be"
368
+ "ボ": "bo"
369
+ "ビャ": "bya"
370
+ "ビュ": "byu"
371
+ "ビョ": "byo"
372
+ "ビョウ": "byō"
373
+ "ボウ": "bō"
374
+
375
+ "パ": "pa"
376
+ "ピ": "pi"
377
+ "プ": "pu"
378
+ "ペ": "pe"
379
+ "ポ": "po"
380
+ "ピャ": "pya"
381
+ "ピュ": "pyu"
382
+ "ピョ": "pyo"
383
+ "ピョウ": "pyō"
384
+ "ポウ": "pō"
385
+
386
+ "ン": "n"