JayVerb 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/japanese/conjugator.rb +1521 -0
- data/lib/japanese/to_romaji.rb +195 -0
- data/lib/japanese/verb_identifier.rb +112 -0
- data/lib/jay_verb.rb +59 -0
- metadata +47 -0
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
module Japanese
  # Transliteration tables and a converter that turn hiragana into Roman
  # letters.
  #
  # Every table maps a hiragana sequence (String key) to its romaji spelling.
  # The keys are plain strings so they can be used directly as search
  # patterns; the converter never modifies the tables.
  module ToRomaji
    # Single kana.
    # NOTE(review): "dchi" (and the "dja"/"dju"/"djo" combos below) are not
    # standard Hepburn spellings; they are kept as-is because changing them
    # would change the converter's existing output.
    BASIC_HIRAGANA_MAP = {
      "あ" => "a", "い" => "i", "う" => "u", "え" => "e", "お" => "o",
      "か" => "ka", "き" => "ki", "く" => "ku", "け" => "ke", "こ" => "ko",
      "さ" => "sa", "し" => "shi", "す" => "su", "せ" => "se", "そ" => "so",
      "た" => "ta", "ち" => "chi", "つ" => "tsu", "て" => "te", "と" => "to",
      "な" => "na", "に" => "ni", "ぬ" => "nu", "ね" => "ne", "の" => "no",
      "は" => "ha", "ひ" => "hi", "ふ" => "fu", "へ" => "he", "ほ" => "ho",
      "ま" => "ma", "み" => "mi", "む" => "mu", "め" => "me", "も" => "mo",
      "や" => "ya", "ゆ" => "yu", "よ" => "yo",
      "ら" => "ra", "り" => "ri", "る" => "ru", "れ" => "re", "ろ" => "ro",
      "わ" => "wa", "を" => "wo", "ん" => "n",
      "が" => "ga", "ぎ" => "gi", "ぐ" => "gu", "げ" => "ge", "ご" => "go",
      "ざ" => "za", "じ" => "ji", "ず" => "zu", "ぜ" => "ze", "ぞ" => "zo",
      "だ" => "da", "ぢ" => "dchi", "づ" => "dzu", "で" => "de", "ど" => "do",
      "ば" => "ba", "び" => "bi", "ぶ" => "bu", "べ" => "be", "ぼ" => "bo",
      "ぱ" => "pa", "ぴ" => "pi", "ぷ" => "pu", "ぺ" => "pe", "ぽ" => "po"
    }

    # A kana followed by a small ゃ/ゅ/ょ (or ぃ) that is read as one syllable.
    SMALL_COMBO_HIRAGANA_MAP = {
      "きゃ" => "kya", "きゅ" => "kyu", "きょ" => "kyo",
      "しゃ" => "sha", "しゅ" => "shu", "しょ" => "sho",
      "ちゃ" => "cha", "ちゅ" => "chu", "ちょ" => "cho",
      "にゃ" => "nya", "にゅ" => "nyu", "にょ" => "nyo",
      "ひゃ" => "hya", "ひゅ" => "hyu", "ひょ" => "hyo",
      "みゃ" => "mya", "みゅ" => "myu", "みょ" => "myo",
      "りゃ" => "rya", "りゅ" => "ryu", "りょ" => "ryo",
      "ぎゃ" => "gya", "ぎゅ" => "gyu", "ぎょ" => "gyo",
      "じゃ" => "ja", "じゅ" => "ju", "じょ" => "jo",
      "ぢゃ" => "dja", "ぢゅ" => "dju", "ぢょ" => "djo",
      "びゃ" => "bya", "びゅ" => "byu", "びょ" => "byo",
      "ぴゃ" => "pya", "ぴゅ" => "pyu", "ぴょ" => "pyo",
      "てぃ" => "ti"
    }

    # Small っ followed by a kana: the following consonant is doubled.
    DOUBLED_CONSONANT_HIRAGANA_MAP = {
      "っか" => "kka", "っき" => "kki", "っく" => "kku", "っけ" => "kke", "っこ" => "kko",
      "っさ" => "ssa", "っし" => "sshi", "っす" => "ssu", "っせ" => "sse", "っそ" => "sso",
      "った" => "tta", "っち" => "cchi", "っつ" => "ttsu", "って" => "tte", "っと" => "tto",
      "っば" => "bba", "っび" => "bbi", "っぶ" => "bbu", "っべ" => "bbe", "っぼ" => "bbo",
      "っぱ" => "ppa", "っぴ" => "ppi", "っぷ" => "ppu", "っぺ" => "ppe", "っぽ" => "ppo"
    }

    # Small っ followed by a two-kana combination.
    TRIPLE_CHAR_HIRAGANA_MAP = {
      "っきゃ" => "kkya", "っきゅ" => "kkyu", "っきょ" => "kkyo",
      "っしゃ" => "ssha", "っしゅ" => "sshu", "っしょ" => "ssho",
      "っちゃ" => "ccha", "っちゅ" => "cchu", "っちょ" => "ccho",
      "っじゃ" => "jja", "っじゅ" => "jju", "っじょ" => "jjo",
      "っびゃ" => "bbya", "っびゅ" => "bbyu", "っびょ" => "bbyo",
      "っぴゃ" => "ppya", "っぴゅ" => "ppyu", "っぴょ" => "ppyo"
    }

    # Leftover small kana and punctuation that were not consumed by one of
    # the multi-character tables. A stray っ is dropped entirely.
    SPECIALS = {
      "ゃ" => "ya", "ゅ" => "yu", "ょ" => "yo",
      "っ" => "",
      "ぁ" => "a", "ぃ" => "i", "ぅ" => "u", "ぇ" => "e", "ぉ" => "o",
      "。" => "."
    }

    # Converts a hiragana string into Roman letters.
    #
    # The tables are applied longest sequence first (three characters, then
    # the two-character tables, then single kana, then leftovers) so that a
    # sequence such as "っしゃ" is replaced as a unit before its parts can be
    # matched individually. Characters that appear in no table are left
    # untouched.
    #
    # The conversion happens in place: a mutable +string+ is modified and the
    # same object is returned. A frozen +string+ is copied first, so the
    # caller's object is left alone and the converted copy is returned.
    #
    # @param string [String, nil] hiragana text to convert
    # @return [String, nil] the romanized text, or nil when +string+ is nil
    def self.convert_hiragana(string)
      return string if string.nil?

      string = string.dup if string.frozen?
      conversion_order = [
        TRIPLE_CHAR_HIRAGANA_MAP,
        DOUBLED_CONSONANT_HIRAGANA_MAP,
        SMALL_COMBO_HIRAGANA_MAP,
        BASIC_HIRAGANA_MAP,
        SPECIALS
      ]
      conversion_order.each do |table|
        # A String pattern is matched literally, so no Regexp is needed.
        # gsub! returns nil when nothing matched; that result is ignored.
        table.each { |kana, roman| string.gsub!(kana.to_s, roman) }
      end
      string
    end
  end
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
require 'active_support'
|
|
2
|
+
require 'active_support/core_ext'
|
|
3
|
+
|
|
4
|
+
module Japanese
  # Mixin that works out the conjugation class of a Japanese verb from its
  # dictionary form.
  #
  # The including class must provide +kanji+ and +hiragana+ readers and a
  # +part_of_speech=+ writer. The class labels stored there ("v1", "v5r",
  # "v-suru", ...) presumably follow EDICT/JMdict part-of-speech tags --
  # TODO confirm.
  module VerbIdentifier
    # Verbs classified as consonant-stem ("v5r") even though their reading
    # ends in an i-row or e-row kana followed by る.
    CONSONANT_VERBS = %w(脂ぎる びびる 契る 散る どじる 愚痴る 入る 走る 穿る 迸る
                         熱る いびる 弄る 炒る 要る 限る 齧る 呪る 切る 霧る 切る
                         きしる 軋る 抉る 参る 混じる 交じる 滅入る 見縊る 漲る
                         毟る 捩じる 握る 罵る 陥る 思い入る 思い切る せびる 知る
                         謗る 滾る 魂消る 迸る とちる 野次る 過る 横切る 嘲る 駄弁る
                         彫る 選る 啁る 耽る 伏せる 侍る 減る 撚る 翻る 火照る 帰る
                         返る 反る 還る 孵る 陰る 駆ける 蹴る くねる 覆る 練る のめる
                         滑る 阿る 競る 挵る 喋る 茂る 湿気る そべる 滑る 猛る 照る
                         抓める 抓る うねる 蘇る 甦る 放る 括る 抉る 捥る 捩る 詰る
                         )

    # Irregular る-verbs and the class label assigned to each. Keys are plain
    # strings so they can be compared with +kanji+ directly.
    RU_IRREGULAR_MAPPING = {
      "する" => "v-suru",
      "来る" => "v-kuru",
      "有る" => "v-aru",
      "ある" => "v-aru",
      "居らっしゃる" => "v5r-i",
      "いらっしゃる" => "v5r-i"
    }

    # Spellings that cannot be classified when the verb is written in
    # hiragana only (see #ambiguous?).
    AMBIGUITY_IF_HIRAGANA_ONLY = %w(いる きる える へる かえる ねる しめる)

    # Kana of the e-row and i-row; used to detect -eru / -iru endings.
    E_HIRAGANA = %w(え け せ て へ ね め れ げ ぜ で べ ぺ)
    # "ぢ" was previously spelled "ぢぃ", a two-character entry that could
    # never equal the single character it is compared against.
    I_HIRAGANA = %w(い き し ち ひ に み り ぎ じ ぢ び ぴ)

    # Classifies a verb that ends in る and stores the result in
    # +part_of_speech+.
    #
    # @return [String, nil] the class that was assigned, or nil when the verb
    #   is ambiguous or does not end in る (nothing is assigned in that case)
    def resolve_ru_verb_class
      return nil if ambiguous?

      if irregular?
        # Look the label up directly; iterating the mapping used to make this
        # method return the whole Hash instead of the assigned label.
        self.part_of_speech = RU_IRREGULAR_MAPPING[kanji]
      elsif ends_in_iru_eru?
        self.part_of_speech = is_consonant_verb? ? "v5r" : "v1"
      elsif ends_in_ru?
        self.part_of_speech = "v5r"
      end
    end

    # Classifies the verb from the last character of +kanji+ and stores the
    # result in +part_of_speech+.
    #
    # @return [String, nil] the class that was assigned, or nil when no class
    #   could be determined (including when +kanji+ is nil)
    def resolve_verb_class
      # Special cases of "k" and "u" verbs.
      if kanji == "行く"
        self.part_of_speech = "v5k-s"
      elsif kanji == "問う"
        self.part_of_speech = "v5u-s"
      else
        case kanji.to_s[-1]
        when "ぶ" then self.part_of_speech = "v5b"
        when "ぐ" then self.part_of_speech = "v5g"
        when "く" then self.part_of_speech = "v5k"
        when "む" then self.part_of_speech = "v5m"
        when "ぬ" then self.part_of_speech = "v5n"
        when "す" then self.part_of_speech = "v5s"
        when "る" then resolve_ru_verb_class
        when "つ" then self.part_of_speech = "v5t"
        when "う" then self.part_of_speech = "v5u"
        end
      end
    end

    # Include into the word class and use on word instances.
    #
    # Checks +kanji+, falling back to +hiragana+ when +kanji+ is nil, empty
    # or whitespace only.
    #
    # @return [Boolean] true when the dictionary form ends in る
    def ends_in_ru?
      kanji_present = !(kanji.to_s =~ /[^[:space:]]/).nil?
      written_form = kanji_present ? kanji : hiragana
      written_form.to_s[-1] == "る"
    end

    # Returns true if the class of the verb cannot be determined without more
    # information: the verb is written in hiragana only and its spelling is
    # one of AMBIGUITY_IF_HIRAGANA_ONLY.
    #
    # @return [Boolean]
    def ambiguous?
      kanji == hiragana && AMBIGUITY_IF_HIRAGANA_ONLY.include?(kanji)
    end

    # Returns true when the second-to-last kana of +hiragana+ belongs to the
    # e-row or the i-row. The final character itself is not inspected.
    #
    # @return [Boolean]
    def ends_in_iru_eru?
      penultimate = hiragana.to_s[-2]
      E_HIRAGANA.include?(penultimate) || I_HIRAGANA.include?(penultimate)
    end

    # Returns true when the verb has an -iru/-eru style reading, is not
    # ambiguous, and +kanji+ is listed in CONSONANT_VERBS.
    #
    # @return [Boolean]
    def is_consonant_verb?
      ends_in_iru_eru? && !ambiguous? && CONSONANT_VERBS.include?(kanji)
    end

    # Returns true when +kanji+ is one of the irregular る-verbs in
    # RU_IRREGULAR_MAPPING.
    #
    # @return [Boolean]
    def irregular?
      RU_IRREGULAR_MAPPING.key?(kanji)
    end
  end
end
|
data/lib/jay_verb.rb
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
require 'active_support'
|
|
2
|
+
require 'active_support/core_ext'
|
|
3
|
+
require 'japanese/verb_identifier'
|
|
4
|
+
require 'japanese/conjugator'
|
|
5
|
+
require 'japanese/to_romaji'
|
|
6
|
+
|
|
7
|
+
# A Japanese verb given by its dictionary form in kanji and in hiragana.
#
# Construction romanizes the reading, determines the verb's class, enables
# the conjugation modes and then calls +process_verb+ (not defined in this
# class; presumably supplied by Japanese::Conjugator -- TODO confirm).
class JayVerb
  include Japanese::VerbIdentifier
  include Japanese::Conjugator
  include Japanese::ToRomaji

  # Verb classes for which the has_* flags are switched on.
  CONJUGABLE_PARTS_OF_SPEECH = %w(v1 v5b v5g v5k v5k-s v5m v5n v5r v5r-i v5s
                                  v5t v5u v5u-s v-aru v-kuru v-suru).freeze

  # Conjugation modes that each have a matching has_<mode> flag.
  BEHAVIOR_TYPES = %w(imperative passive volitional causative causative_passive).freeze

  attr_accessor :kanji, :hiragana, :romaji, :part_of_speech, :passive_forms,
                :stem_form, :hiragana_forms, :romaji_forms, :negative_stem,
                :base, :passive_dictionary_form, :causative_dictionary_form,
                :causative_passive_dictionary_form, :causative_forms, :conjugations,
                :causative_passive_forms_hiragana, :causative_passive_forms_romaji,
                :passive_forms_hiragana, :passive_forms_romaji, :causative_passive_forms,
                :causative_forms_hiragana, :causative_forms_romaji, :has_imperative,
                :has_causative, :has_causative_passive, :has_passive, :has_volitional

  # @param kanji [String] dictionary form as normally written, e.g. "言う"
  # @param hiragana [String] reading of the dictionary form, e.g. "いう"
  def initialize(kanji, hiragana)
    @kanji = kanji
    @hiragana = hiragana
    # The converter works in place, so hand it a copy of the reading.
    @romaji = Japanese::ToRomaji.convert_hiragana(String.new(hiragana))
    # resolve_verb_class stores its result through part_of_speech= itself.
    # Its return value is deliberately not assigned: for the irregular
    # る-verbs it can be the lookup Hash rather than the class label, which
    # used to overwrite the label that had just been stored.
    resolve_verb_class
    set_verb_behavior_types
    @hiragana_forms = {}
    @romaji_forms = {}
    @passive_forms = {}
    @causative_forms = {}
    @conjugations = {}
    @causative_passive_forms = {}
    @causative_passive_forms_hiragana = {}
    @causative_passive_forms_romaji = {}
    @passive_forms_hiragana = {}
    @passive_forms_romaji = {}
    @causative_forms_hiragana = {}
    @causative_forms_romaji = {}
    process_verb
  end

  # Defaults all has_* attributes to true for now. When process_verb is
  # called, this will conjugate the verb for all possible modes. This may
  # not make sense for all verbs -- for example, some verbs may never
  # be realistically used in the imperative sense, such as "存在しろ!"
  # ("Exist!"), which while grammatically correct, may sound nonsensical.
  #
  # Nothing happens when part_of_speech is not a known verb class, and a
  # flag that has been set to false explicitly is left alone.
  def set_verb_behavior_types
    return unless CONJUGABLE_PARTS_OF_SPEECH.include?(part_of_speech)

    BEHAVIOR_TYPES.each do |mode|
      next if public_send("has_#{mode}") == false

      public_send("has_#{mode}=", true)
    end
  end
end
|
|
59
|
+
# JayVerb.new("言う", "いう")
|
metadata
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: JayVerb
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Zack Kayser
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2016-11-19 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: A gem for working with and conjugating Japanese verbs
|
|
14
|
+
email: zkayser@gmail.com
|
|
15
|
+
executables: []
|
|
16
|
+
extensions: []
|
|
17
|
+
extra_rdoc_files: []
|
|
18
|
+
files:
|
|
19
|
+
- lib/japanese/conjugator.rb
|
|
20
|
+
- lib/japanese/to_romaji.rb
|
|
21
|
+
- lib/japanese/verb_identifier.rb
|
|
22
|
+
- lib/jay_verb.rb
|
|
23
|
+
homepage:
|
|
24
|
+
licenses:
|
|
25
|
+
- MIT
|
|
26
|
+
metadata: {}
|
|
27
|
+
post_install_message:
|
|
28
|
+
rdoc_options: []
|
|
29
|
+
require_paths:
|
|
30
|
+
- lib
|
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
32
|
+
requirements:
|
|
33
|
+
- - ">="
|
|
34
|
+
- !ruby/object:Gem::Version
|
|
35
|
+
version: '0'
|
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
requirements: []
|
|
42
|
+
rubyforge_project:
|
|
43
|
+
rubygems_version: 2.4.6
|
|
44
|
+
signing_key:
|
|
45
|
+
specification_version: 4
|
|
46
|
+
summary: Japanese verb conjugator
|
|
47
|
+
test_files: []
|