japanese_names 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/enamdict.min +433 -96300
- data/lib/japanese_names/splitter.rb +6 -0
- data/lib/japanese_names/version.rb +1 -1
- data/spec/config.yml +5 -0
- data/spec/unit/splitter_spec.rb +12 -3
- metadata +1 -1
@@ -15,6 +15,12 @@ module JapaneseNames
|
|
15
15
|
kanji = kanji.strip
|
16
16
|
kana = kana.strip
|
17
17
|
|
18
|
+
# Short-circuit: if a single dictionary entry matches the entire string, return it whole as the last name (no first name)
|
19
|
+
if kanji.size <= 3 && kana.size <= 4
|
20
|
+
full_match = finder.find(kanji).detect { |d| d[0] == kanji && d[1] =~ /\A#{hk kana}\z/ }
|
21
|
+
return [[kanji, nil], [kana, nil]] if full_match
|
22
|
+
end
|
23
|
+
|
18
24
|
# Partition kanji into candidate n-grams
|
19
25
|
kanji_ngrams = Util::Ngram.ngram_partition(kanji)
|
20
26
|
|
data/spec/config.yml
CHANGED
data/spec/unit/splitter_spec.rb
CHANGED
@@ -9,14 +9,23 @@ RSpec.describe JapaneseNames::Splitter do
|
|
9
9
|
config = YAML.load_file(File.join(File.dirname(__FILE__), '..', 'config.yml'))
|
10
10
|
skip_list = config[:skip]
|
11
11
|
|
12
|
+
config[:last_names] + config[:first_names].each do |name|
|
13
|
+
kanji, kana = name.split(' ')
|
14
|
+
next if skip_list.index(kanji)
|
15
|
+
|
16
|
+
it "should match #{kanji} #{kana}" do
|
17
|
+
result = subject.split(kanji, kana)
|
18
|
+
expect(result).to eq [[kanji, nil], [kana, nil]]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
12
22
|
config[:last_names].each do |last_name|
|
13
23
|
config[:first_names].each do |first_name|
|
14
24
|
kanji_fam, kana_fam = last_name.split(' ')
|
15
25
|
kanji_giv, kana_giv = first_name.split(' ')
|
26
|
+
next if skip_list.index([kanji_fam, kanji_giv].join(' '))
|
16
27
|
|
17
|
-
|
18
|
-
|
19
|
-
it "should parse #{kanji_fam + kanji_giv} #{kana_fam + kana_giv}" do
|
28
|
+
it "should split #{kanji_fam + kanji_giv} #{kana_fam + kana_giv}" do
|
20
29
|
result = subject.split(kanji_fam + kanji_giv, kana_fam + kana_giv)
|
21
30
|
expect(result).to eq [[kanji_fam, kanji_giv], [kana_fam, kana_giv]]
|
22
31
|
end
|