klookup 0.3 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/klookup.cgi +118 -30
- data/data/klookup/corpus.txt +2718 -0
- data/data/klookup/data.db +0 -0
- data/data/klookup/edict.gz +0 -0
- data/lib/klookup.rb +43 -0
- data/lib/klookup/database_flatfile_kanjidic.rb +1 -10
- data/lib/klookup/lookup.rb +1 -0
- data/lib/klookup/lookup_kanji.rb +2 -34
- data/test/suite.rb +55 -55
- metadata +14 -20
data/data/klookup/data.db
CHANGED
Binary file
|
Binary file
|
data/lib/klookup.rb
CHANGED
@@ -21,6 +21,49 @@ require 'runicode'
|
|
21
21
|
|
22
22
|
# Contains Lookup and Database.
|
23
23
|
module KLookup
|
24
|
+
# Returns true if there is kana in the string.
|
25
|
+
def self.include_kana?(str)
|
26
|
+
return (not (str =~ /[#{0x3040.chr}-#{0x30FF.chr}]/).nil?)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Returns a string containing the UTF-8 encoded character represented by the
|
30
|
+
# receiver’s value.
|
31
|
+
#
|
32
|
+
# Uses RUnicode's Integer#chr method
|
33
|
+
def self.cp_to_str(val)
|
34
|
+
return val.chr
|
35
|
+
end
|
36
|
+
|
37
|
+
# Returns a regular expression that matches strings in a kana-insensitive
|
38
|
+
# manner.
|
39
|
+
def self.norm_kana(str)
|
40
|
+
# Relevant codepoints:
|
41
|
+
# ひらがな == カタカナ
|
42
|
+
# 3041 - 3096 == 30A1 - 30F6 - ァ-ヶ
|
43
|
+
# 309D - 309E == 30FD - 30FE - ヽ-ヾ
|
44
|
+
hiragana = (0x3041..0x3096).to_a + (0x309D..0x309E).to_a
|
45
|
+
katakana = (0x30A1..0x30F6).to_a + (0x30FD..0x30FE).to_a
|
46
|
+
hkhash = {}
|
47
|
+
khhash = {}
|
48
|
+
i=0
|
49
|
+
hiragana.each {|c|
|
50
|
+
hkhash[c] = katakana[i]
|
51
|
+
khhash[katakana[i]] = c
|
52
|
+
i+=1
|
53
|
+
}
|
54
|
+
re=''
|
55
|
+
str.each_char {|c|
|
56
|
+
if hiragana.include?(c.chars.first)
|
57
|
+
re << "[#{c}#{cp_to_str(hkhash[c.chars.first])}]"
|
58
|
+
elsif katakana.include?(c.chars.first)
|
59
|
+
re << "[#{c}#{cp_to_str(khhash[c.chars.first])}]"
|
60
|
+
else
|
61
|
+
re << c
|
62
|
+
end
|
63
|
+
}
|
64
|
+
Regexp.new("#{re}")
|
65
|
+
end
|
66
|
+
|
24
67
|
require 'klookup/database'
|
25
68
|
require 'klookup/lookup'
|
26
69
|
end
|
@@ -44,15 +44,6 @@ class KLookup::Database::FlatFile::KanjiDic
|
|
44
44
|
@records[kanji] = {:items=>items}
|
45
45
|
end
|
46
46
|
|
47
|
-
#Returns true if there is kana in the string.
|
48
|
-
def include_kana?(str)
|
49
|
-
kana = (0x3040..0x30FF)
|
50
|
-
str.split(//).each {|i|
|
51
|
-
return true if kana.include? i.chars[0]
|
52
|
-
}
|
53
|
-
false
|
54
|
-
end
|
55
|
-
|
56
47
|
public
|
57
48
|
|
58
49
|
# Returns true if a kanji exists in the database.
|
@@ -87,7 +78,7 @@ class KLookup::Database::FlatFile::KanjiDic
|
|
87
78
|
name_reading = []
|
88
79
|
items.each {|i|
|
89
80
|
name_flag = true if i=='T1'
|
90
|
-
if include_kana?(i)
|
81
|
+
if KLookup.include_kana?(i)
|
91
82
|
if name_flag
|
92
83
|
name_reading << i
|
93
84
|
else
|
data/lib/klookup/lookup.rb
CHANGED
data/lib/klookup/lookup_kanji.rb
CHANGED
@@ -16,42 +16,10 @@ class KLookup::Lookup::Kanji
|
|
16
16
|
@@data = KLookup::Lookup.default_handler
|
17
17
|
|
18
18
|
private
|
19
|
-
# Returns a string containing the UTF-8 encoded character represented by the
|
20
|
-
# receiver’s value.
|
21
|
-
#
|
22
|
-
# Uses RUnicode's Integer#chr method
|
23
|
-
def self.cp_to_str(val)
|
24
|
-
return val.chr
|
25
|
-
end
|
26
|
-
|
27
19
|
# Returns a regular expression that matches strings in a kana-insensitive
|
28
20
|
# manner.
|
29
|
-
def self.norm_kana(str)
|
30
|
-
|
31
|
-
# ひらがな == カタカナ
|
32
|
-
# 3041 - 3096 == 30A1 - 30F6 - ァ-ヶ
|
33
|
-
# 309D - 309E == 30FD - 30FE - ヽ-ヾ
|
34
|
-
hiragana = (0x3041..0x3096).to_a + (0x309D..0x309E).to_a
|
35
|
-
katakana = (0x30A1..0x30F6).to_a + (0x30FD..0x30FE).to_a
|
36
|
-
hkhash = {}
|
37
|
-
khhash = {}
|
38
|
-
i=0
|
39
|
-
hiragana.each {|c|
|
40
|
-
hkhash[c] = katakana[i]
|
41
|
-
khhash[katakana[i]] = c
|
42
|
-
i+=1
|
43
|
-
}
|
44
|
-
re=''
|
45
|
-
str.each_char {|c|
|
46
|
-
if hiragana.include?(c.chars.first)
|
47
|
-
re << "[#{c}#{cp_to_str(hkhash[c.chars.first])}]"
|
48
|
-
elsif katakana.include?(c.chars.first)
|
49
|
-
re << "[#{c}#{cp_to_str(khhash[c.chars.first])}]"
|
50
|
-
else
|
51
|
-
re << c
|
52
|
-
end
|
53
|
-
}
|
54
|
-
Regexp.new("^#{re}$")
|
21
|
+
def norm_kana(str)
|
22
|
+
Regexp.new("^#{KLookup.norm_kana(str)}$")
|
55
23
|
end
|
56
24
|
public
|
57
25
|
|
data/test/suite.rb
CHANGED
@@ -111,71 +111,71 @@ class Lookup_Test < Test::Unit::TestCase
|
|
111
111
|
assert (not KLookup::Lookup::Radical.exist?('た'))
|
112
112
|
end
|
113
113
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
114
|
+
# def test_just_meaning_lookup
|
115
|
+
# cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
|
116
|
+
# cat = KLookup::Lookup::Kanji.new('猫')
|
117
|
+
# dog = KLookup::Lookup::Kanji.new('犬')
|
118
|
+
# assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
|
119
|
+
# end
|
120
|
+
|
121
|
+
# def test_meaning_lookup
|
122
|
+
# cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
|
123
|
+
# cat_ms = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>11)
|
124
|
+
# cat_mss = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>12)
|
125
|
+
# cat = KLookup::Lookup::Kanji.new('猫')
|
126
|
+
# dog = KLookup::Lookup::Kanji.new('犬')
|
127
|
+
# assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
|
128
|
+
# assert (cat_ms.include?(cat) and not cat_ms.include?(dog)), ':meaning and valid :stroke'
|
129
|
+
# assert (not cat_mss.include?(cat) and not cat_mss.include?(dog)), ':meaning and invalid :stroke'
|
130
|
+
# end
|
131
|
+
|
132
|
+
# def test_reading_lookup
|
133
|
+
# cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
|
134
|
+
# cat_rs = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>11)
|
135
|
+
# cat_rss = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>12)
|
136
|
+
# cat = KLookup::Lookup::Kanji.new('猫')
|
137
|
+
# dog = KLookup::Lookup::Kanji.new('犬')
|
138
|
+
# assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading'
|
139
|
+
# assert (cat_rs.include?(cat) and not cat_rs.include?(dog)), ':reading and valid :stroke'
|
140
|
+
# assert (not cat_rss.include?(cat) and not cat_rss.include?(dog)), ':reading and invalid :stroke'
|
141
|
+
# end
|
142
|
+
|
143
|
+
# def test_just_reading_lookup
|
144
|
+
# cat = KLookup::Lookup::Kanji.new('猫')
|
145
|
+
# dog = KLookup::Lookup::Kanji.new('犬')
|
146
|
+
# cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
|
147
|
+
# assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in same kana'
|
148
|
+
# cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ネコ')
|
149
|
+
# assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in different kana'
|
150
|
+
# end
|
151
151
|
|
152
152
|
def test_all_lookup
|
153
153
|
#TODO: this may not be a good test
|
154
154
|
assert KLookup::Lookup::Kanji.lookup.length > 1000
|
155
155
|
end
|
156
156
|
|
157
|
-
|
158
|
-
|
159
|
-
|
157
|
+
# def test_just_block_lookup
|
158
|
+
# cat = KLookup::Lookup::Kanji.new('猫')
|
159
|
+
# dog = KLookup::Lookup::Kanji.new('犬')
|
160
160
|
|
161
|
-
|
162
|
-
|
163
|
-
|
161
|
+
# look = KLookup::Lookup::Kanji.lookup {|k| k.meaning.include?('cat')}
|
162
|
+
# assert (look.include?(cat) and not look.include?(dog)), 'meaning in block'
|
163
|
+
# end
|
164
164
|
|
165
|
-
|
166
|
-
|
167
|
-
|
165
|
+
# def test_block_lookup
|
166
|
+
# cat = KLookup::Lookup::Kanji.new('猫')
|
167
|
+
# dog = KLookup::Lookup::Kanji.new('犬')
|
168
168
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
169
|
+
# look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
|
170
|
+
# k.meaning.include?('cat')}
|
171
|
+
# assert (look.include?(cat) and not look.include?(dog)),
|
172
|
+
# 'meaning in block and reading'
|
173
173
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
174
|
+
# look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
|
175
|
+
# true}
|
176
|
+
# assert (look.include?(cat) and not look.include?(dog)),
|
177
|
+
# 'true in block and reading'
|
178
|
+
# end
|
179
179
|
|
180
180
|
def test_kanji_stroke_count
|
181
181
|
assert_equal KLookup::Lookup::Kanji.new('猫').stroke_count, 11
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: klookup
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "0.3"
|
7
|
-
date: 2007-
|
6
|
+
version: "0.4"
|
7
|
+
date: 2007-04-22 00:00:00 +01:00
|
8
8
|
summary: A set of kanji lookup tools and a library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -29,23 +29,25 @@ post_install_message:
|
|
29
29
|
authors:
|
30
30
|
- Tom Adams
|
31
31
|
files:
|
32
|
-
- data/klookup/data.db
|
33
32
|
- data/klookup/kanjidic
|
34
33
|
- data/klookup/newradkfile
|
35
|
-
- lib/runicode.rb
|
34
|
+
- data/klookup/corpus.txt
|
35
|
+
- data/klookup/data.db
|
36
|
+
- data/klookup/edict.gz
|
37
|
+
- lib/klookup.rb
|
36
38
|
- lib/klookup
|
39
|
+
- lib/klookup/lookup.rb
|
40
|
+
- lib/klookup/lookup_radical.rb
|
37
41
|
- lib/klookup/database_sqlite.rb
|
38
|
-
- lib/klookup/lookup_kanji.rb
|
39
42
|
- lib/klookup/database.rb
|
40
|
-
- lib/klookup/lookup.rb
|
41
|
-
- lib/klookup/database_flatfile_kanjidic.rb
|
42
|
-
- lib/klookup/database_unihan.rb
|
43
43
|
- lib/klookup/database_flatfile_radk.rb
|
44
|
-
- lib/klookup/lookup_radical.rb
|
44
|
+
- lib/klookup/database_unihan.rb
|
45
|
+
- lib/klookup/database_flatfile_kanjidic.rb
|
45
46
|
- lib/klookup/database_flatfile.rb
|
47
|
+
- lib/klookup/lookup_kanji.rb
|
48
|
+
- lib/runicode.rb
|
46
49
|
- lib/runicode
|
47
50
|
- lib/runicode/utf8.rb
|
48
|
-
- lib/klookup.rb
|
49
51
|
test_files:
|
50
52
|
- test/database_test.rb
|
51
53
|
- test/runicode_test.rb
|
@@ -64,13 +66,5 @@ extensions: []
|
|
64
66
|
|
65
67
|
requirements: []
|
66
68
|
|
67
|
-
dependencies:
|
68
|
-
|
69
|
-
name: sqlite-ruby
|
70
|
-
version_requirement:
|
71
|
-
version_requirements: !ruby/object:Gem::Version::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: 2.2.3
|
76
|
-
version:
|
69
|
+
dependencies: []
|
70
|
+
|