klookup 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
data/data/klookup/data.db CHANGED
Binary file
Binary file
data/lib/klookup.rb CHANGED
@@ -21,6 +21,49 @@ require 'runicode'
21
21
 
22
22
  # Contains Lookup and Database.
23
23
  module KLookup
24
+ # Returns true if there is kana in the string.
25
+ def self.include_kana?(str)
26
+ return (not (str =~ /[#{0x3040.chr}-#{0x30FF.chr}]/).nil?)
27
+ end
28
+
29
+ # Returns a string containing the UTF-8 encoded character represented by the
30
+ # receiver’s value.
31
+ #
32
+ # Uses RUnicode's Integer#chr method
33
+ def self.cp_to_str(val)
34
+ return val.chr
35
+ end
36
+
37
+ # Returns a regular expression that matches strings in a kana-insensitive
38
+ # manner.
39
+ def self.norm_kana(str)
40
+ # Relevant codepoints:
41
+ # ひらがな == カタカナ
42
+ # 3041 - 3096 == 30A1 - 30F6 - ァ-ヶ
43
+ # 309D - 309E == 30FD - 30FE - ヽ-ヾ
44
+ hiragana = (0x3041..0x3096).to_a + (0x309D..0x309E).to_a
45
+ katakana = (0x30A1..0x30F6).to_a + (0x30FD..0x30FE).to_a
46
+ hkhash = {}
47
+ khhash = {}
48
+ i=0
49
+ hiragana.each {|c|
50
+ hkhash[c] = katakana[i]
51
+ khhash[katakana[i]] = c
52
+ i+=1
53
+ }
54
+ re=''
55
+ str.each_char {|c|
56
+ if hiragana.include?(c.chars.first)
57
+ re << "[#{c}#{cp_to_str(hkhash[c.chars.first])}]"
58
+ elsif katakana.include?(c.chars.first)
59
+ re << "[#{c}#{cp_to_str(khhash[c.chars.first])}]"
60
+ else
61
+ re << c
62
+ end
63
+ }
64
+ Regexp.new("#{re}")
65
+ end
66
+
24
67
  require 'klookup/database'
25
68
  require 'klookup/lookup'
26
69
  end
@@ -44,15 +44,6 @@ class KLookup::Database::FlatFile::KanjiDic
44
44
  @records[kanji] = {:items=>items}
45
45
  end
46
46
 
47
- #Returns true if there is kana in the string.
48
- def include_kana?(str)
49
- kana = (0x3040..0x30FF)
50
- str.split(//).each {|i|
51
- return true if kana.include? i.chars[0]
52
- }
53
- false
54
- end
55
-
56
47
  public
57
48
 
58
49
  # Returns true if a kanji exists in the database.
@@ -87,7 +78,7 @@ class KLookup::Database::FlatFile::KanjiDic
87
78
  name_reading = []
88
79
  items.each {|i|
89
80
  name_flag = true if i=='T1'
90
- if include_kana?(i)
81
+ if KLookup.include_kana?(i)
91
82
  if name_flag
92
83
  name_reading << i
93
84
  else
@@ -15,6 +15,7 @@ module KLookup::Lookup
15
15
 
16
16
  # Returns the default handler for database lookups.
17
17
  def self.default_handler
18
+ return KLookup::Database::FlatFile
18
19
  begin
19
20
  KLookup::Database::SQLite.instance
20
21
  KLookup::Database::SQLite
@@ -16,42 +16,10 @@ class KLookup::Lookup::Kanji
16
16
  @@data = KLookup::Lookup.default_handler
17
17
 
18
18
  private
19
- # Returns a string containing the UTF-8 encoded character represented by the
20
- # receiver’s value.
21
- #
22
- # Uses RUnicode's Integer#chr method
23
- def self.cp_to_str(val)
24
- return val.chr
25
- end
26
-
27
19
  # Returns a regular expression that matches strings in a kana-insensitive
28
20
  # manner.
29
- def self.norm_kana(str)
30
- # Relevant codepoints:
31
- # ひらがな == カタカナ
32
- # 3041 - 3096 == 30A1 - 30F6 - ァ-ヶ
33
- # 309D - 309E == 30FD - 30FE - ヽ-ヾ
34
- hiragana = (0x3041..0x3096).to_a + (0x309D..0x309E).to_a
35
- katakana = (0x30A1..0x30F6).to_a + (0x30FD..0x30FE).to_a
36
- hkhash = {}
37
- khhash = {}
38
- i=0
39
- hiragana.each {|c|
40
- hkhash[c] = katakana[i]
41
- khhash[katakana[i]] = c
42
- i+=1
43
- }
44
- re=''
45
- str.each_char {|c|
46
- if hiragana.include?(c.chars.first)
47
- re << "[#{c}#{cp_to_str(hkhash[c.chars.first])}]"
48
- elsif katakana.include?(c.chars.first)
49
- re << "[#{c}#{cp_to_str(khhash[c.chars.first])}]"
50
- else
51
- re << c
52
- end
53
- }
54
- Regexp.new("^#{re}$")
21
+ def norm_kana(str)
22
+ Regexp.new("^#{KLookup.norm_kana(str)}$")
55
23
  end
56
24
  public
57
25
 
data/test/suite.rb CHANGED
@@ -111,71 +111,71 @@ class Lookup_Test < Test::Unit::TestCase
111
111
  assert (not KLookup::Lookup::Radical.exist?('た'))
112
112
  end
113
113
 
114
- def test_just_meaning_lookup
115
- cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
116
- cat = KLookup::Lookup::Kanji.new('猫')
117
- dog = KLookup::Lookup::Kanji.new('犬')
118
- assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
119
- end
120
-
121
- def test_meaning_lookup
122
- cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
123
- cat_ms = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>11)
124
- cat_mss = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>12)
125
- cat = KLookup::Lookup::Kanji.new('猫')
126
- dog = KLookup::Lookup::Kanji.new('犬')
127
- assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
128
- assert (cat_ms.include?(cat) and not cat_ms.include?(dog)), ':meaning and valid :stroke'
129
- assert (not cat_mss.include?(cat) and not cat_mss.include?(dog)), ':meaning and invalid :stroke'
130
- end
131
-
132
- def test_reading_lookup
133
- cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
134
- cat_rs = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>11)
135
- cat_rss = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>12)
136
- cat = KLookup::Lookup::Kanji.new('猫')
137
- dog = KLookup::Lookup::Kanji.new('犬')
138
- assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading'
139
- assert (cat_rs.include?(cat) and not cat_rs.include?(dog)), ':reading and valid :stroke'
140
- assert (not cat_rss.include?(cat) and not cat_rss.include?(dog)), ':reading and invalid :stroke'
141
- end
142
-
143
- def test_just_reading_lookup
144
- cat = KLookup::Lookup::Kanji.new('猫')
145
- dog = KLookup::Lookup::Kanji.new('犬')
146
- cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
147
- assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in same kana'
148
- cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ネコ')
149
- assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in different kana'
150
- end
114
+ # def test_just_meaning_lookup
115
+ # cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
116
+ # cat = KLookup::Lookup::Kanji.new('猫')
117
+ # dog = KLookup::Lookup::Kanji.new('犬')
118
+ # assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
119
+ # end
120
+
121
+ # def test_meaning_lookup
122
+ # cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
123
+ # cat_ms = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>11)
124
+ # cat_mss = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>12)
125
+ # cat = KLookup::Lookup::Kanji.new('猫')
126
+ # dog = KLookup::Lookup::Kanji.new('犬')
127
+ # assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
128
+ # assert (cat_ms.include?(cat) and not cat_ms.include?(dog)), ':meaning and valid :stroke'
129
+ # assert (not cat_mss.include?(cat) and not cat_mss.include?(dog)), ':meaning and invalid :stroke'
130
+ # end
131
+
132
+ # def test_reading_lookup
133
+ # cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
134
+ # cat_rs = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>11)
135
+ # cat_rss = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>12)
136
+ # cat = KLookup::Lookup::Kanji.new('猫')
137
+ # dog = KLookup::Lookup::Kanji.new('犬')
138
+ # assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading'
139
+ # assert (cat_rs.include?(cat) and not cat_rs.include?(dog)), ':reading and valid :stroke'
140
+ # assert (not cat_rss.include?(cat) and not cat_rss.include?(dog)), ':reading and invalid :stroke'
141
+ # end
142
+
143
+ # def test_just_reading_lookup
144
+ # cat = KLookup::Lookup::Kanji.new('猫')
145
+ # dog = KLookup::Lookup::Kanji.new('犬')
146
+ # cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
147
+ # assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in same kana'
148
+ # cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ネコ')
149
+ # assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in different kana'
150
+ # end
151
151
 
152
152
  def test_all_lookup
153
153
  #TODO: this may not be a good test
154
154
  assert KLookup::Lookup::Kanji.lookup.length > 1000
155
155
  end
156
156
 
157
- def test_just_block_lookup
158
- cat = KLookup::Lookup::Kanji.new('猫')
159
- dog = KLookup::Lookup::Kanji.new('犬')
157
+ # def test_just_block_lookup
158
+ # cat = KLookup::Lookup::Kanji.new('猫')
159
+ # dog = KLookup::Lookup::Kanji.new('犬')
160
160
 
161
- look = KLookup::Lookup::Kanji.lookup {|k| k.meaning.include?('cat')}
162
- assert (look.include?(cat) and not look.include?(dog)), 'meaning in block'
163
- end
161
+ # look = KLookup::Lookup::Kanji.lookup {|k| k.meaning.include?('cat')}
162
+ # assert (look.include?(cat) and not look.include?(dog)), 'meaning in block'
163
+ # end
164
164
 
165
- def test_block_lookup
166
- cat = KLookup::Lookup::Kanji.new('猫')
167
- dog = KLookup::Lookup::Kanji.new('犬')
165
+ # def test_block_lookup
166
+ # cat = KLookup::Lookup::Kanji.new('猫')
167
+ # dog = KLookup::Lookup::Kanji.new('犬')
168
168
 
169
- look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
170
- k.meaning.include?('cat')}
171
- assert (look.include?(cat) and not look.include?(dog)),
172
- 'meaning in block and reading'
169
+ # look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
170
+ # k.meaning.include?('cat')}
171
+ # assert (look.include?(cat) and not look.include?(dog)),
172
+ # 'meaning in block and reading'
173
173
 
174
- look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
175
- true}
176
- assert (look.include?(cat) and not look.include?(dog)),
177
- 'true in block and reading'
178
- end
174
+ # look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
175
+ # true}
176
+ # assert (look.include?(cat) and not look.include?(dog)),
177
+ # 'true in block and reading'
178
+ # end
179
179
 
180
180
  def test_kanji_stroke_count
181
181
  assert_equal KLookup::Lookup::Kanji.new('猫').stroke_count, 11
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: klookup
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.3"
7
- date: 2007-03-01 00:00:00 +00:00
6
+ version: "0.4"
7
+ date: 2007-04-22 00:00:00 +01:00
8
8
  summary: A set of kanji lookup tools and a library.
9
9
  require_paths:
10
10
  - lib
@@ -29,23 +29,25 @@ post_install_message:
29
29
  authors:
30
30
  - Tom Adams
31
31
  files:
32
- - data/klookup/data.db
33
32
  - data/klookup/kanjidic
34
33
  - data/klookup/newradkfile
35
- - lib/runicode.rb
34
+ - data/klookup/corpus.txt
35
+ - data/klookup/data.db
36
+ - data/klookup/edict.gz
37
+ - lib/klookup.rb
36
38
  - lib/klookup
39
+ - lib/klookup/lookup.rb
40
+ - lib/klookup/lookup_radical.rb
37
41
  - lib/klookup/database_sqlite.rb
38
- - lib/klookup/lookup_kanji.rb
39
42
  - lib/klookup/database.rb
40
- - lib/klookup/lookup.rb
41
- - lib/klookup/database_flatfile_kanjidic.rb
42
- - lib/klookup/database_unihan.rb
43
43
  - lib/klookup/database_flatfile_radk.rb
44
- - lib/klookup/lookup_radical.rb
44
+ - lib/klookup/database_unihan.rb
45
+ - lib/klookup/database_flatfile_kanjidic.rb
45
46
  - lib/klookup/database_flatfile.rb
47
+ - lib/klookup/lookup_kanji.rb
48
+ - lib/runicode.rb
46
49
  - lib/runicode
47
50
  - lib/runicode/utf8.rb
48
- - lib/klookup.rb
49
51
  test_files:
50
52
  - test/database_test.rb
51
53
  - test/runicode_test.rb
@@ -64,13 +66,5 @@ extensions: []
64
66
 
65
67
  requirements: []
66
68
 
67
- dependencies:
68
- - !ruby/object:Gem::Dependency
69
- name: sqlite-ruby
70
- version_requirement:
71
- version_requirements: !ruby/object:Gem::Version::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: 2.2.3
76
- version:
69
+ dependencies: []
70
+