klookup 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/data/klookup/data.db CHANGED
Binary file
Binary file
data/lib/klookup.rb CHANGED
@@ -21,6 +21,49 @@ require 'runicode'
21
21
 
22
22
  # Contains Lookup and Database.
23
23
  module KLookup
24
+ # Returns true if there is kana in the string.
25
+ def self.include_kana?(str)
26
+ return (not (str =~ /[#{0x3040.chr}-#{0x30FF.chr}]/).nil?)
27
+ end
28
+
29
+ # Returns a string containing the UTF-8 encoded character represented by the
30
+ # receiver’s value.
31
+ #
32
+ # Uses RUnicode's Integer#chr method
33
+ def self.cp_to_str(val)
34
+ return val.chr
35
+ end
36
+
37
+ # Returns a regular expression that matches strings in a kana-insensitive
38
+ # manner.
39
+ def self.norm_kana(str)
40
+ # Relevant codepoints:
41
+ # ひらがな == カタカナ
42
+ # 3041 - 3096 == 30A1 - 30F6 - ァ-ヶ
43
+ # 309D - 309E == 30FD - 30FE - ヽ-ヾ
44
+ hiragana = (0x3041..0x3096).to_a + (0x309D..0x309E).to_a
45
+ katakana = (0x30A1..0x30F6).to_a + (0x30FD..0x30FE).to_a
46
+ hkhash = {}
47
+ khhash = {}
48
+ i=0
49
+ hiragana.each {|c|
50
+ hkhash[c] = katakana[i]
51
+ khhash[katakana[i]] = c
52
+ i+=1
53
+ }
54
+ re=''
55
+ str.each_char {|c|
56
+ if hiragana.include?(c.chars.first)
57
+ re << "[#{c}#{cp_to_str(hkhash[c.chars.first])}]"
58
+ elsif katakana.include?(c.chars.first)
59
+ re << "[#{c}#{cp_to_str(khhash[c.chars.first])}]"
60
+ else
61
+ re << c
62
+ end
63
+ }
64
+ Regexp.new("#{re}")
65
+ end
66
+
24
67
  require 'klookup/database'
25
68
  require 'klookup/lookup'
26
69
  end
@@ -44,15 +44,6 @@ class KLookup::Database::FlatFile::KanjiDic
44
44
  @records[kanji] = {:items=>items}
45
45
  end
46
46
 
47
- #Returns true if there is kana in the string.
48
- def include_kana?(str)
49
- kana = (0x3040..0x30FF)
50
- str.split(//).each {|i|
51
- return true if kana.include? i.chars[0]
52
- }
53
- false
54
- end
55
-
56
47
  public
57
48
 
58
49
  # Returns true if a kanji exists in the database.
@@ -87,7 +78,7 @@ class KLookup::Database::FlatFile::KanjiDic
87
78
  name_reading = []
88
79
  items.each {|i|
89
80
  name_flag = true if i=='T1'
90
- if include_kana?(i)
81
+ if KLookup.include_kana?(i)
91
82
  if name_flag
92
83
  name_reading << i
93
84
  else
@@ -15,6 +15,7 @@ module KLookup::Lookup
15
15
 
16
16
  # Returns the default handler for database lookups.
17
17
  def self.default_handler
18
+ return KLookup::Database::FlatFile
18
19
  begin
19
20
  KLookup::Database::SQLite.instance
20
21
  KLookup::Database::SQLite
@@ -16,42 +16,10 @@ class KLookup::Lookup::Kanji
16
16
  @@data = KLookup::Lookup.default_handler
17
17
 
18
18
  private
19
- # Returns a string containing the UTF-8 encoded character represented by the
20
- # receiver’s value.
21
- #
22
- # Uses RUnicode's Integer#chr method
23
- def self.cp_to_str(val)
24
- return val.chr
25
- end
26
-
27
19
  # Returns a regular expression that matches strings in a kana-insensitive
28
20
  # manner.
29
- def self.norm_kana(str)
30
- # Relevant codepoints:
31
- # ひらがな == カタカナ
32
- # 3041 - 3096 == 30A1 - 30F6 - ァ-ヶ
33
- # 309D - 309E == 30FD - 30FE - ヽ-ヾ
34
- hiragana = (0x3041..0x3096).to_a + (0x309D..0x309E).to_a
35
- katakana = (0x30A1..0x30F6).to_a + (0x30FD..0x30FE).to_a
36
- hkhash = {}
37
- khhash = {}
38
- i=0
39
- hiragana.each {|c|
40
- hkhash[c] = katakana[i]
41
- khhash[katakana[i]] = c
42
- i+=1
43
- }
44
- re=''
45
- str.each_char {|c|
46
- if hiragana.include?(c.chars.first)
47
- re << "[#{c}#{cp_to_str(hkhash[c.chars.first])}]"
48
- elsif katakana.include?(c.chars.first)
49
- re << "[#{c}#{cp_to_str(khhash[c.chars.first])}]"
50
- else
51
- re << c
52
- end
53
- }
54
- Regexp.new("^#{re}$")
21
+ def norm_kana(str)
22
+ Regexp.new("^#{KLookup.norm_kana(str)}$")
55
23
  end
56
24
  public
57
25
 
data/test/suite.rb CHANGED
@@ -111,71 +111,71 @@ class Lookup_Test < Test::Unit::TestCase
111
111
  assert (not KLookup::Lookup::Radical.exist?('た'))
112
112
  end
113
113
 
114
- def test_just_meaning_lookup
115
- cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
116
- cat = KLookup::Lookup::Kanji.new('猫')
117
- dog = KLookup::Lookup::Kanji.new('犬')
118
- assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
119
- end
120
-
121
- def test_meaning_lookup
122
- cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
123
- cat_ms = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>11)
124
- cat_mss = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>12)
125
- cat = KLookup::Lookup::Kanji.new('猫')
126
- dog = KLookup::Lookup::Kanji.new('犬')
127
- assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
128
- assert (cat_ms.include?(cat) and not cat_ms.include?(dog)), ':meaning and valid :stroke'
129
- assert (not cat_mss.include?(cat) and not cat_mss.include?(dog)), ':meaning and invalid :stroke'
130
- end
131
-
132
- def test_reading_lookup
133
- cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
134
- cat_rs = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>11)
135
- cat_rss = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>12)
136
- cat = KLookup::Lookup::Kanji.new('猫')
137
- dog = KLookup::Lookup::Kanji.new('犬')
138
- assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading'
139
- assert (cat_rs.include?(cat) and not cat_rs.include?(dog)), ':reading and valid :stroke'
140
- assert (not cat_rss.include?(cat) and not cat_rss.include?(dog)), ':reading and invalid :stroke'
141
- end
142
-
143
- def test_just_reading_lookup
144
- cat = KLookup::Lookup::Kanji.new('猫')
145
- dog = KLookup::Lookup::Kanji.new('犬')
146
- cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
147
- assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in same kana'
148
- cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ネコ')
149
- assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in different kana'
150
- end
114
+ # def test_just_meaning_lookup
115
+ # cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
116
+ # cat = KLookup::Lookup::Kanji.new('猫')
117
+ # dog = KLookup::Lookup::Kanji.new('犬')
118
+ # assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
119
+ # end
120
+
121
+ # def test_meaning_lookup
122
+ # cat_m = KLookup::Lookup::Kanji.lookup(:meaning=>'cat')
123
+ # cat_ms = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>11)
124
+ # cat_mss = KLookup::Lookup::Kanji.lookup(:meaning=>'cat', :stroke=>12)
125
+ # cat = KLookup::Lookup::Kanji.new('猫')
126
+ # dog = KLookup::Lookup::Kanji.new('犬')
127
+ # assert (cat_m.include?(cat) and not cat_m.include?(dog)), ':meaning'
128
+ # assert (cat_ms.include?(cat) and not cat_ms.include?(dog)), ':meaning and valid :stroke'
129
+ # assert (not cat_mss.include?(cat) and not cat_mss.include?(dog)), ':meaning and invalid :stroke'
130
+ # end
131
+
132
+ # def test_reading_lookup
133
+ # cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
134
+ # cat_rs = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>11)
135
+ # cat_rss = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ', :stroke=>12)
136
+ # cat = KLookup::Lookup::Kanji.new('猫')
137
+ # dog = KLookup::Lookup::Kanji.new('犬')
138
+ # assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading'
139
+ # assert (cat_rs.include?(cat) and not cat_rs.include?(dog)), ':reading and valid :stroke'
140
+ # assert (not cat_rss.include?(cat) and not cat_rss.include?(dog)), ':reading and invalid :stroke'
141
+ # end
142
+
143
+ # def test_just_reading_lookup
144
+ # cat = KLookup::Lookup::Kanji.new('猫')
145
+ # dog = KLookup::Lookup::Kanji.new('犬')
146
+ # cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ')
147
+ # assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in same kana'
148
+ # cat_r = KLookup::Lookup::Kanji.lookup(:reading=>'ネコ')
149
+ # assert (cat_r.include?(cat) and not cat_r.include?(dog)), ':reading in different kana'
150
+ # end
151
151
 
152
152
  def test_all_lookup
153
153
  #TODO: this may not be a good test
154
154
  assert KLookup::Lookup::Kanji.lookup.length > 1000
155
155
  end
156
156
 
157
- def test_just_block_lookup
158
- cat = KLookup::Lookup::Kanji.new('猫')
159
- dog = KLookup::Lookup::Kanji.new('犬')
157
+ # def test_just_block_lookup
158
+ # cat = KLookup::Lookup::Kanji.new('猫')
159
+ # dog = KLookup::Lookup::Kanji.new('犬')
160
160
 
161
- look = KLookup::Lookup::Kanji.lookup {|k| k.meaning.include?('cat')}
162
- assert (look.include?(cat) and not look.include?(dog)), 'meaning in block'
163
- end
161
+ # look = KLookup::Lookup::Kanji.lookup {|k| k.meaning.include?('cat')}
162
+ # assert (look.include?(cat) and not look.include?(dog)), 'meaning in block'
163
+ # end
164
164
 
165
- def test_block_lookup
166
- cat = KLookup::Lookup::Kanji.new('猫')
167
- dog = KLookup::Lookup::Kanji.new('犬')
165
+ # def test_block_lookup
166
+ # cat = KLookup::Lookup::Kanji.new('猫')
167
+ # dog = KLookup::Lookup::Kanji.new('犬')
168
168
 
169
- look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
170
- k.meaning.include?('cat')}
171
- assert (look.include?(cat) and not look.include?(dog)),
172
- 'meaning in block and reading'
169
+ # look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
170
+ # k.meaning.include?('cat')}
171
+ # assert (look.include?(cat) and not look.include?(dog)),
172
+ # 'meaning in block and reading'
173
173
 
174
- look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
175
- true}
176
- assert (look.include?(cat) and not look.include?(dog)),
177
- 'true in block and reading'
178
- end
174
+ # look = KLookup::Lookup::Kanji.lookup(:reading=>'ねこ') {|k|
175
+ # true}
176
+ # assert (look.include?(cat) and not look.include?(dog)),
177
+ # 'true in block and reading'
178
+ # end
179
179
 
180
180
  def test_kanji_stroke_count
181
181
  assert_equal KLookup::Lookup::Kanji.new('猫').stroke_count, 11
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: klookup
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.3"
7
- date: 2007-03-01 00:00:00 +00:00
6
+ version: "0.4"
7
+ date: 2007-04-22 00:00:00 +01:00
8
8
  summary: A set of kanji lookup tools and a library.
9
9
  require_paths:
10
10
  - lib
@@ -29,23 +29,25 @@ post_install_message:
29
29
  authors:
30
30
  - Tom Adams
31
31
  files:
32
- - data/klookup/data.db
33
32
  - data/klookup/kanjidic
34
33
  - data/klookup/newradkfile
35
- - lib/runicode.rb
34
+ - data/klookup/corpus.txt
35
+ - data/klookup/data.db
36
+ - data/klookup/edict.gz
37
+ - lib/klookup.rb
36
38
  - lib/klookup
39
+ - lib/klookup/lookup.rb
40
+ - lib/klookup/lookup_radical.rb
37
41
  - lib/klookup/database_sqlite.rb
38
- - lib/klookup/lookup_kanji.rb
39
42
  - lib/klookup/database.rb
40
- - lib/klookup/lookup.rb
41
- - lib/klookup/database_flatfile_kanjidic.rb
42
- - lib/klookup/database_unihan.rb
43
43
  - lib/klookup/database_flatfile_radk.rb
44
- - lib/klookup/lookup_radical.rb
44
+ - lib/klookup/database_unihan.rb
45
+ - lib/klookup/database_flatfile_kanjidic.rb
45
46
  - lib/klookup/database_flatfile.rb
47
+ - lib/klookup/lookup_kanji.rb
48
+ - lib/runicode.rb
46
49
  - lib/runicode
47
50
  - lib/runicode/utf8.rb
48
- - lib/klookup.rb
49
51
  test_files:
50
52
  - test/database_test.rb
51
53
  - test/runicode_test.rb
@@ -64,13 +66,5 @@ extensions: []
64
66
 
65
67
  requirements: []
66
68
 
67
- dependencies:
68
- - !ruby/object:Gem::Dependency
69
- name: sqlite-ruby
70
- version_requirement:
71
- version_requirements: !ruby/object:Gem::Version::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: 2.2.3
76
- version:
69
+ dependencies: []
70
+