klookup 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/cklookup +91 -0
- data/bin/gklookup +48 -0
- data/bin/klookup.cgi +206 -0
- data/data/klookup/kanjidic +6356 -0
- data/data/klookup/newradkfile +1068 -0
- data/lib/klookup.rb +27 -0
- data/lib/klookup/database.rb +66 -0
- data/lib/klookup/database_flatfile.rb +52 -0
- data/lib/klookup/database_flatfile_kanjidic.rb +128 -0
- data/lib/klookup/database_flatfile_radk.rb +136 -0
- data/lib/klookup/database_unihan.rb +101 -0
- data/lib/klookup/lookup.rb +29 -0
- data/lib/klookup/lookup_kanji.rb +107 -0
- data/lib/klookup/lookup_radical.rb +74 -0
- data/test/suite.rb +148 -0
- metadata +78 -0
data/lib/klookup.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
lib/klookup.rb
|
4
|
+
|
5
|
+
Copyright © Tom Adams 2006
|
6
|
+
|
7
|
+
This programme is free software.
|
8
|
+
You can distribute/modify this program under
|
9
|
+
the terms of the Ruby License.
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
begin
|
14
|
+
require 'active_support'
|
15
|
+
rescue LoadError
|
16
|
+
require 'runicode'
|
17
|
+
end
|
18
|
+
|
19
|
+
#This allows regular expressions to work on characters.
|
20
|
+
$KCODE='u'
|
21
|
+
require 'jcode'
|
22
|
+
|
23
|
+
# Namespace for the whole library. Loading this module pulls in the
# database layer first and the lookup layer second.
module KLookup
  ['klookup/database', 'klookup/lookup'].each do |part|
    require part
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
lib/klookup/database.rb
|
4
|
+
|
5
|
+
Copyright © Tom Adams 2006
|
6
|
+
|
7
|
+
This programme is free software.
|
8
|
+
You can distribute/modify this program under
|
9
|
+
the terms of the Ruby License.
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
# Database access modules:
|
14
|
+
# - FlatFile
|
15
|
+
# - Unihan
|
16
|
+
class KLookup::Database
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# Placeholder body shared by the abstract query methods: accepts any
# arguments, ignores them, and always fails with NotImplementedError.
def undefined(*args)
  fail NotImplementedError
end
|
23
|
+
|
24
|
+
public
|
25
|
+
|
26
|
+
# Opens the data file +path+.
#
# Directory priority: the +klookup+ subdirectory of the KLOOKUP_PATH
# environment variable when it is set and non-empty, otherwise the data
# directory of the +klookup+ gem.
#
# With a block, the open file is yielded, closed when the block finishes
# (also when it raises), and the block's value is returned. Without a
# block the open file is returned and the caller must close it.
#
# Raises IOError when the gem lookup fails with a NameError.
def self.open_resource(path)
  # Choose a directory
  env = ENV['KLOOKUP_PATH']
  if env and env != ''
    dir = env + '/klookup'
  else
    # An earlier version first assigned +dir+ from +gem_path+, a name that
    # nothing visible defines; the value was never used because the lines
    # below either overwrite +dir+ or raise.
    begin
      gem 'klookup'
      dir = Gem.datadir 'klookup'
    rescue NameError
      raise IOError, 'Could not find resource %s' % path
    end
  end

  # Open a file. File.open (rather than Kernel#open) never interprets the
  # path as a command, and its block form guarantees the handle is closed.
  full_path = "#{dir}/#{path}"
  if block_given?
    File.open(full_path) { |file| yield file }
  else
    File.open(full_path)
  end
end
|
52
|
+
|
53
|
+
alias :stroke_count_list :undefined
|
54
|
+
alias :radicals_by_strokes :undefined
|
55
|
+
alias :get_kanji :undefined
|
56
|
+
alias :get_radical_strokes :undefined
|
57
|
+
alias :get_kanji_strokes :undefined
|
58
|
+
alias :get_radicals :undefined
|
59
|
+
alias :get_reading :undefined
|
60
|
+
alias :get_meaning :undefined
|
61
|
+
alias :is_kanji? :undefined
|
62
|
+
alias :is_radical? :undefined
|
63
|
+
|
64
|
+
require 'klookup/database_flatfile'
|
65
|
+
require 'klookup/database_unihan'
|
66
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
lib/klookup/database_flatfile.rb
|
4
|
+
|
5
|
+
Copyright © Tom Adams 2006
|
6
|
+
|
7
|
+
This programme is free software.
|
8
|
+
You can distribute/modify this program under
|
9
|
+
the terms of the Ruby License.
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
# A singleton class to abstract RadK and KanjiDic: radical questions are
# forwarded to RadK, kanji questions to KanjiDic.
class KLookup::Database::FlatFile < KLookup::Database
  require 'klookup/database_flatfile_radk'
  require 'klookup/database_flatfile_kanjidic'

  require 'singleton'
  include Singleton

  # Forwards to RadK#stroke_count_list.
  def stroke_count_list(*args)
    RadK.instance.stroke_count_list(*args)
  end

  # Forwards to RadK#radicals_by_strokes.
  def radicals_by_strokes
    RadK.instance.radicals_by_strokes
  end

  # Returns the kanji RadK reports for the radicals in +args+. When
  # +strokes+ is not nil, only kanji whose stroke count (per KanjiDic)
  # equals +strokes+ are kept.
  #
  # The result is always a new array: filtering is done with +reject+
  # rather than +delete_if+ so that the list handed back by RadK is never
  # modified in place.
  def get_kanji(strokes, *args)
    RadK.instance.get_kanji(*args).reject { |k|
      !strokes.nil? && !(get_kanji_strokes(k) == strokes)
    }
  end

  # Forwards to RadK#get_strokes.
  def get_radical_strokes(*args)
    RadK.instance.get_strokes(*args)
  end

  # Forwards to KanjiDic#get_strokes.
  def get_kanji_strokes(*args)
    KanjiDic.instance.get_strokes(*args)
  end

  # Forwards to RadK#get_radicals.
  def get_radicals(*args)
    RadK.instance.get_radicals(*args)
  end

  # Forwards to KanjiDic#get_reading.
  def get_reading(*args)
    KanjiDic.instance.get_reading(*args)
  end

  # Forwards to KanjiDic#get_meaning.
  def get_meaning(*args)
    KanjiDic.instance.get_meaning(*args)
  end

  # Forwards to KanjiDic#is_kanji?.
  def is_kanji?(*args)
    KanjiDic.instance.is_kanji?(*args)
  end

  # Forwards to RadK#is_radical?.
  def is_radical?(*args)
    RadK.instance.is_radical?(*args)
  end
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
lib/klookup/database_flatfile_kanjidic.rb
|
4
|
+
|
5
|
+
Copyright © Tom Adams 2006
|
6
|
+
|
7
|
+
This programme is free software.
|
8
|
+
You can distribute/modify this program under
|
9
|
+
the terms of the Ruby License.
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
# Access to the KANJIDIC (additional information about kanji).
|
14
|
+
class KLookup::Database::FlatFile::KanjiDic
|
15
|
+
require 'singleton'
|
16
|
+
include Singleton
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# Starts with an empty record table, then fills it from the resource
# +path+ opened through KLookup::Database.open_resource.
def initialize(path='kanjidic')
  @records = {}
  KLookup::Database.open_resource(path) do |source|
    extract_records(source)
  end
end
|
26
|
+
|
27
|
+
# Feeds every content line of +file+ to process_line. Lines that are
# blank or whose first non-space character is '#' are skipped.
def extract_records(file)
  file.each_line do |line|
    next if line =~ /^\s*#/ || line =~ /^\s*$/ # No content, so skip
    process_line(line)
  end
end
|
37
|
+
|
38
|
+
# Splits +line+ on single spaces; the first field becomes the key in
# +@records+ and the remaining fields are stored under +:items+.
def process_line(line)
  fields = line.split(/ /)
  key = fields.shift

  # Set the record
  @records[key] = { :items => fields }
end
|
46
|
+
|
47
|
+
# Returns true if +str+ contains at least one character whose codepoint
# lies in U+3040..U+30FF (the range this class treats as kana), false
# otherwise.
#
# Codepoints are obtained with String#unpack('U*'), which yields integers
# on every Ruby version. The earlier form compared the integer range
# against +chars[0]+, which is a String wherever String#chars is the
# built-in method, so the check could never succeed there.
def include_kana?(str)
  kana = (0x3040..0x30FF)
  str.unpack('U*').any? { |codepoint| kana.include?(codepoint) }
end
|
55
|
+
|
56
|
+
public
|
57
|
+
|
58
|
+
# True only when +kanji+ converts (via to_s) to a single character that
# is a key of +@records+; false in every other case.
def is_kanji?(kanji)
  return false unless kanji.respond_to?(:to_s)
  text = kanji.to_s
  text.chars.length == 1 && !@records[text].nil?
end
|
65
|
+
|
66
|
+
# Returns, as an Integer, the number carried by the first +S<digits>+
# field of the record stored for +kanji+.
def get_strokes(kanji)
  stroke_field = @records[kanji][:items].find { |field| field =~ /^S\d+$/ }
  stroke_field.sub(/^S(\d+)$/, '\1').to_i
end
|
71
|
+
|
72
|
+
# Returns a Struct of arrays of Japanese readings.
#
#  KLookup::Database::FlatFile::KanjiDic.instance.get_reading('富')
#  #=> #<struct reading=["フ", "フウ", "と.む", "とみ"],
#       name_reading=["と", "とん", "ふっ"]>
#
# Fields containing kana (per include_kana?) are collected in order.
# Those seen before a +T1+ field go to +reading+; those seen from +T1+
# onwards go to +name_reading+.
#
# The Struct class is created once and reused, so results of separate
# calls with equal content compare equal (a fresh anonymous Struct class
# per call would make every result a different type).
def get_reading(kanji)
  reading = []
  name_reading = []
  in_names = false

  @records[kanji][:items].each do |item|
    in_names = true if item == 'T1'
    next unless include_kana?(item)
    (in_names ? name_reading : reading) << item
  end

  @reading_struct ||= Struct.new(:reading, :name_reading)
  @reading_struct.new(reading, name_reading)
end
|
96
|
+
|
97
|
+
# Returns an array of English meanings of kanji.
#
# Meanings are the brace-delimited fields of the record. Everything from
# the first field that starts with '{' or ends with '}' onwards is
# considered; a meaning spread over several fields ("{rank", "next}") is
# joined back together with single spaces.
def get_meaning(kanji)
  fields = @records[kanji][:items]

  # Keep the fields from the first brace onwards, unwrapping {single} ones
  seen_brace = false
  candidates = []
  fields.each do |field|
    seen_brace = true if field =~ /^\{|\}$/
    candidates << field.sub(/^\{(.*)\}$/, '\1') if seen_brace
  end
  candidates = candidates.reject { |text| text =~ /^\s*$/ }

  # Stick the meanings together
  meaning = []
  phrase_open = false # true while a multi-field meaning is being built
  candidates.each do |text|
    if text =~ /^\{(.*)$/ # {one
      meaning << Regexp.last_match(1)
      phrase_open = true
    elsif !phrase_open # a complete, already unwrapped meaning
      meaning << text
    elsif text =~ /^(.*)\}$/ # three}
      meaning.last << ' ' + Regexp.last_match(1)
      phrase_open = false
    else # two
      meaning.last << ' ' + text
    end
  end
  meaning
end
|
128
|
+
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
lib/klookup/database_flatfile_radk.rb
|
4
|
+
|
5
|
+
Copyright © Tom Adams 2006
|
6
|
+
|
7
|
+
This programme is free software.
|
8
|
+
You can distribute/modify this program under
|
9
|
+
the terms of the Ruby License.
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
# Access to RADKFILE (mappings from radicals to kanji).
|
14
|
+
class KLookup::Database::FlatFile::RadK
|
15
|
+
require 'singleton'
|
16
|
+
include Singleton
|
17
|
+
|
18
|
+
attr_reader :stroke_count_list
|
19
|
+
attr_reader :radicals_by_strokes
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
# Raised by extract_records when a data line appears before any radical
# header line. Derives from StandardError so that an ordinary +rescue+
# clause catches it.
class MalformedDatabaseException < StandardError; end
|
24
|
+
|
25
|
+
# Starts with empty stroke tables, then fills the database from the
# resource +path+ opened through KLookup::Database.open_resource.
def initialize(path='newradkfile')
  @stroke_count_list = []
  @radicals_by_strokes = {}

  KLookup::Database.open_resource(path) do |source|
    extract_records(source)
  end
end
|
33
|
+
|
34
|
+
# Saves a record in the instance variable +@records+ and updates the two
# stroke indexes.
#
# +radical+ is converted with to_s, +strokes+ with to_i and +kanji+ with
# to_a; ArgumentError is raised if any of them lacks that conversion.
def save_record(radical,strokes,kanji)
  convertible = radical.respond_to?(:to_s) &&
                strokes.respond_to?(:to_i) &&
                kanji.respond_to?(:to_a)
  raise ArgumentError unless convertible

  name = radical.to_s
  count = strokes.to_i
  members = kanji.to_a

  # @records is a hash of hashes:
  #  @records = {
  #   ...
  #   '龠' => {:strokes=>17, :kanji=>['籥', '鑰', '龠']}
  #  }
  @records[name] = { :strokes => count, :kanji => members }

  # Keep the list of known stroke counts sorted and free of duplicates
  @stroke_count_list << count
  @stroke_count_list.sort!
  @stroke_count_list.uniq!

  @radicals_by_strokes[count] ||= []
  @radicals_by_strokes[count] << name
end
|
59
|
+
|
60
|
+
# Reads +radk+ line by line (anything responding to each_line) and stores
# one record per radical through save_record.
#
# Line kinds:
# - lines starting with '#' and empty lines are skipped;
# - "$ <radical> <strokes>" starts a new record (the previous one, if
#   any, is saved first);
# - any other line is a run of kanji belonging to the current radical.
#
# All three tables (+@records+, +@stroke_count_list+ and
# +@radicals_by_strokes+) are reset first, so parsing again does not
# append to earlier results.
#
# Raises MalformedDatabaseException if a kanji line appears before the
# first radical line.
def extract_records(radk)
  @records = {}
  @stroke_count_list = []
  @radicals_by_strokes = {}

  radical_line_passed = false
  radical = nil
  strokes = nil
  kanji = nil

  radk.each_line do |line|
    next if line.match(/^#/) || line.match(/^$/) # comment or empty line

    if (header = line.match(/^\$\s+([^\s])\s+(\d+)/))
      # It's a radical line: save the previous record, start a new one
      save_record(radical, strokes, kanji) if radical_line_passed
      radical_line_passed = true

      radical = header[1]
      strokes = header[2]
      kanji = [] # Reset
    elsif not radical_line_passed
      raise MalformedDatabaseException
    else
      # It must be a line of kanji (this branch used to be a bare +elsif+
      # that took its first statement as the condition)
      kanji.concat(line.split(//))
      kanji.delete("\n")
    end
  end

  # And save the record at the end
  save_record(radical, strokes, kanji) if radical_line_passed
end
|
95
|
+
|
96
|
+
public
|
97
|
+
|
98
|
+
# True only when +radical+ converts (via to_s) to a single character that
# is a key of +@records+; false in every other case.
def is_radical?(radical)
  return false unless radical.respond_to?(:to_s)
  text = radical.to_s
  text.chars.length == 1 && !@records[text].nil?
end
|
105
|
+
|
106
|
+
# Returns a list of the kanji that contain every one of the given
# radicals (the intersection of the radicals' kanji lists). Radicals may
# be passed individually or as (nested) arrays; with no radicals the
# result is an empty array.
#
# The result is always an array the caller may modify freely: the first
# list is copied, so the list stored in +@records+ is never handed out.
#
# Raises ArgumentError when a radical is not in the database.
def get_kanji(*radicals)
  matches = nil
  radicals.flatten.each do |rad|
    record = @records[rad.to_s]
    raise ArgumentError unless record

    matches = matches.nil? ? record[:kanji].dup : matches & record[:kanji]
  end
  matches.to_a
end
|
120
|
+
|
121
|
+
# Looks up the record stored for +radical+ and returns its +:strokes+
# value.
def get_strokes(radical)
  record = @records[radical]
  record[:strokes]
end
|
125
|
+
|
126
|
+
# Returns every radical whose kanji list includes +kanji+, in the
# iteration order of +@records+.
def get_radicals(kanji)
  @records.keys.select do |radical|
    @records[radical][:kanji].include?(kanji)
  end
end
|
136
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
lib/klookup/database_unihan.rb
|
4
|
+
|
5
|
+
Copyright © Tom Adams 2006
|
6
|
+
|
7
|
+
This programme is free software.
|
8
|
+
You can distribute/modify this program under
|
9
|
+
the terms of the Ruby License.
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
# Properties:
|
14
|
+
# - kJapaneseKun
|
15
|
+
# - kJapaneseOn
|
16
|
+
# - kDefinition (Chinese, with Japanese marked by '(J)')
|
17
|
+
# (contains radical number %d for radicals)
|
18
|
+
# - kFrequency (not language-specific)
|
19
|
+
# - kRSJapanese
|
20
|
+
# - kTotalStrokes
|
21
|
+
# Properties to find:
|
22
|
+
# - alternate radical representation
|
23
|
+
# - alternate kanji characters
|
24
|
+
|
25
|
+
# Other:
|
26
|
+
# - Add ability to lookup arbitrary Property for character
|
27
|
+
# - It may be faster to re-open the file every time or keep a file descriptor
|
28
|
+
# than to keep it all in memory
|
29
|
+
|
30
|
+
# A singleton class to access Unihan.txt.
#
# Only resource opening and the codepoint helpers are implemented; the
# query methods at the bottom are empty placeholders that return nil.
class KLookup::Database::Unihan < KLookup::Database
  require 'singleton'
  include Singleton

  private

  # Opens Unihan.txt and keeps the open file in +@unihan+.
  def initialize
    @unihan = KLookup::Database::Unihan.open_resource('Unihan.txt')
  end

  # Returns the numeric codepoint of the first codepoint in +str+
  # (nil for an empty string).
  #
  # String#unpack('U') is used because it yields an Integer on every Ruby
  # version, whereas +chars[0]+ is a String wherever String#chars is the
  # built-in method.
  def get_codepoint(str)
    str.unpack('U').first
  end

  # Returns the hexadecimal representation used by the Unicode Standard
  # (+%04X+) of the first codepoint in +str+.
  def get_hexcp(str)
    "%04X" % get_codepoint(str)
  end

  public

  # Opens the data file +path+.
  #
  # Directory priority: the +unihan+ subdirectory of the KLOOKUP_PATH
  # environment variable when it is set and non-empty, otherwise the data
  # directory of the +unihan+ gem.
  #
  # With a block, the open file is yielded, closed when the block
  # finishes (also when it raises), and the block's value is returned.
  # Without a block the open file is returned and the caller must close
  # it.
  #
  # Raises IOError when the gem lookup fails with a NameError.
  def self.open_resource(path)
    # Choose a directory
    env = ENV['KLOOKUP_PATH']
    if env and env != ''
      dir = env + '/unihan'
    else
      # An earlier version first assigned +dir+ from +gem_path+, a name
      # that nothing visible defines; the value was never used because the
      # lines below either overwrite +dir+ or raise.
      begin
        gem 'unihan'
        dir = Gem.datadir 'unihan'
      rescue NameError
        raise IOError, 'Could not find resource %s' % path
      end
    end

    # Open a file. File.open never interprets the path as a command, and
    # its block form guarantees the handle is closed.
    full_path = "#{dir}/#{path}"
    if block_given?
      File.open(full_path) { |file| yield file }
    else
      File.open(full_path)
    end
  end

  # The methods below are placeholders with empty bodies (they return nil).

  def stroke_count_list(*args)
  end

  def radicals_by_strokes
  end

  def get_kanji(strokes, *args)
  end

  def get_radical_strokes(*args)
  end

  def get_kanji_strokes(*args)
  end

  def get_radicals(*args)
  end

  def get_reading(*args)
  end

  def get_meaning(*args)
  end

  def is_kanji?(*args)
  end

  def is_radical?(*args)
  end
end
|