klookup 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/klookup.rb ADDED
@@ -0,0 +1,27 @@
1
+ =begin
2
+
3
+ lib/klookup.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
# Load a Unicode support library: ActiveSupport when it is available,
# otherwise runicode (presumably for character-level String helpers such as
# String#chars -- confirm against the libraries' documentation).
begin
  require 'active_support'
rescue LoadError
  require 'runicode'
end

# Ruby 1.8 only: this allows regular expressions to work on characters.
# Interpreters that define the Encoding class (Ruby 1.9 and later) no longer
# ship the jcode library, so requiring it there would abort loading with a
# LoadError; both statements are skipped on those interpreters.
unless defined?(::Encoding)
  $KCODE = 'u'
  require 'jcode'
end
22
+
23
+ # Contains Lookup and Database.
24
+ module KLookup
25
+ require 'klookup/database'
26
+ require 'klookup/lookup'
27
+ end
@@ -0,0 +1,66 @@
1
+ =begin
2
+
3
+ lib/klookup/database.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # Database access modules:
14
+ # - FlatFile
15
+ # - Unihan
16
+ class KLookup::Database
17
+
18
+ private
19
+
20
# Shared placeholder body for the abstract query methods of this class.
# Accepts (and ignores) any arguments and always raises NotImplementedError.
def undefined(*args)
  raise(NotImplementedError)
end
23
+
24
+ public
25
+
26
# Opens the data resource +path+.
#
# The directory is chosen in priority order:
# 1. "<KLOOKUP_PATH>/klookup" when the KLOOKUP_PATH environment variable is
#    set and non-empty;
# 2. otherwise the data directory of the 'klookup' gem.
#
# With a block, the opened file is yielded, closed again when the block
# finishes (even if it raises), and the block's value is returned. Without a
# block the open File is returned and the caller is responsible for closing
# it.
#
# Raises IOError when looking up the gem data directory raises a NameError.
def self.open_resource(path)
  # Choose a directory
  env = ENV['KLOOKUP_PATH']
  if env && env != ''
    dir = env + '/klookup'
  else
    begin
      gem 'klookup'
      dir = Gem.datadir 'klookup'
    rescue NameError
      raise IOError, 'Could not find resource %s' % path
    end
  end

  # Open the file. File.open is used instead of Kernel#open so the path can
  # never be interpreted as a command to run.
  full_path = "#{dir}/#{path}"
  return File.open(full_path) unless block_given?

  File.open(full_path) { |file| yield file }
end
52
+
53
+ alias :stroke_count_list :undefined
54
+ alias :radicals_by_strokes :undefined
55
+ alias :get_kanji :undefined
56
+ alias :get_radical_strokes :undefined
57
+ alias :get_kanji_strokes :undefined
58
+ alias :get_radicals :undefined
59
+ alias :get_reading :undefined
60
+ alias :get_meaning :undefined
61
+ alias :is_kanji? :undefined
62
+ alias :is_radical? :undefined
63
+
64
+ require 'klookup/database_flatfile'
65
+ require 'klookup/database_unihan'
66
+ end
@@ -0,0 +1,52 @@
1
+ =begin
2
+
3
+ lib/klookup/database_flatfile.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # A singleton class to abstract RadK and KanjiDic.
14
+ class KLookup::Database::FlatFile < KLookup::Database
15
+ require 'klookup/database_flatfile_radk'
16
+ require 'klookup/database_flatfile_kanjidic'
17
+
18
+ require 'singleton'
19
+ include Singleton
20
+
21
# Forwards to RadK#stroke_count_list on the RadK singleton, passing every
# argument through unchanged, and returns its result.
def stroke_count_list(*args)
  radk = RadK.instance
  radk.stroke_count_list(*args)
end
24
# Returns whatever RadK#radicals_by_strokes yields on the RadK singleton.
def radicals_by_strokes
  radk = RadK.instance
  radk.radicals_by_strokes
end
27
# Returns the kanji that RadK#get_kanji reports for the radicals in +args+,
# restricted to those whose stroke count (per get_kanji_strokes) equals
# +strokes+. When +strokes+ is nil no filtering is applied.
#
# The result is always a new array: the array handed back by RadK is never
# modified in place, because it may be the array stored in the radical
# database itself.
def get_kanji(strokes, *args)
  candidates = RadK.instance.get_kanji(*args)
  return candidates.dup if strokes.nil?

  candidates.select { |k| get_kanji_strokes(k) == strokes }
end
31
# Forwards to RadK#get_strokes on the RadK singleton with the given
# arguments and returns its result.
def get_radical_strokes(*args)
  radk = RadK.instance
  radk.get_strokes(*args)
end
34
# Forwards to KanjiDic#get_strokes on the KanjiDic singleton with the given
# arguments and returns its result.
def get_kanji_strokes(*args)
  kanjidic = KanjiDic.instance
  kanjidic.get_strokes(*args)
end
37
# Forwards to RadK#get_radicals on the RadK singleton with the given
# arguments and returns its result.
def get_radicals(*args)
  radk = RadK.instance
  radk.get_radicals(*args)
end
40
# Forwards to KanjiDic#get_reading on the KanjiDic singleton with the given
# arguments and returns its result.
def get_reading(*args)
  kanjidic = KanjiDic.instance
  kanjidic.get_reading(*args)
end
43
# Forwards to KanjiDic#get_meaning on the KanjiDic singleton with the given
# arguments and returns its result.
def get_meaning(*args)
  kanjidic = KanjiDic.instance
  kanjidic.get_meaning(*args)
end
46
# Forwards to KanjiDic#is_kanji? on the KanjiDic singleton with the given
# arguments and returns its result.
def is_kanji?(*args)
  kanjidic = KanjiDic.instance
  kanjidic.is_kanji?(*args)
end
49
# Forwards to RadK#is_radical? on the RadK singleton with the given
# arguments and returns its result.
def is_radical?(*args)
  radk = RadK.instance
  radk.is_radical?(*args)
end
52
+ end
@@ -0,0 +1,128 @@
1
+ =begin
2
+
3
+ lib/klookup/database_flatfile_kanjidic.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # Access to the KANJIDIC (additional information about kanji).
14
+ class KLookup::Database::FlatFile::KanjiDic
15
+ require 'singleton'
16
+ include Singleton
17
+
18
+ private
19
+
20
# Builds the record index: starts with an empty @records hash, opens the
# resource +path+ (default 'kanjidic') through
# KLookup::Database.open_resource and feeds it to extract_records.
def initialize(path='kanjidic')
  @records = {}
  KLookup::Database.open_resource(path) do |kanjidic|
    extract_records(kanjidic)
  end
end
26
+
27
# Calls process_line for every content line of +file+.
#
# Lines whose first non-blank character is '#' and lines that are empty or
# whitespace-only carry no content and are skipped.
def extract_records(file)
  file.each_line do |line|
    next if line =~ /^\s*#/ # comment line
    next if line =~ /^\s*$/ # blank line
    process_line line
  end
end
37
+
38
# Parses one data line and stores it in +@records+.
#
# The line is split on single spaces; the first field is used as the key
# (the kanji) and the remaining fields are stored, in order, under :items.
# The line terminator is removed first so that it neither stays glued to the
# last field nor shows up as a field of its own.
def process_line(line)
  items = line.chomp.split(/ /)
  kanji = items.shift

  # Set the record
  @records[kanji] = {:items => items}
end
46
+
47
# Returns true if +str+ contains at least one character whose codepoint lies
# in the range U+3040..U+30FF, false otherwise.
#
# Codepoints are obtained with String#unpack('U*') so the comparison against
# the integer range does not depend on which library (if any) provides
# String#chars or on what that method returns.
def include_kana?(str)
  kana = (0x3040..0x30FF)
  str.unpack('U*').any? { |codepoint| kana.include?(codepoint) }
end
55
+
56
+ public
57
+
58
# Returns true when +kanji+, converted with to_s, is exactly one character
# long and is a key of @records; returns false in every other case.
def is_kanji?(kanji)
  return false unless kanji.respond_to?(:to_s)

  text = kanji.to_s
  text.chars.length == 1 && !@records[text].nil?
end
65
+
66
# Returns the stroke count recorded for +kanji+ as an Integer.
#
# The count is taken from the first item of the record that has the form
# "S<digits>".
def get_strokes(kanji)
  fields = @records[kanji][:items]
  stroke_field = fields.find { |field| field =~ /^S\d+$/ }
  stroke_field.sub(/^S(\d+)$/, '\1').to_i
end
71
+
72
# Returns a Struct with two array members holding the Japanese readings of
# +kanji+:
#
# reading::      items containing kana that appear before the 'T1' marker
# name_reading:: items containing kana that appear after the 'T1' marker
#
#   KLookup::Database::FlatFile::KanjiDic.instance.get_reading('富')
#   #=> #<struct reading=["フ", "フウ", "と.む", "とみ"],
#   #            name_reading=["と", "とん", "ふっ"]>
def get_reading(kanji)
  reading = []
  name_reading = []
  in_names = false

  @records[kanji][:items].each do |item|
    in_names ||= (item == 'T1')
    next unless include_kana?(item)

    (in_names ? name_reading : reading) << item
  end

  Struct.new(:reading, :name_reading).new(reading, name_reading)
end
96
+
97
# Returns an array of the meanings recorded for +kanji+.
#
# Meanings are the brace-delimited items of the record. Because records are
# split on spaces, a multi-word meaning arrives as several items
# ("{rank", "next}") and is joined back together with single spaces.
def get_meaning(kanji)
  fields = @records[kanji][:items]

  # Keep everything from the first item that opens or closes a brace onwards.
  started = false
  words = fields.select do |field|
    started ||= (field =~ /^\{|\}$/ ? true : false)
  end

  # Unwrap one-word meanings and drop blank leftovers.
  words = words.map { |word| word.sub(/^\{(.*)\}$/, '\1') }
  words = words.reject { |word| word =~ /^\s*$/ }

  # Stick the meanings together
  meanings = []
  phrase_open = false
  words.each do |word|
    if word =~ /^\{(.*)$/ # first word of a phrase
      meanings << $1
      phrase_open = true
    elsif !phrase_open # a complete one-word meaning
      meanings << word
    elsif word =~ /^(.*)\}$/ # last word of a phrase
      meanings.last << ' ' + $1
      phrase_open = false
    else # a word in the middle of a phrase
      meanings.last << ' ' + word.dup
    end
  end
  meanings
end
128
+ end
@@ -0,0 +1,136 @@
1
+ =begin
2
+
3
+ lib/klookup/database_flatfile_radk.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # Access to RADKFILE (mappings from radicals to kanji).
14
+ class KLookup::Database::FlatFile::RadK
15
+ require 'singleton'
16
+ include Singleton
17
+
18
+ attr_reader :stroke_count_list
19
+ attr_reader :radicals_by_strokes
20
+
21
+ private
22
+
23
# Raised by extract_records when kanji data appears before any radical line.
# Derives from StandardError so that an ordinary +rescue+ clause catches it.
class MalformedDatabaseException < StandardError; end
24
+
25
# Starts with an empty stroke-count list and an empty radicals-by-strokes
# hash, then opens the resource +path+ (default 'newradkfile') through
# KLookup::Database.open_resource and feeds it to extract_records.
def initialize(path='newradkfile')
  @stroke_count_list = []
  @radicals_by_strokes = {}

  KLookup::Database.open_resource(path) do |radkfile|
    extract_records(radkfile)
  end
end
33
+
34
# Saves one record in the instance variable +@records+ and updates the two
# lookup indexes.
#
# +radical+ is converted with to_s, +strokes+ with to_i and +kanji+ with
# to_a; ArgumentError is raised if any argument lacks its conversion method.
#
# @records is a hash of hashes:
#   @records = {
#     ...
#     '龠' => {:strokes=>17, :kanji=>['籥', '鑰', '龠']}
#   }
# @stroke_count_list is kept sorted and free of duplicates, and
# @radicals_by_strokes maps each stroke count to the radicals saved with it.
def save_record(radical,strokes,kanji)
  convertible = radical.respond_to?(:to_s) &&
                strokes.respond_to?(:to_i) &&
                kanji.respond_to?(:to_a)
  raise ArgumentError unless convertible

  radical = radical.to_s
  strokes = strokes.to_i
  kanji = kanji.to_a

  @records[radical] = {:strokes => strokes, :kanji => kanji}

  @stroke_count_list << strokes
  @stroke_count_list.sort!
  @stroke_count_list.uniq!

  @radicals_by_strokes[strokes] ||= []
  @radicals_by_strokes[strokes] << radical
end
59
+
60
# Reads radical data line by line from +radk+ (anything that responds to
# each_line) and stores every record through save_record.
#
# * Lines starting with '#' and empty lines are skipped.
# * A line of the form "$ <radical> <strokes>" starts a new record; the
#   record collected so far, if any, is saved first.
# * Every other line is split into characters, which are appended to the
#   kanji list of the current record.
#
# @records and @stroke_count_list are reset before reading. Raises
# MalformedDatabaseException if a kanji line appears before the first
# radical line.
def extract_records(radk)
  @records = {}
  @stroke_count_list = []
  radical_line_passed = false
  radical = nil
  strokes = nil
  kanji = nil

  radk.each_line do |line|
    if line.match(/^#/) or line.match(/^$/)
      next # comment or empty line
    elsif line.match(/^\$\s+([^\s])\s+(\d+)/)
      # It's a radical line: save the previous record, then start a new one
      save_record(radical, strokes, kanji) if radical_line_passed
      radical_line_passed = true

      radical = $1
      strokes = $2
      kanji = []
    elsif not radical_line_passed
      raise MalformedDatabaseException
    else
      # It must be a line of kanji
      kanji.concat(line.split(//))
      kanji.delete("\n")
    end
  end

  # And save the record at the end
  save_record(radical, strokes, kanji) if radical_line_passed
end
95
+
96
+ public
97
+
98
# Returns true when +radical+, converted with to_s, is exactly one character
# long and is a key of @records; returns false in every other case.
def is_radical?(radical)
  return false unless radical.respond_to?(:to_s)

  text = radical.to_s
  text.chars.length == 1 && !@records[text].nil?
end
105
+
106
# Returns the kanji that contain every one of the given radicals, i.e. the
# intersection of the kanji lists stored for them. +radicals+ may be given
# as separate arguments or as (nested) arrays. With no radicals the result
# is an empty array.
#
# The result is always a new array, so callers may modify it without
# altering the lists stored in @records.
#
# Raises ArgumentError if a radical is not present in @records.
def get_kanji(*radicals)
  kanji = nil
  radicals.flatten.each do |rad|
    record = @records[rad.to_s]
    raise ArgumentError, "unknown radical: #{rad.inspect}" unless record

    # dup on the first radical: otherwise the stored array itself would be
    # returned when only one radical is given.
    kanji = kanji.nil? ? record[:kanji].dup : kanji & record[:kanji]
  end
  kanji.to_a
end
120
+
121
# Returns the :strokes value stored in @records for +radical+.
def get_strokes(radical)
  record = @records[radical]
  record[:strokes]
end
125
+
126
# Returns an array of every radical in @records whose kanji list includes
# +kanji+, in the iteration order of @records.
def get_radicals(kanji)
  matching = []
  @records.each_pair do |radical, data|
    matching << radical if data[:kanji].include?(kanji)
  end
  matching
end
136
+ end
@@ -0,0 +1,101 @@
1
+ =begin
2
+
3
+ lib/klookup/database_unihan.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # Properties:
14
+ # - kJapaneseKun
15
+ # - kJapaneseOn
16
+ # - kDefinition (Chinese, with Japanese marked by '(J)')
17
+ # (contains radical number %d for radicals)
18
+ # - kFrequency (not language-specific)
19
+ # - kRSJapanese
20
+ # - kTotalStrokes
21
+ # Properties to find:
22
+ # - alternate radical representation
23
+ # - alternate kanji characters
24
+
25
+ # Other:
26
+ # - Add ability to lookup arbitrary Property for character
27
+ # - It may be faster to re-open the file every time or keep a file descriptor
28
+ # than to keep it all in memory
29
+
30
+ # A singleton class to access Unihan.txt.
31
+ class KLookup::Database::Unihan < KLookup::Database
32
+ require 'singleton'
33
+ include Singleton
34
+
35
+ private
36
+
37
# Opens 'Unihan.txt' through KLookup::Database::Unihan.open_resource and
# keeps the returned file object in @unihan.
def initialize
  unihan_file = KLookup::Database::Unihan.open_resource('Unihan.txt')
  @unihan = unihan_file
end
40
+
41
# Returns the numeric codepoint (an Integer) of the first character in
# +str+, or nil when +str+ is empty.
#
# The value is decoded with String#unpack('U') so that it is an Integer
# regardless of which library (if any) provides String#chars.
def get_codepoint(str)
  str.unpack('U').first
end
45
+
46
# Returns the first codepoint of +str+ (as reported by get_codepoint)
# formatted the way the Unicode Standard writes it: upper-case hexadecimal,
# zero-padded to at least four digits (+%04X+).
def get_hexcp(str)
  codepoint = get_codepoint(str)
  sprintf("%04X", codepoint)
end
51
+
52
+ public
53
+
54
# Opens the data resource +path+.
#
# The directory is chosen in priority order:
# 1. "<KLOOKUP_PATH>/unihan" when the KLOOKUP_PATH environment variable is
#    set and non-empty;
# 2. otherwise the data directory of the 'unihan' gem.
#
# With a block, the opened file is yielded, closed again when the block
# finishes (even if it raises), and the block's value is returned. Without a
# block the open File is returned and the caller is responsible for closing
# it.
#
# Raises IOError when looking up the gem data directory raises a NameError.
def self.open_resource(path)
  # Choose a directory
  env = ENV['KLOOKUP_PATH']
  if env && env != ''
    dir = env + '/unihan'
  else
    begin
      gem 'unihan'
      dir = Gem.datadir 'unihan'
    rescue NameError
      raise IOError, 'Could not find resource %s' % path
    end
  end

  # Open the file. File.open is used instead of Kernel#open so the path can
  # never be interpreted as a command to run.
  full_path = "#{dir}/#{path}"
  return File.open(full_path) unless block_given?

  File.open(full_path) { |file| yield file }
end
80
+
81
# Empty override: accepts any arguments, does nothing and returns nil.
def stroke_count_list(*args)
  nil
end
83
# Empty override: does nothing and returns nil.
def radicals_by_strokes
  nil
end
85
# Empty override: requires +strokes+, accepts any further arguments, does
# nothing and returns nil.
def get_kanji(strokes, *args)
  nil
end
87
# Empty override: accepts any arguments, does nothing and returns nil.
def get_radical_strokes(*args)
  nil
end
89
# Empty override: accepts any arguments, does nothing and returns nil.
def get_kanji_strokes(*args)
  nil
end
91
# Empty override: accepts any arguments, does nothing and returns nil.
def get_radicals(*args)
  nil
end
93
# Empty override: accepts any arguments, does nothing and returns nil.
def get_reading(*args)
  nil
end
95
# Empty override: accepts any arguments, does nothing and returns nil.
def get_meaning(*args)
  nil
end
97
# Empty override: accepts any arguments, does nothing and returns nil.
def is_kanji?(*args)
  nil
end
99
# Empty override: accepts any arguments, does nothing and returns nil.
def is_radical?(*args)
  nil
end
101
+ end