klookup 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/klookup.rb ADDED
@@ -0,0 +1,27 @@
1
+ =begin
2
+
3
+ lib/klookup.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ begin
14
+ require 'active_support'
15
+ rescue LoadError
16
+ require 'runicode'
17
+ end
18
+
19
# On Ruby 1.8, switch the interpreter to UTF-8 so that regular expressions
# work on characters instead of bytes, and load jcode for its multibyte
# String helpers.
#
# Ruby 1.9 and later (recognisable by the Encoding constant) no longer ship
# jcode, so requiring it raises LoadError, and assigning $KCODE has no
# effect there. The setup is therefore skipped on those interpreters.
unless defined?(Encoding)
  $KCODE = 'u'
  require 'jcode'
end
22
+
23
# Top-level namespace of the library; contains Lookup and Database.
module KLookup
  # Required from inside the module body, i.e. once the KLookup constant
  # already exists.
  %w[klookup/database klookup/lookup].each { |feature| require feature }
end
@@ -0,0 +1,66 @@
1
+ =begin
2
+
3
+ lib/klookup/database.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # Database access modules:
14
+ # - FlatFile
15
+ # - Unihan
16
+ class KLookup::Database
17
+
18
+ private
19
+
20
# Shared placeholder body for query methods that have no implementation.
# Accepts any arguments and ignores them.
#
# Raises NotImplementedError unconditionally.
def undefined(*_args)
  raise(NotImplementedError)
end
23
+
24
+ public
25
+
26
# Opens the data file +path+ for reading.
#
# The directory is chosen in this order:
# 1. "$KLOOKUP_PATH/klookup", when the KLOOKUP_PATH environment variable is
#    set and not empty;
# 2. the data directory of the installed 'klookup' gem.
#
# With a block, the file is yielded, closed when the block finishes (also
# when it raises), and the block's value is returned. Without a block the
# open File is returned and the caller is responsible for closing it.
#
# Raises IOError when the gem data directory cannot be determined. A missing
# file raises whatever File.open raises (e.g. Errno::ENOENT).
def self.open_resource(path)
  # Choose a directory
  env = ENV['KLOOKUP_PATH']
  if env and env != ''
    dir = env + '/klookup'
  else
    begin
      gem 'klookup'
      dir = Gem.datadir 'klookup'
    rescue NameError, LoadError
      # NameError: RubyGems is not loaded. LoadError: the gem is not installed.
      raise IOError, 'Could not find resource %s' % path
    end
    # NOTE(review): Gem.datadir appears to return nil when the gem is not
    # loaded; treated like any other lookup failure. Confirm against RubyGems.
    raise IOError, 'Could not find resource %s' % path if dir.nil?
  end

  # File.open rather than Kernel#open, so the name is only ever treated as
  # a file-system path.
  file = File.open("#{dir}/#{path}")
  return file unless block_given?

  begin
    yield file
  ensure
    file.close unless file.closed?
  end
end
52
+
53
# Abstract query interface: every method below is bound to the +undefined+
# placeholder, which raises NotImplementedError.
#
# An alias takes over the visibility of the method it aliases, and
# +undefined+ is private. Each alias is therefore made public explicitly;
# otherwise a call from outside would fail with NoMethodError (private
# method) instead of reaching the placeholder's NotImplementedError.
[
  :stroke_count_list, :radicals_by_strokes, :get_kanji,
  :get_radical_strokes, :get_kanji_strokes, :get_radicals,
  :get_reading, :get_meaning, :is_kanji?, :is_radical?
].each do |query|
  alias_method query, :undefined
  public query
end
63
+
64
+ require 'klookup/database_flatfile'
65
+ require 'klookup/database_unihan'
66
+ end
@@ -0,0 +1,52 @@
1
+ =begin
2
+
3
+ lib/klookup/database_flatfile.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # A singleton class to abstract RadK and KanjiDic.
14
+ class KLookup::Database::FlatFile < KLookup::Database
15
+ require 'klookup/database_flatfile_radk'
16
+ require 'klookup/database_flatfile_kanjidic'
17
+
18
+ require 'singleton'
19
+ include Singleton
20
+
21
# Returns the result of RadK#stroke_count_list; all arguments are forwarded
# unchanged.
def stroke_count_list(*args)
  radk = RadK.instance
  radk.stroke_count_list(*args)
end
24
# Returns the result of RadK#radicals_by_strokes.
def radicals_by_strokes
  radk = RadK.instance
  radk.radicals_by_strokes
end
27
# Returns the kanji reported by RadK#get_kanji for the given radicals,
# optionally restricted to an exact stroke count.
#
# strokes:: stroke count to keep (compared with get_kanji_strokes), or nil
#           to skip the filter.
# args::    radicals, forwarded unchanged to RadK#get_kanji.
#
# The result is always a new Array. Filtering is non-destructive on purpose:
# RadK#get_kanji may hand back the very array held in its records, and a
# destructive delete_if would remove the filtered-out kanji from the
# database for every later query.
def get_kanji(strokes, *args)
  candidates = RadK.instance.get_kanji(*args)
  return candidates.dup if strokes.nil?
  candidates.reject { |k| get_kanji_strokes(k) != strokes }
end
31
# Returns the result of RadK#get_strokes; all arguments are forwarded
# unchanged.
def get_radical_strokes(*args)
  radk = RadK.instance
  radk.get_strokes(*args)
end
34
# Returns the result of KanjiDic#get_strokes; all arguments are forwarded
# unchanged.
def get_kanji_strokes(*args)
  kanjidic = KanjiDic.instance
  kanjidic.get_strokes(*args)
end
37
# Returns the result of RadK#get_radicals; all arguments are forwarded
# unchanged.
def get_radicals(*args)
  radk = RadK.instance
  radk.get_radicals(*args)
end
40
# Returns the result of KanjiDic#get_reading; all arguments are forwarded
# unchanged.
def get_reading(*args)
  kanjidic = KanjiDic.instance
  kanjidic.get_reading(*args)
end
43
# Returns the result of KanjiDic#get_meaning; all arguments are forwarded
# unchanged.
def get_meaning(*args)
  kanjidic = KanjiDic.instance
  kanjidic.get_meaning(*args)
end
46
# Returns the result of KanjiDic#is_kanji?; all arguments are forwarded
# unchanged.
def is_kanji?(*args)
  kanjidic = KanjiDic.instance
  kanjidic.is_kanji?(*args)
end
49
# Returns the result of RadK#is_radical?; all arguments are forwarded
# unchanged.
def is_radical?(*args)
  radk = RadK.instance
  radk.is_radical?(*args)
end
52
+ end
@@ -0,0 +1,128 @@
1
+ =begin
2
+
3
+ lib/klookup/database_flatfile_kanjidic.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # Access to the KANJIDIC (additional information about kanji).
14
+ class KLookup::Database::FlatFile::KanjiDic
15
+ require 'singleton'
16
+ include Singleton
17
+
18
+ private
19
+
20
# Loads the dictionary: starts from an empty record table, then hands the
# resource named +path+ (default 'kanjidic'), as opened by
# KLookup::Database.open_resource, to extract_records.
def initialize(path='kanjidic')
  @records = Hash.new
  KLookup::Database.open_resource(path) do |kanjidic|
    extract_records(kanjidic)
  end
end
26
+
27
# Passes every content line of +file+ to process_line. Lines that are blank
# or that start (after optional whitespace) with '#' are skipped.
def extract_records(file)
  file.each_line do |line|
    next if line =~ /^\s*#/  # comment
    next if line =~ /^\s*$/  # no content
    process_line(line)
  end
end
37
+
38
# Parses one dictionary line and stores it in +@records+.
#
# The line is split on single spaces. The first field is the kanji and
# becomes the key; the remaining fields are stored in order under :items.
# The line terminator is removed before splitting, so the last field never
# ends in "\n" (it used to, and that newline leaked into parsed values).
#
# Returns the record that was stored.
def process_line(line)
  kanji, *items = line.chomp.split(/ /)

  # Set the record
  @records[kanji] = {:items => items}
end
46
+
47
# Returns true if +str+ contains at least one character in the range
# U+3040..U+30FF (the Hiragana and Katakana blocks), false otherwise.
#
# Codepoints are read with String#unpack('U*'), which yields Integers. The
# previous test went through String#chars; with the core String#chars each
# element is a String, which is never a member of an Integer range, so the
# check could not succeed.
def include_kana?(str)
  kana = (0x3040..0x30FF)
  str.unpack('U*').any? { |codepoint| kana.include?(codepoint) }
end
55
+
56
+ public
57
+
58
# Tells whether +kanji+, converted with to_s, is exactly one character long
# and has an entry in +@records+.
#
# Returns true or false.
def is_kanji?(kanji)
  return false unless kanji.respond_to?(:to_s)

  text = kanji.to_s
  text.chars.length == 1 && !@records[text].nil?
end
65
+
66
# Returns the stroke count of +kanji+ as an Integer, taken from the first
# item of its record that has the form "S<digits>".
def get_strokes(kanji)
  stroke_item = @records[kanji][:items].find { |item| item =~ /^S\d+$/ }
  stroke_item.sub(/^S(\d+)$/, '\1').to_i
end
71
+
72
# Returns a Struct of arrays of Japanese readings.
#
#   KLookup::Database::FlatFile::KanjiDic.instance.get_reading('富')
#   #=> #<struct reading=["フ", "フウ", "と.む", "とみ"],
#       name_reading=["と", "とん", "ふっ"]>
#
# Every item of the record that contains kana (see include_kana?) is a
# reading. Items before the 'T1' marker go to +reading+, items after it go
# to +name_reading+.
#
# The Struct class is created once and reused: building a fresh anonymous
# class on every call made two results for the same kanji compare unequal
# and allocated a new class per lookup.
def get_reading(kanji)
  items = @records[kanji][:items]

  reading = []
  name_reading = []
  in_names = false
  items.each do |item|
    in_names = true if item == 'T1'
    next unless include_kana?(item)
    (in_names ? name_reading : reading) << item
  end

  @reading_struct ||= Struct.new(:reading, :name_reading)
  @reading_struct.new(reading, name_reading)
end
96
+
97
# Returns an array of English meanings of kanji.
#
# Meanings are the brace-delimited items of the record, e.g. "{wealth}".
# Because the record was split on spaces, a multi-word meaning arrives as
# several items ("{rich", "old", "man}") and is stitched back together with
# single spaces.
#
# Everything from the first item that starts with "{" or ends with "}" up
# to the end of the record is considered. Surrounding whitespace, including
# a line terminator left on the last item, is stripped so that it cannot end
# up inside a meaning.
def get_meaning(kanji)
  items = @records[kanji][:items]

  # Collect the items from the first meaning onwards
  tokens = []
  items.each do |item|
    tokens << item.strip if !tokens.empty? || item =~ /^\{|\}$/
  end

  # Unwrap one-word meanings and drop blank items
  tokens = tokens.map { |token| token.sub(/^\{(.*)\}$/, '\1') }
  tokens = tokens.reject { |token| token =~ /^\s*$/ }

  # Stick the meanings together
  meaning = []
  complete = true # false while a multi-word "{...}" group is still open
  tokens.each do |token|
    if token =~ /^\{(.*)$/      # "{one": opens a group
      meaning << $1
      complete = false
    elsif complete               # stand-alone meaning
      meaning << token
    elsif token =~ /^(.*)\}$/   # "three}": closes the group
      meaning.last << ' ' << $1
      complete = true
    else                         # "two": middle of a group
      meaning.last << ' ' << token
    end
  end
  meaning
end
128
+ end
@@ -0,0 +1,136 @@
1
+ =begin
2
+
3
+ lib/klookup/database_flatfile_radk.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # Access to RADKFILE (mappings from radicals to kanji).
14
+ class KLookup::Database::FlatFile::RadK
15
+ require 'singleton'
16
+ include Singleton
17
+
18
+ attr_reader :stroke_count_list
19
+ attr_reader :radicals_by_strokes
20
+
21
+ private
22
+
23
# Raised by extract_records when data appears before the first radical
# line. Derives from StandardError rather than Exception so that a plain
# +rescue+ handles it like any other application error.
class MalformedDatabaseException < StandardError; end
24
+
25
# Builds the radical tables from the resource named +path+ (default
# 'newradkfile'), opened through KLookup::Database.open_resource and parsed
# by extract_records.
def initialize(path='newradkfile')
  @stroke_count_list, @radicals_by_strokes = [], {}

  KLookup::Database.open_resource(path) do |radkfile|
    extract_records(radkfile)
  end
end
33
+
34
# Stores one radical record and updates both stroke indexes.
#
# radical:: key of the record; converted with to_s.
# strokes:: stroke count; converted with to_i.
# kanji::   kanji listed under the radical; converted with to_a.
#
# Raises ArgumentError if an argument lacks the conversion it needs.
#
# +@records+ (an instance variable) is a hash of hashes:
#   '龠' => {:strokes=>17, :kanji=>['籥', '鑰', '龠']}
# +@stroke_count_list+ is kept sorted and free of duplicates;
# +@radicals_by_strokes+ maps a stroke count to the radicals saved with it.
def save_record(radical,strokes,kanji)
  convertible = radical.respond_to?(:to_s) &&
                strokes.respond_to?(:to_i) &&
                kanji.respond_to?(:to_a)
  raise ArgumentError unless convertible

  key = radical.to_s
  count = strokes.to_i
  members = kanji.to_a

  @records[key] = {:strokes => count, :kanji => members}

  @stroke_count_list << count
  @stroke_count_list.sort!
  @stroke_count_list.uniq!

  @radicals_by_strokes[count] ||= []
  @radicals_by_strokes[count] << key
end
59
+
60
# Reads the radical file line by line and saves one record per radical
# through save_record.
#
# radk:: any object with each_line (an open file or a String).
#
# Line kinds:
# - lines starting with '#' and empty lines are ignored;
# - "$ <radical> <strokes> ..." starts a new radical;
# - any other line lists kanji that belong to the current radical, one
#   character each.
#
# Raises MalformedDatabaseException if a kanji line appears before the
# first radical line.
def extract_records(radk)
  @records = {}
  @stroke_count_list = []
  # Reset together with the tables above, so that parsing starts from a
  # clean state.
  @radicals_by_strokes = {}

  radical_line_passed = false
  radical = nil
  strokes = nil
  kanji = nil

  radk.each_line do |line|
    if line =~ /^#/ or line =~ /^$/
      next # because it's a comment or an empty line
    elsif (header = line.match(/^\$\s+([^\s])\s+(\d+)/))
      # It's a radical line: save the previous record, then start a new one
      save_record(radical, strokes, kanji) if radical_line_passed
      radical_line_passed = true

      radical = header[1]
      strokes = header[2]
      kanji = []
    elsif not radical_line_passed
      raise MalformedDatabaseException
    else
      # It must be a line of kanji. The terminator is removed first, so that
      # neither "\n" nor the "\r" of a CRLF file is stored as a kanji.
      kanji.concat(line.chomp.split(//))
    end
  end

  # And save the record at the end
  save_record(radical, strokes, kanji) if radical_line_passed
end
95
+
96
+ public
97
+
98
# Tells whether +radical+, converted with to_s, is exactly one character
# long and has an entry in +@records+.
#
# Returns true or false.
def is_radical?(radical)
  return false unless radical.respond_to?(:to_s)

  text = radical.to_s
  text.chars.length == 1 && !@records[text].nil?
end
105
+
106
# Returns a list of kanji corresponding to given radicals: the kanji that
# are listed under every one of them.
#
# radicals:: radicals, given separately or as (nested) arrays; each is
#            converted with to_s before the lookup.
#
# Returns a new Array on every call (empty when no radical is given), so
# callers may modify the result without touching the stored records.
#
# Raises ArgumentError if a radical is not in the database.
def get_kanji(*radicals)
  matches = nil
  radicals.flatten.each do |rad|
    record = @records[rad.to_s]
    raise ArgumentError, "unknown radical: #{rad}" unless record

    # Copy the first list; Array#& already returns a new array afterwards.
    matches = matches.nil? ? record[:kanji].dup : matches & record[:kanji]
  end
  matches.to_a
end
120
+
121
# Returns the stroke count stored for +radical+, i.e. the :strokes value of
# its record.
def get_strokes(radical)
  record = @records[radical]
  record[:strokes]
end
125
+
126
# Returns the radicals whose kanji list includes +kanji+, in the iteration
# order of +@records+.
def get_radicals(kanji)
  @records.keys.select do |radical|
    @records[radical][:kanji].include?(kanji)
  end
end
136
+ end
@@ -0,0 +1,101 @@
1
+ =begin
2
+
3
+ lib/klookup/database_unihan.rb
4
+
5
+ Copyright © Tom Adams 2006
6
+
7
+ This programme is free software.
8
+ You can distribute/modify this program under
9
+ the terms of the Ruby License.
10
+
11
+ =end
12
+
13
+ # Properties:
14
+ # - kJapaneseKun
15
+ # - kJapaneseOn
16
+ # - kDefinition (Chinese, with Japanese marked by '(J)')
17
+ # (contains radical number %d for radicals)
18
+ # - kFrequency (not language-specific)
19
+ # - kRSJapanese
20
+ # - kTotalStrokes
21
+ # Properties to find:
22
+ # - alternate radical representation
23
+ # - alternate kanji characters
24
+
25
+ # Other:
26
+ # - Add ability to lookup arbitrary Property for character
27
+ # - It may be faster to re-open the file every time or keep a file descriptor
28
+ # than to keep it all in memory
29
+
30
+ # A singleton class to access Unihan.txt.
31
+ class KLookup::Database::Unihan < KLookup::Database
32
+ require 'singleton'
33
+ include Singleton
34
+
35
+ private
36
+
37
# Opens 'Unihan.txt' through KLookup::Database::Unihan.open_resource and
# keeps the returned file object in +@unihan+.
def initialize
  unihan_file = KLookup::Database::Unihan.open_resource('Unihan.txt')
  @unihan = unihan_file
end
40
+
41
# Returns the numeric codepoint (an Integer) of the first character in
# +str+, or nil when +str+ is empty.
#
# Decodes with String#unpack('U') rather than String#chars: the core
# String#chars yields one-character Strings, which get_hexcp cannot format
# with "%04X".
def get_codepoint(str)
  str.unpack('U').first
end
45
+
46
# Returns the first codepoint of +str+ (as given by get_codepoint) as
# upper-case hexadecimal, zero-padded to at least four digits (+%04X+),
# which is the notation used by the Unicode Standard.
def get_hexcp(str)
  codepoint = get_codepoint(str)
  format("%04X", codepoint)
end
51
+
52
+ public
53
+
54
# Opens the data file +path+ for reading.
#
# The directory is chosen in this order:
# 1. "$KLOOKUP_PATH/unihan", when the KLOOKUP_PATH environment variable is
#    set and not empty;
# 2. the data directory of the installed 'unihan' gem.
#
# With a block, the file is yielded, closed when the block finishes (also
# when it raises), and the block's value is returned. Without a block the
# open File is returned and the caller is responsible for closing it.
#
# Raises IOError when the gem data directory cannot be determined. A missing
# file raises whatever File.open raises (e.g. Errno::ENOENT).
def self.open_resource(path)
  # Choose a directory
  env = ENV['KLOOKUP_PATH']
  if env and env != ''
    dir = env + '/unihan'
  else
    begin
      gem 'unihan'
      dir = Gem.datadir 'unihan'
    rescue NameError, LoadError
      # NameError: RubyGems is not loaded. LoadError: the gem is not installed.
      raise IOError, 'Could not find resource %s' % path
    end
    # NOTE(review): Gem.datadir appears to return nil when the gem is not
    # loaded; treated like any other lookup failure. Confirm against RubyGems.
    raise IOError, 'Could not find resource %s' % path if dir.nil?
  end

  # File.open rather than Kernel#open, so the name is only ever treated as
  # a file-system path.
  file = File.open("#{dir}/#{path}")
  return file unless block_given?

  begin
    yield file
  ensure
    file.close unless file.closed?
  end
end
80
+
81
# Placeholder with an empty implementation: accepts any arguments and
# always returns nil.
def stroke_count_list(*args)
  nil
end
83
# Placeholder with an empty implementation: always returns nil.
def radicals_by_strokes
  nil
end
85
# Placeholder with an empty implementation: requires +strokes+, accepts any
# further arguments and always returns nil.
def get_kanji(strokes, *args)
  nil
end
87
# Placeholder with an empty implementation: accepts any arguments and
# always returns nil.
def get_radical_strokes(*args)
  nil
end
89
# Placeholder with an empty implementation: accepts any arguments and
# always returns nil.
def get_kanji_strokes(*args)
  nil
end
91
# Placeholder with an empty implementation: accepts any arguments and
# always returns nil.
def get_radicals(*args)
  nil
end
93
# Placeholder with an empty implementation: accepts any arguments and
# always returns nil.
def get_reading(*args)
  nil
end
95
# Placeholder with an empty implementation: accepts any arguments and
# always returns nil.
def get_meaning(*args)
  nil
end
97
# Placeholder with an empty implementation: accepts any arguments and
# always returns nil.
def is_kanji?(*args)
  nil
end
99
# Placeholder with an empty implementation: accepts any arguments and
# always returns nil.
def is_radical?(*args)
  nil
end
101
+ end