igo-ruby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "rspec", "~> 2.1.0"
10
+ gem "bundler", "~> 1.0.0"
11
+ gem "jeweler", "~> 1.5.1"
12
+ gem "rcov", ">= 0"
13
+ end
@@ -0,0 +1,28 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.2)
5
+ git (1.2.5)
6
+ jeweler (1.5.1)
7
+ bundler (~> 1.0.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rake (0.8.7)
11
+ rcov (0.9.9)
12
+ rspec (2.1.0)
13
+ rspec-core (~> 2.1.0)
14
+ rspec-expectations (~> 2.1.0)
15
+ rspec-mocks (~> 2.1.0)
16
+ rspec-core (2.1.0)
17
+ rspec-expectations (2.1.0)
18
+ diff-lcs (~> 1.1.2)
19
+ rspec-mocks (2.1.0)
20
+
21
+ PLATFORMS
22
+ ruby
23
+
24
+ DEPENDENCIES
25
+ bundler (~> 1.0.0)
26
+ jeweler (~> 1.5.1)
27
+ rcov
28
+ rspec (~> 2.1.0)
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 kyow
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ = igo-ruby
2
+
3
+ Ruby port of Igo Japanese morphological analyzer.
4
+
5
+ == Contributing to igo-ruby
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if such a change is otherwise necessary, that is fine, but please isolate it in its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 kyow. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+
12
+ require 'jeweler'
13
+ Jeweler::Tasks.new do |gem|
14
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
+ gem.name = "igo-ruby"
16
+ gem.homepage = "http://github.com/kyow/igo-ruby"
17
+ gem.license = "MIT"
18
+ gem.summary = %Q{Ruby port of Igo Japanese morphological analyzer.}
19
+ gem.description = %Q{Ruby port of Igo Japanese morphological analyzer.}
20
+ gem.email = "24signals@gmail.com"
21
+ gem.authors = ["K.Nishi"]
22
+ # Include your dependencies below. Runtime dependencies are required when using your gem,
23
+ # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
24
+ # gem.add_runtime_dependency 'jabber4r', '> 0.1'
25
+ # gem.add_development_dependency 'rspec', '> 1.2.3'
26
+
27
+ gem.files = Rake::FileList.new('lib/**/*.rb', '[A-Z]*')
28
+ gem.required_rubygems_version = ">1.3.6"
29
+ end
30
+ Jeweler::RubygemsDotOrgTasks.new
31
+
32
+ require 'rspec/core'
33
+ require 'rspec/core/rake_task'
34
+ RSpec::Core::RakeTask.new(:spec) do |spec|
35
+ spec.pattern = FileList['spec/**/*_spec.rb']
36
+ end
37
+
38
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
39
+ spec.pattern = 'spec/**/*_spec.rb'
40
+ spec.rcov = true
41
+ end
42
+
43
+ task :default => :spec
44
+
45
+ require 'rake/rdoctask'
46
+ Rake::RDocTask.new do |rdoc|
47
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
+
49
+ rdoc.rdoc_dir = 'rdoc'
50
+ rdoc.title = "igo-ruby #{version}"
51
+ rdoc.rdoc_files.include('README*')
52
+ rdoc.rdoc_files.include('lib/**/*.rb')
53
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,9 @@
1
# Entry point of the gem: puts this directory on the load path, loads the
# encoding helpers and registers Igo::Tagger for lazy loading.
$:.unshift(File.dirname(__FILE__))

require 'nkf'
require 'kconv'
# jcode was removed from the standard library in Ruby 1.9; requiring it
# unconditionally raises LoadError there, so only load it on 1.8.
require 'jcode' if RUBY_VERSION < '1.9'

module Igo
  # Loaded on first reference to Igo::Tagger.
  autoload :Tagger, 'igo/tagger'
end
@@ -0,0 +1,187 @@
1
+ #require 'trie'
2
+ #require 'util'
3
+ #require 'nkf'
4
+
5
+ # 辞書
6
+
7
# One node of the lattice explored by the tagger.
#
# Holds the word id, the start position and length of the covered text,
# the left/right context ids, a flag telling whether the node stands for
# whitespace, the accumulated cost (initially 0) and a link to the
# previous node (initially nil).
class ViterbiNode
  attr_accessor :cost, :prev, :word_id, :start, :length, :left_id, :right_id, :is_space

  def initialize(word_id, start, length, left_id, right_id, is_space)
    @word_id, @start, @length = word_id, start, length
    @left_id, @right_id = left_id, right_id
    @is_space = is_space
    @prev = nil
    @cost = 0
  end

  # Builds the all-zero, non-space node used as sentence boundary marker.
  def self.make_BOSEOS
    ViterbiNode.new(0, 0, 0, 0, 0, false)
  end
end
25
+
26
# Character category lookup backed by the "char.category" and
# "code2category" files of a dictionary directory.
class CharCategory
  # data_dir:: path of the dictionary directory.
  #
  # The code2category file is read as two int arrays of equal size: first
  # the character-code -> category-index table, then the compatibility masks.
  def initialize(data_dir)
    @categories = CharCategory.read_categories(data_dir)
    fmis = FileMappedInputStream.new(data_dir + "/code2category")
    begin
      entry_count = fmis.size / 4 / 2
      @char2id = fmis.get_int_array(entry_count)
      @eql_masks = fmis.get_int_array(entry_count)
    ensure
      # Release the file handle even when a read fails.
      fmis.close
    end
  end

  # Returns the Category registered for the character code +code+.
  def category(code)
    @categories[@char2id[code]]
  end

  # True when the compatibility masks of both character codes share a bit.
  def compatible?(code1, code2)
    (@eql_masks[code1] & @eql_masks[code2]) != 0
  end

  # Reads "char.category" as a flat int array and turns every complete
  # group of four ints (id, length, invoke flag, group flag) into a
  # Category; the two flags are true when the stored value is 1.
  def self.read_categories(data_dir)
    data = FileMappedInputStream.get_int_array(data_dir + "/char.category")
    (0...(data.size / 4)).map do |i|
      id, length, invoke, group = data[i * 4, 4]
      Category.new(id, length, invoke == 1, group == 1)
    end
  end
end
54
+
55
# Read-only record describing one character category: its id, a length,
# and the invoke / group flags.
class Category
  attr_reader :id, :length, :invoke, :group

  # i:: id, l:: length, iv:: invoke flag, g:: group flag
  def initialize(i, l, iv, g)
    @id, @length = i, l
    @invoke, @group = iv, g
  end
end
65
+
66
# Connection-cost table loaded from "matrix.bin" in a dictionary directory.
class Matrix
  # data_dir:: path of the dictionary directory.
  #
  # The file starts with two ints (left size, right size) followed by
  # left_size * right_size shorts.
  def initialize(data_dir)
    fmis = FileMappedInputStream.new(data_dir + "/matrix.bin")
    begin
      @left_size = fmis.get_int
      @right_size = fmis.get_int
      @matrix = fmis.get_short_array(@left_size * @right_size)
    ensure
      # Release the file handle even when a read fails.
      fmis.close
    end
  end

  # Returns the table entry at right_id * right_size + left_id.
  def link_cost(left_id, right_id)
    @matrix[right_id * @right_size + left_id]
  end
end
79
+
80
# Generates lattice nodes for text that is handled through character
# categories rather than through dictionary words.
class Unknown
  # data_dir:: path of the dictionary directory.
  def initialize(data_dir)
    @category = CharCategory.new(data_dir)
    # Category id of the ASCII space character.
    @space_id = @category.category(' '.unpack("U*")[0]).id
  end

  # Adds candidate nodes starting at +start+ to +result+.
  #
  # text::   UTF-8 string being analysed
  # start::  index (in code points) of the first character to look at
  # wdic::   object responding to search_from_trie_id (a WordDic)
  # result:: array the new nodes are appended to
  #
  # Nothing is added when +result+ already has entries and the category of
  # the character at +start+ does not have its invoke flag set.
  def search(text, start, wdic, result)
    txt = text.unpack("U*")
    length = txt.size
    ch = txt[start]
    ct = @category.category(ch)

    return if !result.empty? && !ct.invoke

    is_space = (ct.id == @space_id)
    limit = [length, ct.length + start].min

    # Candidates of 1 .. ct.length characters, stopping at the first
    # following character that is not compatible with the first one.
    (start...limit).each do |i|
      wdic.search_from_trie_id(ct.id, start, (i - start) + 1, is_space, result)

      # Compare code points: +txt+ holds them, +text+ is the raw String.
      return if (i + 1) != limit && !@category.compatible?(ch, txt[i + 1])
    end

    return unless ct.group && limit < length

    # Grouping categories also produce one candidate covering the whole
    # run of compatible characters.
    (limit...length).each do |i|
      unless @category.compatible?(ch, txt[i])
        wdic.search_from_trie_id(ct.id, start, i - start, is_space, result)
        return
      end
    end
    wdic.search_from_trie_id(ct.id, start, length - start, is_space, result)
  end
end
132
+
133
# Word dictionary: maps trie ids to word ids and exposes per-word
# context ids, costs and raw feature data.
class WordDic
  # data_dir:: path of the dictionary directory.
  #
  # word.inf is read as four consecutive arrays of word_count entries:
  # data offsets (ints), left ids, right ids and costs (shorts).
  def initialize(data_dir)
    @trie = Searcher.new(data_dir + "/word2id")
    @data = FileMappedInputStream.get_string(data_dir + "/word.dat")
    @indices = FileMappedInputStream.get_int_array(data_dir + "/word.ary.idx")

    fmis = FileMappedInputStream.new(data_dir + "/word.inf")
    begin
      word_count = fmis.size / (4 + 2 + 2 + 2)
      @data_offsets = fmis.get_int_array(word_count)
      @left_ids = fmis.get_short_array(word_count)
      @right_ids = fmis.get_short_array(word_count)
      @costs = fmis.get_short_array(word_count)
    ensure
      # Release the file handle even when a read fails.
      fmis.close
    end
  end

  # Returns the cost stored for +word_id+.
  def cost(word_id)
    @costs[word_id]
  end

  # Appends to +result+ a non-space ViterbiNode for every word whose key
  # the trie reports as a common prefix of +text+ from +start+.
  def search(text, start, result)
    on_match = lambda do |pos, offset, trie_id|
      search_from_trie_id(trie_id, pos, offset, false, result)
    end
    @trie.each_common_prefix(text, start, on_match)
  end

  # Appends to +result+ one ViterbiNode per word id registered for
  # +trie_id+, i.e. the ids from @indices[trie_id] up to (excluding)
  # @indices[trie_id + 1].
  def search_from_trie_id(trie_id, start, word_length, is_space, result)
    last = @indices[trie_id + 1] - 1
    (@indices[trie_id]..last).each do |i|
      result.push(ViterbiNode.new(i, start, word_length, @left_ids[i], @right_ids[i], is_space))
    end
  end

  # Returns the raw slice of word.dat belonging to +word_id+. Offsets are
  # doubled before slicing -- presumably two bytes per stored character
  # (the tagger decodes the result as UTF-16); TODO confirm.
  def word_data(word_id)
    first = @data_offsets[word_id] * 2
    last = @data_offsets[word_id + 1] * 2 - 1
    @data.slice(first..last)
  end
end
187
+
@@ -0,0 +1,144 @@
1
+ require 'igo/dictionary'
2
+ require 'igo/trie'
3
+
4
module Igo

  # One analysed unit: its surface text, its feature string and the start
  # position (in code points) inside the analysed text.
  class Morpheme
    attr_accessor :surface, :feature, :start

    def initialize(surface, feature, start)
      @surface = surface
      @feature = feature
      @start = start
    end
  end

  # Performs morphological analysis.
  class Tagger
    # Node list placed at the beginning of every lattice.
    def self.__BOS_NODES
      [ViterbiNode.make_BOSEOS]
    end

    # dir:: path of the dictionary directory.
    def initialize(dir)
      @wdc = WordDic.new(dir)
      @unk = Unknown.new(dir)
      @mtx = Matrix.new(dir)
    end

    # Analyses +text+ and appends one Morpheme per unit to +result+,
    # which is also the return value. The feature is the word data
    # converted to UTF-8 through NKF.
    def parse(text, result = [])
      vn = impl(text, result)
      txt = text.unpack("U*")
      while vn
        surface = txt.slice(vn.start, vn.length).pack("U*")
        feature = NKF.nkf('-W16L0 --utf8', @wdc.word_data(vn.word_id))
        result.push(Morpheme.new(surface, feature, vn.start))
        vn = vn.prev
      end
      result
    end

    # Splits +text+ into units and appends their surface strings to
    # +result+, which is also the return value.
    def wakati(text, result = [])
      vn = impl(text, result)
      txt = text.unpack("U*")
      while vn
        result.push(txt.slice(vn.start, vn.length).pack("U*"))
        vn = vn.prev
      end
      result
    end

    private

    # Builds the lattice for +text+ and returns the first node of the
    # cheapest path; following +prev+ from it walks the path in text
    # order. Returns nil for empty text. +result+ is not used here.
    def impl(text, result = [])
      len = text.unpack("U*").size

      # node_ary[k] holds the nodes whose covered text ends at position k.
      node_ary = [Tagger.__BOS_NODES]
      len.times { node_ary.push([]) }

      (0...len).each do |i|
        prevs = node_ary[i]
        next if prevs.empty?

        per_result = []
        @wdc.search(text, i, per_result)
        @unk.search(text, i, @wdc, per_result)

        per_result.each do |vn|
          if vn.is_space
            # Whitespace is skipped: the predecessors are carried over to
            # the position after it. They are appended (copied) so nodes
            # already ending there are kept and the two positions do not
            # end up sharing one array.
            node_ary[i + vn.length].concat(prevs)
          else
            node_ary[i + vn.length].push(set_min_cost_node(vn, prevs))
          end
        end
      end

      cur = set_min_cost_node(ViterbiNode.make_BOSEOS, node_ary[len]).prev

      # The path is linked from the end towards the start; reverse the
      # links in place, dropping the start marker (the node without prev).
      head = nil
      while cur.prev
        following = cur.prev
        cur.prev = head
        head = cur
        cur = following
      end
      head
    end

    # Links +vn+ to the entry of +prevs+ giving the lowest accumulated
    # cost (the earliest entry wins ties), stores that cost plus the
    # word's own cost in +vn+ and returns +vn+.
    def set_min_cost_node(vn, prevs)
      first = vn.prev = prevs[0]
      vn.cost = first.cost + @mtx.link_cost(first.right_id, vn.left_id)

      prevs.drop(1).each do |candidate|
        cost = candidate.cost + @mtx.link_cost(candidate.right_id, vn.left_id)
        if cost < vn.cost
          vn.cost = cost
          vn.prev = candidate
        end
      end

      vn.cost += @wdc.cost(vn.word_id)
      vn
    end
  end

end
@@ -0,0 +1,203 @@
1
+ require 'igo/util'
2
+
3
class String
  # True when the receiver begins with +prefix+ (converted with to_s).
  def starts_with?(prefix)
    needle = prefix.to_s
    head = self[0, needle.length]
    head == needle
  end
end
9
+
10
# Helpers and constants for the base / check arrays of the trie.
class Node
  class Base
    # Converts a negative base value into the id it encodes:
    # -1 -> 0, -2 -> 1, ...
    def self.ids(nid)
      -nid - 1
    end
  end

  class Chck
    TERMINATE_CODE = 0
    TERMINATE_CHAR = TERMINATE_CODE.chr
    VACANT_CODE = 1
    CODE_LIMIT = 0xffff
  end
end
24
+
25
# Cursor over the code points of a key string.
class KeyStream

  # key::   UTF-8 string
  # start:: index (in code points) of the first character to read
  def initialize(key, start = 0)
    @s = key
    @cur = start
    # Decode once; read/start_with are called for every character.
    @codes = key.unpack("U*")
    @len = @codes.size
  end

  # Compares the unread remainder with that of +ks+ (-1, 0 or 1).
  def compare_to(ks)
    rest <=> ks.rest
  end

  # False when fewer than +len+ characters remain. Otherwise checks the
  # character at the cursor against prefix.slice(beg, len - beg).
  # NOTE(review): only a single character is compared and the slice
  # length is len - beg; this looks like an incomplete port of a
  # character-by-character comparison -- confirm the intended behaviour
  # and the layout of +prefix+ before changing it.
  def start_with(prefix, beg, len)
    return false if @len - @cur < len

    word = @codes[@cur]
    expected = prefix.slice(beg, len - beg)
    if word.nil?
      expected.nil?
    else
      [word].pack("U*").starts_with?(expected)
    end
  end

  # Returns the part of the key from the cursor onwards.
  def rest
    @s.slice(@cur, @s.length)
  end

  # Returns the code point at the cursor and advances, or
  # Node::Chck::TERMINATE_CODE (without advancing) at the end of the key.
  def read
    return Node::Chck::TERMINATE_CODE if eos?

    code = @codes[@cur]
    @cur += 1
    code
  end

  # True once every character has been read (also when the cursor was
  # started beyond the end of the key).
  def eos?
    @cur >= @len
  end
end
83
+
84
# Class for searching a DoubleArray (double-array trie).
class Searcher
  # path:: file holding the trie.
  #
  # The file starts with three ints (node count, key count, tail size)
  # followed by the begs, base, lens, chck and tail sections.
  def initialize(path)
    fmis = FileMappedInputStream.new(path)
    begin
      node_size = fmis.get_int
      tind_size = fmis.get_int
      tail_size = fmis.get_int
      @key_set_size = tind_size
      @begs = fmis.get_int_array(tind_size)
      @base = fmis.get_int_array(node_size)
      @lens = fmis.get_short_array(tind_size)
      @chck = fmis.get_char_array(node_size)
      @tail = fmis.get_string(tail_size)
    ensure
      # Release the file handle even when a read fails.
      fmis.close
    end
  end

  # Number of keys stored in the trie.
  def size
    @key_set_size
  end

  # Returns the id of +key+, or -1 when the key is not stored.
  def search(key)
    node = @base[0]
    kin = KeyStream.new(key)

    loop do
      code = kin.read
      idx = node + code
      node = @base[idx]

      # The check entry must match the character that led here; anything
      # else means the key is absent.
      return -1 unless @chck[idx] == code
      next if node >= 0
      return Node::Base.ids(node) if kin.eos? || key_exists?(kin, node)
      return -1
    end
  end

  # Walks +key+ from +start+ and invokes callback.call(start, offset, id)
  # for every stored key found along the way.
  def each_common_prefix(key, start, callback)
    node = @base[0]
    offset = -1
    kin = KeyStream.new(key, start)

    loop do
      code = kin.read
      offset += 1
      terminal_index = node

      if @chck[terminal_index] == Node::Chck::TERMINATE_CODE
        callback.call(start, offset, Node::Base.ids(@base[terminal_index]))
        return if code == Node::Chck::TERMINATE_CODE
      end

      idx = node + code
      node = @base[idx]

      return unless @chck[idx] == code
      next if node >= 0

      # Negative base value: the rest of the key lives in the tail.
      call_if_key_including(kin, node, start, offset, callback)
      return
    end
  end

  private

  # Invokes the callback when the stream continues with the tail entry
  # that the negative base value +node+ refers to.
  def call_if_key_including(kin, node, start, offset, callback)
    node_id = Node::Base.ids(node)
    if kin.start_with(@tail, @begs[node_id], @lens[node_id])
      callback.call(start, offset + @lens[node_id] + 1, node_id)
    end
  end

  # True when the unread part of +kin+ equals the tail entry that +node+
  # refers to.
  # NOTE(review): @tail is the raw string returned by get_string while
  # @begs / @lens are used here as plain indices into it -- confirm the
  # units (bytes vs. characters) against the dictionary format.
  def key_exists?(kin, node)
    nid = Node::Base.ids(node)
    beg = @begs[nid]
    # String#slice takes (start, length), not (start, end).
    kin.rest == @tail.slice(beg, @lens[nid])
  end
end
203
+
@@ -0,0 +1,74 @@
1
+ # ユーティリティ
2
+
3
# Sequential binary reader for the dictionary files. Despite the name the
# file is not memory mapped; values are read with File#read and decoded
# with String#unpack in native byte order.
class FileMappedInputStream
  # Opens +path+ read-only in binary mode. File.open is used instead of
  # Kernel#open so that a path starting with "|" is never run as a
  # command, and the dictionary does not need to be writable.
  def initialize(path)
    @path = path
    @cur = 0
    @file = File.open(path, "rb")
  end

  # Reads one 4-byte int.
  def get_int
    @file.read(4).unpack("i*")[0]
  end

  # Reads +count+ 4-byte ints.
  def get_int_array(count)
    @file.read(count * 4).unpack("i*")
  end

  # Reads the whole file at +path+ as an int array.
  def self.get_int_array(path)
    fmis = FileMappedInputStream.new(path)
    begin
      fmis.get_int_array(File.stat(path).size / 4)
    ensure
      fmis.close
    end
  end

  # Reads +count+ signed 2-byte shorts.
  def get_short_array(count)
    @file.read(count * 2).unpack("s*")
  end

  # Reads +count+ unsigned 2-byte values.
  def get_char_array(count)
    @file.read(count * 2).unpack("S!*")
  end

  # Reads +count+ 2-byte units and returns them undecoded as one String.
  def get_string(count)
    @file.read(count * 2)
  end

  # Reads the whole file at +path+ as one undecoded String.
  def self.get_string(path)
    fmis = FileMappedInputStream.new(path)
    begin
      fmis.get_string(File.stat(path).size / 2)
    ensure
      fmis.close
    end
  end

  # Size of the underlying file in bytes.
  def size
    File.stat(@path).size
  end

  # Closes the underlying file.
  def close
    @file.close
  end

  # Reads the whole file at +path+ as an array of unsigned 2-byte values.
  def self.get_char_array(path)
    fmis = FileMappedInputStream.new(path)
    begin
      fmis.get_char_array(fmis.size / 2)
    ensure
      fmis.close
    end
  end

  private

  # Reads +size+ bytes from the position tracked in @cur and advances it.
  # Not used by the public readers above.
  def __map(size)
    @file.pos = @cur
    @cur += size
    @file.read(size)
  end
end
74
+
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "IgoRuby" do
4
+ it "fails" do
5
+ fail "hey buddy, you should probably rename this file and start specing for real"
6
+ end
7
+ end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'igo-ruby'
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+
12
+ end
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'igo-ruby'
3
+ tagger = Igo::Tagger.new('../ipadic')
4
+ t = tagger.parse('吾輩は猫である。名前はまだ無い。')
5
+ t.each{|m|
6
+ puts "#{m.surface} #{m.feature} #{m.start}"
7
+ }
8
+ t = tagger.wakati('どこで生れたかとんと見当がつかぬ。')
9
+ puts t.join(' ')
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: igo-ruby
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - K.Nishi
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-11 00:00:00 +09:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ prerelease: false
23
+ version_requirements: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 11
29
+ segments:
30
+ - 2
31
+ - 1
32
+ - 0
33
+ version: 2.1.0
34
+ name: rspec
35
+ requirement: *id001
36
+ type: :development
37
+ - !ruby/object:Gem::Dependency
38
+ prerelease: false
39
+ version_requirements: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ hash: 23
45
+ segments:
46
+ - 1
47
+ - 0
48
+ - 0
49
+ version: 1.0.0
50
+ name: bundler
51
+ requirement: *id002
52
+ type: :development
53
+ - !ruby/object:Gem::Dependency
54
+ prerelease: false
55
+ version_requirements: &id003 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ~>
59
+ - !ruby/object:Gem::Version
60
+ hash: 1
61
+ segments:
62
+ - 1
63
+ - 5
64
+ - 1
65
+ version: 1.5.1
66
+ name: jeweler
67
+ requirement: *id003
68
+ type: :development
69
+ - !ruby/object:Gem::Dependency
70
+ prerelease: false
71
+ version_requirements: &id004 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 3
77
+ segments:
78
+ - 0
79
+ version: "0"
80
+ name: rcov
81
+ requirement: *id004
82
+ type: :development
83
+ description: Ruby port of Igo Japanese morphological analyzer.
84
+ email: 24signals@gmail.com
85
+ executables: []
86
+
87
+ extensions: []
88
+
89
+ extra_rdoc_files:
90
+ - LICENSE.txt
91
+ - README.rdoc
92
+ files:
93
+ - Gemfile
94
+ - Gemfile.lock
95
+ - LICENSE.txt
96
+ - README.rdoc
97
+ - Rakefile
98
+ - VERSION
99
+ - lib/igo-ruby.rb
100
+ - lib/igo/dictionary.rb
101
+ - lib/igo/tagger.rb
102
+ - lib/igo/trie.rb
103
+ - lib/igo/util.rb
104
+ - spec/igo-ruby_spec.rb
105
+ - spec/spec_helper.rb
106
+ - test/test.rb
107
+ has_rdoc: true
108
+ homepage: http://github.com/kyow/igo-ruby
109
+ licenses:
110
+ - MIT
111
+ post_install_message:
112
+ rdoc_options: []
113
+
114
+ require_paths:
115
+ - lib
116
+ required_ruby_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ hash: 3
122
+ segments:
123
+ - 0
124
+ version: "0"
125
+ required_rubygems_version: !ruby/object:Gem::Requirement
126
+ none: false
127
+ requirements:
128
+ - - ">"
129
+ - !ruby/object:Gem::Version
130
+ hash: 23
131
+ segments:
132
+ - 1
133
+ - 3
134
+ - 6
135
+ version: 1.3.6
136
+ requirements: []
137
+
138
+ rubyforge_project:
139
+ rubygems_version: 1.3.7
140
+ signing_key:
141
+ specification_version: 3
142
+ summary: Ruby port of Igo Japanese morphological analyzer.
143
+ test_files:
144
+ - spec/igo-ruby_spec.rb
145
+ - spec/spec_helper.rb
146
+ - test/test.rb