RubyGems - igo-ruby - Versions diffs - 0.1.2 → 0.1.3 - Mend

igo-ruby 0.1.2 → 0.1.3

Files changed (10)

data/README.rdoc CHANGED

@@ -1,19 +1,68 @@
 = igo-ruby
+igo-rubyはJavaおよびCommon Lispで実装された形態素解析器 Igo[http://igo.sourceforge.jp] のRuby実装です。
-Description goes here.
+igo-rubyでは、 Igo[http://igo.sourceforge.jp] と同一の解析用辞書ファイルを使用します。
+従って Igo[http://igo.sourceforge.jp] の機能を使用して解析用辞書ファイルを生成する必要があります。
-== Contributing to igo-ruby
-* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
-* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
-* Fork the project
-* Start a feature/bugfix branch
-* Commit and push until you are happy with your contribution
-* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
-* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
+== インストール方法
+コマンドプロンプトより以下を実行してください。
+  $ gem install igo-ruby
-== Copyright
+== 解析用辞書ファイルの生成
+{Igoのインストール/使い方}[http://igo.sourceforge.jp/index.html#usage] を参照してください。
+== サンプル
+=== 形態素解析
+  require 'rubygems'
+  require 'igo-ruby'
+  tagger = Igo::Tagger.new('../../ipadic')  # 解析用辞書のディレクトリを指定
+  t = tagger.parse('吾輩は猫である。名前はまだ無い。')
+  t.each{|m|
+    puts "#{m.surface} #{m.feature} #{m.start}"
+  }
+  # 実行結果
+  吾輩 名詞,代名詞,一般,*,*,*,吾輩,ワガハイ,ワガハイ 0
+  は 助詞,係助詞,*,*,*,*,は,ハ,ワ 2
+  猫 名詞,一般,*,*,*,*,猫,ネコ,ネコ 3
+  で 助動詞,*,*,*,特殊・ダ,連用形,だ,デ,デ 4
+  ある 助動詞,*,*,*,五段・ラ行アル,基本形,ある,アル,アル 5
+  。 記号,句点,*,*,*,*,。,。,。 7
+  名前 名詞,一般,*,*,*,*,名前,ナマエ,ナマエ 8
+  は 助詞,係助詞,*,*,*,*,は,ハ,ワ 10
+  まだ 副詞,助詞類接続,*,*,*,*,まだ,マダ,マダ 11
+  無い 形容詞,自立,*,*,形容詞・アウオ段,基本形,無い,ナイ,ナイ 13
+  。 記号,句点,*,*,*,*,。,。,。 15
+=== 分かち書き
+  require 'rubygems'
+  require 'igo-ruby'
+  tagger = Igo::Tagger.new('../../ipadic')  # 解析用辞書のディレクトリを指定
+  t = tagger.wakati('どこで生れたかとんと見当がつかぬ。')
+  puts t.join(' ')
+  # 実行結果
+  どこ で 生れ た か とんと 見当 が つか ぬ 。
-Copyright (c) 2010 kyow. See LICENSE.txt for
-further details.
+=== ウェブアプリ例
+* {igo-ruby.heroku.com}[http://igo-ruby.heroku.com/]
+== 付録
+=== 公開場所
+* RubyGems
+  * igo-ruby[https://rubygems.org/gems/igo-ruby]
+* ソース(github)
+  * {kyow/igo-ruby}[https://github.com/kyow/igo-ruby]
+=== 参照
+* Igo
+  1. {Igo - Java形態素解析器}[http://igo.sourceforge.jp/index.html]
+  2. {Igo}[http://sourceforge.jp/projects/igo/releases/]
+* Igo-python
+  1. {igo-python 0.3a}[http://pypi.python.org/pypi/igo-python/0.3a]
+  2. {Igo Japanese morphological analyzer for python}[https://launchpad.net/igo-python/]
+== Copyright
+Copyright (c) kyow, 2010. See LICENSE.txt for further details.

data/VERSION CHANGED

@@ -1 +1 @@
-0.1.2
+0.1.3

data/lib/igo-ruby.rb CHANGED

@@ -1,8 +1,9 @@
+# coding: utf-8
 #
 #= 形態素解析エンジンIgoのRuby実装
 #解析結果がほぼMeCab互換の形態素解析エンジン"Igo"のRuby実装
 #
-#Copyright:: Copyright (C) K.Nishi, 2010. All rights reserved.
+#Copyright:: Copyright (c) kyow, 2010
 #Authors:: K.Nishi
 #License:: MIT License ただし、使用する辞書のライセンスに関しては、辞書配布元のそれに準ずる
 #
@@ -28,7 +29,6 @@
 $:.unshift(File.dirname(__FILE__))
 require 'nkf'
-require 'jcode'
 require 'kconv'
 #
@@ -36,4 +36,5 @@ require 'kconv'
 #
 module Igo
   autoload :Tagger, 'igo/tagger'
+  autoload :Version, 'igo/version'
 end

data/lib/igo/dictionary.rb CHANGED

@@ -1,179 +1,185 @@
-#辞書クラス群
+# coding: utf-8
+#= 辞書クラス群
-#
-# Viterbiアルゴリズムで使用されるノードクラス
-#
-class ViterbiNode
-  attr_accessor :cost, :prev, :word_id, :start, :length, :left_id, :right_id, :is_space
-  def initialize(word_id, start, length, left_id, right_id, is_space)
-    @cost = 0            # 始点からノードまでの総コスト
-    @prev = nil          # コスト最小の前方のノードへのリンク
-    @word_id = word_id   # 単語ID
-    @start = start       # 入力テキスト内での形態素の開始位置
-    @length = length     # 形態素の表層形の長さ(文字数)
-    @left_id = left_id   # 左文脈ID
-    @right_id = right_id # 右文脈ID
-    @is_space = is_space # 形態素の文字種(文字カテゴリ)が空白かどうか
-  end
+module Igo
+  #
+  # Viterbiアルゴリズムで使用されるノードクラス
+  #
+  class ViterbiNode
+    attr_accessor :cost, :prev, :word_id, :start, :length, :left_id, :right_id, :is_space
+    def initialize(word_id, start, length, left_id, right_id, is_space)
+      @cost = 0            # 始点からノードまでの総コスト
+      @prev = nil          # コスト最小の前方のノードへのリンク
+      @word_id = word_id   # 単語ID
+      @start = start       # 入力テキスト内での形態素の開始位置
+      @length = length     # 形態素の表層形の長さ(文字数)
+      @left_id = left_id   # 左文脈ID
+      @right_id = right_id # 右文脈ID
+      @is_space = is_space # 形態素の文字種(文字カテゴリ)が空白かどうか
+    end
-  def self.make_BOSEOS
-    return ViterbiNode.new(0, 0, 0, 0, 0, false)
+    def self.make_BOSEOS
+      return ViterbiNode.new(0, 0, 0, 0, 0, false)
+    end
   end
-end
-class CharCategory
-  def initialize(data_dir)
-    @categories = CharCategory.read_categories(data_dir)
-    fmis = FileMappedInputStream.new(data_dir + "/code2category")
-    @char2id = fmis.get_int_array(fmis.size / 4 / 2)
-    @eql_masks = fmis.get_int_array(fmis.size / 4 /2)
-    fmis.close
-  end
+  class CharCategory
+    def initialize(data_dir)
+      @categories = CharCategory.read_categories(data_dir)
+      fmis = FileMappedInputStream.new(data_dir + "/code2category")
+      @char2id = fmis.get_int_array(fmis.size / 4 / 2)
+      @eql_masks = fmis.get_int_array(fmis.size / 4 /2)
+      fmis.close
+    end
-  def category(code)
-    return @categories[@char2id[code]]
-  end
+    def category(code)
+      return @categories[@char2id[code]]
+    end
-  def compatible?(code1, code2)
-    return (@eql_masks[code1] & @eql_masks[code2]) != 0
-  end
+    def compatible?(code1, code2)
+      return (@eql_masks[code1] & @eql_masks[code2]) != 0
+    end
-  def self.read_categories(data_dir)
-    data = FileMappedInputStream::get_int_array(data_dir + "/char.category")
-    size = data.size / 4
-    ary = []
-    for i in 0 .. (size - 1)
-      ary.push(Category.new(data[i * 4], data[i * 4 + 1], data[i * 4 + 2] == 1, data[i * 4 + 3] == 1))
-    end
-    return ary
+    def self.read_categories(data_dir)
+      data = FileMappedInputStream::get_int_array(data_dir + "/char.category")
+      size = data.size / 4
+      ary = []
+      for i in 0 .. (size - 1)
+        ary.push(Category.new(data[i * 4], data[i * 4 + 1], data[i * 4 + 2] == 1, data[i * 4 + 3] == 1))
+      end
+      return ary
+    end
   end
-end
-class Category
-  attr_reader :id, :length, :invoke, :group
-  def initialize(i, l, iv, g)
-    @id = i
-    @length = l
-    @invoke = iv
-    @group = g
+  class Category
+    attr_reader :id, :length, :invoke, :group
+    def initialize(i, l, iv, g)
+      @id = i
+      @length = l
+      @invoke = iv
+      @group = g
+    end
   end
-end
-#
-# 形態素の連接コスト表クラス
-#
-class Matrix
-  # コンストラクタ
-  # data_dir:: 辞書ファイルのディレクトリパス
-  def initialize(data_dir)
-    fmis = FileMappedInputStream.new(data_dir + "/matrix.bin")
-    @left_size = fmis.get_int
-    @right_size = fmis.get_int
-    @matrix = fmis.get_short_array(@left_size * @right_size)
-    fmis.close
-  end
+  #
+  # 形態素の連接コスト表クラス
+  #
+  class Matrix
+    # コンストラクタ
+    # data_dir:: 辞書ファイルのディレクトリパス
+    def initialize(data_dir)
+      fmis = FileMappedInputStream.new(data_dir + "/matrix.bin")
+      @left_size = fmis.get_int
+      @right_size = fmis.get_int
+      @matrix = fmis.get_short_array(@left_size * @right_size)
+      fmis.close
+    end
-  # 形態素同士の連接コストを求める
-  # left_id:: 左文脈ID
-  # right_id:: 右文脈ID
-  def link_cost(left_id, right_id)
-    return @matrix[right_id * @right_size + left_id]
+    # 形態素同士の連接コストを求める
+    # left_id:: 左文脈ID
+    # right_id:: 右文脈ID
+    def link_cost(left_id, right_id)
+      return @matrix[right_id * @right_size + left_id]
+    end
   end
-end
-#
-# 未知語の検索を行うクラス
-#
-class Unknown
+  #
+  # 未知語の検索を行うクラス
+  #
+  class Unknown
-  # コンストラクタ
-  #data_dir:: 辞書ファイルのディレクトリパス
-  def initialize(data_dir)
-    # 文字カテゴリ管理クラス
-    @category = CharCategory.new(data_dir)
+    # コンストラクタ
+    #data_dir:: 辞書ファイルのディレクトリパス
+    def initialize(data_dir)
+      # 文字カテゴリ管理クラス
+      @category = CharCategory.new(data_dir)
-    # 文字カテゴリが空白の文字のID
-    @space_id = @category.category(' '.unpack("U*")[0]).id
-  end
+      # 文字カテゴリが空白の文字のID
+      @space_id = @category.category(' '.unpack("U*")[0]).id
+    end
-  # 検索
-  def search(text, start, wdic, result)
-    txt = text.unpack("U*")
-    length = txt.size
-    ch = txt[start]
-    ct = @category.category(ch)
+    # 検索
+    #text::
+    #start::
+    #wdic::
+    #result::
+    def search(text, start, wdic, result)
+      txt = text.unpack("U*")
+      length = txt.size
+      ch = txt[start]
+      ct = @category.category(ch)
-    if !result.empty? and !ct.invoke
-      return
-    end
+      if !result.empty? and !ct.invoke
+        return
+      end
-    is_space = (ct.id == @space_id)
-    limit = [length, ct.length + start].min
+      is_space = (ct.id == @space_id)
+      limit = [length, ct.length + start].min
-    for i in start..(limit - 1)
-      wdic.search_from_trie_id(ct.id, start, (i - start) + 1, is_space, result)
+      for i in start..(limit - 1)
+        wdic.search_from_trie_id(ct.id, start, (i - start) + 1, is_space, result)
-      if((i + 1) != limit and !(@category.compatible?(ch, text[i + 1])))
-        return
+        if((i + 1) != limit and !(@category.compatible?(ch, text[i + 1])))
+          return
+        end
       end
-    end
-    if ct.group and limit < length
-      for i in limit..(length - 1)
-        if not @category.compatible?(ch, txt[i])
-          wdic.search_from_trie_id(ct.id, start, i - start, is_space, result)
-          return
+      if ct.group and limit < length
+        for i in limit..(length - 1)
+          if not @category.compatible?(ch, txt[i])
+            wdic.search_from_trie_id(ct.id, start, i - start, is_space, result)
+            return
+          end
         end
+        wdic.search_from_trie_id(ct.id, start, length - start, is_space, result)
       end
-      wdic.search_from_trie_id(ct.id, start, length - start, is_space, result)
     end
   end
-end
-class WordDic
-  # コンストラクタ
-  #data_dir:: 辞書ファイルのディレクトリパス
-  def initialize(data_dir)
-    @trie = Searcher.new(data_dir + "/word2id")
-    @data = FileMappedInputStream.get_string(data_dir + "/word.dat")
-    @indices = FileMappedInputStream.get_int_array(data_dir + "/word.ary.idx")
+  class WordDic
+    # コンストラクタ
+    #data_dir:: 辞書ファイルのディレクトリパス
+    def initialize(data_dir)
+      @trie = Searcher.new(data_dir + "/word2id")
+      @data = FileMappedInputStream.get_string(data_dir + "/word.dat")
+      @indices = FileMappedInputStream.get_int_array(data_dir + "/word.ary.idx")
-    fmis = FileMappedInputStream.new(data_dir + "/word.inf")
-    word_count = fmis.size / (4 + 2 + 2 + 2)
-    @data_offsets = fmis.get_int_array(word_count)   # 単語の素性データの開始位置
-    @left_ids     = fmis.get_short_array(word_count) # 単語の左文脈ID
-    @right_ids    = fmis.get_short_array(word_count) # 単語の右文脈ID
-    @costs        = fmis.get_short_array(word_count) # 単語のコスト
-    fmis.close
-  end
+      fmis = FileMappedInputStream.new(data_dir + "/word.inf")
+      word_count = fmis.size / (4 + 2 + 2 + 2)
+      @data_offsets = fmis.get_int_array(word_count)   # 単語の素性データの開始位置
+      @left_ids     = fmis.get_short_array(word_count) # 単語の左文脈ID
+      @right_ids    = fmis.get_short_array(word_count) # 単語の右文脈ID
+      @costs        = fmis.get_short_array(word_count) # 単語のコスト
+      fmis.close
+    end
-  def cost(word_id)
-    return @costs[word_id]
-  end
+    def cost(word_id)
+      return @costs[word_id]
+    end
-  def search(text, start, result)
-    indices = @indices
-    left_ids = @left_ids
-    right_ids = @right_ids
+    def search(text, start, result)
+      indices = @indices
+      left_ids = @left_ids
+      right_ids = @right_ids
-    @trie.each_common_prefix(text, start, Proc.new { |start, offset, trie_id|
-      ed = @indices[trie_id + 1]
+      @trie.each_common_prefix(text, start, Proc.new { |start, offset, trie_id|
+        ed = @indices[trie_id + 1]
-      for i in indices[trie_id]..(ed - 1)
-        result.push(ViterbiNode.new(i, start, offset, @left_ids[i], right_ids[i], false))
-      end
-    })
-  end
+        for i in indices[trie_id]..(ed - 1)
+          result.push(ViterbiNode.new(i, start, offset, @left_ids[i], right_ids[i], false))
+        end
+      })
+    end
-  def search_from_trie_id(trie_id, start, word_length, is_space, result)
-    ed = @indices[trie_id + 1]
-    for i in @indices[trie_id]..(ed - 1)
-      result.push(ViterbiNode.new(i, start, word_length, @left_ids[i], @right_ids[i], is_space))
+    def search_from_trie_id(trie_id, start, word_length, is_space, result)
+      ed = @indices[trie_id + 1]
+      for i in @indices[trie_id]..(ed - 1)
+        result.push(ViterbiNode.new(i, start, word_length, @left_ids[i], @right_ids[i], is_space))
+      end
     end
-  end
-  def word_data(word_id)
-    return @data.slice(@data_offsets[word_id]*2..@data_offsets[word_id + 1]*2 - 1)
+    def word_data(word_id)
+      return @data.slice(@data_offsets[word_id]*2..@data_offsets[word_id + 1]*2 - 1)
+    end
   end
 end

data/lib/igo/tagger.rb CHANGED

@@ -1,3 +1,4 @@
+# coding: utf-8
 #形態素解析と分かち書きを行う機能の実装
 require 'igo/dictionary'

data/lib/igo/trie.rb CHANGED

@@ -1,201 +1,210 @@
+# coding: utf-8
 require 'igo/util'
+#
+#Stringクラスの拡張
+#
 class String
+  # 文字列がパラメタの接頭辞で開始するかどうかを返却する
+  #prefix:: 接頭辞
+  #return:: true - 接頭辞で開始する
   def starts_with?(prefix)
     prefix = prefix.to_s
     self[0, prefix.length] == prefix
   end
 end
-#
-#DoubleArrayのノード用の定数などが定義されているクラス
-#
-class Node
+module Igo
   #
-  #BASEノード用のメソッドが定義されているクラス
+  #DoubleArrayのノード用の定数などが定義されているクラス
   #
-  class Base
-    #BASEノードに格納するID値をエンコードする
-    def self.ids(nid)
-      return (-1 * nid) - 1
+  class Node
+    #
+    #BASEノード用のメソッドが定義されているクラス
+    #
+    class Base
+      #BASEノードに格納するID値をエンコードする
+      def self.ids(nid)
+        return (-1 * nid) - 1
+      end
     end
-  end
-  #
-  #CHECKノード用の定数が定義されているクラス
-  #
-  class Chck
-    #文字列の終端文字コード
-    #この文字はシステムにより予約されており、辞書内の形態素の表層形および解析対象テキストに含まれていた場合の動作は未定義
-    TERMINATE_CODE = 0
-    #文字列の終端を表す文字定数
-    TERMINATE_CHAR = TERMINATE_CODE.chr
-    #CHECKノードが未使用であることを示す文字コード
-    #この文字はシステムにより予約されており、辞書内の形態素の表層形および解析対象テキストに含まれていた場合の動作は未定義
-    VACANT_CODE = 1
-    #使用可能な文字の最大値
-    CODE_LIMIT = 0xffff
+    #
+    #CHECKノード用の定数が定義されているクラス
+    #
+      class Chck
+      #文字列の終端文字コード
+      #この文字はシステムにより予約されており、辞書内の形態素の表層形および解析対象テキストに含まれていた場合の動作は未定義
+      TERMINATE_CODE = 0
+      #文字列の終端を表す文字定数
+      TERMINATE_CHAR = TERMINATE_CODE.chr
+      #CHECKノードが未使用であることを示す文字コード
+      #この文字はシステムにより予約されており、辞書内の形態素の表層形および解析対象テキストに含まれていた場合の動作は未定義
+      VACANT_CODE = 1
+      #使用可能な文字の最大値
+      CODE_LIMIT = 0xffff
+    end
   end
-end
-#
-#文字列を文字のストリームとして扱うためのクラス
-#* readメソッドで個々の文字を順に読み込み、文字列の終端に達した場合にはNode::Chck::TERMINATE_CODEが返される。
-#
-class KeyStream
-  def initialize(key, start = 0)
-    @s = key
-    @cur = start
-    @len = key.unpack("U*").size
-  end
-  def compare_to(ks)
-    return rest.compare_to(ks.rest)
-  end
+  #
+  #文字列を文字のストリームとして扱うためのクラス
+  #* readメソッドで個々の文字を順に読み込み、文字列の終端に達した場合にはNode::Chck::TERMINATE_CODEが返される。
+  #
+  class KeyStream
-  #このメソッドは動作的には、rest().starts_with?(prefix.substring(beg, len))と等価。
-  #ほんの若干だが、パフォーマンスを改善するために導入。
-  #簡潔性のためになくしても良いかもしれない。
-  def start_with(prefix, beg, len)
-    s = @s
-    c = @cur
-    if @len - c < len
-      return false
+    def initialize(key, start = 0)
+      @s = key
+      @cur = start
+      @len = key.unpack("U*").size
     end
-    word = s.unpack("U*")[c]
-    if word.nil?
-      return (prefix.slice(beg, len-beg) == nil)
-    else
-      [word].pack("U*").starts_with?(prefix.slice(beg, len-beg))
+    def compare_to(ks)
+      return rest.compare_to(ks.rest)
     end
-  end
-  def rest
-    return @s.slice(@cur, @s.length)
-  end
+    #このメソッドは動作的には、rest().starts_with?(prefix.substring(beg, len))と等価。
+    #ほんの若干だが、パフォーマンスを改善するために導入。
+    #簡潔性のためになくしても良いかもしれない。
+    def start_with(prefix, beg, len)
+      s = @s
+      c = @cur
+      if @len - c < len
+        return false
+      end
+      word = s.unpack("U*")[c]
+      if word.nil?
+        return (prefix.slice(beg, len-beg) == nil)
+      else
+        [word].pack("U*").starts_with?(prefix.slice(beg, len-beg))
+      end
+    end
+    def rest
+      return @s.slice(@cur, @s.length)
+    end
-  def read
+    def read
-    if eos?
-      return Node::Chck::TERMINATE_CODE
-    else
-      r = @s.unpack("U*")[@cur]
-      result = [r].pack("U*")
-      @cur += 1
-      return r
+      if eos?
+        return Node::Chck::TERMINATE_CODE
+      else
+        r = @s.unpack("U*")[@cur]
+        result = [r].pack("U*")
+        @cur += 1
+        return r
+      end
     end
-  end
-  def eos?
-    return (@cur == @len) ? true : false
+    def eos?
+      return (@cur == @len) ? true : false
+    end
   end
-end
-#
-# DoubleArray検索用のクラス
-#
-class Searcher
-  #保存されているDoubleArrayを読み込んで、このクラスのインスタンスを作成する
-  #path:: DoubleArrayが保存されているファイルのパス
-  def initialize(path)
-    fmis = FileMappedInputStream.new(path)
-    node_size = fmis.get_int()
-    tind_size = fmis.get_int()
-    tail_size = fmis.get_int()
-    @key_set_size = tind_size
-    @begs = fmis.get_int_array(tind_size)
-    @base = fmis.get_int_array(node_size)
-    @lens = fmis.get_short_array(tind_size)
-    @chck = fmis.get_char_array(node_size)
-    @tail = fmis.get_string(tail_size)
-    fmis.close
-  end
+  #
+  # DoubleArray検索用のクラス
+  #
+  class Searcher
+    #保存されているDoubleArrayを読み込んで、このクラスのインスタンスを作成する
+    #path:: DoubleArrayが保存されているファイルのパス
+    def initialize(path)
+      fmis = FileMappedInputStream.new(path)
+      node_size = fmis.get_int()
+      tind_size = fmis.get_int()
+      tail_size = fmis.get_int()
+      @key_set_size = tind_size
+      @begs = fmis.get_int_array(tind_size)
+      @base = fmis.get_int_array(node_size)
+      @lens = fmis.get_short_array(tind_size)
+      @chck = fmis.get_char_array(node_size)
+      @tail = fmis.get_string(tail_size)
+      fmis.close
+    end
-  #DoubleArrayに格納されているキーの数を返却
-  #return:: DoubleArrayに格納されているキーの数
-  def size
-    return @key_set_size
-  end
+    #DoubleArrayに格納されているキーの数を返却
+    #return:: DoubleArrayに格納されているキーの数
+    def size
+      return @key_set_size
+    end
-  #キーを検索する
-  #key:: 検索対象のキー文字列
-  #return:: キーが見つかった場合はそのIDを、見つからなかった場合は-1を返す
-  def search(key)
-    base = @base
-    chck = @chck
-    node = @base[0]
-    kin = KeyStream.new(key)
+    #キーを検索する
+    #key:: 検索対象のキー文字列
+    #return:: キーが見つかった場合はそのIDを、見つからなかった場合は-1を返す
+    def search(key)
+      base = @base
+      chck = @chck
+      node = @base[0]
+      kin = KeyStream.new(key)
-    while true
-      code = kin.read
-      idx = node + code
-      node = base[idx]
+      while true
+        code = kin.read
+        idx = node + code
+        node = base[idx]
-      if(chck[idx] == code)
-        if(node >= 0)
-          next
-        elsif(kin.eos? or key_exists?(kin, node))
-          return Node::Base.ids(node)
+        if(chck[idx] == code)
+          if(node >= 0)
+            next
+          elsif(kin.eos? or key_exists?(kin, node))
+            return Node::Base.ids(node)
+          end
+          return -1
         end
-        return -1
       end
     end
-  end
-  #common-prefix検索を行う
-  #* 条件に一致するキーが見つかる度に、callback.callメソッドが呼び出される
-  #key:: 検索対象のキー文字列
-  #start:: 検索対象となるキー文字列の最初の添字
-  #callback:: 一致を検出した場合に呼び出されるコールバックメソッド
-  def each_common_prefix(key, start, callback)
-    base = @base
-    chck = @chck
-    node = @base[0]
-    offset = -1
-    kin = KeyStream.new(key, start)
+    #common-prefix検索を行う
+    #* 条件に一致するキーが見つかる度に、callback.callメソッドが呼び出される
+    #key:: 検索対象のキー文字列
+    #start:: 検索対象となるキー文字列の最初の添字
+    #callback:: 一致を検出した場合に呼び出されるコールバックメソッド
+    def each_common_prefix(key, start, callback)
+      base = @base
+      chck = @chck
+      node = @base[0]
+      offset = -1
+      kin = KeyStream.new(key, start)
-    while true
-      code = kin.read
-      offset += 1
-      terminal_index = node
+      while true
+        code = kin.read
+        offset += 1
+        terminal_index = node
-      if(chck[terminal_index] == Node::Chck::TERMINATE_CODE)
-        callback.call(start, offset, Node::Base.ids(base[terminal_index]))
+        if(chck[terminal_index] == Node::Chck::TERMINATE_CODE)
+          callback.call(start, offset, Node::Base.ids(base[terminal_index]))
-        if(code == Node::Chck::TERMINATE_CODE)
-          return
+          if(code == Node::Chck::TERMINATE_CODE)
+            return
+          end
         end
-      end
-      idx = node + code
-      node = base[idx]
+        idx = node + code
+        node = base[idx]
-      if(chck[idx] == code)
-        if(node >= 0)
-          next
-        else
-          call_if_key_including(kin, node, start, offset, callback)
+        if(chck[idx] == code)
+          if(node >= 0)
+            next
+          else
+            call_if_key_including(kin, node, start, offset, callback)
+          end
         end
+        return
       end
-      return
     end
-  end
-  private
+    private
-  def call_if_key_including(kin, node, start, offset, callback)
-    node_id = Node::Base.ids(node)
-    if(kin.start_with(@tail, @begs[node_id], @lens[node_id]))
-      callback.call(start, offset + @lens[node_id] + 1, node_id)
+    def call_if_key_including(kin, node, start, offset, callback)
+      node_id = Node::Base.ids(node)
+      if(kin.start_with(@tail, @begs[node_id], @lens[node_id]))
+        callback.call(start, offset + @lens[node_id] + 1, node_id)
+      end
     end
-  end
-  def key_exists?(kin, node)
-    nid = Node.Base.ids(node)
-    beg = @begs[nid]
-    s = @tail.slice(beg, beg + @lens[nid])
-    return kin.rest == s ? true : false
+    def key_exists?(kin, node)
+      nid = Node.Base.ids(node)
+      beg = @begs[nid]
+      s = @tail.slice(beg, beg + @lens[nid])
+      return kin.rest == s ? true : false
+    end
   end
 end

data/lib/igo/util.rb CHANGED

@@ -1,93 +1,96 @@
-# ファイルユーティリティ
+# coding: utf-8
+#= ファイルユーティリティ
-#
-#=== ファイルにマッピングされた入力ストリーム
-# ファイルからバイナリデータを取得する場合、必ずこのクラスが使用される。
-#
-class FileMappedInputStream
-  # 入力ストリームの初期化
-  # path:: 入力ファイルのパス
-  def initialize(path)
-    @path = path
-    @cur = 0
-    @file = open(path, "rb")
-  end
+module Igo
+  #
+  #=== ファイルにマッピングされた入力ストリーム
+  # ファイルからバイナリデータを取得する場合、必ずこのクラスが使用される。
+  #
+  class FileMappedInputStream
+    # 入力ストリームの初期化
+    # path:: 入力ファイルのパス
+    def initialize(path)
+      @path = path
+      @cur = 0
+      @file = open(path, "rb")
+    end
-  # int値で読み取り
-  def get_int()
-    return @file.read(4).unpack("i*")[0]
-  end
+    # int値で読み取り
+    def get_int()
+      return @file.read(4).unpack("i*")[0]
+    end
+    # int配列で読み取り
+    # count:: 読み取りカウント
+    def get_int_array(count)
+      return @file.read(count * 4).unpack("i*")
+    end
-  # int配列で読み取り
-  # count:: 読み取りカウント
-  def get_int_array(count)
-    return @file.read(count * 4).unpack("i*")
-  end
+    # int配列で読み取り
+    # path:: 入力ファイルのパス
+    def self.get_int_array(path)
+      fmis = FileMappedInputStream.new(path)
+      array = fmis.get_int_array((File::stat(path).size)/4)
+      fmis.close
+      return array
+    end
-  # int配列で読み取り
-  # path:: 入力ファイルのパス
-  def self.get_int_array(path)
-    fmis = FileMappedInputStream.new(path)
-    array = fmis.get_int_array((File::stat(path).size)/4)
-    fmis.close
-    return array
-  end
+    # short配列で読み取り
+    # count:: 読み取りカウント
+    def get_short_array(count)
+      return @file.read(count * 2).unpack("s*")
+    end
-  # short配列で読み取り
-  # count:: 読み取りカウント
-  def get_short_array(count)
-    return @file.read(count * 2).unpack("s*")
-  end
+    # char配列で読み取り
+    # count:: 読み取りカウント
+    def get_char_array(count)
+      return @file.read(count * 2).unpack("S!*")
+    end
-  # char配列で読み取り
-  # count:: 読み取りカウント
-  def get_char_array(count)
-    return @file.read(count * 2).unpack("S!*")
-  end
+    # stringで読み取り
+    # count:: 読み取りカウント
+    def get_string(count)
+      return @file.read(count * 2)
+    end
-  # stringで読み取り
-  # count:: 読み取りカウント
-  def get_string(count)
-    return @file.read(count * 2)
-  end
-  # stringで読み取り
-  # path:: 入力ファイル
-  def self.get_string(path)
-    fmis = FileMappedInputStream.new(path)
-    str = fmis.get_string((File::stat(path).size)/2)
-    fmis.close
+    # stringで読み取り
+    # path:: 入力ファイル
+    def self.get_string(path)
+      fmis = FileMappedInputStream.new(path)
+      str = fmis.get_string((File::stat(path).size)/2)
+      fmis.close
-    return str
-  end
+      return str
+    end
-  # 入力ファイルのサイズを返却する
-  def size
-    return File::stat(@path).size
-  end
+    # 入力ファイルのサイズを返却する
+    def size
+      return File::stat(@path).size
+    end
-  # 入力ストリームを閉じる
-  #* newした場合、必ずcloseを呼ぶこと
-  def close
-    @file.close
-  end
+    # 入力ストリームを閉じる
+    #* newした場合、必ずcloseを呼ぶこと
+    def close
+      @file.close
+    end
-  # char配列で読み取り
-  # path:: 入力ファイル
-  def self.get_char_array(path)
-    fmis = FileMappedInputStream.new(path)
-    array = fmis.get_char_array(fmis.size / 2)
-    fmis.close
-    return array
-  end
+    # char配列で読み取り
+    # path:: 入力ファイル
+    def self.get_char_array(path)
+      fmis = FileMappedInputStream.new(path)
+      array = fmis.get_char_array(fmis.size / 2)
+      fmis.close
+      return array
+    end
-  private
+    private
-  # ファイルマップ
-  #* 現在、不使用
-  def map(size)
-    @file.pos = @cur
-    @cur += size
-    return @file.read(size)
+    # ファイルマップ
+    #* 現在、不使用
+    def map(size)
+      @file.pos = @cur
+      @cur += size
+      return @file.read(size)
+    end
   end
-end
+end

data/lib/igo/version.rb ADDED

@@ -0,0 +1,17 @@
+# coding: utf-8
+module Igo
+  #
+  #バージョンクラス
+  #
+  class Version
+    #igo-rubyのRubyGemsバージョンを出力する
+    def self.igo_ruby
+      version_file = File.dirname(__FILE__) + '/../../VERSION'
+      version = ""
+      open(version_file) { |igo_ruby_version|
+        version = igo_ruby_version.gets
+      }
+      return version
+    end
+  end
+end

data/test/test.rb CHANGED

@@ -1,9 +1,16 @@
+# coding: utf-8
 require 'rubygems'
 require 'igo-ruby'
-tagger = Igo::Tagger.new('../../ipadic')
+#require File.dirname(__FILE__) + '/../lib/igo-ruby'
+puts "version -> #{Igo::Version.igo_ruby}"
+tagger = Igo::Tagger.new(File.dirname(__FILE__) + '/../../ipadic')
 t = tagger.parse('吾輩は猫である。名前はまだ無い。')
+puts "parse ->"
 t.each{|m|
   puts "#{m.surface} #{m.feature} #{m.start}"
 }
+puts "wakati ->"
 t = tagger.wakati('どこで生れたかとんと見当がつかぬ。')
-puts t.join(' ')
+puts t.join(' ')

metadata CHANGED

@@ -1,13 +1,8 @@
 --- !ruby/object:Gem::Specification
 name: igo-ruby
 version: !ruby/object:Gem::Version
-  hash: 31
-  prerelease: false
-  segments:
-  - 0
-  - 1
-  - 2
-  version: 0.1.2
+  prerelease:
+  version: 0.1.3
 platform: ruby
 authors:
 - K.Nishi
@@ -15,71 +10,53 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-12-13 00:00:00 +09:00
+date: 2011-02-10 00:00:00 +09:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
-  prerelease: false
-  version_requirements: &id001 !ruby/object:Gem::Requirement
+  name: rspec
+  requirement: &id001 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        hash: 11
-        segments:
-        - 2
-        - 1
-        - 0
         version: 2.1.0
-  name: rspec
-  requirement: *id001
   type: :development
-- !ruby/object:Gem::Dependency
   prerelease: false
-  version_requirements: &id002 !ruby/object:Gem::Requirement
+  version_requirements: *id001
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: &id002 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        hash: 23
-        segments:
-        - 1
-        - 0
-        - 0
         version: 1.0.0
-  name: bundler
-  requirement: *id002
   type: :development
-- !ruby/object:Gem::Dependency
   prerelease: false
-  version_requirements: &id003 !ruby/object:Gem::Requirement
+  version_requirements: *id002
+- !ruby/object:Gem::Dependency
+  name: jeweler
+  requirement: &id003 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        hash: 1
-        segments:
-        - 1
-        - 5
-        - 1
         version: 1.5.1
-  name: jeweler
-  requirement: *id003
   type: :development
-- !ruby/object:Gem::Dependency
   prerelease: false
-  version_requirements: &id004 !ruby/object:Gem::Requirement
+  version_requirements: *id003
+- !ruby/object:Gem::Dependency
+  name: rcov
+  requirement: &id004 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
         version: "0"
-  name: rcov
-  requirement: *id004
   type: :development
+  prerelease: false
+  version_requirements: *id004
 description: "\n    Ruby port of Igo Japanese morphological analyzer. Igo-ruby needs Igo's binary dictionary files.\n    These files created by Java programs.\n    See: http://igo.sourceforge.jp/\n  "
 email: 24signals@gmail.com
 executables: []
@@ -101,6 +78,7 @@ files:
 - lib/igo/tagger.rb
 - lib/igo/trie.rb
 - lib/igo/util.rb
+- lib/igo/version.rb
 - spec/igo-ruby_spec.rb
 - spec/spec_helper.rb
 - test/test.rb
@@ -120,7 +98,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 3
+      hash: 2664330083952194465
       segments:
       - 0
       version: "0"
@@ -129,16 +107,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">"
     - !ruby/object:Gem::Version
-      hash: 23
-      segments:
-      - 1
-      - 3
-      - 6
       version: 1.3.6
 requirements: []
 rubyforge_project:
-rubygems_version: 1.3.7
+rubygems_version: 1.5.0
 signing_key:
 specification_version: 3
 summary: Ruby port of Igo Japanese morphological analyzer.