ja-complex-word 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +13 -0
- data/README.rdoc +61 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/example/test.rb +18 -0
- data/lib/ja/complex_word.rb +129 -0
- data/spec/base_spec.rb +89 -0
- data/spec/spec_helper.rb +12 -0
- metadata +143 -0
    
        data/.document
    ADDED
    
    
    
        data/.rspec
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            -fs --color
         | 
    
        data/Gemfile
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
| 1 | 
            +
            # Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
            source "https://rubygems.org"
            # Add dependencies required to use your gem here.
            # Example:
            #   gem "activesupport", ">= 2.3.5"

            # Add dependencies to develop your gem here.
            # Include everything needed to run rake, tests, features, etc.
            group :development do
              gem "rspec", "~> 2.3.0"
              gem "bundler", "~> 1.0.0"
              gem "jeweler", "~> 1.6.4"
              gem "rcov", ">= 0"
            end
         | 
    
        data/README.rdoc
    ADDED
    
    | @@ -0,0 +1,61 @@ | |
| 1 | 
            +
            = Ja::ComplexWord
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            == 概要
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            MeCab の形態素解析結果に複合語の考慮を与えるライブラリ。
         | 
| 6 | 
            +
            TermExtract::MeCab (Perl ライブラリ) から複合語に関する処理を抜き出し、単体でシンプルに使えるようにしたものです。
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            イメージとしては、[東京 特許 許可局 の 名前 が 欲しい です] という形態素の配列があるときに、
         | 
| 9 | 
            +
            [[東京 特許 許可局] の [名前] が 欲しい です] のように flat ではない 2 次元配列へとパースしてくれるものです。
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            == 使い方
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                require 'ja/complex_word'
         | 
| 14 | 
            +
                
         | 
| 15 | 
            +
                text = 'ABC事件とは東京特許許可局でバスガス爆発が緊急発生した事件のことです'
         | 
| 16 | 
            +
                jcw = Ja::ComplexWord.new
         | 
| 17 | 
            +
                node_list = jcw.parse(text)
         | 
| 18 | 
            +
                node_list.each do |node|
         | 
| 19 | 
            +
                  if node.is_a?(Array)
         | 
| 20 | 
            +
                    all = node.map{|n| n.surface }.join
         | 
| 21 | 
            +
                    puts "#{all}\t複合語"
         | 
| 22 | 
            +
                    node.each do |n|
         | 
| 23 | 
            +
                      puts " - #{n.surface}\t#{n.feature}"
         | 
| 24 | 
            +
                    end
         | 
| 25 | 
            +
                  else
         | 
| 26 | 
            +
                    puts "#{node.surface}\t#{node.feature}"
         | 
| 27 | 
            +
                  end
         | 
| 28 | 
            +
                end
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                ABC事件 複合語
         | 
| 31 | 
            +
                 - ABC  ,
         | 
| 32 | 
            +
                 - 事件 名詞,一般,*,*,*,*,事件,ジケン,ジケン
         | 
| 33 | 
            +
                と      助詞,格助詞,一般,*,*,*,と,ト,ト
         | 
| 34 | 
            +
                は      助詞,係助詞,*,*,*,*,は,ハ,ワ
         | 
| 35 | 
            +
                東京特許許可局  複合語
         | 
| 36 | 
            +
                 - 東京 名詞,固有名詞,地域,一般,*,*,東京,トウキョウ,トーキョー
         | 
| 37 | 
            +
                 - 特許 名詞,サ変接続,*,*,*,*,特許,トッキョ,トッキョ
         | 
| 38 | 
            +
                 - 許可 名詞,サ変接続,*,*,*,*,許可,キョカ,キョカ
         | 
| 39 | 
            +
                 - 局   名詞,接尾,一般,*,*,*,局,キョク,キョク
         | 
| 40 | 
            +
                で      助詞,格助詞,一般,*,*,*,で,デ,デ
         | 
| 41 | 
            +
                バスガス爆発    複合語
         | 
| 42 | 
            +
                 - バス 名詞,一般,*,*,*,*,バス,バス,バス
         | 
| 43 | 
            +
                 - ガス 名詞,一般,*,*,*,*,ガス,ガス,ガス
         | 
| 44 | 
            +
                 - 爆発 名詞,サ変接続,*,*,*,*,爆発,バクハツ,バクハツ
         | 
| 45 | 
            +
                が      助詞,格助詞,一般,*,*,*,が,ガ,ガ
         | 
| 46 | 
            +
                緊急発生        複合語
         | 
| 47 | 
            +
                 - 緊急 名詞,形容動詞語幹,*,*,*,*,緊急,キンキュウ,キンキュー
         | 
| 48 | 
            +
                 - 発生 名詞,サ変接続,*,*,*,*,発生,ハッセイ,ハッセイ
         | 
| 49 | 
            +
                し      動詞,自立,*,*,サ変・スル,連用形,する,シ,シ
         | 
| 50 | 
            +
                た      助動詞,*,*,*,特殊・タ,基本形,た,タ,タ
         | 
| 51 | 
            +
                事件    名詞,一般,*,*,*,*,事件,ジケン,ジケン
         | 
| 52 | 
            +
                の      助詞,連体化,*,*,*,*,の,ノ,ノ
         | 
| 53 | 
            +
                こと    名詞,非自立,一般,*,*,*,こと,コト,コト
         | 
| 54 | 
            +
                です    助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            == 使い方
         | 
| 57 | 
            +
             | 
| 58 | 
            +
            == ライセンス
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            TermExtract のライセンスと同様にしたいと思います。
         | 
| 61 | 
            +
            (TermExtract のライセンス自体がどうなっているかよく分かりませんが…。)
         | 
    
        data/Rakefile
    ADDED
    
    | @@ -0,0 +1,49 @@ | |
| 1 | 
            +
            # encoding: utf-8
            #
            # Build tasks for the ja-complex-word gem: packaging (jeweler),
            # specs (rspec), coverage (rcov) and API documentation (rdoc).

            require 'rubygems'
            require 'bundler'
            begin
              Bundler.setup(:default, :development)
            rescue Bundler::BundlerError => e
              # Report the missing gems and exit with Bundler's own status code.
              $stderr.puts e.message
              $stderr.puts "Run `bundle install` to install missing gems"
              exit e.status_code
            end
            require 'rake'

            require 'jeweler'
            Jeweler::Tasks.new do |gem|
              # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
              gem.name = "ja-complex-word"
              gem.homepage = "http://github.com/tily/ruby-ja-complex-word"
              gem.license = "MIT"
              gem.summary = %Q{日本語テキストに含まれる複合語をパースするライブラリ}
              gem.description = %Q{日本語テキストに含まれる複合語をパースするライブラリ}
              gem.email = "tily05@gmail.com"
              gem.authors = ["tily"]
              # dependencies defined in Gemfile
            end
            Jeweler::RubygemsDotOrgTasks.new

            require 'rspec/core'
            require 'rspec/core/rake_task'
            RSpec::Core::RakeTask.new(:spec) do |spec|
              spec.pattern = FileList['spec/**/*_spec.rb']
            end

            RSpec::Core::RakeTask.new(:rcov) do |spec|
              spec.pattern = 'spec/**/*_spec.rb'
              spec.rcov = true
            end

            task :default => :spec

            # rake/rdoctask was deprecated in Rake 0.9 and is absent from later
            # releases; prefer the task class shipped with RDoc and fall back to the
            # legacy one only where RDoc does not provide it.
            begin
              require 'rdoc/task'
              rdoc_task_class = RDoc::Task
            rescue LoadError
              require 'rake/rdoctask'
              rdoc_task_class = Rake::RDocTask
            end

            rdoc_task_class.new do |rdoc|
              # Strip the trailing newline of the VERSION file so the title stays on one line.
              version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

              rdoc.rdoc_dir = 'rdoc'
              rdoc.title = "ruby-ja-complex-word #{version}"
              rdoc.rdoc_files.include('README*')
              rdoc.rdoc_files.include('lib/**/*.rb')
            end
         | 
    
        data/VERSION
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            0.0.0
         | 
    
        data/example/test.rb
    ADDED
    
    | @@ -0,0 +1,18 @@ | |
| 1 | 
            +
            # -*- coding:utf-8 -*-
            # Usage example: parse one sentence and print every entry of the result.
            # Compound words are printed with their joined surface followed by one
            # indented line per member; other entries are printed as "surface<TAB>feature".
            $:.unshift(File.dirname(__FILE__) + '/../lib')
            require 'ja/complex_word'

            text = 'ABC事件とは東京特許許可局でバスガス爆発が緊急発生した事件のことです'

            Ja::ComplexWord.new.parse(text).each do |entry|
              # Plain morpheme: a single line is enough.
              unless entry.is_a?(Array)
                puts "#{entry.surface}\t#{entry.feature}"
                next
              end

              # Compound word: heading line first, then each member.
              joined = entry.map { |part| part.surface }.join
              puts "#{joined}\t複合語"
              entry.each { |part| puts " - #{part.surface}\t#{part.feature}" }
            end
         | 
| @@ -0,0 +1,129 @@ | |
| 1 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 2 | 
            +
            require 'MeCab'
         | 
| 3 | 
            +
            require 'ostruct'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Namespace that holds ComplexWord.
            class Ja
              # Groups consecutive morphemes that look like a single compound word
              # into nested arrays. Given a flat list of nodes (objects responding to
              # +surface+ and +feature+), #parse_nodes returns an array in which each
              # compound word is an Array of nodes and every other morpheme is the
              # node itself.
              class ComplexWord
                # Feature label used for words unknown to MeCab when the :unk option
                # is not given.
                DEFAULT_UNKNOWN = '未知語'
                # Default values of the optional rules; see #initialize.
                RULE_BASE = {:noun1 => false, :noun2 => false, :verb => false}
                # An unknown word starting with one of these characters never joins
                # a run of unknown words.
                UNKNOWN_RUN_BREAKER = /^[\(\)\[\]\<\>|\"\'\;\,]/
                # Matches a surface that starts with an ASCII symbol (0x21-0x2F) or
                # ends with one of the listed bracket-like symbols.
                SYMBOL_EDGE = /^[\x21-\x2F]|[{|}:\;\<\>\[\]]$/

                # Creates a parser.
                #
                # opts may contain:
                # * :unk  - label written into the feature of unknown words
                #   (defaults to DEFAULT_UNKNOWN). It is embedded in a MeCab
                #   command-line option, so presumably it must not contain spaces
                #   (the spec carries a TODO saying so).
                # * :rule - Hash overriding entries of RULE_BASE. When :noun1 or
                #   :noun2 is true, a run whose last member matched that rule and is
                #   not followed by another term is emitted as individual nodes
                #   instead of a compound. When a run is ended by a verb, the value
                #   of :verb decides the same thing.
                def initialize(opts={})
                  @opts = opts || {}
                end

                # Parses the argument into an array that takes compound words into
                # account (see #parse_nodes for the result shape).
                #
                # arg may be:
                # * an IO-like object (anything whose #read returns Japanese text)
                # * a String of Japanese text
                # * a MeCab::Node (the linked list is followed through #next)
                # * an Array whose elements respond to +surface+ and +feature+
                #
                # Raises ArgumentError for anything else.
                def parse(arg)
                  nodes =
                    if arg.respond_to?(:read)
                      to_nodes(arg.read)
                    elsif arg.is_a?(String)
                      to_nodes(arg)
                    elsif arg.is_a?(Array)
                      arg
                    elsif defined?(MeCab::Node) && arg.is_a?(MeCab::Node)
                      mecab_list_to_nodes(arg)
                    else
                      raise ArgumentError, 'Error: arg1 must be either an IO, String, MeCab::Node or Array.'
                    end
                  parse_nodes(nodes)
                end

                # Analyses Japanese text with MeCab and returns an Array of
                # MeCab::Node-like OpenStruct objects (+surface+, +feature+).
                # Unknown words get the feature "<label>," where the label is the
                # :unk option or DEFAULT_UNKNOWN. Lines without a tab (such as the
                # "EOS" line of MeCab's output) get an empty feature.
                def to_nodes(text)
                  tagger = MeCab::Tagger.new("-U %M\\t#{unknown_label},\\n")
                  tagger.parse(text).split("\n").map do |line|
                    surface, feature = line.chomp.split("\t")
                    OpenStruct.new(:surface => surface, :feature => feature || '')
                  end
                end

                # Takes an Array of nodes (each responding to +surface+ and
                # +feature+) and returns a new Array in which every run of nodes
                # that looks like a compound word is wrapped in its own Array.
                # A run of exactly one node is returned as that node, not as an Array.
                def parse_nodes(nodes)
                  # Merge into a copy so the shared RULE_BASE constant is never mutated.
                  rule = RULE_BASE.merge(@opts[:rule] || {})
                  unk_pattern = /^#{Regexp.escape(unknown_label)},/u
                  terms = []    # nodes of the compound word being built
                  unknown = []  # pending run of unknown words
                  must = false  # true when the next node has to be a term
                  result = []

                  nodes.each do |node|
                    # Unknown words connected by symbols or digits are collected
                    # into one run before being treated as terms.
                    if node.feature[unk_pattern] && !node.surface[UNKNOWN_RUN_BREAKER]
                      # Two alphabetic unknown words are not joined; the second one
                      # falls through and is handled by the basic rule below.
                      joins_run = unknown.empty? ||
                                  !(unknown.last.surface[/[A-Za-z]/] && node.surface[/^[A-Za-z]/])
                      if joins_run
                        unknown << node
                        next
                      end
                    end

                    # Move the collected unknown words into the current compound.
                    terms.concat(trim_trailing_symbols(unknown))
                    unknown = []

                    if basic_term?(node, unk_pattern)
                      # Basic rule
                      terms << node
                      must = false
                      next
                    elsif node.feature[/^名詞,(形容動詞語幹|ナイ形容詞語幹),/u]
                      # Noun rule 1
                      terms << node
                      must = rule[:noun1]
                      next
                    elsif node.feature[/^名詞,接尾,形容動詞語幹,/u]
                      # Noun rule 2
                      terms << node
                      must = rule[:noun2]
                      next
                    end

                    # Verb rule
                    must = rule[:verb] if node.feature[/^動詞,/u]

                    flush_terms(result, terms, must)
                    terms = []
                    must = false
                    result << node
                  end

                  # Input that does not end with a non-term node (for example an
                  # Array without a trailing EOS entry) still has a pending run.
                  terms.concat(trim_trailing_symbols(unknown))
                  flush_terms(result, terms, must)
                  result
                end

                private

                # Label that marks unknown words in a node's feature.
                def unknown_label
                  @opts[:unk] || DEFAULT_UNKNOWN
                end

                # Walks a MeCab::Node linked list and copies every node into an
                # OpenStruct with +surface+ and +feature+.
                def mecab_list_to_nodes(node)
                  list = []
                  while node
                    list << OpenStruct.new(:surface => node.surface, :feature => node.feature)
                    node = node.next
                  end
                  list
                end

                # Removes unknown words whose surface is symbol-like from the end
                # of the run. The removed nodes are discarded, which is what the
                # original pop loop was written to do.
                def trim_trailing_symbols(run)
                  run.pop while !run.empty? && run.last.surface =~ SYMBOL_EDGE
                  run
                end

                # True when the node always counts as part of a compound word:
                # general / sa-hen / proper nouns, noun suffixes, alphabet symbols,
                # and unknown words whose surface is not symbol-like.
                def basic_term?(node, unk_pattern)
                  feature = node.feature
                  return true if feature[/^名詞,(一般|サ変接続|固有名詞),/u] ||
                                 feature[/^名詞,接尾,(一般|サ変接続),/u]     ||
                                 feature[/^記号,アルファベット,/u]
                  !!(feature[unk_pattern] && node.surface !~ SYMBOL_EDGE)
                end

                # Appends the finished run to result: as individual nodes when the
                # run has a single member or +must+ is set, as one nested Array
                # otherwise. Does nothing for an empty run.
                def flush_terms(result, terms, must)
                  return if terms.empty?
                  if must || terms.size == 1
                    result.concat(terms)
                  else
                    result << terms
                  end
                end
              end
            end
         | 
    
        data/spec/base_spec.rb
    ADDED
    
    | @@ -0,0 +1,89 @@ | |
| 1 | 
            +
            # -*- coding:utf-8 -*-
            require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
            # StringIO and OpenStruct are used directly by the examples below.
            require 'stringio'
            require 'ostruct'

            # Basic specs for Ja::ComplexWord. The #to_nodes examples and the
            # MeCab::Node example call MeCab itself; the remaining #parse examples
            # stub the collaborating methods.
            describe "Ja::ComplexWord (basic specs)" do
              before do
                @parser = Ja::ComplexWord.new
              end

              describe "#to_nodes" do
                it "splits Japanese text to nodes." do
                  expected = [
                    ['すもも', '名詞,一般,*,*,*,*,すもも,スモモ,スモモ'   ],
                    ['も',     '助詞,係助詞,*,*,*,*,も,モ,モ'             ],
                    ['もも',   '名詞,一般,*,*,*,*,もも,モモ,モモ'         ],
                    ['も',     '助詞,係助詞,*,*,*,*,も,モ,モ'             ],
                    ['もも',   '名詞,一般,*,*,*,*,もも,モモ,モモ'         ],
                    ['の',     '助詞,連体化,*,*,*,*,の,ノ,ノ'             ],
                    ['うち',   '名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ'],
                    ['EOS',    ''                                         ]
                  ]
                  nodes = @parser.to_nodes('すもももももももものうち')
                  res = nodes.map {|n| [n.surface, n.feature] }
                  res.should == expected
                end

                it "deals with unknown words correctly." do
                  label = 'dummy_unknown_phrase' # TODO: must not include spaces.
                  @parser.instance_variable_set("@opts", {:unk => label})
                  nodes = @parser.to_nodes("ABCをDEFした。")
                  [nodes[0].surface, nodes[0].feature].should == %W(ABC #{label},)
                  [nodes[2].surface, nodes[2].feature].should == %W(DEF #{label},)
                end
              end

              describe "#parse" do
                it "receives an IO." do
                  values = %w(値1 値2 値3)
                  io = StringIO.new(values[0])
                  @parser.should_receive(:to_nodes).with(values[0]).and_return(values[1])
                  @parser.should_receive(:parse_nodes).with(values[1]).and_return(values[2])
                  @parser.parse(io).should == values[2]
                end

                it "receives a String." do
                  values = %w(値1 値2 値3)
                  @parser.should_receive(:to_nodes).with(values[0]).and_return(values[1])
                  @parser.should_receive(:parse_nodes).with(values[1]).and_return(values[2])
                  @parser.parse(values[0]).should == values[2]
                end

                it "receives a MeCab::Node." do
                  expected = 'テスト用の値'
                  tagger = MeCab::Tagger.new
                  node = tagger.parseToNode('これはテストです')
                  @parser.should_receive(:parse_nodes).and_return(expected)
                  @parser.parse(node).should == expected
                end

                it "receives an Array." do
                  nodes = [
                    OpenStruct.new(:surface => '', :feature => ''),
                    OpenStruct.new(:surface => '', :feature => ''),
                    OpenStruct.new(:surface => '', :feature => '')
                  ]
                  result = 'dummy'
                  @parser.should_receive(:parse_nodes).with(nodes).and_return(result)
                  @parser.parse(nodes).should == result
                end

                it "receives the other objects and raises an ArgumentError." do
                  # An anonymous class avoids defining a global constant from inside an example.
                  some_other_class = Class.new
                  lambda { @parser.parse(some_other_class.new) }.should raise_error(ArgumentError)
                end
              end

              describe "#parse_nodes" do
                it "parses nodes good." do
                  nodes = [
                    OpenStruct.new(:surface => '東京', :feature => '名詞,固有名詞,地域,一般,*,*,東京,トウキョウ,トーキョー'),
                    OpenStruct.new(:surface => '特許', :feature => '名詞,サ変接続,*,*,*,*,特許,トッキョ,トッキョ'),
                    OpenStruct.new(:surface => 'の',   :feature => '助詞,連体化,*,*,*,*,の,ノ,ノ'),
                    OpenStruct.new(:surface => 'EOS',  :feature => '')
                  ]
                  expected = [%w(東京 特許), 'の', 'EOS']
                  # Compare surfaces only: compounds come back as nested arrays of nodes.
                  surfaces = @parser.parse_nodes(nodes).map do |entry|
                    entry.is_a?(Array) ? entry.map {|n| n.surface } : entry.surface
                  end
                  surfaces.should == expected
                end
              end
            end
         | 
    
        data/spec/spec_helper.rb
    ADDED
    
    | @@ -0,0 +1,12 @@ | |
| 1 | 
            +
            # Spec bootstrap: puts the library and the spec directory on the load
            # path, loads RSpec and the library under test, then loads any support files.
            [File.join(File.dirname(__FILE__), '..', 'lib'), File.dirname(__FILE__)].each do |dir|
              $LOAD_PATH.unshift(dir)
            end
            require 'rspec'
            require 'ja/complex_word'

            # Custom matchers, macros and similar helpers live in ./support/
            # and its subdirectories.
            Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each do |support_file|
              require support_file
            end

            # No project-specific RSpec configuration so far.
            RSpec.configure { |config| }
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,143 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification 
         | 
| 2 | 
            +
            name: ja-complex-word
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            +
              hash: 31
         | 
| 5 | 
            +
              prerelease: 
         | 
| 6 | 
            +
              segments: 
         | 
| 7 | 
            +
              - 0
         | 
| 8 | 
            +
              - 0
         | 
| 9 | 
            +
              - 0
         | 
| 10 | 
            +
              version: 0.0.0
         | 
| 11 | 
            +
            platform: ruby
         | 
| 12 | 
            +
            authors: 
         | 
| 13 | 
            +
            - tily
         | 
| 14 | 
            +
            autorequire: 
         | 
| 15 | 
            +
            bindir: bin
         | 
| 16 | 
            +
            cert_chain: []
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            date: 2011-10-14 00:00:00 +09:00
         | 
| 19 | 
            +
            default_executable: 
         | 
| 20 | 
            +
            dependencies: 
         | 
| 21 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 22 | 
            +
              type: :development
         | 
| 23 | 
            +
              requirement: &id001 !ruby/object:Gem::Requirement 
         | 
| 24 | 
            +
                none: false
         | 
| 25 | 
            +
                requirements: 
         | 
| 26 | 
            +
                - - ~>
         | 
| 27 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 28 | 
            +
                    hash: 3
         | 
| 29 | 
            +
                    segments: 
         | 
| 30 | 
            +
                    - 2
         | 
| 31 | 
            +
                    - 3
         | 
| 32 | 
            +
                    - 0
         | 
| 33 | 
            +
                    version: 2.3.0
         | 
| 34 | 
            +
              name: rspec
         | 
| 35 | 
            +
              version_requirements: *id001
         | 
| 36 | 
            +
              prerelease: false
         | 
| 37 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 38 | 
            +
              type: :development
         | 
| 39 | 
            +
              requirement: &id002 !ruby/object:Gem::Requirement 
         | 
| 40 | 
            +
                none: false
         | 
| 41 | 
            +
                requirements: 
         | 
| 42 | 
            +
                - - ~>
         | 
| 43 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 44 | 
            +
                    hash: 23
         | 
| 45 | 
            +
                    segments: 
         | 
| 46 | 
            +
                    - 1
         | 
| 47 | 
            +
                    - 0
         | 
| 48 | 
            +
                    - 0
         | 
| 49 | 
            +
                    version: 1.0.0
         | 
| 50 | 
            +
              name: bundler
         | 
| 51 | 
            +
              version_requirements: *id002
         | 
| 52 | 
            +
              prerelease: false
         | 
| 53 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 54 | 
            +
              type: :development
         | 
| 55 | 
            +
              requirement: &id003 !ruby/object:Gem::Requirement 
         | 
| 56 | 
            +
                none: false
         | 
| 57 | 
            +
                requirements: 
         | 
| 58 | 
            +
                - - ~>
         | 
| 59 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 60 | 
            +
                    hash: 7
         | 
| 61 | 
            +
                    segments: 
         | 
| 62 | 
            +
                    - 1
         | 
| 63 | 
            +
                    - 6
         | 
| 64 | 
            +
                    - 4
         | 
| 65 | 
            +
                    version: 1.6.4
         | 
| 66 | 
            +
              name: jeweler
         | 
| 67 | 
            +
              version_requirements: *id003
         | 
| 68 | 
            +
              prerelease: false
         | 
| 69 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 70 | 
            +
              type: :development
         | 
| 71 | 
            +
              requirement: &id004 !ruby/object:Gem::Requirement 
         | 
| 72 | 
            +
                none: false
         | 
| 73 | 
            +
                requirements: 
         | 
| 74 | 
            +
                - - ">="
         | 
| 75 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 76 | 
            +
                    hash: 3
         | 
| 77 | 
            +
                    segments: 
         | 
| 78 | 
            +
                    - 0
         | 
| 79 | 
            +
                    version: "0"
         | 
| 80 | 
            +
              name: rcov
         | 
| 81 | 
            +
              version_requirements: *id004
         | 
| 82 | 
            +
              prerelease: false
         | 
| 83 | 
            +
            description: !binary |
         | 
| 84 | 
            +
              5pel5pys6Kqe44OG44Kt44K544OI44Gr5ZCr44G+44KM44KL6KSH5ZCI6Kqe
         | 
| 85 | 
            +
              44KS44OR44O844K544GZ44KL44Op44Kk44OW44Op44Oq
         | 
| 86 | 
            +
             | 
| 87 | 
            +
            email: tily05@gmail.com
         | 
| 88 | 
            +
            executables: []
         | 
| 89 | 
            +
             | 
| 90 | 
            +
            extensions: []
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            extra_rdoc_files: 
         | 
| 93 | 
            +
            - README.rdoc
         | 
| 94 | 
            +
            files: 
         | 
| 95 | 
            +
            - .document
         | 
| 96 | 
            +
            - .rspec
         | 
| 97 | 
            +
            - Gemfile
         | 
| 98 | 
            +
            - README.rdoc
         | 
| 99 | 
            +
            - Rakefile
         | 
| 100 | 
            +
            - VERSION
         | 
| 101 | 
            +
            - example/test.rb
         | 
| 102 | 
            +
            - lib/ja/complex_word.rb
         | 
| 103 | 
            +
            - spec/base_spec.rb
         | 
| 104 | 
            +
            - spec/spec_helper.rb
         | 
| 105 | 
            +
            has_rdoc: true
         | 
| 106 | 
            +
            homepage: http://github.com/tily/ruby-ja-complex-word
         | 
| 107 | 
            +
            licenses: 
         | 
| 108 | 
            +
            - MIT
         | 
| 109 | 
            +
            post_install_message: 
         | 
| 110 | 
            +
            rdoc_options: []
         | 
| 111 | 
            +
             | 
| 112 | 
            +
            require_paths: 
         | 
| 113 | 
            +
            - lib
         | 
| 114 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 115 | 
            +
              none: false
         | 
| 116 | 
            +
              requirements: 
         | 
| 117 | 
            +
              - - ">="
         | 
| 118 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 119 | 
            +
                  hash: 3
         | 
| 120 | 
            +
                  segments: 
         | 
| 121 | 
            +
                  - 0
         | 
| 122 | 
            +
                  version: "0"
         | 
| 123 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 124 | 
            +
              none: false
         | 
| 125 | 
            +
              requirements: 
         | 
| 126 | 
            +
              - - ">="
         | 
| 127 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 128 | 
            +
                  hash: 3
         | 
| 129 | 
            +
                  segments: 
         | 
| 130 | 
            +
                  - 0
         | 
| 131 | 
            +
                  version: "0"
         | 
| 132 | 
            +
            requirements: []
         | 
| 133 | 
            +
             | 
| 134 | 
            +
            rubyforge_project: 
         | 
| 135 | 
            +
            rubygems_version: 1.6.2
         | 
| 136 | 
            +
            signing_key: 
         | 
| 137 | 
            +
            specification_version: 3
         | 
| 138 | 
            +
            summary: !binary |
         | 
| 139 | 
            +
              5pel5pys6Kqe44OG44Kt44K544OI44Gr5ZCr44G+44KM44KL6KSH5ZCI6Kqe
         | 
| 140 | 
            +
              44KS44OR44O844K544GZ44KL44Op44Kk44OW44Op44Oq
         | 
| 141 | 
            +
             | 
| 142 | 
            +
            test_files: []
         | 
| 143 | 
            +
             |