RubyGems - langue - Versions diffs - 0.0.3 → 0.0.4 - Mend

langue 0.0.3 → 0.0.4

Files changed (12)

data/lib/langue/morpheme.rb +5 -0
data/lib/langue/morphemes.rb +42 -6
data/lib/langue/sentence.rb +5 -0
data/lib/langue/text.rb +6 -1
data/lib/langue/version.rb +1 -1
data/lib/langue/word.rb +11 -0
data/spec/langue/morpheme_spec.rb +31 -0
data/spec/langue/morphemes_spec.rb +69 -4
data/spec/langue/sentence_spec.rb +16 -0
data/spec/langue/text_spec.rb +24 -0
data/spec/langue/word_spec.rb +31 -5
metadata +3 -3

data/lib/langue/morpheme.rb CHANGED

@@ -17,6 +17,11 @@ module Langue
     attr_reader(*KEYS)
+    def ==(other_morpheme)
+      return false unless other_morpheme.is_a?(self.class)
+      KEYS.all? { |key| self.__send__(key) == other_morpheme.__send__(key) }
+    end
     def classified?(part_of_speech, *categories)
       got = [@part_of_speech] + @categories
       expected = [part_of_speech] + categories

data/lib/langue/morphemes.rb CHANGED

@@ -11,12 +11,48 @@ module Langue
       morpheme.nil? || !block_given? ? morpheme : yield(morpheme)
     end
-#     def match?(index, text)
-#       at(index) {|morpheme| morpheme.text == text}
-#     end
+    def pad(text)
+      whitespace_regexp = /^([\x09\x0A\x0B\x0D\x20])+/
+      differece_error = ArgumentError.new('maybe different from the original text')
-#     def after(index)
-#       self[index..-1]
-#     end
+      morphemes = Morphemes.new
+      index = 0
+      while index < size
+        morpheme = self[index]
+        morpheme_text = morpheme.text
+        if text[0, morpheme_text.size] == morpheme_text
+          text = text[morpheme_text.size..-1]
+          morphemes << morpheme
+          index += 1
+        elsif text =~ whitespace_regexp
+          whitespace = $1
+          text = text[whitespace.size..-1]
+          morphemes << create_padding(whitespace)
+        else
+          raise differece_error
+        end
+      end
+      if text =~ /#{whitespace_regexp}$/
+        morphemes << create_padding(text)
+      elsif !text.empty?
+        raise differece_error
+      end
+      morphemes
+    end
+    private
+    def create_padding(whitespace)
+      Morpheme.new(
+        :text           => whitespace,
+        :part_of_speech => 'padding',
+        :categories     => [],
+        :root_form      => whitespace
+      )
+    end
   end
 end

data/lib/langue/sentence.rb CHANGED

@@ -14,5 +14,10 @@ module Langue
     def morphemes
       @morphemes ||= Morphemes.new(flatten)
     end
+    def text
+      @text = empty? ? nil : map(&:text).join unless instance_variable_defined?(:@text)
+      @text
+    end
   end
 end

data/lib/langue/text.rb CHANGED

@@ -12,11 +12,16 @@ module Langue
     end
     def words
-      @words ||= inject(&:+)
+      @words ||= inject([], &:+)
     end
     def morphemes
       @morphemes ||= Morphemes.new(flatten)
     end
+    def text
+      @text = empty? ? nil : map(&:text).join unless instance_variable_defined?(:@text)
+      @text
+    end
   end
 end

data/lib/langue/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Langue
-  VERSION = '0.0.3'
+  VERSION = '0.0.4'
 end

data/lib/langue/word.rb CHANGED

@@ -61,4 +61,15 @@ module Langue
                      end
     end
   end
+  class Noun         < Word; end
+  class Pronoun      < Word; end
+  class Adjective    < Word; end
+  class Verb         < Word; end
+  class Adverb       < Word; end
+  class Particle     < Word; end
+  class Conjunction  < Word; end
+  class Determiner   < Word; end
+  class Interjection < Word; end
+  class Period       < Word; end
 end

data/spec/langue/morpheme_spec.rb CHANGED

@@ -47,6 +47,37 @@ describe Langue::Morpheme, ' accessors' do
   end
 end
+describe Langue::Morpheme, '#==' do
+  before do
+    @attrs = {
+      :text            => 'text',
+      :part_of_speech  => 'part_of_speech',
+      :categories      => %w(category1 category2),
+      :inflection      => 'inflection',
+      :inflection_type => 'inflection_type',
+      :root_form       => 'root_form',
+      :yomi            => 'yomi',
+      :pronunciation   => 'pronunciation'
+    }
+    @morpheme = described_class.new(@attrs)
+  end
+  it 'returns true if the morpheme equals to other morpheme' do
+    other_morpheme = described_class.new(@attrs)
+    @morpheme.should == other_morpheme
+  end
+  it 'returns false if the morpheme does not equal to other morpheme' do
+    other_morpheme = described_class.new(@attrs.merge(:pronunciation => 'other_pronunciation'))
+    @morpheme.should_not == other_morpheme
+  end
+  it "returns false if other morpheme is not an instance of #{described_class}" do
+    @morpheme.should_not == 'other_morpheme'
+  end
+end
 describe Langue::Morpheme, '#classified?' do
   before do
     @morpheme = described_class.new(

data/spec/langue/morphemes_spec.rb CHANGED

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 require 'langue/morphemes'
 describe Langue::Morphemes, '#valid?' do
@@ -66,8 +67,72 @@ describe Langue::Morphemes, '#at' do
   end
 end
-# describe Langue::Morphemes, '#match?' do
-# end
+describe Langue::Morphemes, '#pad' do
+  before do
+    @morphemes = morphemes = described_class.new([
+      Langue::Morpheme.new(:text => 'text1'),
+      Langue::Morpheme.new(:text => 'text2'),
+      Langue::Morpheme.new(:text => 'text3')
+    ])
+  end
+  it 'pads \\x09' do
+    padded_morphemes = @morphemes.pad("text1\x09text2text3\x09")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x09"
+    morpheme.root_form.should == "\x09"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x09"
+    morpheme.root_form.should == "\x09"
+  end
+  it 'pads \\x0A' do
+    padded_morphemes = @morphemes.pad("text1\x0Atext2text3\x0A")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x0A"
+    morpheme.root_form.should == "\x0A"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x0A"
+    morpheme.root_form.should == "\x0A"
+  end
-# describe Langue::Morphemes, '#after' do
-# end
+  it 'pads \\x0B' do
+    padded_morphemes = @morphemes.pad("text1\x0Btext2text3\x0B")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x0B"
+    morpheme.root_form.should == "\x0B"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x0B"
+    morpheme.root_form.should == "\x0B"
+  end
+  it 'pads \\x0D' do
+    padded_morphemes = @morphemes.pad("text1\x0Dtext2text3\x0D")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x0D"
+    morpheme.root_form.should == "\x0D"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x0D"
+    morpheme.root_form.should == "\x0D"
+  end
+  it 'pads \\x20' do
+    padded_morphemes = @morphemes.pad("text1\x20text2text3\x20")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x20"
+    morpheme.root_form.should == "\x20"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x20"
+    morpheme.root_form.should == "\x20"
+  end
+  it 'raises ArgumentError if there is a difference between the text and the morphemes' do
+    lambda { @morphemes.pad("text1\x20text3text4") }.should raise_error(ArgumentError, 'maybe different from the original text')
+    lambda { @morphemes.pad('text1text2text3text4text5') }.should raise_error(ArgumentError, 'maybe different from the original text')
+  end
+end

data/spec/langue/sentence_spec.rb CHANGED

@@ -55,3 +55,19 @@ describe Langue::Sentence, '#morphemes' do
     @morphemes.should == [1, 2, 3, 4]
   end
 end
+describe Langue::Sentence, '#text' do
+  it 'returns a concatenated string of the text of the words' do
+    sentence = described_class.new([
+      stub(Langue::Word).tap { |s| s.stub!(:text).and_return('word1') },
+      stub(Langue::Word).tap { |s| s.stub!(:text).and_return('word2') }
+    ])
+    sentence.text.should == 'word1word2'
+  end
+  it 'returns nil if the sentence is empty' do
+    sentence = described_class.new
+    sentence.text.should be_nil
+  end
+end

data/spec/langue/text_spec.rb CHANGED

@@ -54,6 +54,14 @@ describe Langue::Text, '#words' do
   it 'returns the words in the sentences' do
     @words.should == [[1, 2], [3, 4], [5, 6], [7, 8]]
   end
+  context 'with an empty text' do
+    it 'returns an empty array' do
+      text = described_class.new([])
+      text.words.should be_an(Array)
+      text.words.should be_empty
+    end
+  end
 end
 describe Langue::Text, '#morphemes' do
@@ -70,3 +78,19 @@ describe Langue::Text, '#morphemes' do
     @morphemes.should == [1, 2, 3, 4, 5, 6, 7, 8]
   end
 end
+describe Langue::Text, '#text' do
+  it 'returns a concatenated string of the text of the sentences' do
+    text = described_class.new([
+      stub(Langue::Sentence).tap { |s| s.stub!(:text).and_return('sentence1') },
+      stub(Langue::Sentence).tap { |s| s.stub!(:text).and_return('sentence2') }
+    ])
+    text.text.should == 'sentence1sentence2'
+  end
+  it 'returns nil if the text is empty' do
+    text = described_class.new
+    text.text.should be_nil
+  end
+end

data/spec/langue/word_spec.rb CHANGED

@@ -1,5 +1,30 @@
 require 'langue/word'
+describe Langue do
+  %w(
+    Noun
+    Pronoun
+    Adjective
+    Verb
+    Adverb
+    Particle
+    Conjunction
+    Determiner
+    Interjection
+    Period
+  ).each do |part_of_speech|
+    it "has #{described_class}::#{part_of_speech}" do
+      Langue.should be_const_defined(part_of_speech)
+    end
+    describe part_of_speech do
+      it 'inherits Langue::Word' do
+        Langue.const_get(part_of_speech).superclass.should == Langue::Word
+      end
+    end
+  end
+end
 describe Langue::Word, '#valid?' do
   before do
     @word = described_class.new([
@@ -36,7 +61,7 @@ describe Langue::Word, '#morphemes' do
     @morphemes.should be_a Langue::Morphemes
   end
-  it 'returns ' do
+  it 'returns own morphemes' do
     @morphemes.should == [1, 2, 3]
   end
 end
@@ -64,18 +89,19 @@ describe Langue::Word, '#key_morpheme' do
 end
 describe Langue::Word, '#text' do
-  before do
+  it 'returns a concatenated string of the text of the morphemes' do
     word = described_class.new([
       stub.tap { |s| s.stub!(:text).and_return('text1') },
       stub.tap { |s| s.stub!(:text).and_return('text2') },
       stub.tap { |s| s.stub!(:text).and_return('text3') }
     ])
-    @text = word.text
+    word.text.should == 'text1text2text3'
   end
-  it 'returns a concatenated string of the text of the morphemes' do
-    @text.should == 'text1text2text3'
+  it 'returns nil if the word is empty' do
+    word = described_class.new
+    word.text.should be_nil
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: langue
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-04-13 00:00:00.000000000 Z
+date: 2012-09-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.21
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
 summary: The foundation for the natural languages