RubyGems - langue - Versions diffs - 0.0.3 → 0.0.4 - Mend

langue 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

data/lib/langue/morpheme.rb +5 -0
data/lib/langue/morphemes.rb +42 -6
data/lib/langue/sentence.rb +5 -0
data/lib/langue/text.rb +6 -1
data/lib/langue/version.rb +1 -1
data/lib/langue/word.rb +11 -0
data/spec/langue/morpheme_spec.rb +31 -0
data/spec/langue/morphemes_spec.rb +69 -4
data/spec/langue/sentence_spec.rb +16 -0
data/spec/langue/text_spec.rb +24 -0
data/spec/langue/word_spec.rb +31 -5
metadata +3 -3

data/lib/langue/morpheme.rb CHANGED

@@ -17,6 +17,11 @@ module Langue
     attr_reader(*KEYS)
+    def ==(other_morpheme)
+      return false unless other_morpheme.is_a?(self.class)
+      KEYS.all? { |key| self.__send__(key) == other_morpheme.__send__(key) }
+    end
     def classified?(part_of_speech, *categories)
       got = [@part_of_speech] + @categories
       expected = [part_of_speech] + categories

data/lib/langue/morphemes.rb CHANGED

@@ -11,12 +11,48 @@ module Langue
       morpheme.nil? || !block_given? ? morpheme : yield(morpheme)
     end
-#     def match?(index, text)
-#       at(index) {|morpheme| morpheme.text == text}
-#     end
+    def pad(text)
+      whitespace_regexp = /^([\x09\x0A\x0B\x0D\x20])+/
+      differece_error = ArgumentError.new('maybe different from the original text')
-#     def after(index)
-#       self[index..-1]
-#     end
+      morphemes = Morphemes.new
+      index = 0
+      while index < size
+        morpheme = self[index]
+        morpheme_text = morpheme.text
+        if text[0, morpheme_text.size] == morpheme_text
+          text = text[morpheme_text.size..-1]
+          morphemes << morpheme
+          index += 1
+        elsif text =~ whitespace_regexp
+          whitespace = $1
+          text = text[whitespace.size..-1]
+          morphemes << create_padding(whitespace)
+        else
+          raise differece_error
+        end
+      end
+      if text =~ /#{whitespace_regexp}$/
+        morphemes << create_padding(text)
+      elsif !text.empty?
+        raise differece_error
+      end
+      morphemes
+    end
+    private
+    def create_padding(whitespace)
+      Morpheme.new(
+        :text           => whitespace,
+        :part_of_speech => 'padding',
+        :categories     => [],
+        :root_form      => whitespace
+      )
+    end
   end
 end

data/lib/langue/sentence.rb CHANGED

@@ -14,5 +14,10 @@ module Langue
     def morphemes
       @morphemes ||= Morphemes.new(flatten)
     end
+    def text
+      @text = empty? ? nil : map(&:text).join unless instance_variable_defined?(:@text)
+      @text
+    end
   end
 end

data/lib/langue/text.rb CHANGED

@@ -12,11 +12,16 @@ module Langue
     end
     def words
-      @words ||= inject(&:+)
+      @words ||= inject([], &:+)
     end
     def morphemes
       @morphemes ||= Morphemes.new(flatten)
     end
+    def text
+      @text = empty? ? nil : map(&:text).join unless instance_variable_defined?(:@text)
+      @text
+    end
   end
 end

data/lib/langue/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Langue
-  VERSION = '0.0.3'
+  VERSION = '0.0.4'
 end

data/lib/langue/word.rb CHANGED

@@ -61,4 +61,15 @@ module Langue
                      end
     end
   end
+  class Noun         < Word; end
+  class Pronoun      < Word; end
+  class Adjective    < Word; end
+  class Verb         < Word; end
+  class Adverb       < Word; end
+  class Particle     < Word; end
+  class Conjunction  < Word; end
+  class Determiner   < Word; end
+  class Interjection < Word; end
+  class Period       < Word; end
 end

data/spec/langue/morpheme_spec.rb CHANGED

@@ -47,6 +47,37 @@ describe Langue::Morpheme, ' accessors' do
   end
 end
+describe Langue::Morpheme, '#==' do
+  before do
+    @attrs = {
+      :text            => 'text',
+      :part_of_speech  => 'part_of_speech',
+      :categories      => %w(category1 category2),
+      :inflection      => 'inflection',
+      :inflection_type => 'inflection_type',
+      :root_form       => 'root_form',
+      :yomi            => 'yomi',
+      :pronunciation   => 'pronunciation'
+    }
+    @morpheme = described_class.new(@attrs)
+  end
+  it 'returns true if the morpheme equals to other morpheme' do
+    other_morpheme = described_class.new(@attrs)
+    @morpheme.should == other_morpheme
+  end
+  it 'returns false if the morpheme does not equal to other morpheme' do
+    other_morpheme = described_class.new(@attrs.merge(:pronunciation => 'other_pronunciation'))
+    @morpheme.should_not == other_morpheme
+  end
+  it "returns false if other morpheme is not an instance of #{described_class}" do
+    @morpheme.should_not == 'other_morpheme'
+  end
+end
 describe Langue::Morpheme, '#classified?' do
   before do
     @morpheme = described_class.new(

data/spec/langue/morphemes_spec.rb CHANGED

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 require 'langue/morphemes'
 describe Langue::Morphemes, '#valid?' do
@@ -66,8 +67,72 @@ describe Langue::Morphemes, '#at' do
   end
 end
-# describe Langue::Morphemes, '#match?' do
-# end
+describe Langue::Morphemes, '#pad' do
+  before do
+    @morphemes = morphemes = described_class.new([
+      Langue::Morpheme.new(:text => 'text1'),
+      Langue::Morpheme.new(:text => 'text2'),
+      Langue::Morpheme.new(:text => 'text3')
+    ])
+  end
+  it 'pads \\x09' do
+    padded_morphemes = @morphemes.pad("text1\x09text2text3\x09")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x09"
+    morpheme.root_form.should == "\x09"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x09"
+    morpheme.root_form.should == "\x09"
+  end
+  it 'pads \\x0A' do
+    padded_morphemes = @morphemes.pad("text1\x0Atext2text3\x0A")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x0A"
+    morpheme.root_form.should == "\x0A"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x0A"
+    morpheme.root_form.should == "\x0A"
+  end
-# describe Langue::Morphemes, '#after' do
-# end
+  it 'pads \\x0B' do
+    padded_morphemes = @morphemes.pad("text1\x0Btext2text3\x0B")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x0B"
+    morpheme.root_form.should == "\x0B"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x0B"
+    morpheme.root_form.should == "\x0B"
+  end
+  it 'pads \\x0D' do
+    padded_morphemes = @morphemes.pad("text1\x0Dtext2text3\x0D")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x0D"
+    morpheme.root_form.should == "\x0D"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x0D"
+    morpheme.root_form.should == "\x0D"
+  end
+  it 'pads \\x20' do
+    padded_morphemes = @morphemes.pad("text1\x20text2text3\x20")
+    padded_morphemes.should have(5).items
+    morpheme = padded_morphemes[1]
+    morpheme.text.should == "\x20"
+    morpheme.root_form.should == "\x20"
+    morpheme = padded_morphemes[4]
+    morpheme.text.should == "\x20"
+    morpheme.root_form.should == "\x20"
+  end
+  it 'raises ArgumentError if there is a difference between the text and the morphemes' do
+    lambda { @morphemes.pad("text1\x20text3text4") }.should raise_error(ArgumentError, 'maybe different from the original text')
+    lambda { @morphemes.pad('text1text2text3text4text5') }.should raise_error(ArgumentError, 'maybe different from the original text')
+  end
+end

data/spec/langue/sentence_spec.rb CHANGED

@@ -55,3 +55,19 @@ describe Langue::Sentence, '#morphemes' do
     @morphemes.should == [1, 2, 3, 4]
   end
 end
+describe Langue::Sentence, '#text' do
+  it 'returns a concatenated string of the text of the words' do
+    sentence = described_class.new([
+      stub(Langue::Word).tap { |s| s.stub!(:text).and_return('word1') },
+      stub(Langue::Word).tap { |s| s.stub!(:text).and_return('word2') }
+    ])
+    sentence.text.should == 'word1word2'
+  end
+  it 'returns nil if the sentence is empty' do
+    sentence = described_class.new
+    sentence.text.should be_nil
+  end
+end

data/spec/langue/text_spec.rb CHANGED

@@ -54,6 +54,14 @@ describe Langue::Text, '#words' do
   it 'returns the words in the sentences' do
     @words.should == [[1, 2], [3, 4], [5, 6], [7, 8]]
   end
+  context 'with an empty text' do
+    it 'returns an empty array' do
+      text = described_class.new([])
+      text.words.should be_an(Array)
+      text.words.should be_empty
+    end
+  end
 end
 describe Langue::Text, '#morphemes' do
@@ -70,3 +78,19 @@ describe Langue::Text, '#morphemes' do
     @morphemes.should == [1, 2, 3, 4, 5, 6, 7, 8]
   end
 end
+describe Langue::Text, '#text' do
+  it 'returns a concatenated string of the text of the sentences' do
+    text = described_class.new([
+      stub(Langue::Sentence).tap { |s| s.stub!(:text).and_return('sentence1') },
+      stub(Langue::Sentence).tap { |s| s.stub!(:text).and_return('sentence2') }
+    ])
+    text.text.should == 'sentence1sentence2'
+  end
+  it 'returns nil if the text is empty' do
+    text = described_class.new
+    text.text.should be_nil
+  end
+end

data/spec/langue/word_spec.rb CHANGED

@@ -1,5 +1,30 @@
 require 'langue/word'
+describe Langue do
+  %w(
+    Noun
+    Pronoun
+    Adjective
+    Verb
+    Adverb
+    Particle
+    Conjunction
+    Determiner
+    Interjection
+    Period
+  ).each do |part_of_speech|
+    it "has #{described_class}::#{part_of_speech}" do
+      Langue.should be_const_defined(part_of_speech)
+    end
+    describe part_of_speech do
+      it 'inherits Langue::Word' do
+        Langue.const_get(part_of_speech).superclass.should == Langue::Word
+      end
+    end
+  end
+end
 describe Langue::Word, '#valid?' do
   before do
     @word = described_class.new([
@@ -36,7 +61,7 @@ describe Langue::Word, '#morphemes' do
     @morphemes.should be_a Langue::Morphemes
   end
-  it 'returns ' do
+  it 'returns own morphemes' do
     @morphemes.should == [1, 2, 3]
   end
 end
@@ -64,18 +89,19 @@ describe Langue::Word, '#key_morpheme' do
 end
 describe Langue::Word, '#text' do
-  before do
+  it 'returns a concatenated string of the text of the morphemes' do
     word = described_class.new([
       stub.tap { |s| s.stub!(:text).and_return('text1') },
       stub.tap { |s| s.stub!(:text).and_return('text2') },
       stub.tap { |s| s.stub!(:text).and_return('text3') }
     ])
-    @text = word.text
+    word.text.should == 'text1text2text3'
   end
-  it 'returns a concatenated string of the text of the morphemes' do
-    @text.should == 'text1text2text3'
+  it 'returns nil if the word is empty' do
+    word = described_class.new
+    word.text.should be_nil
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: langue
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-04-13 00:00:00.000000000 Z
+date: 2012-09-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.21
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
 summary: The foundation for the natural languages