RubyGems - greeb - Versions diffs - 0.1.0.rc3 → 0.1.0.rc4 - Mend

greeb 0.1.0.rc3 → 0.1.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

data/README.md CHANGED Viewed

@@ -132,6 +132,11 @@ systematic and awesome.
 ## Build Status [<img src="https://secure.travis-ci.org/eveel/greeb.png"/>](http://travis-ci.org/eveel/greeb)
+If you're using [Rubinius](http://rubini.us) please note that it has the
+incompatible `StringScanner` implementation. More information can be
+provided under the following link:
+<https://github.com/rubinius/rubinius/issues/1808>.
 ## Dependency Status [<img src="https://gemnasium.com/eveel/greeb.png?travis"/>](https://gemnasium.com/eveel/greeb)
 ## Copyright

data/lib/greeb/segmentator.rb CHANGED Viewed

@@ -33,6 +33,15 @@ class Greeb::Segmentator
     @sentences
   end
+  # Subsentences memoization method.
+  #
+  # @return [Set<Greeb::Entity>] a set of subsentences.
+  #
+  def subsentences
+    detect_subsentences! unless @subsentences
+    @subsentences
+  end
   # Extract tokens from the set of sentences.
   #
   # @param sentences [Array<Greeb::Entity>] a list of sentences.
@@ -48,6 +57,21 @@ class Greeb::Segmentator
     ]
   end
+  # Extract subsentences from the set of sentences.
+  #
+  # @param sentences [Array<Greeb::Entity>] a list of sentences.
+  #
+  # @return [Hash<Greeb::Entity, Array<Greeb::Entity>>] a hash with
+  #   sentences as keys and subsentences arrays as values.
+  #
+  def subextract *sentences
+    Hash[
+      sentences.map do |s|
+        [s, subsentences.select { |ss| ss.from >= s.from and ss.to <= s.to }]
+      end
+    ]
+  end
   protected
     # Implementation of the sentence detection method. This method
     # changes the `@sentences` ivar.
@@ -84,6 +108,41 @@ class Greeb::Segmentator
       nil.tap { @sentences << rest if rest.from and rest.to }
     end
+    # Implementation of the subsentence detection method. This method
+    # changes the `@subsentences` ivar.
+    #
+    # @return [nil] nothing.
+    #
+    def detect_subsentences!
+      @subsentences = SortedSet.new
+      rest = tokens.inject(new_subsentence) do |subsentence, token|
+        if !subsentence.from and SENTENCE_DOESNT_START.include?(token.type)
+          next subsentence
+        end
+        subsentence.from = token.from unless subsentence.from
+        next subsentence if subsentence.to and subsentence.to > token.to
+        if [:punct, :spunct].include? token.type
+          subsentence.to = tokens.
+            select { |t| t.from >= token.from }.
+            inject(token) { |r, t| break r if t.type != token.type; t }.
+            to
+          @subsentences << subsentence
+          subsentence = new_subsentence
+        elsif :separ != token.type
+          subsentence.to = token.to
+        end
+        subsentence
+      end
+      nil.tap { @subsentences << rest if rest.from and rest.to }
+    end
   private
     # Create a new instance of {Greeb::Entity} with `:sentence` type.
     #
@@ -92,4 +151,12 @@ class Greeb::Segmentator
     def new_sentence
       Greeb::Entity.new(nil, nil, :sentence)
     end
+    # Create a new instance of {Greeb::Entity} with `:subsentence` type.
+    #
+    # @return [Greeb::Entity] a new entity instance.
+    #
+    def new_subsentence
+      Greeb::Entity.new(nil, nil, :subsentence)
+    end
 end

data/lib/greeb/version.rb CHANGED Viewed

@@ -5,5 +5,5 @@
 module Greeb
   # Version of Greeb.
   #
-  VERSION = '0.1.0.rc3'
+  VERSION = '0.1.0.rc4'
 end

data/spec/segmentator_spec.rb CHANGED Viewed

@@ -89,8 +89,10 @@ module Greeb
       subject { Segmentator.new(@tokenizer) }
+      let(:sentences) { subject.sentences }
       it 'should be extracted' do
-        subject.extract(*subject.sentences).must_equal({
+        subject.extract(*sentences).must_equal({
           Entity.new(0,  6, :sentence) => [
             Entity.new(0, 5, :letter),
             Entity.new(5, 6, :punct)
@@ -108,5 +110,22 @@ module Greeb
         })
       end
     end
+    describe 'subsentence extractor' do
+      before { @tokenizer = Tokenizer.new('Hello, I am JC Denton.') }
+      subject { Segmentator.new(@tokenizer) }
+      let(:sentences) { subject.sentences }
+      it 'should extract subsentences' do
+        subject.subextract(*sentences).must_equal({
+          Entity.new(0,  22, :sentence) => [
+            Entity.new(0, 6, :subsentence),
+            Entity.new(7, 22, :subsentence)
+          ]
+        })
+      end
+    end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: greeb
 version: !ruby/object:Gem::Version
-  version: 0.1.0.rc3
+  version: 0.1.0.rc4
   prerelease: 6
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-07-10 00:00:00.000000000 Z
+date: 2012-07-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -112,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -2527935574265859361
+      hash: 1130932854600612903
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements: