RubyGems - srx-english - Version diff - 0.1.1 → 0.1.2 - Mend

srx-english 0.1.1 → 0.1.2

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

data/README.rdoc +1 -1
data/changelog.txt +2 -0
data/features/steps/word_splitter.rb +3 -1
data/features/word_splitter.feature +12 -12
data/lib/srx/english/word_splitter.rb +7 -5
data/srx-english.gemspec +1 -1
metadata +2 -2

data/README.rdoc CHANGED Viewed

@@ -41,7 +41,7 @@ over the matched sentences:
   sentence = 'My home is my castle.'
   splitter = SRX::English::WordSplitter.new(sentence)
-  splitter.each do |word,type|
+  splitter.each do |word,type,start_offset,end_offset|
     puts "'#{word}' #{type}"
   end
   # 'My' word

data/changelog.txt CHANGED Viewed

@@ -1,3 +1,5 @@
+0.1.2
+- Word splitter reports positions of the word
 0.1.1
 - Use getc instead of readchar
 0.1.0

data/features/steps/word_splitter.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # encoding: utf-8
 $:.unshift "lib"
-#require 'srx/english/word_splitter'
+require 'srx/english/word_splitter'
 Given /^a sentence '([^']+)'$/ do |sentence|
   @sentence = sentence.force_encoding('utf-8')
@@ -14,6 +14,8 @@ Then /^the following segments should be detected$/ do |table|
   table.hashes.zip(@splitter.to_a).each do |expected,returned|
     returned[0].should == expected[:segment].gsub(/'/,"")
     returned[1].should == expected[:type].to_sym
+    returned[2].should == expected[:start].to_i
+    returned[3].should == expected[:end].to_i
   end
 end

data/features/word_splitter.feature CHANGED Viewed

@@ -3,15 +3,15 @@ Feature: word splitter
     Given a sentence 'My home is my castle.'
     When the sentence is split
     Then the following segments should be detected
-      | segment | type  |
-      #-----------------#
-      | My      | word  |
-      | ' '     | other |
-      | home    | word  |
-      | ' '     | other |
-      | is      | word  |
-      | ' '     | other |
-      | my      | word  |
-      | ' '     | other |
-      | castle  | word  |
-      | .       | punct |
+      | segment | type  | start | end |
+      #-------------------------------#
+      | My      | word  | 0     | 1   |
+      | ' '     | other | 2     | 2   |
+      | home    | word  | 3     | 6   |
+      | ' '     | other | 7     | 7   |
+      | is      | word  | 8     | 9   |
+      | ' '     | other | 10    | 10  |
+      | my      | word  | 11    | 12  |
+      | ' '     | other | 13    | 13  |
+      | castle  | word  | 14    | 19  |
+      | .       | punct | 20    | 20  |

data/lib/srx/english/word_splitter.rb CHANGED Viewed

@@ -38,16 +38,18 @@ module SRX
       def each
         raise "Invalid argument - sentence is nil" if @sentence.nil?
         @sentence.scan(SPLIT_RE) do |word,number,punct,graph,other|
+          start_offset = $~.begin(0)
+          end_offset = $~.end(0)-1
           if !word.nil?
-            yield word, :word
+            yield word, :word, start_offset, end_offset
           elsif !number.nil?
-            yield number, :number
+            yield number, :number, start_offset, end_offset
           elsif !punct.nil?
-            yield punct, :punct
+            yield punct, :punct, start_offset, end_offset
           elsif !graph.nil?
-            yield graph, :graph
+            yield graph, :graph, start_offset, end_offset
           else
-            yield other, :other
+            yield other, :other, start_offset, end_offset
           end
         end
       end

data/srx-english.gemspec CHANGED Viewed

@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
 Gem::Specification.new do |s|
   s.name        = "srx-english"
-  s.version     = "0.1.1"
+  s.version     = "0.1.2"
   s.platform    = Gem::Platform::RUBY
   s.authors     = ["Aleksander Pohl"]
   s.email       = ["apohllo@o2.pl"]

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: srx-english
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-10-22 00:00:00.000000000 Z
+date: 2012-11-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: term-ansicolor