srx-english 0.1.1 → 0.1.2

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -41,7 +41,7 @@ over the matched sentences:
41
41
 
42
42
  sentence = 'My home is my castle.'
43
43
  splitter = SRX::English::WordSplitter.new(sentence)
44
- splitter.each do |word,type|
44
+ splitter.each do |word,type,start_offset,end_offset|
45
45
  puts "'#{word}' #{type}"
46
46
  end
47
47
  # 'My' word
data/changelog.txt CHANGED
@@ -1,3 +1,5 @@
1
+ 0.1.2
2
+ - Word splitter reports positions of the word
1
3
  0.1.1
2
4
  - Use getc instead of readchar
3
5
  0.1.0
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  $:.unshift "lib"
3
- #require 'srx/english/word_splitter'
3
+ require 'srx/english/word_splitter'
4
4
 
5
5
  Given /^a sentence '([^']+)'$/ do |sentence|
6
6
  @sentence = sentence.force_encoding('utf-8')
@@ -14,6 +14,8 @@ Then /^the following segments should be detected$/ do |table|
14
14
  table.hashes.zip(@splitter.to_a).each do |expected,returned|
15
15
  returned[0].should == expected[:segment].gsub(/'/,"")
16
16
  returned[1].should == expected[:type].to_sym
17
+ returned[2].should == expected[:start].to_i
18
+ returned[3].should == expected[:end].to_i
17
19
  end
18
20
  end
19
21
 
@@ -3,15 +3,15 @@ Feature: word splitter
3
3
  Given a sentence 'My home is my castle.'
4
4
  When the sentence is split
5
5
  Then the following segments should be detected
6
- | segment | type |
7
- #-----------------#
8
- | My | word |
9
- | ' ' | other |
10
- | home | word |
11
- | ' ' | other |
12
- | is | word |
13
- | ' ' | other |
14
- | my | word |
15
- | ' ' | other |
16
- | castle | word |
17
- | . | punct |
6
+ | segment | type | start | end |
7
+ #-------------------------------#
8
+ | My | word | 0 | 1 |
9
+ | ' ' | other | 2 | 2 |
10
+ | home | word | 3 | 6 |
11
+ | ' ' | other | 7 | 7 |
12
+ | is | word | 8 | 9 |
13
+ | ' ' | other | 10 | 10 |
14
+ | my | word | 11 | 12 |
15
+ | ' ' | other | 13 | 13 |
16
+ | castle | word | 14 | 19 |
17
+ | . | punct | 20 | 20 |
@@ -38,16 +38,18 @@ module SRX
38
38
  def each
39
39
  raise "Invalid argument - sentence is nil" if @sentence.nil?
40
40
  @sentence.scan(SPLIT_RE) do |word,number,punct,graph,other|
41
+ start_offset = $~.begin(0)
42
+ end_offset = $~.end(0)-1
41
43
  if !word.nil?
42
- yield word, :word
44
+ yield word, :word, start_offset, end_offset
43
45
  elsif !number.nil?
44
- yield number, :number
46
+ yield number, :number, start_offset, end_offset
45
47
  elsif !punct.nil?
46
- yield punct, :punct
48
+ yield punct, :punct, start_offset, end_offset
47
49
  elsif !graph.nil?
48
- yield graph, :graph
50
+ yield graph, :graph, start_offset, end_offset
49
51
  else
50
- yield other, :other
52
+ yield other, :other, start_offset, end_offset
51
53
  end
52
54
  end
53
55
  end
data/srx-english.gemspec CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "srx-english"
6
- s.version = "0.1.1"
6
+ s.version = "0.1.2"
7
7
  s.platform = Gem::Platform::RUBY
8
8
  s.authors = ["Aleksander Pohl"]
9
9
  s.email = ["apohllo@o2.pl"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: srx-english
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-22 00:00:00.000000000 Z
12
+ date: 2012-11-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: term-ansicolor