srx-english 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -41,7 +41,7 @@ over the matched sentences:
41
41
 
42
42
  sentence = 'My home is my castle.'
43
43
  splitter = SRX::English::WordSplitter.new(sentence)
44
- splitter.each do |word,type|
44
+ splitter.each do |word,type,start_offset,end_offset|
45
45
  puts "'#{word}' #{type}"
46
46
  end
47
47
  # 'My' word
data/changelog.txt CHANGED
@@ -1,3 +1,5 @@
1
+ 0.1.2
2
+ - Word splitter reports positions of the word
1
3
  0.1.1
2
4
  - Use getc instead of readchar
3
5
  0.1.0
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  $:.unshift "lib"
3
- #require 'srx/english/word_splitter'
3
+ require 'srx/english/word_splitter'
4
4
 
5
5
  Given /^a sentence '([^']+)'$/ do |sentence|
6
6
  @sentence = sentence.force_encoding('utf-8')
@@ -14,6 +14,8 @@ Then /^the following segments should be detected$/ do |table|
14
14
  table.hashes.zip(@splitter.to_a).each do |expected,returned|
15
15
  returned[0].should == expected[:segment].gsub(/'/,"")
16
16
  returned[1].should == expected[:type].to_sym
17
+ returned[2].should == expected[:start].to_i
18
+ returned[3].should == expected[:end].to_i
17
19
  end
18
20
  end
19
21
 
@@ -3,15 +3,15 @@ Feature: word splitter
3
3
  Given a sentence 'My home is my castle.'
4
4
  When the sentence is split
5
5
  Then the following segments should be detected
6
- | segment | type |
7
- #-----------------#
8
- | My | word |
9
- | ' ' | other |
10
- | home | word |
11
- | ' ' | other |
12
- | is | word |
13
- | ' ' | other |
14
- | my | word |
15
- | ' ' | other |
16
- | castle | word |
17
- | . | punct |
6
+ | segment | type | start | end |
7
+ #-------------------------------#
8
+ | My | word | 0 | 1 |
9
+ | ' ' | other | 2 | 2 |
10
+ | home | word | 3 | 6 |
11
+ | ' ' | other | 7 | 7 |
12
+ | is | word | 8 | 9 |
13
+ | ' ' | other | 10 | 10 |
14
+ | my | word | 11 | 12 |
15
+ | ' ' | other | 13 | 13 |
16
+ | castle | word | 14 | 19 |
17
+ | . | punct | 20 | 20 |
@@ -38,16 +38,18 @@ module SRX
38
38
  def each
39
39
  raise "Invalid argument - sentence is nil" if @sentence.nil?
40
40
  @sentence.scan(SPLIT_RE) do |word,number,punct,graph,other|
41
+ start_offset = $~.begin(0)
42
+ end_offset = $~.end(0)-1
41
43
  if !word.nil?
42
- yield word, :word
44
+ yield word, :word, start_offset, end_offset
43
45
  elsif !number.nil?
44
- yield number, :number
46
+ yield number, :number, start_offset, end_offset
45
47
  elsif !punct.nil?
46
- yield punct, :punct
48
+ yield punct, :punct, start_offset, end_offset
47
49
  elsif !graph.nil?
48
- yield graph, :graph
50
+ yield graph, :graph, start_offset, end_offset
49
51
  else
50
- yield other, :other
52
+ yield other, :other, start_offset, end_offset
51
53
  end
52
54
  end
53
55
  end
data/srx-english.gemspec CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "srx-english"
6
- s.version = "0.1.1"
6
+ s.version = "0.1.2"
7
7
  s.platform = Gem::Platform::RUBY
8
8
  s.authors = ["Aleksander Pohl"]
9
9
  s.email = ["apohllo@o2.pl"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: srx-english
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-22 00:00:00.000000000 Z
12
+ date: 2012-11-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: term-ansicolor