srx-english 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -1
- data/changelog.txt +2 -0
- data/features/steps/word_splitter.rb +3 -1
- data/features/word_splitter.feature +12 -12
- data/lib/srx/english/word_splitter.rb +7 -5
- data/srx-english.gemspec +1 -1
- metadata +2 -2
data/README.rdoc
CHANGED
@@ -41,7 +41,7 @@ over the matched sentences:
|
|
41
41
|
|
42
42
|
sentence = 'My home is my castle.'
|
43
43
|
splitter = SRX::English::WordSplitter.new(sentence)
|
44
|
-
splitter.each do |word,type|
|
44
|
+
splitter.each do |word,type,start_offset,end_offset|
|
45
45
|
puts "'#{word}' #{type}"
|
46
46
|
end
|
47
47
|
# 'My' word
|
data/changelog.txt
CHANGED
data/features/steps/word_splitter.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
$:.unshift "lib"
|
3
|
-
|
3
|
+
require 'srx/english/word_splitter'
|
4
4
|
|
5
5
|
Given /^a sentence '([^']+)'$/ do |sentence|
|
6
6
|
@sentence = sentence.force_encoding('utf-8')
|
@@ -14,6 +14,8 @@ Then /^the following segments should be detected$/ do |table|
|
|
14
14
|
table.hashes.zip(@splitter.to_a).each do |expected,returned|
|
15
15
|
returned[0].should == expected[:segment].gsub(/'/,"")
|
16
16
|
returned[1].should == expected[:type].to_sym
|
17
|
+
returned[2].should == expected[:start].to_i
|
18
|
+
returned[3].should == expected[:end].to_i
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
data/features/word_splitter.feature
CHANGED
@@ -3,15 +3,15 @@ Feature: word splitter
|
|
3
3
|
Given a sentence 'My home is my castle.'
|
4
4
|
When the sentence is split
|
5
5
|
Then the following segments should be detected
|
6
|
-
| segment | type |
|
7
|
-
|
8
|
-
| My | word |
|
9
|
-
| ' ' | other |
|
10
|
-
| home | word |
|
11
|
-
| ' ' | other |
|
12
|
-
| is | word |
|
13
|
-
| ' ' | other |
|
14
|
-
| my | word |
|
15
|
-
| ' ' | other |
|
16
|
-
| castle | word |
|
17
|
-
| . | punct |
|
6
|
+
| segment | type | start | end |
|
7
|
+
#-------------------------------#
|
8
|
+
| My | word | 0 | 1 |
|
9
|
+
| ' ' | other | 2 | 2 |
|
10
|
+
| home | word | 3 | 6 |
|
11
|
+
| ' ' | other | 7 | 7 |
|
12
|
+
| is | word | 8 | 9 |
|
13
|
+
| ' ' | other | 10 | 10 |
|
14
|
+
| my | word | 11 | 12 |
|
15
|
+
| ' ' | other | 13 | 13 |
|
16
|
+
| castle | word | 14 | 19 |
|
17
|
+
| . | punct | 20 | 20 |
|
data/lib/srx/english/word_splitter.rb
CHANGED
@@ -38,16 +38,18 @@ module SRX
|
|
38
38
|
def each
|
39
39
|
raise "Invalid argument - sentence is nil" if @sentence.nil?
|
40
40
|
@sentence.scan(SPLIT_RE) do |word,number,punct,graph,other|
|
41
|
+
start_offset = $~.begin(0)
|
42
|
+
end_offset = $~.end(0)-1
|
41
43
|
if !word.nil?
|
42
|
-
yield word, :word
|
44
|
+
yield word, :word, start_offset, end_offset
|
43
45
|
elsif !number.nil?
|
44
|
-
yield number, :number
|
46
|
+
yield number, :number, start_offset, end_offset
|
45
47
|
elsif !punct.nil?
|
46
|
-
yield punct, :punct
|
48
|
+
yield punct, :punct, start_offset, end_offset
|
47
49
|
elsif !graph.nil?
|
48
|
-
yield graph, :graph
|
50
|
+
yield graph, :graph, start_offset, end_offset
|
49
51
|
else
|
50
|
-
yield other, :other
|
52
|
+
yield other, :other, start_offset, end_offset
|
51
53
|
end
|
52
54
|
end
|
53
55
|
end
|
data/srx-english.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srx-english
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-11-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: term-ansicolor
|