sentence_extractor 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/sentence_extractor/extractor.rb +10 -1
- data/lib/sentence_extractor/version.rb +1 -1
- data/spec/extractor_spec.rb +14 -0
- metadata +4 -4
@@ -45,7 +45,8 @@ module SentenceExtractor
|
|
45
45
|
while match != nil
|
46
46
|
sentence_end_point = remaining_text =~ /#{@reg_exp}/ # check for sentences
|
47
47
|
if sentence_end_point
|
48
|
-
|
48
|
+
sentence = remaining_text[0..sentence_end_point].strip # strip sentence of leading and trailing spaces
|
49
|
+
sentences << sentence if (!@first_letter_capitilization || first_letter_uppercase?(sentence)) # add new sentence to array
|
49
50
|
remaining_text = remaining_text[sentence_end_point+1..remaining_text.size] # set the rest of the text to be processed.
|
50
51
|
else
|
51
52
|
match = nil
|
@@ -54,6 +55,14 @@ module SentenceExtractor
|
|
54
55
|
sentences
|
55
56
|
end
|
56
57
|
|
58
|
+
def first_letter_uppercase? sentence
|
59
|
+
if sentence[0..0] == sentence[0..0].upcase
|
60
|
+
return true
|
61
|
+
else
|
62
|
+
return false
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
57
66
|
end
|
58
67
|
|
59
68
|
end
|
data/spec/extractor_spec.rb
CHANGED
@@ -26,4 +26,18 @@ describe SentenceExtractor::Extractor do
|
|
26
26
|
sen_exc.extract_sentences("Hello. How are you? Good thanks! ").should eql(["Hello.", "How are you?", "Good thanks!"])
|
27
27
|
end
|
28
28
|
|
29
|
+
it "Should leave in sentences that don't start with capital letters" do
|
30
|
+
sen_exc = SentenceExtractor::Extractor.new()
|
31
|
+
sen_exc.extract_sentences("hello. How are you? good thanks! ").should eql(["hello.", "How are you?", "good thanks!"])
|
32
|
+
end
|
33
|
+
|
34
|
+
it "Should remove sentences that don't start with capital letters" do
|
35
|
+
sen_exc = SentenceExtractor::Extractor.new(language = "en", delimiters = [".", "!","?"], first_letter_capitilization = true)
|
36
|
+
sen_exc.extract_sentences("hello. How are you? good thanks! ").should eql(["How are you?"])
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
|
29
43
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentence_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 0
|
10
|
-
version: 0.2.0
|
9
|
+
- 1
|
10
|
+
version: 0.2.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seb Glazebrook
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-03-
|
18
|
+
date: 2012-03-26 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: rspec
|