sentence_extractor 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/sentence_extractor/extractor.rb +10 -1
- data/lib/sentence_extractor/version.rb +1 -1
- data/spec/extractor_spec.rb +14 -0
- metadata +4 -4
@@ -45,7 +45,8 @@ module SentenceExtractor
|
|
45
45
|
while match != nil
|
46
46
|
sentence_end_point = remaining_text =~ /#{@reg_exp}/ # check for sentences
|
47
47
|
if sentence_end_point
|
48
|
-
|
48
|
+
sentence = remaining_text[0..sentence_end_point].strip # strip sentence of leading and trailing spaces
|
49
|
+
sentences << sentence if (!@first_letter_capitilization || first_letter_uppercase?(sentence)) # add new sentence to array
|
49
50
|
remaining_text = remaining_text[sentence_end_point+1..remaining_text.size] # set the rest of the text to be processed.
|
50
51
|
else
|
51
52
|
match = nil
|
@@ -54,6 +55,14 @@ module SentenceExtractor
|
|
54
55
|
sentences
|
55
56
|
end
|
56
57
|
|
58
|
+
def first_letter_uppercase? sentence
|
59
|
+
if sentence[0..0] == sentence[0..0].upcase
|
60
|
+
return true
|
61
|
+
else
|
62
|
+
return false
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
57
66
|
end
|
58
67
|
|
59
68
|
end
|
data/spec/extractor_spec.rb
CHANGED
@@ -26,4 +26,18 @@ describe SentenceExtractor::Extractor do
|
|
26
26
|
sen_exc.extract_sentences("Hello. How are you? Good thanks! ").should eql(["Hello.", "How are you?", "Good thanks!"])
|
27
27
|
end
|
28
28
|
|
29
|
+
it "Should leave in sentences that don't start with capital letters" do
|
30
|
+
sen_exc = SentenceExtractor::Extractor.new()
|
31
|
+
sen_exc.extract_sentences("hello. How are you? good thanks! ").should eql(["hello.", "How are you?", "good thanks!"])
|
32
|
+
end
|
33
|
+
|
34
|
+
it "Should remove sentences that don't start with capital letters" do
|
35
|
+
sen_exc = SentenceExtractor::Extractor.new(language = "en", delimiters = [".", "!","?"], first_letter_capitilization = true)
|
36
|
+
sen_exc.extract_sentences("hello. How are you? good thanks! ").should eql(["How are you?"])
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
|
29
43
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentence_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 0
|
10
|
-
version: 0.2.0
|
9
|
+
- 1
|
10
|
+
version: 0.2.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seb Glazebrook
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-03-
|
18
|
+
date: 2012-03-26 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: rspec
|