sentence_extractor 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,7 +45,8 @@ module SentenceExtractor
45
45
  while match != nil
46
46
  sentence_end_point = remaining_text =~ /#{@reg_exp}/ # check for sentences
47
47
  if sentence_end_point
48
- sentences << remaining_text[0..sentence_end_point].strip # add new sentence to array
48
+ sentence = remaining_text[0..sentence_end_point].strip # strip sentence of leading and trailing spaces
49
+ sentences << sentence if (!@first_letter_capitilization || first_letter_uppercase?(sentence)) # add new sentence to array
49
50
  remaining_text = remaining_text[sentence_end_point+1..remaining_text.size] # set the rest of the text to be processed.
50
51
  else
51
52
  match = nil
@@ -54,6 +55,14 @@ module SentenceExtractor
54
55
  sentences
55
56
  end
56
57
 
58
+ def first_letter_uppercase? sentence
59
+ if sentence[0..0] == sentence[0..0].upcase
60
+ return true
61
+ else
62
+ return false
63
+ end
64
+ end
65
+
57
66
  end
58
67
 
59
68
  end
@@ -1,3 +1,3 @@
1
1
  module SentenceExtractor
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -26,4 +26,18 @@ describe SentenceExtractor::Extractor do
26
26
  sen_exc.extract_sentences("Hello. How are you? Good thanks! ").should eql(["Hello.", "How are you?", "Good thanks!"])
27
27
  end
28
28
 
29
+ it "Should leave in sentences that don't start with capital letters" do
30
+ sen_exc = SentenceExtractor::Extractor.new()
31
+ sen_exc.extract_sentences("hello. How are you? good thanks! ").should eql(["hello.", "How are you?", "good thanks!"])
32
+ end
33
+
34
+ it "Should remove sentences that don't start with capital letters" do
35
+ sen_exc = SentenceExtractor::Extractor.new(language = "en", delimiters = [".", "!","?"], first_letter_capitilization = true)
36
+ sen_exc.extract_sentences("hello. How are you? good thanks! ").should eql(["How are you?"])
37
+ end
38
+
39
+
40
+
41
+
42
+
29
43
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sentence_extractor
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 0
10
- version: 0.2.0
9
+ - 1
10
+ version: 0.2.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seb Glazebrook
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-23 00:00:00 Z
18
+ date: 2012-03-26 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: rspec