sentence_extractor 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -45,7 +45,8 @@ module SentenceExtractor
45
45
  while match != nil
46
46
  sentence_end_point = remaining_text =~ /#{@reg_exp}/ # check for sentences
47
47
  if sentence_end_point
48
- sentences << remaining_text[0..sentence_end_point].strip # add new sentence to array
48
+ sentence = remaining_text[0..sentence_end_point].strip # strip sentence of leading and trailing spaces
49
+ sentences << sentence if (!@first_letter_capitilization || first_letter_uppercase?(sentence)) # add new sentence to array
49
50
  remaining_text = remaining_text[sentence_end_point+1..remaining_text.size] # set the rest of the text to be processed.
50
51
  else
51
52
  match = nil
@@ -54,6 +55,14 @@ module SentenceExtractor
54
55
  sentences
55
56
  end
56
57
 
58
# Reports whether +sentence+ begins with an upper-case character.
#
# The check compares the first character with its upcased form, so any
# leading character that +upcase+ leaves unchanged (digits, punctuation)
# and an empty string are also treated as upper-case.
#
# @param sentence [String] the sentence to inspect
# @return [Boolean] true when the first character equals its upcased form
def first_letter_uppercase?(sentence)
  first_char = sentence[0..0] # range slice, so the result is a String rather than a character code
  first_char == first_char.upcase
end
65
+
57
66
  end
58
67
 
59
68
  end
@@ -1,3 +1,3 @@
1
1
  module SentenceExtractor
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -26,4 +26,18 @@ describe SentenceExtractor::Extractor do
26
26
  sen_exc.extract_sentences("Hello. How are you? Good thanks! ").should eql(["Hello.", "How are you?", "Good thanks!"])
27
27
  end
28
28
 
29
+ it "Should leave in sentences that don't start with capital letters" do
30
+ sen_exc = SentenceExtractor::Extractor.new()
31
+ sen_exc.extract_sentences("hello. How are you? good thanks! ").should eql(["hello.", "How are you?", "good thanks!"])
32
+ end
33
+
34
+ it "Should remove sentences that don't start with capital letters" do
35
+ sen_exc = SentenceExtractor::Extractor.new(language = "en", delimiters = [".", "!","?"], first_letter_capitilization = true)
36
+ sen_exc.extract_sentences("hello. How are you? good thanks! ").should eql(["How are you?"])
37
+ end
38
+
39
+
40
+
41
+
42
+
29
43
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sentence_extractor
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 0
10
- version: 0.2.0
9
+ - 1
10
+ version: 0.2.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seb Glazebrook
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-23 00:00:00 Z
18
+ date: 2012-03-26 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: rspec