lingua 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ 0.6.2
2
+ -----
3
+ * Fix for sentences that end in abbreviations (e.g. dr, mrs, ms) [chad]
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.1
1
+ 0.6.2
@@ -78,7 +78,7 @@ module Lingua
78
78
  end
79
79
 
80
80
  def self.set_abbr_regex!
81
- @abbr_regex = /(#{abbreviations.join("|")})\.#{EOS}/i
81
+ @abbr_regex = / (#{abbreviations.join("|")})\.#{EOS}/i
82
82
  end
83
83
 
84
84
  initialize_abbreviations!
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{lingua}
8
- s.version = "0.6.1"
8
+ s.version = "0.6.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["David Balatero"]
12
- s.date = %q{2010-05-17}
12
+ s.date = %q{2010-07-25}
13
13
  s.description = %q{Provides sentence splitting, syllable, and text-quality algorithms.}
14
14
  s.email = %q{dbalatero@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -19,6 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.files = [
20
20
  ".document",
21
21
  ".gitignore",
22
+ "CHANGELOG.markdown",
22
23
  "LICENSE",
23
24
  "README.rdoc",
24
25
  "Rakefile",
@@ -2,7 +2,7 @@ require File.dirname(__FILE__) + "/../../spec_helper"
2
2
 
3
3
  describe Lingua::EN::Sentence do
4
4
  klass = Lingua::EN::Sentence
5
-
5
+
6
6
  describe "#sentences" do
7
7
  describe "multi-paragraph text" do
8
8
  before(:each) do
@@ -10,11 +10,11 @@ describe Lingua::EN::Sentence do
10
10
  text << "Visit http://www.google.com and check out my site. Thanks very much!"
11
11
  @sentences = klass.sentences(text)
12
12
  end
13
-
13
+
14
14
  it "should get the correct number of sentences" do
15
15
  @sentences.should have(5).things
16
16
  end
17
-
17
+
18
18
  it "should get the correct sentences" do
19
19
  @sentences[0].should == "As Milton Bradley once said, \"board games are the shit.\""
20
20
  @sentences[1].should == "And I'm inclined to agree."
@@ -23,109 +23,126 @@ describe Lingua::EN::Sentence do
23
23
  @sentences[4].should == "Thanks very much!"
24
24
  end
25
25
  end
26
-
26
+
27
27
  describe "quoted sentences" do
28
28
  before(:each) do
29
29
  text = "As Milton Bradley once said, \"board games are the shit.\" And I'm inclined to agree. \"Why can't we be friends?\""
30
30
  @sentences = klass.sentences(text)
31
31
  end
32
-
32
+
33
33
  it "should get the correct number of sentences" do
34
34
  @sentences.should have(3).things
35
35
  end
36
-
36
+
37
37
  it "should get the correct sentences" do
38
38
  @sentences[0].should == "As Milton Bradley once said, \"board games are the shit.\""
39
39
  @sentences[1].should == "And I'm inclined to agree."
40
40
  @sentences[2].should == "\"Why can't we be friends?\""
41
41
  end
42
42
  end
43
-
43
+
44
44
  describe "ellipses correction" do
45
45
  before(:each) do
46
46
  text = "Well... why would you do that? Let's not fight."
47
47
  @sentences = klass.sentences(text)
48
48
  end
49
-
49
+
50
50
  it "should get the correct number of sentences" do
51
51
  @sentences.should have(2).things
52
52
  end
53
-
53
+
54
54
  it "should get the right sentences" do
55
55
  @sentences[0].should == "Well... why would you do that?"
56
56
  @sentences[1].should == "Let's not fight."
57
57
  end
58
58
  end
59
-
59
+
60
60
  describe "simple URL matching" do
61
61
  before(:each) do
62
62
  text = "Hello, visit http://www.google.com/index.php?ok=ok for more info. Ok?"
63
63
  @sentences = klass.sentences(text)
64
64
  end
65
-
65
+
66
66
  it "should get the correct number of sentences" do
67
67
  @sentences.should have(2).things
68
68
  end
69
-
69
+
70
70
  it "should get the right sentences" do
71
71
  @sentences[0].should == "Hello, visit http://www.google.com/index.php?ok=ok for more info."
72
72
  @sentences[1].should == "Ok?"
73
73
  end
74
74
  end
75
-
75
+
76
76
  describe "ending a sentence with an abbreviation" do
77
77
  before(:each) do
78
78
  text = "I was born in the U.S.S.R. My parents were from the U.S. This is not weird."
79
79
  @sentences = klass.sentences(text)
80
80
  end
81
-
81
+
82
82
  it "should get the correct number of sentences" do
83
83
  @sentences.should have(3).things
84
84
  end
85
-
85
+
86
86
  it "should get the correct sentences" do
87
87
  @sentences[0].should == "I was born in the U.S.S.R."
88
88
  @sentences[1].should == "My parents were from the U.S."
89
89
  @sentences[2].should == "This is not weird."
90
90
  end
91
+
92
+ describe "which is hard-coded (like st, dr, mrs...)" do
93
+ before(:each) do
94
+ text = "This is a test. The word 'test' ends with the abbreviation for 'street'. This should still be three sentences."
95
+ @sentences = klass.sentences(text)
96
+ end
97
+
98
+ it "should have the correct number of sentences" do
99
+ @sentences.should have(3).things
100
+ end
101
+
102
+ it "should get the correct sentences" do
103
+ @sentences[0].should == "This is a test."
104
+ @sentences[1].should == "The word 'test' ends with the abbreviation for 'street'."
105
+ @sentences[2].should == "This should still be three sentences."
106
+ end
107
+ end
91
108
  end
92
-
109
+
93
110
  describe "basic sentences" do
94
111
  before(:each) do
95
112
  text = "Hello, my name is David. What is your name?"
96
113
  @sentences = klass.sentences(text)
97
114
  end
98
-
115
+
99
116
  it "should get the correct number of sentences" do
100
117
  @sentences.should have(2).things
101
118
  end
102
119
  end
103
-
120
+
104
121
  describe "short sentences w/ line breaks" do
105
122
  before(:each) do
106
123
  @doc = <<-EOF
107
124
  So how does the 401(k) plan work? Let's see -
108
-
125
+
109
126
  The 401(k) consists of - first, asking your employer to set aside a portion (upto 15% of your total income) in keeping with the plan.
110
127
  EOF
111
128
  @sentences = klass.sentences(@doc)
112
129
  end
113
-
130
+
114
131
  it "should find 3 sentences" do
115
132
  @sentences.should have(3).things
116
133
  end
117
-
134
+
118
135
  it "should stop at line breaks" do
119
136
  @sentences[1].should == "Let's see -"
120
137
  end
121
138
  end
122
-
139
+
123
140
  describe "sentences with URLs and abbreviation" do
124
141
  before(:each) do
125
142
  text = "Many of these leading names now have their own website, e.g. http://www.kaptest.com/. Hello, e.g. you don't know what you mean. I'm so angry about what you said about the U.S.A. or the u.S. or the U.S.S.R. ok."
126
143
  @sentences = klass.sentences(text)
127
144
  end
128
-
145
+
129
146
  it "should get the correct number of sentences" do
130
147
  @sentences[0].should == "Many of these leading names now have their own website, e.g. http://www.kaptest.com/."
131
148
  @sentences[1].should == "Hello, e.g. you don't know what you mean."
@@ -134,22 +151,23 @@ describe Lingua::EN::Sentence do
134
151
  end
135
152
  end
136
153
  end
137
-
154
+
138
155
  describe "#abbreviation" do
139
156
  it "should change the abbreviations list" do
140
157
  klass.abbreviation('monkey', 'pig')
141
158
  klass.abbreviations.should include('monkey')
142
159
  klass.abbreviations.should include('pig')
143
160
  end
144
-
161
+
145
162
  it "should change the regex for abbreviations" do
146
163
  lambda {
147
164
  klass.abbreviation('monkey')
148
165
  }.should change(klass, :abbr_regex)
149
166
  end
150
-
167
+
151
168
  after(:each) do
152
169
  klass.initialize_abbreviations!
153
170
  end
154
171
  end
172
+
155
173
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 6
8
- - 1
9
- version: 0.6.1
8
+ - 2
9
+ version: 0.6.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - David Balatero
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-05-17 00:00:00 -07:00
17
+ date: 2010-07-25 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -43,6 +43,7 @@ extra_rdoc_files:
43
43
  files:
44
44
  - .document
45
45
  - .gitignore
46
+ - CHANGELOG.markdown
46
47
  - LICENSE
47
48
  - README.rdoc
48
49
  - Rakefile