lingua 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.markdown +3 -0
- data/VERSION +1 -1
- data/lib/lingua/en/sentence.rb +1 -1
- data/lingua.gemspec +3 -2
- data/spec/lingua/en/sentence_spec.rb +44 -26
- metadata +4 -3
data/CHANGELOG.markdown
ADDED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.
|
1
|
+
0.6.2
|
data/lib/lingua/en/sentence.rb
CHANGED
data/lingua.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{lingua}
|
8
|
-
s.version = "0.6.
|
8
|
+
s.version = "0.6.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["David Balatero"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-07-25}
|
13
13
|
s.description = %q{Provides sentence splitting, syllable, and text-quality algorithms.}
|
14
14
|
s.email = %q{dbalatero@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -19,6 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.files = [
|
20
20
|
".document",
|
21
21
|
".gitignore",
|
22
|
+
"CHANGELOG.markdown",
|
22
23
|
"LICENSE",
|
23
24
|
"README.rdoc",
|
24
25
|
"Rakefile",
|
@@ -2,7 +2,7 @@ require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
2
|
|
3
3
|
describe Lingua::EN::Sentence do
|
4
4
|
klass = Lingua::EN::Sentence
|
5
|
-
|
5
|
+
|
6
6
|
describe "#sentences" do
|
7
7
|
describe "multi-paragraph text" do
|
8
8
|
before(:each) do
|
@@ -10,11 +10,11 @@ describe Lingua::EN::Sentence do
|
|
10
10
|
text << "Visit http://www.google.com and check out my site. Thanks very much!"
|
11
11
|
@sentences = klass.sentences(text)
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
it "should get the correct number of sentences" do
|
15
15
|
@sentences.should have(5).things
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
it "should get the correct sentences" do
|
19
19
|
@sentences[0].should == "As Milton Bradley once said, \"board games are the shit.\""
|
20
20
|
@sentences[1].should == "And I'm inclined to agree."
|
@@ -23,109 +23,126 @@ describe Lingua::EN::Sentence do
|
|
23
23
|
@sentences[4].should == "Thanks very much!"
|
24
24
|
end
|
25
25
|
end
|
26
|
-
|
26
|
+
|
27
27
|
describe "quoted sentences" do
|
28
28
|
before(:each) do
|
29
29
|
text = "As Milton Bradley once said, \"board games are the shit.\" And I'm inclined to agree. \"Why can't we be friends?\""
|
30
30
|
@sentences = klass.sentences(text)
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
it "should get the correct number of sentences" do
|
34
34
|
@sentences.should have(3).things
|
35
35
|
end
|
36
|
-
|
36
|
+
|
37
37
|
it "should get the correct sentences" do
|
38
38
|
@sentences[0].should == "As Milton Bradley once said, \"board games are the shit.\""
|
39
39
|
@sentences[1].should == "And I'm inclined to agree."
|
40
40
|
@sentences[2].should == "\"Why can't we be friends?\""
|
41
41
|
end
|
42
42
|
end
|
43
|
-
|
43
|
+
|
44
44
|
describe "ellipses correction" do
|
45
45
|
before(:each) do
|
46
46
|
text = "Well... why would you do that? Let's not fight."
|
47
47
|
@sentences = klass.sentences(text)
|
48
48
|
end
|
49
|
-
|
49
|
+
|
50
50
|
it "should get the correct number of sentences" do
|
51
51
|
@sentences.should have(2).things
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
it "should get the right sentences" do
|
55
55
|
@sentences[0].should == "Well... why would you do that?"
|
56
56
|
@sentences[1].should == "Let's not fight."
|
57
57
|
end
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
describe "simple URL matching" do
|
61
61
|
before(:each) do
|
62
62
|
text = "Hello, visit http://www.google.com/index.php?ok=ok for more info. Ok?"
|
63
63
|
@sentences = klass.sentences(text)
|
64
64
|
end
|
65
|
-
|
65
|
+
|
66
66
|
it "should get the correct number of sentences" do
|
67
67
|
@sentences.should have(2).things
|
68
68
|
end
|
69
|
-
|
69
|
+
|
70
70
|
it "should get the right sentences" do
|
71
71
|
@sentences[0].should == "Hello, visit http://www.google.com/index.php?ok=ok for more info."
|
72
72
|
@sentences[1].should == "Ok?"
|
73
73
|
end
|
74
74
|
end
|
75
|
-
|
75
|
+
|
76
76
|
describe "ending a sentence with an abbreviation" do
|
77
77
|
before(:each) do
|
78
78
|
text = "I was born in the U.S.S.R. My parents were from the U.S. This is not weird."
|
79
79
|
@sentences = klass.sentences(text)
|
80
80
|
end
|
81
|
-
|
81
|
+
|
82
82
|
it "should get the correct number of sentences" do
|
83
83
|
@sentences.should have(3).things
|
84
84
|
end
|
85
|
-
|
85
|
+
|
86
86
|
it "should get the correct sentences" do
|
87
87
|
@sentences[0].should == "I was born in the U.S.S.R."
|
88
88
|
@sentences[1].should == "My parents were from the U.S."
|
89
89
|
@sentences[2].should == "This is not weird."
|
90
90
|
end
|
91
|
+
|
92
|
+
describe "which is hard-coded (like st, dr, mrs...)" do
|
93
|
+
before(:each) do
|
94
|
+
text = "This is a test. The word 'test' ends with the abbreviation for 'street'. This should still be three sentences."
|
95
|
+
@sentences = klass.sentences(text)
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should have the correct number of sentences" do
|
99
|
+
@sentences.should have(3).things
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should get the correct sentences" do
|
103
|
+
@sentences[0].should == "This is a test."
|
104
|
+
@sentences[1].should == "The word 'test' ends with the abbreviation for 'street'."
|
105
|
+
@sentences[2].should == "This should still be three sentences."
|
106
|
+
end
|
107
|
+
end
|
91
108
|
end
|
92
|
-
|
109
|
+
|
93
110
|
describe "basic sentences" do
|
94
111
|
before(:each) do
|
95
112
|
text = "Hello, my name is David. What is your name?"
|
96
113
|
@sentences = klass.sentences(text)
|
97
114
|
end
|
98
|
-
|
115
|
+
|
99
116
|
it "should get the correct number of sentences" do
|
100
117
|
@sentences.should have(2).things
|
101
118
|
end
|
102
119
|
end
|
103
|
-
|
120
|
+
|
104
121
|
describe "short sentences w/ line breaks" do
|
105
122
|
before(:each) do
|
106
123
|
@doc = <<-EOF
|
107
124
|
So how does the 401(k) plan work? Let's see -
|
108
|
-
|
125
|
+
|
109
126
|
The 401(k) consists of - first, asking your employer to set aside a portion (upto 15% of your total income) in keeping with the plan.
|
110
127
|
EOF
|
111
128
|
@sentences = klass.sentences(@doc)
|
112
129
|
end
|
113
|
-
|
130
|
+
|
114
131
|
it "should find 3 sentences" do
|
115
132
|
@sentences.should have(3).things
|
116
133
|
end
|
117
|
-
|
134
|
+
|
118
135
|
it "should stop at line breaks" do
|
119
136
|
@sentences[1].should == "Let's see -"
|
120
137
|
end
|
121
138
|
end
|
122
|
-
|
139
|
+
|
123
140
|
describe "sentences with URLs and abbreviation" do
|
124
141
|
before(:each) do
|
125
142
|
text = "Many of these leading names now have their own website, e.g. http://www.kaptest.com/. Hello, e.g. you don't know what you mean. I'm so angry about what you said about the U.S.A. or the u.S. or the U.S.S.R. ok."
|
126
143
|
@sentences = klass.sentences(text)
|
127
144
|
end
|
128
|
-
|
145
|
+
|
129
146
|
it "should get the correct number of sentences" do
|
130
147
|
@sentences[0].should == "Many of these leading names now have their own website, e.g. http://www.kaptest.com/."
|
131
148
|
@sentences[1].should == "Hello, e.g. you don't know what you mean."
|
@@ -134,22 +151,23 @@ describe Lingua::EN::Sentence do
|
|
134
151
|
end
|
135
152
|
end
|
136
153
|
end
|
137
|
-
|
154
|
+
|
138
155
|
describe "#abbreviation" do
|
139
156
|
it "should change the abbreviations list" do
|
140
157
|
klass.abbreviation('monkey', 'pig')
|
141
158
|
klass.abbreviations.should include('monkey')
|
142
159
|
klass.abbreviations.should include('pig')
|
143
160
|
end
|
144
|
-
|
161
|
+
|
145
162
|
it "should change the regex for abbreviations" do
|
146
163
|
lambda {
|
147
164
|
klass.abbreviation('monkey')
|
148
165
|
}.should change(klass, :abbr_regex)
|
149
166
|
end
|
150
|
-
|
167
|
+
|
151
168
|
after(:each) do
|
152
169
|
klass.initialize_abbreviations!
|
153
170
|
end
|
154
171
|
end
|
172
|
+
|
155
173
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 6
|
8
|
-
-
|
9
|
-
version: 0.6.
|
8
|
+
- 2
|
9
|
+
version: 0.6.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David Balatero
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-07-25 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -43,6 +43,7 @@ extra_rdoc_files:
|
|
43
43
|
files:
|
44
44
|
- .document
|
45
45
|
- .gitignore
|
46
|
+
- CHANGELOG.markdown
|
46
47
|
- LICENSE
|
47
48
|
- README.rdoc
|
48
49
|
- Rakefile
|