lingua 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +0 -2
- data/VERSION +1 -1
- data/lib/lingua/en/sentence.rb +1 -1
- data/lingua.gemspec +64 -0
- data/spec/lingua/en/sentence_spec.rb +19 -0
- metadata +4 -3
data/.gitignore
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.0
|
1
|
+
0.6.1
|
data/lib/lingua/en/sentence.rb
CHANGED
@@ -29,7 +29,7 @@ module Lingua
|
|
29
29
|
ABBR_DETECT = /(?:\s(?:(?:(?:\w\.){2,}\w?)|(?:\w\.\w)))/ unless defined?(ABBR_DETECT)
|
30
30
|
|
31
31
|
# Finds punctuation that ends paragraphs.
|
32
|
-
PUNCTUATION_DETECT = /([\.?!](?:\"|\'|\)|\]|\})?)(\s+)/ unless defined?(PUNCTUATION_DETECT)
|
32
|
+
PUNCTUATION_DETECT = /((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)/ unless defined?(PUNCTUATION_DETECT)
|
33
33
|
|
34
34
|
CORRECT_ABBR = /(#{ABBR_DETECT})#{EOS}(\s+[a-z0-9])/
|
35
35
|
|
data/lingua.gemspec
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{lingua}
|
8
|
+
s.version = "0.6.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["David Balatero"]
|
12
|
+
s.date = %q{2010-05-17}
|
13
|
+
s.description = %q{Provides sentence splitting, syllable, and text-quality algorithms.}
|
14
|
+
s.email = %q{dbalatero@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/lingua.rb",
|
27
|
+
"lib/lingua/en/paragraph.rb",
|
28
|
+
"lib/lingua/en/readability.rb",
|
29
|
+
"lib/lingua/en/sentence.rb",
|
30
|
+
"lib/lingua/en/syllable.rb",
|
31
|
+
"lib/lingua/en/syllable/guess.rb",
|
32
|
+
"lingua.gemspec",
|
33
|
+
"spec/lingua/en/paragraph_spec.rb",
|
34
|
+
"spec/lingua/en/readability_spec.rb",
|
35
|
+
"spec/lingua/en/sentence_spec.rb",
|
36
|
+
"spec/spec.opts",
|
37
|
+
"spec/spec_helper.rb"
|
38
|
+
]
|
39
|
+
s.homepage = %q{http://github.com/dbalatero/lingua}
|
40
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = %q{1.3.6}
|
43
|
+
s.summary = %q{This is a maintained version of Ruby's Lingua port.}
|
44
|
+
s.test_files = [
|
45
|
+
"spec/lingua/en/paragraph_spec.rb",
|
46
|
+
"spec/lingua/en/readability_spec.rb",
|
47
|
+
"spec/lingua/en/sentence_spec.rb",
|
48
|
+
"spec/spec_helper.rb"
|
49
|
+
]
|
50
|
+
|
51
|
+
if s.respond_to? :specification_version then
|
52
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
53
|
+
s.specification_version = 3
|
54
|
+
|
55
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
56
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
59
|
+
end
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
data/spec/lingua/en/sentence_spec.rb
CHANGED
@@ -101,6 +101,25 @@ describe Lingua::EN::Sentence do
|
|
101
101
|
end
|
102
102
|
end
|
103
103
|
|
104
|
+
describe "short sentences w/ line breaks" do
|
105
|
+
before(:each) do
|
106
|
+
@doc = <<-EOF
|
107
|
+
So how does the 401(k) plan work? Let's see -
|
108
|
+
|
109
|
+
The 401(k) consists of - first, asking your employer to set aside a portion (upto 15% of your total income) in keeping with the plan.
|
110
|
+
EOF
|
111
|
+
@sentences = klass.sentences(@doc)
|
112
|
+
end
|
113
|
+
|
114
|
+
it "should find 3 sentences" do
|
115
|
+
@sentences.should have(3).things
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should stop at line breaks" do
|
119
|
+
@sentences[1].should == "Let's see -"
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
104
123
|
describe "sentences with URLs and abbreviation" do
|
105
124
|
before(:each) do
|
106
125
|
text = "Many of these leading names now have their own website, e.g. http://www.kaptest.com/. Hello, e.g. you don't know what you mean. I'm so angry about what you said about the U.S.A. or the u.S. or the U.S.S.R. ok."
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 6
|
8
|
-
- 0
|
9
|
-
version: 0.6.0
|
8
|
+
- 1
|
9
|
+
version: 0.6.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David Balatero
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-05-17 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -53,6 +53,7 @@ files:
|
|
53
53
|
- lib/lingua/en/sentence.rb
|
54
54
|
- lib/lingua/en/syllable.rb
|
55
55
|
- lib/lingua/en/syllable/guess.rb
|
56
|
+
- lingua.gemspec
|
56
57
|
- spec/lingua/en/paragraph_spec.rb
|
57
58
|
- spec/lingua/en/readability_spec.rb
|
58
59
|
- spec/lingua/en/sentence_spec.rb
|