lingua 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +0 -2
- data/VERSION +1 -1
- data/lib/lingua/en/sentence.rb +1 -1
- data/lingua.gemspec +64 -0
- data/spec/lingua/en/sentence_spec.rb +19 -0
- metadata +4 -3
data/.gitignore
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.0
|
1
|
+
0.6.1
|
data/lib/lingua/en/sentence.rb
CHANGED
@@ -29,7 +29,7 @@ module Lingua
|
|
29
29
|
ABBR_DETECT = /(?:\s(?:(?:(?:\w\.){2,}\w?)|(?:\w\.\w)))/ unless defined?(ABBR_DETECT)
|
30
30
|
|
31
31
|
# Finds punctuation that ends paragraphs.
|
32
|
-
PUNCTUATION_DETECT = /([\.?!](?:\"|\'|\)|\]|\})?)(\s+)/ unless defined?(PUNCTUATION_DETECT)
|
32
|
+
PUNCTUATION_DETECT = /((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)/ unless defined?(PUNCTUATION_DETECT)
|
33
33
|
|
34
34
|
CORRECT_ABBR = /(#{ABBR_DETECT})#{EOS}(\s+[a-z0-9])/
|
35
35
|
|
data/lingua.gemspec
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{lingua}
|
8
|
+
s.version = "0.6.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["David Balatero"]
|
12
|
+
s.date = %q{2010-05-17}
|
13
|
+
s.description = %q{Provides sentence splitting, syllable, and text-quality algorithms.}
|
14
|
+
s.email = %q{dbalatero@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/lingua.rb",
|
27
|
+
"lib/lingua/en/paragraph.rb",
|
28
|
+
"lib/lingua/en/readability.rb",
|
29
|
+
"lib/lingua/en/sentence.rb",
|
30
|
+
"lib/lingua/en/syllable.rb",
|
31
|
+
"lib/lingua/en/syllable/guess.rb",
|
32
|
+
"lingua.gemspec",
|
33
|
+
"spec/lingua/en/paragraph_spec.rb",
|
34
|
+
"spec/lingua/en/readability_spec.rb",
|
35
|
+
"spec/lingua/en/sentence_spec.rb",
|
36
|
+
"spec/spec.opts",
|
37
|
+
"spec/spec_helper.rb"
|
38
|
+
]
|
39
|
+
s.homepage = %q{http://github.com/dbalatero/lingua}
|
40
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = %q{1.3.6}
|
43
|
+
s.summary = %q{This is a maintained version of Ruby's Lingua port.}
|
44
|
+
s.test_files = [
|
45
|
+
"spec/lingua/en/paragraph_spec.rb",
|
46
|
+
"spec/lingua/en/readability_spec.rb",
|
47
|
+
"spec/lingua/en/sentence_spec.rb",
|
48
|
+
"spec/spec_helper.rb"
|
49
|
+
]
|
50
|
+
|
51
|
+
if s.respond_to? :specification_version then
|
52
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
53
|
+
s.specification_version = 3
|
54
|
+
|
55
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
56
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
59
|
+
end
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
data/spec/lingua/en/sentence_spec.rb
CHANGED
@@ -101,6 +101,25 @@ describe Lingua::EN::Sentence do
|
|
101
101
|
end
|
102
102
|
end
|
103
103
|
|
104
|
+
describe "short sentences w/ line breaks" do
|
105
|
+
before(:each) do
|
106
|
+
@doc = <<-EOF
|
107
|
+
So how does the 401(k) plan work? Let's see -
|
108
|
+
|
109
|
+
The 401(k) consists of - first, asking your employer to set aside a portion (upto 15% of your total income) in keeping with the plan.
|
110
|
+
EOF
|
111
|
+
@sentences = klass.sentences(@doc)
|
112
|
+
end
|
113
|
+
|
114
|
+
it "should find 3 sentences" do
|
115
|
+
@sentences.should have(3).things
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should stop at line breaks" do
|
119
|
+
@sentences[1].should == "Let's see -"
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
104
123
|
describe "sentences with URLs and abbreviation" do
|
105
124
|
before(:each) do
|
106
125
|
text = "Many of these leading names now have their own website, e.g. http://www.kaptest.com/. Hello, e.g. you don't know what you mean. I'm so angry about what you said about the U.S.A. or the u.S. or the U.S.S.R. ok."
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 6
|
8
|
-
- 0
|
9
|
-
version: 0.6.0
|
8
|
+
- 1
|
9
|
+
version: 0.6.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David Balatero
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-05-17 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -53,6 +53,7 @@ files:
|
|
53
53
|
- lib/lingua/en/sentence.rb
|
54
54
|
- lib/lingua/en/syllable.rb
|
55
55
|
- lib/lingua/en/syllable/guess.rb
|
56
|
+
- lingua.gemspec
|
56
57
|
- spec/lingua/en/paragraph_spec.rb
|
57
58
|
- spec/lingua/en/readability_spec.rb
|
58
59
|
- spec/lingua/en/sentence_spec.rb
|