lingua 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,8 +1,6 @@
1
1
  ## MAC OS
2
2
  .DS_Store
3
3
 
4
- *.gemspec
5
-
6
4
  ## TEXTMATE
7
5
  *.tmproj
8
6
  tmtags
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.0
1
+ 0.6.1
@@ -29,7 +29,7 @@ module Lingua
29
29
  ABBR_DETECT = /(?:\s(?:(?:(?:\w\.){2,}\w?)|(?:\w\.\w)))/ unless defined?(ABBR_DETECT)
30
30
 
31
31
  # Finds punctuation that ends paragraphs.
32
- PUNCTUATION_DETECT = /([\.?!](?:\"|\'|\)|\]|\})?)(\s+)/ unless defined?(PUNCTUATION_DETECT)
32
+ PUNCTUATION_DETECT = /((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)/ unless defined?(PUNCTUATION_DETECT)
33
33
 
34
34
  CORRECT_ABBR = /(#{ABBR_DETECT})#{EOS}(\s+[a-z0-9])/
35
35
 
@@ -0,0 +1,64 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{lingua}
8
+ s.version = "0.6.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["David Balatero"]
12
+ s.date = %q{2010-05-17}
13
+ s.description = %q{Provides sentence splitting, syllable, and text-quality algorithms.}
14
+ s.email = %q{dbalatero@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/lingua.rb",
27
+ "lib/lingua/en/paragraph.rb",
28
+ "lib/lingua/en/readability.rb",
29
+ "lib/lingua/en/sentence.rb",
30
+ "lib/lingua/en/syllable.rb",
31
+ "lib/lingua/en/syllable/guess.rb",
32
+ "lingua.gemspec",
33
+ "spec/lingua/en/paragraph_spec.rb",
34
+ "spec/lingua/en/readability_spec.rb",
35
+ "spec/lingua/en/sentence_spec.rb",
36
+ "spec/spec.opts",
37
+ "spec/spec_helper.rb"
38
+ ]
39
+ s.homepage = %q{http://github.com/dbalatero/lingua}
40
+ s.rdoc_options = ["--charset=UTF-8"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.3.6}
43
+ s.summary = %q{This is a maintained version of Ruby's Lingua port.}
44
+ s.test_files = [
45
+ "spec/lingua/en/paragraph_spec.rb",
46
+ "spec/lingua/en/readability_spec.rb",
47
+ "spec/lingua/en/sentence_spec.rb",
48
+ "spec/spec_helper.rb"
49
+ ]
50
+
51
+ if s.respond_to? :specification_version then
52
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
53
+ s.specification_version = 3
54
+
55
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
56
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
57
+ else
58
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
62
+ end
63
+ end
64
+
@@ -101,6 +101,25 @@ describe Lingua::EN::Sentence do
101
101
  end
102
102
  end
103
103
 
104
+ describe "short sentences w/ line breaks" do
105
+ before(:each) do
106
+ @doc = <<-EOF
107
+ So how does the 401(k) plan work? Let's see -
108
+
109
+ The 401(k) consists of - first, asking your employer to set aside a portion (upto 15% of your total income) in keeping with the plan.
110
+ EOF
111
+ @sentences = klass.sentences(@doc)
112
+ end
113
+
114
+ it "should find 3 sentences" do
115
+ @sentences.should have(3).things
116
+ end
117
+
118
+ it "should stop at line breaks" do
119
+ @sentences[1].should == "Let's see -"
120
+ end
121
+ end
122
+
104
123
  describe "sentences with URLs and abbreviation" do
105
124
  before(:each) do
106
125
  text = "Many of these leading names now have their own website, e.g. http://www.kaptest.com/. Hello, e.g. you don't know what you mean. I'm so angry about what you said about the U.S.A. or the u.S. or the U.S.S.R. ok."
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 6
8
- - 0
9
- version: 0.6.0
8
+ - 1
9
+ version: 0.6.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - David Balatero
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-04-20 00:00:00 -07:00
17
+ date: 2010-05-17 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -53,6 +53,7 @@ files:
53
53
  - lib/lingua/en/sentence.rb
54
54
  - lib/lingua/en/syllable.rb
55
55
  - lib/lingua/en/syllable/guess.rb
56
+ - lingua.gemspec
56
57
  - spec/lingua/en/paragraph_spec.rb
57
58
  - spec/lingua/en/readability_spec.rb
58
59
  - spec/lingua/en/sentence_spec.rb