lingua 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,8 +1,6 @@
1
1
  ## MAC OS
2
2
  .DS_Store
3
3
 
4
- *.gemspec
5
-
6
4
  ## TEXTMATE
7
5
  *.tmproj
8
6
  tmtags
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.0
1
+ 0.6.1
@@ -29,7 +29,7 @@ module Lingua
29
29
  ABBR_DETECT = /(?:\s(?:(?:(?:\w\.){2,}\w?)|(?:\w\.\w)))/ unless defined?(ABBR_DETECT)
30
30
 
31
31
  # Finds punctuation that ends paragraphs.
32
- PUNCTUATION_DETECT = /([\.?!](?:\"|\'|\)|\]|\})?)(\s+)/ unless defined?(PUNCTUATION_DETECT)
32
+ PUNCTUATION_DETECT = /((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)/ unless defined?(PUNCTUATION_DETECT)
33
33
 
34
34
  CORRECT_ABBR = /(#{ABBR_DETECT})#{EOS}(\s+[a-z0-9])/
35
35
 
@@ -0,0 +1,64 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{lingua}
8
+ s.version = "0.6.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["David Balatero"]
12
+ s.date = %q{2010-05-17}
13
+ s.description = %q{Provides sentence splitting, syllable, and text-quality algorithms.}
14
+ s.email = %q{dbalatero@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/lingua.rb",
27
+ "lib/lingua/en/paragraph.rb",
28
+ "lib/lingua/en/readability.rb",
29
+ "lib/lingua/en/sentence.rb",
30
+ "lib/lingua/en/syllable.rb",
31
+ "lib/lingua/en/syllable/guess.rb",
32
+ "lingua.gemspec",
33
+ "spec/lingua/en/paragraph_spec.rb",
34
+ "spec/lingua/en/readability_spec.rb",
35
+ "spec/lingua/en/sentence_spec.rb",
36
+ "spec/spec.opts",
37
+ "spec/spec_helper.rb"
38
+ ]
39
+ s.homepage = %q{http://github.com/dbalatero/lingua}
40
+ s.rdoc_options = ["--charset=UTF-8"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.3.6}
43
+ s.summary = %q{This is a maintained version of Ruby's Lingua port.}
44
+ s.test_files = [
45
+ "spec/lingua/en/paragraph_spec.rb",
46
+ "spec/lingua/en/readability_spec.rb",
47
+ "spec/lingua/en/sentence_spec.rb",
48
+ "spec/spec_helper.rb"
49
+ ]
50
+
51
+ if s.respond_to? :specification_version then
52
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
53
+ s.specification_version = 3
54
+
55
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
56
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
57
+ else
58
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
62
+ end
63
+ end
64
+
@@ -101,6 +101,25 @@ describe Lingua::EN::Sentence do
101
101
  end
102
102
  end
103
103
 
104
+ describe "short sentences w/ line breaks" do
105
+ before(:each) do
106
+ @doc = <<-EOF
107
+ So how does the 401(k) plan work? Let's see -
108
+
109
+ The 401(k) consists of - first, asking your employer to set aside a portion (upto 15% of your total income) in keeping with the plan.
110
+ EOF
111
+ @sentences = klass.sentences(@doc)
112
+ end
113
+
114
+ it "should find 3 sentences" do
115
+ @sentences.should have(3).things
116
+ end
117
+
118
+ it "should stop at line breaks" do
119
+ @sentences[1].should == "Let's see -"
120
+ end
121
+ end
122
+
104
123
  describe "sentences with URLs and abbreviation" do
105
124
  before(:each) do
106
125
  text = "Many of these leading names now have their own website, e.g. http://www.kaptest.com/. Hello, e.g. you don't know what you mean. I'm so angry about what you said about the U.S.A. or the u.S. or the U.S.S.R. ok."
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 6
8
- - 0
9
- version: 0.6.0
8
+ - 1
9
+ version: 0.6.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - David Balatero
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-04-20 00:00:00 -07:00
17
+ date: 2010-05-17 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -53,6 +53,7 @@ files:
53
53
  - lib/lingua/en/sentence.rb
54
54
  - lib/lingua/en/syllable.rb
55
55
  - lib/lingua/en/syllable/guess.rb
56
+ - lingua.gemspec
56
57
  - spec/lingua/en/paragraph_spec.rb
57
58
  - spec/lingua/en/readability_spec.rb
58
59
  - spec/lingua/en/sentence_spec.rb