rudge 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/rudge.rb +23 -0
  3. data/lib/rudge/abbreviations.rb +22 -0
  4. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a8ed66b8eeec5d42fc40a8dd59567508795bd056
4
+ data.tar.gz: c4e51bd25dd55ddd1ea5821b853dfa7d8df7e6f1
5
+ SHA512:
6
+ metadata.gz: 8e62a34ee8f52c7ce5a6762d97fb973c94dd9a68f26935546ec66f4db6479b506a718c8cbe2b3b5f19536a74888229f03e7f96374ac20f7cfd8c7dbf063fa6b3
7
+ data.tar.gz: 4b7c58b69d7401a58c4e19339ac3c13866f80e46b7f9cd8dd9cd93438363da317a0a2fd2e1579a30b0ef6219f39c6457ca80ff2ca277745433678e8b99fc9637
data/lib/rudge.rb ADDED
@@ -0,0 +1,23 @@
require 'rudge/abbreviations'

# Splits free-form text into sentences.
#
# The algorithm first marks every run of whitespace that follows terminal
# punctuation as a sentence boundary, then removes the markers that were
# placed after ellipses or after known abbreviations, and finally splits
# on the markers that remain.
class Rudge
  # End-of-sentence marker inserted between sentences while processing.
  # Input that already contains this control character is split at it too.
  EOS = "\001".freeze

  # Terminal punctuation, optionally followed by a single closing quote or
  # bracket, then whitespace. The whitespace is captured so it can be kept
  # in place for the ellipsis correction.
  BOUNDARY = /([\.?!](?:\"|\'|\)|\]|\})?)(\s+)/

  # Two or more consecutive stops immediately before a marker.
  ELLIPSIS = /(\.\.\.*)#{EOS}/

  # A known abbreviation followed by a stop and a marker. The leading \b
  # makes sure only whole words count: without it any word that merely
  # ends in an abbreviation ("best." ends in "st") would swallow a real
  # sentence break. Built once at load time instead of on every call.
  ABBREVIATION = /\b(#{Rudge::Abbreviations.list.map { |abbr| Regexp.escape(abbr) }.join('|')})\.#{EOS}/i

  # Determines the sentences in +text+.
  #
  # text - a String; it is not modified.
  #
  # Returns an Array of Strings, one per sentence, each stripped of
  # leading and trailing whitespace.
  def self.sentences(text)
    # initial split after punctuation, preserving the trailing whitespace
    marked = text.gsub(BOUNDARY) { "#{$1}#{EOS}#{$2}" }

    # ellipsis marks and rows of stops do not end a sentence
    marked = marked.gsub(ELLIPSIS) { $1 }

    # neither does the stop that terminates an abbreviation
    marked = marked.gsub(ABBREVIATION) { "#{$1}." }

    # split on the remaining markers and drop surrounding whitespace
    marked.split(EOS).map(&:strip)
  end
end
data/lib/rudge/abbreviations.rb ADDED
@@ -0,0 +1,22 @@
class Rudge
  # Lower-case abbreviations, written without their trailing stop.
  #
  # NOTE: %w literals split on whitespace only, so the entries must not be
  # separated by commas -- a comma would become part of the word itself
  # ("jr," instead of "jr").
  class Abbreviations
    TITLES = %w[jr mr mrs ms dr prof sr sen rep rev gov atty
                supt det col gen lt cmdr adm capt sgt cpl maj].freeze

    ENTITIES = %w[dept univ uni assn bros inc ltd co corp plc].freeze

    MONTHS = %w[jan feb mar apr may jun jul aug sep sept oct
                nov dec].freeze

    DAYS = %w[mon tue wed thu fri sat sun].freeze

    ADDRESSES = %w[ave bld blvd cl ct cres dr rd st].freeze

    MISC = %w[vs etc no esp cf].freeze

    # Returns a new Array of Strings containing every abbreviation from all
    # categories. Entries shared between categories (e.g. "dr" is both a
    # title and an address) appear only once.
    def self.list
      (TITLES + ENTITIES + MONTHS + DAYS + ADDRESSES + MISC).uniq
    end
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rudge
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - bruno coelho
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-31 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A Ruby Judge! Determines the sentences in a text.
14
+ email: bruno.coelho@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/rudge.rb
20
+ - lib/rudge/abbreviations.rb
21
+ homepage: http://rubygems.org/gems/rudge
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.2.1
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: Rudge - determines sentences.
45
+ test_files: []