rudge 0.0.4 → 0.0.5
Sign up for free protection for your applications and access to all features.
- checksums.yaml +4 -4
- data/lib/rudge.rb +3 -4
- data/lib/rudge/abbreviations.rb +1 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28ba0cef0025ab229bf1839fe3f2b39d1629efe2
|
4
|
+
data.tar.gz: c6d0b88fc57c12726709fca953122fbca9f47588
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07346e1f5a07b4103ee0ba89bdf102a551b76cc79a9d573a17f96a0e430a085514adfbe5fc94de6e8a66a036739ed036bc0eb005a71be4a578f4a2a9f69253c4
|
7
|
+
data.tar.gz: 7f509f26b478468682b6b32be2e2ba279521c47b5620cf40088880280190fad40b2a21d1de6651a1239cc4bedc44418870ea4251af263b651ed082a6b6acbf33
|
data/lib/rudge.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
|
-
require
|
1
|
+
require "rudge/abbreviations"
|
2
2
|
|
3
3
|
class Rudge
|
4
|
-
|
5
4
|
# end of sentence marker
|
6
5
|
EOS = "\001"
|
7
6
|
|
8
7
|
def self.sentences(text)
|
9
8
|
text = text.dup
|
10
9
|
|
11
|
-
# initial split after punctuation,
|
10
|
+
# initial split after punctuation,
|
12
11
|
# preserves trailing whitespace for the ellipsis correction
|
13
12
|
text.gsub!(/([\.?!](?:\"|\'|\)|\]|\})?)(\s+)/) { $1 << EOS << $2 }
|
14
13
|
|
@@ -16,7 +15,7 @@ class Rudge
|
|
16
15
|
text.gsub!(/(\.\.\.*)#{EOS}/) { $1 }
|
17
16
|
|
18
17
|
# correct abbreviations - precompile regexp?
|
19
|
-
text.gsub!(
|
18
|
+
text.gsub!(/\s(#{Rudge::Abbreviations.list.join("|")})\.#{EOS}/i) { " " << $1 << "." }
|
20
19
|
|
21
20
|
# split on EOS marker, strip gets rid of trailing whitespace
|
22
21
|
text.split(EOS).map { | sentence | sentence.strip }
|
data/lib/rudge/abbreviations.rb
CHANGED