scalpel 0.2 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/scalpel.rb +6 -2
- metadata +2 -2
data/lib/scalpel.rb
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
class Scalpel
|
9
9
|
|
10
10
|
# Current version.
|
11
|
-
VERSION = '0.2'
|
11
|
+
VERSION = '0.2.1'
|
12
12
|
|
13
13
|
# Segment a text using the Scalpel algorithm.
|
14
14
|
# This will eventually be ported to a gem.
|
@@ -22,6 +22,8 @@ class Scalpel
|
|
22
22
|
text.gsub!('...', '&;&.')
|
23
23
|
# Remove floating point numbers.
|
24
24
|
text.gsub!(/([0-9]+)\.([0-9]+)/) { $1 + '&@&' + $2 }
|
25
|
+
# Handle floats without leading zero.
|
26
|
+
text.gsub!(/\s\.([0-9]+)/) { ' &#&' + $1 }
|
25
27
|
# Remove abbreviations.
|
26
28
|
text.gsub!(/(?:[A-Za-z]\.){2,}/) { |abbr| abbr.gsub('.', '&-&') }
|
27
29
|
# Remove titles.
|
@@ -63,7 +65,9 @@ class Scalpel
|
|
63
65
|
sentence.gsub!(/&%&([.!?])/) { $1 + "'" }
|
64
66
|
sentence.gsub!(/&\^&([.?!])/) { "'" + $1 + '"' }
|
65
67
|
sentence.gsub!(/&\*&([.?!])/) { "'" + $1 + '”' }
|
66
|
-
sentence.gsub!(
|
68
|
+
sentence.gsub!(/&\$&([.!?])/) { $1 + '"' }
|
69
|
+
# Repair floats without leading zeros.
|
70
|
+
sentence.gsub!(/&#&([0-9]+)/) { '.' + $1 }
|
67
71
|
results << sentence.strip
|
68
72
|
end
|
69
73
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scalpel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-12-21 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! ' Scalpel is a sentence segmentation tool for Ruby. It allows you to
|
15
15
|
split a text into an array of sentences. It is simple, lightweight, blazing fast
|