scalpel 0.2 → 0.2.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (2)
  1. data/lib/scalpel.rb +6 -2
  2. metadata +2 -2
@@ -8,7 +8,7 @@
8
8
  class Scalpel
9
9
 
10
10
  # Current version.
11
- VERSION = '0.2'
11
+ VERSION = '0.2.1'
12
12
 
13
13
  # Segment a text using the Scalpel algorithm.
14
14
  # This will eventually be ported to a gem.
@@ -22,6 +22,8 @@ class Scalpel
22
22
  text.gsub!('...', '&;&.')
23
23
  # Remove floating point numbers.
24
24
  text.gsub!(/([0-9]+)\.([0-9]+)/) { $1 + '&@&' + $2 }
25
+ # Handle floats without leading zero.
26
+ text.gsub!(/\s\.([0-9]+)/) { ' &#&' + $1 }
25
27
  # Remove abbreviations.
26
28
  text.gsub!(/(?:[A-Za-z]\.){2,}/) { |abbr| abbr.gsub('.', '&-&') }
27
29
  # Remove titles.
@@ -63,7 +65,9 @@ class Scalpel
63
65
  sentence.gsub!(/&%&([.!?])/) { $1 + "'" }
64
66
  sentence.gsub!(/&\^&([.?!])/) { "'" + $1 + '"' }
65
67
  sentence.gsub!(/&\*&([.?!])/) { "'" + $1 + '”' }
66
- sentence.gsub!(/&$&([.!?])/) { $1 + '"' }
68
+ sentence.gsub!(/&\$&([.!?])/) { $1 + '"' }
69
+ # Repair floats without leading zeros.
70
+ sentence.gsub!(/&#&([0-9]+)/) { '.' + $1 }
67
71
  results << sentence.strip
68
72
  end
69
73
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scalpel
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-15 00:00:00.000000000 Z
12
+ date: 2012-12-21 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! ' Scalpel is a sentence segmentation tool for Ruby. It allows you to
15
15
  split a text into an array of sentences. It is simple, lightweight, blazing fast