parsenum 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +45 -0
- data/Rakefile +8 -0
- data/lib/parsenum.rb +36 -0
- data/lib/parsenum/core_ext.rb +9 -0
- data/lib/parsenum/parser.rb +86 -0
- data/lib/parsenum/scanner.rb +10 -0
- data/lib/parsenum/version.rb +0 -0
- data/parsenum.gemspec +22 -0
- data/spec/parsenum_spec.rb +126 -0
- data/spec/spec_helper.rb +3 -0
- metadata +101 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e18dfce9821ee32e9b72667ae07e74ff38bb9888
|
4
|
+
data.tar.gz: 72803f8588db14512b8fcd3301d61422ee04eba3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e515e020483084c73a3b6ea3e621799c45ae07bf8b3470cc8dc6add2a01b0a45cc6154d3e79f2605996e7408a01a7395c5cf96c58203df3482602ec727ae100b
|
7
|
+
data.tar.gz: 8eb175187fe66fe2f63f5c5fc11b5a627a2194397e9e643063aeed523cccff79e7dafc87faccba9b346007f91a7641e4a69baf79868f2d0ff464e0aae2cb7a2a
|
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.bundle
|
19
|
+
*.so
|
20
|
+
*.o
|
21
|
+
*.a
|
22
|
+
mkmf.log
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Andy Kent
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
Parsenum
|
2
|
+
========
|
3
|
+
|
4
|
+
Because sometimes you just want to get numbers out of strings damnit.
|
5
|
+
|
6
|
+
This actually happens a surprising amount. Usually when reading a CSV or scraping a webpage you want to extract a number out of a text string. Sure, in the simple case .to_i and .to_f work fine, but often there are pesky things like commas or pound signs getting in the way.
|
7
|
+
|
8
|
+
This library is pretty dumb as parsers go but it's just an attempt to hide all the string ugliness so you can get to the numbers you need.
|
9
|
+
|
10
|
+
|
11
|
+
Simple examples
|
12
|
+
---------------
|
13
|
+
|
14
|
+
"123".number => 123
|
15
|
+
"1.23".number => 1.23
|
16
|
+
"£3.99".number => 3.99
|
17
|
+
"28.5%".number => 0.285
|
18
|
+
"$1.99".number => 1.99
|
19
|
+
"got 99 problems".number => 99
|
20
|
+
"8 out of 10 cats".numbers => [8, 10]
|
21
|
+
|
22
|
+
|
23
|
+
Advanced Examples
|
24
|
+
------------------
|
25
|
+
|
26
|
+
number = Parsenum.parse("$3.99")
|
27
|
+
number.value => 3.99
|
28
|
+
number.currency? => true
|
29
|
+
number.currency => 'USD'
|
30
|
+
|
31
|
+
number = Parsenum.parse("68%")
|
32
|
+
number.value => 0.68
|
33
|
+
number.percentage? => true
|
34
|
+
|
35
|
+
numbers = Parsenum.parse_all("8 out of 10 cats")
|
36
|
+
numbers.size => 2
|
37
|
+
numbers.first.value => 8
|
38
|
+
numbers.first.integer? => true
|
39
|
+
|
40
|
+
|
41
|
+
TODO
|
42
|
+
----
|
43
|
+
|
44
|
+
- support for more currencies
|
45
|
+
- BigDecimal support
|
data/Rakefile
ADDED
data/lib/parsenum.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require "parsenum/version"
|
2
|
+
|
3
|
+
# Namespace and convenience entry points for pulling numbers out of strings.
#
# Every entry point hands the input to Parsenum::Scanner to obtain candidate
# substrings and wraps each candidate in a Parsenum::Parser.
module Parsenum
  # Type tags reported by Parsenum::Parser#type. Frozen so that a caller
  # cannot mutate a tag in place and silently break the `==` comparisons
  # that the parser's predicates rely on.
  INTEGER    = 'integer'.freeze
  NIL        = 'nil'.freeze
  FLOAT      = 'float'.freeze
  PERCENTAGE = 'percentage'.freeze
  CURRENCY   = 'currency'.freeze

  # Everything defined below is callable as Parsenum.<name> (and is also
  # copied as a private instance method for modules that include Parsenum).
  module_function

  # Parses the first numeric candidate found in +str+.
  #
  # @param str the text to scan; passed unchanged to Parsenum::Scanner
  # @return [Parsenum::Parser] parser built from the first candidate. When the
  #   scanner yields no candidates the parser is built from nil.
  def parse(str)
    candidate = Parsenum::Scanner.new(str).candidates.first
    Parsenum::Parser.new(candidate)
  end

  # Parses every numeric candidate found in +str+.
  #
  # @param str the text to scan; passed unchanged to Parsenum::Scanner
  # @return [Array<Parsenum::Parser>] one parser per candidate, in scan order
  def parse_all(str)
    Parsenum::Scanner.new(str).candidates.map do |candidate|
      Parsenum::Parser.new(candidate)
    end
  end

  # Shortcut for +parse(str).value+.
  #
  # @return the value of the first candidate as computed by Parsenum::Parser
  def value(str)
    parse(str).value
  end

  # Shortcut for the values of every candidate in +str+.
  #
  # @return [Array] the value of each parser returned by #parse_all
  def values(str)
    parse_all(str).map(&:value)
  end
end
|
33
|
+
|
34
|
+
require "parsenum/scanner"
|
35
|
+
require "parsenum/parser"
|
36
|
+
require "parsenum/core_ext"
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Parsenum
  # Interprets one candidate string as a number and records what kind of
  # number it looked like.
  #
  # The candidate is classified with one of the Parsenum type tags (NIL,
  # PERCENTAGE, CURRENCY, FLOAT or INTEGER) and converted to a Ruby value.
  # Both are computed eagerly in the constructor and exposed read-only.
  #
  # Declared inside `module Parsenum` (instead of `class Parsenum::Parser`) so
  # that loading this file does not raise NameError when the namespace has not
  # been opened yet.
  class Parser
    attr_reader :source, :value, :type

    # @param source [String, nil] the candidate text to interpret
    # @param type [String, Symbol, nil] optional type tag to force; when nil
    #   the type is estimated from +source+. Previously this argument was
    #   accepted but silently discarded.
    # @raise [ArgumentError] if +type+ names no known extraction strategy
    def initialize(source, type = nil)
      @source = source
      @value  = nil
      # Normalise to a String so the `==` predicates below work for symbols too.
      @type   = type.nil? ? nil : type.to_s
      run
    end

    # @return [String, nil] 'GBP' or 'USD' depending on the symbol present in
    #   the source, "other" for a currency with neither symbol, or nil when
    #   the number is not a currency at all
    def currency
      return nil unless type == Parsenum::CURRENCY
      return 'GBP' if @source =~ /£/
      return 'USD' if @source =~ /\$/
      "other"
    end

    # NOTE: overrides Object#nil? -- true when nothing numeric was recognised,
    # not when the parser object itself is nil.
    def nil?
      @type == Parsenum::NIL
    end

    # @return [Boolean] whether the candidate was classified as a percentage
    def percentage?
      @type == Parsenum::PERCENTAGE
    end

    # @return [Boolean] whether the candidate was classified as a currency
    def currency?
      @type == Parsenum::CURRENCY
    end

    # @return [Boolean] whether the candidate was classified as a float
    def float?
      @type == Parsenum::FLOAT
    end

    # @return [Boolean] whether the candidate was classified as an integer
    def integer?
      @type == Parsenum::INTEGER
    end

    private

    # Settles the type (forced or estimated) and computes the value.
    def run
      # A nil source can only ever be NIL; forcing another type onto it would
      # crash in the string-based extractors.
      @type = Parsenum::NIL if @source.nil?
      @type ||= estimate_type(@source)
      @value = extract(@source, @type)
    end

    # Classifies +str+ by the first matching rule; order matters because a
    # percentage or currency string also contains digits.
    def estimate_type(str)
      return Parsenum::NIL        if str.nil?
      return Parsenum::PERCENTAGE if str =~ /%/
      return Parsenum::CURRENCY   if str =~ /\$|£/
      return Parsenum::FLOAT      if str =~ /\d\.\d/
      return Parsenum::INTEGER    if str =~ /\d/
      Parsenum::NIL
    end

    # Dispatches to the private extract_<type> method for +t+, estimating the
    # type from +str+ when none is given.
    #
    # @raise [ArgumentError] if no extractor exists for +t+
    def extract(str, t = nil)
      t ||= estimate_type(str)
      handler = :"extract_#{t}"
      # Second argument `true` makes respond_to? see the private extractors.
      unless respond_to?(handler, true)
        raise ArgumentError, "unknown number type: #{t.inspect}"
      end
      send(handler, str)
    end

    def extract_nil(str)
      nil
    end

    def extract_integer(str)
      strip_commas(str).to_i
    end

    def extract_float(str)
      strip_commas(str).to_f
    end

    # Drops the first '%' and scales the remaining number to a fraction.
    def extract_percentage(str)
      str = str.sub('%', '')
      extract_float(str) / 100.0
    end

    # Takes the run of digits, commas and dots directly after a '$' or '£'
    # and re-parses it as a plain number; 0 when no such run exists.
    def extract_currency(str)
      matches = str.match(/(\$|£)([\d\,\.]+)/)
      return 0 unless matches
      extract(matches[2])
    end

    # Removes thousands separators so String#to_i / #to_f read the whole number.
    def strip_commas(str)
      str.gsub(',', '')
    end
  end
end
|
File without changes
|
data/parsenum.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Make the gem's own lib/ directory loadable while the spec is evaluated,
# adding it to the load path only if it is not already there.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |gem|
  # --- Identity -----------------------------------------------------------
  gem.name     = "parsenum"
  gem.version  = '0.1.0'
  gem.authors  = ["Andy Kent"]
  gem.email    = ["andy.kent@me.com"]
  gem.summary  = "Simple parsing of numeric values from strings"
  gem.homepage = "https://github.com/andykent/parsenum"
  gem.license  = "MIT"

  # --- Contents -----------------------------------------------------------
  # The file list is whatever git tracks (NUL-separated to survive odd names);
  # executables and test files are picked out of it by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # --- Development-only dependencies --------------------------------------
  gem.add_development_dependency "bundler", "~> 1.6"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "minitest"
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require_relative './spec_helper'
|
2
|
+
|
3
|
+
# Inputs that contain nothing numeric must yield a NIL-typed result with a
# nil value rather than raising.
describe "Non-number Parsing" do
  it "doesn't blow up on nil values" do
    result = Parsenum.parse(nil)
    assert_equal Parsenum::NIL, result.type
    # assert_nil, not assert_equal nil: the latter is deprecated in Minitest 5.
    assert_nil result.value
  end

  it "doesn't blow up on empty strings" do
    result = Parsenum.parse('')
    assert_equal Parsenum::NIL, result.type
    assert_nil result.value
  end

  it "doesn't blow up on strings with no numbers present" do
    result = Parsenum.parse('hello world')
    assert_equal Parsenum::NIL, result.type
    assert_nil result.value
  end
end
|
22
|
+
|
23
|
+
# Whole numbers: bare, comma-grouped, and embedded in surrounding text.
describe "Integer Parsing" do
  it "parses basic integers" do
    parsed = Parsenum.parse('123')
    assert_equal Parsenum::INTEGER, parsed.type
    assert_equal 123, parsed.value
  end

  it "parses integers with comma delimiters" do
    parsed = Parsenum.parse('123,456')
    assert_equal Parsenum::INTEGER, parsed.type
    assert_equal 123_456, parsed.value
  end

  it "parses integers out of text strings" do
    parsed = Parsenum.parse('got 99 problems')
    assert_equal Parsenum::INTEGER, parsed.type
    assert_equal 99, parsed.value
  end

  # parse only looks at the first number; parse_all covers the rest.
  it "parses the first integer out of text" do
    parsed = Parsenum.parse('8 out of 10 cats')
    assert_equal Parsenum::INTEGER, parsed.type
    assert_equal 8, parsed.value
  end
end
|
48
|
+
|
49
|
+
# Decimal numbers, with and without thousands separators.
describe "Float Parsing" do
  it "parses basic floats" do
    parsed = Parsenum.parse('1.23')
    assert_equal Parsenum::FLOAT, parsed.type
    assert_equal 1.23, parsed.value
  end

  it "parses floats with comma delimiters" do
    parsed = Parsenum.parse('123,456.7')
    assert_equal Parsenum::FLOAT, parsed.type
    assert_equal 123_456.7, parsed.value
  end
end
|
62
|
+
|
63
|
+
# Percentages are reported as fractions (12% => 0.12), so values are
# compared with a relative tolerance instead of exact equality.
describe "Percentage Parsing" do
  it "parses basic percentages" do
    parsed = Parsenum.parse('12%')
    assert_equal Parsenum::PERCENTAGE, parsed.type
    assert_in_epsilon 0.12, parsed.value, 0.001
  end

  it "parses percentages with decimal places" do
    parsed = Parsenum.parse('12.3%')
    assert_equal Parsenum::PERCENTAGE, parsed.type
    assert_in_epsilon 0.123, parsed.value, 0.0001
  end
end
|
76
|
+
|
77
|
+
# Pound-sterling amounts: the symbol selects the 'GBP' currency code.
describe "Currency Parsing" do
  it "parses £" do
    parsed = Parsenum.parse('£123')
    assert_equal Parsenum::CURRENCY, parsed.type
    assert_equal 'GBP', parsed.currency
    assert_equal 123, parsed.value
  end

  it "parses £ with pence" do
    parsed = Parsenum.parse('£12.34')
    assert_equal Parsenum::CURRENCY, parsed.type
    assert_equal 'GBP', parsed.currency
    assert_in_epsilon 12.34, parsed.value, 0.001
  end
end
|
92
|
+
|
93
|
+
# Dollar amounts: the symbol selects the 'USD' currency code.
describe "Currency Parsing" do
  it "parses $" do
    parsed = Parsenum.parse('$123')
    assert_equal Parsenum::CURRENCY, parsed.type
    assert_equal 'USD', parsed.currency
    assert_equal 123, parsed.value
  end

  it "parses $ with cents" do
    parsed = Parsenum.parse('$12.34')
    assert_equal Parsenum::CURRENCY, parsed.type
    assert_equal 'USD', parsed.currency
    assert_in_epsilon 12.34, parsed.value, 0.001
  end
end
|
108
|
+
|
109
|
+
# parse_all returns one result per number found, in order of appearance.
describe "parse_all" do
  it "parses multiple numbers" do
    result = Parsenum.parse_all('8 out of 10 cats')
    # assert_equal takes (expected, actual); the reversed order produced
    # misleading "Expected ... Actual ..." failure messages.
    assert_equal 2, result.size
    assert_equal 8, result[0].value
    assert_equal 10, result[1].value
  end
end
|
117
|
+
|
118
|
+
# The core extension exposes the parser directly on String instances.
describe "String extensions" do
  it "adds a 'number' method" do
    first_number = "8 out of 10 cats".number
    assert_equal 8, first_number
  end

  it "adds a 'numbers' method" do
    all_numbers = "8 out of 10 cats".numbers
    assert_equal [8, 10], all_numbers
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: parsenum
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andy Kent
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- andy.kent@me.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE.txt
|
65
|
+
- README.md
|
66
|
+
- Rakefile
|
67
|
+
- lib/parsenum.rb
|
68
|
+
- lib/parsenum/core_ext.rb
|
69
|
+
- lib/parsenum/parser.rb
|
70
|
+
- lib/parsenum/scanner.rb
|
71
|
+
- lib/parsenum/version.rb
|
72
|
+
- parsenum.gemspec
|
73
|
+
- spec/parsenum_spec.rb
|
74
|
+
- spec/spec_helper.rb
|
75
|
+
homepage: https://github.com/andykent/parsenum
|
76
|
+
licenses:
|
77
|
+
- MIT
|
78
|
+
metadata: {}
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options: []
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 2.2.2
|
96
|
+
signing_key:
|
97
|
+
specification_version: 4
|
98
|
+
summary: Simple parsing of numeric values from strings
|
99
|
+
test_files:
|
100
|
+
- spec/parsenum_spec.rb
|
101
|
+
- spec/spec_helper.rb
|