parse 0.0.1.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +12 -0
- data/README.md +17 -19
- data/lib/parse.rb +11 -89
- data/lib/parse/algorithm.rb +8 -0
- data/lib/parse/algorithm/ver0_0_1.rb +99 -0
- data/lib/parse/algorithm/ver0_1_0.rb +149 -0
- data/lib/parse/version.rb +1 -1
- data/parse.gemspec +3 -2
- data/spec/parse_spec.rb +2 -2
- data/spec/parse_ver0_0_1_spec.rb +1 -1
- data/spec/parse_ver0_1_0_spec.rb +277 -0
- metadata +22 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 321f7a0116ae940b829a45faedb2fe731b3ada97
|
4
|
+
data.tar.gz: 6599e616962df9eac3ff27f31f4d9f11418270ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1d0fc63cabe88dcea9ae7f1049c99288782ffe4ebcaa575a2ae30b22f5f84aa4466223158b152d07d24118af84792eac77e40425c62690f25fe36692f9bafb9
|
7
|
+
data.tar.gz: 8faa4572ebe3ae57960e983596c98324c25188e576d1cd5d1a15d41561ae0818cbda6d564d3c603f6ef043081ca8cd550034bbcda8d7e072180a524dac4e1794
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
0.1.0
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Not parsed as numbers anymore: "10_14" or "10_400" or "-1_2e3_5" or "-$123_456.7" -- very unlikely to see in the wild
|
6
|
+
* Not parsed as numbers anymore: '1,2' or '-1,2' or '1,20' or '1,22.0' -- too hard to distinguish from CSV
|
7
|
+
|
8
|
+
* Enhancements
|
9
|
+
|
10
|
+
* Using a proper versioning scheme - every new version of the algorithm involves a minor version bump!
|
11
|
+
* Recover from YAML parsing errors in Ruby 1.9
|
12
|
+
|
1
13
|
0.0.1.1 / 2014-02-06
|
2
14
|
|
3
15
|
* Bug fixes
|
data/README.md
CHANGED
@@ -4,10 +4,12 @@ Detect and convert short strings into integers, floats, dates, times, booleans,
|
|
4
4
|
|
5
5
|
## Note on versions
|
6
6
|
|
7
|
-
You can always use `Parse.parse`. It will always point to the most recent version of the algorithm (currently `Parse.
|
7
|
+
You can always use `Parse.parse`. It will always point to the most recent version of the algorithm (currently `Parse.ver0_1_0`).
|
8
8
|
|
9
9
|
If the algorithm changes and you need the old version, you can reference it by its version number. For example, `Parse.ver0_0_1`.
|
10
10
|
|
11
|
+
Since almost any change to the algorithm is a breaking change, there are going to be lots of minor version bumps (as opposed to patches).
|
12
|
+
|
11
13
|
## Usage
|
12
14
|
|
13
15
|
You get the idea:
|
@@ -39,26 +41,24 @@ More esoteric stuff:
|
|
39
41
|
Parse.parse("-") #=> nil
|
40
42
|
Parse.parse("?") #=> nil
|
41
43
|
Parse.parse("-8e-05") #=> -8.0e-05
|
42
|
-
Parse.parse("-
|
44
|
+
Parse.parse("-12.5e-13") #=> -1.25e-12
|
43
45
|
Parse.parse("05753") #=> 5753
|
44
|
-
Parse.parse("
|
45
|
-
Parse.parse("15_00_0") #=> 15000
|
46
|
+
Parse.parse("15000") #=> 15000
|
46
47
|
Parse.parse("15.0") #=> 15.0
|
47
48
|
Parse.parse("15,000.0") #=> 15000.0
|
48
|
-
Parse.parse("
|
49
|
-
Parse.parse("15_00_0.0") #=> 15000.0
|
49
|
+
Parse.parse("15000.0") #=> 15000.0
|
50
50
|
Parse.parse("0015") #=> 15
|
51
51
|
Parse.parse("0015.0") #=> 15.0
|
52
|
-
Parse.parse("
|
52
|
+
Parse.parse("0015.0") #=> 15.0
|
53
53
|
Parse.parse("0x15") #=> 21
|
54
54
|
Parse.parse("0o15") #=> 13
|
55
55
|
Parse.parse("8e-05") #=> 8.0e-05
|
56
|
-
Parse.parse("
|
56
|
+
Parse.parse("12.5e-13") #=> 1.25e-12
|
57
57
|
Parse.parse("0$123.4") #=> 123.4
|
58
58
|
Parse.parse("$15,000") #=> 15000
|
59
59
|
Parse.parse("0$15,000") #=> 15000
|
60
|
-
Parse.parse("$
|
61
|
-
Parse.parse("$
|
60
|
+
Parse.parse("$123456") #=> 123456
|
61
|
+
Parse.parse("$123456.7") #=> 123456.7
|
62
62
|
Parse.parse("10,000,000") #=> 10000000
|
63
63
|
Parse.parse("10,000,000.00") #=> 10000000.0
|
64
64
|
Parse.parse("$10,000,000.00") #=> 10000000.0
|
@@ -66,15 +66,13 @@ More esoteric stuff:
|
|
66
66
|
Parse.parse("$010,000,000.00") #=> 10000000.0
|
67
67
|
Parse.parse("-15") #=> -15
|
68
68
|
Parse.parse("-15,000") #=> -15000
|
69
|
-
Parse.parse("-
|
70
|
-
Parse.parse("-15_00_0") #=> -15000
|
69
|
+
Parse.parse("-15000") #=> -15000
|
71
70
|
Parse.parse("-15.0") #=> -15.0
|
72
71
|
Parse.parse("-15,000.0") #=> -15000.0
|
73
|
-
Parse.parse("-
|
74
|
-
Parse.parse("-
|
72
|
+
Parse.parse("-15000.0") #=> -15000.0
|
73
|
+
Parse.parse("-15000.0") #=> -15000.0
|
75
74
|
Parse.parse("00-15") #=> -15
|
76
75
|
Parse.parse("00-15.0") #=> -15.0
|
77
|
-
Parse.parse("0_0-15.0") #=> "0_0-15.0"
|
78
76
|
Parse.parse("-0x15") #=> -21
|
79
77
|
Parse.parse("-0o15") #=> -13
|
80
78
|
Parse.parse("-$123.4") #=> -123.4
|
@@ -82,10 +80,10 @@ More esoteric stuff:
|
|
82
80
|
Parse.parse("0($123.4)") #=> -123.4
|
83
81
|
Parse.parse("-$15,000") #=> -15000
|
84
82
|
Parse.parse("($15,000)") #=> -15000
|
85
|
-
Parse.parse("-$
|
86
|
-
Parse.parse("($
|
87
|
-
Parse.parse("-$
|
88
|
-
Parse.parse("($
|
83
|
+
Parse.parse("-$123,456") #=> -123456
|
84
|
+
Parse.parse("($123,456)") #=> -123456
|
85
|
+
Parse.parse("-$123,456.7") #=> -123456.7
|
86
|
+
Parse.parse("($123,456.7)") #=> -123456.7
|
89
87
|
Parse.parse("-10,000,000") #=> -10000000
|
90
88
|
Parse.parse("(10,000,000)") #=> -10000000
|
91
89
|
Parse.parse("-10,000,000.00") #=> -10000000.0
|
data/lib/parse.rb
CHANGED
@@ -1,102 +1,24 @@
|
|
1
1
|
require "parse/version"
|
2
|
+
require 'parse/algorithm'
|
3
|
+
require 'parse/algorithm/ver0_0_1'
|
4
|
+
require 'parse/algorithm/ver0_1_0'
|
2
5
|
|
3
6
|
require 'date'
|
4
7
|
require 'yaml'
|
5
8
|
require 'safe_yaml/load'
|
9
|
+
require 'active_support/core_ext'
|
6
10
|
|
7
11
|
module Parse
|
8
|
-
# only need to deal with stuff not caught by YAML or JSON
|
9
|
-
NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!'] # from bigml's list
|
10
|
-
NAN = [ 'NaN' ]
|
11
|
-
INFINITY = [ '#DIV/0', 'Infinity' ]
|
12
|
-
NEG_INFINITY = [ '-Infinity' ]
|
13
|
-
DATE = {
|
14
|
-
euro: ['%d-%m-%Y', '%d-%m-%y'],
|
15
|
-
us: ['%m-%d-%Y', '%m-%d-%y'],
|
16
|
-
}
|
17
|
-
|
18
12
|
def self.parse(raw, options = nil)
|
19
|
-
|
13
|
+
ver0_1_0 raw, options
|
20
14
|
end
|
21
15
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
def self.ver0_0_1(raw, options = nil)
|
26
|
-
return raw unless raw.is_a? String
|
27
|
-
|
28
|
-
memo = raw.strip
|
29
|
-
|
30
|
-
return nil if NULL.include? memo
|
31
|
-
return 1.0/0 if INFINITY.include? memo
|
32
|
-
return -1.0/0 if NEG_INFINITY.include? memo
|
33
|
-
return 0.0/0 if NAN.include? memo
|
34
|
-
|
35
|
-
if options and options[:date]
|
36
|
-
yyyy, yy = DATE.fetch options[:date]
|
37
|
-
memo.sub!(/0+/, '')
|
38
|
-
memo.gsub! '/', '-'
|
39
|
-
if memo =~ /\d{4,}/ # yyyy
|
40
|
-
return Date.strptime(memo, yyyy)
|
41
|
-
else
|
42
|
-
return Date.strptime(memo, yy)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
not_numeric = nil
|
47
|
-
not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
|
48
|
-
not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
|
49
|
-
not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
|
50
|
-
possible_numeric = !not_numeric
|
51
|
-
accounting_negative = nil
|
52
|
-
percentage = nil
|
53
|
-
|
54
|
-
if possible_numeric
|
55
|
-
accounting_negative = memo =~ /\A[0$]*\([0$]*/
|
56
|
-
percentage = memo.end_with?('%')
|
57
|
-
memo.sub! /%\z/, '' if percentage
|
58
|
-
memo.delete!('()') if accounting_negative # accounting negative
|
59
|
-
# in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
|
60
|
-
memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
|
61
|
-
memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
|
62
|
-
if memo.include?(',')
|
63
|
-
a, b = memo.split('.', 2)
|
64
|
-
a.delete! ','
|
65
|
-
memo = b ? [a, b].join('.') : a
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
not_safe_for_yaml = nil
|
70
|
-
not_safe_for_yaml ||= memo.include?('#')
|
71
|
-
not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ #1,2,3, maybe a csv
|
72
|
-
safe_for_yaml = !not_safe_for_yaml
|
73
|
-
|
74
|
-
if safe_for_yaml
|
75
|
-
begin
|
76
|
-
memo = SafeYAML.load memo
|
77
|
-
rescue
|
78
|
-
$stderr.puts "#{memo.inspect} => #{$!}"
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
if possible_numeric
|
83
|
-
case memo
|
84
|
-
when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
|
85
|
-
# scientific notation
|
86
|
-
memo = memo.to_f
|
87
|
-
when /\A[+\-]?0o/
|
88
|
-
# octal per yaml 1.2
|
89
|
-
memo = memo.to_i 8
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
if memo.is_a?(String)
|
94
|
-
# compress whitespace
|
95
|
-
memo.gsub! /\s+/, ' '
|
96
|
-
end
|
16
|
+
def self.ver0_1_0(*args)
|
17
|
+
Algorithm::Ver0_1_0.new(*args).result
|
18
|
+
end
|
97
19
|
|
98
|
-
|
99
|
-
|
100
|
-
memo
|
20
|
+
def self.ver0_0_1(*args)
|
21
|
+
Algorithm::Ver0_0_1.new(*args).result
|
101
22
|
end
|
23
|
+
|
102
24
|
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module Parse
  module Algorithm
    # Version 0.0.1 of the detection algorithm.
    #
    # This version is kept so that callers can pin the old behaviour through
    # Parse.ver0_0_1, so its results are deliberately left untouched -- quirks
    # included. Do not "fix" the parsing rules here; put changes in a newer
    # version class instead.
    #
    # INFINITY, NEG_INFINITY and NAN are not defined in this class; they are
    # resolved through the enclosing lexical scope (presumably declared in
    # parse/algorithm.rb -- confirm there).
    class Ver0_0_1
      # Strings that mean "no value" and are returned as nil.
      NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!'].freeze # from bigml's list

      # Region => [format for 4-digit years, format for 2-digit years].
      DATE = {
        euro: ['%d-%m-%Y', '%d-%m-%y'].freeze,
        us: ['%m-%d-%Y', '%m-%d-%y'].freeze,
      }.freeze

      attr_reader :raw
      attr_reader :options

      # @param raw [Object] the value to parse; anything but a String is returned as-is
      # @param options [Hash, nil] only +:date+ (+:euro+ or +:us+) is consulted
      def initialize(raw, options = nil)
        @raw = raw
        @options = options
      end

      # @private
      # use YAML to parse stuff like '1.5'
      # ruby's yaml is 1.1, which means it does weird stuff with '001' (fixed in 1.2, which jruby has)
      #
      # @return [Object] nil, a Float, a Date, whatever SafeYAML.load produced,
      #   or the whitespace-compressed string when nothing else applied
      # @raise [KeyError] when options[:date] names an unknown region
      # @raise [ArgumentError] when options[:date] is set and the text is not a
      #   date in that format (raised by Date.strptime)
      def result
        return raw unless raw.is_a? String

        memo = raw.strip

        return nil if NULL.include? memo
        return 1.0/0 if INFINITY.include? memo
        return -1.0/0 if NEG_INFINITY.include? memo
        return 0.0/0 if NAN.include? memo

        if options && options[:date]
          yyyy, yy = DATE.fetch options[:date]
          # NOTE: the pattern is not anchored, so this removes the first run of
          # zeros wherever it occurs (not only leading ones). Kept as-is because
          # this version's behaviour is pinned.
          memo.sub!(/0+/, '')
          memo.gsub! '/', '-'
          if memo =~ /\d{4,}/ # yyyy
            return Date.strptime(memo, yyyy)
          else
            return Date.strptime(memo, yy)
          end
        end

        not_numeric = nil
        not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
        not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
        not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
        possible_numeric = !not_numeric
        accounting_negative = nil
        percentage = nil

        if possible_numeric
          accounting_negative = memo =~ /\A[0$]*\([0$]*/
          percentage = memo.end_with?('%')
          memo.sub!(/%\z/, '') if percentage
          memo.delete!('()') if accounting_negative # accounting negative
          # in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
          memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
          memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
          if memo.include?(',')
            # drop thousands separators from the integer part only
            a, b = memo.split('.', 2)
            a.delete! ','
            memo = b ? [a, b].join('.') : a
          end
        end

        not_safe_for_yaml = nil
        not_safe_for_yaml ||= memo.include?('#')
        not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ #1,2,3, maybe a csv
        safe_for_yaml = !not_safe_for_yaml

        if safe_for_yaml
          begin
            memo = SafeYAML.load memo
          rescue
            # best effort: report the failure and carry on with the raw text
            $stderr.puts "#{memo.inspect} => #{$!}"
          end
        end

        if possible_numeric
          # Regexp#=== is false for non-strings, so values YAML already
          # converted pass through this case untouched.
          case memo
          when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
            # scientific notation
            memo = memo.to_f
          when /\A[+\-]?0o/
            # octal per yaml 1.2
            memo = memo.to_i 8
          end
        end

        if memo.is_a?(String)
          # compress whitespace
          memo.gsub!(/\s+/, ' ')
        end

        memo = memo / 100.0 if percentage
        memo = -memo if accounting_negative
        memo
      end
    end
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module Parse
  module Algorithm
    # Version 0.1.0 of the detection algorithm: turns a short string into nil,
    # a number, a Date, or whatever SafeYAML makes of it, falling back to the
    # whitespace-compressed string.
    #
    # INFINITY, NEG_INFINITY and NAN are not defined in this class; they are
    # resolved through the enclosing lexical scope (presumably declared in
    # parse/algorithm.rb -- confirm there).
    class Ver0_1_0
      # Strings that mean "no value" and are returned as nil.
      NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!', '00/00/00', '0000-00-00'].freeze # from bigml's list

      # Region => [format for 4-digit years, format for 2-digit years].
      REGION_DATE_FORMAT = {
        euro: ['%d-%m-%Y', '%d-%m-%y'].freeze,
        us: ['%m-%d-%Y', '%m-%d-%y'].freeze,
        iso: ['%Y-%m-%d', '%y-%m-%d'].freeze, # second one is silly
      }.freeze

      # Pattern => region, tried in order when no :date option is given.
      # ISO: optional zero padding, a year 1000-2999, month 01-12, day 01-31,
      # separated by '-' or '/'. Impossible combinations such as 06-31 still
      # match here and are rejected later by Date.strptime.
      DATE_DETECT = {
        %r{\A0*[12]\d\d\d[\-/](?:0[1-9]|1[0-2])[\-/](?:0[1-9]|[12]\d|3[01])\z} => :iso,
      }.freeze

      EMPTY_OPTIONS = {}.freeze

      attr_reader :raw
      attr_reader :options

      # @param raw [Object] the value to parse; anything but a String is returned as-is
      # @param options [Hash, nil] recognised keys:
      #   +:date+ (a REGION_DATE_FORMAT key), +:type+ (Date, Numeric, Integer or
      #   Float) and +:ignore_error+
      def initialize(raw, options = nil)
        @raw = raw
        @options = options || EMPTY_OPTIONS
      end

      # Runs the algorithm on +raw+.
      #
      # @return [Object] the parsed value; nil for NULL markers, and nil when an
      #   error occurred and options[:ignore_error] is set
      # @raise [RuntimeError] wrapping any StandardError raised while parsing,
      #   unless options[:ignore_error] is set
      def result
        return raw unless raw.is_a? String

        memo = raw.strip

        return nil if NULL.include? memo
        return 1.0/0 if INFINITY.include? memo
        return -1.0/0 if NEG_INFINITY.include? memo
        return 0.0/0 if NAN.include? memo

        # an explicit region wins; otherwise sniff the text
        date_region = options[:date] || detect_date_region(memo)

        if date_region.nil? && options[:type] == Date
          date_region = :iso
        end

        if date_region
          yyyy, yy = REGION_DATE_FORMAT.fetch date_region
          # decide on the year width before the leading zeros are removed
          is_yyyy = memo =~ /[1-9]\d\d\d/
          memo.sub!(/\A0+/, '')
          memo.gsub! '/', '-'
          if is_yyyy
            if memo.length < 10 && date_region == :iso
              # too short for yyyy-mm-dd, e.g. the compact form 20110628
              return Date.parse(memo)
            else
              return Date.strptime(memo, yyyy)
            end
          else
            return Date.strptime(memo, yy)
          end
        end

        possible_numeric = nil
        not_numeric = nil
        certain_numeric = nil
        if [Numeric, Integer, Float].include?(options[:type])
          # the caller vouches for a number, so skip the heuristics
          certain_numeric = true
          possible_numeric = true
          not_numeric = false
        else
          not_numeric ||= memo.include?('_')
          not_numeric ||= memo =~ %r{[1-9][/-]\d}
          not_numeric ||= memo =~ /,\d{1,2}(?:[.\D]|\z)/
          not_numeric ||= memo.scan(/[^\d_,%.eE]/).length > memo.scan(/[\d_,%.eE]/).length
          not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
          possible_numeric = !not_numeric
        end
        accounting_negative = nil
        percentage = nil
        if possible_numeric
          accounting_negative = memo =~ /\A[0$]*\([0$]*/
          percentage = memo.end_with?('%')
          memo.sub!(/%\z/, '') if percentage
          memo.delete!('()') if accounting_negative # accounting negative
          # in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
          memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
          memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
          memo.sub!('D', 'e') if memo =~ /\A[+\-]?[\d.]+D[+\-]?[\d.]+\z/ # fortran double precision
          if memo.include?(',')
            # drop thousands separators from the integer part only
            a, b = memo.split('.', 2)
            a.delete! ','
            memo = b ? [a, b].join('.') : a
          end
        end

        if certain_numeric
          # strip units and other letters, e.g. "2.4 SQFT"
          memo.gsub!(/[a-z]/i, '')
        end

        not_safe_for_yaml = nil
        not_safe_for_yaml ||= memo =~ /\A(on|off)\z/i
        not_safe_for_yaml ||= memo.include?('#')
        not_safe_for_yaml ||= memo =~ /\A[@&,]/
        not_safe_for_yaml ||= not_numeric && memo.start_with?('0')
        not_safe_for_yaml ||= not_numeric && memo =~ /\A[^{\[]*\d[,_]/ #1,2,3, maybe a csv

        safe_for_yaml = !not_safe_for_yaml

        if safe_for_yaml
          begin
            memo = SafeYAML.load memo
          rescue StandardError, SyntaxError
            # Psych::SyntaxError descends from ::SyntaxError on 1.9.3, so a
            # plain rescue misses it there. Naming both classes keeps that
            # recovery without also trapping signals or SystemExit.
            $stderr.puts "#{memo.inspect} => #{$!}"
          end
        end

        if possible_numeric
          # Regexp#=== is false for non-strings, so values YAML already
          # converted pass through this case untouched.
          case memo
          when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
            # scientific notation
            memo = memo.to_f
          when /\A[+\-]?0o/
            # octal per yaml 1.2
            memo = memo.to_i 8
          end
        end

        if memo.is_a?(String)
          # compress whitespace
          memo.gsub!(/\s+/, ' ')
        end

        memo = memo / 100.0 if percentage
        memo = -memo if accounting_negative
        memo
      rescue
        # options is never nil here (see #initialize)
        raise "#{memo.inspect} => #{$!}" unless options[:ignore_error]
        nil
      end

      private

      # Returns the region of the first DATE_DETECT pattern matching +text+, or nil.
      def detect_date_region(text)
        DATE_DETECT.each do |pattern, region|
          return region if text =~ pattern
        end
        nil
      end
    end
  end
end
|
data/lib/parse/version.rb
CHANGED
data/parse.gemspec
CHANGED
@@ -18,12 +18,13 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_runtime_dependency 'safe_yaml'
|
21
|
+
spec.add_runtime_dependency 'safe_yaml', '>=1'
|
22
|
+
spec.add_runtime_dependency 'activesupport'
|
22
23
|
|
23
24
|
spec.add_development_dependency "bundler", "~> 1.5"
|
24
25
|
spec.add_development_dependency "rake"
|
25
26
|
spec.add_development_dependency "rspec"
|
26
27
|
spec.add_development_dependency 'multi_json'
|
27
|
-
spec.add_development_dependency 'activesupport'
|
28
28
|
spec.add_development_dependency 'pry'
|
29
|
+
# spec.add_development_dependency 'twitter_cldr'
|
29
30
|
end
|
data/spec/parse_spec.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Parse do
|
4
|
-
it "should parse with version 0.0.
|
4
|
+
it "should parse with version 0.0.2 of the algorithm" do
|
5
5
|
v = " 1990-04-03 "
|
6
|
-
expect(Parse.parse(v)).to eq(Parse.
|
6
|
+
expect(Parse.parse(v)).to eq(Parse.ver0_1_0(v))
|
7
7
|
end
|
8
8
|
end
|
data/spec/parse_ver0_0_1_spec.rb
CHANGED
@@ -0,0 +1,277 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'twitter_cldr'
|
3
|
+
|
4
|
+
describe Parse::Algorithm::Ver0_1_0 do
|
5
|
+
same = [
|
6
|
+
'1,2',
|
7
|
+
'1,20',
|
8
|
+
'1,2.0',
|
9
|
+
'-1,2',
|
10
|
+
'-1,20',
|
11
|
+
'-1,2.0',
|
12
|
+
'01,2',
|
13
|
+
'01,20',
|
14
|
+
'01,2.0',
|
15
|
+
'15_000',
|
16
|
+
'15_00_0',
|
17
|
+
'15_000.0',
|
18
|
+
'15_00_0.0',
|
19
|
+
'0_015.0', # just weird
|
20
|
+
'1_2.5e-1_3',
|
21
|
+
'-1_2.5e-1_3',
|
22
|
+
'$123_456',
|
23
|
+
'$123_456.7',
|
24
|
+
'-15_000',
|
25
|
+
'-15_00_0',
|
26
|
+
'-15_000.0',
|
27
|
+
'-15_00_0.0',
|
28
|
+
'0_0-15.0', # just weird
|
29
|
+
'-$123_456',
|
30
|
+
'($123_456)',
|
31
|
+
'-$123_456.7',
|
32
|
+
'($123_456.7)',
|
33
|
+
'10_14_A',
|
34
|
+
'10_14',
|
35
|
+
'10_140',
|
36
|
+
]
|
37
|
+
same.each do |v|
|
38
|
+
it "parses #{v.inspect} as itself (yaml=#{SafeYAML.load(v).inspect})" do
|
39
|
+
expect(Parse.ver0_1_0(v)).to eq(v)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
a = {
|
44
|
+
|
45
|
+
"@ foo" => "@ foo",
|
46
|
+
", foo" => ", foo",
|
47
|
+
"044-1-276-000" => "044-1-276-000",
|
48
|
+
|
49
|
+
['1 BEDROOMS', { type: Numeric } ] => 1,
|
50
|
+
'1 BEDROOMS' => '1 BEDROOMS',
|
51
|
+
|
52
|
+
[ '2.4 SQFT', { type: Numeric } ] => 2.4,
|
53
|
+
'2.4 SQFT' => '2.4 SQFT',
|
54
|
+
|
55
|
+
['000', { date: :us, ignore_error: true}] => nil,
|
56
|
+
['7/7/2004', {date: :us}] => Date.new(2004,7,7),
|
57
|
+
"999 HOLY CROSS ROAD, COLCHESTER, VT 05446" => "999 HOLY CROSS ROAD, COLCHESTER, VT 05446",
|
58
|
+
|
59
|
+
'00020110628' => 20110628,
|
60
|
+
'0002011-06-28' => Date.new(2011,6,28),
|
61
|
+
'0002011/06/28' => Date.new(2011,6,28),
|
62
|
+
['00020110628', {date: :iso}] => Date.new(2011,6,28),
|
63
|
+
['00020110628', {type: Date}] => Date.new(2011,6,28),
|
64
|
+
|
65
|
+
'00019800628' => 19800628,
|
66
|
+
'0001980-06-28' => Date.new(1980,6,28),
|
67
|
+
'0001980/06/28' => Date.new(1980,6,28),
|
68
|
+
['00019800628', {date: :iso}] => Date.new(1980,6,28),
|
69
|
+
['00019800628', {type: Date}] => Date.new(1980,6,28),
|
70
|
+
|
71
|
+
'00030000628' => 30000628,
|
72
|
+
'0003000-06-28' => '0003000-06-28',
|
73
|
+
'0003000/06/28' => '0003000/06/28',
|
74
|
+
['00030000628', {date: :iso}] => Date.new(3000,6,28),
|
75
|
+
['00030000628', {type: Date}] => Date.new(3000,6,28),
|
76
|
+
|
77
|
+
['', {type: Numeric}] => nil,
|
78
|
+
|
79
|
+
# fortran double precision
|
80
|
+
'0.225120000000000D+06' => 0.22512e6,
|
81
|
+
'0.341913000000000D+07' => 0.341913e7,
|
82
|
+
'0.2500000E-01' => 0.25e-1,
|
83
|
+
'3.1D0' => 3.1,
|
84
|
+
'-2.D0' => -2.0,
|
85
|
+
|
86
|
+
'8e-05' => 8e-5,
|
87
|
+
'8e+4' => 8e4,
|
88
|
+
'8.0e+4' => 8.0e4,
|
89
|
+
'8e-4' => 8e-4,
|
90
|
+
'8.0e-4' => 8.0e-4,
|
91
|
+
'-8e+4' => -8e4,
|
92
|
+
'-8.0e+4' => -8.0e4,
|
93
|
+
'-8e-4' => -8e-4,
|
94
|
+
'-8.0e-4' => -8.0e-4,
|
95
|
+
'8E+4' => 8e4,
|
96
|
+
'8.0E+4' => 8.0e4,
|
97
|
+
'8E-4' => 8e-4,
|
98
|
+
'8.0E-4' => 8.0e-4,
|
99
|
+
'-8E+4' => -8e4,
|
100
|
+
'-8.0E+4' => -8.0e4,
|
101
|
+
'-8E-4' => -8e-4,
|
102
|
+
'-8.0E-4' => -8.0e-4,
|
103
|
+
|
104
|
+
# http://dojotoolkit.org/reference-guide/1.9/dojo/number.html
|
105
|
+
# '1,000,000.00' => 1_000_000.0,
|
106
|
+
# '1.000.000,00' => 1_000_000.0, # german
|
107
|
+
# '1 000 000,00' => 1_000_000.0, # french
|
108
|
+
# '10,00,000.00' => 1_000_000.0, # indian
|
109
|
+
|
110
|
+
'060-10-01' => '60-10-01',
|
111
|
+
'OFF' => 'OFF',
|
112
|
+
'ON' => 'ON',
|
113
|
+
|
114
|
+
'& P4' => '& P4',
|
115
|
+
|
116
|
+
# EVERYTHING BELOW IS SAME AS 0.0.1
|
117
|
+
|
118
|
+
'' => nil,
|
119
|
+
'nil' => nil,
|
120
|
+
'15' => 15,
|
121
|
+
'15,000' => 15_000,
|
122
|
+
'15.0' => 15.0,
|
123
|
+
'15,000.0' => 15_000.0,
|
124
|
+
'0015' => 15, # not octal
|
125
|
+
'0015.0' => 15.0, # not octal
|
126
|
+
'0x15' => 0x15, # hex
|
127
|
+
'0o15' => 015, # octal
|
128
|
+
'8e-05' => 8e-05,
|
129
|
+
'12.5e-13' => 12.5e-13,
|
130
|
+
'-12.5e-13' => -12.5e-13,
|
131
|
+
'$123.4' => 123.4,
|
132
|
+
'0$123.4' => 123.4,
|
133
|
+
'$15,000' => 15_000,
|
134
|
+
'0$15,000' => 15_000,
|
135
|
+
'10,000,000' => 10_000_000,
|
136
|
+
'10,000,000.00' => 10_000_000.0,
|
137
|
+
'$10,000,000.00' => 10_000_000.0,
|
138
|
+
'0$10,000,000.00' => 10_000_000.0,
|
139
|
+
'$010,000,000.00' => 10_000_000.0,
|
140
|
+
|
141
|
+
'-15' => -15,
|
142
|
+
'-15,000' => -15_000,
|
143
|
+
'-15.0' => -15.0,
|
144
|
+
'-15,000.0' => -15_000.0,
|
145
|
+
'00-15' => -15, # not octal
|
146
|
+
'00-15.0' => -15.0, # not octal
|
147
|
+
'-0x15' => -0x15, # hex
|
148
|
+
'-0o15' => -015, # octal
|
149
|
+
'-8e-05' => -8e-05,
|
150
|
+
'-$123.4' => -123.4,
|
151
|
+
'($123.4)' => -123.4,
|
152
|
+
'0($123.4)' => -123.4,
|
153
|
+
'-$15,000' => -15_000,
|
154
|
+
'($15,000)' => -15_000,
|
155
|
+
'-$123456' => -123_456,
|
156
|
+
'($123456)' => -123_456,
|
157
|
+
'-$123456.7' => -123_456.7,
|
158
|
+
'($123456.7)' => -123_456.7,
|
159
|
+
'-$123,456' => -123_456,
|
160
|
+
'($123,456)' => -123_456,
|
161
|
+
'-$123,456.7' => -123_456.7,
|
162
|
+
'($123,456.7)' => -123_456.7,
|
163
|
+
'-10,000,000' => -10_000_000,
|
164
|
+
'(10,000,000)' => -10_000_000,
|
165
|
+
'-10,000,000.00' => -10_000_000.0,
|
166
|
+
'(10,000,000.00)' => -10_000_000.0,
|
167
|
+
'-10000000' => -10_000_000,
|
168
|
+
'(10000000)' => -10_000_000,
|
169
|
+
'-10000000.00' => -10_000_000.0,
|
170
|
+
'(10000000.00)' => -10_000_000.0,
|
171
|
+
'1,200' => 1_200,
|
172
|
+
'1,200.0' => 1_200.0,
|
173
|
+
'1.0,2' => '1.0,2',
|
174
|
+
'1.0,2.0' => '1.0,2.0',
|
175
|
+
'-1,200' => -1_200,
|
176
|
+
'-1,200.0' => -1_200.0,
|
177
|
+
'-1.0,2' => '-1.0,2',
|
178
|
+
'-1.0,2.0' => '-1.0,2.0',
|
179
|
+
'01,200' => 1_200,
|
180
|
+
'01,200.0' => 1_200.0,
|
181
|
+
'01.0,2' => '01.0,2',
|
182
|
+
'01.0,2.0' => '01.0,2.0',
|
183
|
+
|
184
|
+
'05753' => 5753,
|
185
|
+
'true' => true,
|
186
|
+
'yes' => true,
|
187
|
+
'false' => false,
|
188
|
+
'no' => false,
|
189
|
+
'#DIV/0' => (1.0/0),
|
190
|
+
'#NAME?' => nil,
|
191
|
+
'Inf' => 'Inf',
|
192
|
+
'Infinity' => (1.0/0),
|
193
|
+
'-Infinity' => -(1.0/0),
|
194
|
+
'NaN' => 0.0/0, # need the dot
|
195
|
+
'.NaN' => 0.0/0, # NaN
|
196
|
+
'-.inf' => -(1.0/0), # -Infinity
|
197
|
+
'-' => nil, # per bigml
|
198
|
+
'?' => nil,
|
199
|
+
'1982-01-01' => Date.new(1982,1,1),
|
200
|
+
'2010-05-05 13:42:16 Z' => Time.parse('2010-05-05 13:42:16 Z'),
|
201
|
+
'2010-05-05 13:42:16 -02:00' => Time.parse('2010-05-05 13:42:16 -02:00'),
|
202
|
+
":not_a_symbol" => ':not_a_symbol',
|
203
|
+
'#hello' => '#hello',
|
204
|
+
"\n#hello\n#world" => '#hello #world',
|
205
|
+
"hello\nworld" => 'hello world', # whitespace compression
|
206
|
+
|
207
|
+
'0%' => 0.0,
|
208
|
+
'100%' => 1.0,
|
209
|
+
'50%' => 0.5,
|
210
|
+
'5%' => 0.05,
|
211
|
+
'00000%' => 0.0,
|
212
|
+
'0000100%' => 1.0,
|
213
|
+
'000050%' => 0.5,
|
214
|
+
'00005%' => 0.05,
|
215
|
+
|
216
|
+
['12/25/82', {date: :us}] => Date.new(1982,12,25),
|
217
|
+
['12/25/1982', {date: :us}] => Date.new(1982,12,25),
|
218
|
+
['25/12/82', {date: :euro}] => Date.new(1982,12,25),
|
219
|
+
['25/12/1982', {date: :euro}] => Date.new(1982,12,25),
|
220
|
+
['12-25-82', {date: :us}] => Date.new(1982,12,25),
|
221
|
+
['12-25-1982', {date: :us}] => Date.new(1982,12,25),
|
222
|
+
['25-12-82', {date: :euro}] => Date.new(1982,12,25),
|
223
|
+
['25-12-1982', {date: :euro}] => Date.new(1982,12,25),
|
224
|
+
|
225
|
+
'12/25/82' => '12/25/82',
|
226
|
+
|
227
|
+
',1' => ',1', # not a csv parser
|
228
|
+
',1,' => ',1,', # not a csv parser
|
229
|
+
'1,2,3' => '1,2,3', # not a csv parser
|
230
|
+
'[1,2,3]' => [1,2,3],
|
231
|
+
YAML.dump('a' => 1) => { 'a' => 1 },
|
232
|
+
YAML.dump(a: 1) => { ':a' => 1 }, # doesn't parse symbols
|
233
|
+
YAML.dump('a' => 1, 5 => "c\n3") => { 'a' => 1, 5 => "c\n3" },
|
234
|
+
MultiJson.dump(a: 1) => { 'a' => 1 }, # json always loses symbols
|
235
|
+
MultiJson.dump(a: 1, 5 => "c\n3") => { 'a' => 1, '5' => "c\n3" },
|
236
|
+
}
|
237
|
+
|
238
|
+
# TwitterCldr.supported_locales.each do |locale|
|
239
|
+
# 1.upto(9).map do |power|
|
240
|
+
# num = (rand * (10 ** power)).round(4)
|
241
|
+
# # a[[num.localize(locale).to_s, {locale: locale}]] = num
|
242
|
+
# a[[num.localize(locale).to_s, { locale: locale }]] = num
|
243
|
+
# # a[num.localize(locale).to_currency.to_s] = num
|
244
|
+
# end
|
245
|
+
# end
|
246
|
+
|
247
|
+
# and next dates!
|
248
|
+
# Time.now.localize(:es).to_full_s
|
249
|
+
|
250
|
+
# One example per fixture: the input is parsed as given and again wrapped in
# tabs, and both results must meet the same expectation.
a.each do |input, expected|
  input = Array.wrap input
  locale = if input[1].is_a?(Hash)
    input[1][:locale]
  end
  it "#{locale ? "(#{locale}) " : nil}parses #{input[0].inspect} as #{expected.inspect}" do
    # NaN never equals itself and floats are compared to 8 decimal places;
    # everything else must match exactly.
    check = lambda do |actual|
      if expected.is_a?(Float) and expected.nan?
        expect(actual.nan?).to eq(true)
      elsif expected.is_a?(Float) and actual.is_a?(Float)
        expect(actual.round(8)).to eq(expected.round(8))
      else
        expect(actual).to eq(expected)
      end
    end

    got = Parse.ver0_1_0(*input)
    # $lines << [ "Parse.parse(#{input.inspect})".ljust(45), "#=> #{got.inspect}" ].join
    check.call(got)

    # surrounding whitespace must not change the outcome; check the padded
    # result itself rather than re-checking +got+
    input_with_spaces = [ "\t" + input[0] + "\t", input[1] ]
    got_with_spaces = Parse.ver0_1_0(*input_with_spaces)
    check.call(got_with_spaces)
  end
end
|
277
|
+
end
|
metadata
CHANGED
@@ -1,17 +1,31 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: safe_yaml
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: activesupport
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
16
30
|
requirements:
|
17
31
|
- - ">="
|
@@ -80,20 +94,6 @@ dependencies:
|
|
80
94
|
- - ">="
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: activesupport
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: pry
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -125,10 +125,14 @@ files:
|
|
125
125
|
- README.md
|
126
126
|
- Rakefile
|
127
127
|
- lib/parse.rb
|
128
|
+
- lib/parse/algorithm.rb
|
129
|
+
- lib/parse/algorithm/ver0_0_1.rb
|
130
|
+
- lib/parse/algorithm/ver0_1_0.rb
|
128
131
|
- lib/parse/version.rb
|
129
132
|
- parse.gemspec
|
130
133
|
- spec/parse_spec.rb
|
131
134
|
- spec/parse_ver0_0_1_spec.rb
|
135
|
+
- spec/parse_ver0_1_0_spec.rb
|
132
136
|
- spec/spec_helper.rb
|
133
137
|
homepage: https://github.com/seamusabshere/parse
|
134
138
|
licenses:
|
@@ -150,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
150
154
|
version: '0'
|
151
155
|
requirements: []
|
152
156
|
rubyforge_project:
|
153
|
-
rubygems_version: 2.2.
|
157
|
+
rubygems_version: 2.2.2
|
154
158
|
signing_key:
|
155
159
|
specification_version: 4
|
156
160
|
summary: Detect and convert short strings into integers, floats, dates, times, booleans,
|
@@ -158,4 +162,5 @@ summary: Detect and convert short strings into integers, floats, dates, times, b
|
|
158
162
|
test_files:
|
159
163
|
- spec/parse_spec.rb
|
160
164
|
- spec/parse_ver0_0_1_spec.rb
|
165
|
+
- spec/parse_ver0_1_0_spec.rb
|
161
166
|
- spec/spec_helper.rb
|