parse 0.0.1.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +12 -0
- data/README.md +17 -19
- data/lib/parse.rb +11 -89
- data/lib/parse/algorithm.rb +8 -0
- data/lib/parse/algorithm/ver0_0_1.rb +99 -0
- data/lib/parse/algorithm/ver0_1_0.rb +149 -0
- data/lib/parse/version.rb +1 -1
- data/parse.gemspec +3 -2
- data/spec/parse_spec.rb +2 -2
- data/spec/parse_ver0_0_1_spec.rb +1 -1
- data/spec/parse_ver0_1_0_spec.rb +277 -0
- metadata +22 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 321f7a0116ae940b829a45faedb2fe731b3ada97
|
4
|
+
data.tar.gz: 6599e616962df9eac3ff27f31f4d9f11418270ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1d0fc63cabe88dcea9ae7f1049c99288782ffe4ebcaa575a2ae30b22f5f84aa4466223158b152d07d24118af84792eac77e40425c62690f25fe36692f9bafb9
|
7
|
+
data.tar.gz: 8faa4572ebe3ae57960e983596c98324c25188e576d1cd5d1a15d41561ae0818cbda6d564d3c603f6ef043081ca8cd550034bbcda8d7e072180a524dac4e1794
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
0.1.0
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Not parsed as numbers anymore: "10_14" or "10_400" or "-1_2e3_5" or "-$123_456.7" -- such underscore-separated values are very unlikely to be seen in the wild
|
6
|
+
* Not parsed as numbers anymore: '1,2' or '-1,2' or '1,20' or '1,22.0' -- too hard to distinguish from CSV
|
7
|
+
|
8
|
+
* Enhancements
|
9
|
+
|
10
|
+
* Using a proper versioning scheme - every new version of the algorithm involves a minor version bump!
|
11
|
+
* Recover from YAML parsing errors in Ruby 1.9
|
12
|
+
|
1
13
|
0.0.1.1 / 2014-02-06
|
2
14
|
|
3
15
|
* Bug fixes
|
data/README.md
CHANGED
@@ -4,10 +4,12 @@ Detect and convert short strings into integers, floats, dates, times, booleans,
|
|
4
4
|
|
5
5
|
## Note on versions
|
6
6
|
|
7
|
-
You can always use `Parse.parse`. It will always point to the most recent version of the algorithm (currently `Parse.
|
7
|
+
You can always use `Parse.parse`. It will always point to the most recent version of the algorithm (currently `Parse.ver0_1_0`).
|
8
8
|
|
9
9
|
If the algorithm changes and you need the old version, you can reference it by its version number. For example, `Parse.ver0_0_1`.
|
10
10
|
|
11
|
+
Since almost any change to the algorithm is a breaking change, there are going to be lots of minor version bumps (as opposed to patches).
|
12
|
+
|
11
13
|
## Usage
|
12
14
|
|
13
15
|
You get the idea:
|
@@ -39,26 +41,24 @@ More esoteric stuff:
|
|
39
41
|
Parse.parse("-") #=> nil
|
40
42
|
Parse.parse("?") #=> nil
|
41
43
|
Parse.parse("-8e-05") #=> -8.0e-05
|
42
|
-
Parse.parse("-
|
44
|
+
Parse.parse("-12.5e-13") #=> -1.25e-12
|
43
45
|
Parse.parse("05753") #=> 5753
|
44
|
-
Parse.parse("
|
45
|
-
Parse.parse("15_00_0") #=> 15000
|
46
|
+
Parse.parse("15000") #=> 15000
|
46
47
|
Parse.parse("15.0") #=> 15.0
|
47
48
|
Parse.parse("15,000.0") #=> 15000.0
|
48
|
-
Parse.parse("
|
49
|
-
Parse.parse("15_00_0.0") #=> 15000.0
|
49
|
+
Parse.parse("15000.0") #=> 15000.0
|
50
50
|
Parse.parse("0015") #=> 15
|
51
51
|
Parse.parse("0015.0") #=> 15.0
|
52
|
-
Parse.parse("
|
52
|
+
Parse.parse("0015.0") #=> 15.0
|
53
53
|
Parse.parse("0x15") #=> 21
|
54
54
|
Parse.parse("0o15") #=> 13
|
55
55
|
Parse.parse("8e-05") #=> 8.0e-05
|
56
|
-
Parse.parse("
|
56
|
+
Parse.parse("12.5e-13") #=> 1.25e-12
|
57
57
|
Parse.parse("0$123.4") #=> 123.4
|
58
58
|
Parse.parse("$15,000") #=> 15000
|
59
59
|
Parse.parse("0$15,000") #=> 15000
|
60
|
-
Parse.parse("$
|
61
|
-
Parse.parse("$
|
60
|
+
Parse.parse("$123456") #=> 123456
|
61
|
+
Parse.parse("$123456.7") #=> 123456.7
|
62
62
|
Parse.parse("10,000,000") #=> 10000000
|
63
63
|
Parse.parse("10,000,000.00") #=> 10000000.0
|
64
64
|
Parse.parse("$10,000,000.00") #=> 10000000.0
|
@@ -66,15 +66,13 @@ More esoteric stuff:
|
|
66
66
|
Parse.parse("$010,000,000.00") #=> 10000000.0
|
67
67
|
Parse.parse("-15") #=> -15
|
68
68
|
Parse.parse("-15,000") #=> -15000
|
69
|
-
Parse.parse("-
|
70
|
-
Parse.parse("-15_00_0") #=> -15000
|
69
|
+
Parse.parse("-15000") #=> -15000
|
71
70
|
Parse.parse("-15.0") #=> -15.0
|
72
71
|
Parse.parse("-15,000.0") #=> -15000.0
|
73
|
-
Parse.parse("-
|
74
|
-
Parse.parse("-
|
72
|
+
Parse.parse("-15000.0") #=> -15000.0
|
73
|
+
Parse.parse("-15000.0") #=> -15000.0
|
75
74
|
Parse.parse("00-15") #=> -15
|
76
75
|
Parse.parse("00-15.0") #=> -15.0
|
77
|
-
Parse.parse("0_0-15.0") #=> "0_0-15.0"
|
78
76
|
Parse.parse("-0x15") #=> -21
|
79
77
|
Parse.parse("-0o15") #=> -13
|
80
78
|
Parse.parse("-$123.4") #=> -123.4
|
@@ -82,10 +80,10 @@ More esoteric stuff:
|
|
82
80
|
Parse.parse("0($123.4)") #=> -123.4
|
83
81
|
Parse.parse("-$15,000") #=> -15000
|
84
82
|
Parse.parse("($15,000)") #=> -15000
|
85
|
-
Parse.parse("-$
|
86
|
-
Parse.parse("($
|
87
|
-
Parse.parse("-$
|
88
|
-
Parse.parse("($
|
83
|
+
Parse.parse("-$123,456") #=> -123456
|
84
|
+
Parse.parse("($123,456)") #=> -123456
|
85
|
+
Parse.parse("-$123,456.7") #=> -123456.7
|
86
|
+
Parse.parse("($123,456.7)") #=> -123456.7
|
89
87
|
Parse.parse("-10,000,000") #=> -10000000
|
90
88
|
Parse.parse("(10,000,000)") #=> -10000000
|
91
89
|
Parse.parse("-10,000,000.00") #=> -10000000.0
|
data/lib/parse.rb
CHANGED
@@ -1,102 +1,24 @@
|
|
1
1
|
require "parse/version"
|
2
|
+
require 'parse/algorithm'
|
3
|
+
require 'parse/algorithm/ver0_0_1'
|
4
|
+
require 'parse/algorithm/ver0_1_0'
|
2
5
|
|
3
6
|
require 'date'
|
4
7
|
require 'yaml'
|
5
8
|
require 'safe_yaml/load'
|
9
|
+
require 'active_support/core_ext'
|
6
10
|
|
7
11
|
module Parse
|
8
|
-
# only need to deal with stuff not caught by YAML or JSON
|
9
|
-
NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!'] # from bigml's list
|
10
|
-
NAN = [ 'NaN' ]
|
11
|
-
INFINITY = [ '#DIV/0', 'Infinity' ]
|
12
|
-
NEG_INFINITY = [ '-Infinity' ]
|
13
|
-
DATE = {
|
14
|
-
euro: ['%d-%m-%Y', '%d-%m-%y'],
|
15
|
-
us: ['%m-%d-%Y', '%m-%d-%y'],
|
16
|
-
}
|
17
|
-
|
18
12
|
def self.parse(raw, options = nil)
|
19
|
-
|
13
|
+
ver0_1_0 raw, options
|
20
14
|
end
|
21
15
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
def self.ver0_0_1(raw, options = nil)
|
26
|
-
return raw unless raw.is_a? String
|
27
|
-
|
28
|
-
memo = raw.strip
|
29
|
-
|
30
|
-
return nil if NULL.include? memo
|
31
|
-
return 1.0/0 if INFINITY.include? memo
|
32
|
-
return -1.0/0 if NEG_INFINITY.include? memo
|
33
|
-
return 0.0/0 if NAN.include? memo
|
34
|
-
|
35
|
-
if options and options[:date]
|
36
|
-
yyyy, yy = DATE.fetch options[:date]
|
37
|
-
memo.sub!(/0+/, '')
|
38
|
-
memo.gsub! '/', '-'
|
39
|
-
if memo =~ /\d{4,}/ # yyyy
|
40
|
-
return Date.strptime(memo, yyyy)
|
41
|
-
else
|
42
|
-
return Date.strptime(memo, yy)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
not_numeric = nil
|
47
|
-
not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
|
48
|
-
not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
|
49
|
-
not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
|
50
|
-
possible_numeric = !not_numeric
|
51
|
-
accounting_negative = nil
|
52
|
-
percentage = nil
|
53
|
-
|
54
|
-
if possible_numeric
|
55
|
-
accounting_negative = memo =~ /\A[0$]*\([0$]*/
|
56
|
-
percentage = memo.end_with?('%')
|
57
|
-
memo.sub! /%\z/, '' if percentage
|
58
|
-
memo.delete!('()') if accounting_negative # accounting negative
|
59
|
-
# in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
|
60
|
-
memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
|
61
|
-
memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
|
62
|
-
if memo.include?(',')
|
63
|
-
a, b = memo.split('.', 2)
|
64
|
-
a.delete! ','
|
65
|
-
memo = b ? [a, b].join('.') : a
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
not_safe_for_yaml = nil
|
70
|
-
not_safe_for_yaml ||= memo.include?('#')
|
71
|
-
not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ #1,2,3, maybe a csv
|
72
|
-
safe_for_yaml = !not_safe_for_yaml
|
73
|
-
|
74
|
-
if safe_for_yaml
|
75
|
-
begin
|
76
|
-
memo = SafeYAML.load memo
|
77
|
-
rescue
|
78
|
-
$stderr.puts "#{memo.inspect} => #{$!}"
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
if possible_numeric
|
83
|
-
case memo
|
84
|
-
when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
|
85
|
-
# scientific notation
|
86
|
-
memo = memo.to_f
|
87
|
-
when /\A[+\-]?0o/
|
88
|
-
# octal per yaml 1.2
|
89
|
-
memo = memo.to_i 8
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
if memo.is_a?(String)
|
94
|
-
# compress whitespace
|
95
|
-
memo.gsub! /\s+/, ' '
|
96
|
-
end
|
16
|
+
def self.ver0_1_0(*args)
|
17
|
+
Algorithm::Ver0_1_0.new(*args).result
|
18
|
+
end
|
97
19
|
|
98
|
-
|
99
|
-
|
100
|
-
memo
|
20
|
+
def self.ver0_0_1(*args)
|
21
|
+
Algorithm::Ver0_0_1.new(*args).result
|
101
22
|
end
|
23
|
+
|
102
24
|
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module Parse
|
2
|
+
module Algorithm
|
3
|
+
class Ver0_0_1
|
4
|
+
NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!'] # from bigml's list
|
5
|
+
DATE = {
|
6
|
+
euro: ['%d-%m-%Y', '%d-%m-%y'],
|
7
|
+
us: ['%m-%d-%Y', '%m-%d-%y'],
|
8
|
+
}
|
9
|
+
|
10
|
+
attr_reader :raw
|
11
|
+
attr_reader :options
|
12
|
+
def initialize(raw, options = nil)
|
13
|
+
@raw = raw
|
14
|
+
@options = options
|
15
|
+
end
|
16
|
+
|
17
|
+
# @private
|
18
|
+
# use YAML to parse stuff like '1.5'
|
19
|
+
# ruby's yaml is 1.1, which means it does weird stuff with '001' (fixed in 1.2, which jruby has)
|
20
|
+
def result
|
21
|
+
return raw unless raw.is_a? String
|
22
|
+
|
23
|
+
memo = raw.strip
|
24
|
+
|
25
|
+
return nil if NULL.include? memo
|
26
|
+
return 1.0/0 if INFINITY.include? memo
|
27
|
+
return -1.0/0 if NEG_INFINITY.include? memo
|
28
|
+
return 0.0/0 if NAN.include? memo
|
29
|
+
|
30
|
+
if options and options[:date]
|
31
|
+
yyyy, yy = DATE.fetch options[:date]
|
32
|
+
memo.sub!(/0+/, '')
|
33
|
+
memo.gsub! '/', '-'
|
34
|
+
if memo =~ /\d{4,}/ # yyyy
|
35
|
+
return Date.strptime(memo, yyyy)
|
36
|
+
else
|
37
|
+
return Date.strptime(memo, yy)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
not_numeric = nil
|
42
|
+
not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
|
43
|
+
not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
|
44
|
+
not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
|
45
|
+
possible_numeric = !not_numeric
|
46
|
+
accounting_negative = nil
|
47
|
+
percentage = nil
|
48
|
+
|
49
|
+
if possible_numeric
|
50
|
+
accounting_negative = memo =~ /\A[0$]*\([0$]*/
|
51
|
+
percentage = memo.end_with?('%')
|
52
|
+
memo.sub! /%\z/, '' if percentage
|
53
|
+
memo.delete!('()') if accounting_negative # accounting negative
|
54
|
+
# in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
|
55
|
+
memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
|
56
|
+
memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
|
57
|
+
if memo.include?(',')
|
58
|
+
a, b = memo.split('.', 2)
|
59
|
+
a.delete! ','
|
60
|
+
memo = b ? [a, b].join('.') : a
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
not_safe_for_yaml = nil
|
65
|
+
not_safe_for_yaml ||= memo.include?('#')
|
66
|
+
not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ #1,2,3, maybe a csv
|
67
|
+
safe_for_yaml = !not_safe_for_yaml
|
68
|
+
|
69
|
+
if safe_for_yaml
|
70
|
+
begin
|
71
|
+
memo = SafeYAML.load memo
|
72
|
+
rescue
|
73
|
+
$stderr.puts "#{memo.inspect} => #{$!}"
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
if possible_numeric
|
78
|
+
case memo
|
79
|
+
when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
|
80
|
+
# scientific notation
|
81
|
+
memo = memo.to_f
|
82
|
+
when /\A[+\-]?0o/
|
83
|
+
# octal per yaml 1.2
|
84
|
+
memo = memo.to_i 8
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
if memo.is_a?(String)
|
89
|
+
# compress whitespace
|
90
|
+
memo.gsub! /\s+/, ' '
|
91
|
+
end
|
92
|
+
|
93
|
+
memo = memo / 100.0 if percentage
|
94
|
+
memo = -memo if accounting_negative
|
95
|
+
memo
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module Parse
|
2
|
+
module Algorithm
|
3
|
+
class Ver0_1_0
|
4
|
+
NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!', '00/00/00', '0000-00-00'] # from bigml's list
|
5
|
+
REGION_DATE_FORMAT = {
|
6
|
+
euro: ['%d-%m-%Y', '%d-%m-%y'],
|
7
|
+
us: ['%m-%d-%Y', '%m-%d-%y'],
|
8
|
+
iso: ['%Y-%m-%d', '%y-%m-%d'], # second one is silly
|
9
|
+
}
|
10
|
+
DATE_DETECT = {
|
11
|
+
%r{\A0*[12]\d\d\d[\-/](?:(?:0[1-9])|(?:1[0-2]))[\-/][1-9]\d\z} => :iso, # $1 will be delimiter
|
12
|
+
}
|
13
|
+
EMPTY_OPTIONS = {}
|
14
|
+
|
15
|
+
attr_reader :raw
|
16
|
+
attr_reader :options
|
17
|
+
def initialize(raw, options = nil)
|
18
|
+
@raw = raw
|
19
|
+
@options = options || EMPTY_OPTIONS
|
20
|
+
end
|
21
|
+
|
22
|
+
def result
|
23
|
+
return raw unless raw.is_a? String
|
24
|
+
|
25
|
+
memo = raw.strip
|
26
|
+
|
27
|
+
return nil if NULL.include? memo
|
28
|
+
return 1.0/0 if INFINITY.include? memo
|
29
|
+
return -1.0/0 if NEG_INFINITY.include? memo
|
30
|
+
return 0.0/0 if NAN.include? memo
|
31
|
+
|
32
|
+
date_region = if options[:date]
|
33
|
+
options[:date]
|
34
|
+
else
|
35
|
+
catch :hit do
|
36
|
+
DATE_DETECT.each do |pattern, date_region|
|
37
|
+
# binding.pry if memo.include?('2011-')
|
38
|
+
if memo =~ pattern
|
39
|
+
throw :hit, date_region
|
40
|
+
end
|
41
|
+
end
|
42
|
+
nil
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
if date_region.nil? and options[:type] == Date
|
47
|
+
date_region = :iso
|
48
|
+
end
|
49
|
+
|
50
|
+
if date_region
|
51
|
+
yyyy, yy = REGION_DATE_FORMAT.fetch date_region
|
52
|
+
is_yyyy = memo =~ /[1-9]\d\d\d/
|
53
|
+
memo.sub! /\A0+/, ''
|
54
|
+
memo.gsub! '/', '-'
|
55
|
+
if is_yyyy
|
56
|
+
if memo.length < 10 and date_region == :iso
|
57
|
+
return Date.parse(memo)
|
58
|
+
else
|
59
|
+
return Date.strptime(memo, yyyy)
|
60
|
+
end
|
61
|
+
else
|
62
|
+
return Date.strptime(memo, yy)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
possible_numeric = nil
|
67
|
+
not_numeric = nil
|
68
|
+
certain_numeric = nil
|
69
|
+
if [Numeric, Integer, Float].include?(options[:type])
|
70
|
+
certain_numeric = true
|
71
|
+
possible_numeric = true
|
72
|
+
not_numeric = false
|
73
|
+
else
|
74
|
+
# not_numeric ||= memo =~ /[1-9][^)\d_,%.eE]/ # has a dash in the middle
|
75
|
+
not_numeric ||= memo.include?('_')
|
76
|
+
not_numeric ||= memo =~ %r{[1-9][/-]\d}
|
77
|
+
not_numeric ||= memo =~ /,\d{1,2}(?:[.\D]|\z)/
|
78
|
+
not_numeric ||= memo.scan(/[^\d_,%.eE]/).length > memo.scan(/[\d_,%.eE]/).length
|
79
|
+
not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
|
80
|
+
possible_numeric = !not_numeric
|
81
|
+
end
|
82
|
+
accounting_negative = nil
|
83
|
+
percentage = nil
|
84
|
+
if possible_numeric
|
85
|
+
accounting_negative = memo =~ /\A[0$]*\([0$]*/
|
86
|
+
percentage = memo.end_with?('%')
|
87
|
+
memo.sub! /%\z/, '' if percentage
|
88
|
+
memo.delete!('()') if accounting_negative # accounting negative
|
89
|
+
# in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
|
90
|
+
memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
|
91
|
+
memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
|
92
|
+
memo.sub!('D', 'e') if memo =~ /\A[+\-]?[\d.]+D[+\-]?[\d.]+\z/ # fortran double precision
|
93
|
+
if memo.include?(',')
|
94
|
+
a, b = memo.split('.', 2)
|
95
|
+
a.delete! ','
|
96
|
+
memo = b ? [a, b].join('.') : a
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
if certain_numeric
|
101
|
+
memo.gsub! /[a-z]/i, ''
|
102
|
+
end
|
103
|
+
|
104
|
+
not_safe_for_yaml = nil
|
105
|
+
not_safe_for_yaml ||= memo =~ /\A(on|off)\z/i
|
106
|
+
not_safe_for_yaml ||= memo.include?('#')
|
107
|
+
not_safe_for_yaml ||= memo =~ /\A[@&,]/
|
108
|
+
not_safe_for_yaml ||= not_numeric && memo.start_with?('0')
|
109
|
+
not_safe_for_yaml ||= not_numeric && memo =~ /\A[^{\[]*\d[,_]/ #1,2,3, maybe a csv
|
110
|
+
|
111
|
+
safe_for_yaml = !not_safe_for_yaml
|
112
|
+
|
113
|
+
if safe_for_yaml
|
114
|
+
begin
|
115
|
+
memo = SafeYAML.load memo
|
116
|
+
rescue Exception # Psych::SyntaxError will blow up plain rescue in 1.9.3
|
117
|
+
$stderr.puts "#{memo.inspect} => #{$!}"
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
if possible_numeric
|
122
|
+
case memo
|
123
|
+
when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
|
124
|
+
# scientific notation
|
125
|
+
memo = memo.to_f
|
126
|
+
when /\A[+\-]?0o/
|
127
|
+
# octal per yaml 1.2
|
128
|
+
memo = memo.to_i 8
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
if memo.is_a?(String)
|
133
|
+
# compress whitespace
|
134
|
+
memo.gsub! /\s+/, ' '
|
135
|
+
end
|
136
|
+
|
137
|
+
memo = memo / 100.0 if percentage
|
138
|
+
memo = -memo if accounting_negative
|
139
|
+
memo
|
140
|
+
rescue
|
141
|
+
if options and options[:ignore_error]
|
142
|
+
# nothing to see here
|
143
|
+
else
|
144
|
+
raise "#{memo.inspect} => #{$!}"
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
data/lib/parse/version.rb
CHANGED
data/parse.gemspec
CHANGED
@@ -18,12 +18,13 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_runtime_dependency 'safe_yaml'
|
21
|
+
spec.add_runtime_dependency 'safe_yaml', '>=1'
|
22
|
+
spec.add_runtime_dependency 'activesupport'
|
22
23
|
|
23
24
|
spec.add_development_dependency "bundler", "~> 1.5"
|
24
25
|
spec.add_development_dependency "rake"
|
25
26
|
spec.add_development_dependency "rspec"
|
26
27
|
spec.add_development_dependency 'multi_json'
|
27
|
-
spec.add_development_dependency 'activesupport'
|
28
28
|
spec.add_development_dependency 'pry'
|
29
|
+
# spec.add_development_dependency 'twitter_cldr'
|
29
30
|
end
|
data/spec/parse_spec.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Parse do
|
4
|
-
it "should parse with version 0.0.
|
4
|
+
it "should parse with version 0.1.0 of the algorithm" do
|
5
5
|
v = " 1990-04-03 "
|
6
|
-
expect(Parse.parse(v)).to eq(Parse.
|
6
|
+
expect(Parse.parse(v)).to eq(Parse.ver0_1_0(v))
|
7
7
|
end
|
8
8
|
end
|
data/spec/parse_ver0_0_1_spec.rb
CHANGED
@@ -0,0 +1,277 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'twitter_cldr'
|
3
|
+
|
4
|
+
describe Parse::Algorithm::Ver0_1_0 do
|
5
|
+
same = [
|
6
|
+
'1,2',
|
7
|
+
'1,20',
|
8
|
+
'1,2.0',
|
9
|
+
'-1,2',
|
10
|
+
'-1,20',
|
11
|
+
'-1,2.0',
|
12
|
+
'01,2',
|
13
|
+
'01,20',
|
14
|
+
'01,2.0',
|
15
|
+
'15_000',
|
16
|
+
'15_00_0',
|
17
|
+
'15_000.0',
|
18
|
+
'15_00_0.0',
|
19
|
+
'0_015.0', # just weird
|
20
|
+
'1_2.5e-1_3',
|
21
|
+
'-1_2.5e-1_3',
|
22
|
+
'$123_456',
|
23
|
+
'$123_456.7',
|
24
|
+
'-15_000',
|
25
|
+
'-15_00_0',
|
26
|
+
'-15_000.0',
|
27
|
+
'-15_00_0.0',
|
28
|
+
'0_0-15.0', # just weird
|
29
|
+
'-$123_456',
|
30
|
+
'($123_456)',
|
31
|
+
'-$123_456.7',
|
32
|
+
'($123_456.7)',
|
33
|
+
'10_14_A',
|
34
|
+
'10_14',
|
35
|
+
'10_140',
|
36
|
+
]
|
37
|
+
same.each do |v|
|
38
|
+
it "parses #{v.inspect} as itself (yaml=#{SafeYAML.load(v).inspect})" do
|
39
|
+
expect(Parse.ver0_1_0(v)).to eq(v)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
a = {
|
44
|
+
|
45
|
+
"@ foo" => "@ foo",
|
46
|
+
", foo" => ", foo",
|
47
|
+
"044-1-276-000" => "044-1-276-000",
|
48
|
+
|
49
|
+
['1 BEDROOMS', { type: Numeric } ] => 1,
|
50
|
+
'1 BEDROOMS' => '1 BEDROOMS',
|
51
|
+
|
52
|
+
[ '2.4 SQFT', { type: Numeric } ] => 2.4,
|
53
|
+
'2.4 SQFT' => '2.4 SQFT',
|
54
|
+
|
55
|
+
['000', { date: :us, ignore_error: true}] => nil,
|
56
|
+
['7/7/2004', {date: :us}] => Date.new(2004,7,7),
|
57
|
+
"999 HOLY CROSS ROAD, COLCHESTER, VT 05446" => "999 HOLY CROSS ROAD, COLCHESTER, VT 05446",
|
58
|
+
|
59
|
+
'00020110628' => 20110628,
|
60
|
+
'0002011-06-28' => Date.new(2011,6,28),
|
61
|
+
'0002011/06/28' => Date.new(2011,6,28),
|
62
|
+
['00020110628', {date: :iso}] => Date.new(2011,6,28),
|
63
|
+
['00020110628', {type: Date}] => Date.new(2011,6,28),
|
64
|
+
|
65
|
+
'00019800628' => 19800628,
|
66
|
+
'0001980-06-28' => Date.new(1980,6,28),
|
67
|
+
'0001980/06/28' => Date.new(1980,6,28),
|
68
|
+
['00019800628', {date: :iso}] => Date.new(1980,6,28),
|
69
|
+
['00019800628', {type: Date}] => Date.new(1980,6,28),
|
70
|
+
|
71
|
+
'00030000628' => 30000628,
|
72
|
+
'0003000-06-28' => '0003000-06-28',
|
73
|
+
'0003000/06/28' => '0003000/06/28',
|
74
|
+
['00030000628', {date: :iso}] => Date.new(3000,6,28),
|
75
|
+
['00030000628', {type: Date}] => Date.new(3000,6,28),
|
76
|
+
|
77
|
+
['', {type: Numeric}] => nil,
|
78
|
+
|
79
|
+
# fortran double precision
|
80
|
+
'0.225120000000000D+06' => 0.22512e6,
|
81
|
+
'0.341913000000000D+07' => 0.341913e7,
|
82
|
+
'0.2500000E-01' => 0.25e-1,
|
83
|
+
'3.1D0' => 3.1,
|
84
|
+
'-2.D0' => -2.0,
|
85
|
+
|
86
|
+
'8e-05' => 8e-5,
|
87
|
+
'8e+4' => 8e4,
|
88
|
+
'8.0e+4' => 8.0e4,
|
89
|
+
'8e-4' => 8e-4,
|
90
|
+
'8.0e-4' => 8.0e-4,
|
91
|
+
'-8e+4' => -8e4,
|
92
|
+
'-8.0e+4' => -8.0e4,
|
93
|
+
'-8e-4' => -8e-4,
|
94
|
+
'-8.0e-4' => -8.0e-4,
|
95
|
+
'8E+4' => 8e4,
|
96
|
+
'8.0E+4' => 8.0e4,
|
97
|
+
'8E-4' => 8e-4,
|
98
|
+
'8.0E-4' => 8.0e-4,
|
99
|
+
'-8E+4' => -8e4,
|
100
|
+
'-8.0E+4' => -8.0e4,
|
101
|
+
'-8E-4' => -8e-4,
|
102
|
+
'-8.0E-4' => -8.0e-4,
|
103
|
+
|
104
|
+
# http://dojotoolkit.org/reference-guide/1.9/dojo/number.html
|
105
|
+
# '1,000,000.00' => 1_000_000.0,
|
106
|
+
# '1.000.000,00' => 1_000_000.0, # german
|
107
|
+
# '1 000 000,00' => 1_000_000.0, # french
|
108
|
+
# '10,00,000.00' => 1_000_000.0, # indian
|
109
|
+
|
110
|
+
'060-10-01' => '60-10-01',
|
111
|
+
'OFF' => 'OFF',
|
112
|
+
'ON' => 'ON',
|
113
|
+
|
114
|
+
'& P4' => '& P4',
|
115
|
+
|
116
|
+
# EVERYTHING BELOW IS SAME AS 0.0.1
|
117
|
+
|
118
|
+
'' => nil,
|
119
|
+
'nil' => nil,
|
120
|
+
'15' => 15,
|
121
|
+
'15,000' => 15_000,
|
122
|
+
'15.0' => 15.0,
|
123
|
+
'15,000.0' => 15_000.0,
|
124
|
+
'0015' => 15, # not octal
|
125
|
+
'0015.0' => 15.0, # not octal
|
126
|
+
'0x15' => 0x15, # hex
|
127
|
+
'0o15' => 015, # octal
|
128
|
+
'8e-05' => 8e-05,
|
129
|
+
'12.5e-13' => 12.5e-13,
|
130
|
+
'-12.5e-13' => -12.5e-13,
|
131
|
+
'$123.4' => 123.4,
|
132
|
+
'0$123.4' => 123.4,
|
133
|
+
'$15,000' => 15_000,
|
134
|
+
'0$15,000' => 15_000,
|
135
|
+
'10,000,000' => 10_000_000,
|
136
|
+
'10,000,000.00' => 10_000_000.0,
|
137
|
+
'$10,000,000.00' => 10_000_000.0,
|
138
|
+
'0$10,000,000.00' => 10_000_000.0,
|
139
|
+
'$010,000,000.00' => 10_000_000.0,
|
140
|
+
|
141
|
+
'-15' => -15,
|
142
|
+
'-15,000' => -15_000,
|
143
|
+
'-15.0' => -15.0,
|
144
|
+
'-15,000.0' => -15_000.0,
|
145
|
+
'00-15' => -15, # not octal
|
146
|
+
'00-15.0' => -15.0, # not octal
|
147
|
+
'-0x15' => -0x15, # hex
|
148
|
+
'-0o15' => -015, # octal
|
149
|
+
'-8e-05' => -8e-05,
|
150
|
+
'-$123.4' => -123.4,
|
151
|
+
'($123.4)' => -123.4,
|
152
|
+
'0($123.4)' => -123.4,
|
153
|
+
'-$15,000' => -15_000,
|
154
|
+
'($15,000)' => -15_000,
|
155
|
+
'-$123456' => -123_456,
|
156
|
+
'($123456)' => -123_456,
|
157
|
+
'-$123456.7' => -123_456.7,
|
158
|
+
'($123456.7)' => -123_456.7,
|
159
|
+
'-$123,456' => -123_456,
|
160
|
+
'($123,456)' => -123_456,
|
161
|
+
'-$123,456.7' => -123_456.7,
|
162
|
+
'($123,456.7)' => -123_456.7,
|
163
|
+
'-10,000,000' => -10_000_000,
|
164
|
+
'(10,000,000)' => -10_000_000,
|
165
|
+
'-10,000,000.00' => -10_000_000.0,
|
166
|
+
'(10,000,000.00)' => -10_000_000.0,
|
167
|
+
'-10000000' => -10_000_000,
|
168
|
+
'(10000000)' => -10_000_000,
|
169
|
+
'-10000000.00' => -10_000_000.0,
|
170
|
+
'(10000000.00)' => -10_000_000.0,
|
171
|
+
'1,200' => 1_200,
|
172
|
+
'1,200.0' => 1_200.0,
|
173
|
+
'1.0,2' => '1.0,2',
|
174
|
+
'1.0,2.0' => '1.0,2.0',
|
175
|
+
'-1,200' => -1_200,
|
176
|
+
'-1,200.0' => -1_200.0,
|
177
|
+
'-1.0,2' => '-1.0,2',
|
178
|
+
'-1.0,2.0' => '-1.0,2.0',
|
179
|
+
'01,200' => 1_200,
|
180
|
+
'01,200.0' => 1_200.0,
|
181
|
+
'01.0,2' => '01.0,2',
|
182
|
+
'01.0,2.0' => '01.0,2.0',
|
183
|
+
|
184
|
+
'05753' => 5753,
|
185
|
+
'true' => true,
|
186
|
+
'yes' => true,
|
187
|
+
'false' => false,
|
188
|
+
'no' => false,
|
189
|
+
'#DIV/0' => (1.0/0),
|
190
|
+
'#NAME?' => nil,
|
191
|
+
'Inf' => 'Inf',
|
192
|
+
'Infinity' => (1.0/0),
|
193
|
+
'-Infinity' => -(1.0/0),
|
194
|
+
'NaN' => 0.0/0, # need the dot
|
195
|
+
'.NaN' => 0.0/0, # NaN
|
196
|
+
'-.inf' => -(1.0/0), # -Infinity
|
197
|
+
'-' => nil, # per bigml
|
198
|
+
'?' => nil,
|
199
|
+
'1982-01-01' => Date.new(1982,1,1),
|
200
|
+
'2010-05-05 13:42:16 Z' => Time.parse('2010-05-05 13:42:16 Z'),
|
201
|
+
'2010-05-05 13:42:16 -02:00' => Time.parse('2010-05-05 13:42:16 -02:00'),
|
202
|
+
":not_a_symbol" => ':not_a_symbol',
|
203
|
+
'#hello' => '#hello',
|
204
|
+
"\n#hello\n#world" => '#hello #world',
|
205
|
+
"hello\nworld" => 'hello world', # whitespace compression
|
206
|
+
|
207
|
+
'0%' => 0.0,
|
208
|
+
'100%' => 1.0,
|
209
|
+
'50%' => 0.5,
|
210
|
+
'5%' => 0.05,
|
211
|
+
'00000%' => 0.0,
|
212
|
+
'0000100%' => 1.0,
|
213
|
+
'000050%' => 0.5,
|
214
|
+
'00005%' => 0.05,
|
215
|
+
|
216
|
+
['12/25/82', {date: :us}] => Date.new(1982,12,25),
|
217
|
+
['12/25/1982', {date: :us}] => Date.new(1982,12,25),
|
218
|
+
['25/12/82', {date: :euro}] => Date.new(1982,12,25),
|
219
|
+
['25/12/1982', {date: :euro}] => Date.new(1982,12,25),
|
220
|
+
['12-25-82', {date: :us}] => Date.new(1982,12,25),
|
221
|
+
['12-25-1982', {date: :us}] => Date.new(1982,12,25),
|
222
|
+
['25-12-82', {date: :euro}] => Date.new(1982,12,25),
|
223
|
+
['25-12-1982', {date: :euro}] => Date.new(1982,12,25),
|
224
|
+
|
225
|
+
'12/25/82' => '12/25/82',
|
226
|
+
|
227
|
+
',1' => ',1', # not a csv parser
|
228
|
+
',1,' => ',1,', # not a csv parser
|
229
|
+
'1,2,3' => '1,2,3', # not a csv parser
|
230
|
+
'[1,2,3]' => [1,2,3],
|
231
|
+
YAML.dump('a' => 1) => { 'a' => 1 },
|
232
|
+
YAML.dump(a: 1) => { ':a' => 1 }, # doesn't parse symbols
|
233
|
+
YAML.dump('a' => 1, 5 => "c\n3") => { 'a' => 1, 5 => "c\n3" },
|
234
|
+
MultiJson.dump(a: 1) => { 'a' => 1 }, # json always loses symbols
|
235
|
+
MultiJson.dump(a: 1, 5 => "c\n3") => { 'a' => 1, '5' => "c\n3" },
|
236
|
+
}
|
237
|
+
|
238
|
+
# TwitterCldr.supported_locales.each do |locale|
|
239
|
+
# 1.upto(9).map do |power|
|
240
|
+
# num = (rand * (10 ** power)).round(4)
|
241
|
+
# # a[[num.localize(locale).to_s, {locale: locale}]] = num
|
242
|
+
# a[[num.localize(locale).to_s, { locale: locale }]] = num
|
243
|
+
# # a[num.localize(locale).to_currency.to_s] = num
|
244
|
+
# end
|
245
|
+
# end
|
246
|
+
|
247
|
+
# and next dates!
|
248
|
+
# Time.now.localize(:es).to_full_s
|
249
|
+
|
250
|
+
a.each do |input, expected|
|
251
|
+
input = Array.wrap input
|
252
|
+
locale = if input[1].is_a?(Hash)
|
253
|
+
input[1][:locale]
|
254
|
+
end
|
255
|
+
it "#{locale ? "(#{locale}) " : nil}parses #{input[0].inspect} as #{expected.inspect}" do
|
256
|
+
got = Parse.ver0_1_0(*input)
|
257
|
+
# $lines << [ "Parse.parse(#{input.inspect})".ljust(45), "#=> #{got.inspect}" ].join
|
258
|
+
if expected.is_a?(Float) and expected.nan?
|
259
|
+
expect(got.nan?).to eq(true)
|
260
|
+
elsif expected.is_a?(Float) and got.is_a?(Float)
|
261
|
+
expect(got.round(8)).to eq(expected.round(8))
|
262
|
+
else
|
263
|
+
expect(got).to eq(expected)
|
264
|
+
end
|
265
|
+
|
266
|
+
input_with_spaces = [ "\t" + input[0] + "\t", input[1] ]
|
267
|
+
got_with_spaces = Parse.ver0_1_0(*input_with_spaces)
|
268
|
+
if expected.is_a?(Float) and expected.nan?
|
269
|
+
expect(got.nan?).to eq(true)
|
270
|
+
elsif expected.is_a?(Float) and got.is_a?(Float)
|
271
|
+
expect(got.round(8)).to eq(expected.round(8))
|
272
|
+
else
|
273
|
+
expect(got_with_spaces).to eq(expected)
|
274
|
+
end
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
metadata
CHANGED
@@ -1,17 +1,31 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: safe_yaml
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: activesupport
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
16
30
|
requirements:
|
17
31
|
- - ">="
|
@@ -80,20 +94,6 @@ dependencies:
|
|
80
94
|
- - ">="
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: activesupport
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: pry
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -125,10 +125,14 @@ files:
|
|
125
125
|
- README.md
|
126
126
|
- Rakefile
|
127
127
|
- lib/parse.rb
|
128
|
+
- lib/parse/algorithm.rb
|
129
|
+
- lib/parse/algorithm/ver0_0_1.rb
|
130
|
+
- lib/parse/algorithm/ver0_1_0.rb
|
128
131
|
- lib/parse/version.rb
|
129
132
|
- parse.gemspec
|
130
133
|
- spec/parse_spec.rb
|
131
134
|
- spec/parse_ver0_0_1_spec.rb
|
135
|
+
- spec/parse_ver0_1_0_spec.rb
|
132
136
|
- spec/spec_helper.rb
|
133
137
|
homepage: https://github.com/seamusabshere/parse
|
134
138
|
licenses:
|
@@ -150,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
150
154
|
version: '0'
|
151
155
|
requirements: []
|
152
156
|
rubyforge_project:
|
153
|
-
rubygems_version: 2.2.
|
157
|
+
rubygems_version: 2.2.2
|
154
158
|
signing_key:
|
155
159
|
specification_version: 4
|
156
160
|
summary: Detect and convert short strings into integers, floats, dates, times, booleans,
|
@@ -158,4 +162,5 @@ summary: Detect and convert short strings into integers, floats, dates, times, b
|
|
158
162
|
test_files:
|
159
163
|
- spec/parse_spec.rb
|
160
164
|
- spec/parse_ver0_0_1_spec.rb
|
165
|
+
- spec/parse_ver0_1_0_spec.rb
|
161
166
|
- spec/spec_helper.rb
|