parse 0.0.1.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7d04d59e0d0c1eadc1d912f109697f28b18eb6bc
4
- data.tar.gz: c0207dac3109de5a81e8265e0e339138392f76a9
3
+ metadata.gz: 321f7a0116ae940b829a45faedb2fe731b3ada97
4
+ data.tar.gz: 6599e616962df9eac3ff27f31f4d9f11418270ab
5
5
  SHA512:
6
- metadata.gz: 2047276d42adf38a1b31af30d8610ed1daae0254765c4051ed381a15da44504b345f90cf83f57fc83bdfab84dd89700dc21793f8eecd06c5f31148453ae85d25
7
- data.tar.gz: 392dc0723a262777f89086320a34c9ae50eec4197fce5f174ac087cd91b29538c660bb96a4d7fe0b39f108dc853ce32956aa59eae7a44aed91bf47f39a913365
6
+ metadata.gz: a1d0fc63cabe88dcea9ae7f1049c99288782ffe4ebcaa575a2ae30b22f5f84aa4466223158b152d07d24118af84792eac77e40425c62690f25fe36692f9bafb9
7
+ data.tar.gz: 8faa4572ebe3ae57960e983596c98324c25188e576d1cd5d1a15d41561ae0818cbda6d564d3c603f6ef043081ca8cd550034bbcda8d7e072180a524dac4e1794
data/CHANGELOG CHANGED
@@ -1,3 +1,15 @@
1
+ 0.1.0
2
+
3
+ * Breaking changes
4
+
5
+ * Not parsed as numbers anymore: "10_14" or "10_400" or "-1_2e3_5" or "-$123_456.7" -- very unlikely to see in the wild
6
+ * Not parsed as numbers anymore: '1,2' or '-1,2' or '1,20' or '1,22.0' -- too hard to distinguish from CSV
7
+
8
+ * Enhancements
9
+
10
+ * Using a proper versioning scheme - every version of the algorithm involves a minor bump!
11
+ * Recover from YAML parsing errors in 1.9
12
+
1
13
  0.0.1.1 / 2014-02-06
2
14
 
3
15
  * Bug fixes
data/README.md CHANGED
@@ -4,10 +4,12 @@ Detect and convert short strings into integers, floats, dates, times, booleans,
4
4
 
5
5
  ## Note on versions
6
6
 
7
- You can always use `Parse.parse`. It will always point to the most recent version of the algorithm (currently `Parse.ver0_0_1`).
7
+ You can always use `Parse.parse`. It will always point to the most recent version of the algorithm (currently `Parse.ver0_1_0`).
8
8
 
9
9
  If the algorithm changes and you need the old version, you can reference it by its version number. For example, `Parse.ver0_0_1`.
10
10
 
11
+ Since almost any change to the algorithm is a breaking change, there are going to be lots of minor version bumps (as opposed to patches).
12
+
11
13
  ## Usage
12
14
 
13
15
  You get the idea:
@@ -39,26 +41,24 @@ More esoteric stuff:
39
41
  Parse.parse("-") #=> nil
40
42
  Parse.parse("?") #=> nil
41
43
  Parse.parse("-8e-05") #=> -8.0e-05
42
- Parse.parse("-1_2.5e-1_3") #=> -1.25e-12
44
+ Parse.parse("-12.5e-13") #=> -1.25e-12
43
45
  Parse.parse("05753") #=> 5753
44
- Parse.parse("15_000") #=> 15000
45
- Parse.parse("15_00_0") #=> 15000
46
+ Parse.parse("15000") #=> 15000
46
47
  Parse.parse("15.0") #=> 15.0
47
48
  Parse.parse("15,000.0") #=> 15000.0
48
- Parse.parse("15_000.0") #=> 15000.0
49
- Parse.parse("15_00_0.0") #=> 15000.0
49
+ Parse.parse("15000.0") #=> 15000.0
50
50
  Parse.parse("0015") #=> 15
51
51
  Parse.parse("0015.0") #=> 15.0
52
- Parse.parse("0_015.0") #=> 15.0
52
+ Parse.parse("0015.0") #=> 15.0
53
53
  Parse.parse("0x15") #=> 21
54
54
  Parse.parse("0o15") #=> 13
55
55
  Parse.parse("8e-05") #=> 8.0e-05
56
- Parse.parse("1_2.5e-1_3") #=> 1.25e-12
56
+ Parse.parse("12.5e-13") #=> 1.25e-12
57
57
  Parse.parse("0$123.4") #=> 123.4
58
58
  Parse.parse("$15,000") #=> 15000
59
59
  Parse.parse("0$15,000") #=> 15000
60
- Parse.parse("$123_456") #=> 123456
61
- Parse.parse("$123_456.7") #=> 123456.7
60
+ Parse.parse("$123456") #=> 123456
61
+ Parse.parse("$123456.7") #=> 123456.7
62
62
  Parse.parse("10,000,000") #=> 10000000
63
63
  Parse.parse("10,000,000.00") #=> 10000000.0
64
64
  Parse.parse("$10,000,000.00") #=> 10000000.0
@@ -66,15 +66,13 @@ More esoteric stuff:
66
66
  Parse.parse("$010,000,000.00") #=> 10000000.0
67
67
  Parse.parse("-15") #=> -15
68
68
  Parse.parse("-15,000") #=> -15000
69
- Parse.parse("-15_000") #=> -15000
70
- Parse.parse("-15_00_0") #=> -15000
69
+ Parse.parse("-15000") #=> -15000
71
70
  Parse.parse("-15.0") #=> -15.0
72
71
  Parse.parse("-15,000.0") #=> -15000.0
73
- Parse.parse("-15_000.0") #=> -15000.0
74
- Parse.parse("-15_00_0.0") #=> -15000.0
72
+ Parse.parse("-15000.0") #=> -15000.0
73
+ Parse.parse("-15000.0") #=> -15000.0
75
74
  Parse.parse("00-15") #=> -15
76
75
  Parse.parse("00-15.0") #=> -15.0
77
- Parse.parse("0_0-15.0") #=> "0_0-15.0"
78
76
  Parse.parse("-0x15") #=> -21
79
77
  Parse.parse("-0o15") #=> -13
80
78
  Parse.parse("-$123.4") #=> -123.4
@@ -82,10 +80,10 @@ More esoteric stuff:
82
80
  Parse.parse("0($123.4)") #=> -123.4
83
81
  Parse.parse("-$15,000") #=> -15000
84
82
  Parse.parse("($15,000)") #=> -15000
85
- Parse.parse("-$123_456") #=> -123456
86
- Parse.parse("($123_456)") #=> -123456
87
- Parse.parse("-$123_456.7") #=> -123456.7
88
- Parse.parse("($123_456.7)") #=> -123456.7
83
+ Parse.parse("-$123,456") #=> -123456
84
+ Parse.parse("($123,456)") #=> -123456
85
+ Parse.parse("-$123,456.7") #=> -123456.7
86
+ Parse.parse("($123,456.7)") #=> -123456.7
89
87
  Parse.parse("-10,000,000") #=> -10000000
90
88
  Parse.parse("(10,000,000)") #=> -10000000
91
89
  Parse.parse("-10,000,000.00") #=> -10000000.0
@@ -1,102 +1,24 @@
1
1
  require "parse/version"
2
+ require 'parse/algorithm'
3
+ require 'parse/algorithm/ver0_0_1'
4
+ require 'parse/algorithm/ver0_1_0'
2
5
 
3
6
  require 'date'
4
7
  require 'yaml'
5
8
  require 'safe_yaml/load'
9
+ require 'active_support/core_ext'
6
10
 
7
11
  module Parse
8
- # only need to deal with stuff not caught by YAML or JSON
9
- NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!'] # from bigml's list
10
- NAN = [ 'NaN' ]
11
- INFINITY = [ '#DIV/0', 'Infinity' ]
12
- NEG_INFINITY = [ '-Infinity' ]
13
- DATE = {
14
- euro: ['%d-%m-%Y', '%d-%m-%y'],
15
- us: ['%m-%d-%Y', '%m-%d-%y'],
16
- }
17
-
18
12
  def self.parse(raw, options = nil)
19
- ver0_0_1 raw, options
13
+ ver0_1_0 raw, options
20
14
  end
21
15
 
22
- # @private
23
- # use YAML to parse stuff like '1.5'
24
- # ruby's yaml is 1.1, which means it does weird stuff with '001' (fixed in 1.2, which jruby has)
25
- def self.ver0_0_1(raw, options = nil)
26
- return raw unless raw.is_a? String
27
-
28
- memo = raw.strip
29
-
30
- return nil if NULL.include? memo
31
- return 1.0/0 if INFINITY.include? memo
32
- return -1.0/0 if NEG_INFINITY.include? memo
33
- return 0.0/0 if NAN.include? memo
34
-
35
- if options and options[:date]
36
- yyyy, yy = DATE.fetch options[:date]
37
- memo.sub!(/0+/, '')
38
- memo.gsub! '/', '-'
39
- if memo =~ /\d{4,}/ # yyyy
40
- return Date.strptime(memo, yyyy)
41
- else
42
- return Date.strptime(memo, yy)
43
- end
44
- end
45
-
46
- not_numeric = nil
47
- not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
48
- not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
49
- not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
50
- possible_numeric = !not_numeric
51
- accounting_negative = nil
52
- percentage = nil
53
-
54
- if possible_numeric
55
- accounting_negative = memo =~ /\A[0$]*\([0$]*/
56
- percentage = memo.end_with?('%')
57
- memo.sub! /%\z/, '' if percentage
58
- memo.delete!('()') if accounting_negative # accounting negative
59
- # in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
60
- memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
61
- memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
62
- if memo.include?(',')
63
- a, b = memo.split('.', 2)
64
- a.delete! ','
65
- memo = b ? [a, b].join('.') : a
66
- end
67
- end
68
-
69
- not_safe_for_yaml = nil
70
- not_safe_for_yaml ||= memo.include?('#')
71
- not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ #1,2,3, maybe a csv
72
- safe_for_yaml = !not_safe_for_yaml
73
-
74
- if safe_for_yaml
75
- begin
76
- memo = SafeYAML.load memo
77
- rescue
78
- $stderr.puts "#{memo.inspect} => #{$!}"
79
- end
80
- end
81
-
82
- if possible_numeric
83
- case memo
84
- when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
85
- # scientific notation
86
- memo = memo.to_f
87
- when /\A[+\-]?0o/
88
- # octal per yaml 1.2
89
- memo = memo.to_i 8
90
- end
91
- end
92
-
93
- if memo.is_a?(String)
94
- # compress whitespace
95
- memo.gsub! /\s+/, ' '
96
- end
16
+ def self.ver0_1_0(*args)
17
+ Algorithm::Ver0_1_0.new(*args).result
18
+ end
97
19
 
98
- memo = memo / 100.0 if percentage
99
- memo = -memo if accounting_negative
100
- memo
20
+ def self.ver0_0_1(*args)
21
+ Algorithm::Ver0_0_1.new(*args).result
101
22
  end
23
+
102
24
  end
@@ -0,0 +1,8 @@
1
+ module Parse
2
+ module Algorithm
3
+ # only need to deal with stuff not caught by YAML or JSON
4
+ NAN = [ 'NaN' ]
5
+ INFINITY = [ '#DIV/0', 'Infinity' ]
6
+ NEG_INFINITY = [ '-Infinity' ]
7
+ end
8
+ end
@@ -0,0 +1,99 @@
1
+ module Parse
2
+ module Algorithm
3
+ class Ver0_0_1
4
+ NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!'] # from bigml's list
5
+ DATE = {
6
+ euro: ['%d-%m-%Y', '%d-%m-%y'],
7
+ us: ['%m-%d-%Y', '%m-%d-%y'],
8
+ }
9
+
10
+ attr_reader :raw
11
+ attr_reader :options
12
+ def initialize(raw, options = nil)
13
+ @raw = raw
14
+ @options = options
15
+ end
16
+
17
+ # @private
18
+ # use YAML to parse stuff like '1.5'
19
+ # ruby's yaml is 1.1, which means it does weird stuff with '001' (fixed in 1.2, which jruby has)
20
+ def result
21
+ return raw unless raw.is_a? String
22
+
23
+ memo = raw.strip
24
+
25
+ return nil if NULL.include? memo
26
+ return 1.0/0 if INFINITY.include? memo
27
+ return -1.0/0 if NEG_INFINITY.include? memo
28
+ return 0.0/0 if NAN.include? memo
29
+
30
+ if options and options[:date]
31
+ yyyy, yy = DATE.fetch options[:date]
32
+ memo.sub!(/0+/, '')
33
+ memo.gsub! '/', '-'
34
+ if memo =~ /\d{4,}/ # yyyy
35
+ return Date.strptime(memo, yyyy)
36
+ else
37
+ return Date.strptime(memo, yy)
38
+ end
39
+ end
40
+
41
+ not_numeric = nil
42
+ not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
43
+ not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
44
+ not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
45
+ possible_numeric = !not_numeric
46
+ accounting_negative = nil
47
+ percentage = nil
48
+
49
+ if possible_numeric
50
+ accounting_negative = memo =~ /\A[0$]*\([0$]*/
51
+ percentage = memo.end_with?('%')
52
+ memo.sub! /%\z/, '' if percentage
53
+ memo.delete!('()') if accounting_negative # accounting negative
54
+ # in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
55
+ memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
56
+ memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
57
+ if memo.include?(',')
58
+ a, b = memo.split('.', 2)
59
+ a.delete! ','
60
+ memo = b ? [a, b].join('.') : a
61
+ end
62
+ end
63
+
64
+ not_safe_for_yaml = nil
65
+ not_safe_for_yaml ||= memo.include?('#')
66
+ not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ #1,2,3, maybe a csv
67
+ safe_for_yaml = !not_safe_for_yaml
68
+
69
+ if safe_for_yaml
70
+ begin
71
+ memo = SafeYAML.load memo
72
+ rescue
73
+ $stderr.puts "#{memo.inspect} => #{$!}"
74
+ end
75
+ end
76
+
77
+ if possible_numeric
78
+ case memo
79
+ when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
80
+ # scientific notation
81
+ memo = memo.to_f
82
+ when /\A[+\-]?0o/
83
+ # octal per yaml 1.2
84
+ memo = memo.to_i 8
85
+ end
86
+ end
87
+
88
+ if memo.is_a?(String)
89
+ # compress whitespace
90
+ memo.gsub! /\s+/, ' '
91
+ end
92
+
93
+ memo = memo / 100.0 if percentage
94
+ memo = -memo if accounting_negative
95
+ memo
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,149 @@
1
+ module Parse
2
+ module Algorithm
3
+ class Ver0_1_0
4
+ NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!', '00/00/00', '0000-00-00'] # from bigml's list
5
+ REGION_DATE_FORMAT = {
6
+ euro: ['%d-%m-%Y', '%d-%m-%y'],
7
+ us: ['%m-%d-%Y', '%m-%d-%y'],
8
+ iso: ['%Y-%m-%d', '%y-%m-%d'], # second one is silly
9
+ }
10
+ DATE_DETECT = {
11
+ %r{\A0*[12]\d\d\d[\-/](?:(?:0[1-9])|(?:1[0-2]))[\-/][1-9]\d\z} => :iso, # all groups are non-capturing, so $1 is not set; "/" delimiters are normalized to "-" before parsing
12
+ }
13
+ EMPTY_OPTIONS = {}
14
+
15
+ attr_reader :raw
16
+ attr_reader :options
17
+ def initialize(raw, options = nil)
18
+ @raw = raw
19
+ @options = options || EMPTY_OPTIONS
20
+ end
21
+
22
+ def result
23
+ return raw unless raw.is_a? String
24
+
25
+ memo = raw.strip
26
+
27
+ return nil if NULL.include? memo
28
+ return 1.0/0 if INFINITY.include? memo
29
+ return -1.0/0 if NEG_INFINITY.include? memo
30
+ return 0.0/0 if NAN.include? memo
31
+
32
+ date_region = if options[:date]
33
+ options[:date]
34
+ else
35
+ catch :hit do
36
+ DATE_DETECT.each do |pattern, date_region|
37
+ # binding.pry if memo.include?('2011-')
38
+ if memo =~ pattern
39
+ throw :hit, date_region
40
+ end
41
+ end
42
+ nil
43
+ end
44
+ end
45
+
46
+ if date_region.nil? and options[:type] == Date
47
+ date_region = :iso
48
+ end
49
+
50
+ if date_region
51
+ yyyy, yy = REGION_DATE_FORMAT.fetch date_region
52
+ is_yyyy = memo =~ /[1-9]\d\d\d/
53
+ memo.sub! /\A0+/, ''
54
+ memo.gsub! '/', '-'
55
+ if is_yyyy
56
+ if memo.length < 10 and date_region == :iso
57
+ return Date.parse(memo)
58
+ else
59
+ return Date.strptime(memo, yyyy)
60
+ end
61
+ else
62
+ return Date.strptime(memo, yy)
63
+ end
64
+ end
65
+
66
+ possible_numeric = nil
67
+ not_numeric = nil
68
+ certain_numeric = nil
69
+ if [Numeric, Integer, Float].include?(options[:type])
70
+ certain_numeric = true
71
+ possible_numeric = true
72
+ not_numeric = false
73
+ else
74
+ # not_numeric ||= memo =~ /[1-9][^)\d_,%.eE]/ # has a dash in the middle
75
+ not_numeric ||= memo.include?('_')
76
+ not_numeric ||= memo =~ %r{[1-9][/-]\d}
77
+ not_numeric ||= memo =~ /,\d{1,2}(?:[.\D]|\z)/
78
+ not_numeric ||= memo.scan(/[^\d_,%.eE]/).length > memo.scan(/[\d_,%.eE]/).length
79
+ not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
80
+ possible_numeric = !not_numeric
81
+ end
82
+ accounting_negative = nil
83
+ percentage = nil
84
+ if possible_numeric
85
+ accounting_negative = memo =~ /\A[0$]*\([0$]*/
86
+ percentage = memo.end_with?('%')
87
+ memo.sub! /%\z/, '' if percentage
88
+ memo.delete!('()') if accounting_negative # accounting negative
89
+ # in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
90
+ memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
91
+ memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
92
+ memo.sub!('D', 'e') if memo =~ /\A[+\-]?[\d.]+D[+\-]?[\d.]+\z/ # fortran double precision
93
+ if memo.include?(',')
94
+ a, b = memo.split('.', 2)
95
+ a.delete! ','
96
+ memo = b ? [a, b].join('.') : a
97
+ end
98
+ end
99
+
100
+ if certain_numeric
101
+ memo.gsub! /[a-z]/i, ''
102
+ end
103
+
104
+ not_safe_for_yaml = nil
105
+ not_safe_for_yaml ||= memo =~ /\A(on|off)\z/i
106
+ not_safe_for_yaml ||= memo.include?('#')
107
+ not_safe_for_yaml ||= memo =~ /\A[@&,]/
108
+ not_safe_for_yaml ||= not_numeric && memo.start_with?('0')
109
+ not_safe_for_yaml ||= not_numeric && memo =~ /\A[^{\[]*\d[,_]/ #1,2,3, maybe a csv
110
+
111
+ safe_for_yaml = !not_safe_for_yaml
112
+
113
+ if safe_for_yaml
114
+ begin
115
+ memo = SafeYAML.load memo
116
+ rescue Exception # on Ruby 1.9.3 a plain rescue does not catch Psych::SyntaxError
117
+ $stderr.puts "#{memo.inspect} => #{$!}"
118
+ end
119
+ end
120
+
121
+ if possible_numeric
122
+ case memo
123
+ when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
124
+ # scientific notation
125
+ memo = memo.to_f
126
+ when /\A[+\-]?0o/
127
+ # octal per yaml 1.2
128
+ memo = memo.to_i 8
129
+ end
130
+ end
131
+
132
+ if memo.is_a?(String)
133
+ # compress whitespace
134
+ memo.gsub! /\s+/, ' '
135
+ end
136
+
137
+ memo = memo / 100.0 if percentage
138
+ memo = -memo if accounting_negative
139
+ memo
140
+ rescue
141
+ if options and options[:ignore_error]
142
+ # nothing to see here
143
+ else
144
+ raise "#{memo.inspect} => #{$!}"
145
+ end
146
+ end
147
+ end
148
+ end
149
+ end
@@ -1,3 +1,3 @@
1
1
  module Parse
2
- VERSION = "0.0.1.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -18,12 +18,13 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_runtime_dependency 'safe_yaml'
21
+ spec.add_runtime_dependency 'safe_yaml', '>=1'
22
+ spec.add_runtime_dependency 'activesupport'
22
23
 
23
24
  spec.add_development_dependency "bundler", "~> 1.5"
24
25
  spec.add_development_dependency "rake"
25
26
  spec.add_development_dependency "rspec"
26
27
  spec.add_development_dependency 'multi_json'
27
- spec.add_development_dependency 'activesupport'
28
28
  spec.add_development_dependency 'pry'
29
+ # spec.add_development_dependency 'twitter_cldr'
29
30
  end
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Parse do
4
- it "should parse with version 0.0.1 of the algorithm" do
4
+ it "should parse with version 0.0.2 of the algorithm" do
5
5
  v = " 1990-04-03 "
6
- expect(Parse.parse(v)).to eq(Parse.ver0_0_1(v))
6
+ expect(Parse.parse(v)).to eq(Parse.ver0_1_0(v))
7
7
  end
8
8
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Parse do
3
+ describe Parse::Algorithm::Ver0_0_1 do
4
4
  {
5
5
  '' => nil,
6
6
  'nil' => nil,
@@ -0,0 +1,277 @@
1
+ require 'spec_helper'
2
+ require 'twitter_cldr'
3
+
4
+ describe Parse::Algorithm::Ver0_1_0 do
5
+ same = [
6
+ '1,2',
7
+ '1,20',
8
+ '1,2.0',
9
+ '-1,2',
10
+ '-1,20',
11
+ '-1,2.0',
12
+ '01,2',
13
+ '01,20',
14
+ '01,2.0',
15
+ '15_000',
16
+ '15_00_0',
17
+ '15_000.0',
18
+ '15_00_0.0',
19
+ '0_015.0', # just weird
20
+ '1_2.5e-1_3',
21
+ '-1_2.5e-1_3',
22
+ '$123_456',
23
+ '$123_456.7',
24
+ '-15_000',
25
+ '-15_00_0',
26
+ '-15_000.0',
27
+ '-15_00_0.0',
28
+ '0_0-15.0', # just weird
29
+ '-$123_456',
30
+ '($123_456)',
31
+ '-$123_456.7',
32
+ '($123_456.7)',
33
+ '10_14_A',
34
+ '10_14',
35
+ '10_140',
36
+ ]
37
+ same.each do |v|
38
+ it "parses #{v.inspect} as itself (yaml=#{SafeYAML.load(v).inspect})" do
39
+ expect(Parse.ver0_1_0(v)).to eq(v)
40
+ end
41
+ end
42
+
43
+ a = {
44
+
45
+ "@ foo" => "@ foo",
46
+ ", foo" => ", foo",
47
+ "044-1-276-000" => "044-1-276-000",
48
+
49
+ ['1 BEDROOMS', { type: Numeric } ] => 1,
50
+ '1 BEDROOMS' => '1 BEDROOMS',
51
+
52
+ [ '2.4 SQFT', { type: Numeric } ] => 2.4,
53
+ '2.4 SQFT' => '2.4 SQFT',
54
+
55
+ ['000', { date: :us, ignore_error: true}] => nil,
56
+ ['7/7/2004', {date: :us}] => Date.new(2004,7,7),
57
+ "999 HOLY CROSS ROAD, COLCHESTER, VT 05446" => "999 HOLY CROSS ROAD, COLCHESTER, VT 05446",
58
+
59
+ '00020110628' => 20110628,
60
+ '0002011-06-28' => Date.new(2011,6,28),
61
+ '0002011/06/28' => Date.new(2011,6,28),
62
+ ['00020110628', {date: :iso}] => Date.new(2011,6,28),
63
+ ['00020110628', {type: Date}] => Date.new(2011,6,28),
64
+
65
+ '00019800628' => 19800628,
66
+ '0001980-06-28' => Date.new(1980,6,28),
67
+ '0001980/06/28' => Date.new(1980,6,28),
68
+ ['00019800628', {date: :iso}] => Date.new(1980,6,28),
69
+ ['00019800628', {type: Date}] => Date.new(1980,6,28),
70
+
71
+ '00030000628' => 30000628,
72
+ '0003000-06-28' => '0003000-06-28',
73
+ '0003000/06/28' => '0003000/06/28',
74
+ ['00030000628', {date: :iso}] => Date.new(3000,6,28),
75
+ ['00030000628', {type: Date}] => Date.new(3000,6,28),
76
+
77
+ ['', {type: Numeric}] => nil,
78
+
79
+ # fortran double precision
80
+ '0.225120000000000D+06' => 0.22512e6,
81
+ '0.341913000000000D+07' => 0.341913e7,
82
+ '0.2500000E-01' => 0.25e-1,
83
+ '3.1D0' => 3.1,
84
+ '-2.D0' => -2.0,
85
+
86
+ '8e-05' => 8e-5,
87
+ '8e+4' => 8e4,
88
+ '8.0e+4' => 8.0e4,
89
+ '8e-4' => 8e-4,
90
+ '8.0e-4' => 8.0e-4,
91
+ '-8e+4' => -8e4,
92
+ '-8.0e+4' => -8.0e4,
93
+ '-8e-4' => -8e-4,
94
+ '-8.0e-4' => -8.0e-4,
95
+ '8E+4' => 8e4,
96
+ '8.0E+4' => 8.0e4,
97
+ '8E-4' => 8e-4,
98
+ '8.0E-4' => 8.0e-4,
99
+ '-8E+4' => -8e4,
100
+ '-8.0E+4' => -8.0e4,
101
+ '-8E-4' => -8e-4,
102
+ '-8.0E-4' => -8.0e-4,
103
+
104
+ # http://dojotoolkit.org/reference-guide/1.9/dojo/number.html
105
+ # '1,000,000.00' => 1_000_000.0,
106
+ # '1.000.000,00' => 1_000_000.0, # german
107
+ # '1 000 000,00' => 1_000_000.0, # french
108
+ # '10,00,000.00' => 1_000_000.0, # indian
109
+
110
+ '060-10-01' => '60-10-01',
111
+ 'OFF' => 'OFF',
112
+ 'ON' => 'ON',
113
+
114
+ '& P4' => '& P4',
115
+
116
+ # EVERYTHING BELOW IS SAME AS 0.0.1
117
+
118
+ '' => nil,
119
+ 'nil' => nil,
120
+ '15' => 15,
121
+ '15,000' => 15_000,
122
+ '15.0' => 15.0,
123
+ '15,000.0' => 15_000.0,
124
+ '0015' => 15, # not octal
125
+ '0015.0' => 15.0, # not octal
126
+ '0x15' => 0x15, # hex
127
+ '0o15' => 015, # octal
128
+ '8e-05' => 8e-05,
129
+ '12.5e-13' => 12.5e-13,
130
+ '-12.5e-13' => -12.5e-13,
131
+ '$123.4' => 123.4,
132
+ '0$123.4' => 123.4,
133
+ '$15,000' => 15_000,
134
+ '0$15,000' => 15_000,
135
+ '10,000,000' => 10_000_000,
136
+ '10,000,000.00' => 10_000_000.0,
137
+ '$10,000,000.00' => 10_000_000.0,
138
+ '0$10,000,000.00' => 10_000_000.0,
139
+ '$010,000,000.00' => 10_000_000.0,
140
+
141
+ '-15' => -15,
142
+ '-15,000' => -15_000,
143
+ '-15.0' => -15.0,
144
+ '-15,000.0' => -15_000.0,
145
+ '00-15' => -15, # not octal
146
+ '00-15.0' => -15.0, # not octal
147
+ '-0x15' => -0x15, # hex
148
+ '-0o15' => -015, # octal
149
+ '-8e-05' => -8e-05,
150
+ '-$123.4' => -123.4,
151
+ '($123.4)' => -123.4,
152
+ '0($123.4)' => -123.4,
153
+ '-$15,000' => -15_000,
154
+ '($15,000)' => -15_000,
155
+ '-$123456' => -123_456,
156
+ '($123456)' => -123_456,
157
+ '-$123456.7' => -123_456.7,
158
+ '($123456.7)' => -123_456.7,
159
+ '-$123,456' => -123_456,
160
+ '($123,456)' => -123_456,
161
+ '-$123,456.7' => -123_456.7,
162
+ '($123,456.7)' => -123_456.7,
163
+ '-10,000,000' => -10_000_000,
164
+ '(10,000,000)' => -10_000_000,
165
+ '-10,000,000.00' => -10_000_000.0,
166
+ '(10,000,000.00)' => -10_000_000.0,
167
+ '-10000000' => -10_000_000,
168
+ '(10000000)' => -10_000_000,
169
+ '-10000000.00' => -10_000_000.0,
170
+ '(10000000.00)' => -10_000_000.0,
171
+ '1,200' => 1_200,
172
+ '1,200.0' => 1_200.0,
173
+ '1.0,2' => '1.0,2',
174
+ '1.0,2.0' => '1.0,2.0',
175
+ '-1,200' => -1_200,
176
+ '-1,200.0' => -1_200.0,
177
+ '-1.0,2' => '-1.0,2',
178
+ '-1.0,2.0' => '-1.0,2.0',
179
+ '01,200' => 1_200,
180
+ '01,200.0' => 1_200.0,
181
+ '01.0,2' => '01.0,2',
182
+ '01.0,2.0' => '01.0,2.0',
183
+
184
+ '05753' => 5753,
185
+ 'true' => true,
186
+ 'yes' => true,
187
+ 'false' => false,
188
+ 'no' => false,
189
+ '#DIV/0' => (1.0/0),
190
+ '#NAME?' => nil,
191
+ 'Inf' => 'Inf',
192
+ 'Infinity' => (1.0/0),
193
+ '-Infinity' => -(1.0/0),
194
+ 'NaN' => 0.0/0, # need the dot
195
+ '.NaN' => 0.0/0, # NaN
196
+ '-.inf' => -(1.0/0), # -Infinity
197
+ '-' => nil, # per bigml
198
+ '?' => nil,
199
+ '1982-01-01' => Date.new(1982,1,1),
200
+ '2010-05-05 13:42:16 Z' => Time.parse('2010-05-05 13:42:16 Z'),
201
+ '2010-05-05 13:42:16 -02:00' => Time.parse('2010-05-05 13:42:16 -02:00'),
202
+ ":not_a_symbol" => ':not_a_symbol',
203
+ '#hello' => '#hello',
204
+ "\n#hello\n#world" => '#hello #world',
205
+ "hello\nworld" => 'hello world', # whitespace compression
206
+
207
+ '0%' => 0.0,
208
+ '100%' => 1.0,
209
+ '50%' => 0.5,
210
+ '5%' => 0.05,
211
+ '00000%' => 0.0,
212
+ '0000100%' => 1.0,
213
+ '000050%' => 0.5,
214
+ '00005%' => 0.05,
215
+
216
+ ['12/25/82', {date: :us}] => Date.new(1982,12,25),
217
+ ['12/25/1982', {date: :us}] => Date.new(1982,12,25),
218
+ ['25/12/82', {date: :euro}] => Date.new(1982,12,25),
219
+ ['25/12/1982', {date: :euro}] => Date.new(1982,12,25),
220
+ ['12-25-82', {date: :us}] => Date.new(1982,12,25),
221
+ ['12-25-1982', {date: :us}] => Date.new(1982,12,25),
222
+ ['25-12-82', {date: :euro}] => Date.new(1982,12,25),
223
+ ['25-12-1982', {date: :euro}] => Date.new(1982,12,25),
224
+
225
+ '12/25/82' => '12/25/82',
226
+
227
+ ',1' => ',1', # not a csv parser
228
+ ',1,' => ',1,', # not a csv parser
229
+ '1,2,3' => '1,2,3', # not a csv parser
230
+ '[1,2,3]' => [1,2,3],
231
+ YAML.dump('a' => 1) => { 'a' => 1 },
232
+ YAML.dump(a: 1) => { ':a' => 1 }, # doesn't parse symbols
233
+ YAML.dump('a' => 1, 5 => "c\n3") => { 'a' => 1, 5 => "c\n3" },
234
+ MultiJson.dump(a: 1) => { 'a' => 1 }, # json always loses symbols
235
+ MultiJson.dump(a: 1, 5 => "c\n3") => { 'a' => 1, '5' => "c\n3" },
236
+ }
237
+
238
+ # TwitterCldr.supported_locales.each do |locale|
239
+ # 1.upto(9).map do |power|
240
+ # num = (rand * (10 ** power)).round(4)
241
+ # # a[[num.localize(locale).to_s, {locale: locale}]] = num
242
+ # a[[num.localize(locale).to_s, { locale: locale }]] = num
243
+ # # a[num.localize(locale).to_currency.to_s] = num
244
+ # end
245
+ # end
246
+
247
+ # and next dates!
248
+ # Time.now.localize(:es).to_full_s
249
+
250
+ a.each do |input, expected|
251
+ input = Array.wrap input
252
+ locale = if input[1].is_a?(Hash)
253
+ input[1][:locale]
254
+ end
255
+ it "#{locale ? "(#{locale}) " : nil}parses #{input[0].inspect} as #{expected.inspect}" do
256
+ got = Parse.ver0_1_0(*input)
257
+ # $lines << [ "Parse.parse(#{input.inspect})".ljust(45), "#=> #{got.inspect}" ].join
258
+ if expected.is_a?(Float) and expected.nan?
259
+ expect(got.nan?).to eq(true)
260
+ elsif expected.is_a?(Float) and got.is_a?(Float)
261
+ expect(got.round(8)).to eq(expected.round(8))
262
+ else
263
+ expect(got).to eq(expected)
264
+ end
265
+
266
+ input_with_spaces = [ "\t" + input[0] + "\t", input[1] ]
267
+ got_with_spaces = Parse.ver0_1_0(*input_with_spaces)
268
+ if expected.is_a?(Float) and expected.nan?
269
+ expect(got.nan?).to eq(true)
270
+ elsif expected.is_a?(Float) and got.is_a?(Float)
271
+ expect(got.round(8)).to eq(expected.round(8))
272
+ else
273
+ expect(got_with_spaces).to eq(expected)
274
+ end
275
+ end
276
+ end
277
+ end
metadata CHANGED
@@ -1,17 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: safe_yaml
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
15
29
  requirement: !ruby/object:Gem::Requirement
16
30
  requirements:
17
31
  - - ">="
@@ -80,20 +94,6 @@ dependencies:
80
94
  - - ">="
81
95
  - !ruby/object:Gem::Version
82
96
  version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: activesupport
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: pry
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -125,10 +125,14 @@ files:
125
125
  - README.md
126
126
  - Rakefile
127
127
  - lib/parse.rb
128
+ - lib/parse/algorithm.rb
129
+ - lib/parse/algorithm/ver0_0_1.rb
130
+ - lib/parse/algorithm/ver0_1_0.rb
128
131
  - lib/parse/version.rb
129
132
  - parse.gemspec
130
133
  - spec/parse_spec.rb
131
134
  - spec/parse_ver0_0_1_spec.rb
135
+ - spec/parse_ver0_1_0_spec.rb
132
136
  - spec/spec_helper.rb
133
137
  homepage: https://github.com/seamusabshere/parse
134
138
  licenses:
@@ -150,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
150
154
  version: '0'
151
155
  requirements: []
152
156
  rubyforge_project:
153
- rubygems_version: 2.2.1
157
+ rubygems_version: 2.2.2
154
158
  signing_key:
155
159
  specification_version: 4
156
160
  summary: Detect and convert short strings into integers, floats, dates, times, booleans,
@@ -158,4 +162,5 @@ summary: Detect and convert short strings into integers, floats, dates, times, b
158
162
  test_files:
159
163
  - spec/parse_spec.rb
160
164
  - spec/parse_ver0_0_1_spec.rb
165
+ - spec/parse_ver0_1_0_spec.rb
161
166
  - spec/spec_helper.rb