parse 0.0.1.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7d04d59e0d0c1eadc1d912f109697f28b18eb6bc
4
- data.tar.gz: c0207dac3109de5a81e8265e0e339138392f76a9
3
+ metadata.gz: 321f7a0116ae940b829a45faedb2fe731b3ada97
4
+ data.tar.gz: 6599e616962df9eac3ff27f31f4d9f11418270ab
5
5
  SHA512:
6
- metadata.gz: 2047276d42adf38a1b31af30d8610ed1daae0254765c4051ed381a15da44504b345f90cf83f57fc83bdfab84dd89700dc21793f8eecd06c5f31148453ae85d25
7
- data.tar.gz: 392dc0723a262777f89086320a34c9ae50eec4197fce5f174ac087cd91b29538c660bb96a4d7fe0b39f108dc853ce32956aa59eae7a44aed91bf47f39a913365
6
+ metadata.gz: a1d0fc63cabe88dcea9ae7f1049c99288782ffe4ebcaa575a2ae30b22f5f84aa4466223158b152d07d24118af84792eac77e40425c62690f25fe36692f9bafb9
7
+ data.tar.gz: 8faa4572ebe3ae57960e983596c98324c25188e576d1cd5d1a15d41561ae0818cbda6d564d3c603f6ef043081ca8cd550034bbcda8d7e072180a524dac4e1794
data/CHANGELOG CHANGED
@@ -1,3 +1,15 @@
1
+ 0.1.0
2
+
3
+ * Breaking changes
4
+
5
+ * Not parsed as numbers anymore: "10_14" or "10_400" or "-1_2e3_5" or "-$123_456.7" -- very unlikely to see in the wild
6
+ * Not parsed as numbers anymore: '1,2' or '-1,2' or '1,20' or '1,22.0' -- too hard to distinguish from CSV
7
+
8
+ * Enhancements
9
+
10
+ * Using a proper versioning scheme - every new version of the algorithm involves a minor bump!
11
+ * Recover from YAML parsing errors on Ruby 1.9
12
+
1
13
  0.0.1.1 / 2014-02-06
2
14
 
3
15
  * Bug fixes
data/README.md CHANGED
@@ -4,10 +4,12 @@ Detect and convert short strings into integers, floats, dates, times, booleans,
4
4
 
5
5
  ## Note on versions
6
6
 
7
- You can always use `Parse.parse`. It will always point to the most recent version of the algorithm (currently `Parse.ver0_0_1`).
7
+ You can always use `Parse.parse`. It will always point to the most recent version of the algorithm (currently `Parse.ver0_1_0`).
8
8
 
9
9
  If the algorithm changes and you need the old version, you can reference it by its version number. For example, `Parse.ver0_0_1`.
10
10
 
11
+ Since almost any change to the algorithm is a breaking change, there are going to be lots of minor version bumps (as opposed to patches).
12
+
11
13
  ## Usage
12
14
 
13
15
  You get the idea:
@@ -39,26 +41,24 @@ More esoteric stuff:
39
41
  Parse.parse("-") #=> nil
40
42
  Parse.parse("?") #=> nil
41
43
  Parse.parse("-8e-05") #=> -8.0e-05
42
- Parse.parse("-1_2.5e-1_3") #=> -1.25e-12
44
+ Parse.parse("-12.5e-13") #=> -1.25e-12
43
45
  Parse.parse("05753") #=> 5753
44
- Parse.parse("15_000") #=> 15000
45
- Parse.parse("15_00_0") #=> 15000
46
+ Parse.parse("15000") #=> 15000
46
47
  Parse.parse("15.0") #=> 15.0
47
48
  Parse.parse("15,000.0") #=> 15000.0
48
- Parse.parse("15_000.0") #=> 15000.0
49
- Parse.parse("15_00_0.0") #=> 15000.0
49
+ Parse.parse("15000.0") #=> 15000.0
50
50
  Parse.parse("0015") #=> 15
51
51
  Parse.parse("0015.0") #=> 15.0
52
- Parse.parse("0_015.0") #=> 15.0
52
+ Parse.parse("0015.0") #=> 15.0
53
53
  Parse.parse("0x15") #=> 21
54
54
  Parse.parse("0o15") #=> 13
55
55
  Parse.parse("8e-05") #=> 8.0e-05
56
- Parse.parse("1_2.5e-1_3") #=> 1.25e-12
56
+ Parse.parse("12.5e-13") #=> 1.25e-12
57
57
  Parse.parse("0$123.4") #=> 123.4
58
58
  Parse.parse("$15,000") #=> 15000
59
59
  Parse.parse("0$15,000") #=> 15000
60
- Parse.parse("$123_456") #=> 123456
61
- Parse.parse("$123_456.7") #=> 123456.7
60
+ Parse.parse("$123456") #=> 123456
61
+ Parse.parse("$123456.7") #=> 123456.7
62
62
  Parse.parse("10,000,000") #=> 10000000
63
63
  Parse.parse("10,000,000.00") #=> 10000000.0
64
64
  Parse.parse("$10,000,000.00") #=> 10000000.0
@@ -66,15 +66,13 @@ More esoteric stuff:
66
66
  Parse.parse("$010,000,000.00") #=> 10000000.0
67
67
  Parse.parse("-15") #=> -15
68
68
  Parse.parse("-15,000") #=> -15000
69
- Parse.parse("-15_000") #=> -15000
70
- Parse.parse("-15_00_0") #=> -15000
69
+ Parse.parse("-15000") #=> -15000
71
70
  Parse.parse("-15.0") #=> -15.0
72
71
  Parse.parse("-15,000.0") #=> -15000.0
73
- Parse.parse("-15_000.0") #=> -15000.0
74
- Parse.parse("-15_00_0.0") #=> -15000.0
72
+ Parse.parse("-15000.0") #=> -15000.0
73
+ Parse.parse("-15000.0") #=> -15000.0
75
74
  Parse.parse("00-15") #=> -15
76
75
  Parse.parse("00-15.0") #=> -15.0
77
- Parse.parse("0_0-15.0") #=> "0_0-15.0"
78
76
  Parse.parse("-0x15") #=> -21
79
77
  Parse.parse("-0o15") #=> -13
80
78
  Parse.parse("-$123.4") #=> -123.4
@@ -82,10 +80,10 @@ More esoteric stuff:
82
80
  Parse.parse("0($123.4)") #=> -123.4
83
81
  Parse.parse("-$15,000") #=> -15000
84
82
  Parse.parse("($15,000)") #=> -15000
85
- Parse.parse("-$123_456") #=> -123456
86
- Parse.parse("($123_456)") #=> -123456
87
- Parse.parse("-$123_456.7") #=> -123456.7
88
- Parse.parse("($123_456.7)") #=> -123456.7
83
+ Parse.parse("-$123,456") #=> -123456
84
+ Parse.parse("($123,456)") #=> -123456
85
+ Parse.parse("-$123,456.7") #=> -123456.7
86
+ Parse.parse("($123,456.7)") #=> -123456.7
89
87
  Parse.parse("-10,000,000") #=> -10000000
90
88
  Parse.parse("(10,000,000)") #=> -10000000
91
89
  Parse.parse("-10,000,000.00") #=> -10000000.0
@@ -1,102 +1,24 @@
1
1
  require "parse/version"
2
+ require 'parse/algorithm'
3
+ require 'parse/algorithm/ver0_0_1'
4
+ require 'parse/algorithm/ver0_1_0'
2
5
 
3
6
  require 'date'
4
7
  require 'yaml'
5
8
  require 'safe_yaml/load'
9
+ require 'active_support/core_ext'
6
10
 
7
11
  module Parse
8
- # only need to deal with stuff not caught by YAML or JSON
9
- NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!'] # from bigml's list
10
- NAN = [ 'NaN' ]
11
- INFINITY = [ '#DIV/0', 'Infinity' ]
12
- NEG_INFINITY = [ '-Infinity' ]
13
- DATE = {
14
- euro: ['%d-%m-%Y', '%d-%m-%y'],
15
- us: ['%m-%d-%Y', '%m-%d-%y'],
16
- }
17
-
18
12
  def self.parse(raw, options = nil)
19
- ver0_0_1 raw, options
13
+ ver0_1_0 raw, options
20
14
  end
21
15
 
22
- # @private
23
- # use YAML to parse stuff like '1.5'
24
- # ruby's yaml is 1.1, which means it does weird stuff with '001' (fixed in 1.2, which jruby has)
25
- def self.ver0_0_1(raw, options = nil)
26
- return raw unless raw.is_a? String
27
-
28
- memo = raw.strip
29
-
30
- return nil if NULL.include? memo
31
- return 1.0/0 if INFINITY.include? memo
32
- return -1.0/0 if NEG_INFINITY.include? memo
33
- return 0.0/0 if NAN.include? memo
34
-
35
- if options and options[:date]
36
- yyyy, yy = DATE.fetch options[:date]
37
- memo.sub!(/0+/, '')
38
- memo.gsub! '/', '-'
39
- if memo =~ /\d{4,}/ # yyyy
40
- return Date.strptime(memo, yyyy)
41
- else
42
- return Date.strptime(memo, yy)
43
- end
44
- end
45
-
46
- not_numeric = nil
47
- not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
48
- not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
49
- not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
50
- possible_numeric = !not_numeric
51
- accounting_negative = nil
52
- percentage = nil
53
-
54
- if possible_numeric
55
- accounting_negative = memo =~ /\A[0$]*\([0$]*/
56
- percentage = memo.end_with?('%')
57
- memo.sub! /%\z/, '' if percentage
58
- memo.delete!('()') if accounting_negative # accounting negative
59
- # in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
60
- memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
61
- memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
62
- if memo.include?(',')
63
- a, b = memo.split('.', 2)
64
- a.delete! ','
65
- memo = b ? [a, b].join('.') : a
66
- end
67
- end
68
-
69
- not_safe_for_yaml = nil
70
- not_safe_for_yaml ||= memo.include?('#')
71
- not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ #1,2,3, maybe a csv
72
- safe_for_yaml = !not_safe_for_yaml
73
-
74
- if safe_for_yaml
75
- begin
76
- memo = SafeYAML.load memo
77
- rescue
78
- $stderr.puts "#{memo.inspect} => #{$!}"
79
- end
80
- end
81
-
82
- if possible_numeric
83
- case memo
84
- when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
85
- # scientific notation
86
- memo = memo.to_f
87
- when /\A[+\-]?0o/
88
- # octal per yaml 1.2
89
- memo = memo.to_i 8
90
- end
91
- end
92
-
93
- if memo.is_a?(String)
94
- # compress whitespace
95
- memo.gsub! /\s+/, ' '
96
- end
16
+ def self.ver0_1_0(*args)
17
+ Algorithm::Ver0_1_0.new(*args).result
18
+ end
97
19
 
98
- memo = memo / 100.0 if percentage
99
- memo = -memo if accounting_negative
100
- memo
20
+ def self.ver0_0_1(*args)
21
+ Algorithm::Ver0_0_1.new(*args).result
101
22
  end
23
+
102
24
  end
@@ -0,0 +1,8 @@
1
module Parse
  # Namespace for the versioned parsing algorithms, plus the literal
  # spellings of special float values that the version classes look up
  # before doing any other work.
  module Algorithm
    # only need to deal with stuff not caught by YAML or JSON

    # Inputs that are returned as NaN (0.0/0).
    NAN = ['NaN'].freeze
    # Inputs that are returned as positive infinity (1.0/0).
    INFINITY = ['#DIV/0', 'Infinity'].freeze
    # Inputs that are returned as negative infinity (-1.0/0).
    NEG_INFINITY = ['-Infinity'].freeze
  end
end
@@ -0,0 +1,99 @@
1
module Parse
  module Algorithm
    # Version 0.0.1 of the parsing algorithm, kept so that callers can pin the
    # original behaviour. Its results are intentionally left untouched,
    # including its quirks.
    #
    # Relies on INFINITY, NEG_INFINITY and NAN from the enclosing
    # Parse::Algorithm module and on SafeYAML / Date being loaded by the caller.
    class Ver0_0_1
      # Stripped inputs that are returned as nil.
      NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!'].freeze # from bigml's list
      # options[:date] value => [four-digit-year format, two-digit-year format]
      DATE = {
        euro: ['%d-%m-%Y', '%d-%m-%y'],
        us: ['%m-%d-%Y', '%m-%d-%y'],
      }.freeze

      attr_reader :raw
      attr_reader :options

      # @param raw [Object] value to parse; anything that is not a String is
      #   returned unchanged by {#result}
      # @param options [Hash, nil] optional; only :date (:euro or :us) is read
      def initialize(raw, options = nil)
        @raw = raw
        @options = options
      end

      # @private
      # use YAML to parse stuff like '1.5'
      # ruby's yaml is 1.1, which means it does weird stuff with '001' (fixed in 1.2, which jruby has)
      #
      # @return [Object] the converted value, or the whitespace-compressed
      #   string when nothing else applies
      # @raise [KeyError] when options[:date] is not a key of DATE
      # @raise [ArgumentError] when options[:date] is given and the input does
      #   not match the date format (raised by Date.strptime)
      def result
        return raw unless raw.is_a? String

        memo = raw.strip

        return nil if NULL.include? memo
        return 1.0/0 if INFINITY.include? memo
        return -1.0/0 if NEG_INFINITY.include? memo
        return 0.0/0 if NAN.include? memo

        if options && options[:date]
          yyyy, yy = DATE.fetch options[:date]
          # NOTE: the pattern is not anchored, so this removes the first run of
          # zeros wherever it occurs. Kept as-is: this version is frozen.
          memo.sub!(/0+/, '')
          memo.gsub! '/', '-'
          if memo =~ /\d{4,}/ # yyyy
            return Date.strptime(memo, yyyy)
          else
            return Date.strptime(memo, yy)
          end
        end

        not_numeric = nil
        not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
        not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
        not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
        possible_numeric = !not_numeric
        accounting_negative = nil
        percentage = nil

        if possible_numeric
          # a match position of 0 is truthy in Ruby, so this works as a flag
          accounting_negative = memo =~ /\A[0$]*\([0$]*/
          percentage = memo.end_with?('%')
          memo.sub!(/%\z/, '') if percentage
          memo.delete!('()') if accounting_negative # accounting negative
          # in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
          memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
          memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
          if memo.include?(',')
            # drop thousands separators from the integer part only
            a, b = memo.split('.', 2)
            a.delete! ','
            memo = b ? [a, b].join('.') : a
          end
        end

        not_safe_for_yaml = nil
        not_safe_for_yaml ||= memo.include?('#')
        not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ #1,2,3, maybe a csv
        safe_for_yaml = !not_safe_for_yaml

        if safe_for_yaml
          begin
            memo = SafeYAML.load memo
          rescue => e
            # best effort: report the failure and carry on with the raw string
            $stderr.puts "#{memo.inspect} => #{e}"
          end
        end

        if possible_numeric
          # only strings can match these patterns, i.e. values YAML left alone
          case memo
          when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
            # scientific notation
            memo = memo.to_f
          when /\A[+\-]?0o/
            # octal per yaml 1.2
            memo = memo.to_i 8
          end
        end

        if memo.is_a?(String)
          # compress whitespace
          memo.gsub!(/\s+/, ' ')
        end

        memo = memo / 100.0 if percentage
        memo = -memo if accounting_negative
        memo
      end
    end
  end
end
@@ -0,0 +1,149 @@
1
module Parse
  module Algorithm
    # Version 0.1.0 of the parsing algorithm.
    #
    # Compared with Ver0_0_1 it refuses to treat strings containing
    # underscores or "comma followed by one or two digits" as numbers,
    # auto-detects ISO-style dates, understands Fortran double-precision
    # exponents, and accepts :type and :ignore_error options.
    #
    # Relies on INFINITY, NEG_INFINITY and NAN from the enclosing
    # Parse::Algorithm module and on SafeYAML / Date being loaded by the caller.
    class Ver0_1_0
      # Stripped inputs that are returned as nil.
      NULL = [ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!', '00/00/00', '0000-00-00'].freeze # from bigml's list
      # date region => [four-digit-year format, two-digit-year format]
      REGION_DATE_FORMAT = {
        euro: ['%d-%m-%Y', '%d-%m-%y'],
        us: ['%m-%d-%Y', '%m-%d-%y'],
        iso: ['%Y-%m-%d', '%y-%m-%d'], # second one is silly
      }.freeze
      # pattern => date region used when options[:date] is not given.
      # The pattern accepts optional leading zeros, a year starting with 1 or 2,
      # a month 01-12 and a two-digit day whose first digit is 1-9, separated
      # by '-' or '/'.
      DATE_DETECT = {
        %r{\A0*[12]\d\d\d[\-/](?:(?:0[1-9])|(?:1[0-2]))[\-/][1-9]\d\z} => :iso,
      }.freeze
      # Shared stand-in used when no options are passed.
      EMPTY_OPTIONS = {}.freeze

      attr_reader :raw
      attr_reader :options

      # @param raw [Object] value to parse; anything that is not a String is
      #   returned unchanged by {#result}
      # @param options [Hash, nil] optional settings read by {#result}:
      #   :date (a key of REGION_DATE_FORMAT), :type (Date, Numeric, Integer
      #   or Float) and :ignore_error
      def initialize(raw, options = nil)
        @raw = raw
        @options = options || EMPTY_OPTIONS
      end

      # Converts the raw string into the most specific value it can.
      #
      # @return [Object] nil for NULL spellings, a Float for the
      #   infinity/NaN spellings, a Date when a date region applies, otherwise
      #   whatever SafeYAML makes of the cleaned-up string (or the
      #   whitespace-compressed string itself). Returns nil when an error
      #   occurs and options[:ignore_error] is set.
      # @raise [RuntimeError] wrapping any StandardError raised along the way,
      #   unless options[:ignore_error] is set
      def result
        return raw unless raw.is_a? String

        memo = raw.strip

        return nil if NULL.include? memo
        return 1.0/0 if INFINITY.include? memo
        return -1.0/0 if NEG_INFINITY.include? memo
        return 0.0/0 if NAN.include? memo

        # an explicit region wins; otherwise take the first matching pattern
        date_region = options[:date]
        unless date_region
          _pattern, date_region = DATE_DETECT.find { |pattern, _region| memo =~ pattern }
        end

        if date_region.nil? && options[:type] == Date
          date_region = :iso
        end

        if date_region
          yyyy, yy = REGION_DATE_FORMAT.fetch date_region
          # decided before the leading zeros are stripped
          is_yyyy = memo =~ /[1-9]\d\d\d/
          memo.sub!(/\A0+/, '')
          memo.gsub! '/', '-'
          if is_yyyy
            if memo.length < 10 && date_region == :iso
              # shorter than YYYY-MM-DD (e.g. no delimiters): let Date.parse guess
              return Date.parse(memo)
            else
              return Date.strptime(memo, yyyy)
            end
          else
            return Date.strptime(memo, yy)
          end
        end

        possible_numeric = nil
        not_numeric = nil
        certain_numeric = nil
        if [Numeric, Integer, Float].include?(options[:type])
          # caller asked for a number: skip the heuristics
          certain_numeric = true
          possible_numeric = true
          not_numeric = false
        else
          # not_numeric ||= memo =~ /[1-9][^)\d_,%.eE]/ # has a dash in the middle
          not_numeric ||= memo.include?('_')
          not_numeric ||= memo =~ %r{[1-9][/-]\d} # digit, slash or dash, digit
          not_numeric ||= memo =~ /,\d{1,2}(?:[.\D]|\z)/ # comma followed by only one or two digits
          # more non-numeric characters than numeric ones
          not_numeric ||= memo.scan(/[^\d_,%.eE]/).length > memo.scan(/[\d_,%.eE]/).length
          not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with letter or smth
          possible_numeric = !not_numeric
        end
        accounting_negative = nil
        percentage = nil
        if possible_numeric
          # a match position of 0 is truthy in Ruby, so this works as a flag
          accounting_negative = memo =~ /\A[0$]*\([0$]*/
          percentage = memo.end_with?('%')
          memo.sub!(/%\z/, '') if percentage
          memo.delete!('()') if accounting_negative # accounting negative
          # in yaml 1.1, anything starting with zero is treated as octal... in 1.2, it's 0o
          memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/ # leading zeros
          memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
          memo.sub!('D', 'e') if memo =~ /\A[+\-]?[\d.]+D[+\-]?[\d.]+\z/ # fortran double precision
          if memo.include?(',')
            # drop thousands separators from the integer part only
            a, b = memo.split('.', 2)
            a.delete! ','
            memo = b ? [a, b].join('.') : a
          end
        end

        if certain_numeric
          # e.g. a trailing unit: remove every ASCII letter before parsing
          memo.gsub!(/[a-z]/i, '')
        end

        not_safe_for_yaml = nil
        not_safe_for_yaml ||= memo =~ /\A(on|off)\z/i
        not_safe_for_yaml ||= memo.include?('#')
        not_safe_for_yaml ||= memo =~ /\A[@&,]/
        not_safe_for_yaml ||= not_numeric && memo.start_with?('0')
        not_safe_for_yaml ||= not_numeric && memo =~ /\A[^{\[]*\d[,_]/ #1,2,3, maybe a csv

        safe_for_yaml = !not_safe_for_yaml

        if safe_for_yaml
          begin
            memo = SafeYAML.load memo
          rescue StandardError, SyntaxError => e
            # Psych::SyntaxError is a ::SyntaxError on 1.9.3, which a plain
            # rescue misses. Interrupt, SystemExit and NoMemoryError are
            # deliberately left to propagate.
            $stderr.puts "#{memo.inspect} => #{e}"
          end
        end

        if possible_numeric
          # only strings can match these patterns, i.e. values YAML left alone
          case memo
          when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
            # scientific notation
            memo = memo.to_f
          when /\A[+\-]?0o/
            # octal per yaml 1.2
            memo = memo.to_i 8
          end
        end

        if memo.is_a?(String)
          # compress whitespace
          memo.gsub!(/\s+/, ' ')
        end

        memo = memo / 100.0 if percentage
        memo = -memo if accounting_negative
        memo
      rescue => e
        if options[:ignore_error]
          # nothing to see here
          nil
        else
          raise "#{memo.inspect} => #{e}"
        end
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Parse
2
- VERSION = "0.0.1.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -18,12 +18,13 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_runtime_dependency 'safe_yaml'
21
+ spec.add_runtime_dependency 'safe_yaml', '>=1'
22
+ spec.add_runtime_dependency 'activesupport'
22
23
 
23
24
  spec.add_development_dependency "bundler", "~> 1.5"
24
25
  spec.add_development_dependency "rake"
25
26
  spec.add_development_dependency "rspec"
26
27
  spec.add_development_dependency 'multi_json'
27
- spec.add_development_dependency 'activesupport'
28
28
  spec.add_development_dependency 'pry'
29
+ # spec.add_development_dependency 'twitter_cldr'
29
30
  end
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Parse do
4
- it "should parse with version 0.0.1 of the algorithm" do
4
+ it "should parse with version 0.0.2 of the algorithm" do
5
5
  v = " 1990-04-03 "
6
- expect(Parse.parse(v)).to eq(Parse.ver0_0_1(v))
6
+ expect(Parse.parse(v)).to eq(Parse.ver0_1_0(v))
7
7
  end
8
8
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Parse do
3
+ describe Parse::Algorithm::Ver0_0_1 do
4
4
  {
5
5
  '' => nil,
6
6
  'nil' => nil,
@@ -0,0 +1,277 @@
1
+ require 'spec_helper'
2
+ require 'twitter_cldr'
3
+
4
+ describe Parse::Algorithm::Ver0_1_0 do
5
+ same = [
6
+ '1,2',
7
+ '1,20',
8
+ '1,2.0',
9
+ '-1,2',
10
+ '-1,20',
11
+ '-1,2.0',
12
+ '01,2',
13
+ '01,20',
14
+ '01,2.0',
15
+ '15_000',
16
+ '15_00_0',
17
+ '15_000.0',
18
+ '15_00_0.0',
19
+ '0_015.0', # just weird
20
+ '1_2.5e-1_3',
21
+ '-1_2.5e-1_3',
22
+ '$123_456',
23
+ '$123_456.7',
24
+ '-15_000',
25
+ '-15_00_0',
26
+ '-15_000.0',
27
+ '-15_00_0.0',
28
+ '0_0-15.0', # just weird
29
+ '-$123_456',
30
+ '($123_456)',
31
+ '-$123_456.7',
32
+ '($123_456.7)',
33
+ '10_14_A',
34
+ '10_14',
35
+ '10_140',
36
+ ]
37
+ same.each do |v|
38
+ it "parses #{v.inspect} as itself (yaml=#{SafeYAML.load(v).inspect})" do
39
+ expect(Parse.ver0_1_0(v)).to eq(v)
40
+ end
41
+ end
42
+
43
+ a = {
44
+
45
+ "@ foo" => "@ foo",
46
+ ", foo" => ", foo",
47
+ "044-1-276-000" => "044-1-276-000",
48
+
49
+ ['1 BEDROOMS', { type: Numeric } ] => 1,
50
+ '1 BEDROOMS' => '1 BEDROOMS',
51
+
52
+ [ '2.4 SQFT', { type: Numeric } ] => 2.4,
53
+ '2.4 SQFT' => '2.4 SQFT',
54
+
55
+ ['000', { date: :us, ignore_error: true}] => nil,
56
+ ['7/7/2004', {date: :us}] => Date.new(2004,7,7),
57
+ "999 HOLY CROSS ROAD, COLCHESTER, VT 05446" => "999 HOLY CROSS ROAD, COLCHESTER, VT 05446",
58
+
59
+ '00020110628' => 20110628,
60
+ '0002011-06-28' => Date.new(2011,6,28),
61
+ '0002011/06/28' => Date.new(2011,6,28),
62
+ ['00020110628', {date: :iso}] => Date.new(2011,6,28),
63
+ ['00020110628', {type: Date}] => Date.new(2011,6,28),
64
+
65
+ '00019800628' => 19800628,
66
+ '0001980-06-28' => Date.new(1980,6,28),
67
+ '0001980/06/28' => Date.new(1980,6,28),
68
+ ['00019800628', {date: :iso}] => Date.new(1980,6,28),
69
+ ['00019800628', {type: Date}] => Date.new(1980,6,28),
70
+
71
+ '00030000628' => 30000628,
72
+ '0003000-06-28' => '0003000-06-28',
73
+ '0003000/06/28' => '0003000/06/28',
74
+ ['00030000628', {date: :iso}] => Date.new(3000,6,28),
75
+ ['00030000628', {type: Date}] => Date.new(3000,6,28),
76
+
77
+ ['', {type: Numeric}] => nil,
78
+
79
+ # fortran double precision
80
+ '0.225120000000000D+06' => 0.22512e6,
81
+ '0.341913000000000D+07' => 0.341913e7,
82
+ '0.2500000E-01' => 0.25e-1,
83
+ '3.1D0' => 3.1,
84
+ '-2.D0' => -2.0,
85
+
86
+ '8e-05' => 8e-5,
87
+ '8e+4' => 8e4,
88
+ '8.0e+4' => 8.0e4,
89
+ '8e-4' => 8e-4,
90
+ '8.0e-4' => 8.0e-4,
91
+ '-8e+4' => -8e4,
92
+ '-8.0e+4' => -8.0e4,
93
+ '-8e-4' => -8e-4,
94
+ '-8.0e-4' => -8.0e-4,
95
+ '8E+4' => 8e4,
96
+ '8.0E+4' => 8.0e4,
97
+ '8E-4' => 8e-4,
98
+ '8.0E-4' => 8.0e-4,
99
+ '-8E+4' => -8e4,
100
+ '-8.0E+4' => -8.0e4,
101
+ '-8E-4' => -8e-4,
102
+ '-8.0E-4' => -8.0e-4,
103
+
104
+ # http://dojotoolkit.org/reference-guide/1.9/dojo/number.html
105
+ # '1,000,000.00' => 1_000_000.0,
106
+ # '1.000.000,00' => 1_000_000.0, # german
107
+ # '1 000 000,00' => 1_000_000.0, # french
108
+ # '10,00,000.00' => 1_000_000.0, # indian
109
+
110
+ '060-10-01' => '60-10-01',
111
+ 'OFF' => 'OFF',
112
+ 'ON' => 'ON',
113
+
114
+ '& P4' => '& P4',
115
+
116
+ # EVERYTHING BELOW IS SAME AS 0.0.1
117
+
118
+ '' => nil,
119
+ 'nil' => nil,
120
+ '15' => 15,
121
+ '15,000' => 15_000,
122
+ '15.0' => 15.0,
123
+ '15,000.0' => 15_000.0,
124
+ '0015' => 15, # not octal
125
+ '0015.0' => 15.0, # not octal
126
+ '0x15' => 0x15, # hex
127
+ '0o15' => 015, # octal
128
+ '8e-05' => 8e-05,
129
+ '12.5e-13' => 12.5e-13,
130
+ '-12.5e-13' => -12.5e-13,
131
+ '$123.4' => 123.4,
132
+ '0$123.4' => 123.4,
133
+ '$15,000' => 15_000,
134
+ '0$15,000' => 15_000,
135
+ '10,000,000' => 10_000_000,
136
+ '10,000,000.00' => 10_000_000.0,
137
+ '$10,000,000.00' => 10_000_000.0,
138
+ '0$10,000,000.00' => 10_000_000.0,
139
+ '$010,000,000.00' => 10_000_000.0,
140
+
141
+ '-15' => -15,
142
+ '-15,000' => -15_000,
143
+ '-15.0' => -15.0,
144
+ '-15,000.0' => -15_000.0,
145
+ '00-15' => -15, # not octal
146
+ '00-15.0' => -15.0, # not octal
147
+ '-0x15' => -0x15, # hex
148
+ '-0o15' => -015, # octal
149
+ '-8e-05' => -8e-05,
150
+ '-$123.4' => -123.4,
151
+ '($123.4)' => -123.4,
152
+ '0($123.4)' => -123.4,
153
+ '-$15,000' => -15_000,
154
+ '($15,000)' => -15_000,
155
+ '-$123456' => -123_456,
156
+ '($123456)' => -123_456,
157
+ '-$123456.7' => -123_456.7,
158
+ '($123456.7)' => -123_456.7,
159
+ '-$123,456' => -123_456,
160
+ '($123,456)' => -123_456,
161
+ '-$123,456.7' => -123_456.7,
162
+ '($123,456.7)' => -123_456.7,
163
+ '-10,000,000' => -10_000_000,
164
+ '(10,000,000)' => -10_000_000,
165
+ '-10,000,000.00' => -10_000_000.0,
166
+ '(10,000,000.00)' => -10_000_000.0,
167
+ '-10000000' => -10_000_000,
168
+ '(10000000)' => -10_000_000,
169
+ '-10000000.00' => -10_000_000.0,
170
+ '(10000000.00)' => -10_000_000.0,
171
+ '1,200' => 1_200,
172
+ '1,200.0' => 1_200.0,
173
+ '1.0,2' => '1.0,2',
174
+ '1.0,2.0' => '1.0,2.0',
175
+ '-1,200' => -1_200,
176
+ '-1,200.0' => -1_200.0,
177
+ '-1.0,2' => '-1.0,2',
178
+ '-1.0,2.0' => '-1.0,2.0',
179
+ '01,200' => 1_200,
180
+ '01,200.0' => 1_200.0,
181
+ '01.0,2' => '01.0,2',
182
+ '01.0,2.0' => '01.0,2.0',
183
+
184
+ '05753' => 5753,
185
+ 'true' => true,
186
+ 'yes' => true,
187
+ 'false' => false,
188
+ 'no' => false,
189
+ '#DIV/0' => (1.0/0),
190
+ '#NAME?' => nil,
191
+ 'Inf' => 'Inf',
192
+ 'Infinity' => (1.0/0),
193
+ '-Infinity' => -(1.0/0),
194
+ 'NaN' => 0.0/0, # need the dot
195
+ '.NaN' => 0.0/0, # NaN
196
+ '-.inf' => -(1.0/0), # -Infinity
197
+ '-' => nil, # per bigml
198
+ '?' => nil,
199
+ '1982-01-01' => Date.new(1982,1,1),
200
+ '2010-05-05 13:42:16 Z' => Time.parse('2010-05-05 13:42:16 Z'),
201
+ '2010-05-05 13:42:16 -02:00' => Time.parse('2010-05-05 13:42:16 -02:00'),
202
+ ":not_a_symbol" => ':not_a_symbol',
203
+ '#hello' => '#hello',
204
+ "\n#hello\n#world" => '#hello #world',
205
+ "hello\nworld" => 'hello world', # whitespace compression
206
+
207
+ '0%' => 0.0,
208
+ '100%' => 1.0,
209
+ '50%' => 0.5,
210
+ '5%' => 0.05,
211
+ '00000%' => 0.0,
212
+ '0000100%' => 1.0,
213
+ '000050%' => 0.5,
214
+ '00005%' => 0.05,
215
+
216
+ ['12/25/82', {date: :us}] => Date.new(1982,12,25),
217
+ ['12/25/1982', {date: :us}] => Date.new(1982,12,25),
218
+ ['25/12/82', {date: :euro}] => Date.new(1982,12,25),
219
+ ['25/12/1982', {date: :euro}] => Date.new(1982,12,25),
220
+ ['12-25-82', {date: :us}] => Date.new(1982,12,25),
221
+ ['12-25-1982', {date: :us}] => Date.new(1982,12,25),
222
+ ['25-12-82', {date: :euro}] => Date.new(1982,12,25),
223
+ ['25-12-1982', {date: :euro}] => Date.new(1982,12,25),
224
+
225
+ '12/25/82' => '12/25/82',
226
+
227
+ ',1' => ',1', # not a csv parser
228
+ ',1,' => ',1,', # not a csv parser
229
+ '1,2,3' => '1,2,3', # not a csv parser
230
+ '[1,2,3]' => [1,2,3],
231
+ YAML.dump('a' => 1) => { 'a' => 1 },
232
+ YAML.dump(a: 1) => { ':a' => 1 }, # doesn't parse symbols
233
+ YAML.dump('a' => 1, 5 => "c\n3") => { 'a' => 1, 5 => "c\n3" },
234
+ MultiJson.dump(a: 1) => { 'a' => 1 }, # json always loses symbols
235
+ MultiJson.dump(a: 1, 5 => "c\n3") => { 'a' => 1, '5' => "c\n3" },
236
+ }
237
+
238
+ # TwitterCldr.supported_locales.each do |locale|
239
+ # 1.upto(9).map do |power|
240
+ # num = (rand * (10 ** power)).round(4)
241
+ # # a[[num.localize(locale).to_s, {locale: locale}]] = num
242
+ # a[[num.localize(locale).to_s, { locale: locale }]] = num
243
+ # # a[num.localize(locale).to_currency.to_s] = num
244
+ # end
245
+ # end
246
+
247
+ # and next dates!
248
+ # Time.now.localize(:es).to_full_s
249
+
250
# One example per table entry. Each entry is checked twice: once as given and
# once wrapped in tabs, since surrounding whitespace must not change the result.
a.each do |input, expected|
  input = Array.wrap input
  locale = if input[1].is_a?(Hash)
    input[1][:locale]
  end
  it "#{locale ? "(#{locale}) " : nil}parses #{input[0].inspect} as #{expected.inspect}" do
    got = Parse.ver0_1_0(*input)
    # $lines << [ "Parse.parse(#{input.inspect})".ljust(45), "#=> #{got.inspect}" ].join
    if expected.is_a?(Float) and expected.nan?
      # NaN never equals itself, so check the predicate instead
      expect(got.nan?).to eq(true)
    elsif expected.is_a?(Float) and got.is_a?(Float)
      # compare floats at a fixed precision
      expect(got.round(8)).to eq(expected.round(8))
    else
      expect(got).to eq(expected)
    end

    input_with_spaces = [ "\t" + input[0] + "\t", input[1] ]
    got_with_spaces = Parse.ver0_1_0(*input_with_spaces)
    # assert on the padded result (the first two branches used to re-check `got`)
    if expected.is_a?(Float) and expected.nan?
      expect(got_with_spaces.nan?).to eq(true)
    elsif expected.is_a?(Float) and got_with_spaces.is_a?(Float)
      expect(got_with_spaces.round(8)).to eq(expected.round(8))
    else
      expect(got_with_spaces).to eq(expected)
    end
  end
end
277
+ end
metadata CHANGED
@@ -1,17 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: safe_yaml
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
15
29
  requirement: !ruby/object:Gem::Requirement
16
30
  requirements:
17
31
  - - ">="
@@ -80,20 +94,6 @@ dependencies:
80
94
  - - ">="
81
95
  - !ruby/object:Gem::Version
82
96
  version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: activesupport
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: pry
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -125,10 +125,14 @@ files:
125
125
  - README.md
126
126
  - Rakefile
127
127
  - lib/parse.rb
128
+ - lib/parse/algorithm.rb
129
+ - lib/parse/algorithm/ver0_0_1.rb
130
+ - lib/parse/algorithm/ver0_1_0.rb
128
131
  - lib/parse/version.rb
129
132
  - parse.gemspec
130
133
  - spec/parse_spec.rb
131
134
  - spec/parse_ver0_0_1_spec.rb
135
+ - spec/parse_ver0_1_0_spec.rb
132
136
  - spec/spec_helper.rb
133
137
  homepage: https://github.com/seamusabshere/parse
134
138
  licenses:
@@ -150,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
150
154
  version: '0'
151
155
  requirements: []
152
156
  rubyforge_project:
153
- rubygems_version: 2.2.1
157
+ rubygems_version: 2.2.2
154
158
  signing_key:
155
159
  specification_version: 4
156
160
  summary: Detect and convert short strings into integers, floats, dates, times, booleans,
@@ -158,4 +162,5 @@ summary: Detect and convert short strings into integers, floats, dates, times, b
158
162
  test_files:
159
163
  - spec/parse_spec.rb
160
164
  - spec/parse_ver0_0_1_spec.rb
165
+ - spec/parse_ver0_1_0_spec.rb
161
166
  - spec/spec_helper.rb