quandl_babelfish 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +8 -0
- data/lib/quandl/babelfish/date_maid.rb +17 -8
- data/lib/quandl/babelfish/version.rb +1 -1
- data/quandl_babelfish.gemspec +1 -0
- data/spec/lib/quandl/babelfish/cleaner_spec.rb +5 -0
- data/spec/lib/quandl/babelfish/date_maid_spec.rb +9 -10
- data/spec/lib/quandl/babelfish/number_maid_spec.rb +5 -0
- metadata +25 -2
data/Rakefile
ADDED
@@ -49,10 +49,10 @@ module Babelfish
|
|
49
49
|
# Regular formats and Custom formats (where Date.parse and Date.strptime
|
50
50
|
# fear to tread)
|
51
51
|
if re = example.match(/^(\d{1,2})\D(\d{1,2})\D\d{4}/) # eg "07/03/2012"
|
52
|
-
if re[
|
53
|
-
return '%d-%m-%Y', nil
|
54
|
-
else
|
52
|
+
if re[2].to_i > 12
|
55
53
|
return '%m-%d-%Y', nil
|
54
|
+
else
|
55
|
+
return '%d-%m-%Y', nil
|
56
56
|
end
|
57
57
|
end
|
58
58
|
if re = example.match(/^(\d{1,2})\D(\d{1,2})\D\d{2}/) # eg "07/03/12"
|
@@ -173,27 +173,36 @@ module Babelfish
|
|
173
173
|
|
174
174
|
|
175
175
|
#find good example of date to use as template for format
|
176
|
-
|
176
|
+
#if strict == true, no ambiguity is tolerated. If strict == false, we will accept ambiguity (e.g. 02/05/2009).
|
177
|
+
def find_good_date(all_dates, strict=true)
|
177
178
|
good_sample=nil
|
178
179
|
all_dates.each do |fuzzy_date|
|
179
|
-
if usable_cell(fuzzy_date)
|
180
|
+
if usable_cell(fuzzy_date,strict)
|
180
181
|
good_sample = fuzzy_date
|
181
182
|
break
|
182
183
|
end
|
183
184
|
end
|
184
|
-
good_sample
|
185
|
+
if good_sample == nil and strict==true
|
186
|
+
# We could not find a single unambiguous cell. Let's now be less strict and see if we can find something
|
187
|
+
find_good_date(all_dates,false)
|
188
|
+
else
|
189
|
+
good_sample
|
190
|
+
end
|
185
191
|
end
|
186
192
|
|
187
|
-
|
193
|
+
# if strict == true then we refuse to accept any ambiguity
|
194
|
+
# if strict == false, we'll settle for a bit of ambiguity
|
195
|
+
def usable_cell(cell,strict)
|
188
196
|
return false if cell.nil? || cell.to_s.empty?
|
189
197
|
return false if cell.to_s.size > 20 # even annotated date can't be bigger than 20
|
190
198
|
|
191
199
|
return true if cell.to_s =~ /^\w{3}\D[456789]\d$/
|
200
|
+
|
192
201
|
# date is not usable as an example if it is ambiguous as to day and month
|
193
202
|
# 03/04/2012, for example, is ambiguous. 03/17/2012 is NOT ambiguous
|
194
203
|
if re = cell.to_s.match(/^(\d{1,2})\D(\d{1,2})\D\d{2,4}/) # e.g. 03/04/2012
|
195
204
|
if re[1].to_i <= 12 and re[2].to_i <= 12
|
196
|
-
return false
|
205
|
+
return strict==true ? false : true
|
197
206
|
else
|
198
207
|
return true
|
199
208
|
end
|
data/quandl_babelfish.gemspec
CHANGED
@@ -13,6 +13,11 @@ describe Cleaner do
|
|
13
13
|
let(:input){ [[2012, nil], [2011, 20], [2010, 30]] }
|
14
14
|
it{ should eq [[Date.new(2012,12,31), nil], [Date.new(2011,12,31), 20.0], [Date.new(2010,12,31), 30.0]] }
|
15
15
|
end
|
16
|
+
|
17
|
+
context "given nil" do
|
18
|
+
let(:input){ [[2002,'#N.A.'], [2011, 20]]}
|
19
|
+
it{ should eq [[Date.new(2002,12,31), nil], [Date.new(2011,12,31), 20.0]] }
|
20
|
+
end
|
16
21
|
|
17
22
|
context "mismatch row count" do
|
18
23
|
let(:input){ [[2012], [2011, 20], [2010, 30, 25]] }
|
@@ -3,16 +3,6 @@ require 'spec_helper'
|
|
3
3
|
include Quandl::Babelfish
|
4
4
|
describe NumberMaid do
|
5
5
|
|
6
|
-
it 'should return an exception because month and day are ambiguous YYYY' do
|
7
|
-
dates = ['01/01/2011','1/2/2011','2/3/2011','11/1/2011']
|
8
|
-
lambda {DateMaid::sweep(dates)}.should raise_error(Error::GuessDateFormat)
|
9
|
-
end
|
10
|
-
|
11
|
-
it 'should return an exception because month and day are ambiguous YY' do
|
12
|
-
dates = ['01/01/11','1/2/11','1/3/11','11/1/11']
|
13
|
-
lambda {DateMaid::sweep(dates)}.should raise_error(Error::GuessDateFormat)
|
14
|
-
end
|
15
|
-
|
16
6
|
it 'should remove unwanted characters from dates (eg. )' do
|
17
7
|
a=194.chr+160.chr
|
18
8
|
dates = ["2005#{a}","#{a}2006",'2007','2008']
|
@@ -525,5 +515,14 @@ describe NumberMaid do
|
|
525
515
|
dates[1].should == Date.new(2012,04,29)
|
526
516
|
end
|
527
517
|
|
518
|
+
it 'should handle US format even if it is ambiguous' do
|
519
|
+
dates = ['1/1/1954','1/4/1954','1/7/1954','1/11/1954']
|
520
|
+
dates = DateMaid::sweep(dates)
|
521
|
+
dates[0].should == Date.new(1954,1,1)
|
522
|
+
dates[1].should == Date.new(1954,4,1)
|
523
|
+
dates[2].should == Date.new(1954,7,1)
|
524
|
+
dates[3].should == Date.new(1954,11,1)
|
525
|
+
end
|
526
|
+
|
528
527
|
|
529
528
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quandl_babelfish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-04-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -43,6 +43,22 @@ dependencies:
|
|
43
43
|
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
46
62
|
description: Quandl Data Cleaner
|
47
63
|
email:
|
48
64
|
- sergei@quandl.com
|
@@ -55,6 +71,7 @@ files:
|
|
55
71
|
- Gemfile
|
56
72
|
- LICENSE
|
57
73
|
- README.md
|
74
|
+
- Rakefile
|
58
75
|
- UPGRADE.md
|
59
76
|
- lib/quandl/babelfish.rb
|
60
77
|
- lib/quandl/babelfish/chronometer.rb
|
@@ -89,12 +106,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
89
106
|
- - ! '>='
|
90
107
|
- !ruby/object:Gem::Version
|
91
108
|
version: '0'
|
109
|
+
segments:
|
110
|
+
- 0
|
111
|
+
hash: 4375924718490433472
|
92
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
113
|
none: false
|
94
114
|
requirements:
|
95
115
|
- - ! '>='
|
96
116
|
- !ruby/object:Gem::Version
|
97
117
|
version: '0'
|
118
|
+
segments:
|
119
|
+
- 0
|
120
|
+
hash: 4375924718490433472
|
98
121
|
requirements: []
|
99
122
|
rubyforge_project:
|
100
123
|
rubygems_version: 1.8.23
|