quandl_babelfish 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +8 -0
- data/lib/quandl/babelfish/date_maid.rb +17 -8
- data/lib/quandl/babelfish/version.rb +1 -1
- data/quandl_babelfish.gemspec +1 -0
- data/spec/lib/quandl/babelfish/cleaner_spec.rb +5 -0
- data/spec/lib/quandl/babelfish/date_maid_spec.rb +9 -10
- data/spec/lib/quandl/babelfish/number_maid_spec.rb +5 -0
- metadata +25 -2
data/Rakefile
ADDED
@@ -49,10 +49,10 @@ module Babelfish
|
|
49
49
|
# Regular formats and Custom formats (where Date.parse and Date.strptime
|
50
50
|
# fear to tread)
|
51
51
|
if re = example.match(/^(\d{1,2})\D(\d{1,2})\D\d{4}/) # eg "07/03/2012"
|
52
|
-
if re[
|
53
|
-
return '%d-%m-%Y', nil
|
54
|
-
else
|
52
|
+
if re[2].to_i > 12
|
55
53
|
return '%m-%d-%Y', nil
|
54
|
+
else
|
55
|
+
return '%d-%m-%Y', nil
|
56
56
|
end
|
57
57
|
end
|
58
58
|
if re = example.match(/^(\d{1,2})\D(\d{1,2})\D\d{2}/) # eg "07/03/12"
|
@@ -173,27 +173,36 @@ module Babelfish
|
|
173
173
|
|
174
174
|
|
175
175
|
#find good example of date to use as template for format
|
176
|
-
|
176
|
+
# If strict == true, no ambiguity is tolerated. If strict == false, we will accept ambiguity (e.g. 02/05/2009).
|
177
|
+
def find_good_date(all_dates, strict=true)
|
177
178
|
good_sample=nil
|
178
179
|
all_dates.each do |fuzzy_date|
|
179
|
-
if usable_cell(fuzzy_date)
|
180
|
+
if usable_cell(fuzzy_date,strict)
|
180
181
|
good_sample = fuzzy_date
|
181
182
|
break
|
182
183
|
end
|
183
184
|
end
|
184
|
-
good_sample
|
185
|
+
if good_sample == nil and strict==true
|
186
|
+
# We could not find a single unambiguous cell. Let's now be less strict and see if we can find something
|
187
|
+
find_good_date(all_dates,false)
|
188
|
+
else
|
189
|
+
good_sample
|
190
|
+
end
|
185
191
|
end
|
186
192
|
|
187
|
-
|
193
|
+
# if strict == true then we refuse to accept any ambiguity
|
194
|
+
# if strict == false, we'll settle for a bit of ambiguity
|
195
|
+
def usable_cell(cell,strict)
|
188
196
|
return false if cell.nil? || cell.to_s.empty?
|
189
197
|
return false if cell.to_s.size > 20 # even annotated date can't be bigger than 20
|
190
198
|
|
191
199
|
return true if cell.to_s =~ /^\w{3}\D[456789]\d$/
|
200
|
+
|
192
201
|
# date is not usable as an example if it is ambiguous as to day and month
|
193
202
|
# 03/04/2012, for example, is ambiguous. 03/17/2012 is NOT ambiguous
|
194
203
|
if re = cell.to_s.match(/^(\d{1,2})\D(\d{1,2})\D\d{2,4}/) # e.g. 03/04/2012
|
195
204
|
if re[1].to_i <= 12 and re[2].to_i <= 12
|
196
|
-
return false
|
205
|
+
return strict==true ? false : true
|
197
206
|
else
|
198
207
|
return true
|
199
208
|
end
|
data/quandl_babelfish.gemspec
CHANGED
@@ -13,6 +13,11 @@ describe Cleaner do
|
|
13
13
|
let(:input){ [[2012, nil], [2011, 20], [2010, 30]] }
|
14
14
|
it{ should eq [[Date.new(2012,12,31), nil], [Date.new(2011,12,31), 20.0], [Date.new(2010,12,31), 30.0]] }
|
15
15
|
end
|
16
|
+
|
17
|
+
context "given nil" do
|
18
|
+
let(:input){ [[2002,'#N.A.'], [2011, 20]]}
|
19
|
+
it{ should eq [[Date.new(2002,12,31), nil], [Date.new(2011,12,31), 20.0]] }
|
20
|
+
end
|
16
21
|
|
17
22
|
context "mismatch row count" do
|
18
23
|
let(:input){ [[2012], [2011, 20], [2010, 30, 25]] }
|
@@ -3,16 +3,6 @@ require 'spec_helper'
|
|
3
3
|
include Quandl::Babelfish
|
4
4
|
describe NumberMaid do
|
5
5
|
|
6
|
-
it 'should return an exception because month and day are ambiguous YYYY' do
|
7
|
-
dates = ['01/01/2011','1/2/2011','2/3/2011','11/1/2011']
|
8
|
-
lambda {DateMaid::sweep(dates)}.should raise_error(Error::GuessDateFormat)
|
9
|
-
end
|
10
|
-
|
11
|
-
it 'should return an exception because month and day are ambiguous YY' do
|
12
|
-
dates = ['01/01/11','1/2/11','1/3/11','11/1/11']
|
13
|
-
lambda {DateMaid::sweep(dates)}.should raise_error(Error::GuessDateFormat)
|
14
|
-
end
|
15
|
-
|
16
6
|
it 'should remove unwanted characters from dates (eg. )' do
|
17
7
|
a=194.chr+160.chr
|
18
8
|
dates = ["2005#{a}","#{a}2006",'2007','2008']
|
@@ -525,5 +515,14 @@ describe NumberMaid do
|
|
525
515
|
dates[1].should == Date.new(2012,04,29)
|
526
516
|
end
|
527
517
|
|
518
|
+
it 'should handle US format even if it is ambiguous' do
|
519
|
+
dates = ['1/1/1954','1/4/1954','1/7/1954','1/11/1954']
|
520
|
+
dates = DateMaid::sweep(dates)
|
521
|
+
dates[0].should == Date.new(1954,1,1)
|
522
|
+
dates[1].should == Date.new(1954,4,1)
|
523
|
+
dates[2].should == Date.new(1954,7,1)
|
524
|
+
dates[3].should == Date.new(1954,11,1)
|
525
|
+
end
|
526
|
+
|
528
527
|
|
529
528
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quandl_babelfish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-04-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -43,6 +43,22 @@ dependencies:
|
|
43
43
|
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
46
62
|
description: Quandl Data Cleaner
|
47
63
|
email:
|
48
64
|
- sergei@quandl.com
|
@@ -55,6 +71,7 @@ files:
|
|
55
71
|
- Gemfile
|
56
72
|
- LICENSE
|
57
73
|
- README.md
|
74
|
+
- Rakefile
|
58
75
|
- UPGRADE.md
|
59
76
|
- lib/quandl/babelfish.rb
|
60
77
|
- lib/quandl/babelfish/chronometer.rb
|
@@ -89,12 +106,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
89
106
|
- - ! '>='
|
90
107
|
- !ruby/object:Gem::Version
|
91
108
|
version: '0'
|
109
|
+
segments:
|
110
|
+
- 0
|
111
|
+
hash: 4375924718490433472
|
92
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
113
|
none: false
|
94
114
|
requirements:
|
95
115
|
- - ! '>='
|
96
116
|
- !ruby/object:Gem::Version
|
97
117
|
version: '0'
|
118
|
+
segments:
|
119
|
+
- 0
|
120
|
+
hash: 4375924718490433472
|
98
121
|
requirements: []
|
99
122
|
rubyforge_project:
|
100
123
|
rubygems_version: 1.8.23
|