quandl_babelfish 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler"
2
+ require "rake"
3
+ require "bundler/gem_tasks"
4
+
5
+ $:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
6
+
7
+ require 'pry'
8
+ require "quandl/babelfish"
@@ -49,10 +49,10 @@ module Babelfish
49
49
  # Regular formats and Custom formats (where Date.parse and Date.strptime
50
50
  # fear to tread)
51
51
  if re = example.match(/^(\d{1,2})\D(\d{1,2})\D\d{4}/) # eg "07/03/2012"
52
- if re[1].to_i > 12
53
- return '%d-%m-%Y', nil
54
- else
52
+ if re[2].to_i > 12
55
53
  return '%m-%d-%Y', nil
54
+ else
55
+ return '%d-%m-%Y', nil
56
56
  end
57
57
  end
58
58
  if re = example.match(/^(\d{1,2})\D(\d{1,2})\D\d{2}/) # eg "07/03/12"
@@ -173,27 +173,36 @@ module Babelfish
173
173
 
174
174
 
175
175
  #find good example of date to use as template for format
176
- def find_good_date(all_dates)
176
+ # if strict == true, no ambiguity is tolerated. If strict == false, we will accept ambiguity (e.g. 02/05/2009).
177
+ def find_good_date(all_dates, strict=true)
177
178
  good_sample=nil
178
179
  all_dates.each do |fuzzy_date|
179
- if usable_cell(fuzzy_date)
180
+ if usable_cell(fuzzy_date,strict)
180
181
  good_sample = fuzzy_date
181
182
  break
182
183
  end
183
184
  end
184
- good_sample
185
+ if good_sample == nil and strict==true
186
+ # We could not find a single unambiguous cell. Let's now be less strict and see if we can find something
187
+ find_good_date(all_dates,false)
188
+ else
189
+ good_sample
190
+ end
185
191
  end
186
192
 
187
- def usable_cell(cell)
193
+ # if strict == true then we refuse to accept any ambiguity
194
+ # if strict == false, we'll settle for a bit of ambiguity
195
+ def usable_cell(cell,strict)
188
196
  return false if cell.nil? || cell.to_s.empty?
189
197
  return false if cell.to_s.size > 20 # even annotated date can't be bigger than 20
190
198
 
191
199
  return true if cell.to_s =~ /^\w{3}\D[456789]\d$/
200
+
192
201
  # date is not usable as an example if it is ambiguous as to day and month
193
202
  # 03/04/2012, for example, is ambiguous. 03/17/2012 is NOT ambiguous
194
203
  if re = cell.to_s.match(/^(\d{1,2})\D(\d{1,2})\D\d{2,4}/) # e.g. 03/04/2012
195
204
  if re[1].to_i <= 12 and re[2].to_i <= 12
196
- return false
205
+ return strict==true ? false : true
197
206
  else
198
207
  return true
199
208
  end
@@ -1,5 +1,5 @@
1
1
  module Quandl
2
2
  module Babelfish
3
- VERSION = '0.0.9'
3
+ VERSION = '0.0.10'
4
4
  end
5
5
  end
@@ -18,4 +18,5 @@ Gem::Specification.new do |s|
18
18
 
19
19
  s.add_development_dependency "rspec", "~> 2.13"
20
20
  s.add_development_dependency "pry"
21
+ s.add_development_dependency "rake"
21
22
  end
@@ -13,6 +13,11 @@ describe Cleaner do
13
13
  let(:input){ [[2012, nil], [2011, 20], [2010, 30]] }
14
14
  it{ should eq [[Date.new(2012,12,31), nil], [Date.new(2011,12,31), 20.0], [Date.new(2010,12,31), 30.0]] }
15
15
  end
16
+
17
+ context "given nil" do
18
+ let(:input){ [[2002,'#N.A.'], [2011, 20]]}
19
+ it{ should eq [[Date.new(2002,12,31), nil], [Date.new(2011,12,31), 20.0]] }
20
+ end
16
21
 
17
22
  context "mismatch row count" do
18
23
  let(:input){ [[2012], [2011, 20], [2010, 30, 25]] }
@@ -3,16 +3,6 @@ require 'spec_helper'
3
3
  include Quandl::Babelfish
4
4
  describe NumberMaid do
5
5
 
6
- it 'should return an exception because month and day are ambiguous YYYY' do
7
- dates = ['01/01/2011','1/2/2011','2/3/2011','11/1/2011']
8
- lambda {DateMaid::sweep(dates)}.should raise_error(Error::GuessDateFormat)
9
- end
10
-
11
- it 'should return an exception because month and day are ambiguous YY' do
12
- dates = ['01/01/11','1/2/11','1/3/11','11/1/11']
13
- lambda {DateMaid::sweep(dates)}.should raise_error(Error::GuessDateFormat)
14
- end
15
-
16
6
  it 'should remove unwanted characters from dates (eg. &nbsp;)' do
17
7
  a=194.chr+160.chr
18
8
  dates = ["2005#{a}","#{a}2006",'2007','2008']
@@ -525,5 +515,14 @@ describe NumberMaid do
525
515
  dates[1].should == Date.new(2012,04,29)
526
516
  end
527
517
 
518
+ it 'should handle US format even if it is ambiguous' do
519
+ dates = ['1/1/1954','1/4/1954','1/7/1954','1/11/1954']
520
+ dates = DateMaid::sweep(dates)
521
+ dates[0].should == Date.new(1954,1,1)
522
+ dates[1].should == Date.new(1954,4,1)
523
+ dates[2].should == Date.new(1954,7,1)
524
+ dates[3].should == Date.new(1954,11,1)
525
+ end
526
+
528
527
 
529
528
  end
@@ -123,4 +123,9 @@ describe NumberMaid do
123
123
  numbers.should == 0.12345678901235
124
124
  end
125
125
 
126
+ it "should handle pound in front" do
127
+ NumberMaid::clean('#N/A').should be_nil
128
+ end
129
+
130
+
126
131
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: quandl_babelfish
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-10 00:00:00.000000000 Z
12
+ date: 2014-04-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -43,6 +43,22 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
46
62
  description: Quandl Data Cleaner
47
63
  email:
48
64
  - sergei@quandl.com
@@ -55,6 +71,7 @@ files:
55
71
  - Gemfile
56
72
  - LICENSE
57
73
  - README.md
74
+ - Rakefile
58
75
  - UPGRADE.md
59
76
  - lib/quandl/babelfish.rb
60
77
  - lib/quandl/babelfish/chronometer.rb
@@ -89,12 +106,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
89
106
  - - ! '>='
90
107
  - !ruby/object:Gem::Version
91
108
  version: '0'
109
+ segments:
110
+ - 0
111
+ hash: 4375924718490433472
92
112
  required_rubygems_version: !ruby/object:Gem::Requirement
93
113
  none: false
94
114
  requirements:
95
115
  - - ! '>='
96
116
  - !ruby/object:Gem::Version
97
117
  version: '0'
118
+ segments:
119
+ - 0
120
+ hash: 4375924718490433472
98
121
  requirements: []
99
122
  rubyforge_project:
100
123
  rubygems_version: 1.8.23