quandl_babelfish 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler"
2
+ require "rake"
3
+ require "bundler/gem_tasks"
4
+
5
+ $:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
6
+
7
+ require 'pry'
8
+ require "quandl/babelfish"
@@ -49,10 +49,10 @@ module Babelfish
49
49
  # Regular formats and Custom formats (where Date.parse and Date.strptime
50
50
  # fear to tread)
51
51
  if re = example.match(/^(\d{1,2})\D(\d{1,2})\D\d{4}/) # eg "07/03/2012"
52
- if re[1].to_i > 12
53
- return '%d-%m-%Y', nil
54
- else
52
+ if re[2].to_i > 12
55
53
  return '%m-%d-%Y', nil
54
+ else
55
+ return '%d-%m-%Y', nil
56
56
  end
57
57
  end
58
58
  if re = example.match(/^(\d{1,2})\D(\d{1,2})\D\d{2}/) # eg "07/03/12"
@@ -173,27 +173,36 @@ module Babelfish
173
173
 
174
174
 
175
175
  #find good example of date to use as template for format
176
- def find_good_date(all_dates)
176
+ #if strict == true, no ambiguity is tolerated. If strict == false, we will accept ambiguity (e.g. 02/05/2009).
177
+ def find_good_date(all_dates, strict=true)
177
178
  good_sample=nil
178
179
  all_dates.each do |fuzzy_date|
179
- if usable_cell(fuzzy_date)
180
+ if usable_cell(fuzzy_date,strict)
180
181
  good_sample = fuzzy_date
181
182
  break
182
183
  end
183
184
  end
184
- good_sample
185
+ if good_sample == nil and strict==true
186
+ # We could not find a single unambiguous cell. Let's now be less strict and see if we can find something
187
+ find_good_date(all_dates,false)
188
+ else
189
+ good_sample
190
+ end
185
191
  end
186
192
 
187
- def usable_cell(cell)
193
+ # if strict == true then we refuse to accept any ambiguity
194
+ # if strict == false, we'll settle for a bit of ambiguity
195
+ def usable_cell(cell,strict)
188
196
  return false if cell.nil? || cell.to_s.empty?
189
197
  return false if cell.to_s.size > 20 # even annotated date can't be bigger than 20
190
198
 
191
199
  return true if cell.to_s =~ /^\w{3}\D[456789]\d$/
200
+
192
201
  # date is not usable as an example if it is ambiguous as to day and month
193
202
  # 03/04/2012, for example, is ambiguous. 03/17/2012 is NOT ambiguous
194
203
  if re = cell.to_s.match(/^(\d{1,2})\D(\d{1,2})\D\d{2,4}/) # e.g. 03/04/2012
195
204
  if re[1].to_i <= 12 and re[2].to_i <= 12
196
- return false
205
+ return strict==true ? false : true
197
206
  else
198
207
  return true
199
208
  end
@@ -1,5 +1,5 @@
1
1
  module Quandl
2
2
  module Babelfish
3
- VERSION = '0.0.9'
3
+ VERSION = '0.0.10'
4
4
  end
5
5
  end
@@ -18,4 +18,5 @@ Gem::Specification.new do |s|
18
18
 
19
19
  s.add_development_dependency "rspec", "~> 2.13"
20
20
  s.add_development_dependency "pry"
21
+ s.add_development_dependency "rake"
21
22
  end
@@ -13,6 +13,11 @@ describe Cleaner do
13
13
  let(:input){ [[2012, nil], [2011, 20], [2010, 30]] }
14
14
  it{ should eq [[Date.new(2012,12,31), nil], [Date.new(2011,12,31), 20.0], [Date.new(2010,12,31), 30.0]] }
15
15
  end
16
+
17
+ context "given nil" do
18
+ let(:input){ [[2002,'#N.A.'], [2011, 20]]}
19
+ it{ should eq [[Date.new(2002,12,31), nil], [Date.new(2011,12,31), 20.0]] }
20
+ end
16
21
 
17
22
  context "mismatch row count" do
18
23
  let(:input){ [[2012], [2011, 20], [2010, 30, 25]] }
@@ -3,16 +3,6 @@ require 'spec_helper'
3
3
  include Quandl::Babelfish
4
4
  describe NumberMaid do
5
5
 
6
- it 'should return an exception because month and day are ambiguous YYYY' do
7
- dates = ['01/01/2011','1/2/2011','2/3/2011','11/1/2011']
8
- lambda {DateMaid::sweep(dates)}.should raise_error(Error::GuessDateFormat)
9
- end
10
-
11
- it 'should return an exception because month and day are ambiguous YY' do
12
- dates = ['01/01/11','1/2/11','1/3/11','11/1/11']
13
- lambda {DateMaid::sweep(dates)}.should raise_error(Error::GuessDateFormat)
14
- end
15
-
16
6
  it 'should remove unwanted characters from dates (eg. &nbsp;)' do
17
7
  a=194.chr+160.chr
18
8
  dates = ["2005#{a}","#{a}2006",'2007','2008']
@@ -525,5 +515,14 @@ describe NumberMaid do
525
515
  dates[1].should == Date.new(2012,04,29)
526
516
  end
527
517
 
518
+ it 'should handle US format even if it is ambiguous' do
519
+ dates = ['1/1/1954','1/4/1954','1/7/1954','1/11/1954']
520
+ dates = DateMaid::sweep(dates)
521
+ dates[0].should == Date.new(1954,1,1)
522
+ dates[1].should == Date.new(1954,4,1)
523
+ dates[2].should == Date.new(1954,7,1)
524
+ dates[3].should == Date.new(1954,11,1)
525
+ end
526
+
528
527
 
529
528
  end
@@ -123,4 +123,9 @@ describe NumberMaid do
123
123
  numbers.should == 0.12345678901235
124
124
  end
125
125
 
126
+ it "should handle pound in front" do
127
+ NumberMaid::clean('#N/A').should be_nil
128
+ end
129
+
130
+
126
131
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: quandl_babelfish
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-10 00:00:00.000000000 Z
12
+ date: 2014-04-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -43,6 +43,22 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
46
62
  description: Quandl Data Cleaner
47
63
  email:
48
64
  - sergei@quandl.com
@@ -55,6 +71,7 @@ files:
55
71
  - Gemfile
56
72
  - LICENSE
57
73
  - README.md
74
+ - Rakefile
58
75
  - UPGRADE.md
59
76
  - lib/quandl/babelfish.rb
60
77
  - lib/quandl/babelfish/chronometer.rb
@@ -89,12 +106,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
89
106
  - - ! '>='
90
107
  - !ruby/object:Gem::Version
91
108
  version: '0'
109
+ segments:
110
+ - 0
111
+ hash: 4375924718490433472
92
112
  required_rubygems_version: !ruby/object:Gem::Requirement
93
113
  none: false
94
114
  requirements:
95
115
  - - ! '>='
96
116
  - !ruby/object:Gem::Version
97
117
  version: '0'
118
+ segments:
119
+ - 0
120
+ hash: 4375924718490433472
98
121
  requirements: []
99
122
  rubyforge_project:
100
123
  rubygems_version: 1.8.23