quandl_babelfish 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -7
- data/.travis.yml +12 -12
- data/Gemfile +1 -1
- data/LICENSE +7 -7
- data/README.md +18 -18
- data/UPGRADE.md +38 -31
- data/lib/quandl/babelfish.rb +28 -28
- data/lib/quandl/babelfish/chronometer.rb +43 -43
- data/lib/quandl/babelfish/cleaner.rb +33 -32
- data/lib/quandl/babelfish/date_maid.rb +237 -237
- data/lib/quandl/babelfish/helper.rb +8 -8
- data/lib/quandl/babelfish/number_maid.rb +79 -79
- data/lib/quandl/babelfish/version.rb +4 -4
- data/lib/quandl/error/guess_date_format.rb +4 -4
- data/lib/quandl/error/invalid_date.rb +4 -4
- data/lib/quandl/error/standard.rb +26 -26
- data/lib/quandl/error/unknown_date_format.rb +4 -4
- data/quandl_babelfish.gemspec +21 -21
- data/spec/lib/quandl/babelfish/chronometer_spec.rb +50 -50
- data/spec/lib/quandl/babelfish/cleaner_spec.rb +70 -70
- data/spec/lib/quandl/babelfish/date_maid_spec.rb +528 -528
- data/spec/lib/quandl/babelfish/helper_spec.rb +44 -44
- data/spec/lib/quandl/babelfish/number_maid_spec.rb +126 -126
- data/spec/lib/quandl/babelfish_spec.rb +15 -15
- data/spec/spec_helper.rb +12 -12
- data/spec/support/matchers/be_eq_at_index.rb +31 -31
- metadata +12 -4
@@ -1,9 +1,9 @@
|
|
1
|
-
class Helper
|
2
|
-
|
3
|
-
# Actions expect a square table, make it so
|
4
|
-
def self.make_square(table)
|
5
|
-
longest_row = 0
|
6
|
-
table.each { |row| longest_row = [longest_row, row.length].max }
|
7
|
-
table.collect { |row| row += Array.new(longest_row - row.length, nil) }
|
8
|
-
end
|
1
|
+
class Helper
|
2
|
+
|
3
|
+
# Actions expect a square table, make it so
|
4
|
+
def self.make_square(table)
|
5
|
+
longest_row = 0
|
6
|
+
table.each { |row| longest_row = [longest_row, row.length].max }
|
7
|
+
table.collect { |row| row += Array.new(longest_row - row.length, nil) }
|
8
|
+
end
|
9
9
|
end
|
@@ -1,80 +1,80 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Babelfish
|
3
|
-
|
4
|
-
#responsible for number cleaning
|
5
|
-
class NumberMaid
|
6
|
-
@defaults = {
|
7
|
-
:decimal_mark => Regexp.escape('.'),
|
8
|
-
:ignore_brackets => false, # Brackets ARE negative by default
|
9
|
-
}
|
10
|
-
|
11
|
-
@settings = @defaults #init with defaults
|
12
|
-
|
13
|
-
class << self
|
14
|
-
|
15
|
-
|
16
|
-
def init(user_settings)
|
17
|
-
@settings=@defaults.merge(user_settings)
|
18
|
-
@escaped_decimal = Regexp.escape(@settings[:decimal_mark])
|
19
|
-
end
|
20
|
-
|
21
|
-
#cleans each number one by one
|
22
|
-
def clean(dirty_numbers)
|
23
|
-
return nil if dirty_numbers.nil?
|
24
|
-
numbers=[]
|
25
|
-
Array(dirty_numbers).each do |cell|
|
26
|
-
numbers << cell_to_number(cell.to_s)
|
27
|
-
end
|
28
|
-
|
29
|
-
(numbers.size == 1) ? numbers[0] : numbers
|
30
|
-
end
|
31
|
-
|
32
|
-
def cell_to_number(num)
|
33
|
-
return nil if num.nil?
|
34
|
-
# Remove annotations
|
35
|
-
# if there is something in parenthesis and a number elsewhere, nuke the parenthesis
|
36
|
-
temp = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
|
37
|
-
num = temp if temp.match(/\d/)
|
38
|
-
|
39
|
-
num.gsub!("est.", '')
|
40
|
-
|
41
|
-
#check for exponents by searching for 'e' 'E' or variations of 'x 10' '*10' and 'X10^'
|
42
|
-
is_exp = false
|
43
|
-
expmultiplier = 1
|
44
|
-
m = /(\s)*(E|e|[X|x|\*](\s)*10(\^)?)(\s)*/.match(num)
|
45
|
-
#check if match is made, preceeded by a number/decimal, and succeeded by a digit or a plus/minus sign
|
46
|
-
if !m.nil? and m.pre_match =~ /[0-9#{@escaped_decimal}]$/ and m.post_match =~ /^([\-+0-9])/
|
47
|
-
is_exp = true
|
48
|
-
num = m.pre_match
|
49
|
-
expmultiplier = 10 ** /^[0-9\-+]*/.match(m.post_match)[0].to_i
|
50
|
-
end
|
51
|
-
is_million = (num =~ /million/i)
|
52
|
-
is_billion = (num =~ /billion/i)
|
53
|
-
is_negative = (num =~ /-[\d]/ or (!@settings[:ignore_brackets] and num =~ /\([\d]/))
|
54
|
-
|
55
|
-
# watch out for two numbers, like a range eg "27.3 - 33.9"
|
56
|
-
# how: if you a see a number followed by a non number char that is not the decimal marker, kill everything to the right of that
|
57
|
-
num.gsub!(/(\d) (\d)/, '\1\2')
|
58
|
-
if m = num.match(/-?\s*[,\d\.]+/)
|
59
|
-
num = m[0]
|
60
|
-
end
|
61
|
-
|
62
|
-
# only keep #s and decimal mark
|
63
|
-
num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
|
64
|
-
num.gsub!(/[^0-9]/, '.')
|
65
|
-
|
66
|
-
return nil if num.nil? || num !~ /[\d]/
|
67
|
-
return nil if num.end_with?(".")
|
68
|
-
return nil if num.count(".") > 1
|
69
|
-
cell = num.nil? ? 0.0 : Float("%.#{14}g" % num)
|
70
|
-
cell *= 1e6 if is_million
|
71
|
-
cell *= 1e9 if is_billion
|
72
|
-
cell *= -1 if is_negative
|
73
|
-
cell *= expmultiplier if is_exp
|
74
|
-
cell
|
75
|
-
end
|
76
|
-
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Babelfish
|
3
|
+
|
4
|
+
#responsible for number cleaning
|
5
|
+
class NumberMaid
|
6
|
+
@defaults = {
|
7
|
+
:decimal_mark => Regexp.escape('.'),
|
8
|
+
:ignore_brackets => false, # Brackets ARE negative by default
|
9
|
+
}
|
10
|
+
|
11
|
+
@settings = @defaults #init with defaults
|
12
|
+
|
13
|
+
class << self
|
14
|
+
|
15
|
+
|
16
|
+
def init(user_settings)
|
17
|
+
@settings=@defaults.merge(user_settings)
|
18
|
+
@escaped_decimal = Regexp.escape(@settings[:decimal_mark])
|
19
|
+
end
|
20
|
+
|
21
|
+
#cleans each number one by one
|
22
|
+
def clean(dirty_numbers)
|
23
|
+
return nil if dirty_numbers.nil?
|
24
|
+
numbers=[]
|
25
|
+
Array(dirty_numbers).each do |cell|
|
26
|
+
numbers << cell_to_number(cell.to_s)
|
27
|
+
end
|
28
|
+
|
29
|
+
(numbers.size == 1) ? numbers[0] : numbers
|
30
|
+
end
|
31
|
+
|
32
|
+
def cell_to_number(num)
|
33
|
+
return nil if num.nil?
|
34
|
+
# Remove annotations
|
35
|
+
# if there is something in parenthesis and a number elsewhere, nuke the parenthesis
|
36
|
+
temp = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
|
37
|
+
num = temp if temp.match(/\d/)
|
38
|
+
|
39
|
+
num.gsub!("est.", '')
|
40
|
+
|
41
|
+
#check for exponents by searching for 'e' 'E' or variations of 'x 10' '*10' and 'X10^'
|
42
|
+
is_exp = false
|
43
|
+
expmultiplier = 1
|
44
|
+
m = /(\s)*(E|e|[X|x|\*](\s)*10(\^)?)(\s)*/.match(num)
|
45
|
+
#check if match is made, preceeded by a number/decimal, and succeeded by a digit or a plus/minus sign
|
46
|
+
if !m.nil? and m.pre_match =~ /[0-9#{@escaped_decimal}]$/ and m.post_match =~ /^([\-+0-9])/
|
47
|
+
is_exp = true
|
48
|
+
num = m.pre_match
|
49
|
+
expmultiplier = 10 ** /^[0-9\-+]*/.match(m.post_match)[0].to_i
|
50
|
+
end
|
51
|
+
is_million = (num =~ /million/i)
|
52
|
+
is_billion = (num =~ /billion/i)
|
53
|
+
is_negative = (num =~ /-[\d]/ or (!@settings[:ignore_brackets] and num =~ /\([\d]/))
|
54
|
+
|
55
|
+
# watch out for two numbers, like a range eg "27.3 - 33.9"
|
56
|
+
# how: if you a see a number followed by a non number char that is not the decimal marker, kill everything to the right of that
|
57
|
+
num.gsub!(/(\d) (\d)/, '\1\2')
|
58
|
+
if m = num.match(/-?\s*[,\d\.]+/)
|
59
|
+
num = m[0]
|
60
|
+
end
|
61
|
+
|
62
|
+
# only keep #s and decimal mark
|
63
|
+
num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
|
64
|
+
num.gsub!(/[^0-9]/, '.')
|
65
|
+
|
66
|
+
return nil if num.nil? || num !~ /[\d]/
|
67
|
+
return nil if num.end_with?(".")
|
68
|
+
return nil if num.count(".") > 1
|
69
|
+
cell = num.nil? ? 0.0 : Float("%.#{14}g" % num)
|
70
|
+
cell *= 1e6 if is_million
|
71
|
+
cell *= 1e9 if is_billion
|
72
|
+
cell *= -1 if is_negative
|
73
|
+
cell *= expmultiplier if is_exp
|
74
|
+
cell
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
80
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Babelfish
|
3
|
-
VERSION = '0.0.7'
|
4
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Babelfish
|
3
|
+
VERSION = '0.0.8'
|
4
|
+
end
|
5
5
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Error
|
3
|
-
class GuessDateFormat < Quandl::Error::Standard; end
|
4
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Error
|
3
|
+
class GuessDateFormat < Quandl::Error::Standard; end
|
4
|
+
end
|
5
5
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Error
|
3
|
-
class InvalidDate < Quandl::Error::Standard; end
|
4
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Error
|
3
|
+
class InvalidDate < Quandl::Error::Standard; end
|
4
|
+
end
|
5
5
|
end
|
@@ -1,27 +1,27 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Error
|
3
|
-
class Standard < StandardError
|
4
|
-
|
5
|
-
attr_accessor :details
|
6
|
-
|
7
|
-
def line
|
8
|
-
detail :line
|
9
|
-
end
|
10
|
-
def context
|
11
|
-
detail :context
|
12
|
-
end
|
13
|
-
def problem
|
14
|
-
detail :problem
|
15
|
-
end
|
16
|
-
|
17
|
-
def detail(key)
|
18
|
-
details.send(key) if details.respond_to?(key)
|
19
|
-
end
|
20
|
-
|
21
|
-
def initialize(opts=nil)
|
22
|
-
@details = OpenStruct.new( opts ) if opts && opts.is_a?(Hash)
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
26
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Error
|
3
|
+
class Standard < StandardError
|
4
|
+
|
5
|
+
attr_accessor :details
|
6
|
+
|
7
|
+
def line
|
8
|
+
detail :line
|
9
|
+
end
|
10
|
+
def context
|
11
|
+
detail :context
|
12
|
+
end
|
13
|
+
def problem
|
14
|
+
detail :problem
|
15
|
+
end
|
16
|
+
|
17
|
+
def detail(key)
|
18
|
+
details.send(key) if details.respond_to?(key)
|
19
|
+
end
|
20
|
+
|
21
|
+
def initialize(opts=nil)
|
22
|
+
@details = OpenStruct.new( opts ) if opts && opts.is_a?(Hash)
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
27
27
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Error
|
3
|
-
class UnknownDateFormat < Quandl::Error::Standard; end
|
4
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Error
|
3
|
+
class UnknownDateFormat < Quandl::Error::Standard; end
|
4
|
+
end
|
5
5
|
end
|
data/quandl_babelfish.gemspec
CHANGED
@@ -1,21 +1,21 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
$:.push File.expand_path("../lib", __FILE__)
|
3
|
-
require "quandl/babelfish/version"
|
4
|
-
|
5
|
-
Gem::Specification.new do |s|
|
6
|
-
s.name = "quandl_babelfish"
|
7
|
-
s.version = Quandl::Babelfish::VERSION
|
8
|
-
s.authors = ["Sergei Ryshkevich"]
|
9
|
-
s.email = ["sergei@quandl.com"]
|
10
|
-
s.homepage = "http://quandl.com/"
|
11
|
-
s.license = "MIT"
|
12
|
-
s.summary = "Quandl Data Cleaner"
|
13
|
-
s.description = "Quandl Data Cleaner"
|
14
|
-
|
15
|
-
s.files = `git ls-files`.split("\n")
|
16
|
-
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
-
s.require_paths = ["lib"]
|
18
|
-
|
19
|
-
s.add_development_dependency "rspec", "~> 2.13"
|
20
|
-
s.add_development_dependency "pry"
|
21
|
-
end
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "quandl/babelfish/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "quandl_babelfish"
|
7
|
+
s.version = Quandl::Babelfish::VERSION
|
8
|
+
s.authors = ["Sergei Ryshkevich"]
|
9
|
+
s.email = ["sergei@quandl.com"]
|
10
|
+
s.homepage = "http://quandl.com/"
|
11
|
+
s.license = "MIT"
|
12
|
+
s.summary = "Quandl Data Cleaner"
|
13
|
+
s.description = "Quandl Data Cleaner"
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.require_paths = ["lib"]
|
18
|
+
|
19
|
+
s.add_development_dependency "rspec", "~> 2.13"
|
20
|
+
s.add_development_dependency "pry"
|
21
|
+
end
|
@@ -1,51 +1,51 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
include Quandl::Babelfish
|
4
|
-
describe Chronometer do
|
5
|
-
|
6
|
-
it 'should calculate frequency = daily' do
|
7
|
-
table = [['2012-01-01','1','2'],['2012-01-02','3','4'],['2012-01-03','5','6']]
|
8
|
-
frequency = Chronometer.process(table)
|
9
|
-
frequency.should == 'daily'
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'should calculate frequency = monthly' do
|
13
|
-
table = [['2012-01-01','1','2'],['2012-02-01','3','4'],['2012-04-01','5','6'],
|
14
|
-
['2012-04-01','1','2'],['2012-05-01','3','4'],['2012-06-01','5','6']]
|
15
|
-
frequency = Chronometer.process(table)
|
16
|
-
frequency.should == 'monthly'
|
17
|
-
end
|
18
|
-
|
19
|
-
it 'should calculate frequency = quarterly' do
|
20
|
-
table = [['2012-01-01','1','2'],['2012-04-01','3','4'],['2012-07-01','5','6'],
|
21
|
-
['2012-10-01','1','2'],['2013-01-01','3','4'],['2012-04-01','5','6']]
|
22
|
-
frequency = Chronometer.process(table)
|
23
|
-
frequency.should == 'quarterly'
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'should calculate frequency = quarterly' do
|
27
|
-
table = [['2012-01-01','1','2'],['2012-07-01','3','4'],['2013-01-01','5','6'],
|
28
|
-
['2013-07-01','1','2']]
|
29
|
-
frequency = Chronometer.process(table)
|
30
|
-
frequency.should == 'quarterly'
|
31
|
-
end
|
32
|
-
|
33
|
-
it 'should calculate frequency = annual' do
|
34
|
-
table = [['2008-01-01','1','2'],['2008-12-01','3','4'],['2010-01-01','5','6'],
|
35
|
-
['2011-01-01','1','2'],['2013-01-01','5','6']]
|
36
|
-
frequency = Chronometer.process(table)
|
37
|
-
frequency.should == 'annual'
|
38
|
-
end
|
39
|
-
|
40
|
-
it 'should calculate frequency = daily if only one row' do
|
41
|
-
table = [['2010-01-01','1','2']]
|
42
|
-
frequency = Chronometer.process(table)
|
43
|
-
frequency.should == 'daily'
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'should calculate frequency = nil if nil table passed' do
|
47
|
-
frequency = Chronometer.process(nil)
|
48
|
-
frequency.should == nil
|
49
|
-
end
|
50
|
-
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
include Quandl::Babelfish
|
4
|
+
describe Chronometer do
|
5
|
+
|
6
|
+
it 'should calculate frequency = daily' do
|
7
|
+
table = [['2012-01-01','1','2'],['2012-01-02','3','4'],['2012-01-03','5','6']]
|
8
|
+
frequency = Chronometer.process(table)
|
9
|
+
frequency.should == 'daily'
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should calculate frequency = monthly' do
|
13
|
+
table = [['2012-01-01','1','2'],['2012-02-01','3','4'],['2012-04-01','5','6'],
|
14
|
+
['2012-04-01','1','2'],['2012-05-01','3','4'],['2012-06-01','5','6']]
|
15
|
+
frequency = Chronometer.process(table)
|
16
|
+
frequency.should == 'monthly'
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'should calculate frequency = quarterly' do
|
20
|
+
table = [['2012-01-01','1','2'],['2012-04-01','3','4'],['2012-07-01','5','6'],
|
21
|
+
['2012-10-01','1','2'],['2013-01-01','3','4'],['2012-04-01','5','6']]
|
22
|
+
frequency = Chronometer.process(table)
|
23
|
+
frequency.should == 'quarterly'
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should calculate frequency = quarterly' do
|
27
|
+
table = [['2012-01-01','1','2'],['2012-07-01','3','4'],['2013-01-01','5','6'],
|
28
|
+
['2013-07-01','1','2']]
|
29
|
+
frequency = Chronometer.process(table)
|
30
|
+
frequency.should == 'quarterly'
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'should calculate frequency = annual' do
|
34
|
+
table = [['2008-01-01','1','2'],['2008-12-01','3','4'],['2010-01-01','5','6'],
|
35
|
+
['2011-01-01','1','2'],['2013-01-01','5','6']]
|
36
|
+
frequency = Chronometer.process(table)
|
37
|
+
frequency.should == 'annual'
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'should calculate frequency = daily if only one row' do
|
41
|
+
table = [['2010-01-01','1','2']]
|
42
|
+
frequency = Chronometer.process(table)
|
43
|
+
frequency.should == 'daily'
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should calculate frequency = nil if nil table passed' do
|
47
|
+
frequency = Chronometer.process(nil)
|
48
|
+
frequency.should == nil
|
49
|
+
end
|
50
|
+
|
51
51
|
end
|
@@ -1,70 +1,70 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
include Quandl::Babelfish
|
4
|
-
describe Cleaner do
|
5
|
-
|
6
|
-
let(:input){ [] }
|
7
|
-
let(:output){ Cleaner.process(input) }
|
8
|
-
let(:data){ output[0] }
|
9
|
-
let(:headers){ output[1] }
|
10
|
-
subject{ data }
|
11
|
-
|
12
|
-
context "garbage" do
|
13
|
-
let(:input){ [[2456624, 10], [2456625, 20], [2456626, 30]] }
|
14
|
-
it{ should be_eq_at_index '[0][0]', Date.new(1970,01,29) }
|
15
|
-
end
|
16
|
-
|
17
|
-
context "headers with whitespace" do
|
18
|
-
let(:input){ [[" Date ", " C1 ", "C2 ", " C4"],[1990,1,2,3],[1991,4,5,6]] }
|
19
|
-
it{ headers.should eq ["Date", "C1", "C2", "C4"] }
|
20
|
-
end
|
21
|
-
|
22
|
-
context "annual" do
|
23
|
-
let(:input){ [[1990,1,2,3],[1991,4,5,6]] }
|
24
|
-
it{ should be_eq_at_index '[0][0]', Date.new(1990,12,31) }
|
25
|
-
it{ should be_eq_at_index '[0][1]', 1 }
|
26
|
-
it{ should be_eq_at_index '[1][0]', Date.new(1991,12,31) }
|
27
|
-
it{ should be_eq_at_index '[1][3]', 6 }
|
28
|
-
it{ headers.should be_nil }
|
29
|
-
end
|
30
|
-
|
31
|
-
context "numeric date" do
|
32
|
-
let(:input){ [[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
|
33
|
-
it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
|
34
|
-
it{ should be_eq_at_index '[0][1]', 1 }
|
35
|
-
it{ should be_eq_at_index '[0][2]', 2.3 }
|
36
|
-
it{ should be_eq_at_index '[0][3]', nil }
|
37
|
-
it{ headers.should be_nil }
|
38
|
-
end
|
39
|
-
|
40
|
-
context "data with headers" do
|
41
|
-
let(:input){ [['Date',0,0,0],[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
|
42
|
-
it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
|
43
|
-
it{ should be_eq_at_index '[0][1]', 1 }
|
44
|
-
it{ should be_eq_at_index '[0][2]', 2.3 }
|
45
|
-
it{ should be_eq_at_index '[0][3]', nil }
|
46
|
-
it{ headers.should eq ['Date','0','0','0'] }
|
47
|
-
end
|
48
|
-
|
49
|
-
context "data with nil" do
|
50
|
-
let(:input){ [["Date", "Col1"], ["2002", nil], ["2003", "5"]] }
|
51
|
-
it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
|
52
|
-
it{ data[0].length.should ==2}
|
53
|
-
it{ should be_eq_at_index '[0][1]', nil }
|
54
|
-
it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
|
55
|
-
it{ should be_eq_at_index '[1][1]', 5 }
|
56
|
-
it{ headers.should eq ['Date','Col1'] }
|
57
|
-
end
|
58
|
-
|
59
|
-
context "data with middle nil" do
|
60
|
-
let(:input){ [["Date", "Col1", "Col2"], ["2002", nil, '1'], ["2003", "5", '6']] }
|
61
|
-
it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
|
62
|
-
it{ should be_eq_at_index '[0][1]', nil }
|
63
|
-
it{ should be_eq_at_index '[0][2]', 1}
|
64
|
-
it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
|
65
|
-
it{ should be_eq_at_index '[1][1]', 5 }
|
66
|
-
it{ should be_eq_at_index '[1][2]', 6 }
|
67
|
-
it{ headers.should eq ['Date','Col1', 'Col2'] }
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
include Quandl::Babelfish
|
4
|
+
describe Cleaner do
|
5
|
+
|
6
|
+
let(:input){ [] }
|
7
|
+
let(:output){ Cleaner.process(input) }
|
8
|
+
let(:data){ output[0] }
|
9
|
+
let(:headers){ output[1] }
|
10
|
+
subject{ data }
|
11
|
+
|
12
|
+
context "garbage" do
|
13
|
+
let(:input){ [[2456624, 10], [2456625, 20], [2456626, 30]] }
|
14
|
+
it{ should be_eq_at_index '[0][0]', Date.new(1970,01,29) }
|
15
|
+
end
|
16
|
+
|
17
|
+
context "headers with whitespace" do
|
18
|
+
let(:input){ [[" Date ", " C1 ", "C2 ", " C4"],[1990,1,2,3],[1991,4,5,6]] }
|
19
|
+
it{ headers.should eq ["Date", "C1", "C2", "C4"] }
|
20
|
+
end
|
21
|
+
|
22
|
+
context "annual" do
|
23
|
+
let(:input){ [[1990,1,2,3],[1991,4,5,6]] }
|
24
|
+
it{ should be_eq_at_index '[0][0]', Date.new(1990,12,31) }
|
25
|
+
it{ should be_eq_at_index '[0][1]', 1 }
|
26
|
+
it{ should be_eq_at_index '[1][0]', Date.new(1991,12,31) }
|
27
|
+
it{ should be_eq_at_index '[1][3]', 6 }
|
28
|
+
it{ headers.should be_nil }
|
29
|
+
end
|
30
|
+
|
31
|
+
context "numeric date" do
|
32
|
+
let(:input){ [[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
|
33
|
+
it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
|
34
|
+
it{ should be_eq_at_index '[0][1]', 1 }
|
35
|
+
it{ should be_eq_at_index '[0][2]', 2.3 }
|
36
|
+
it{ should be_eq_at_index '[0][3]', nil }
|
37
|
+
it{ headers.should be_nil }
|
38
|
+
end
|
39
|
+
|
40
|
+
context "data with headers" do
|
41
|
+
let(:input){ [['Date',0,0,0],[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
|
42
|
+
it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
|
43
|
+
it{ should be_eq_at_index '[0][1]', 1 }
|
44
|
+
it{ should be_eq_at_index '[0][2]', 2.3 }
|
45
|
+
it{ should be_eq_at_index '[0][3]', nil }
|
46
|
+
it{ headers.should eq ['Date','0','0','0'] }
|
47
|
+
end
|
48
|
+
|
49
|
+
context "data with nil" do
|
50
|
+
let(:input){ [["Date", "Col1"], ["2002", nil], ["2003", "5"]] }
|
51
|
+
it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
|
52
|
+
it{ data[0].length.should ==2}
|
53
|
+
it{ should be_eq_at_index '[0][1]', nil }
|
54
|
+
it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
|
55
|
+
it{ should be_eq_at_index '[1][1]', 5 }
|
56
|
+
it{ headers.should eq ['Date','Col1'] }
|
57
|
+
end
|
58
|
+
|
59
|
+
context "data with middle nil" do
|
60
|
+
let(:input){ [["Date", "Col1", "Col2"], ["2002", nil, '1'], ["2003", "5", '6']] }
|
61
|
+
it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
|
62
|
+
it{ should be_eq_at_index '[0][1]', nil }
|
63
|
+
it{ should be_eq_at_index '[0][2]', 1}
|
64
|
+
it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
|
65
|
+
it{ should be_eq_at_index '[1][1]', 5 }
|
66
|
+
it{ should be_eq_at_index '[1][2]', 6 }
|
67
|
+
it{ headers.should eq ['Date','Col1', 'Col2'] }
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|