quandl_babelfish 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -7
- data/.travis.yml +12 -12
- data/Gemfile +1 -1
- data/LICENSE +7 -7
- data/README.md +18 -18
- data/UPGRADE.md +38 -31
- data/lib/quandl/babelfish.rb +28 -28
- data/lib/quandl/babelfish/chronometer.rb +43 -43
- data/lib/quandl/babelfish/cleaner.rb +33 -32
- data/lib/quandl/babelfish/date_maid.rb +237 -237
- data/lib/quandl/babelfish/helper.rb +8 -8
- data/lib/quandl/babelfish/number_maid.rb +79 -79
- data/lib/quandl/babelfish/version.rb +4 -4
- data/lib/quandl/error/guess_date_format.rb +4 -4
- data/lib/quandl/error/invalid_date.rb +4 -4
- data/lib/quandl/error/standard.rb +26 -26
- data/lib/quandl/error/unknown_date_format.rb +4 -4
- data/quandl_babelfish.gemspec +21 -21
- data/spec/lib/quandl/babelfish/chronometer_spec.rb +50 -50
- data/spec/lib/quandl/babelfish/cleaner_spec.rb +70 -70
- data/spec/lib/quandl/babelfish/date_maid_spec.rb +528 -528
- data/spec/lib/quandl/babelfish/helper_spec.rb +44 -44
- data/spec/lib/quandl/babelfish/number_maid_spec.rb +126 -126
- data/spec/lib/quandl/babelfish_spec.rb +15 -15
- data/spec/spec_helper.rb +12 -12
- data/spec/support/matchers/be_eq_at_index.rb +31 -31
- metadata +12 -4
@@ -1,9 +1,9 @@
|
|
1
|
-
class Helper
|
2
|
-
|
3
|
-
# Actions expect a square table, make it so
|
4
|
-
def self.make_square(table)
|
5
|
-
longest_row = 0
|
6
|
-
table.each { |row| longest_row = [longest_row, row.length].max }
|
7
|
-
table.collect { |row| row += Array.new(longest_row - row.length, nil) }
|
8
|
-
end
|
1
|
+
class Helper
|
2
|
+
|
3
|
+
# Actions expect a square table, make it so
|
4
|
+
def self.make_square(table)
|
5
|
+
longest_row = 0
|
6
|
+
table.each { |row| longest_row = [longest_row, row.length].max }
|
7
|
+
table.collect { |row| row += Array.new(longest_row - row.length, nil) }
|
8
|
+
end
|
9
9
|
end
|
@@ -1,80 +1,80 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Babelfish
|
3
|
-
|
4
|
-
#responsible for number cleaning
|
5
|
-
class NumberMaid
|
6
|
-
@defaults = {
|
7
|
-
:decimal_mark => Regexp.escape('.'),
|
8
|
-
:ignore_brackets => false, # Brackets ARE negative by default
|
9
|
-
}
|
10
|
-
|
11
|
-
@settings = @defaults #init with defaults
|
12
|
-
|
13
|
-
class << self
|
14
|
-
|
15
|
-
|
16
|
-
def init(user_settings)
|
17
|
-
@settings=@defaults.merge(user_settings)
|
18
|
-
@escaped_decimal = Regexp.escape(@settings[:decimal_mark])
|
19
|
-
end
|
20
|
-
|
21
|
-
#cleans each number one by one
|
22
|
-
def clean(dirty_numbers)
|
23
|
-
return nil if dirty_numbers.nil?
|
24
|
-
numbers=[]
|
25
|
-
Array(dirty_numbers).each do |cell|
|
26
|
-
numbers << cell_to_number(cell.to_s)
|
27
|
-
end
|
28
|
-
|
29
|
-
(numbers.size == 1) ? numbers[0] : numbers
|
30
|
-
end
|
31
|
-
|
32
|
-
def cell_to_number(num)
|
33
|
-
return nil if num.nil?
|
34
|
-
# Remove annotations
|
35
|
-
# if there is something in parenthesis and a number elsewhere, nuke the parenthesis
|
36
|
-
temp = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
|
37
|
-
num = temp if temp.match(/\d/)
|
38
|
-
|
39
|
-
num.gsub!("est.", '')
|
40
|
-
|
41
|
-
#check for exponents by searching for 'e' 'E' or variations of 'x 10' '*10' and 'X10^'
|
42
|
-
is_exp = false
|
43
|
-
expmultiplier = 1
|
44
|
-
m = /(\s)*(E|e|[X|x|\*](\s)*10(\^)?)(\s)*/.match(num)
|
45
|
-
#check if match is made, preceeded by a number/decimal, and succeeded by a digit or a plus/minus sign
|
46
|
-
if !m.nil? and m.pre_match =~ /[0-9#{@escaped_decimal}]$/ and m.post_match =~ /^([\-+0-9])/
|
47
|
-
is_exp = true
|
48
|
-
num = m.pre_match
|
49
|
-
expmultiplier = 10 ** /^[0-9\-+]*/.match(m.post_match)[0].to_i
|
50
|
-
end
|
51
|
-
is_million = (num =~ /million/i)
|
52
|
-
is_billion = (num =~ /billion/i)
|
53
|
-
is_negative = (num =~ /-[\d]/ or (!@settings[:ignore_brackets] and num =~ /\([\d]/))
|
54
|
-
|
55
|
-
# watch out for two numbers, like a range eg "27.3 - 33.9"
|
56
|
-
# how: if you a see a number followed by a non number char that is not the decimal marker, kill everything to the right of that
|
57
|
-
num.gsub!(/(\d) (\d)/, '\1\2')
|
58
|
-
if m = num.match(/-?\s*[,\d\.]+/)
|
59
|
-
num = m[0]
|
60
|
-
end
|
61
|
-
|
62
|
-
# only keep #s and decimal mark
|
63
|
-
num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
|
64
|
-
num.gsub!(/[^0-9]/, '.')
|
65
|
-
|
66
|
-
return nil if num.nil? || num !~ /[\d]/
|
67
|
-
return nil if num.end_with?(".")
|
68
|
-
return nil if num.count(".") > 1
|
69
|
-
cell = num.nil? ? 0.0 : Float("%.#{14}g" % num)
|
70
|
-
cell *= 1e6 if is_million
|
71
|
-
cell *= 1e9 if is_billion
|
72
|
-
cell *= -1 if is_negative
|
73
|
-
cell *= expmultiplier if is_exp
|
74
|
-
cell
|
75
|
-
end
|
76
|
-
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Babelfish
|
3
|
+
|
4
|
+
#responsible for number cleaning
|
5
|
+
class NumberMaid
|
6
|
+
@defaults = {
|
7
|
+
:decimal_mark => Regexp.escape('.'),
|
8
|
+
:ignore_brackets => false, # Brackets ARE negative by default
|
9
|
+
}
|
10
|
+
|
11
|
+
@settings = @defaults #init with defaults
|
12
|
+
|
13
|
+
class << self
|
14
|
+
|
15
|
+
|
16
|
+
def init(user_settings)
|
17
|
+
@settings=@defaults.merge(user_settings)
|
18
|
+
@escaped_decimal = Regexp.escape(@settings[:decimal_mark])
|
19
|
+
end
|
20
|
+
|
21
|
+
#cleans each number one by one
|
22
|
+
def clean(dirty_numbers)
|
23
|
+
return nil if dirty_numbers.nil?
|
24
|
+
numbers=[]
|
25
|
+
Array(dirty_numbers).each do |cell|
|
26
|
+
numbers << cell_to_number(cell.to_s)
|
27
|
+
end
|
28
|
+
|
29
|
+
(numbers.size == 1) ? numbers[0] : numbers
|
30
|
+
end
|
31
|
+
|
32
|
+
def cell_to_number(num)
|
33
|
+
return nil if num.nil?
|
34
|
+
# Remove annotations
|
35
|
+
# if there is something in parenthesis and a number elsewhere, nuke the parenthesis
|
36
|
+
temp = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
|
37
|
+
num = temp if temp.match(/\d/)
|
38
|
+
|
39
|
+
num.gsub!("est.", '')
|
40
|
+
|
41
|
+
#check for exponents by searching for 'e' 'E' or variations of 'x 10' '*10' and 'X10^'
|
42
|
+
is_exp = false
|
43
|
+
expmultiplier = 1
|
44
|
+
m = /(\s)*(E|e|[X|x|\*](\s)*10(\^)?)(\s)*/.match(num)
|
45
|
+
#check if match is made, preceeded by a number/decimal, and succeeded by a digit or a plus/minus sign
|
46
|
+
if !m.nil? and m.pre_match =~ /[0-9#{@escaped_decimal}]$/ and m.post_match =~ /^([\-+0-9])/
|
47
|
+
is_exp = true
|
48
|
+
num = m.pre_match
|
49
|
+
expmultiplier = 10 ** /^[0-9\-+]*/.match(m.post_match)[0].to_i
|
50
|
+
end
|
51
|
+
is_million = (num =~ /million/i)
|
52
|
+
is_billion = (num =~ /billion/i)
|
53
|
+
is_negative = (num =~ /-[\d]/ or (!@settings[:ignore_brackets] and num =~ /\([\d]/))
|
54
|
+
|
55
|
+
# watch out for two numbers, like a range eg "27.3 - 33.9"
|
56
|
+
# how: if you a see a number followed by a non number char that is not the decimal marker, kill everything to the right of that
|
57
|
+
num.gsub!(/(\d) (\d)/, '\1\2')
|
58
|
+
if m = num.match(/-?\s*[,\d\.]+/)
|
59
|
+
num = m[0]
|
60
|
+
end
|
61
|
+
|
62
|
+
# only keep #s and decimal mark
|
63
|
+
num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
|
64
|
+
num.gsub!(/[^0-9]/, '.')
|
65
|
+
|
66
|
+
return nil if num.nil? || num !~ /[\d]/
|
67
|
+
return nil if num.end_with?(".")
|
68
|
+
return nil if num.count(".") > 1
|
69
|
+
cell = num.nil? ? 0.0 : Float("%.#{14}g" % num)
|
70
|
+
cell *= 1e6 if is_million
|
71
|
+
cell *= 1e9 if is_billion
|
72
|
+
cell *= -1 if is_negative
|
73
|
+
cell *= expmultiplier if is_exp
|
74
|
+
cell
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
80
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Babelfish
|
3
|
-
VERSION = '0.0.7'
|
4
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Babelfish
|
3
|
+
VERSION = '0.0.8'
|
4
|
+
end
|
5
5
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Error
|
3
|
-
class GuessDateFormat < Quandl::Error::Standard; end
|
4
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Error
|
3
|
+
class GuessDateFormat < Quandl::Error::Standard; end
|
4
|
+
end
|
5
5
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Error
|
3
|
-
class InvalidDate < Quandl::Error::Standard; end
|
4
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Error
|
3
|
+
class InvalidDate < Quandl::Error::Standard; end
|
4
|
+
end
|
5
5
|
end
|
@@ -1,27 +1,27 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Error
|
3
|
-
class Standard < StandardError
|
4
|
-
|
5
|
-
attr_accessor :details
|
6
|
-
|
7
|
-
def line
|
8
|
-
detail :line
|
9
|
-
end
|
10
|
-
def context
|
11
|
-
detail :context
|
12
|
-
end
|
13
|
-
def problem
|
14
|
-
detail :problem
|
15
|
-
end
|
16
|
-
|
17
|
-
def detail(key)
|
18
|
-
details.send(key) if details.respond_to?(key)
|
19
|
-
end
|
20
|
-
|
21
|
-
def initialize(opts=nil)
|
22
|
-
@details = OpenStruct.new( opts ) if opts && opts.is_a?(Hash)
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
26
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Error
|
3
|
+
class Standard < StandardError
|
4
|
+
|
5
|
+
attr_accessor :details
|
6
|
+
|
7
|
+
def line
|
8
|
+
detail :line
|
9
|
+
end
|
10
|
+
def context
|
11
|
+
detail :context
|
12
|
+
end
|
13
|
+
def problem
|
14
|
+
detail :problem
|
15
|
+
end
|
16
|
+
|
17
|
+
def detail(key)
|
18
|
+
details.send(key) if details.respond_to?(key)
|
19
|
+
end
|
20
|
+
|
21
|
+
def initialize(opts=nil)
|
22
|
+
@details = OpenStruct.new( opts ) if opts && opts.is_a?(Hash)
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
27
27
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module Quandl
|
2
|
-
module Error
|
3
|
-
class UnknownDateFormat < Quandl::Error::Standard; end
|
4
|
-
end
|
1
|
+
module Quandl
|
2
|
+
module Error
|
3
|
+
class UnknownDateFormat < Quandl::Error::Standard; end
|
4
|
+
end
|
5
5
|
end
|
data/quandl_babelfish.gemspec
CHANGED
@@ -1,21 +1,21 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
$:.push File.expand_path("../lib", __FILE__)
|
3
|
-
require "quandl/babelfish/version"
|
4
|
-
|
5
|
-
Gem::Specification.new do |s|
|
6
|
-
s.name = "quandl_babelfish"
|
7
|
-
s.version = Quandl::Babelfish::VERSION
|
8
|
-
s.authors = ["Sergei Ryshkevich"]
|
9
|
-
s.email = ["sergei@quandl.com"]
|
10
|
-
s.homepage = "http://quandl.com/"
|
11
|
-
s.license = "MIT"
|
12
|
-
s.summary = "Quandl Data Cleaner"
|
13
|
-
s.description = "Quandl Data Cleaner"
|
14
|
-
|
15
|
-
s.files = `git ls-files`.split("\n")
|
16
|
-
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
-
s.require_paths = ["lib"]
|
18
|
-
|
19
|
-
s.add_development_dependency "rspec", "~> 2.13"
|
20
|
-
s.add_development_dependency "pry"
|
21
|
-
end
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "quandl/babelfish/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "quandl_babelfish"
|
7
|
+
s.version = Quandl::Babelfish::VERSION
|
8
|
+
s.authors = ["Sergei Ryshkevich"]
|
9
|
+
s.email = ["sergei@quandl.com"]
|
10
|
+
s.homepage = "http://quandl.com/"
|
11
|
+
s.license = "MIT"
|
12
|
+
s.summary = "Quandl Data Cleaner"
|
13
|
+
s.description = "Quandl Data Cleaner"
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.require_paths = ["lib"]
|
18
|
+
|
19
|
+
s.add_development_dependency "rspec", "~> 2.13"
|
20
|
+
s.add_development_dependency "pry"
|
21
|
+
end
|
@@ -1,51 +1,51 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
include Quandl::Babelfish
|
4
|
-
describe Chronometer do
|
5
|
-
|
6
|
-
it 'should calculate frequency = daily' do
|
7
|
-
table = [['2012-01-01','1','2'],['2012-01-02','3','4'],['2012-01-03','5','6']]
|
8
|
-
frequency = Chronometer.process(table)
|
9
|
-
frequency.should == 'daily'
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'should calculate frequency = monthly' do
|
13
|
-
table = [['2012-01-01','1','2'],['2012-02-01','3','4'],['2012-04-01','5','6'],
|
14
|
-
['2012-04-01','1','2'],['2012-05-01','3','4'],['2012-06-01','5','6']]
|
15
|
-
frequency = Chronometer.process(table)
|
16
|
-
frequency.should == 'monthly'
|
17
|
-
end
|
18
|
-
|
19
|
-
it 'should calculate frequency = quarterly' do
|
20
|
-
table = [['2012-01-01','1','2'],['2012-04-01','3','4'],['2012-07-01','5','6'],
|
21
|
-
['2012-10-01','1','2'],['2013-01-01','3','4'],['2012-04-01','5','6']]
|
22
|
-
frequency = Chronometer.process(table)
|
23
|
-
frequency.should == 'quarterly'
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'should calculate frequency = quarterly' do
|
27
|
-
table = [['2012-01-01','1','2'],['2012-07-01','3','4'],['2013-01-01','5','6'],
|
28
|
-
['2013-07-01','1','2']]
|
29
|
-
frequency = Chronometer.process(table)
|
30
|
-
frequency.should == 'quarterly'
|
31
|
-
end
|
32
|
-
|
33
|
-
it 'should calculate frequency = annual' do
|
34
|
-
table = [['2008-01-01','1','2'],['2008-12-01','3','4'],['2010-01-01','5','6'],
|
35
|
-
['2011-01-01','1','2'],['2013-01-01','5','6']]
|
36
|
-
frequency = Chronometer.process(table)
|
37
|
-
frequency.should == 'annual'
|
38
|
-
end
|
39
|
-
|
40
|
-
it 'should calculate frequency = daily if only one row' do
|
41
|
-
table = [['2010-01-01','1','2']]
|
42
|
-
frequency = Chronometer.process(table)
|
43
|
-
frequency.should == 'daily'
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'should calculate frequency = nil if nil table passed' do
|
47
|
-
frequency = Chronometer.process(nil)
|
48
|
-
frequency.should == nil
|
49
|
-
end
|
50
|
-
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
include Quandl::Babelfish
|
4
|
+
describe Chronometer do
|
5
|
+
|
6
|
+
it 'should calculate frequency = daily' do
|
7
|
+
table = [['2012-01-01','1','2'],['2012-01-02','3','4'],['2012-01-03','5','6']]
|
8
|
+
frequency = Chronometer.process(table)
|
9
|
+
frequency.should == 'daily'
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should calculate frequency = monthly' do
|
13
|
+
table = [['2012-01-01','1','2'],['2012-02-01','3','4'],['2012-04-01','5','6'],
|
14
|
+
['2012-04-01','1','2'],['2012-05-01','3','4'],['2012-06-01','5','6']]
|
15
|
+
frequency = Chronometer.process(table)
|
16
|
+
frequency.should == 'monthly'
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'should calculate frequency = quarterly' do
|
20
|
+
table = [['2012-01-01','1','2'],['2012-04-01','3','4'],['2012-07-01','5','6'],
|
21
|
+
['2012-10-01','1','2'],['2013-01-01','3','4'],['2012-04-01','5','6']]
|
22
|
+
frequency = Chronometer.process(table)
|
23
|
+
frequency.should == 'quarterly'
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should calculate frequency = quarterly' do
|
27
|
+
table = [['2012-01-01','1','2'],['2012-07-01','3','4'],['2013-01-01','5','6'],
|
28
|
+
['2013-07-01','1','2']]
|
29
|
+
frequency = Chronometer.process(table)
|
30
|
+
frequency.should == 'quarterly'
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'should calculate frequency = annual' do
|
34
|
+
table = [['2008-01-01','1','2'],['2008-12-01','3','4'],['2010-01-01','5','6'],
|
35
|
+
['2011-01-01','1','2'],['2013-01-01','5','6']]
|
36
|
+
frequency = Chronometer.process(table)
|
37
|
+
frequency.should == 'annual'
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'should calculate frequency = daily if only one row' do
|
41
|
+
table = [['2010-01-01','1','2']]
|
42
|
+
frequency = Chronometer.process(table)
|
43
|
+
frequency.should == 'daily'
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should calculate frequency = nil if nil table passed' do
|
47
|
+
frequency = Chronometer.process(nil)
|
48
|
+
frequency.should == nil
|
49
|
+
end
|
50
|
+
|
51
51
|
end
|
@@ -1,70 +1,70 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
include Quandl::Babelfish
|
4
|
-
describe Cleaner do
|
5
|
-
|
6
|
-
let(:input){ [] }
|
7
|
-
let(:output){ Cleaner.process(input) }
|
8
|
-
let(:data){ output[0] }
|
9
|
-
let(:headers){ output[1] }
|
10
|
-
subject{ data }
|
11
|
-
|
12
|
-
context "garbage" do
|
13
|
-
let(:input){ [[2456624, 10], [2456625, 20], [2456626, 30]] }
|
14
|
-
it{ should be_eq_at_index '[0][0]', Date.new(1970,01,29) }
|
15
|
-
end
|
16
|
-
|
17
|
-
context "headers with whitespace" do
|
18
|
-
let(:input){ [[" Date ", " C1 ", "C2 ", " C4"],[1990,1,2,3],[1991,4,5,6]] }
|
19
|
-
it{ headers.should eq ["Date", "C1", "C2", "C4"] }
|
20
|
-
end
|
21
|
-
|
22
|
-
context "annual" do
|
23
|
-
let(:input){ [[1990,1,2,3],[1991,4,5,6]] }
|
24
|
-
it{ should be_eq_at_index '[0][0]', Date.new(1990,12,31) }
|
25
|
-
it{ should be_eq_at_index '[0][1]', 1 }
|
26
|
-
it{ should be_eq_at_index '[1][0]', Date.new(1991,12,31) }
|
27
|
-
it{ should be_eq_at_index '[1][3]', 6 }
|
28
|
-
it{ headers.should be_nil }
|
29
|
-
end
|
30
|
-
|
31
|
-
context "numeric date" do
|
32
|
-
let(:input){ [[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
|
33
|
-
it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
|
34
|
-
it{ should be_eq_at_index '[0][1]', 1 }
|
35
|
-
it{ should be_eq_at_index '[0][2]', 2.3 }
|
36
|
-
it{ should be_eq_at_index '[0][3]', nil }
|
37
|
-
it{ headers.should be_nil }
|
38
|
-
end
|
39
|
-
|
40
|
-
context "data with headers" do
|
41
|
-
let(:input){ [['Date',0,0,0],[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
|
42
|
-
it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
|
43
|
-
it{ should be_eq_at_index '[0][1]', 1 }
|
44
|
-
it{ should be_eq_at_index '[0][2]', 2.3 }
|
45
|
-
it{ should be_eq_at_index '[0][3]', nil }
|
46
|
-
it{ headers.should eq ['Date','0','0','0'] }
|
47
|
-
end
|
48
|
-
|
49
|
-
context "data with nil" do
|
50
|
-
let(:input){ [["Date", "Col1"], ["2002", nil], ["2003", "5"]] }
|
51
|
-
it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
|
52
|
-
it{ data[0].length.should ==2}
|
53
|
-
it{ should be_eq_at_index '[0][1]', nil }
|
54
|
-
it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
|
55
|
-
it{ should be_eq_at_index '[1][1]', 5 }
|
56
|
-
it{ headers.should eq ['Date','Col1'] }
|
57
|
-
end
|
58
|
-
|
59
|
-
context "data with middle nil" do
|
60
|
-
let(:input){ [["Date", "Col1", "Col2"], ["2002", nil, '1'], ["2003", "5", '6']] }
|
61
|
-
it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
|
62
|
-
it{ should be_eq_at_index '[0][1]', nil }
|
63
|
-
it{ should be_eq_at_index '[0][2]', 1}
|
64
|
-
it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
|
65
|
-
it{ should be_eq_at_index '[1][1]', 5 }
|
66
|
-
it{ should be_eq_at_index '[1][2]', 6 }
|
67
|
-
it{ headers.should eq ['Date','Col1', 'Col2'] }
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
include Quandl::Babelfish
|
4
|
+
describe Cleaner do
|
5
|
+
|
6
|
+
let(:input){ [] }
|
7
|
+
let(:output){ Cleaner.process(input) }
|
8
|
+
let(:data){ output[0] }
|
9
|
+
let(:headers){ output[1] }
|
10
|
+
subject{ data }
|
11
|
+
|
12
|
+
context "garbage" do
|
13
|
+
let(:input){ [[2456624, 10], [2456625, 20], [2456626, 30]] }
|
14
|
+
it{ should be_eq_at_index '[0][0]', Date.new(1970,01,29) }
|
15
|
+
end
|
16
|
+
|
17
|
+
context "headers with whitespace" do
|
18
|
+
let(:input){ [[" Date ", " C1 ", "C2 ", " C4"],[1990,1,2,3],[1991,4,5,6]] }
|
19
|
+
it{ headers.should eq ["Date", "C1", "C2", "C4"] }
|
20
|
+
end
|
21
|
+
|
22
|
+
context "annual" do
|
23
|
+
let(:input){ [[1990,1,2,3],[1991,4,5,6]] }
|
24
|
+
it{ should be_eq_at_index '[0][0]', Date.new(1990,12,31) }
|
25
|
+
it{ should be_eq_at_index '[0][1]', 1 }
|
26
|
+
it{ should be_eq_at_index '[1][0]', Date.new(1991,12,31) }
|
27
|
+
it{ should be_eq_at_index '[1][3]', 6 }
|
28
|
+
it{ headers.should be_nil }
|
29
|
+
end
|
30
|
+
|
31
|
+
context "numeric date" do
|
32
|
+
let(:input){ [[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
|
33
|
+
it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
|
34
|
+
it{ should be_eq_at_index '[0][1]', 1 }
|
35
|
+
it{ should be_eq_at_index '[0][2]', 2.3 }
|
36
|
+
it{ should be_eq_at_index '[0][3]', nil }
|
37
|
+
it{ headers.should be_nil }
|
38
|
+
end
|
39
|
+
|
40
|
+
context "data with headers" do
|
41
|
+
let(:input){ [['Date',0,0,0],[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
|
42
|
+
it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
|
43
|
+
it{ should be_eq_at_index '[0][1]', 1 }
|
44
|
+
it{ should be_eq_at_index '[0][2]', 2.3 }
|
45
|
+
it{ should be_eq_at_index '[0][3]', nil }
|
46
|
+
it{ headers.should eq ['Date','0','0','0'] }
|
47
|
+
end
|
48
|
+
|
49
|
+
context "data with nil" do
|
50
|
+
let(:input){ [["Date", "Col1"], ["2002", nil], ["2003", "5"]] }
|
51
|
+
it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
|
52
|
+
it{ data[0].length.should ==2}
|
53
|
+
it{ should be_eq_at_index '[0][1]', nil }
|
54
|
+
it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
|
55
|
+
it{ should be_eq_at_index '[1][1]', 5 }
|
56
|
+
it{ headers.should eq ['Date','Col1'] }
|
57
|
+
end
|
58
|
+
|
59
|
+
context "data with middle nil" do
|
60
|
+
let(:input){ [["Date", "Col1", "Col2"], ["2002", nil, '1'], ["2003", "5", '6']] }
|
61
|
+
it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
|
62
|
+
it{ should be_eq_at_index '[0][1]', nil }
|
63
|
+
it{ should be_eq_at_index '[0][2]', 1}
|
64
|
+
it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
|
65
|
+
it{ should be_eq_at_index '[1][1]', 5 }
|
66
|
+
it{ should be_eq_at_index '[1][2]', 6 }
|
67
|
+
it{ headers.should eq ['Date','Col1', 'Col2'] }
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|