quandl_babelfish 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,9 +1,9 @@
1
- class Helper
2
-
3
- # Actions expect a square table, make it so
4
- def self.make_square(table)
5
- longest_row = 0
6
- table.each { |row| longest_row = [longest_row, row.length].max }
7
- table.collect { |row| row += Array.new(longest_row - row.length, nil) }
8
- end
1
+ class Helper
2
+
3
+ # Actions expect a square table, make it so
4
+ def self.make_square(table)
5
+ longest_row = 0
6
+ table.each { |row| longest_row = [longest_row, row.length].max }
7
+ table.collect { |row| row += Array.new(longest_row - row.length, nil) }
8
+ end
9
9
  end
@@ -1,80 +1,80 @@
1
- module Quandl
2
- module Babelfish
3
-
4
- #responsible for number cleaning
5
- class NumberMaid
6
- @defaults = {
7
- :decimal_mark => Regexp.escape('.'),
8
- :ignore_brackets => false, # Brackets ARE negative by default
9
- }
10
-
11
- @settings = @defaults #init with defaults
12
-
13
- class << self
14
-
15
-
16
- def init(user_settings)
17
- @settings=@defaults.merge(user_settings)
18
- @escaped_decimal = Regexp.escape(@settings[:decimal_mark])
19
- end
20
-
21
- #cleans each number one by one
22
- def clean(dirty_numbers)
23
- return nil if dirty_numbers.nil?
24
- numbers=[]
25
- Array(dirty_numbers).each do |cell|
26
- numbers << cell_to_number(cell.to_s)
27
- end
28
-
29
- (numbers.size == 1) ? numbers[0] : numbers
30
- end
31
-
32
- def cell_to_number(num)
33
- return nil if num.nil?
34
- # Remove annotations
35
- # if there is something in parenthesis and a number elsewhere, nuke the parenthesis
36
- temp = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
37
- num = temp if temp.match(/\d/)
38
-
39
- num.gsub!("est.", '')
40
-
41
- #check for exponents by searching for 'e' 'E' or variations of 'x 10' '*10' and 'X10^'
42
- is_exp = false
43
- expmultiplier = 1
44
- m = /(\s)*(E|e|[X|x|\*](\s)*10(\^)?)(\s)*/.match(num)
45
- #check if match is made, preceeded by a number/decimal, and succeeded by a digit or a plus/minus sign
46
- if !m.nil? and m.pre_match =~ /[0-9#{@escaped_decimal}]$/ and m.post_match =~ /^([\-+0-9])/
47
- is_exp = true
48
- num = m.pre_match
49
- expmultiplier = 10 ** /^[0-9\-+]*/.match(m.post_match)[0].to_i
50
- end
51
- is_million = (num =~ /million/i)
52
- is_billion = (num =~ /billion/i)
53
- is_negative = (num =~ /-[\d]/ or (!@settings[:ignore_brackets] and num =~ /\([\d]/))
54
-
55
- # watch out for two numbers, like a range eg "27.3 - 33.9"
56
- # how: if you a see a number followed by a non number char that is not the decimal marker, kill everything to the right of that
57
- num.gsub!(/(\d) (\d)/, '\1\2')
58
- if m = num.match(/-?\s*[,\d\.]+/)
59
- num = m[0]
60
- end
61
-
62
- # only keep #s and decimal mark
63
- num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
64
- num.gsub!(/[^0-9]/, '.')
65
-
66
- return nil if num.nil? || num !~ /[\d]/
67
- return nil if num.end_with?(".")
68
- return nil if num.count(".") > 1
69
- cell = num.nil? ? 0.0 : Float("%.#{14}g" % num)
70
- cell *= 1e6 if is_million
71
- cell *= 1e9 if is_billion
72
- cell *= -1 if is_negative
73
- cell *= expmultiplier if is_exp
74
- cell
75
- end
76
-
77
- end
78
- end
79
- end
1
+ module Quandl
2
+ module Babelfish
3
+
4
+ #responsible for number cleaning
5
+ class NumberMaid
6
+ @defaults = {
7
+ :decimal_mark => Regexp.escape('.'),
8
+ :ignore_brackets => false, # Brackets ARE negative by default
9
+ }
10
+
11
+ @settings = @defaults #init with defaults
12
+
13
+ class << self
14
+
15
+
16
+ def init(user_settings)
17
+ @settings=@defaults.merge(user_settings)
18
+ @escaped_decimal = Regexp.escape(@settings[:decimal_mark])
19
+ end
20
+
21
+ #cleans each number one by one
22
+ def clean(dirty_numbers)
23
+ return nil if dirty_numbers.nil?
24
+ numbers=[]
25
+ Array(dirty_numbers).each do |cell|
26
+ numbers << cell_to_number(cell.to_s)
27
+ end
28
+
29
+ (numbers.size == 1) ? numbers[0] : numbers
30
+ end
31
+
32
+ def cell_to_number(num)
33
+ return nil if num.nil?
34
+ # Remove annotations
35
+ # if there is something in parenthesis and a number elsewhere, nuke the parenthesis
36
+ temp = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
37
+ num = temp if temp.match(/\d/)
38
+
39
+ num.gsub!("est.", '')
40
+
41
+ #check for exponents by searching for 'e' 'E' or variations of 'x 10' '*10' and 'X10^'
42
+ is_exp = false
43
+ expmultiplier = 1
44
+ m = /(\s)*(E|e|[X|x|\*](\s)*10(\^)?)(\s)*/.match(num)
45
+ #check if match is made, preceeded by a number/decimal, and succeeded by a digit or a plus/minus sign
46
+ if !m.nil? and m.pre_match =~ /[0-9#{@escaped_decimal}]$/ and m.post_match =~ /^([\-+0-9])/
47
+ is_exp = true
48
+ num = m.pre_match
49
+ expmultiplier = 10 ** /^[0-9\-+]*/.match(m.post_match)[0].to_i
50
+ end
51
+ is_million = (num =~ /million/i)
52
+ is_billion = (num =~ /billion/i)
53
+ is_negative = (num =~ /-[\d]/ or (!@settings[:ignore_brackets] and num =~ /\([\d]/))
54
+
55
+ # watch out for two numbers, like a range eg "27.3 - 33.9"
56
+ # how: if you a see a number followed by a non number char that is not the decimal marker, kill everything to the right of that
57
+ num.gsub!(/(\d) (\d)/, '\1\2')
58
+ if m = num.match(/-?\s*[,\d\.]+/)
59
+ num = m[0]
60
+ end
61
+
62
+ # only keep #s and decimal mark
63
+ num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
64
+ num.gsub!(/[^0-9]/, '.')
65
+
66
+ return nil if num.nil? || num !~ /[\d]/
67
+ return nil if num.end_with?(".")
68
+ return nil if num.count(".") > 1
69
+ cell = num.nil? ? 0.0 : Float("%.#{14}g" % num)
70
+ cell *= 1e6 if is_million
71
+ cell *= 1e9 if is_billion
72
+ cell *= -1 if is_negative
73
+ cell *= expmultiplier if is_exp
74
+ cell
75
+ end
76
+
77
+ end
78
+ end
79
+ end
80
80
  end
@@ -1,5 +1,5 @@
1
- module Quandl
2
- module Babelfish
3
- VERSION = '0.0.7'
4
- end
1
+ module Quandl
2
+ module Babelfish
3
+ VERSION = '0.0.8'
4
+ end
5
5
  end
@@ -1,5 +1,5 @@
1
- module Quandl
2
- module Error
3
- class GuessDateFormat < Quandl::Error::Standard; end
4
- end
1
+ module Quandl
2
+ module Error
3
+ class GuessDateFormat < Quandl::Error::Standard; end
4
+ end
5
5
  end
@@ -1,5 +1,5 @@
1
- module Quandl
2
- module Error
3
- class InvalidDate < Quandl::Error::Standard; end
4
- end
1
+ module Quandl
2
+ module Error
3
+ class InvalidDate < Quandl::Error::Standard; end
4
+ end
5
5
  end
@@ -1,27 +1,27 @@
1
- module Quandl
2
- module Error
3
- class Standard < StandardError
4
-
5
- attr_accessor :details
6
-
7
- def line
8
- detail :line
9
- end
10
- def context
11
- detail :context
12
- end
13
- def problem
14
- detail :problem
15
- end
16
-
17
- def detail(key)
18
- details.send(key) if details.respond_to?(key)
19
- end
20
-
21
- def initialize(opts=nil)
22
- @details = OpenStruct.new( opts ) if opts && opts.is_a?(Hash)
23
- end
24
-
25
- end
26
- end
1
+ module Quandl
2
+ module Error
3
+ class Standard < StandardError
4
+
5
+ attr_accessor :details
6
+
7
+ def line
8
+ detail :line
9
+ end
10
+ def context
11
+ detail :context
12
+ end
13
+ def problem
14
+ detail :problem
15
+ end
16
+
17
+ def detail(key)
18
+ details.send(key) if details.respond_to?(key)
19
+ end
20
+
21
+ def initialize(opts=nil)
22
+ @details = OpenStruct.new( opts ) if opts && opts.is_a?(Hash)
23
+ end
24
+
25
+ end
26
+ end
27
27
  end
@@ -1,5 +1,5 @@
1
- module Quandl
2
- module Error
3
- class UnknownDateFormat < Quandl::Error::Standard; end
4
- end
1
+ module Quandl
2
+ module Error
3
+ class UnknownDateFormat < Quandl::Error::Standard; end
4
+ end
5
5
  end
@@ -1,21 +1,21 @@
1
- # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
3
- require "quandl/babelfish/version"
4
-
5
- Gem::Specification.new do |s|
6
- s.name = "quandl_babelfish"
7
- s.version = Quandl::Babelfish::VERSION
8
- s.authors = ["Sergei Ryshkevich"]
9
- s.email = ["sergei@quandl.com"]
10
- s.homepage = "http://quandl.com/"
11
- s.license = "MIT"
12
- s.summary = "Quandl Data Cleaner"
13
- s.description = "Quandl Data Cleaner"
14
-
15
- s.files = `git ls-files`.split("\n")
16
- s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
- s.require_paths = ["lib"]
18
-
19
- s.add_development_dependency "rspec", "~> 2.13"
20
- s.add_development_dependency "pry"
21
- end
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "quandl/babelfish/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "quandl_babelfish"
7
+ s.version = Quandl::Babelfish::VERSION
8
+ s.authors = ["Sergei Ryshkevich"]
9
+ s.email = ["sergei@quandl.com"]
10
+ s.homepage = "http://quandl.com/"
11
+ s.license = "MIT"
12
+ s.summary = "Quandl Data Cleaner"
13
+ s.description = "Quandl Data Cleaner"
14
+
15
+ s.files = `git ls-files`.split("\n")
16
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
+ s.require_paths = ["lib"]
18
+
19
+ s.add_development_dependency "rspec", "~> 2.13"
20
+ s.add_development_dependency "pry"
21
+ end
@@ -1,51 +1,51 @@
1
- require 'spec_helper'
2
-
3
- include Quandl::Babelfish
4
- describe Chronometer do
5
-
6
- it 'should calculate frequency = daily' do
7
- table = [['2012-01-01','1','2'],['2012-01-02','3','4'],['2012-01-03','5','6']]
8
- frequency = Chronometer.process(table)
9
- frequency.should == 'daily'
10
- end
11
-
12
- it 'should calculate frequency = monthly' do
13
- table = [['2012-01-01','1','2'],['2012-02-01','3','4'],['2012-04-01','5','6'],
14
- ['2012-04-01','1','2'],['2012-05-01','3','4'],['2012-06-01','5','6']]
15
- frequency = Chronometer.process(table)
16
- frequency.should == 'monthly'
17
- end
18
-
19
- it 'should calculate frequency = quarterly' do
20
- table = [['2012-01-01','1','2'],['2012-04-01','3','4'],['2012-07-01','5','6'],
21
- ['2012-10-01','1','2'],['2013-01-01','3','4'],['2012-04-01','5','6']]
22
- frequency = Chronometer.process(table)
23
- frequency.should == 'quarterly'
24
- end
25
-
26
- it 'should calculate frequency = quarterly' do
27
- table = [['2012-01-01','1','2'],['2012-07-01','3','4'],['2013-01-01','5','6'],
28
- ['2013-07-01','1','2']]
29
- frequency = Chronometer.process(table)
30
- frequency.should == 'quarterly'
31
- end
32
-
33
- it 'should calculate frequency = annual' do
34
- table = [['2008-01-01','1','2'],['2008-12-01','3','4'],['2010-01-01','5','6'],
35
- ['2011-01-01','1','2'],['2013-01-01','5','6']]
36
- frequency = Chronometer.process(table)
37
- frequency.should == 'annual'
38
- end
39
-
40
- it 'should calculate frequency = daily if only one row' do
41
- table = [['2010-01-01','1','2']]
42
- frequency = Chronometer.process(table)
43
- frequency.should == 'daily'
44
- end
45
-
46
- it 'should calculate frequency = nil if nil table passed' do
47
- frequency = Chronometer.process(nil)
48
- frequency.should == nil
49
- end
50
-
1
+ require 'spec_helper'
2
+
3
+ include Quandl::Babelfish
4
+ describe Chronometer do
5
+
6
+ it 'should calculate frequency = daily' do
7
+ table = [['2012-01-01','1','2'],['2012-01-02','3','4'],['2012-01-03','5','6']]
8
+ frequency = Chronometer.process(table)
9
+ frequency.should == 'daily'
10
+ end
11
+
12
+ it 'should calculate frequency = monthly' do
13
+ table = [['2012-01-01','1','2'],['2012-02-01','3','4'],['2012-04-01','5','6'],
14
+ ['2012-04-01','1','2'],['2012-05-01','3','4'],['2012-06-01','5','6']]
15
+ frequency = Chronometer.process(table)
16
+ frequency.should == 'monthly'
17
+ end
18
+
19
+ it 'should calculate frequency = quarterly' do
20
+ table = [['2012-01-01','1','2'],['2012-04-01','3','4'],['2012-07-01','5','6'],
21
+ ['2012-10-01','1','2'],['2013-01-01','3','4'],['2012-04-01','5','6']]
22
+ frequency = Chronometer.process(table)
23
+ frequency.should == 'quarterly'
24
+ end
25
+
26
+ it 'should calculate frequency = quarterly' do
27
+ table = [['2012-01-01','1','2'],['2012-07-01','3','4'],['2013-01-01','5','6'],
28
+ ['2013-07-01','1','2']]
29
+ frequency = Chronometer.process(table)
30
+ frequency.should == 'quarterly'
31
+ end
32
+
33
+ it 'should calculate frequency = annual' do
34
+ table = [['2008-01-01','1','2'],['2008-12-01','3','4'],['2010-01-01','5','6'],
35
+ ['2011-01-01','1','2'],['2013-01-01','5','6']]
36
+ frequency = Chronometer.process(table)
37
+ frequency.should == 'annual'
38
+ end
39
+
40
+ it 'should calculate frequency = daily if only one row' do
41
+ table = [['2010-01-01','1','2']]
42
+ frequency = Chronometer.process(table)
43
+ frequency.should == 'daily'
44
+ end
45
+
46
+ it 'should calculate frequency = nil if nil table passed' do
47
+ frequency = Chronometer.process(nil)
48
+ frequency.should == nil
49
+ end
50
+
51
51
  end
@@ -1,70 +1,70 @@
1
- require 'spec_helper'
2
-
3
- include Quandl::Babelfish
4
- describe Cleaner do
5
-
6
- let(:input){ [] }
7
- let(:output){ Cleaner.process(input) }
8
- let(:data){ output[0] }
9
- let(:headers){ output[1] }
10
- subject{ data }
11
-
12
- context "garbage" do
13
- let(:input){ [[2456624, 10], [2456625, 20], [2456626, 30]] }
14
- it{ should be_eq_at_index '[0][0]', Date.new(1970,01,29) }
15
- end
16
-
17
- context "headers with whitespace" do
18
- let(:input){ [[" Date ", " C1 ", "C2 ", " C4"],[1990,1,2,3],[1991,4,5,6]] }
19
- it{ headers.should eq ["Date", "C1", "C2", "C4"] }
20
- end
21
-
22
- context "annual" do
23
- let(:input){ [[1990,1,2,3],[1991,4,5,6]] }
24
- it{ should be_eq_at_index '[0][0]', Date.new(1990,12,31) }
25
- it{ should be_eq_at_index '[0][1]', 1 }
26
- it{ should be_eq_at_index '[1][0]', Date.new(1991,12,31) }
27
- it{ should be_eq_at_index '[1][3]', 6 }
28
- it{ headers.should be_nil }
29
- end
30
-
31
- context "numeric date" do
32
- let(:input){ [[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
33
- it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
34
- it{ should be_eq_at_index '[0][1]', 1 }
35
- it{ should be_eq_at_index '[0][2]', 2.3 }
36
- it{ should be_eq_at_index '[0][3]', nil }
37
- it{ headers.should be_nil }
38
- end
39
-
40
- context "data with headers" do
41
- let(:input){ [['Date',0,0,0],[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
42
- it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
43
- it{ should be_eq_at_index '[0][1]', 1 }
44
- it{ should be_eq_at_index '[0][2]', 2.3 }
45
- it{ should be_eq_at_index '[0][3]', nil }
46
- it{ headers.should eq ['Date','0','0','0'] }
47
- end
48
-
49
- context "data with nil" do
50
- let(:input){ [["Date", "Col1"], ["2002", nil], ["2003", "5"]] }
51
- it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
52
- it{ data[0].length.should ==2}
53
- it{ should be_eq_at_index '[0][1]', nil }
54
- it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
55
- it{ should be_eq_at_index '[1][1]', 5 }
56
- it{ headers.should eq ['Date','Col1'] }
57
- end
58
-
59
- context "data with middle nil" do
60
- let(:input){ [["Date", "Col1", "Col2"], ["2002", nil, '1'], ["2003", "5", '6']] }
61
- it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
62
- it{ should be_eq_at_index '[0][1]', nil }
63
- it{ should be_eq_at_index '[0][2]', 1}
64
- it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
65
- it{ should be_eq_at_index '[1][1]', 5 }
66
- it{ should be_eq_at_index '[1][2]', 6 }
67
- it{ headers.should eq ['Date','Col1', 'Col2'] }
68
- end
69
-
70
- end
1
+ require 'spec_helper'
2
+
3
+ include Quandl::Babelfish
4
+ describe Cleaner do
5
+
6
+ let(:input){ [] }
7
+ let(:output){ Cleaner.process(input) }
8
+ let(:data){ output[0] }
9
+ let(:headers){ output[1] }
10
+ subject{ data }
11
+
12
+ context "garbage" do
13
+ let(:input){ [[2456624, 10], [2456625, 20], [2456626, 30]] }
14
+ it{ should be_eq_at_index '[0][0]', Date.new(1970,01,29) }
15
+ end
16
+
17
+ context "headers with whitespace" do
18
+ let(:input){ [[" Date ", " C1 ", "C2 ", " C4"],[1990,1,2,3],[1991,4,5,6]] }
19
+ it{ headers.should eq ["Date", "C1", "C2", "C4"] }
20
+ end
21
+
22
+ context "annual" do
23
+ let(:input){ [[1990,1,2,3],[1991,4,5,6]] }
24
+ it{ should be_eq_at_index '[0][0]', Date.new(1990,12,31) }
25
+ it{ should be_eq_at_index '[0][1]', 1 }
26
+ it{ should be_eq_at_index '[1][0]', Date.new(1991,12,31) }
27
+ it{ should be_eq_at_index '[1][3]', 6 }
28
+ it{ headers.should be_nil }
29
+ end
30
+
31
+ context "numeric date" do
32
+ let(:input){ [[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
33
+ it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
34
+ it{ should be_eq_at_index '[0][1]', 1 }
35
+ it{ should be_eq_at_index '[0][2]', 2.3 }
36
+ it{ should be_eq_at_index '[0][3]', nil }
37
+ it{ headers.should be_nil }
38
+ end
39
+
40
+ context "data with headers" do
41
+ let(:input){ [['Date',0,0,0],[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
42
+ it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
43
+ it{ should be_eq_at_index '[0][1]', 1 }
44
+ it{ should be_eq_at_index '[0][2]', 2.3 }
45
+ it{ should be_eq_at_index '[0][3]', nil }
46
+ it{ headers.should eq ['Date','0','0','0'] }
47
+ end
48
+
49
+ context "data with nil" do
50
+ let(:input){ [["Date", "Col1"], ["2002", nil], ["2003", "5"]] }
51
+ it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
52
+ it{ data[0].length.should ==2}
53
+ it{ should be_eq_at_index '[0][1]', nil }
54
+ it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
55
+ it{ should be_eq_at_index '[1][1]', 5 }
56
+ it{ headers.should eq ['Date','Col1'] }
57
+ end
58
+
59
+ context "data with middle nil" do
60
+ let(:input){ [["Date", "Col1", "Col2"], ["2002", nil, '1'], ["2003", "5", '6']] }
61
+ it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
62
+ it{ should be_eq_at_index '[0][1]', nil }
63
+ it{ should be_eq_at_index '[0][2]', 1}
64
+ it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
65
+ it{ should be_eq_at_index '[1][1]', 5 }
66
+ it{ should be_eq_at_index '[1][2]', 6 }
67
+ it{ headers.should eq ['Date','Col1', 'Col2'] }
68
+ end
69
+
70
+ end