quandl_babelfish 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
- class Helper
2
-
3
- # Actions expect a square table, make it so
4
- def self.make_square(table)
5
- longest_row = 0
6
- table.each { |row| longest_row = [longest_row, row.length].max }
7
- table.collect { |row| row += Array.new(longest_row - row.length, nil) }
8
- end
1
+ class Helper
2
+
3
+ # Actions expect a square table, make it so
4
+ def self.make_square(table)
5
+ longest_row = 0
6
+ table.each { |row| longest_row = [longest_row, row.length].max }
7
+ table.collect { |row| row += Array.new(longest_row - row.length, nil) }
8
+ end
9
9
  end
@@ -1,80 +1,80 @@
1
- module Quandl
2
- module Babelfish
3
-
4
- #responsible for number cleaning
5
- class NumberMaid
6
- @defaults = {
7
- :decimal_mark => Regexp.escape('.'),
8
- :ignore_brackets => false, # Brackets ARE negative by default
9
- }
10
-
11
- @settings = @defaults #init with defaults
12
-
13
- class << self
14
-
15
-
16
- def init(user_settings)
17
- @settings=@defaults.merge(user_settings)
18
- @escaped_decimal = Regexp.escape(@settings[:decimal_mark])
19
- end
20
-
21
- #cleans each number one by one
22
- def clean(dirty_numbers)
23
- return nil if dirty_numbers.nil?
24
- numbers=[]
25
- Array(dirty_numbers).each do |cell|
26
- numbers << cell_to_number(cell.to_s)
27
- end
28
-
29
- (numbers.size == 1) ? numbers[0] : numbers
30
- end
31
-
32
- def cell_to_number(num)
33
- return nil if num.nil?
34
- # Remove annotations
35
- # if there is something in parenthesis and a number elsewhere, nuke the parenthesis
36
- temp = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
37
- num = temp if temp.match(/\d/)
38
-
39
- num.gsub!("est.", '')
40
-
41
- #check for exponents by searching for 'e' 'E' or variations of 'x 10' '*10' and 'X10^'
42
- is_exp = false
43
- expmultiplier = 1
44
- m = /(\s)*(E|e|[X|x|\*](\s)*10(\^)?)(\s)*/.match(num)
45
- #check if match is made, preceeded by a number/decimal, and succeeded by a digit or a plus/minus sign
46
- if !m.nil? and m.pre_match =~ /[0-9#{@escaped_decimal}]$/ and m.post_match =~ /^([\-+0-9])/
47
- is_exp = true
48
- num = m.pre_match
49
- expmultiplier = 10 ** /^[0-9\-+]*/.match(m.post_match)[0].to_i
50
- end
51
- is_million = (num =~ /million/i)
52
- is_billion = (num =~ /billion/i)
53
- is_negative = (num =~ /-[\d]/ or (!@settings[:ignore_brackets] and num =~ /\([\d]/))
54
-
55
- # watch out for two numbers, like a range eg "27.3 - 33.9"
56
- # how: if you a see a number followed by a non number char that is not the decimal marker, kill everything to the right of that
57
- num.gsub!(/(\d) (\d)/, '\1\2')
58
- if m = num.match(/-?\s*[,\d\.]+/)
59
- num = m[0]
60
- end
61
-
62
- # only keep #s and decimal mark
63
- num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
64
- num.gsub!(/[^0-9]/, '.')
65
-
66
- return nil if num.nil? || num !~ /[\d]/
67
- return nil if num.end_with?(".")
68
- return nil if num.count(".") > 1
69
- cell = num.nil? ? 0.0 : Float("%.#{14}g" % num)
70
- cell *= 1e6 if is_million
71
- cell *= 1e9 if is_billion
72
- cell *= -1 if is_negative
73
- cell *= expmultiplier if is_exp
74
- cell
75
- end
76
-
77
- end
78
- end
79
- end
1
+ module Quandl
2
+ module Babelfish
3
+
4
+ #responsible for number cleaning
5
+ class NumberMaid
6
+ @defaults = {
7
+ :decimal_mark => Regexp.escape('.'),
8
+ :ignore_brackets => false, # Brackets ARE negative by default
9
+ }
10
+
11
+ @settings = @defaults #init with defaults
12
+
13
+ class << self
14
+
15
+
16
+ def init(user_settings)
17
+ @settings=@defaults.merge(user_settings)
18
+ @escaped_decimal = Regexp.escape(@settings[:decimal_mark])
19
+ end
20
+
21
+ #cleans each number one by one
22
+ def clean(dirty_numbers)
23
+ return nil if dirty_numbers.nil?
24
+ numbers=[]
25
+ Array(dirty_numbers).each do |cell|
26
+ numbers << cell_to_number(cell.to_s)
27
+ end
28
+
29
+ (numbers.size == 1) ? numbers[0] : numbers
30
+ end
31
+
32
+ def cell_to_number(num)
33
+ return nil if num.nil?
34
+ # Remove annotations
35
+ # if there is something in parenthesis and a number elsewhere, nuke the parenthesis
36
+ temp = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
37
+ num = temp if temp.match(/\d/)
38
+
39
+ num.gsub!("est.", '')
40
+
41
+ #check for exponents by searching for 'e' 'E' or variations of 'x 10' '*10' and 'X10^'
42
+ is_exp = false
43
+ expmultiplier = 1
44
+ m = /(\s)*(E|e|[X|x|\*](\s)*10(\^)?)(\s)*/.match(num)
45
+ #check if match is made, preceeded by a number/decimal, and succeeded by a digit or a plus/minus sign
46
+ if !m.nil? and m.pre_match =~ /[0-9#{@escaped_decimal}]$/ and m.post_match =~ /^([\-+0-9])/
47
+ is_exp = true
48
+ num = m.pre_match
49
+ expmultiplier = 10 ** /^[0-9\-+]*/.match(m.post_match)[0].to_i
50
+ end
51
+ is_million = (num =~ /million/i)
52
+ is_billion = (num =~ /billion/i)
53
+ is_negative = (num =~ /-[\d]/ or (!@settings[:ignore_brackets] and num =~ /\([\d]/))
54
+
55
+ # watch out for two numbers, like a range eg "27.3 - 33.9"
56
+ # how: if you a see a number followed by a non number char that is not the decimal marker, kill everything to the right of that
57
+ num.gsub!(/(\d) (\d)/, '\1\2')
58
+ if m = num.match(/-?\s*[,\d\.]+/)
59
+ num = m[0]
60
+ end
61
+
62
+ # only keep #s and decimal mark
63
+ num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
64
+ num.gsub!(/[^0-9]/, '.')
65
+
66
+ return nil if num.nil? || num !~ /[\d]/
67
+ return nil if num.end_with?(".")
68
+ return nil if num.count(".") > 1
69
+ cell = num.nil? ? 0.0 : Float("%.#{14}g" % num)
70
+ cell *= 1e6 if is_million
71
+ cell *= 1e9 if is_billion
72
+ cell *= -1 if is_negative
73
+ cell *= expmultiplier if is_exp
74
+ cell
75
+ end
76
+
77
+ end
78
+ end
79
+ end
80
80
  end
@@ -1,5 +1,5 @@
1
- module Quandl
2
- module Babelfish
3
- VERSION = '0.0.7'
4
- end
1
+ module Quandl
2
+ module Babelfish
3
+ VERSION = '0.0.8'
4
+ end
5
5
  end
@@ -1,5 +1,5 @@
1
- module Quandl
2
- module Error
3
- class GuessDateFormat < Quandl::Error::Standard; end
4
- end
1
+ module Quandl
2
+ module Error
3
+ class GuessDateFormat < Quandl::Error::Standard; end
4
+ end
5
5
  end
@@ -1,5 +1,5 @@
1
- module Quandl
2
- module Error
3
- class InvalidDate < Quandl::Error::Standard; end
4
- end
1
+ module Quandl
2
+ module Error
3
+ class InvalidDate < Quandl::Error::Standard; end
4
+ end
5
5
  end
@@ -1,27 +1,27 @@
1
- module Quandl
2
- module Error
3
- class Standard < StandardError
4
-
5
- attr_accessor :details
6
-
7
- def line
8
- detail :line
9
- end
10
- def context
11
- detail :context
12
- end
13
- def problem
14
- detail :problem
15
- end
16
-
17
- def detail(key)
18
- details.send(key) if details.respond_to?(key)
19
- end
20
-
21
- def initialize(opts=nil)
22
- @details = OpenStruct.new( opts ) if opts && opts.is_a?(Hash)
23
- end
24
-
25
- end
26
- end
1
+ module Quandl
2
+ module Error
3
+ class Standard < StandardError
4
+
5
+ attr_accessor :details
6
+
7
+ def line
8
+ detail :line
9
+ end
10
+ def context
11
+ detail :context
12
+ end
13
+ def problem
14
+ detail :problem
15
+ end
16
+
17
+ def detail(key)
18
+ details.send(key) if details.respond_to?(key)
19
+ end
20
+
21
+ def initialize(opts=nil)
22
+ @details = OpenStruct.new( opts ) if opts && opts.is_a?(Hash)
23
+ end
24
+
25
+ end
26
+ end
27
27
  end
@@ -1,5 +1,5 @@
1
- module Quandl
2
- module Error
3
- class UnknownDateFormat < Quandl::Error::Standard; end
4
- end
1
+ module Quandl
2
+ module Error
3
+ class UnknownDateFormat < Quandl::Error::Standard; end
4
+ end
5
5
  end
@@ -1,21 +1,21 @@
1
- # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
3
- require "quandl/babelfish/version"
4
-
5
- Gem::Specification.new do |s|
6
- s.name = "quandl_babelfish"
7
- s.version = Quandl::Babelfish::VERSION
8
- s.authors = ["Sergei Ryshkevich"]
9
- s.email = ["sergei@quandl.com"]
10
- s.homepage = "http://quandl.com/"
11
- s.license = "MIT"
12
- s.summary = "Quandl Data Cleaner"
13
- s.description = "Quandl Data Cleaner"
14
-
15
- s.files = `git ls-files`.split("\n")
16
- s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
- s.require_paths = ["lib"]
18
-
19
- s.add_development_dependency "rspec", "~> 2.13"
20
- s.add_development_dependency "pry"
21
- end
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "quandl/babelfish/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "quandl_babelfish"
7
+ s.version = Quandl::Babelfish::VERSION
8
+ s.authors = ["Sergei Ryshkevich"]
9
+ s.email = ["sergei@quandl.com"]
10
+ s.homepage = "http://quandl.com/"
11
+ s.license = "MIT"
12
+ s.summary = "Quandl Data Cleaner"
13
+ s.description = "Quandl Data Cleaner"
14
+
15
+ s.files = `git ls-files`.split("\n")
16
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
+ s.require_paths = ["lib"]
18
+
19
+ s.add_development_dependency "rspec", "~> 2.13"
20
+ s.add_development_dependency "pry"
21
+ end
@@ -1,51 +1,51 @@
1
- require 'spec_helper'
2
-
3
- include Quandl::Babelfish
4
- describe Chronometer do
5
-
6
- it 'should calculate frequency = daily' do
7
- table = [['2012-01-01','1','2'],['2012-01-02','3','4'],['2012-01-03','5','6']]
8
- frequency = Chronometer.process(table)
9
- frequency.should == 'daily'
10
- end
11
-
12
- it 'should calculate frequency = monthly' do
13
- table = [['2012-01-01','1','2'],['2012-02-01','3','4'],['2012-04-01','5','6'],
14
- ['2012-04-01','1','2'],['2012-05-01','3','4'],['2012-06-01','5','6']]
15
- frequency = Chronometer.process(table)
16
- frequency.should == 'monthly'
17
- end
18
-
19
- it 'should calculate frequency = quarterly' do
20
- table = [['2012-01-01','1','2'],['2012-04-01','3','4'],['2012-07-01','5','6'],
21
- ['2012-10-01','1','2'],['2013-01-01','3','4'],['2012-04-01','5','6']]
22
- frequency = Chronometer.process(table)
23
- frequency.should == 'quarterly'
24
- end
25
-
26
- it 'should calculate frequency = quarterly' do
27
- table = [['2012-01-01','1','2'],['2012-07-01','3','4'],['2013-01-01','5','6'],
28
- ['2013-07-01','1','2']]
29
- frequency = Chronometer.process(table)
30
- frequency.should == 'quarterly'
31
- end
32
-
33
- it 'should calculate frequency = annual' do
34
- table = [['2008-01-01','1','2'],['2008-12-01','3','4'],['2010-01-01','5','6'],
35
- ['2011-01-01','1','2'],['2013-01-01','5','6']]
36
- frequency = Chronometer.process(table)
37
- frequency.should == 'annual'
38
- end
39
-
40
- it 'should calculate frequency = daily if only one row' do
41
- table = [['2010-01-01','1','2']]
42
- frequency = Chronometer.process(table)
43
- frequency.should == 'daily'
44
- end
45
-
46
- it 'should calculate frequency = nil if nil table passed' do
47
- frequency = Chronometer.process(nil)
48
- frequency.should == nil
49
- end
50
-
1
+ require 'spec_helper'
2
+
3
+ include Quandl::Babelfish
4
+ describe Chronometer do
5
+
6
+ it 'should calculate frequency = daily' do
7
+ table = [['2012-01-01','1','2'],['2012-01-02','3','4'],['2012-01-03','5','6']]
8
+ frequency = Chronometer.process(table)
9
+ frequency.should == 'daily'
10
+ end
11
+
12
+ it 'should calculate frequency = monthly' do
13
+ table = [['2012-01-01','1','2'],['2012-02-01','3','4'],['2012-04-01','5','6'],
14
+ ['2012-04-01','1','2'],['2012-05-01','3','4'],['2012-06-01','5','6']]
15
+ frequency = Chronometer.process(table)
16
+ frequency.should == 'monthly'
17
+ end
18
+
19
+ it 'should calculate frequency = quarterly' do
20
+ table = [['2012-01-01','1','2'],['2012-04-01','3','4'],['2012-07-01','5','6'],
21
+ ['2012-10-01','1','2'],['2013-01-01','3','4'],['2012-04-01','5','6']]
22
+ frequency = Chronometer.process(table)
23
+ frequency.should == 'quarterly'
24
+ end
25
+
26
+ it 'should calculate frequency = quarterly' do
27
+ table = [['2012-01-01','1','2'],['2012-07-01','3','4'],['2013-01-01','5','6'],
28
+ ['2013-07-01','1','2']]
29
+ frequency = Chronometer.process(table)
30
+ frequency.should == 'quarterly'
31
+ end
32
+
33
+ it 'should calculate frequency = annual' do
34
+ table = [['2008-01-01','1','2'],['2008-12-01','3','4'],['2010-01-01','5','6'],
35
+ ['2011-01-01','1','2'],['2013-01-01','5','6']]
36
+ frequency = Chronometer.process(table)
37
+ frequency.should == 'annual'
38
+ end
39
+
40
+ it 'should calculate frequency = daily if only one row' do
41
+ table = [['2010-01-01','1','2']]
42
+ frequency = Chronometer.process(table)
43
+ frequency.should == 'daily'
44
+ end
45
+
46
+ it 'should calculate frequency = nil if nil table passed' do
47
+ frequency = Chronometer.process(nil)
48
+ frequency.should == nil
49
+ end
50
+
51
51
  end
@@ -1,70 +1,70 @@
1
- require 'spec_helper'
2
-
3
- include Quandl::Babelfish
4
- describe Cleaner do
5
-
6
- let(:input){ [] }
7
- let(:output){ Cleaner.process(input) }
8
- let(:data){ output[0] }
9
- let(:headers){ output[1] }
10
- subject{ data }
11
-
12
- context "garbage" do
13
- let(:input){ [[2456624, 10], [2456625, 20], [2456626, 30]] }
14
- it{ should be_eq_at_index '[0][0]', Date.new(1970,01,29) }
15
- end
16
-
17
- context "headers with whitespace" do
18
- let(:input){ [[" Date ", " C1 ", "C2 ", " C4"],[1990,1,2,3],[1991,4,5,6]] }
19
- it{ headers.should eq ["Date", "C1", "C2", "C4"] }
20
- end
21
-
22
- context "annual" do
23
- let(:input){ [[1990,1,2,3],[1991,4,5,6]] }
24
- it{ should be_eq_at_index '[0][0]', Date.new(1990,12,31) }
25
- it{ should be_eq_at_index '[0][1]', 1 }
26
- it{ should be_eq_at_index '[1][0]', Date.new(1991,12,31) }
27
- it{ should be_eq_at_index '[1][3]', 6 }
28
- it{ headers.should be_nil }
29
- end
30
-
31
- context "numeric date" do
32
- let(:input){ [[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
33
- it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
34
- it{ should be_eq_at_index '[0][1]', 1 }
35
- it{ should be_eq_at_index '[0][2]', 2.3 }
36
- it{ should be_eq_at_index '[0][3]', nil }
37
- it{ headers.should be_nil }
38
- end
39
-
40
- context "data with headers" do
41
- let(:input){ [['Date',0,0,0],[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
42
- it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
43
- it{ should be_eq_at_index '[0][1]', 1 }
44
- it{ should be_eq_at_index '[0][2]', 2.3 }
45
- it{ should be_eq_at_index '[0][3]', nil }
46
- it{ headers.should eq ['Date','0','0','0'] }
47
- end
48
-
49
- context "data with nil" do
50
- let(:input){ [["Date", "Col1"], ["2002", nil], ["2003", "5"]] }
51
- it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
52
- it{ data[0].length.should ==2}
53
- it{ should be_eq_at_index '[0][1]', nil }
54
- it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
55
- it{ should be_eq_at_index '[1][1]', 5 }
56
- it{ headers.should eq ['Date','Col1'] }
57
- end
58
-
59
- context "data with middle nil" do
60
- let(:input){ [["Date", "Col1", "Col2"], ["2002", nil, '1'], ["2003", "5", '6']] }
61
- it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
62
- it{ should be_eq_at_index '[0][1]', nil }
63
- it{ should be_eq_at_index '[0][2]', 1}
64
- it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
65
- it{ should be_eq_at_index '[1][1]', 5 }
66
- it{ should be_eq_at_index '[1][2]', 6 }
67
- it{ headers.should eq ['Date','Col1', 'Col2'] }
68
- end
69
-
70
- end
1
+ require 'spec_helper'
2
+
3
+ include Quandl::Babelfish
4
+ describe Cleaner do
5
+
6
+ let(:input){ [] }
7
+ let(:output){ Cleaner.process(input) }
8
+ let(:data){ output[0] }
9
+ let(:headers){ output[1] }
10
+ subject{ data }
11
+
12
+ context "garbage" do
13
+ let(:input){ [[2456624, 10], [2456625, 20], [2456626, 30]] }
14
+ it{ should be_eq_at_index '[0][0]', Date.new(1970,01,29) }
15
+ end
16
+
17
+ context "headers with whitespace" do
18
+ let(:input){ [[" Date ", " C1 ", "C2 ", " C4"],[1990,1,2,3],[1991,4,5,6]] }
19
+ it{ headers.should eq ["Date", "C1", "C2", "C4"] }
20
+ end
21
+
22
+ context "annual" do
23
+ let(:input){ [[1990,1,2,3],[1991,4,5,6]] }
24
+ it{ should be_eq_at_index '[0][0]', Date.new(1990,12,31) }
25
+ it{ should be_eq_at_index '[0][1]', 1 }
26
+ it{ should be_eq_at_index '[1][0]', Date.new(1991,12,31) }
27
+ it{ should be_eq_at_index '[1][3]', 6 }
28
+ it{ headers.should be_nil }
29
+ end
30
+
31
+ context "numeric date" do
32
+ let(:input){ [[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
33
+ it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
34
+ it{ should be_eq_at_index '[0][1]', 1 }
35
+ it{ should be_eq_at_index '[0][2]', 2.3 }
36
+ it{ should be_eq_at_index '[0][3]', nil }
37
+ it{ headers.should be_nil }
38
+ end
39
+
40
+ context "data with headers" do
41
+ let(:input){ [['Date',0,0,0],[19900101,'1 [estimate]','2.3 - 4.0','not a number']] }
42
+ it{ should be_eq_at_index '[0][0]', Date.new(1990,01,01) }
43
+ it{ should be_eq_at_index '[0][1]', 1 }
44
+ it{ should be_eq_at_index '[0][2]', 2.3 }
45
+ it{ should be_eq_at_index '[0][3]', nil }
46
+ it{ headers.should eq ['Date','0','0','0'] }
47
+ end
48
+
49
+ context "data with nil" do
50
+ let(:input){ [["Date", "Col1"], ["2002", nil], ["2003", "5"]] }
51
+ it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
52
+ it{ data[0].length.should ==2}
53
+ it{ should be_eq_at_index '[0][1]', nil }
54
+ it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
55
+ it{ should be_eq_at_index '[1][1]', 5 }
56
+ it{ headers.should eq ['Date','Col1'] }
57
+ end
58
+
59
+ context "data with middle nil" do
60
+ let(:input){ [["Date", "Col1", "Col2"], ["2002", nil, '1'], ["2003", "5", '6']] }
61
+ it{ should be_eq_at_index '[0][0]', Date.new(2002,12,31) }
62
+ it{ should be_eq_at_index '[0][1]', nil }
63
+ it{ should be_eq_at_index '[0][2]', 1}
64
+ it{ should be_eq_at_index '[1][0]', Date.new(2003,12,31) }
65
+ it{ should be_eq_at_index '[1][1]', 5 }
66
+ it{ should be_eq_at_index '[1][2]', 6 }
67
+ it{ headers.should eq ['Date','Col1', 'Col2'] }
68
+ end
69
+
70
+ end