dataduck 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef6f3cd5a8054cf855b227324845f2ec365516dd
4
- data.tar.gz: 45030532745d7a68988bee5e95a791ed55bae6b0
3
+ metadata.gz: 497a92b4bfb99dba50ee81efb9bdf2b5b802023c
4
+ data.tar.gz: 7a5b3ef29d7bf2856b3ba196ed8a6cf25158b3f1
5
5
  SHA512:
6
- metadata.gz: 1d167785a5f64fd8ea77546dcb3f7d19107c3c651aac5933afe09e7eee4eb1674c25a9223b1c724a8229334d669c41604288c29fa4da6e35bfa596ad8bde6a90
7
- data.tar.gz: 708a7eb4404cee131bcd946314b57febaa6908c716c4ce2928167c88a0a61f084820f34258a3b5b74cbc97a215759e8cec13e9f89ff25a17aba2453a636d4517
6
+ metadata.gz: 89c16aa7ca78cbed8522f6c137eb4a3b0dd6ae6a87c3466b39ba937f319ce3d3d0db8370557d6259c6157ed2150ed94072a7489d6811bb3ad7908333fa0483f0
7
+ data.tar.gz: 314e2a64179b39b0f0f4d32fcdd5522a4b8b8574df9994c97956c5ff3066cde5c59e1a5a72d0c4c296594ee75ecaf4c30374c6d4a585a72d8b03ab80055fdfc2
data/lib/dataduck.rb CHANGED
@@ -31,7 +31,7 @@ module DataDuck
31
31
  detect_project_root = Dir.getwd
32
32
  while true
33
33
  if detect_project_root == ""
34
- raise Exception.new("Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?")
34
+ raise "Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?"
35
35
  end
36
36
 
37
37
  if File.exist?(detect_project_root + '/Gemfile')
@@ -2,6 +2,7 @@ require 'erb'
2
2
  require 'yaml'
3
3
  require 'fileutils'
4
4
  require 'typhoeus'
5
+ require 'io/console'
5
6
 
6
7
  module DataDuck
7
8
  class Commands
@@ -49,7 +50,7 @@ module DataDuck
49
50
 
50
51
  begin
51
52
  DataDuck::Commands.public_send(command, *args[1..-1])
52
- rescue Exception => err
53
+ rescue => err
53
54
  DataDuck::Logs.error(err)
54
55
  end
55
56
  end
@@ -111,7 +112,7 @@ module DataDuck
111
112
  require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
112
113
  table_class = Object.const_get(table_name_camelized)
113
114
  if !(table_class <= DataDuck::Table)
114
- raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
115
+ raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
115
116
  end
116
117
  table = table_class.new
117
118
  tables << table
@@ -135,7 +136,7 @@ module DataDuck
135
136
  require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
136
137
  table_class = Object.const_get(table_name_camelized)
137
138
  if !(table_class <= DataDuck::Table)
138
- raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
139
+ raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
139
140
  end
140
141
  table = table_class.new
141
142
  table.recreate!(DataDuck::Destination.only_destination)
@@ -157,7 +158,7 @@ module DataDuck
157
158
  require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
158
159
  table_class = Object.const_get(table_name_camelized)
159
160
  if !(table_class <= DataDuck::Table)
160
- raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
161
+ raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
161
162
  end
162
163
 
163
164
  table = table_class.new
@@ -7,15 +7,15 @@ module DataDuck
7
7
  end
8
8
 
9
9
  def connection
10
- raise Exception.new("Must implement connection in subclass.")
10
+ raise "Must implement connection in subclass."
11
11
  end
12
12
 
13
13
  def query(sql)
14
- raise Exception.new("Must implement query in subclass.")
14
+ raise "Must implement query in subclass."
15
15
  end
16
16
 
17
17
  def table_names
18
- raise Exception.new("Must implement query in subclass.")
18
+ raise "Must implement query in subclass."
19
19
  end
20
20
 
21
21
  protected
@@ -20,18 +20,18 @@ module DataDuck
20
20
 
21
21
  def self.destination_config(name)
22
22
  if DataDuck.config['destinations'].nil? || DataDuck.config['destinations'][name.to_s].nil?
23
- raise Exception.new("Could not find destination #{ name } in destinations configs.")
23
+ raise "Could not find destination #{ name } in destinations configs."
24
24
  end
25
25
 
26
26
  DataDuck.config['destinations'][name.to_s]
27
27
  end
28
28
 
29
29
  def load_table!(table)
30
- raise Exception.new("Must implement load_table! in subclass")
30
+ raise "Must implement load_table! in subclass"
31
31
  end
32
32
 
33
33
  def recreate_table!(table)
34
- raise Exception.new("Must implement load_table! in subclass")
34
+ raise "Must implement recreate_table! in subclass"
35
35
  end
36
36
 
37
37
  def postprocess!(table)
@@ -46,7 +46,7 @@ module DataDuck
46
46
  elsif allow_nil
47
47
  return nil
48
48
  else
49
- raise Exception.new("Could not find destination #{ name } in destination configs.")
49
+ raise "Could not find destination #{ name } in destination configs."
50
50
  end
51
51
  end
52
52
 
data/lib/dataduck/etl.rb CHANGED
@@ -51,7 +51,7 @@ module DataDuck
51
51
  Logs.info("Processing table '#{ table.name }'...")
52
52
  begin
53
53
  table.etl!(destinations_to_use)
54
- rescue Exception => err
54
+ rescue => err
55
55
  Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
56
56
  errored_tables << table
57
57
  end
@@ -203,7 +203,7 @@ module DataDuck
203
203
  Logs.debug("SQL executing on #{ self.name }:\n " + sql)
204
204
  begin
205
205
  self.connection[sql].map { |elem| elem }
206
- rescue Exception => err
206
+ rescue => err
207
207
  if err.to_s.include?("Check 'stl_load_errors' system table for details")
208
208
  self.raise_stl_load_error!
209
209
  else
@@ -266,7 +266,7 @@ module DataDuck
266
266
  DataDuck::Logs.info "Recreating table #{ table.name }..."
267
267
 
268
268
  if !self.table_names.include?(table.name)
269
- raise Exception.new("Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?")
269
+ raise "Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?"
270
270
  end
271
271
 
272
272
  recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
@@ -50,7 +50,7 @@ module DataDuck
50
50
  })
51
51
  begin
52
52
  response = s3.put_object(put_hash)
53
- rescue Exception => e
53
+ rescue => e
54
54
  if attempts == S3Object.max_retries
55
55
  throw e
56
56
  end
@@ -22,7 +22,7 @@ module DataDuck
22
22
 
23
23
  def self.source_config(name)
24
24
  if DataDuck.config['sources'].nil? || DataDuck.config['sources'][name.to_s].nil?
25
- raise Exception.new("Could not find source #{ name } in source configs.")
25
+ raise "Could not find source #{ name } in source configs."
26
26
  end
27
27
 
28
28
  DataDuck.config['sources'][name.to_s]
@@ -36,7 +36,7 @@ module DataDuck
36
36
  elsif allow_nil
37
37
  return nil
38
38
  else
39
- raise Exception.new("Could not find source #{ name } in source configs.")
39
+ raise "Could not find source #{ name } in source configs."
40
40
  end
41
41
  end
42
42
 
@@ -57,8 +57,8 @@ module DataDuck
57
57
 
58
58
  def check_table_valid!
59
59
  if !self.batch_size.nil?
60
- raise Exception.new("Table #{ self.name }'s batch_size must be > 0") unless self.batch_size > 0
61
- raise Exception.new("Table #{ self.name } has batch_size defined but no extract_by_column") if self.extract_by_column.nil?
60
+ raise "Table #{ self.name }'s batch_size must be > 0" unless self.batch_size > 0
61
+ raise "Table #{ self.name } has batch_size defined but no extract_by_column" if self.extract_by_column.nil?
62
62
  end
63
63
  end
64
64
 
@@ -87,12 +87,17 @@ module DataDuck
87
87
  destination.drop_staging_table!(self)
88
88
  end
89
89
 
90
+ data_processed = false
90
91
  batch_number = 0
91
92
  while batch_number < 1_000
92
93
  batch_number += 1
93
94
  self.extract!(destination, options)
94
- self.transform!
95
- self.load!(destination)
95
+ if self.data.length > 0
96
+ self.transform!
97
+ self.data.compact!
98
+ self.load!(destination) if self.data.length > 0
99
+ data_processed = true
100
+ end
96
101
 
97
102
  if self.batch_size.nil?
98
103
  break
@@ -108,11 +113,15 @@ module DataDuck
108
113
 
109
114
  self.data = []
110
115
 
111
- if self.should_fully_reload?
112
- destination.finish_fully_reloading_table!(self)
113
- end
116
+ if data_processed
117
+ if self.should_fully_reload?
118
+ destination.finish_fully_reloading_table!(self)
119
+ end
114
120
 
115
- self.postprocess!(destination, options)
121
+ self.postprocess!(destination, options)
122
+ else
123
+ DataDuck::Logs.info "No data extracted for table #{ self.name }"
124
+ end
116
125
  end
117
126
 
118
127
  def extract!(destination = nil, options = {})
@@ -1,7 +1,7 @@
1
1
  module DataDuck
2
2
  if !defined?(DataDuck::VERSION)
3
3
  VERSION_MAJOR = 0
4
- VERSION_MINOR = 7
4
+ VERSION_MINOR = 8
5
5
  VERSION_PATCH = 0
6
6
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
7
  end
@@ -43,7 +43,7 @@ module DataDuck
43
43
  experiment_variations = []
44
44
  begin
45
45
  experiment_variations = fetch_data(endpoint)
46
- rescue Exception => err
46
+ rescue => err
47
47
  broken_experiments << experiment
48
48
  end
49
49
  experiment_variations.each do |exp_var|
@@ -69,7 +69,7 @@ module DataDuck
69
69
 
70
70
  response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }", headers: {'Token' => optimizely_api_token})
71
71
  if response.response_code != 200
72
- raise Exception.new("Optimizely API for #{ api_endpoint } returned error #{ response.response_code} #{ response.body }")
72
+ raise "Optimizely API for #{ api_endpoint } returned error #{ response.response_code} #{ response.body }"
73
73
  end
74
74
 
75
75
  rows = Oj.load(response.body)
@@ -4,7 +4,7 @@ require 'uri'
4
4
 
5
5
  module DataDuck
6
6
  module SEMRush
7
- class OrganicResultsAPIError < Exception; end
7
+ class OrganicResultsAPIError < StandardError; end
8
8
 
9
9
  class OrganicResults < DataDuck::IntegrationTable
10
10
  def display_limit
@@ -16,7 +16,7 @@ module DataDuck
16
16
  end
17
17
 
18
18
  def phrases
19
- raise Exception("Must implement phrases method to be an array of the phrases you want.")
19
+ raise "Must implement phrases method to be an array of the phrases you want."
20
20
  end
21
21
 
22
22
  def prefix
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-06 00:00:00.000000000 Z
11
+ date: 2016-04-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler