dataduck 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef6f3cd5a8054cf855b227324845f2ec365516dd
4
- data.tar.gz: 45030532745d7a68988bee5e95a791ed55bae6b0
3
+ metadata.gz: 497a92b4bfb99dba50ee81efb9bdf2b5b802023c
4
+ data.tar.gz: 7a5b3ef29d7bf2856b3ba196ed8a6cf25158b3f1
5
5
  SHA512:
6
- metadata.gz: 1d167785a5f64fd8ea77546dcb3f7d19107c3c651aac5933afe09e7eee4eb1674c25a9223b1c724a8229334d669c41604288c29fa4da6e35bfa596ad8bde6a90
7
- data.tar.gz: 708a7eb4404cee131bcd946314b57febaa6908c716c4ce2928167c88a0a61f084820f34258a3b5b74cbc97a215759e8cec13e9f89ff25a17aba2453a636d4517
6
+ metadata.gz: 89c16aa7ca78cbed8522f6c137eb4a3b0dd6ae6a87c3466b39ba937f319ce3d3d0db8370557d6259c6157ed2150ed94072a7489d6811bb3ad7908333fa0483f0
7
+ data.tar.gz: 314e2a64179b39b0f0f4d32fcdd5522a4b8b8574df9994c97956c5ff3066cde5c59e1a5a72d0c4c296594ee75ecaf4c30374c6d4a585a72d8b03ab80055fdfc2
data/lib/dataduck.rb CHANGED
@@ -31,7 +31,7 @@ module DataDuck
31
31
  detect_project_root = Dir.getwd
32
32
  while true
33
33
  if detect_project_root == ""
34
- raise Exception.new("Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?")
34
+ raise "Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?"
35
35
  end
36
36
 
37
37
  if File.exist?(detect_project_root + '/Gemfile')
@@ -2,6 +2,7 @@ require 'erb'
2
2
  require 'yaml'
3
3
  require 'fileutils'
4
4
  require 'typhoeus'
5
+ require 'io/console'
5
6
 
6
7
  module DataDuck
7
8
  class Commands
@@ -49,7 +50,7 @@ module DataDuck
49
50
 
50
51
  begin
51
52
  DataDuck::Commands.public_send(command, *args[1..-1])
52
- rescue Exception => err
53
+ rescue => err
53
54
  DataDuck::Logs.error(err)
54
55
  end
55
56
  end
@@ -111,7 +112,7 @@ module DataDuck
111
112
  require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
112
113
  table_class = Object.const_get(table_name_camelized)
113
114
  if !(table_class <= DataDuck::Table)
114
- raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
115
+ raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
115
116
  end
116
117
  table = table_class.new
117
118
  tables << table
@@ -135,7 +136,7 @@ module DataDuck
135
136
  require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
136
137
  table_class = Object.const_get(table_name_camelized)
137
138
  if !(table_class <= DataDuck::Table)
138
- raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
139
+ raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
139
140
  end
140
141
  table = table_class.new
141
142
  table.recreate!(DataDuck::Destination.only_destination)
@@ -157,7 +158,7 @@ module DataDuck
157
158
  require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
158
159
  table_class = Object.const_get(table_name_camelized)
159
160
  if !(table_class <= DataDuck::Table)
160
- raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
161
+ raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
161
162
  end
162
163
 
163
164
  table = table_class.new
@@ -7,15 +7,15 @@ module DataDuck
7
7
  end
8
8
 
9
9
  def connection
10
- raise Exception.new("Must implement connection in subclass.")
10
+ raise "Must implement connection in subclass."
11
11
  end
12
12
 
13
13
  def query(sql)
14
- raise Exception.new("Must implement query in subclass.")
14
+ raise "Must implement query in subclass."
15
15
  end
16
16
 
17
17
  def table_names
18
- raise Exception.new("Must implement query in subclass.")
18
+ raise "Must implement query in subclass."
19
19
  end
20
20
 
21
21
  protected
@@ -20,18 +20,18 @@ module DataDuck
20
20
 
21
21
  def self.destination_config(name)
22
22
  if DataDuck.config['destinations'].nil? || DataDuck.config['destinations'][name.to_s].nil?
23
- raise Exception.new("Could not find destination #{ name } in destinations configs.")
23
+ raise "Could not find destination #{ name } in destinations configs."
24
24
  end
25
25
 
26
26
  DataDuck.config['destinations'][name.to_s]
27
27
  end
28
28
 
29
29
  def load_table!(table)
30
- raise Exception.new("Must implement load_table! in subclass")
30
+ raise "Must implement load_table! in subclass"
31
31
  end
32
32
 
33
33
  def recreate_table!(table)
34
- raise Exception.new("Must implement load_table! in subclass")
34
+ raise "Must implement recreate_table! in subclass"
35
35
  end
36
36
 
37
37
  def postprocess!(table)
@@ -46,7 +46,7 @@ module DataDuck
46
46
  elsif allow_nil
47
47
  return nil
48
48
  else
49
- raise Exception.new("Could not find destination #{ name } in destination configs.")
49
+ raise "Could not find destination #{ name } in destination configs."
50
50
  end
51
51
  end
52
52
 
data/lib/dataduck/etl.rb CHANGED
@@ -51,7 +51,7 @@ module DataDuck
51
51
  Logs.info("Processing table '#{ table.name }'...")
52
52
  begin
53
53
  table.etl!(destinations_to_use)
54
- rescue Exception => err
54
+ rescue => err
55
55
  Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
56
56
  errored_tables << table
57
57
  end
@@ -203,7 +203,7 @@ module DataDuck
203
203
  Logs.debug("SQL executing on #{ self.name }:\n " + sql)
204
204
  begin
205
205
  self.connection[sql].map { |elem| elem }
206
- rescue Exception => err
206
+ rescue => err
207
207
  if err.to_s.include?("Check 'stl_load_errors' system table for details")
208
208
  self.raise_stl_load_error!
209
209
  else
@@ -266,7 +266,7 @@ module DataDuck
266
266
  DataDuck::Logs.info "Recreating table #{ table.name }..."
267
267
 
268
268
  if !self.table_names.include?(table.name)
269
- raise Exception.new("Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?")
269
+ raise "Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?"
270
270
  end
271
271
 
272
272
  recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
@@ -50,7 +50,7 @@ module DataDuck
50
50
  })
51
51
  begin
52
52
  response = s3.put_object(put_hash)
53
- rescue Exception => e
53
+ rescue => e
54
54
  if attempts == S3Object.max_retries
55
55
  throw e
56
56
  end
@@ -22,7 +22,7 @@ module DataDuck
22
22
 
23
23
  def self.source_config(name)
24
24
  if DataDuck.config['sources'].nil? || DataDuck.config['sources'][name.to_s].nil?
25
- raise Exception.new("Could not find source #{ name } in source configs.")
25
+ raise "Could not find source #{ name } in source configs."
26
26
  end
27
27
 
28
28
  DataDuck.config['sources'][name.to_s]
@@ -36,7 +36,7 @@ module DataDuck
36
36
  elsif allow_nil
37
37
  return nil
38
38
  else
39
- raise Exception.new("Could not find source #{ name } in source configs.")
39
+ raise "Could not find source #{ name } in source configs."
40
40
  end
41
41
  end
42
42
 
@@ -57,8 +57,8 @@ module DataDuck
57
57
 
58
58
  def check_table_valid!
59
59
  if !self.batch_size.nil?
60
- raise Exception.new("Table #{ self.name }'s batch_size must be > 0") unless self.batch_size > 0
61
- raise Exception.new("Table #{ self.name } has batch_size defined but no extract_by_column") if self.extract_by_column.nil?
60
+ raise "Table #{ self.name }'s batch_size must be > 0" unless self.batch_size > 0
61
+ raise "Table #{ self.name } has batch_size defined but no extract_by_column" if self.extract_by_column.nil?
62
62
  end
63
63
  end
64
64
 
@@ -87,12 +87,17 @@ module DataDuck
87
87
  destination.drop_staging_table!(self)
88
88
  end
89
89
 
90
+ data_processed = false
90
91
  batch_number = 0
91
92
  while batch_number < 1_000
92
93
  batch_number += 1
93
94
  self.extract!(destination, options)
94
- self.transform!
95
- self.load!(destination)
95
+ if self.data.length > 0
96
+ self.transform!
97
+ self.data.compact!
98
+ self.load!(destination) if self.data.length > 0
99
+ data_processed = true
100
+ end
96
101
 
97
102
  if self.batch_size.nil?
98
103
  break
@@ -108,11 +113,15 @@ module DataDuck
108
113
 
109
114
  self.data = []
110
115
 
111
- if self.should_fully_reload?
112
- destination.finish_fully_reloading_table!(self)
113
- end
116
+ if data_processed
117
+ if self.should_fully_reload?
118
+ destination.finish_fully_reloading_table!(self)
119
+ end
114
120
 
115
- self.postprocess!(destination, options)
121
+ self.postprocess!(destination, options)
122
+ else
123
+ DataDuck::Logs.info "No data extracted for table #{ self.name }"
124
+ end
116
125
  end
117
126
 
118
127
  def extract!(destination = nil, options = {})
@@ -1,7 +1,7 @@
1
1
  module DataDuck
2
2
  if !defined?(DataDuck::VERSION)
3
3
  VERSION_MAJOR = 0
4
- VERSION_MINOR = 7
4
+ VERSION_MINOR = 8
5
5
  VERSION_PATCH = 0
6
6
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
7
  end
@@ -43,7 +43,7 @@ module DataDuck
43
43
  experiment_variations = []
44
44
  begin
45
45
  experiment_variations = fetch_data(endpoint)
46
- rescue Exception => err
46
+ rescue => err
47
47
  broken_experiments << experiment
48
48
  end
49
49
  experiment_variations.each do |exp_var|
@@ -69,7 +69,7 @@ module DataDuck
69
69
 
70
70
  response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }", headers: {'Token' => optimizely_api_token})
71
71
  if response.response_code != 200
72
- raise Exception.new("Optimizely API for #{ api_endpoint } returned error #{ response.response_code} #{ response.body }")
72
+ raise "Optimizely API for #{ api_endpoint } returned error #{ response.response_code} #{ response.body }"
73
73
  end
74
74
 
75
75
  rows = Oj.load(response.body)
@@ -4,7 +4,7 @@ require 'uri'
4
4
 
5
5
  module DataDuck
6
6
  module SEMRush
7
- class OrganicResultsAPIError < Exception; end
7
+ class OrganicResultsAPIError < StandardError; end
8
8
 
9
9
  class OrganicResults < DataDuck::IntegrationTable
10
10
  def display_limit
@@ -16,7 +16,7 @@ module DataDuck
16
16
  end
17
17
 
18
18
  def phrases
19
- raise Exception("Must implement phrases method to be an array of the phrases you want.")
19
+ raise "Must implement phrases method to be an array of the phrases you want."
20
20
  end
21
21
 
22
22
  def prefix
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-06 00:00:00.000000000 Z
11
+ date: 2016-04-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler