dataduck 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dataduck.rb +1 -1
- data/lib/dataduck/commands.rb +5 -4
- data/lib/dataduck/database.rb +3 -3
- data/lib/dataduck/destination.rb +4 -4
- data/lib/dataduck/etl.rb +1 -1
- data/lib/dataduck/redshift_destination.rb +2 -2
- data/lib/dataduck/s3_object.rb +1 -1
- data/lib/dataduck/source.rb +2 -2
- data/lib/dataduck/table.rb +17 -8
- data/lib/dataduck/version.rb +1 -1
- data/lib/integrations/optimizely/optimizely_integration.rb +2 -2
- data/lib/integrations/semrush/organic_results.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 497a92b4bfb99dba50ee81efb9bdf2b5b802023c
|
4
|
+
data.tar.gz: 7a5b3ef29d7bf2856b3ba196ed8a6cf25158b3f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89c16aa7ca78cbed8522f6c137eb4a3b0dd6ae6a87c3466b39ba937f319ce3d3d0db8370557d6259c6157ed2150ed94072a7489d6811bb3ad7908333fa0483f0
|
7
|
+
data.tar.gz: 314e2a64179b39b0f0f4d32fcdd5522a4b8b8574df9994c97956c5ff3066cde5c59e1a5a72d0c4c296594ee75ecaf4c30374c6d4a585a72d8b03ab80055fdfc2
|
data/lib/dataduck.rb
CHANGED
@@ -31,7 +31,7 @@ module DataDuck
|
|
31
31
|
detect_project_root = Dir.getwd
|
32
32
|
while true
|
33
33
|
if detect_project_root == ""
|
34
|
-
raise
|
34
|
+
raise "Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?"
|
35
35
|
end
|
36
36
|
|
37
37
|
if File.exist?(detect_project_root + '/Gemfile')
|
data/lib/dataduck/commands.rb
CHANGED
@@ -2,6 +2,7 @@ require 'erb'
|
|
2
2
|
require 'yaml'
|
3
3
|
require 'fileutils'
|
4
4
|
require 'typhoeus'
|
5
|
+
require 'io/console'
|
5
6
|
|
6
7
|
module DataDuck
|
7
8
|
class Commands
|
@@ -49,7 +50,7 @@ module DataDuck
|
|
49
50
|
|
50
51
|
begin
|
51
52
|
DataDuck::Commands.public_send(command, *args[1..-1])
|
52
|
-
rescue
|
53
|
+
rescue => err
|
53
54
|
DataDuck::Logs.error(err)
|
54
55
|
end
|
55
56
|
end
|
@@ -111,7 +112,7 @@ module DataDuck
|
|
111
112
|
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
112
113
|
table_class = Object.const_get(table_name_camelized)
|
113
114
|
if !(table_class <= DataDuck::Table)
|
114
|
-
raise
|
115
|
+
raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
|
115
116
|
end
|
116
117
|
table = table_class.new
|
117
118
|
tables << table
|
@@ -135,7 +136,7 @@ module DataDuck
|
|
135
136
|
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
136
137
|
table_class = Object.const_get(table_name_camelized)
|
137
138
|
if !(table_class <= DataDuck::Table)
|
138
|
-
raise
|
139
|
+
raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
|
139
140
|
end
|
140
141
|
table = table_class.new
|
141
142
|
table.recreate!(DataDuck::Destination.only_destination)
|
@@ -157,7 +158,7 @@ module DataDuck
|
|
157
158
|
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
158
159
|
table_class = Object.const_get(table_name_camelized)
|
159
160
|
if !(table_class <= DataDuck::Table)
|
160
|
-
raise
|
161
|
+
raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
|
161
162
|
end
|
162
163
|
|
163
164
|
table = table_class.new
|
data/lib/dataduck/database.rb
CHANGED
@@ -7,15 +7,15 @@ module DataDuck
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def connection
|
10
|
-
raise
|
10
|
+
raise "Must implement connection in subclass."
|
11
11
|
end
|
12
12
|
|
13
13
|
def query(sql)
|
14
|
-
raise
|
14
|
+
raise "Must implement query in subclass."
|
15
15
|
end
|
16
16
|
|
17
17
|
def table_names
|
18
|
-
raise
|
18
|
+
raise "Must implement query in subclass."
|
19
19
|
end
|
20
20
|
|
21
21
|
protected
|
data/lib/dataduck/destination.rb
CHANGED
@@ -20,18 +20,18 @@ module DataDuck
|
|
20
20
|
|
21
21
|
def self.destination_config(name)
|
22
22
|
if DataDuck.config['destinations'].nil? || DataDuck.config['destinations'][name.to_s].nil?
|
23
|
-
raise
|
23
|
+
raise "Could not find destination #{ name } in destinations configs."
|
24
24
|
end
|
25
25
|
|
26
26
|
DataDuck.config['destinations'][name.to_s]
|
27
27
|
end
|
28
28
|
|
29
29
|
def load_table!(table)
|
30
|
-
raise
|
30
|
+
raise "Must implement load_table! in subclass"
|
31
31
|
end
|
32
32
|
|
33
33
|
def recreate_table!(table)
|
34
|
-
raise
|
34
|
+
raise "Must implement recreate_table! in subclass"
|
35
35
|
end
|
36
36
|
|
37
37
|
def postprocess!(table)
|
@@ -46,7 +46,7 @@ module DataDuck
|
|
46
46
|
elsif allow_nil
|
47
47
|
return nil
|
48
48
|
else
|
49
|
-
raise
|
49
|
+
raise "Could not find destination #{ name } in destination configs."
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
data/lib/dataduck/etl.rb
CHANGED
@@ -51,7 +51,7 @@ module DataDuck
|
|
51
51
|
Logs.info("Processing table '#{ table.name }'...")
|
52
52
|
begin
|
53
53
|
table.etl!(destinations_to_use)
|
54
|
-
rescue
|
54
|
+
rescue => err
|
55
55
|
Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
|
56
56
|
errored_tables << table
|
57
57
|
end
|
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -203,7 +203,7 @@ module DataDuck
|
|
203
203
|
Logs.debug("SQL executing on #{ self.name }:\n " + sql)
|
204
204
|
begin
|
205
205
|
self.connection[sql].map { |elem| elem }
|
206
|
-
rescue
|
206
|
+
rescue => err
|
207
207
|
if err.to_s.include?("Check 'stl_load_errors' system table for details")
|
208
208
|
self.raise_stl_load_error!
|
209
209
|
else
|
@@ -266,7 +266,7 @@ module DataDuck
|
|
266
266
|
DataDuck::Logs.info "Recreating table #{ table.name }..."
|
267
267
|
|
268
268
|
if !self.table_names.include?(table.name)
|
269
|
-
raise
|
269
|
+
raise "Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?"
|
270
270
|
end
|
271
271
|
|
272
272
|
recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
|
data/lib/dataduck/s3_object.rb
CHANGED
data/lib/dataduck/source.rb
CHANGED
@@ -22,7 +22,7 @@ module DataDuck
|
|
22
22
|
|
23
23
|
def self.source_config(name)
|
24
24
|
if DataDuck.config['sources'].nil? || DataDuck.config['sources'][name.to_s].nil?
|
25
|
-
raise
|
25
|
+
raise "Could not find source #{ name } in source configs."
|
26
26
|
end
|
27
27
|
|
28
28
|
DataDuck.config['sources'][name.to_s]
|
@@ -36,7 +36,7 @@ module DataDuck
|
|
36
36
|
elsif allow_nil
|
37
37
|
return nil
|
38
38
|
else
|
39
|
-
raise
|
39
|
+
raise "Could not find source #{ name } in source configs."
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
data/lib/dataduck/table.rb
CHANGED
@@ -57,8 +57,8 @@ module DataDuck
|
|
57
57
|
|
58
58
|
def check_table_valid!
|
59
59
|
if !self.batch_size.nil?
|
60
|
-
raise
|
61
|
-
raise
|
60
|
+
raise "Table #{ self.name }'s batch_size must be > 0" unless self.batch_size > 0
|
61
|
+
raise "Table #{ self.name } has batch_size defined but no extract_by_column" if self.extract_by_column.nil?
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
@@ -87,12 +87,17 @@ module DataDuck
|
|
87
87
|
destination.drop_staging_table!(self)
|
88
88
|
end
|
89
89
|
|
90
|
+
data_processed = false
|
90
91
|
batch_number = 0
|
91
92
|
while batch_number < 1_000
|
92
93
|
batch_number += 1
|
93
94
|
self.extract!(destination, options)
|
94
|
-
self.
|
95
|
-
|
95
|
+
if self.data.length > 0
|
96
|
+
self.transform!
|
97
|
+
self.data.compact!
|
98
|
+
self.load!(destination) if self.data.length > 0
|
99
|
+
data_processed = true
|
100
|
+
end
|
96
101
|
|
97
102
|
if self.batch_size.nil?
|
98
103
|
break
|
@@ -108,11 +113,15 @@ module DataDuck
|
|
108
113
|
|
109
114
|
self.data = []
|
110
115
|
|
111
|
-
if
|
112
|
-
|
113
|
-
|
116
|
+
if data_processed
|
117
|
+
if self.should_fully_reload?
|
118
|
+
destination.finish_fully_reloading_table!(self)
|
119
|
+
end
|
114
120
|
|
115
|
-
|
121
|
+
self.postprocess!(destination, options)
|
122
|
+
else
|
123
|
+
DataDuck::Logs.info "No data extracted for table #{ self.name }"
|
124
|
+
end
|
116
125
|
end
|
117
126
|
|
118
127
|
def extract!(destination = nil, options = {})
|
data/lib/dataduck/version.rb
CHANGED
data/lib/integrations/optimizely/optimizely_integration.rb
CHANGED
@@ -43,7 +43,7 @@ module DataDuck
|
|
43
43
|
experiment_variations = []
|
44
44
|
begin
|
45
45
|
experiment_variations = fetch_data(endpoint)
|
46
|
-
rescue
|
46
|
+
rescue => err
|
47
47
|
broken_experiments << experiment
|
48
48
|
end
|
49
49
|
experiment_variations.each do |exp_var|
|
@@ -69,7 +69,7 @@ module DataDuck
|
|
69
69
|
|
70
70
|
response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }", headers: {'Token' => optimizely_api_token})
|
71
71
|
if response.response_code != 200
|
72
|
-
raise
|
72
|
+
raise "Optimizely API for #{ api_endpoint } returned error #{ response.response_code} #{ response.body }"
|
73
73
|
end
|
74
74
|
|
75
75
|
rows = Oj.load(response.body)
|
data/lib/integrations/semrush/organic_results.rb
CHANGED
@@ -4,7 +4,7 @@ require 'uri'
|
|
4
4
|
|
5
5
|
module DataDuck
|
6
6
|
module SEMRush
|
7
|
-
class OrganicResultsAPIError <
|
7
|
+
class OrganicResultsAPIError < StandardError; end
|
8
8
|
|
9
9
|
class OrganicResults < DataDuck::IntegrationTable
|
10
10
|
def display_limit
|
@@ -16,7 +16,7 @@ module DataDuck
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def phrases
|
19
|
-
raise
|
19
|
+
raise "Must implement phrases method to be an array of the phrases you want."
|
20
20
|
end
|
21
21
|
|
22
22
|
def prefix
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|