dataduck 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dataduck.rb +1 -1
- data/lib/dataduck/commands.rb +5 -4
- data/lib/dataduck/database.rb +3 -3
- data/lib/dataduck/destination.rb +4 -4
- data/lib/dataduck/etl.rb +1 -1
- data/lib/dataduck/redshift_destination.rb +2 -2
- data/lib/dataduck/s3_object.rb +1 -1
- data/lib/dataduck/source.rb +2 -2
- data/lib/dataduck/table.rb +17 -8
- data/lib/dataduck/version.rb +1 -1
- data/lib/integrations/optimizely/optimizely_integration.rb +2 -2
- data/lib/integrations/semrush/organic_results.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 497a92b4bfb99dba50ee81efb9bdf2b5b802023c
|
4
|
+
data.tar.gz: 7a5b3ef29d7bf2856b3ba196ed8a6cf25158b3f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89c16aa7ca78cbed8522f6c137eb4a3b0dd6ae6a87c3466b39ba937f319ce3d3d0db8370557d6259c6157ed2150ed94072a7489d6811bb3ad7908333fa0483f0
|
7
|
+
data.tar.gz: 314e2a64179b39b0f0f4d32fcdd5522a4b8b8574df9994c97956c5ff3066cde5c59e1a5a72d0c4c296594ee75ecaf4c30374c6d4a585a72d8b03ab80055fdfc2
|
data/lib/dataduck.rb
CHANGED
@@ -31,7 +31,7 @@ module DataDuck
|
|
31
31
|
detect_project_root = Dir.getwd
|
32
32
|
while true
|
33
33
|
if detect_project_root == ""
|
34
|
-
raise
|
34
|
+
raise "Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?"
|
35
35
|
end
|
36
36
|
|
37
37
|
if File.exist?(detect_project_root + '/Gemfile')
|
data/lib/dataduck/commands.rb
CHANGED
@@ -2,6 +2,7 @@ require 'erb'
|
|
2
2
|
require 'yaml'
|
3
3
|
require 'fileutils'
|
4
4
|
require 'typhoeus'
|
5
|
+
require 'io/console'
|
5
6
|
|
6
7
|
module DataDuck
|
7
8
|
class Commands
|
@@ -49,7 +50,7 @@ module DataDuck
|
|
49
50
|
|
50
51
|
begin
|
51
52
|
DataDuck::Commands.public_send(command, *args[1..-1])
|
52
|
-
rescue
|
53
|
+
rescue => err
|
53
54
|
DataDuck::Logs.error(err)
|
54
55
|
end
|
55
56
|
end
|
@@ -111,7 +112,7 @@ module DataDuck
|
|
111
112
|
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
112
113
|
table_class = Object.const_get(table_name_camelized)
|
113
114
|
if !(table_class <= DataDuck::Table)
|
114
|
-
raise
|
115
|
+
raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
|
115
116
|
end
|
116
117
|
table = table_class.new
|
117
118
|
tables << table
|
@@ -135,7 +136,7 @@ module DataDuck
|
|
135
136
|
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
136
137
|
table_class = Object.const_get(table_name_camelized)
|
137
138
|
if !(table_class <= DataDuck::Table)
|
138
|
-
raise
|
139
|
+
raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
|
139
140
|
end
|
140
141
|
table = table_class.new
|
141
142
|
table.recreate!(DataDuck::Destination.only_destination)
|
@@ -157,7 +158,7 @@ module DataDuck
|
|
157
158
|
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
158
159
|
table_class = Object.const_get(table_name_camelized)
|
159
160
|
if !(table_class <= DataDuck::Table)
|
160
|
-
raise
|
161
|
+
raise "Table class #{ table_name_camelized } must inherit from DataDuck::Table"
|
161
162
|
end
|
162
163
|
|
163
164
|
table = table_class.new
|
data/lib/dataduck/database.rb
CHANGED
@@ -7,15 +7,15 @@ module DataDuck
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def connection
|
10
|
-
raise
|
10
|
+
raise "Must implement connection in subclass."
|
11
11
|
end
|
12
12
|
|
13
13
|
def query(sql)
|
14
|
-
raise
|
14
|
+
raise "Must implement query in subclass."
|
15
15
|
end
|
16
16
|
|
17
17
|
def table_names
|
18
|
-
raise
|
18
|
+
raise "Must implement query in subclass."
|
19
19
|
end
|
20
20
|
|
21
21
|
protected
|
data/lib/dataduck/destination.rb
CHANGED
@@ -20,18 +20,18 @@ module DataDuck
|
|
20
20
|
|
21
21
|
def self.destination_config(name)
|
22
22
|
if DataDuck.config['destinations'].nil? || DataDuck.config['destinations'][name.to_s].nil?
|
23
|
-
raise
|
23
|
+
raise "Could not find destination #{ name } in destinations configs."
|
24
24
|
end
|
25
25
|
|
26
26
|
DataDuck.config['destinations'][name.to_s]
|
27
27
|
end
|
28
28
|
|
29
29
|
def load_table!(table)
|
30
|
-
raise
|
30
|
+
raise "Must implement load_table! in subclass"
|
31
31
|
end
|
32
32
|
|
33
33
|
def recreate_table!(table)
|
34
|
-
raise
|
34
|
+
raise "Must implement recreate_table! in subclass"
|
35
35
|
end
|
36
36
|
|
37
37
|
def postprocess!(table)
|
@@ -46,7 +46,7 @@ module DataDuck
|
|
46
46
|
elsif allow_nil
|
47
47
|
return nil
|
48
48
|
else
|
49
|
-
raise
|
49
|
+
raise "Could not find destination #{ name } in destination configs."
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
data/lib/dataduck/etl.rb
CHANGED
@@ -51,7 +51,7 @@ module DataDuck
|
|
51
51
|
Logs.info("Processing table '#{ table.name }'...")
|
52
52
|
begin
|
53
53
|
table.etl!(destinations_to_use)
|
54
|
-
rescue
|
54
|
+
rescue => err
|
55
55
|
Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
|
56
56
|
errored_tables << table
|
57
57
|
end
|
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -203,7 +203,7 @@ module DataDuck
|
|
203
203
|
Logs.debug("SQL executing on #{ self.name }:\n " + sql)
|
204
204
|
begin
|
205
205
|
self.connection[sql].map { |elem| elem }
|
206
|
-
rescue
|
206
|
+
rescue => err
|
207
207
|
if err.to_s.include?("Check 'stl_load_errors' system table for details")
|
208
208
|
self.raise_stl_load_error!
|
209
209
|
else
|
@@ -266,7 +266,7 @@ module DataDuck
|
|
266
266
|
DataDuck::Logs.info "Recreating table #{ table.name }..."
|
267
267
|
|
268
268
|
if !self.table_names.include?(table.name)
|
269
|
-
raise
|
269
|
+
raise "Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?"
|
270
270
|
end
|
271
271
|
|
272
272
|
recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
|
data/lib/dataduck/s3_object.rb
CHANGED
data/lib/dataduck/source.rb
CHANGED
@@ -22,7 +22,7 @@ module DataDuck
|
|
22
22
|
|
23
23
|
def self.source_config(name)
|
24
24
|
if DataDuck.config['sources'].nil? || DataDuck.config['sources'][name.to_s].nil?
|
25
|
-
raise
|
25
|
+
raise "Could not find source #{ name } in source configs."
|
26
26
|
end
|
27
27
|
|
28
28
|
DataDuck.config['sources'][name.to_s]
|
@@ -36,7 +36,7 @@ module DataDuck
|
|
36
36
|
elsif allow_nil
|
37
37
|
return nil
|
38
38
|
else
|
39
|
-
raise
|
39
|
+
raise "Could not find source #{ name } in source configs."
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
data/lib/dataduck/table.rb
CHANGED
@@ -57,8 +57,8 @@ module DataDuck
|
|
57
57
|
|
58
58
|
def check_table_valid!
|
59
59
|
if !self.batch_size.nil?
|
60
|
-
raise
|
61
|
-
raise
|
60
|
+
raise "Table #{ self.name }'s batch_size must be > 0" unless self.batch_size > 0
|
61
|
+
raise "Table #{ self.name } has batch_size defined but no extract_by_column" if self.extract_by_column.nil?
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
@@ -87,12 +87,17 @@ module DataDuck
|
|
87
87
|
destination.drop_staging_table!(self)
|
88
88
|
end
|
89
89
|
|
90
|
+
data_processed = false
|
90
91
|
batch_number = 0
|
91
92
|
while batch_number < 1_000
|
92
93
|
batch_number += 1
|
93
94
|
self.extract!(destination, options)
|
94
|
-
self.
|
95
|
-
|
95
|
+
if self.data.length > 0
|
96
|
+
self.transform!
|
97
|
+
self.data.compact!
|
98
|
+
self.load!(destination) if self.data.length > 0
|
99
|
+
data_processed = true
|
100
|
+
end
|
96
101
|
|
97
102
|
if self.batch_size.nil?
|
98
103
|
break
|
@@ -108,11 +113,15 @@ module DataDuck
|
|
108
113
|
|
109
114
|
self.data = []
|
110
115
|
|
111
|
-
if
|
112
|
-
|
113
|
-
|
116
|
+
if data_processed
|
117
|
+
if self.should_fully_reload?
|
118
|
+
destination.finish_fully_reloading_table!(self)
|
119
|
+
end
|
114
120
|
|
115
|
-
|
121
|
+
self.postprocess!(destination, options)
|
122
|
+
else
|
123
|
+
DataDuck::Logs.info "No data extracted for table #{ self.name }"
|
124
|
+
end
|
116
125
|
end
|
117
126
|
|
118
127
|
def extract!(destination = nil, options = {})
|
data/lib/dataduck/version.rb
CHANGED
data/lib/integrations/optimizely/optimizely_integration.rb
CHANGED
@@ -43,7 +43,7 @@ module DataDuck
|
|
43
43
|
experiment_variations = []
|
44
44
|
begin
|
45
45
|
experiment_variations = fetch_data(endpoint)
|
46
|
-
rescue
|
46
|
+
rescue => err
|
47
47
|
broken_experiments << experiment
|
48
48
|
end
|
49
49
|
experiment_variations.each do |exp_var|
|
@@ -69,7 +69,7 @@ module DataDuck
|
|
69
69
|
|
70
70
|
response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }", headers: {'Token' => optimizely_api_token})
|
71
71
|
if response.response_code != 200
|
72
|
-
raise
|
72
|
+
raise "Optimizely API for #{ api_endpoint } returned error #{ response.response_code} #{ response.body }"
|
73
73
|
end
|
74
74
|
|
75
75
|
rows = Oj.load(response.body)
|
data/lib/integrations/semrush/organic_results.rb
CHANGED
@@ -4,7 +4,7 @@ require 'uri'
|
|
4
4
|
|
5
5
|
module DataDuck
|
6
6
|
module SEMRush
|
7
|
-
class OrganicResultsAPIError <
|
7
|
+
class OrganicResultsAPIError < StandardError; end
|
8
8
|
|
9
9
|
class OrganicResults < DataDuck::IntegrationTable
|
10
10
|
def display_limit
|
@@ -16,7 +16,7 @@ module DataDuck
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def phrases
|
19
|
-
raise
|
19
|
+
raise "Must implement phrases method to be an array of the phrases you want."
|
20
20
|
end
|
21
21
|
|
22
22
|
def prefix
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|