dataduck 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dataduck/etl.rb +3 -3
- data/lib/dataduck/redshift_destination.rb +17 -1
- data/lib/dataduck/table.rb +6 -0
- data/lib/dataduck/version.rb +1 -1
- data/lib/integrations/optimizely/experiments.rb +12 -38
- data/lib/integrations/optimizely/optimizely_integration.rb +60 -2
- data/lib/integrations/optimizely/optimizely_table.rb +4 -0
- data/lib/integrations/optimizely/projects.rb +7 -14
- data/lib/integrations/optimizely/variations.rb +48 -1
- data/lib/integrations/semrush/organic_results.rb +38 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8bd5effb261990d6eccb37fe16de93195c89ef24
|
4
|
+
data.tar.gz: b2c7d31fac424890e6b605d2828423b624625467
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61b17cc19b83ad9733b9744de6a65f884af5f3b145ddbc967ce01eff2fa8b552b103c9c69016c59f87c729cd9bc10866ed5ee720f903236c90e1d2b4ec2712c5
|
7
|
+
data.tar.gz: 0217b069e2d3997698c4135a170c65c22d16187cb02d35adf68d4e62c8798073c2c95a9d5f9689329762cc8c6d25a0b5682f542f7b802918a6776f792e302db3
|
data/lib/dataduck/etl.rb
CHANGED
@@ -25,9 +25,9 @@ module DataDuck
|
|
25
25
|
table_name_underscores = file.split("/").last.gsub(".rb", "")
|
26
26
|
table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name_underscores)
|
27
27
|
require file
|
28
|
-
|
29
|
-
if
|
30
|
-
@tables <<
|
28
|
+
table_class = Object.const_get(table_name_camelized)
|
29
|
+
if table_class <= DataDuck::Table && table_class.new.include_with_all?
|
30
|
+
@tables << table_class
|
31
31
|
end
|
32
32
|
end
|
33
33
|
end
|
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -175,10 +175,26 @@ module DataDuck
|
|
175
175
|
end
|
176
176
|
|
177
177
|
# Following guidelines in http://docs.aws.amazon.com/redshift/latest/dg/merge-examples.html
|
178
|
+
self.delete_before_inserting!(table)
|
179
|
+
self.insert_from_staging!(table)
|
180
|
+
end
|
181
|
+
|
182
|
+
def delete_before_inserting!(table)
|
178
183
|
staging_name = table.staging_name
|
179
184
|
building_name = table.building_name
|
180
|
-
|
185
|
+
|
186
|
+
where_equals_parts = []
|
187
|
+
table.identify_by_columns.each do |attribute|
|
188
|
+
where_equals_parts << "#{ building_name }.#{ attribute } = #{ staging_name }.#{ attribute }"
|
189
|
+
end
|
190
|
+
|
191
|
+
delete_query = "DELETE FROM #{ building_name } USING #{ staging_name } WHERE #{ where_equals_parts.join(' AND ') }"
|
181
192
|
self.query(delete_query)
|
193
|
+
end
|
194
|
+
|
195
|
+
def insert_from_staging!(table)
|
196
|
+
staging_name = table.staging_name
|
197
|
+
building_name = table.building_name
|
182
198
|
insert_query = "INSERT INTO #{ building_name } (\"#{ table.output_column_names.join('","') }\") SELECT \"#{ table.output_column_names.join('","') }\" FROM #{ staging_name }"
|
183
199
|
self.query(insert_query)
|
184
200
|
end
|
data/lib/dataduck/table.rb
CHANGED
@@ -194,6 +194,12 @@ module DataDuck
|
|
194
194
|
nil
|
195
195
|
end
|
196
196
|
|
197
|
+
def identify_by_columns
|
198
|
+
return ["id"] if self.output_column_names.include?("id")
|
199
|
+
|
200
|
+
[]
|
201
|
+
end
|
202
|
+
|
197
203
|
def should_fully_reload?
|
198
204
|
false # Set to true if you want to fully reload a table with each ETL
|
199
205
|
end
|
data/lib/dataduck/version.rb
CHANGED
data/lib/integrations/optimizely/experiments.rb
CHANGED
@@ -7,50 +7,18 @@ require 'date'
|
|
7
7
|
module DataDuck
|
8
8
|
module Optimizely
|
9
9
|
class Experiments < DataDuck::Optimizely::OptimizelyTable
|
10
|
-
|
11
10
|
transforms :percentage_included_to_float
|
12
|
-
transforms :
|
13
|
-
|
14
|
-
def extract!(destination, options = {})
|
15
|
-
self.data = []
|
11
|
+
transforms :rename_description_to_name
|
16
12
|
|
17
|
-
|
18
|
-
|
19
|
-
raise Exception.new("Optimizely API for projects returned error #{ response.response_code} #{ response.body }")
|
20
|
-
end
|
21
|
-
projects = Oj.load(projects_response.body)
|
22
|
-
|
23
|
-
projects.each do |project|
|
24
|
-
self.extract_for_project!(project["id"])
|
25
|
-
end
|
13
|
+
def initialize(experiments)
|
14
|
+
self.data = experiments
|
26
15
|
end
|
27
16
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/projects/#{ project_id }/experiments", headers: {'Token' => self.optimizely_api_token})
|
32
|
-
|
33
|
-
if response.response_code != 200
|
34
|
-
raise Exception.new("Optimizely API for experiments returned error #{ response.response_code} #{ response.body }")
|
35
|
-
end
|
36
|
-
|
37
|
-
experiments = Oj.load(response.body)
|
38
|
-
experiments.each do |experiment|
|
39
|
-
experiment[:dataduck_extracted_at] = now
|
40
|
-
experiment[:project_id] = project_id
|
41
|
-
end
|
42
|
-
|
43
|
-
self.data.concat(experiments)
|
17
|
+
def extract!(*args)
|
18
|
+
# already initialized data
|
44
19
|
end
|
45
20
|
|
46
|
-
def
|
47
|
-
row["created"] = DateTime.parse(row["created"])
|
48
|
-
row["last_modified"] = DateTime.parse(row["last_modified"])
|
49
|
-
|
50
|
-
row
|
51
|
-
end
|
52
|
-
|
53
|
-
def rename_description_to_name
|
21
|
+
def rename_description_to_name(row)
|
54
22
|
row[:name] = row['description']
|
55
23
|
|
56
24
|
row
|
@@ -62,6 +30,10 @@ module DataDuck
|
|
62
30
|
row
|
63
31
|
end
|
64
32
|
|
33
|
+
def should_fully_reload?
|
34
|
+
true
|
35
|
+
end
|
36
|
+
|
65
37
|
def indexes
|
66
38
|
["id", "project_id", "primary_goal_id", "name"]
|
67
39
|
end
|
@@ -76,6 +48,7 @@ module DataDuck
|
|
76
48
|
:primary_goal_id => :integer,
|
77
49
|
:details => :bigtext,
|
78
50
|
:status => :string,
|
51
|
+
:audience_ids => :bigtext,
|
79
52
|
:url_conditions => :bigtext,
|
80
53
|
:last_modified => :datetime,
|
81
54
|
:is_multivariate => :boolean,
|
@@ -84,6 +57,7 @@ module DataDuck
|
|
84
57
|
:percentage_included => :float,
|
85
58
|
:experiment_type => :string,
|
86
59
|
:edit_url => :string,
|
60
|
+
:auto_allocated => :boolean,
|
87
61
|
:dataduck_extracted_at => :datetime,
|
88
62
|
})
|
89
63
|
end
|
data/lib/integrations/optimizely/optimizely_integration.rb
CHANGED
@@ -1,15 +1,73 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
require 'oj'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
require_relative './experiments'
|
6
|
+
require_relative './projects'
|
7
|
+
require_relative './variations'
|
8
|
+
|
1
9
|
module DataDuck
|
2
10
|
module Optimizely
|
3
11
|
class OptimizelyIntegration < DataDuck::Optimizely::OptimizelyTable
|
4
12
|
def etl!(destinations, options = {})
|
13
|
+
now = DateTime.now
|
14
|
+
|
5
15
|
projects = fetch_data("projects")
|
6
|
-
|
16
|
+
|
17
|
+
experiments = []
|
18
|
+
projects.each do |project|
|
19
|
+
project["created"] = DateTime.parse(project["created"])
|
20
|
+
project["last_modified"] = DateTime.parse(project["last_modified"])
|
21
|
+
|
22
|
+
project_experiments = fetch_data("projects/#{ project['id'] }/experiments")
|
23
|
+
project_experiments.each do |proj_exp|
|
24
|
+
proj_exp['project_id'] = project['id']
|
25
|
+
proj_exp["created"] = DateTime.parse(proj_exp["created"])
|
26
|
+
proj_exp["last_modified"] = DateTime.parse(proj_exp["last_modified"])
|
27
|
+
end
|
28
|
+
experiments.concat(project_experiments)
|
29
|
+
end
|
30
|
+
|
31
|
+
variations = []
|
32
|
+
# Experiments started after January 21, 2015 have statistics computed by Optimizely Stats Engine.
|
33
|
+
# Older experiments should use the old results endpoint.
|
34
|
+
date_for_stats_engine = DateTime.parse('Jan 22, 2015')
|
35
|
+
date_too_old_for_api = DateTime.parse('Jan 1, 2013')
|
36
|
+
broken_experiments = []
|
37
|
+
experiments.each do |experiment|
|
38
|
+
if experiment["created"] < date_too_old_for_api
|
39
|
+
next # seems like there's a problem with the API and old experiments
|
40
|
+
end
|
41
|
+
|
42
|
+
endpoint = experiment["created"] >= date_for_stats_engine ? "experiments/#{ experiment["id"] }/stats" : "experiments/#{ experiment["id"] }/results"
|
43
|
+
experiment_variations = []
|
44
|
+
begin
|
45
|
+
experiment_variations = fetch_data(endpoint)
|
46
|
+
rescue Exception => err
|
47
|
+
broken_experiments << experiment
|
48
|
+
end
|
49
|
+
experiment_variations.each do |exp_var|
|
50
|
+
exp_var["begin_time"] = DateTime.parse(exp_var["begin_time"]) if exp_var["begin_time"]
|
51
|
+
exp_var["end_time"] = DateTime.parse(exp_var["end_time"]) if exp_var["end_time"]
|
52
|
+
exp_var["experiment_id"] = experiment["id"]
|
53
|
+
end
|
54
|
+
variations.concat(experiment_variations)
|
55
|
+
end
|
56
|
+
|
57
|
+
projects_etl_table = DataDuck::Optimizely::Projects.new(projects)
|
58
|
+
projects_etl_table.etl!(destinations, options)
|
59
|
+
|
60
|
+
experiments_etl_table = DataDuck::Optimizely::Experiments.new(experiments)
|
61
|
+
experiments_etl_table.etl!(destinations, options)
|
62
|
+
|
63
|
+
variations_etl_table = DataDuck::Optimizely::Variations.new(variations)
|
64
|
+
variations_etl_table.etl!(destinations, options)
|
7
65
|
end
|
8
66
|
|
9
67
|
def fetch_data(api_endpoint)
|
10
68
|
now = DateTime.now
|
11
69
|
|
12
|
-
response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }", headers: {'Token' =>
|
70
|
+
response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }", headers: {'Token' => optimizely_api_token})
|
13
71
|
if response.response_code != 200
|
14
72
|
raise Exception.new("Optimizely API for #{ api_endpoint } returned error #{ response.response_code} #{ response.body }")
|
15
73
|
end
|
data/lib/integrations/optimizely/projects.rb
CHANGED
@@ -7,27 +7,20 @@ require 'date'
|
|
7
7
|
module DataDuck
|
8
8
|
module Optimizely
|
9
9
|
class Projects < DataDuck::Optimizely::OptimizelyTable
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
self.data = []
|
14
|
-
|
15
|
-
now = DateTime.now
|
16
|
-
response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/projects", headers: {'Token' => self.optimizely_api_token})
|
10
|
+
def initialize(data)
|
11
|
+
self.data = data
|
12
|
+
end
|
17
13
|
|
18
|
-
|
19
|
-
|
20
|
-
project[:dataduck_extracted_at] = now
|
21
|
-
end
|
14
|
+
def extract!(*args)
|
15
|
+
# already initialized data
|
22
16
|
end
|
23
17
|
|
24
18
|
def indexes
|
25
19
|
["id", "account_id", "project_name"]
|
26
20
|
end
|
27
21
|
|
28
|
-
def
|
29
|
-
|
30
|
-
project["last_modified"] = DateTime.parse(project["last_modified"])
|
22
|
+
def should_fully_reload?
|
23
|
+
true
|
31
24
|
end
|
32
25
|
|
33
26
|
output({
|
data/lib/integrations/optimizely/variations.rb
CHANGED
@@ -5,7 +5,54 @@ require_relative 'optimizely_table'
|
|
5
5
|
module DataDuck
|
6
6
|
module Optimizely
|
7
7
|
class Variations < DataDuck::Optimizely::OptimizelyTable
|
8
|
-
|
8
|
+
transforms :fix_fields
|
9
|
+
|
10
|
+
def initialize(data)
|
11
|
+
self.data = data
|
12
|
+
end
|
13
|
+
|
14
|
+
def extract!(*args)
|
15
|
+
# already initialized data
|
16
|
+
end
|
17
|
+
|
18
|
+
def fix_fields(row)
|
19
|
+
row[:id] = row['variation_id'].to_i
|
20
|
+
row[:name] = row['variation_name']
|
21
|
+
row['baseline_id'] = row['baseline_id'].to_i
|
22
|
+
row['improvement'] = row['improvement'].to_f
|
23
|
+
row['confidence'] = row['confidence'].to_f
|
24
|
+
row['conversion_rate'] = row['conversion_rate'].to_f
|
25
|
+
row['difference'] = row['difference'].to_f
|
26
|
+
|
27
|
+
row
|
28
|
+
end
|
29
|
+
|
30
|
+
def indexes
|
31
|
+
["id", "goal_id", "experiment_id", "name"]
|
32
|
+
end
|
33
|
+
|
34
|
+
def should_fully_reload?
|
35
|
+
true
|
36
|
+
end
|
37
|
+
|
38
|
+
output({
|
39
|
+
:id => :bigint,
|
40
|
+
:name => :string,
|
41
|
+
:experiment_id => :bigint,
|
42
|
+
:baseline_id => :bigint,
|
43
|
+
:goal_name => :string,
|
44
|
+
:goal_id => :bigint,
|
45
|
+
:visitors => :integer,
|
46
|
+
:conversions => :integer,
|
47
|
+
:begin_time => :datetime,
|
48
|
+
:end_time => :datetime,
|
49
|
+
:improvement => :float,
|
50
|
+
:confidence => :float,
|
51
|
+
:conversion_rate => :float,
|
52
|
+
:difference => :float,
|
53
|
+
:status => :string,
|
54
|
+
:dataduck_extracted_at => :datetime,
|
55
|
+
})
|
9
56
|
end
|
10
57
|
end
|
11
58
|
end
|
data/lib/integrations/semrush/organic_results.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
require 'date'
|
1
2
|
require 'typhoeus'
|
3
|
+
require 'uri'
|
2
4
|
|
3
5
|
module DataDuck
|
4
6
|
module SEMRush
|
5
7
|
class OrganicResults < DataDuck::IntegrationTable
|
6
8
|
def display_limit
|
7
|
-
|
9
|
+
20
|
8
10
|
end
|
9
11
|
|
10
12
|
def key
|
@@ -24,23 +26,48 @@ module DataDuck
|
|
24
26
|
end
|
25
27
|
|
26
28
|
def extract!(destination, options = {})
|
27
|
-
dates = options[:dates]
|
28
|
-
if dates.nil? || dates.length == 0
|
29
|
-
raise Exception("Must pass at least one date.")
|
30
|
-
end
|
31
|
-
|
32
29
|
self.data = []
|
33
30
|
|
34
31
|
self.phrases.each do |phrase|
|
35
|
-
self.
|
36
|
-
|
32
|
+
self.extract_results_for_keyword_and_date!(phrase)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def extract_results_for_keyword_and_date!(phrase)
|
37
|
+
date = Date.today
|
38
|
+
phrase.strip!
|
39
|
+
escaped_phrase = URI.escape(phrase)
|
40
|
+
semrush_api_url = "http://api.semrush.com/?type=phrase_organic&key=#{ self.key }&display_limit=#{ self.display_limit }&export_columns=Dn,Ur&phrase=#{ escaped_phrase }&database=#{ self.search_database }"
|
41
|
+
|
42
|
+
response = Typhoeus.get(semrush_api_url)
|
43
|
+
if response.response_code != 200
|
44
|
+
raise Exception.new("SEMrush API returned error #{ response.response_code} #{ response.body }")
|
45
|
+
end
|
46
|
+
|
47
|
+
rank = -1
|
48
|
+
response.body.each_line do |line|
|
49
|
+
rank += 1
|
50
|
+
if rank == 0
|
51
|
+
# This is the header line
|
52
|
+
next
|
37
53
|
end
|
54
|
+
|
55
|
+
domain, url = line.split(';')
|
56
|
+
domain.strip!
|
57
|
+
url.strip!
|
58
|
+
|
59
|
+
self.data << {
|
60
|
+
date: date,
|
61
|
+
phrase: phrase,
|
62
|
+
rank: rank,
|
63
|
+
domain: domain,
|
64
|
+
url: url
|
65
|
+
}
|
38
66
|
end
|
39
67
|
end
|
40
68
|
|
41
|
-
def
|
42
|
-
|
43
|
-
# TODO
|
69
|
+
def identify_by_columns
|
70
|
+
["date", "phrase"]
|
44
71
|
end
|
45
72
|
|
46
73
|
def indexes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.5
|
4
|
+
version: 0.6.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|