dataduck 0.6.5 → 0.6.6
- checksums.yaml +4 -4
- data/lib/dataduck/etl.rb +3 -3
- data/lib/dataduck/redshift_destination.rb +17 -1
- data/lib/dataduck/table.rb +6 -0
- data/lib/dataduck/version.rb +1 -1
- data/lib/integrations/optimizely/experiments.rb +12 -38
- data/lib/integrations/optimizely/optimizely_integration.rb +60 -2
- data/lib/integrations/optimizely/optimizely_table.rb +4 -0
- data/lib/integrations/optimizely/projects.rb +7 -14
- data/lib/integrations/optimizely/variations.rb +48 -1
- data/lib/integrations/semrush/organic_results.rb +38 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8bd5effb261990d6eccb37fe16de93195c89ef24
+  data.tar.gz: b2c7d31fac424890e6b605d2828423b624625467
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 61b17cc19b83ad9733b9744de6a65f884af5f3b145ddbc967ce01eff2fa8b552b103c9c69016c59f87c729cd9bc10866ed5ee720f903236c90e1d2b4ec2712c5
+  data.tar.gz: 0217b069e2d3997698c4135a170c65c22d16187cb02d35adf68d4e62c8798073c2c95a9d5f9689329762cc8c6d25a0b5682f542f7b802918a6776f792e302db3
data/lib/dataduck/etl.rb
CHANGED
@@ -25,9 +25,9 @@ module DataDuck
       table_name_underscores = file.split("/").last.gsub(".rb", "")
       table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name_underscores)
       require file
-
-        if
-        @tables <<
+        table_class = Object.const_get(table_name_camelized)
+        if table_class <= DataDuck::Table && table_class.new.include_with_all?
+          @tables << table_class
         end
       end
     end
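With this change the loader only registers classes that inherit from DataDuck::Table and whose instances answer include_with_all? with true. As a rough illustration (the class name is invented), a helper table can opt out of the automatic run like so:

# Hypothetical table: it is still required by the loader above, but because
# include_with_all? returns false it is never pushed onto @tables.
class ScratchImport < DataDuck::Table
  def include_with_all?
    false # opt out of the default "run everything" ETL
  end
end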
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -175,10 +175,26 @@ module DataDuck
     end

       # Following guidelines in http://docs.aws.amazon.com/redshift/latest/dg/merge-examples.html
+      self.delete_before_inserting!(table)
+      self.insert_from_staging!(table)
+    end
+
+    def delete_before_inserting!(table)
       staging_name = table.staging_name
       building_name = table.building_name
-
+
+      where_equals_parts = []
+      table.identify_by_columns.each do |attribute|
+        where_equals_parts << "#{ building_name }.#{ attribute } = #{ staging_name }.#{ attribute }"
+      end
+
+      delete_query = "DELETE FROM #{ building_name } USING #{ staging_name } WHERE #{ where_equals_parts.join(' AND ') }"
       self.query(delete_query)
+    end
+
+    def insert_from_staging!(table)
+      staging_name = table.staging_name
+      building_name = table.building_name
       insert_query = "INSERT INTO #{ building_name } (\"#{ table.output_column_names.join('","') }\") SELECT \"#{ table.output_column_names.join('","') }\" FROM #{ staging_name }"
       self.query(insert_query)
     end
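The two new methods follow the staging-table merge pattern from the linked Redshift guide: delete the target rows that the staging table is about to replace, then insert everything from staging. A minimal sketch of the SQL they build, using invented table and column names:

# Minimal sketch (not the gem's API): rebuilding the same merge SQL for a
# hypothetical table. Table names and columns are invented for illustration.
building_name = "users"
staging_name  = "users_staging"
identify_by_columns = ["id"]
output_column_names = ["id", "email", "created_at"]

where_parts = identify_by_columns.map { |col| "#{ building_name }.#{ col } = #{ staging_name }.#{ col }" }
delete_query = "DELETE FROM #{ building_name } USING #{ staging_name } WHERE #{ where_parts.join(' AND ') }"
insert_query = "INSERT INTO #{ building_name } (\"#{ output_column_names.join('","') }\") SELECT \"#{ output_column_names.join('","') }\" FROM #{ staging_name }"

puts delete_query
# DELETE FROM users USING users_staging WHERE users.id = users_staging.id
puts insert_query
# INSERT INTO users ("id","email","created_at") SELECT "id","email","created_at" FROM users_staging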
data/lib/dataduck/table.rb
CHANGED
@@ -194,6 +194,12 @@ module DataDuck
       nil
     end

+    def identify_by_columns
+      return ["id"] if self.output_column_names.include?("id")
+
+      []
+    end
+
     def should_fully_reload?
       false # Set to true if you want to fully reload a table with each ETL
     end
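identify_by_columns defaults to ["id"] when the table outputs an id column and to nothing otherwise. A table without an id column can override it so the merge step above knows which rows to replace; this is only a sketch with invented names:

# Hypothetical table with no "id" column: overriding identify_by_columns
# tells delete_before_inserting! which columns identify a row, so only the
# rows being replaced are deleted from the target table.
class DailyPageviews < DataDuck::Table
  def identify_by_columns
    ["date", "page_path"] # assumed composite key for this example
  end
end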
data/lib/dataduck/version.rb
CHANGED

data/lib/integrations/optimizely/experiments.rb
CHANGED
@@ -7,50 +7,18 @@ require 'date'
 module DataDuck
   module Optimizely
     class Experiments < DataDuck::Optimizely::OptimizelyTable
-
       transforms :percentage_included_to_float
-      transforms :
-
-      def extract!(destination, options = {})
-        self.data = []
+      transforms :rename_description_to_name

-
-
-          raise Exception.new("Optimizely API for projects returned error #{ response.response_code} #{ response.body }")
-        end
-        projects = Oj.load(projects_response.body)
-
-        projects.each do |project|
-          self.extract_for_project!(project["id"])
-        end
+      def initialize(experiments)
+        self.data = experiments
       end

-      def
-
-
-        response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/projects/#{ project_id }/experiments", headers: {'Token' => self.optimizely_api_token})
-
-        if response.response_code != 200
-          raise Exception.new("Optimizely API for experiments returned error #{ response.response_code} #{ response.body }")
-        end
-
-        experiments = Oj.load(response.body)
-        experiments.each do |experiment|
-          experiment[:dataduck_extracted_at] = now
-          experiment[:project_id] = project_id
-        end
-
-        self.data.concat(experiments)
+      def extract!(*args)
+        # already initialized data
       end

-      def
-        row["created"] = DateTime.parse(row["created"])
-        row["last_modified"] = DateTime.parse(row["last_modified"])
-
-        row
-      end
-
-      def rename_description_to_name
+      def rename_description_to_name(row)
         row[:name] = row['description']

         row
@@ -62,6 +30,10 @@ module DataDuck
         row
       end

+      def should_fully_reload?
+        true
+      end
+
       def indexes
         ["id", "project_id", "primary_goal_id", "name"]
       end
@@ -76,6 +48,7 @@ module DataDuck
         :primary_goal_id => :integer,
         :details => :bigtext,
         :status => :string,
+        :audience_ids => :bigtext,
         :url_conditions => :bigtext,
         :last_modified => :datetime,
         :is_multivariate => :boolean,
@@ -84,6 +57,7 @@ module DataDuck
         :percentage_included => :float,
         :experiment_type => :string,
         :edit_url => :string,
+        :auto_allocated => :boolean,
         :dataduck_extracted_at => :datetime,
       })
     end
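Experiments no longer calls the Optimizely API itself: it is constructed with rows the integration already fetched, extract! is a no-op, and the transforms reshape each row before loading. Roughly, with an invented sample row:

rows = [{ "id" => 123, "description" => "Homepage CTA test", "status" => "Running" }]
experiments = DataDuck::Optimizely::Experiments.new(rows)
experiments.extract!  # no-op: the data was handed to the constructor
# The rename_description_to_name transform then copies row['description']
# into row[:name] for each row before the table is loaded.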
data/lib/integrations/optimizely/optimizely_integration.rb
CHANGED
@@ -1,15 +1,73 @@
+require 'typhoeus'
+require 'oj'
+require 'date'
+
+require_relative './experiments'
+require_relative './projects'
+require_relative './variations'
+
 module DataDuck
   module Optimizely
     class OptimizelyIntegration < DataDuck::Optimizely::OptimizelyTable
       def etl!(destinations, options = {})
+        now = DateTime.now
+
         projects = fetch_data("projects")
-
+
+        experiments = []
+        projects.each do |project|
+          project["created"] = DateTime.parse(project["created"])
+          project["last_modified"] = DateTime.parse(project["last_modified"])
+
+          project_experiments = fetch_data("projects/#{ project['id'] }/experiments")
+          project_experiments.each do |proj_exp|
+            proj_exp['project_id'] = project['id']
+            proj_exp["created"] = DateTime.parse(proj_exp["created"])
+            proj_exp["last_modified"] = DateTime.parse(proj_exp["last_modified"])
+          end
+          experiments.concat(project_experiments)
+        end
+
+        variations = []
+        # Experiments started after January 21, 2015 have statistics computed by Optimizely Stats Engine.
+        # Older experiments should use the old results endpoint.
+        date_for_stats_engine = DateTime.parse('Jan 22, 2015')
+        date_too_old_for_api = DateTime.parse('Jan 1, 2013')
+        broken_experiments = []
+        experiments.each do |experiment|
+          if experiment["created"] < date_too_old_for_api
+            next # seems like there's a problem with the API and old experiments
+          end
+
+          endpoint = experiment["created"] >= date_for_stats_engine ? "experiments/#{ experiment["id"] }/stats" : "experiments/#{ experiment["id"] }/results"
+          experiment_variations = []
+          begin
+            experiment_variations = fetch_data(endpoint)
+          rescue Exception => err
+            broken_experiments << experiment
+          end
+          experiment_variations.each do |exp_var|
+            exp_var["begin_time"] = DateTime.parse(exp_var["begin_time"]) if exp_var["begin_time"]
+            exp_var["end_time"] = DateTime.parse(exp_var["end_time"]) if exp_var["end_time"]
+            exp_var["experiment_id"] = experiment["id"]
+          end
+          variations.concat(experiment_variations)
+        end
+
+        projects_etl_table = DataDuck::Optimizely::Projects.new(projects)
+        projects_etl_table.etl!(destinations, options)
+
+        experiments_etl_table = DataDuck::Optimizely::Experiments.new(experiments)
+        experiments_etl_table.etl!(destinations, options)
+
+        variations_etl_table = DataDuck::Optimizely::Variations.new(variations)
+        variations_etl_table.etl!(destinations, options)
       end

       def fetch_data(api_endpoint)
         now = DateTime.now

-        response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }", headers: {'Token' =>
+        response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }", headers: {'Token' => optimizely_api_token})
         if response.response_code != 200
           raise Exception.new("Optimizely API for #{ api_endpoint } returned error #{ response.response_code} #{ response.body }")
         end
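etl! now pulls projects, per-project experiments, and per-experiment variation results (choosing the /stats or /results endpoint by creation date) and hands each set to its table. The request pattern inside fetch_data looks like the following standalone sketch; the helper name and token source are placeholders, only the endpoint and header come from the diff:

require 'typhoeus'
require 'oj'

# Placeholder helper mirroring fetch_data: GET the endpoint with the Token
# header, fail on non-200, and parse the JSON body with Oj.
def fetch_optimizely(api_endpoint, token)
  response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/#{ api_endpoint }",
                          headers: { 'Token' => token })
  if response.response_code != 200
    raise "Optimizely API for #{ api_endpoint } returned error #{ response.response_code } #{ response.body }"
  end
  Oj.load(response.body)
end

projects = fetch_optimizely("projects", ENV["OPTIMIZELY_API_TOKEN"]) # assumed env var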
data/lib/integrations/optimizely/projects.rb
CHANGED
@@ -7,27 +7,20 @@ require 'date'
 module DataDuck
   module Optimizely
     class Projects < DataDuck::Optimizely::OptimizelyTable
-
-
-
-        self.data = []
-
-        now = DateTime.now
-        response = Typhoeus.get("https://www.optimizelyapis.com/experiment/v1/projects", headers: {'Token' => self.optimizely_api_token})
+      def initialize(data)
+        self.data = data
+      end

-
-
-          project[:dataduck_extracted_at] = now
-        end
+      def extract!(*args)
+        # already initialized data
       end

       def indexes
         ["id", "account_id", "project_name"]
       end

-      def
-
-        project["last_modified"] = DateTime.parse(project["last_modified"])
+      def should_fully_reload?
+        true
       end

       output({
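Projects, like Experiments and Variations, now answers should_fully_reload? with true, so per the comment in table.rb the table is rebuilt on every ETL run instead of being merged through the staging path. Sketched against an invented table:

# Invented example of the flag: a fully reloaded table is rebuilt each run;
# the default (false) merges staged rows using identify_by_columns.
class SnapshotTable < DataDuck::Table
  def should_fully_reload?
    true
  end
end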
data/lib/integrations/optimizely/variations.rb
CHANGED
@@ -5,7 +5,54 @@ require_relative 'optimizely_table'
 module DataDuck
   module Optimizely
     class Variations < DataDuck::Optimizely::OptimizelyTable
-
+      transforms :fix_fields
+
+      def initialize(data)
+        self.data = data
+      end
+
+      def extract!(*args)
+        # already initialized data
+      end
+
+      def fix_fields(row)
+        row[:id] = row['variation_id'].to_i
+        row[:name] = row['variation_name']
+        row['baseline_id'] = row['baseline_id'].to_i
+        row['improvement'] = row['improvement'].to_f
+        row['confidence'] = row['confidence'].to_f
+        row['conversion_rate'] = row['conversion_rate'].to_f
+        row['difference'] = row['difference'].to_f
+
+        row
+      end
+
+      def indexes
+        ["id", "goal_id", "experiment_id", "name"]
+      end
+
+      def should_fully_reload?
+        true
+      end
+
+      output({
+        :id => :bigint,
+        :name => :string,
+        :experiment_id => :bigint,
+        :baseline_id => :bigint,
+        :goal_name => :string,
+        :goal_id => :bigint,
+        :visitors => :integer,
+        :conversions => :integer,
+        :begin_time => :datetime,
+        :end_time => :datetime,
+        :improvement => :float,
+        :confidence => :float,
+        :conversion_rate => :float,
+        :difference => :float,
+        :status => :string,
+        :dataduck_extracted_at => :datetime,
+      })
     end
   end
 end
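fix_fields coerces the string values the results endpoints return into the numeric types declared in output, and derives id and name from variation_id and variation_name. Applied to an invented row:

row = {
  'variation_id' => '42', 'variation_name' => 'Variation #1', 'baseline_id' => '41',
  'improvement' => '0.12', 'confidence' => '0.97', 'conversion_rate' => '0.034', 'difference' => '0.004'
}
fixed = DataDuck::Optimizely::Variations.new([]).fix_fields(row)
fixed[:id]           # => 42
fixed[:name]         # => "Variation #1"
fixed['improvement'] # => 0.12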
data/lib/integrations/semrush/organic_results.rb
CHANGED
@@ -1,10 +1,12 @@
+require 'date'
 require 'typhoeus'
+require 'uri'

 module DataDuck
   module SEMRush
     class OrganicResults < DataDuck::IntegrationTable
       def display_limit
-
+        20
       end

       def key
@@ -24,23 +26,48 @@ module DataDuck
       end

       def extract!(destination, options = {})
-        dates = options[:dates]
-        if dates.nil? || dates.length == 0
-          raise Exception("Must pass at least one date.")
-        end
-
         self.data = []

         self.phrases.each do |phrase|
-          self.
-
+          self.extract_results_for_keyword_and_date!(phrase)
+        end
+      end
+
+      def extract_results_for_keyword_and_date!(phrase)
+        date = Date.today
+        phrase.strip!
+        escaped_phrase = URI.escape(phrase)
+        semrush_api_url = "http://api.semrush.com/?type=phrase_organic&key=#{ self.key }&display_limit=#{ self.display_limit }&export_columns=Dn,Ur&phrase=#{ escaped_phrase }&database=#{ self.search_database }"
+
+        response = Typhoeus.get(semrush_api_url)
+        if response.response_code != 200
+          raise Exception.new("SEMrush API returned error #{ response.response_code} #{ response.body }")
+        end
+
+        rank = -1
+        response.body.each_line do |line|
+          rank += 1
+          if rank == 0
+            # This is the header line
+            next
           end
+
+          domain, url = line.split(';')
+          domain.strip!
+          url.strip!
+
+          self.data << {
+            date: date,
+            phrase: phrase,
+            rank: rank,
+            domain: domain,
+            url: url
+          }
         end
       end

-      def
-
-        # TODO
+      def identify_by_columns
+        ["date", "phrase"]
       end

       def indexes
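The extractor requests export_columns=Dn,Ur, treats the first response line as a header, and splits each following line on ';' into domain and URL, with the line position serving as rank. A standalone sketch over a made-up response body:

# Made-up SEMrush phrase_organic response: header line, then domain;url rows.
body = "Domain;Url\nexample.com;https://example.com/ducks\nducks.org;https://ducks.org/data\n"

rows = []
rank = -1
body.each_line do |line|
  rank += 1
  next if rank == 0 # skip the header line
  domain, url = line.split(';')
  rows << { rank: rank, domain: domain.strip, url: url.strip }
end
# rows => [{rank: 1, domain: "example.com", url: "https://example.com/ducks"}, ...]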
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dataduck
 version: !ruby/object:Gem::Version
-  version: 0.6.5
+  version: 0.6.6
 platform: ruby
 authors:
 - Jeff Pickhardt
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-
+date: 2015-11-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler