dataduck 0.6.7 → 0.6.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/dataduck.gemspec +1 -0
- data/lib/dataduck/commands.rb +26 -12
- data/lib/dataduck/etl.rb +17 -12
- data/lib/dataduck/version.rb +1 -1
- data/lib/integrations/semrush/organic_results.rb +0 -2
- data/lib/templates/quickstart/schedule.rb.erb +19 -0
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e07cb92ca8f7f36c672dfb89e617e41ddb92f302
|
4
|
+
data.tar.gz: 40037cb63d58385f830043257883322e7dad1dbb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bfce8a29678d44a96f26b05c3cef1fe047776976ced0d856a028276f9f65c0b8094f0ac64405f1eb8af91c1c2c07205b15776a448e9475d52c573596455409b2
|
7
|
+
data.tar.gz: 55a7dc9934972cb5953bac4e416dfddf26d4ae65389022168e8e8e36a6d6ffc74c4d76ce57eca9289bbc7a677061a093ec1f809a398b1838b776f0d0975589c6
|
data/dataduck.gemspec
CHANGED
data/lib/dataduck/commands.rb
CHANGED
@@ -89,28 +89,35 @@ module DataDuck
|
|
89
89
|
which_database.dbconsole
|
90
90
|
end
|
91
91
|
|
92
|
-
def self.etl(
|
93
|
-
if
|
92
|
+
def self.etl(*table_names_underscore)
|
93
|
+
if table_names_underscore.length == 0
|
94
94
|
puts "You need to specify a table name or 'all'. Usage: dataduck etl all OR datduck etl my_table_name"
|
95
95
|
return
|
96
96
|
end
|
97
97
|
|
98
98
|
only_destination = DataDuck::Destination.only_destination
|
99
99
|
|
100
|
-
if
|
100
|
+
if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
|
101
101
|
etl = ETL.new(destinations: [only_destination], autoload_tables: true)
|
102
102
|
etl.process!
|
103
103
|
else
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
104
|
+
tables = []
|
105
|
+
table_names_underscore.each do |table_name|
|
106
|
+
table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name)
|
107
|
+
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
108
|
+
table_class = Object.const_get(table_name_camelized)
|
109
|
+
if !(table_class <= DataDuck::Table)
|
110
|
+
raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
|
111
|
+
end
|
112
|
+
table = table_class.new
|
113
|
+
tables << table
|
109
114
|
end
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
115
|
+
etl = ETL.new({
|
116
|
+
destinations: [only_destination],
|
117
|
+
autoload_tables: false,
|
118
|
+
tables: tables
|
119
|
+
})
|
120
|
+
etl.process!
|
114
121
|
end
|
115
122
|
end
|
116
123
|
|
@@ -305,6 +312,13 @@ source1_password=#{ source_password }
|
|
305
312
|
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/tables/#{ table_name }.rb", result)
|
306
313
|
end
|
307
314
|
|
315
|
+
def self.quickstart_create_schedule_config
|
316
|
+
namespace = Namespace.new
|
317
|
+
template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/schedule.rb.erb", 'r').read
|
318
|
+
result = ERB.new(template).result(namespace.get_binding)
|
319
|
+
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/schedule.rb", result)
|
320
|
+
end
|
321
|
+
|
308
322
|
def self.quickstart_save_file(output_path_full, contents)
|
309
323
|
*output_path, output_filename = output_path_full.split('/')
|
310
324
|
output_path = output_path.join("/")
|
data/lib/dataduck/etl.rb
CHANGED
@@ -40,22 +40,27 @@ module DataDuck
|
|
40
40
|
destinations_to_use = destinations_to_use.concat(self.class.destinations)
|
41
41
|
destinations_to_use = destinations_to_use.concat(self.destinations)
|
42
42
|
destinations_to_use.uniq!
|
43
|
-
|
44
|
-
|
45
|
-
table_to_etl = table_class.new
|
46
|
-
table_to_etl.etl!(destinations_to_use)
|
43
|
+
if destinations_to_use.length == 0
|
44
|
+
destinations_to_use << DataDuck::Destination.only_destination
|
47
45
|
end
|
48
|
-
end
|
49
46
|
|
50
|
-
|
51
|
-
Logs.info("Processing ETL for table #{ table.name } on pid #{ Process.pid }...")
|
47
|
+
errored_tables = []
|
52
48
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
49
|
+
@tables.each do |table_or_class|
|
50
|
+
table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
|
51
|
+
Logs.info("Processing table '#{ table.name }'...")
|
52
|
+
begin
|
53
|
+
table.etl!(destinations_to_use)
|
54
|
+
rescue Exception => err
|
55
|
+
Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
|
56
|
+
errored_tables << table
|
57
|
+
end
|
58
|
+
end
|
57
59
|
|
58
|
-
|
60
|
+
Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
|
61
|
+
if errored_tables.length > 0
|
62
|
+
Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
|
63
|
+
end
|
59
64
|
end
|
60
65
|
end
|
61
66
|
end
|
data/lib/dataduck/version.rb
CHANGED
data/lib/integrations/semrush/organic_results.rb
CHANGED
@@ -45,8 +45,6 @@ module DataDuck
|
|
45
45
|
escaped_phrase = URI.escape(phrase)
|
46
46
|
semrush_api_url = "http://api.semrush.com/?type=phrase_organic&key=#{ self.key }&display_limit=#{ self.display_limit }&export_columns=Dn,Ur&phrase=#{ escaped_phrase }&database=#{ self.search_database }"
|
47
47
|
|
48
|
-
puts semrush_api_url
|
49
|
-
|
50
48
|
response = Typhoeus.get(semrush_api_url)
|
51
49
|
if response.response_code != 200
|
52
50
|
raise OrganicResultsAPIError.new("SEMrush API for phrase #{ phrase } returned error #{ response.response_code } #{ response.body }")
|
data/lib/templates/quickstart/schedule.rb.erb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# This file configures the whenever gem:
|
2
|
+
# https://github.com/javan/whenever
|
3
|
+
#
|
4
|
+
# To update crontab from this schedule.rb file:
|
5
|
+
# whenever --update-crontab etl
|
6
|
+
#
|
7
|
+
# To clear the crontab from this schedule.rb file:
|
8
|
+
# whenever --clear-crontab etl
|
9
|
+
#
|
10
|
+
# To list out your crontab:
|
11
|
+
# crontab -l
|
12
|
+
|
13
|
+
require 'dataduck'
|
14
|
+
|
15
|
+
set :output, "#{ DataDuck.project_root }/log/cron.log"
|
16
|
+
|
17
|
+
every 1.day, :at => '2:00 am' do
|
18
|
+
command "cd #{ DataDuck.project_root } && dataduck etl all"
|
19
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -164,6 +164,20 @@ dependencies:
|
|
164
164
|
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: whenever
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0.9'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0.9'
|
167
181
|
description: A straightforward, effective ETL framework.
|
168
182
|
email:
|
169
183
|
- pickhardt@gmail.com
|
@@ -226,6 +240,7 @@ files:
|
|
226
240
|
- lib/integrations/optimizely/projects.rb
|
227
241
|
- lib/integrations/optimizely/variations.rb
|
228
242
|
- lib/integrations/semrush/organic_results.rb
|
243
|
+
- lib/templates/quickstart/schedule.rb.erb
|
229
244
|
- lib/templates/quickstart/table.rb.erb
|
230
245
|
- static/logo.png
|
231
246
|
homepage: http://dataducketl.com/
|