dataduck 0.6.7 → 0.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/dataduck.gemspec +1 -0
- data/lib/dataduck/commands.rb +26 -12
- data/lib/dataduck/etl.rb +17 -12
- data/lib/dataduck/version.rb +1 -1
- data/lib/integrations/semrush/organic_results.rb +0 -2
- data/lib/templates/quickstart/schedule.rb.erb +19 -0
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e07cb92ca8f7f36c672dfb89e617e41ddb92f302
|
4
|
+
data.tar.gz: 40037cb63d58385f830043257883322e7dad1dbb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bfce8a29678d44a96f26b05c3cef1fe047776976ced0d856a028276f9f65c0b8094f0ac64405f1eb8af91c1c2c07205b15776a448e9475d52c573596455409b2
|
7
|
+
data.tar.gz: 55a7dc9934972cb5953bac4e416dfddf26d4ae65389022168e8e8e36a6d6ffc74c4d76ce57eca9289bbc7a677061a093ec1f809a398b1838b776f0d0975589c6
|
data/dataduck.gemspec
CHANGED
data/lib/dataduck/commands.rb
CHANGED
@@ -89,28 +89,35 @@ module DataDuck
|
|
89
89
|
which_database.dbconsole
|
90
90
|
end
|
91
91
|
|
92
|
-
def self.etl(
|
93
|
-
if
|
92
|
+
def self.etl(*table_names_underscore)
|
93
|
+
if table_names_underscore.length == 0
|
94
94
|
puts "You need to specify a table name or 'all'. Usage: dataduck etl all OR datduck etl my_table_name"
|
95
95
|
return
|
96
96
|
end
|
97
97
|
|
98
98
|
only_destination = DataDuck::Destination.only_destination
|
99
99
|
|
100
|
-
if
|
100
|
+
if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
|
101
101
|
etl = ETL.new(destinations: [only_destination], autoload_tables: true)
|
102
102
|
etl.process!
|
103
103
|
else
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
104
|
+
tables = []
|
105
|
+
table_names_underscore.each do |table_name|
|
106
|
+
table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name)
|
107
|
+
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
108
|
+
table_class = Object.const_get(table_name_camelized)
|
109
|
+
if !(table_class <= DataDuck::Table)
|
110
|
+
raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
|
111
|
+
end
|
112
|
+
table = table_class.new
|
113
|
+
tables << table
|
109
114
|
end
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
115
|
+
etl = ETL.new({
|
116
|
+
destinations: [only_destination],
|
117
|
+
autoload_tables: false,
|
118
|
+
tables: tables
|
119
|
+
})
|
120
|
+
etl.process!
|
114
121
|
end
|
115
122
|
end
|
116
123
|
|
@@ -305,6 +312,13 @@ source1_password=#{ source_password }
|
|
305
312
|
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/tables/#{ table_name }.rb", result)
|
306
313
|
end
|
307
314
|
|
315
|
+
def self.quickstart_create_schedule_config
|
316
|
+
namespace = Namespace.new
|
317
|
+
template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/schedule.rb.erb", 'r').read
|
318
|
+
result = ERB.new(template).result(namespace.get_binding)
|
319
|
+
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/schedule.rb", result)
|
320
|
+
end
|
321
|
+
|
308
322
|
def self.quickstart_save_file(output_path_full, contents)
|
309
323
|
*output_path, output_filename = output_path_full.split('/')
|
310
324
|
output_path = output_path.join("/")
|
data/lib/dataduck/etl.rb
CHANGED
@@ -40,22 +40,27 @@ module DataDuck
|
|
40
40
|
destinations_to_use = destinations_to_use.concat(self.class.destinations)
|
41
41
|
destinations_to_use = destinations_to_use.concat(self.destinations)
|
42
42
|
destinations_to_use.uniq!
|
43
|
-
|
44
|
-
|
45
|
-
table_to_etl = table_class.new
|
46
|
-
table_to_etl.etl!(destinations_to_use)
|
43
|
+
if destinations_to_use.length == 0
|
44
|
+
destinations_to_use << DataDuck::Destination.only_destination
|
47
45
|
end
|
48
|
-
end
|
49
46
|
|
50
|
-
|
51
|
-
Logs.info("Processing ETL for table #{ table.name } on pid #{ Process.pid }...")
|
47
|
+
errored_tables = []
|
52
48
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
49
|
+
@tables.each do |table_or_class|
|
50
|
+
table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
|
51
|
+
Logs.info("Processing table '#{ table.name }'...")
|
52
|
+
begin
|
53
|
+
table.etl!(destinations_to_use)
|
54
|
+
rescue Exception => err
|
55
|
+
Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
|
56
|
+
errored_tables << table
|
57
|
+
end
|
58
|
+
end
|
57
59
|
|
58
|
-
|
60
|
+
Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
|
61
|
+
if errored_tables.length > 0
|
62
|
+
Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
|
63
|
+
end
|
59
64
|
end
|
60
65
|
end
|
61
66
|
end
|
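data/lib/dataduck/version.rb
CHANGED
[hunk not captured in this extract: +1 -1 per the file list above]
data/lib/integrations/semrush/organic_results.rb
CHANGED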
@@ -45,8 +45,6 @@ module DataDuck
|
|
45
45
|
escaped_phrase = URI.escape(phrase)
|
46
46
|
semrush_api_url = "http://api.semrush.com/?type=phrase_organic&key=#{ self.key }&display_limit=#{ self.display_limit }&export_columns=Dn,Ur&phrase=#{ escaped_phrase }&database=#{ self.search_database }"
|
47
47
|
|
48
|
-
puts semrush_api_url
|
49
|
-
|
50
48
|
response = Typhoeus.get(semrush_api_url)
|
51
49
|
if response.response_code != 200
|
52
50
|
raise OrganicResultsAPIError.new("SEMrush API for phrase #{ phrase } returned error #{ response.response_code } #{ response.body }")
|
data/lib/templates/quickstart/schedule.rb.erb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# This file configures the whenver gem:
|
2
|
+
# https://github.com/javan/whenever
|
3
|
+
#
|
4
|
+
# To update crontab from this schedule.rb file:
|
5
|
+
# whenever --update-crontab etl
|
6
|
+
#
|
7
|
+
# To clear the crontab from this schedule.rb file:
|
8
|
+
# whenever --clear-crontab etl
|
9
|
+
#
|
10
|
+
# To list out your crontab:
|
11
|
+
# crontab -l
|
12
|
+
|
13
|
+
require 'dataduck'
|
14
|
+
|
15
|
+
set :output, "#{ DataDuck.project_root }/log/cron.log"
|
16
|
+
|
17
|
+
every 1.day, :at => '2:00 am' do
|
18
|
+
command "cd #{ DataDuck.project_root } && dataduck etl all"
|
19
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.7
|
4
|
+
version: 0.6.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -164,6 +164,20 @@ dependencies:
|
|
164
164
|
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: whenever
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0.9'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0.9'
|
167
181
|
description: A straightforward, effective ETL framework.
|
168
182
|
email:
|
169
183
|
- pickhardt@gmail.com
|
@@ -226,6 +240,7 @@ files:
|
|
226
240
|
- lib/integrations/optimizely/projects.rb
|
227
241
|
- lib/integrations/optimizely/variations.rb
|
228
242
|
- lib/integrations/semrush/organic_results.rb
|
243
|
+
- lib/templates/quickstart/schedule.rb.erb
|
229
244
|
- lib/templates/quickstart/table.rb.erb
|
230
245
|
- static/logo.png
|
231
246
|
homepage: http://dataducketl.com/
|