dataduck 0.6.7 → 0.6.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5944e3a23d61e1bcdbfa5c82e1a8d0ee3fe8b4cf
4
- data.tar.gz: 70c2e2b7af5377e13f473a9f7f56df438e5b6491
3
+ metadata.gz: e07cb92ca8f7f36c672dfb89e617e41ddb92f302
4
+ data.tar.gz: 40037cb63d58385f830043257883322e7dad1dbb
5
5
  SHA512:
6
- metadata.gz: af935179e9a4a52d56423cb2a785db6d51421caaaba6d5fa33b6283708b6199252770892446b394c826fab07d75e57553c43d8ccf15754156fe063fc48732e54
7
- data.tar.gz: 51ef2d29c8092aaf30e65326701ed3936c3e77a745ad171dbfd6d993c608e2312344258b31f0c525dd607739ae4376471ef3017858fbb50e6772bf4481b4b263
6
+ metadata.gz: bfce8a29678d44a96f26b05c3cef1fe047776976ced0d856a028276f9f65c0b8094f0ac64405f1eb8af91c1c2c07205b15776a448e9475d52c573596455409b2
7
+ data.tar.gz: 55a7dc9934972cb5953bac4e416dfddf26d4ae65389022168e8e8e36a6d6ffc74c4d76ce57eca9289bbc7a677061a093ec1f809a398b1838b776f0d0975589c6
data/dataduck.gemspec CHANGED
@@ -29,4 +29,5 @@ Gem::Specification.new do |spec|
29
29
  spec.add_runtime_dependency "typhoeus", "~> 0.8"
30
30
  spec.add_runtime_dependency "oj", "~> 2.12"
31
31
  spec.add_runtime_dependency "sequel-redshift"
32
+ spec.add_runtime_dependency "whenever", "~> 0.9"
32
33
  end
@@ -89,28 +89,35 @@ module DataDuck
89
89
  which_database.dbconsole
90
90
  end
91
91
 
92
- def self.etl(what = nil)
93
- if what.nil?
92
+ def self.etl(*table_names_underscore)
93
+ if table_names_underscore.length == 0
94
94
  puts "You need to specify a table name or 'all'. Usage: dataduck etl all OR datduck etl my_table_name"
95
95
  return
96
96
  end
97
97
 
98
98
  only_destination = DataDuck::Destination.only_destination
99
99
 
100
- if what == "all"
100
+ if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
101
101
  etl = ETL.new(destinations: [only_destination], autoload_tables: true)
102
102
  etl.process!
103
103
  else
104
- table_name_camelized = DataDuck::Util.underscore_to_camelcase(what)
105
- require DataDuck.project_root + "/src/tables/#{ what }.rb"
106
- table_class = Object.const_get(table_name_camelized)
107
- if !(table_class <= DataDuck::Table)
108
- raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
104
+ tables = []
105
+ table_names_underscore.each do |table_name|
106
+ table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name)
107
+ require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
108
+ table_class = Object.const_get(table_name_camelized)
109
+ if !(table_class <= DataDuck::Table)
110
+ raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
111
+ end
112
+ table = table_class.new
113
+ tables << table
109
114
  end
110
-
111
- table = table_class.new
112
- etl = ETL.new(destinations: [only_destination], autoload_tables: false, tables: [table])
113
- etl.process_table!(table)
115
+ etl = ETL.new({
116
+ destinations: [only_destination],
117
+ autoload_tables: false,
118
+ tables: tables
119
+ })
120
+ etl.process!
114
121
  end
115
122
  end
116
123
 
@@ -305,6 +312,13 @@ source1_password=#{ source_password }
305
312
  DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/tables/#{ table_name }.rb", result)
306
313
  end
307
314
 
315
+ def self.quickstart_create_schedule_config
316
+ namespace = Namespace.new
317
+ template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/schedule.rb.erb", 'r').read
318
+ result = ERB.new(template).result(namespace.get_binding)
319
+ DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/schedule.rb", result)
320
+ end
321
+
308
322
  def self.quickstart_save_file(output_path_full, contents)
309
323
  *output_path, output_filename = output_path_full.split('/')
310
324
  output_path = output_path.join("/")
data/lib/dataduck/etl.rb CHANGED
@@ -40,22 +40,27 @@ module DataDuck
40
40
  destinations_to_use = destinations_to_use.concat(self.class.destinations)
41
41
  destinations_to_use = destinations_to_use.concat(self.destinations)
42
42
  destinations_to_use.uniq!
43
-
44
- @tables.each do |table_class|
45
- table_to_etl = table_class.new
46
- table_to_etl.etl!(destinations_to_use)
43
+ if destinations_to_use.length == 0
44
+ destinations_to_use << DataDuck::Destination.only_destination
47
45
  end
48
- end
49
46
 
50
- def process_table!(table)
51
- Logs.info("Processing ETL for table #{ table.name } on pid #{ Process.pid }...")
47
+ errored_tables = []
52
48
 
53
- destinations_to_use = []
54
- destinations_to_use = destinations_to_use.concat(self.class.destinations)
55
- destinations_to_use = destinations_to_use.concat(self.destinations)
56
- destinations_to_use.uniq!
49
+ @tables.each do |table_or_class|
50
+ table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
51
+ Logs.info("Processing table '#{ table.name }'...")
52
+ begin
53
+ table.etl!(destinations_to_use)
54
+ rescue Exception => err
55
+ Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
56
+ errored_tables << table
57
+ end
58
+ end
57
59
 
58
- table.etl!(destinations_to_use)
60
+ Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
61
+ if errored_tables.length > 0
62
+ Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
63
+ end
59
64
  end
60
65
  end
61
66
  end
@@ -2,7 +2,7 @@ module DataDuck
2
2
  if !defined?(DataDuck::VERSION)
3
3
  VERSION_MAJOR = 0
4
4
  VERSION_MINOR = 6
5
- VERSION_PATCH = 7
5
+ VERSION_PATCH = 8
6
6
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
7
  end
8
8
  end
@@ -45,8 +45,6 @@ module DataDuck
45
45
  escaped_phrase = URI.escape(phrase)
46
46
  semrush_api_url = "http://api.semrush.com/?type=phrase_organic&key=#{ self.key }&display_limit=#{ self.display_limit }&export_columns=Dn,Ur&phrase=#{ escaped_phrase }&database=#{ self.search_database }"
47
47
 
48
- puts semrush_api_url
49
-
50
48
  response = Typhoeus.get(semrush_api_url)
51
49
  if response.response_code != 200
52
50
  raise OrganicResultsAPIError.new("SEMrush API for phrase #{ phrase } returned error #{ response.response_code } #{ response.body }")
@@ -0,0 +1,19 @@
1
+ # This file configures the whenever gem:
2
+ # https://github.com/javan/whenever
3
+ #
4
+ # To update crontab from this schedule.rb file:
5
+ # whenever --update-crontab etl
6
+ #
7
+ # To clear the crontab from this schedule.rb file:
8
+ # whenever --clear-crontab etl
9
+ #
10
+ # To list out your crontab:
11
+ # crontab -l
12
+
13
+ require 'dataduck'
14
+
15
+ set :output, "#{ DataDuck.project_root }/log/cron.log"
16
+
17
+ every 1.day, :at => '2:00 am' do
18
+ command "cd #{ DataDuck.project_root } && dataduck etl all"
19
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.7
4
+ version: 0.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-08 00:00:00.000000000 Z
11
+ date: 2015-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,6 +164,20 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: whenever
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '0.9'
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '0.9'
167
181
  description: A straightforward, effective ETL framework.
168
182
  email:
169
183
  - pickhardt@gmail.com
@@ -226,6 +240,7 @@ files:
226
240
  - lib/integrations/optimizely/projects.rb
227
241
  - lib/integrations/optimizely/variations.rb
228
242
  - lib/integrations/semrush/organic_results.rb
243
+ - lib/templates/quickstart/schedule.rb.erb
229
244
  - lib/templates/quickstart/table.rb.erb
230
245
  - static/logo.png
231
246
  homepage: http://dataducketl.com/