dataduck 0.6.7 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5944e3a23d61e1bcdbfa5c82e1a8d0ee3fe8b4cf
4
- data.tar.gz: 70c2e2b7af5377e13f473a9f7f56df438e5b6491
3
+ metadata.gz: e07cb92ca8f7f36c672dfb89e617e41ddb92f302
4
+ data.tar.gz: 40037cb63d58385f830043257883322e7dad1dbb
5
5
  SHA512:
6
- metadata.gz: af935179e9a4a52d56423cb2a785db6d51421caaaba6d5fa33b6283708b6199252770892446b394c826fab07d75e57553c43d8ccf15754156fe063fc48732e54
7
- data.tar.gz: 51ef2d29c8092aaf30e65326701ed3936c3e77a745ad171dbfd6d993c608e2312344258b31f0c525dd607739ae4376471ef3017858fbb50e6772bf4481b4b263
6
+ metadata.gz: bfce8a29678d44a96f26b05c3cef1fe047776976ced0d856a028276f9f65c0b8094f0ac64405f1eb8af91c1c2c07205b15776a448e9475d52c573596455409b2
7
+ data.tar.gz: 55a7dc9934972cb5953bac4e416dfddf26d4ae65389022168e8e8e36a6d6ffc74c4d76ce57eca9289bbc7a677061a093ec1f809a398b1838b776f0d0975589c6
data/dataduck.gemspec CHANGED
@@ -29,4 +29,5 @@ Gem::Specification.new do |spec|
29
29
  spec.add_runtime_dependency "typhoeus", "~> 0.8"
30
30
  spec.add_runtime_dependency "oj", "~> 2.12"
31
31
  spec.add_runtime_dependency "sequel-redshift"
32
+ spec.add_runtime_dependency "whenever", "~> 0.9"
32
33
  end
@@ -89,28 +89,35 @@ module DataDuck
89
89
  which_database.dbconsole
90
90
  end
91
91
 
92
- def self.etl(what = nil)
93
- if what.nil?
92
+ def self.etl(*table_names_underscore)
93
+ if table_names_underscore.length == 0
94
94
  puts "You need to specify a table name or 'all'. Usage: dataduck etl all OR datduck etl my_table_name"
95
95
  return
96
96
  end
97
97
 
98
98
  only_destination = DataDuck::Destination.only_destination
99
99
 
100
- if what == "all"
100
+ if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
101
101
  etl = ETL.new(destinations: [only_destination], autoload_tables: true)
102
102
  etl.process!
103
103
  else
104
- table_name_camelized = DataDuck::Util.underscore_to_camelcase(what)
105
- require DataDuck.project_root + "/src/tables/#{ what }.rb"
106
- table_class = Object.const_get(table_name_camelized)
107
- if !(table_class <= DataDuck::Table)
108
- raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
104
+ tables = []
105
+ table_names_underscore.each do |table_name|
106
+ table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name)
107
+ require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
108
+ table_class = Object.const_get(table_name_camelized)
109
+ if !(table_class <= DataDuck::Table)
110
+ raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
111
+ end
112
+ table = table_class.new
113
+ tables << table
109
114
  end
110
-
111
- table = table_class.new
112
- etl = ETL.new(destinations: [only_destination], autoload_tables: false, tables: [table])
113
- etl.process_table!(table)
115
+ etl = ETL.new({
116
+ destinations: [only_destination],
117
+ autoload_tables: false,
118
+ tables: tables
119
+ })
120
+ etl.process!
114
121
  end
115
122
  end
116
123
 
@@ -305,6 +312,13 @@ source1_password=#{ source_password }
305
312
  DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/tables/#{ table_name }.rb", result)
306
313
  end
307
314
 
315
+ def self.quickstart_create_schedule_config
316
+ namespace = Namespace.new
317
+ template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/schedule.rb.erb", 'r').read
318
+ result = ERB.new(template).result(namespace.get_binding)
319
+ DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/schedule.rb", result)
320
+ end
321
+
308
322
  def self.quickstart_save_file(output_path_full, contents)
309
323
  *output_path, output_filename = output_path_full.split('/')
310
324
  output_path = output_path.join("/")
data/lib/dataduck/etl.rb CHANGED
@@ -40,22 +40,27 @@ module DataDuck
40
40
  destinations_to_use = destinations_to_use.concat(self.class.destinations)
41
41
  destinations_to_use = destinations_to_use.concat(self.destinations)
42
42
  destinations_to_use.uniq!
43
-
44
- @tables.each do |table_class|
45
- table_to_etl = table_class.new
46
- table_to_etl.etl!(destinations_to_use)
43
+ if destinations_to_use.length == 0
44
+ destinations_to_use << DataDuck::Destination.only_destination
47
45
  end
48
- end
49
46
 
50
- def process_table!(table)
51
- Logs.info("Processing ETL for table #{ table.name } on pid #{ Process.pid }...")
47
+ errored_tables = []
52
48
 
53
- destinations_to_use = []
54
- destinations_to_use = destinations_to_use.concat(self.class.destinations)
55
- destinations_to_use = destinations_to_use.concat(self.destinations)
56
- destinations_to_use.uniq!
49
+ @tables.each do |table_or_class|
50
+ table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
51
+ Logs.info("Processing table '#{ table.name }'...")
52
+ begin
53
+ table.etl!(destinations_to_use)
54
+ rescue Exception => err
55
+ Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
56
+ errored_tables << table
57
+ end
58
+ end
57
59
 
58
- table.etl!(destinations_to_use)
60
+ Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
61
+ if errored_tables.length > 0
62
+ Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
63
+ end
59
64
  end
60
65
  end
61
66
  end
@@ -2,7 +2,7 @@ module DataDuck
2
2
  if !defined?(DataDuck::VERSION)
3
3
  VERSION_MAJOR = 0
4
4
  VERSION_MINOR = 6
5
- VERSION_PATCH = 7
5
+ VERSION_PATCH = 8
6
6
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
7
  end
8
8
  end
@@ -45,8 +45,6 @@ module DataDuck
45
45
  escaped_phrase = URI.escape(phrase)
46
46
  semrush_api_url = "http://api.semrush.com/?type=phrase_organic&key=#{ self.key }&display_limit=#{ self.display_limit }&export_columns=Dn,Ur&phrase=#{ escaped_phrase }&database=#{ self.search_database }"
47
47
 
48
- puts semrush_api_url
49
-
50
48
  response = Typhoeus.get(semrush_api_url)
51
49
  if response.response_code != 200
52
50
  raise OrganicResultsAPIError.new("SEMrush API for phrase #{ phrase } returned error #{ response.response_code } #{ response.body }")
@@ -0,0 +1,19 @@
1
+ # This file configures the whenever gem:
2
+ # https://github.com/javan/whenever
3
+ #
4
+ # To update crontab from this schedule.rb file:
5
+ # whenever --update-crontab etl
6
+ #
7
+ # To clear the crontab from this schedule.rb file:
8
+ # whenever --clear-crontab etl
9
+ #
10
+ # To list out your crontab:
11
+ # crontab -l
12
+
13
+ require 'dataduck'
14
+
15
+ set :output, "#{ DataDuck.project_root }/log/cron.log"
16
+
17
+ every 1.day, :at => '2:00 am' do
18
+ command "cd #{ DataDuck.project_root } && dataduck etl all"
19
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.7
4
+ version: 0.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-08 00:00:00.000000000 Z
11
+ date: 2015-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,6 +164,20 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: whenever
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '0.9'
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '0.9'
167
181
  description: A straightforward, effective ETL framework.
168
182
  email:
169
183
  - pickhardt@gmail.com
@@ -226,6 +240,7 @@ files:
226
240
  - lib/integrations/optimizely/projects.rb
227
241
  - lib/integrations/optimizely/variations.rb
228
242
  - lib/integrations/semrush/organic_results.rb
243
+ - lib/templates/quickstart/schedule.rb.erb
229
244
  - lib/templates/quickstart/table.rb.erb
230
245
  - static/logo.png
231
246
  homepage: http://dataducketl.com/