dataduck 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a8ecc21c43cf23e347238bd4eb130d4b0d2bddb
4
- data.tar.gz: 3a66f7465e14f20a13c3ce07b1a023d9f5436e4f
3
+ metadata.gz: fb5bcf70fd0c35ad944220251f360767852eeb80
4
+ data.tar.gz: e73dd71f9a0761c56dee3637754e26ae962d210b
5
5
  SHA512:
6
- metadata.gz: cefcb97553320a432f7f48dd586176519f4a381d215ce45516b92849e922fe8d87bc938ba65102b84e90c1b86f71b0bf35fb54106070cefb9b30c94a9e61f087
7
- data.tar.gz: 1f0fd5859639ec367ae9dbabb622bbd82583282166473eeab47c97dc8a853bc32de7f4c2d5dbcff0fc31eed3dc06f4f978143e5ce581ad3fc4f69d0e63c51c54
6
+ metadata.gz: a607da3c47de0279fa521321555bc4b1218f5375a6ec0aaa2244472e0b42322dca01708a57c813be64d9b34f78ef4588982c38e76c8e7c8ff6c83efb774e0d93
7
+ data.tar.gz: f7742ec8eff3e4c8bc36fa5015b42ede4fb852be759cb9be5bae25091a13c74160ae93eb91ba2391e1ba0099a53340f17e0839050ece403b90021d363eec3334
data/dataduck.gemspec CHANGED
@@ -26,5 +26,6 @@ Gem::Specification.new do |spec|
26
26
  spec.add_runtime_dependency "pg", '~> 0.16'
27
27
  spec.add_runtime_dependency "mysql2", '~> 0.4'
28
28
  spec.add_runtime_dependency "aws-sdk", "~> 2.0"
29
+ spec.add_runtime_dependency "typhoeus", "~> 0.8"
29
30
  spec.add_runtime_dependency "sequel-redshift"
30
31
  end
@@ -3,3 +3,7 @@
3
3
  The `console` command will place you into a Ruby console with DataDuck loaded. This can be useful for debugging. Run it with:
4
4
 
5
5
  `$ dataduck console`
6
+
7
+ It is also aliased to simply "c":
8
+
9
+ `$ dataduck c`
@@ -7,6 +7,10 @@ This will connect you with the destination (e.g. Redshift):
7
7
 
8
8
  `$ dataduck dbconsole`
9
9
 
10
+ It is also aliased to simply "d":
11
+
12
+ `$ dataduck d`
13
+
10
14
  You can also use one of these:
11
15
 
12
16
  `$ dataduck dbconsole source`
@@ -0,0 +1,21 @@
1
+ # The `recreate` command
2
+
3
+ The `recreate` command will create a brand new version of a table on your destination, then copy the existing data over to the new table.
4
+
5
+ This is useful if you change the indexes or distribution keys of a table.
6
+
7
+ It takes the following steps, so that the original table is affected for as little time as possible:
8
+
9
+ 1. Creates a new table named zz_dataduck_recreating_(tablename)
10
+
11
+ 2. Copies the table data from the original table to the new table.
12
+
13
+ 3. Renames the original table to zz_dataduck_recreating_old_(tablename)
14
+
15
+ 4. Renames zz_dataduck_recreating_(tablename) to (tablename).
16
+
17
+ 5. Drops zz_dataduck_recreating_old_(tablename)
18
+
19
+ To recreate a table, use the command:
20
+
21
+ `$ dataduck recreate my_table_name`
data/docs/contents.yml CHANGED
@@ -7,6 +7,7 @@
7
7
  "dbconsole": dbconsole
8
8
  "etl": etl
9
9
  "quickstart": quickstart
10
+ "recreate": recreate
10
11
  "show": show
11
12
 
12
13
  "Tables":
@@ -1,6 +1,7 @@
1
1
  require 'erb'
2
2
  require 'yaml'
3
3
  require 'fileutils'
4
+ require 'typhoeus'
4
5
 
5
6
  module DataDuck
6
7
  class Commands
@@ -32,7 +33,7 @@ module DataDuck
32
33
  end
33
34
 
34
35
  def self.acceptable_commands
35
- ['console', 'dbconsole', 'etl', 'quickstart', 'show']
36
+ ['c', 'console', 'd', 'dbconsole', 'etl', 'quickstart', 'recreate', 'show']
36
37
  end
37
38
 
38
39
  def self.route_command(args)
@@ -49,12 +50,20 @@ module DataDuck
49
50
  DataDuck::Commands.public_send(command, *args[1..-1])
50
51
  end
51
52
 
53
+ def self.c
54
+ self.console
55
+ end
56
+
52
57
  def self.console
53
58
  require "irb"
54
59
  ARGV.clear
55
60
  IRB.start
56
61
  end
57
62
 
63
+ def self.d(where = "destination")
64
+ self.dbconsole(where)
65
+ end
66
+
58
67
  def self.dbconsole(where = "destination")
59
68
  which_database = nil
60
69
  if where == "destination"
@@ -110,6 +119,17 @@ module DataDuck
110
119
  puts "Commands: #{ acceptable_commands.sort.join(' ') }"
111
120
  end
112
121
 
122
+ def self.recreate(table_name)
123
+ table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name)
124
+ require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
125
+ table_class = Object.const_get(table_name_camelized)
126
+ if !(table_class <= DataDuck::Table)
127
+ raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
128
+ end
129
+ table = table_class.new
130
+ table.recreate!(DataDuck::Destination.only_destination)
131
+ end
132
+
113
133
  def self.show(table_name = nil)
114
134
  if table_name.nil?
115
135
  Dir[DataDuck.project_root + "/src/tables/*.rb"].each do |file|
@@ -134,10 +154,34 @@ module DataDuck
134
154
  end
135
155
  end
136
156
 
157
+ def self.quickstart_register_email(email)
158
+ registration_data = {
159
+ email: email,
160
+ version: DataDuck::VERSION,
161
+ source: "quickstart"
162
+ }
163
+
164
+ request = Typhoeus::Request.new(
165
+ "dataducketl.com/api/v1/register",
166
+ method: :post,
167
+ body: registration_data,
168
+ timeout: 30,
169
+ connecttimeout: 10,
170
+ )
171
+
172
+ hydra = Typhoeus::Hydra.new
173
+ hydra.queue(request)
174
+ hydra.run
175
+ end
176
+
137
177
  def self.quickstart
138
178
  puts "Welcome to DataDuck!"
139
179
  puts "This quickstart wizard will help you set up DataDuck."
140
180
 
181
+ puts "What is your work email address?"
182
+ email = STDIN.gets.strip
183
+ self.quickstart_register_email(email)
184
+
141
185
  puts "What kind of database would you like to source from?"
142
186
  db_type = prompt_choices([
143
187
  [:mysql, "MySQL"],
@@ -188,6 +232,11 @@ module DataDuck
188
232
  end
189
233
 
190
234
  config_obj = {
235
+ 'users' => {
236
+ email => {
237
+ 'admin' => true
238
+ }
239
+ },
191
240
  'sources' => {
192
241
  'source1' => {
193
242
  'type' => db_type.to_s,
@@ -30,6 +30,10 @@ module DataDuck
30
30
  raise Exception.new("Must implement load_table! in subclass")
31
31
  end
32
32
 
33
+ def recreate_table!(table)
34
+ raise Exception.new("Must implement recreate_table! in subclass")
35
+ end
36
+
33
37
  def self.destination(name, allow_nil = false)
34
38
  name = name.to_s
35
39
 
@@ -76,15 +76,19 @@ module DataDuck
76
76
  end
77
77
 
78
78
  def create_output_tables!(table)
79
- self.query(self.create_table_query(table, table.building_name))
79
+ self.create_output_table_with_name!(table, table.building_name)
80
80
  self.create_columns_on_data_warehouse!(table)
81
81
 
82
82
  if table.building_name != table.staging_name
83
83
  self.drop_staging_table!(table)
84
- self.query(self.create_table_query(table, table.staging_name))
84
+ self.create_output_table_with_name!(table, table.staging_name)
85
85
  end
86
86
  end
87
87
 
88
+ def create_output_table_with_name!(table, name)
89
+ self.query(self.create_table_query(table, name))
90
+ end
91
+
88
92
  def data_as_csv_string(data, property_names)
89
93
  data_string_components = [] # for performance reasons, join strings this way
90
94
  data.each do |result|
@@ -216,6 +220,21 @@ module DataDuck
216
220
  end
217
221
  end
218
222
 
223
+ def recreate_table!(table)
224
+ DataDuck::Logs.info "Recreating table #{ table.name }..."
225
+
226
+ if !self.table_names.include?(table.name)
227
+ raise Exception.new("Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?")
228
+ end
229
+
230
+ recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
231
+ self.create_output_table_with_name!(table, recreating_temp_name)
232
+ self.query("INSERT INTO #{ recreating_temp_name } (\"#{ table.output_column_names.join('","') }\") SELECT \"#{ table.output_column_names.join('","') }\" FROM #{ table.name }")
233
+ self.query("ALTER TABLE #{ table.name } RENAME TO zz_dataduck_recreating_old_#{ table.name }")
234
+ self.query("ALTER TABLE #{ recreating_temp_name } RENAME TO #{ table.name }")
235
+ self.query("DROP TABLE zz_dataduck_recreating_old_#{ table.name }")
236
+ end
237
+
219
238
  def self.value_to_string(value)
220
239
  string_value = ''
221
240
  if value.respond_to? :to_s
@@ -79,7 +79,7 @@ module DataDuck
79
79
  batch_number += 1
80
80
  self.extract!(destination)
81
81
  self.transform!
82
- destination.load_table!(self)
82
+ self.load!(destination)
83
83
 
84
84
  if self.batch_size.nil?
85
85
  break
@@ -152,6 +152,10 @@ module DataDuck
152
152
  end
153
153
  end
154
154
 
155
+ def load!(destination)
156
+ destination.load_table!(self)
157
+ end
158
+
155
159
  def indexes
156
160
  which_columns = []
157
161
  which_columns << "id" if self.output_column_names.include?("id")
@@ -189,6 +193,10 @@ module DataDuck
189
193
  self.output_schema.keys.sort.map(&:to_s)
190
194
  end
191
195
 
196
+ def recreate!(destination)
197
+ destination.recreate_table!(self)
198
+ end
199
+
192
200
  def show
193
201
  puts "Table #{ self.name }"
194
202
  self.class.sources.each do |source_spec|
@@ -1,6 +1,8 @@
1
1
  module DataDuck
2
- VERSION_MAJOR = 0
3
- VERSION_MINOR = 6
4
- VERSION_PATCH = 1
5
- VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
2
+ if !defined?(DataDuck::VERSION)
3
+ VERSION_MAJOR = 0
4
+ VERSION_MINOR = 6
5
+ VERSION_PATCH = 2
6
+ VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
+ end
6
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-28 00:00:00.000000000 Z
11
+ date: 2015-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '2.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: typhoeus
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0.8'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0.8'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: sequel-redshift
127
141
  requirement: !ruby/object:Gem::Requirement
@@ -161,6 +175,7 @@ files:
161
175
  - docs/commands/dbconsole.md
162
176
  - docs/commands/etl.md
163
177
  - docs/commands/quickstart.md
178
+ - docs/commands/recreate.md
164
179
  - docs/commands/show.md
165
180
  - docs/contents.yml
166
181
  - docs/overview/README.md