dataduck 0.6.1 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a8ecc21c43cf23e347238bd4eb130d4b0d2bddb
4
- data.tar.gz: 3a66f7465e14f20a13c3ce07b1a023d9f5436e4f
3
+ metadata.gz: fb5bcf70fd0c35ad944220251f360767852eeb80
4
+ data.tar.gz: e73dd71f9a0761c56dee3637754e26ae962d210b
5
5
  SHA512:
6
- metadata.gz: cefcb97553320a432f7f48dd586176519f4a381d215ce45516b92849e922fe8d87bc938ba65102b84e90c1b86f71b0bf35fb54106070cefb9b30c94a9e61f087
7
- data.tar.gz: 1f0fd5859639ec367ae9dbabb622bbd82583282166473eeab47c97dc8a853bc32de7f4c2d5dbcff0fc31eed3dc06f4f978143e5ce581ad3fc4f69d0e63c51c54
6
+ metadata.gz: a607da3c47de0279fa521321555bc4b1218f5375a6ec0aaa2244472e0b42322dca01708a57c813be64d9b34f78ef4588982c38e76c8e7c8ff6c83efb774e0d93
7
+ data.tar.gz: f7742ec8eff3e4c8bc36fa5015b42ede4fb852be759cb9be5bae25091a13c74160ae93eb91ba2391e1ba0099a53340f17e0839050ece403b90021d363eec3334
data/dataduck.gemspec CHANGED
@@ -26,5 +26,6 @@ Gem::Specification.new do |spec|
26
26
  spec.add_runtime_dependency "pg", '~> 0.16'
27
27
  spec.add_runtime_dependency "mysql2", '~> 0.4'
28
28
  spec.add_runtime_dependency "aws-sdk", "~> 2.0"
29
+ spec.add_runtime_dependency "typhoeus", "~> 0.8"
29
30
  spec.add_runtime_dependency "sequel-redshift"
30
31
  end
@@ -3,3 +3,7 @@
3
3
  The `console` command will place you into a Ruby console with DataDuck loaded. This can be useful for debugging. Run it with:
4
4
 
5
5
  `$ dataduck console`
6
+
7
+ It is also aliased to simply "c":
8
+
9
+ `$ dataduck c`
@@ -7,6 +7,10 @@ This will connect you with the destination (e.g. Redshift):
7
7
 
8
8
  `$ dataduck dbconsole`
9
9
 
10
+ It is also aliased to simply "d":
11
+
12
+ `$ dataduck d`
13
+
10
14
  You can also use one of these:
11
15
 
12
16
  `$ dataduck dbconsole source`
@@ -0,0 +1,21 @@
1
+ # The `recreate` command
2
+
3
+ The `recreate` command will create a brand new version of a table on your destination, then move the existing data over to the new table.
4
+
5
+ This is useful if you change the indexes or distribution keys of a table.
6
+
7
+ It takes the following steps, so that the original table is affected for as short a time as possible:
8
+
9
+ 1. Creates a new table named zz_dataduck_recreating_(tablename)
10
+
11
+ 2. Moves the table data from the original table to the new table.
12
+
13
+ 3. Renames the original table to zz_dataduck_recreating_old_(tablename)
14
+
15
+ 4. Renames zz_dataduck_recreating_(tablename) to (tablename).
16
+
17
+ 5. Drops zz_dataduck_recreating_old_(tablename)
18
+
19
+ To recreate a table, use the command:
20
+
21
+ `$ dataduck recreate my_table_name`
data/docs/contents.yml CHANGED
@@ -7,6 +7,7 @@
7
7
  "dbconsole": dbconsole
8
8
  "etl": etl
9
9
  "quickstart": quickstart
10
+ "recreate": recreate
10
11
  "show": show
11
12
 
12
13
  "Tables":
@@ -1,6 +1,7 @@
1
1
  require 'erb'
2
2
  require 'yaml'
3
3
  require 'fileutils'
4
+ require 'typhoeus'
4
5
 
5
6
  module DataDuck
6
7
  class Commands
@@ -32,7 +33,7 @@ module DataDuck
32
33
  end
33
34
 
34
35
  def self.acceptable_commands
35
- ['console', 'dbconsole', 'etl', 'quickstart', 'show']
36
+ ['c', 'console', 'd', 'dbconsole', 'etl', 'quickstart', 'recreate', 'show']
36
37
  end
37
38
 
38
39
  def self.route_command(args)
@@ -49,12 +50,20 @@ module DataDuck
49
50
  DataDuck::Commands.public_send(command, *args[1..-1])
50
51
  end
51
52
 
53
+ def self.c
54
+ self.console
55
+ end
56
+
52
57
  def self.console
53
58
  require "irb"
54
59
  ARGV.clear
55
60
  IRB.start
56
61
  end
57
62
 
63
+ def self.d(where = "destination")
64
+ self.dbconsole(where)
65
+ end
66
+
58
67
  def self.dbconsole(where = "destination")
59
68
  which_database = nil
60
69
  if where == "destination"
@@ -110,6 +119,17 @@ module DataDuck
110
119
  puts "Commands: #{ acceptable_commands.sort.join(' ') }"
111
120
  end
112
121
 
122
+ def self.recreate(table_name)
123
+ table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name)
124
+ require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
125
+ table_class = Object.const_get(table_name_camelized)
126
+ if !(table_class <= DataDuck::Table)
127
+ raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
128
+ end
129
+ table = table_class.new
130
+ table.recreate!(DataDuck::Destination.only_destination)
131
+ end
132
+
113
133
  def self.show(table_name = nil)
114
134
  if table_name.nil?
115
135
  Dir[DataDuck.project_root + "/src/tables/*.rb"].each do |file|
@@ -134,10 +154,34 @@ module DataDuck
134
154
  end
135
155
  end
136
156
 
157
+ def self.quickstart_register_email(email)
158
+ registration_data = {
159
+ email: email,
160
+ version: DataDuck::VERSION,
161
+ source: "quickstart"
162
+ }
163
+
164
+ request = Typhoeus::Request.new(
165
+ "dataducketl.com/api/v1/register",
166
+ method: :post,
167
+ body: registration_data,
168
+ timeout: 30,
169
+ connecttimeout: 10,
170
+ )
171
+
172
+ hydra = Typhoeus::Hydra.new
173
+ hydra.queue(request)
174
+ hydra.run
175
+ end
176
+
137
177
  def self.quickstart
138
178
  puts "Welcome to DataDuck!"
139
179
  puts "This quickstart wizard will help you set up DataDuck."
140
180
 
181
+ puts "What is your work email address?"
182
+ email = STDIN.gets.strip
183
+ self.quickstart_register_email(email)
184
+
141
185
  puts "What kind of database would you like to source from?"
142
186
  db_type = prompt_choices([
143
187
  [:mysql, "MySQL"],
@@ -188,6 +232,11 @@ module DataDuck
188
232
  end
189
233
 
190
234
  config_obj = {
235
+ 'users' => {
236
+ email => {
237
+ 'admin' => true
238
+ }
239
+ },
191
240
  'sources' => {
192
241
  'source1' => {
193
242
  'type' => db_type.to_s,
@@ -30,6 +30,10 @@ module DataDuck
30
30
  raise Exception.new("Must implement load_table! in subclass")
31
31
  end
32
32
 
33
+ def recreate_table!(table)
34
+ raise Exception.new("Must implement load_table! in subclass")
35
+ end
36
+
33
37
  def self.destination(name, allow_nil = false)
34
38
  name = name.to_s
35
39
 
@@ -76,15 +76,19 @@ module DataDuck
76
76
  end
77
77
 
78
78
  def create_output_tables!(table)
79
- self.query(self.create_table_query(table, table.building_name))
79
+ self.create_output_table_with_name!(table, table.building_name)
80
80
  self.create_columns_on_data_warehouse!(table)
81
81
 
82
82
  if table.building_name != table.staging_name
83
83
  self.drop_staging_table!(table)
84
- self.query(self.create_table_query(table, table.staging_name))
84
+ self.create_output_table_with_name!(table, table.staging_name)
85
85
  end
86
86
  end
87
87
 
88
+ def create_output_table_with_name!(table, name)
89
+ self.query(self.create_table_query(table, name))
90
+ end
91
+
88
92
  def data_as_csv_string(data, property_names)
89
93
  data_string_components = [] # for performance reasons, join strings this way
90
94
  data.each do |result|
@@ -216,6 +220,21 @@ module DataDuck
216
220
  end
217
221
  end
218
222
 
223
+ def recreate_table!(table)
224
+ DataDuck::Logs.info "Recreating table #{ table.name }..."
225
+
226
+ if !self.table_names.include?(table.name)
227
+ raise Exception.new("Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?")
228
+ end
229
+
230
+ recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
231
+ self.create_output_table_with_name!(table, recreating_temp_name)
232
+ self.query("INSERT INTO #{ recreating_temp_name } (\"#{ table.output_column_names.join('","') }\") SELECT \"#{ table.output_column_names.join('","') }\" FROM #{ table.name }")
233
+ self.query("ALTER TABLE #{ table.name } RENAME TO zz_dataduck_recreating_old_#{ table.name }")
234
+ self.query("ALTER TABLE #{ recreating_temp_name } RENAME TO #{ table.name }")
235
+ self.query("DROP TABLE zz_dataduck_recreating_old_#{ table.name }")
236
+ end
237
+
219
238
  def self.value_to_string(value)
220
239
  string_value = ''
221
240
  if value.respond_to? :to_s
@@ -79,7 +79,7 @@ module DataDuck
79
79
  batch_number += 1
80
80
  self.extract!(destination)
81
81
  self.transform!
82
- destination.load_table!(self)
82
+ self.load!(destination)
83
83
 
84
84
  if self.batch_size.nil?
85
85
  break
@@ -152,6 +152,10 @@ module DataDuck
152
152
  end
153
153
  end
154
154
 
155
+ def load!(destination)
156
+ destination.load_table!(self)
157
+ end
158
+
155
159
  def indexes
156
160
  which_columns = []
157
161
  which_columns << "id" if self.output_column_names.include?("id")
@@ -189,6 +193,10 @@ module DataDuck
189
193
  self.output_schema.keys.sort.map(&:to_s)
190
194
  end
191
195
 
196
+ def recreate!(destination)
197
+ destination.recreate_table!(self)
198
+ end
199
+
192
200
  def show
193
201
  puts "Table #{ self.name }"
194
202
  self.class.sources.each do |source_spec|
@@ -1,6 +1,8 @@
1
1
  module DataDuck
2
- VERSION_MAJOR = 0
3
- VERSION_MINOR = 6
4
- VERSION_PATCH = 1
5
- VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
2
+ if !defined?(DataDuck::VERSION)
3
+ VERSION_MAJOR = 0
4
+ VERSION_MINOR = 6
5
+ VERSION_PATCH = 2
6
+ VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
7
+ end
6
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-28 00:00:00.000000000 Z
11
+ date: 2015-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '2.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: typhoeus
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0.8'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0.8'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: sequel-redshift
127
141
  requirement: !ruby/object:Gem::Requirement
@@ -161,6 +175,7 @@ files:
161
175
  - docs/commands/dbconsole.md
162
176
  - docs/commands/etl.md
163
177
  - docs/commands/quickstart.md
178
+ - docs/commands/recreate.md
164
179
  - docs/commands/show.md
165
180
  - docs/contents.yml
166
181
  - docs/overview/README.md