dataduck 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/dataduck.gemspec +1 -0
- data/docs/commands/console.md +4 -0
- data/docs/commands/dbconsole.md +4 -0
- data/docs/commands/recreate.md +21 -0
- data/docs/contents.yml +1 -0
- data/lib/dataduck/commands.rb +50 -1
- data/lib/dataduck/destination.rb +4 -0
- data/lib/dataduck/redshift_destination.rb +21 -2
- data/lib/dataduck/table.rb +9 -1
- data/lib/dataduck/version.rb +6 -4
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb5bcf70fd0c35ad944220251f360767852eeb80
|
4
|
+
data.tar.gz: e73dd71f9a0761c56dee3637754e26ae962d210b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a607da3c47de0279fa521321555bc4b1218f5375a6ec0aaa2244472e0b42322dca01708a57c813be64d9b34f78ef4588982c38e76c8e7c8ff6c83efb774e0d93
|
7
|
+
data.tar.gz: f7742ec8eff3e4c8bc36fa5015b42ede4fb852be759cb9be5bae25091a13c74160ae93eb91ba2391e1ba0099a53340f17e0839050ece403b90021d363eec3334
|
data/dataduck.gemspec
CHANGED
@@ -26,5 +26,6 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_runtime_dependency "pg", '~> 0.16'
|
27
27
|
spec.add_runtime_dependency "mysql2", '~> 0.4'
|
28
28
|
spec.add_runtime_dependency "aws-sdk", "~> 2.0"
|
29
|
+
spec.add_runtime_dependency "typhoeus", "~> 0.8"
|
29
30
|
spec.add_runtime_dependency "sequel-redshift"
|
30
31
|
end
|
data/docs/commands/console.md
CHANGED
data/docs/commands/dbconsole.md
CHANGED
@@ -0,0 +1,21 @@
|
|
1
|
+
# The `recreate` command
|
2
|
+
|
3
|
+
The `recreate` command will create a brand new version of a table on your destination, then move the existing data over to the new table.
|
4
|
+
|
5
|
+
This is useful if you change the indexes or distribution keys of a table.
|
6
|
+
|
7
|
+
It takes the following steps, so that the original table is affected for as short a time as possible:
|
8
|
+
|
9
|
+
1. Creates a new table named zz_dataduck_recreating_(tablename)
|
10
|
+
|
11
|
+
2. Moves the table data from the original table to the new table.
|
12
|
+
|
13
|
+
3. Renames the original table to zz_dataduck_recreating_old_(tablename)
|
14
|
+
|
15
|
+
4. Renames zz_dataduck_recreating_(tablename) to (tablename).
|
16
|
+
|
17
|
+
5. Drops zz_dataduck_recreating_old_(tablename)
|
18
|
+
|
19
|
+
To recreate a table, use the command:
|
20
|
+
|
21
|
+
`$ dataduck recreate my_table_name`
|
data/docs/contents.yml
CHANGED
data/lib/dataduck/commands.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'erb'
|
2
2
|
require 'yaml'
|
3
3
|
require 'fileutils'
|
4
|
+
require 'typhoeus'
|
4
5
|
|
5
6
|
module DataDuck
|
6
7
|
class Commands
|
@@ -32,7 +33,7 @@ module DataDuck
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def self.acceptable_commands
|
35
|
-
['console', 'dbconsole', 'etl', 'quickstart', 'show']
|
36
|
+
['c', 'console', 'd', 'dbconsole', 'etl', 'quickstart', 'recreate', 'show']
|
36
37
|
end
|
37
38
|
|
38
39
|
def self.route_command(args)
|
@@ -49,12 +50,20 @@ module DataDuck
|
|
49
50
|
DataDuck::Commands.public_send(command, *args[1..-1])
|
50
51
|
end
|
51
52
|
|
53
|
+
def self.c
|
54
|
+
self.console
|
55
|
+
end
|
56
|
+
|
52
57
|
def self.console
|
53
58
|
require "irb"
|
54
59
|
ARGV.clear
|
55
60
|
IRB.start
|
56
61
|
end
|
57
62
|
|
63
|
+
def self.d(where = "destination")
|
64
|
+
self.dbconsole(where)
|
65
|
+
end
|
66
|
+
|
58
67
|
def self.dbconsole(where = "destination")
|
59
68
|
which_database = nil
|
60
69
|
if where == "destination"
|
@@ -110,6 +119,17 @@ module DataDuck
|
|
110
119
|
puts "Commands: #{ acceptable_commands.sort.join(' ') }"
|
111
120
|
end
|
112
121
|
|
122
|
+
def self.recreate(table_name)
|
123
|
+
table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name)
|
124
|
+
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
125
|
+
table_class = Object.const_get(table_name_camelized)
|
126
|
+
if !(table_class <= DataDuck::Table)
|
127
|
+
raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
|
128
|
+
end
|
129
|
+
table = table_class.new
|
130
|
+
table.recreate!(DataDuck::Destination.only_destination)
|
131
|
+
end
|
132
|
+
|
113
133
|
def self.show(table_name = nil)
|
114
134
|
if table_name.nil?
|
115
135
|
Dir[DataDuck.project_root + "/src/tables/*.rb"].each do |file|
|
@@ -134,10 +154,34 @@ module DataDuck
|
|
134
154
|
end
|
135
155
|
end
|
136
156
|
|
157
|
+
def self.quickstart_register_email(email)
|
158
|
+
registration_data = {
|
159
|
+
email: email,
|
160
|
+
version: DataDuck::VERSION,
|
161
|
+
source: "quickstart"
|
162
|
+
}
|
163
|
+
|
164
|
+
request = Typhoeus::Request.new(
|
165
|
+
"dataducketl.com/api/v1/register",
|
166
|
+
method: :post,
|
167
|
+
body: registration_data,
|
168
|
+
timeout: 30,
|
169
|
+
connecttimeout: 10,
|
170
|
+
)
|
171
|
+
|
172
|
+
hydra = Typhoeus::Hydra.new
|
173
|
+
hydra.queue(request)
|
174
|
+
hydra.run
|
175
|
+
end
|
176
|
+
|
137
177
|
def self.quickstart
|
138
178
|
puts "Welcome to DataDuck!"
|
139
179
|
puts "This quickstart wizard will help you set up DataDuck."
|
140
180
|
|
181
|
+
puts "What is your work email address?"
|
182
|
+
email = STDIN.gets.strip
|
183
|
+
self.quickstart_register_email(email)
|
184
|
+
|
141
185
|
puts "What kind of database would you like to source from?"
|
142
186
|
db_type = prompt_choices([
|
143
187
|
[:mysql, "MySQL"],
|
@@ -188,6 +232,11 @@ module DataDuck
|
|
188
232
|
end
|
189
233
|
|
190
234
|
config_obj = {
|
235
|
+
'users' => {
|
236
|
+
email => {
|
237
|
+
'admin' => true
|
238
|
+
}
|
239
|
+
},
|
191
240
|
'sources' => {
|
192
241
|
'source1' => {
|
193
242
|
'type' => db_type.to_s,
|
data/lib/dataduck/destination.rb
CHANGED
@@ -30,6 +30,10 @@ module DataDuck
|
|
30
30
|
raise Exception.new("Must implement load_table! in subclass")
|
31
31
|
end
|
32
32
|
|
33
|
+
def recreate_table!(table)
|
34
|
+
raise Exception.new("Must implement load_table! in subclass")
|
35
|
+
end
|
36
|
+
|
33
37
|
def self.destination(name, allow_nil = false)
|
34
38
|
name = name.to_s
|
35
39
|
|
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -76,15 +76,19 @@ module DataDuck
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def create_output_tables!(table)
|
79
|
-
self.
|
79
|
+
self.create_output_table_with_name!(table, table.building_name)
|
80
80
|
self.create_columns_on_data_warehouse!(table)
|
81
81
|
|
82
82
|
if table.building_name != table.staging_name
|
83
83
|
self.drop_staging_table!(table)
|
84
|
-
self.
|
84
|
+
self.create_output_table_with_name!(table, table.staging_name)
|
85
85
|
end
|
86
86
|
end
|
87
87
|
|
88
|
+
def create_output_table_with_name!(table, name)
|
89
|
+
self.query(self.create_table_query(table, name))
|
90
|
+
end
|
91
|
+
|
88
92
|
def data_as_csv_string(data, property_names)
|
89
93
|
data_string_components = [] # for performance reasons, join strings this way
|
90
94
|
data.each do |result|
|
@@ -216,6 +220,21 @@ module DataDuck
|
|
216
220
|
end
|
217
221
|
end
|
218
222
|
|
223
|
+
def recreate_table!(table)
|
224
|
+
DataDuck::Logs.info "Recreating table #{ table.name }..."
|
225
|
+
|
226
|
+
if !self.table_names.include?(table.name)
|
227
|
+
raise Exception.new("Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?")
|
228
|
+
end
|
229
|
+
|
230
|
+
recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
|
231
|
+
self.create_output_table_with_name!(table, recreating_temp_name)
|
232
|
+
self.query("INSERT INTO #{ recreating_temp_name } (\"#{ table.output_column_names.join('","') }\") SELECT \"#{ table.output_column_names.join('","') }\" FROM #{ table.name }")
|
233
|
+
self.query("ALTER TABLE #{ table.name } RENAME TO zz_dataduck_recreating_old_#{ table.name }")
|
234
|
+
self.query("ALTER TABLE #{ recreating_temp_name } RENAME TO #{ table.name }")
|
235
|
+
self.query("DROP TABLE zz_dataduck_recreating_old_#{ table.name }")
|
236
|
+
end
|
237
|
+
|
219
238
|
def self.value_to_string(value)
|
220
239
|
string_value = ''
|
221
240
|
if value.respond_to? :to_s
|
data/lib/dataduck/table.rb
CHANGED
@@ -79,7 +79,7 @@ module DataDuck
|
|
79
79
|
batch_number += 1
|
80
80
|
self.extract!(destination)
|
81
81
|
self.transform!
|
82
|
-
|
82
|
+
self.load!(destination)
|
83
83
|
|
84
84
|
if self.batch_size.nil?
|
85
85
|
break
|
@@ -152,6 +152,10 @@ module DataDuck
|
|
152
152
|
end
|
153
153
|
end
|
154
154
|
|
155
|
+
def load!(destination)
|
156
|
+
destination.load_table!(self)
|
157
|
+
end
|
158
|
+
|
155
159
|
def indexes
|
156
160
|
which_columns = []
|
157
161
|
which_columns << "id" if self.output_column_names.include?("id")
|
@@ -189,6 +193,10 @@ module DataDuck
|
|
189
193
|
self.output_schema.keys.sort.map(&:to_s)
|
190
194
|
end
|
191
195
|
|
196
|
+
def recreate!(destination)
|
197
|
+
destination.recreate_table!(self)
|
198
|
+
end
|
199
|
+
|
192
200
|
def show
|
193
201
|
puts "Table #{ self.name }"
|
194
202
|
self.class.sources.each do |source_spec|
|
data/lib/dataduck/version.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module DataDuck
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
if !defined?(DataDuck::VERSION)
|
3
|
+
VERSION_MAJOR = 0
|
4
|
+
VERSION_MINOR = 6
|
5
|
+
VERSION_PATCH = 2
|
6
|
+
VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
|
7
|
+
end
|
6
8
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '2.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: typhoeus
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0.8'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0.8'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: sequel-redshift
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -161,6 +175,7 @@ files:
|
|
161
175
|
- docs/commands/dbconsole.md
|
162
176
|
- docs/commands/etl.md
|
163
177
|
- docs/commands/quickstart.md
|
178
|
+
- docs/commands/recreate.md
|
164
179
|
- docs/commands/show.md
|
165
180
|
- docs/contents.yml
|
166
181
|
- docs/overview/README.md
|