dataduck 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/dataduck.gemspec +1 -0
- data/docs/commands/console.md +4 -0
- data/docs/commands/dbconsole.md +4 -0
- data/docs/commands/recreate.md +21 -0
- data/docs/contents.yml +1 -0
- data/lib/dataduck/commands.rb +50 -1
- data/lib/dataduck/destination.rb +4 -0
- data/lib/dataduck/redshift_destination.rb +21 -2
- data/lib/dataduck/table.rb +9 -1
- data/lib/dataduck/version.rb +6 -4
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb5bcf70fd0c35ad944220251f360767852eeb80
|
4
|
+
data.tar.gz: e73dd71f9a0761c56dee3637754e26ae962d210b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a607da3c47de0279fa521321555bc4b1218f5375a6ec0aaa2244472e0b42322dca01708a57c813be64d9b34f78ef4588982c38e76c8e7c8ff6c83efb774e0d93
|
7
|
+
data.tar.gz: f7742ec8eff3e4c8bc36fa5015b42ede4fb852be759cb9be5bae25091a13c74160ae93eb91ba2391e1ba0099a53340f17e0839050ece403b90021d363eec3334
|
data/dataduck.gemspec
CHANGED
@@ -26,5 +26,6 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_runtime_dependency "pg", '~> 0.16'
|
27
27
|
spec.add_runtime_dependency "mysql2", '~> 0.4'
|
28
28
|
spec.add_runtime_dependency "aws-sdk", "~> 2.0"
|
29
|
+
spec.add_runtime_dependency "typhoeus", "~> 0.8"
|
29
30
|
spec.add_runtime_dependency "sequel-redshift"
|
30
31
|
end
|
data/docs/commands/console.md
CHANGED
data/docs/commands/dbconsole.md
CHANGED
@@ -0,0 +1,21 @@
|
|
1
|
+
# The `recreate` command
|
2
|
+
|
3
|
+
The `recreate` command will create a brand new version of a table on your destination, then move the existing data over to the new table.
|
4
|
+
|
5
|
+
This is useful if you change the indexes or distribution keys of a table.
|
6
|
+
|
7
|
+
It takes the following steps, so that the original table is affected for as short a time as possible:
|
8
|
+
|
9
|
+
1. Creates a new table named zz_dataduck_recreating_(tablename)
|
10
|
+
|
11
|
+
2. Moves the table data from the original table to the new table.
|
12
|
+
|
13
|
+
3. Renames the original table to zz_dataduck_recreating_old_(tablename)
|
14
|
+
|
15
|
+
4. Renames zz_dataduck_recreating_(tablename) to (tablename).
|
16
|
+
|
17
|
+
5. Drops zz_dataduck_recreating_old_(tablename)
|
18
|
+
|
19
|
+
To recreate a table, use the command:
|
20
|
+
|
21
|
+
`$ dataduck recreate my_table_name`
|
data/docs/contents.yml
CHANGED
data/lib/dataduck/commands.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'erb'
|
2
2
|
require 'yaml'
|
3
3
|
require 'fileutils'
|
4
|
+
require 'typhoeus'
|
4
5
|
|
5
6
|
module DataDuck
|
6
7
|
class Commands
|
@@ -32,7 +33,7 @@ module DataDuck
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def self.acceptable_commands
|
35
|
-
['console', 'dbconsole', 'etl', 'quickstart', 'show']
|
36
|
+
['c', 'console', 'd', 'dbconsole', 'etl', 'quickstart', 'recreate', 'show']
|
36
37
|
end
|
37
38
|
|
38
39
|
def self.route_command(args)
|
@@ -49,12 +50,20 @@ module DataDuck
|
|
49
50
|
DataDuck::Commands.public_send(command, *args[1..-1])
|
50
51
|
end
|
51
52
|
|
53
|
+
def self.c
|
54
|
+
self.console
|
55
|
+
end
|
56
|
+
|
52
57
|
def self.console
|
53
58
|
require "irb"
|
54
59
|
ARGV.clear
|
55
60
|
IRB.start
|
56
61
|
end
|
57
62
|
|
63
|
+
def self.d(where = "destination")
|
64
|
+
self.dbconsole(where)
|
65
|
+
end
|
66
|
+
|
58
67
|
def self.dbconsole(where = "destination")
|
59
68
|
which_database = nil
|
60
69
|
if where == "destination"
|
@@ -110,6 +119,17 @@ module DataDuck
|
|
110
119
|
puts "Commands: #{ acceptable_commands.sort.join(' ') }"
|
111
120
|
end
|
112
121
|
|
122
|
+
def self.recreate(table_name)
|
123
|
+
table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name)
|
124
|
+
require DataDuck.project_root + "/src/tables/#{ table_name }.rb"
|
125
|
+
table_class = Object.const_get(table_name_camelized)
|
126
|
+
if !(table_class <= DataDuck::Table)
|
127
|
+
raise Exception.new("Table class #{ table_name_camelized } must inherit from DataDuck::Table")
|
128
|
+
end
|
129
|
+
table = table_class.new
|
130
|
+
table.recreate!(DataDuck::Destination.only_destination)
|
131
|
+
end
|
132
|
+
|
113
133
|
def self.show(table_name = nil)
|
114
134
|
if table_name.nil?
|
115
135
|
Dir[DataDuck.project_root + "/src/tables/*.rb"].each do |file|
|
@@ -134,10 +154,34 @@ module DataDuck
|
|
134
154
|
end
|
135
155
|
end
|
136
156
|
|
157
|
+
def self.quickstart_register_email(email)
|
158
|
+
registration_data = {
|
159
|
+
email: email,
|
160
|
+
version: DataDuck::VERSION,
|
161
|
+
source: "quickstart"
|
162
|
+
}
|
163
|
+
|
164
|
+
request = Typhoeus::Request.new(
|
165
|
+
"dataducketl.com/api/v1/register",
|
166
|
+
method: :post,
|
167
|
+
body: registration_data,
|
168
|
+
timeout: 30,
|
169
|
+
connecttimeout: 10,
|
170
|
+
)
|
171
|
+
|
172
|
+
hydra = Typhoeus::Hydra.new
|
173
|
+
hydra.queue(request)
|
174
|
+
hydra.run
|
175
|
+
end
|
176
|
+
|
137
177
|
def self.quickstart
|
138
178
|
puts "Welcome to DataDuck!"
|
139
179
|
puts "This quickstart wizard will help you set up DataDuck."
|
140
180
|
|
181
|
+
puts "What is your work email address?"
|
182
|
+
email = STDIN.gets.strip
|
183
|
+
self.quickstart_register_email(email)
|
184
|
+
|
141
185
|
puts "What kind of database would you like to source from?"
|
142
186
|
db_type = prompt_choices([
|
143
187
|
[:mysql, "MySQL"],
|
@@ -188,6 +232,11 @@ module DataDuck
|
|
188
232
|
end
|
189
233
|
|
190
234
|
config_obj = {
|
235
|
+
'users' => {
|
236
|
+
email => {
|
237
|
+
'admin' => true
|
238
|
+
}
|
239
|
+
},
|
191
240
|
'sources' => {
|
192
241
|
'source1' => {
|
193
242
|
'type' => db_type.to_s,
|
data/lib/dataduck/destination.rb
CHANGED
@@ -30,6 +30,10 @@ module DataDuck
|
|
30
30
|
raise Exception.new("Must implement load_table! in subclass")
|
31
31
|
end
|
32
32
|
|
33
|
+
def recreate_table!(table)
|
34
|
+
raise Exception.new("Must implement load_table! in subclass")
|
35
|
+
end
|
36
|
+
|
33
37
|
def self.destination(name, allow_nil = false)
|
34
38
|
name = name.to_s
|
35
39
|
|
@@ -76,15 +76,19 @@ module DataDuck
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def create_output_tables!(table)
|
79
|
-
self.
|
79
|
+
self.create_output_table_with_name!(table, table.building_name)
|
80
80
|
self.create_columns_on_data_warehouse!(table)
|
81
81
|
|
82
82
|
if table.building_name != table.staging_name
|
83
83
|
self.drop_staging_table!(table)
|
84
|
-
self.
|
84
|
+
self.create_output_table_with_name!(table, table.staging_name)
|
85
85
|
end
|
86
86
|
end
|
87
87
|
|
88
|
+
def create_output_table_with_name!(table, name)
|
89
|
+
self.query(self.create_table_query(table, name))
|
90
|
+
end
|
91
|
+
|
88
92
|
def data_as_csv_string(data, property_names)
|
89
93
|
data_string_components = [] # for performance reasons, join strings this way
|
90
94
|
data.each do |result|
|
@@ -216,6 +220,21 @@ module DataDuck
|
|
216
220
|
end
|
217
221
|
end
|
218
222
|
|
223
|
+
def recreate_table!(table)
|
224
|
+
DataDuck::Logs.info "Recreating table #{ table.name }..."
|
225
|
+
|
226
|
+
if !self.table_names.include?(table.name)
|
227
|
+
raise Exception.new("Table #{ table.name } doesn't exist on the Redshift database, so it can't be recreated. Did you want to use `dataduck create #{ table.name }` instead?")
|
228
|
+
end
|
229
|
+
|
230
|
+
recreating_temp_name = "zz_dataduck_recreating_#{ table.name }"
|
231
|
+
self.create_output_table_with_name!(table, recreating_temp_name)
|
232
|
+
self.query("INSERT INTO #{ recreating_temp_name } (\"#{ table.output_column_names.join('","') }\") SELECT \"#{ table.output_column_names.join('","') }\" FROM #{ table.name }")
|
233
|
+
self.query("ALTER TABLE #{ table.name } RENAME TO zz_dataduck_recreating_old_#{ table.name }")
|
234
|
+
self.query("ALTER TABLE #{ recreating_temp_name } RENAME TO #{ table.name }")
|
235
|
+
self.query("DROP TABLE zz_dataduck_recreating_old_#{ table.name }")
|
236
|
+
end
|
237
|
+
|
219
238
|
def self.value_to_string(value)
|
220
239
|
string_value = ''
|
221
240
|
if value.respond_to? :to_s
|
data/lib/dataduck/table.rb
CHANGED
@@ -79,7 +79,7 @@ module DataDuck
|
|
79
79
|
batch_number += 1
|
80
80
|
self.extract!(destination)
|
81
81
|
self.transform!
|
82
|
-
|
82
|
+
self.load!(destination)
|
83
83
|
|
84
84
|
if self.batch_size.nil?
|
85
85
|
break
|
@@ -152,6 +152,10 @@ module DataDuck
|
|
152
152
|
end
|
153
153
|
end
|
154
154
|
|
155
|
+
def load!(destination)
|
156
|
+
destination.load_table!(self)
|
157
|
+
end
|
158
|
+
|
155
159
|
def indexes
|
156
160
|
which_columns = []
|
157
161
|
which_columns << "id" if self.output_column_names.include?("id")
|
@@ -189,6 +193,10 @@ module DataDuck
|
|
189
193
|
self.output_schema.keys.sort.map(&:to_s)
|
190
194
|
end
|
191
195
|
|
196
|
+
def recreate!(destination)
|
197
|
+
destination.recreate_table!(self)
|
198
|
+
end
|
199
|
+
|
192
200
|
def show
|
193
201
|
puts "Table #{ self.name }"
|
194
202
|
self.class.sources.each do |source_spec|
|
data/lib/dataduck/version.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module DataDuck
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
if !defined?(DataDuck::VERSION)
|
3
|
+
VERSION_MAJOR = 0
|
4
|
+
VERSION_MINOR = 6
|
5
|
+
VERSION_PATCH = 2
|
6
|
+
VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
|
7
|
+
end
|
6
8
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '2.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: typhoeus
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0.8'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0.8'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: sequel-redshift
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -161,6 +175,7 @@ files:
|
|
161
175
|
- docs/commands/dbconsole.md
|
162
176
|
- docs/commands/etl.md
|
163
177
|
- docs/commands/quickstart.md
|
178
|
+
- docs/commands/recreate.md
|
164
179
|
- docs/commands/show.md
|
165
180
|
- docs/contents.yml
|
166
181
|
- docs/overview/README.md
|