dataduck 0.5.5 → 0.5.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/docs/commands/show.md +1 -1
- data/docs/tables/README.md +1 -1
- data/lib/dataduck.rb +23 -5
- data/lib/dataduck/commands.rb +17 -23
- data/lib/dataduck/database.rb +12 -6
- data/lib/dataduck/mysql_source.rb +5 -5
- data/lib/dataduck/postgresql_source.rb +5 -5
- data/lib/dataduck/redshift_destination.rb +32 -20
- data/lib/dataduck/sql_db_source.rb +19 -12
- data/lib/dataduck/util.rb +5 -0
- data/lib/dataduck/version.rb +1 -1
- data/lib/templates/quickstart/table.rb.erb +1 -1
- metadata +2 -3
- data/lib/templates/quickstart/main.rb.erb +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c172e7a467c64f911c45129ee6cb4ec8cc862e9
|
4
|
+
data.tar.gz: 057a610687e9ecf15b2baeaa8dfe338c634205a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 327385731802b5932ba7c44b0cc596d2a412edf51100ee6df618905951b51b4a46c507aa44493ec43a1bc8f12c9ca1bcdc64722daf3ca2048b1ec8fa214a65f0
|
7
|
+
data.tar.gz: b65b340d598ee5302137bebf68a8172f33d346ddf792a9a8dcb8b7524288a1ae3f18d6e1eed6f8e7731e0cf77933c7931964b5c7b6830e85f3dfbaf8043cbecb
|
data/docs/commands/show.md
CHANGED
data/docs/tables/README.md
CHANGED
@@ -54,7 +54,7 @@ The following is an example table.
|
|
54
54
|
|
55
55
|
```ruby
|
56
56
|
class Decks < DataDuck::Table
|
57
|
-
source :
|
57
|
+
source :source1, ["id", "name", "user_id", "cards",
|
58
58
|
"num_wins", "num_losses", "created_at", "updated_at",
|
59
59
|
"is_drafted", "num_draft_wins", "num_draft_losses"]
|
60
60
|
|
data/lib/dataduck.rb
CHANGED
@@ -20,12 +20,30 @@ module DataDuck
|
|
20
20
|
spec = Gem::Specification.find_by_name("dataduck")
|
21
21
|
create_module_var("gem_root", spec.gem_dir)
|
22
22
|
|
23
|
-
|
24
|
-
|
23
|
+
detect_project_root = Dir.getwd
|
24
|
+
while true
|
25
|
+
if detect_project_root == ""
|
26
|
+
raise Exception.new("Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?")
|
27
|
+
end
|
28
|
+
|
29
|
+
if File.exist?(detect_project_root + '/Gemfile')
|
30
|
+
break
|
31
|
+
end
|
32
|
+
|
33
|
+
detect_project_root_splits = detect_project_root.split("/")
|
34
|
+
detect_project_root_splits = detect_project_root_splits[0..detect_project_root_splits.length - 2]
|
35
|
+
detect_project_root = detect_project_root_splits.join("/")
|
36
|
+
end
|
37
|
+
create_module_var("project_root", detect_project_root)
|
25
38
|
|
26
|
-
|
27
|
-
|
28
|
-
|
39
|
+
create_module_var("config", {})
|
40
|
+
configs_to_load = ["/config/base.yml", "/config/#{ ENV['DATADUCK_ENV'] }.yml",
|
41
|
+
"/config/secret/base.yml", "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"]
|
42
|
+
configs_to_load.each do |relative_path|
|
43
|
+
config_path = DataDuck.project_root + relative_path
|
44
|
+
loaded_config = File.exist?(config_path) ? YAML.load_file(config_path) : {}
|
45
|
+
DataDuck.config = Util.deep_merge(DataDuck.config, loaded_config)
|
46
|
+
end
|
29
47
|
|
30
48
|
create_module_var("sources", {})
|
31
49
|
create_module_var("destinations", {})
|
data/lib/dataduck/commands.rb
CHANGED
@@ -170,7 +170,7 @@ module DataDuck
|
|
170
170
|
postgresql: DataDuck::PostgresqlSource,
|
171
171
|
}[db_type]
|
172
172
|
|
173
|
-
db_source = db_class.new({
|
173
|
+
db_source = db_class.new("source1", {
|
174
174
|
'db_type' => db_type.to_s,
|
175
175
|
'host' => source_host,
|
176
176
|
'database' => source_database,
|
@@ -189,20 +189,17 @@ module DataDuck
|
|
189
189
|
|
190
190
|
config_obj = {
|
191
191
|
'sources' => {
|
192
|
-
'
|
192
|
+
'source1' => {
|
193
193
|
'type' => db_type.to_s,
|
194
194
|
'host' => source_host,
|
195
195
|
'database' => source_database,
|
196
196
|
'port' => source_port,
|
197
197
|
'username' => source_username,
|
198
|
-
'password' => source_password,
|
199
198
|
}
|
200
199
|
},
|
201
200
|
'destinations' => {
|
202
|
-
'
|
201
|
+
'destination1' => {
|
203
202
|
'type' => 'redshift',
|
204
|
-
'aws_key' => 'YOUR_AWS_KEY',
|
205
|
-
'aws_secret' => 'YOUR_AWS_SECRET',
|
206
203
|
's3_bucket' => 'YOUR_BUCKET',
|
207
204
|
's3_region' => 'YOUR_BUCKET_REGION',
|
208
205
|
'host' => 'redshift.somekeygoeshere.us-west-2.redshift.amazonaws.com',
|
@@ -210,28 +207,32 @@ module DataDuck
|
|
210
207
|
'database' => 'main',
|
211
208
|
'schema' => 'public',
|
212
209
|
'username' => 'YOUR_UESRNAME',
|
213
|
-
'password' => 'YOUR_PASSWORD',
|
214
210
|
}
|
215
211
|
}
|
216
212
|
}
|
213
|
+
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/base.yml", config_obj.to_yaml)
|
214
|
+
|
215
|
+
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/.env", """
|
216
|
+
destination1_aws_key=AWS_KEY_GOES_HERE
|
217
|
+
destination1_aws_secret=AWS_SECRET_GOES_HERE
|
218
|
+
destination1_password=REDSHIFT_PASSWORD_GOES_HERE
|
219
|
+
source1_password=#{ source_password }
|
220
|
+
""".strip)
|
217
221
|
|
218
|
-
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/secret/#{ DataDuck.environment }.yml", config_obj.to_yaml)
|
219
|
-
DataDuck::Commands.quickstart_save_main
|
220
222
|
DataDuck::Commands.quickstart_update_gitignore
|
221
223
|
|
222
224
|
puts "Quickstart complete!"
|
223
|
-
puts "You still need to edit your config/
|
224
|
-
puts "Run your ETL with:
|
225
|
+
puts "You still need to edit your .env and config/base.yml files with your AWS and Redshift credentials."
|
226
|
+
puts "Run your ETL with: dataduck etl all"
|
227
|
+
puts "For more help, visit http://dataducketl.com/docs"
|
225
228
|
end
|
226
229
|
|
227
230
|
def self.quickstart_update_gitignore
|
228
231
|
main_gitignore_path = "#{ DataDuck.project_root }/.gitignore"
|
229
232
|
FileUtils.touch(main_gitignore_path)
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
output = File.open(secret_gitignore_path, "w")
|
234
|
-
output << '[^.]*'
|
233
|
+
output = File.open(main_gitignore_path, "w")
|
234
|
+
output << ".DS_Store\n"
|
235
|
+
output << ".env\n"
|
235
236
|
output.close
|
236
237
|
end
|
237
238
|
|
@@ -264,12 +265,5 @@ module DataDuck
|
|
264
265
|
output << contents
|
265
266
|
output.close
|
266
267
|
end
|
267
|
-
|
268
|
-
def self.quickstart_save_main
|
269
|
-
namespace = Namespace.new
|
270
|
-
template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/main.rb.erb", 'r').read
|
271
|
-
result = ERB.new(template).result(namespace.get_binding)
|
272
|
-
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/main.rb", result)
|
273
|
-
end
|
274
268
|
end
|
275
269
|
end
|
data/lib/dataduck/database.rb
CHANGED
@@ -20,13 +20,19 @@ module DataDuck
|
|
20
20
|
|
21
21
|
protected
|
22
22
|
|
23
|
+
def load_value(prop_name, db_name, config)
|
24
|
+
self.send("#{ prop_name }=", config[prop_name] || ENV["#{ db_name }_#{ prop_name }"])
|
25
|
+
end
|
26
|
+
|
23
27
|
def find_command_and_execute(commands, *args)
|
24
28
|
# This function was originally sourced from Rails
|
25
29
|
# https://github.com/rails/rails
|
26
30
|
#
|
27
|
-
#
|
31
|
+
# This function was licensed under the MIT license
|
28
32
|
# http://opensource.org/licenses/MIT
|
29
33
|
#
|
34
|
+
# The license asks to include the following with the source code:
|
35
|
+
#
|
30
36
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
31
37
|
# of this software and associated documentation files (the "Software"), to deal
|
32
38
|
# in the Software without restriction, including without limitation the rights
|
@@ -68,11 +74,11 @@ module DataDuck
|
|
68
74
|
# This method is not all exhaustive, and is not meant to be necessarily relied on, but is a
|
69
75
|
# sanity check that can be used to ensure certain sql is not mutating.
|
70
76
|
|
71
|
-
return true if sql.downcase.start_with?("drop table")
|
72
|
-
return true if sql.downcase.start_with?("create table")
|
73
|
-
return true if sql.downcase.start_with?("delete from")
|
74
|
-
return true if sql.downcase.start_with?("insert into")
|
75
|
-
return true if sql.downcase.start_with?("alter table")
|
77
|
+
return true if sql.downcase.strip.start_with?("drop table")
|
78
|
+
return true if sql.downcase.strip.start_with?("create table")
|
79
|
+
return true if sql.downcase.strip.start_with?("delete from")
|
80
|
+
return true if sql.downcase.strip.start_with?("insert into")
|
81
|
+
return true if sql.downcase.strip.start_with?("alter table")
|
76
82
|
|
77
83
|
false
|
78
84
|
end
|
data/lib/dataduck/mysql_source.rb
CHANGED
@@ -10,11 +10,11 @@ module DataDuck
|
|
10
10
|
|
11
11
|
def dbconsole(options = {})
|
12
12
|
args = []
|
13
|
-
args << "--host=#{
|
14
|
-
args << "--user=#{
|
15
|
-
args << "--database=#{
|
16
|
-
args << "--port=#{
|
17
|
-
args << "--password=#{
|
13
|
+
args << "--host=#{ self.host }"
|
14
|
+
args << "--user=#{ self.username }"
|
15
|
+
args << "--database=#{ self.database }"
|
16
|
+
args << "--port=#{ self.port }"
|
17
|
+
args << "--password=#{ self.password }"
|
18
18
|
|
19
19
|
self.find_command_and_execute("mysql", *args)
|
20
20
|
end
|
data/lib/dataduck/postgresql_source.rb
CHANGED
@@ -10,12 +10,12 @@ module DataDuck
|
|
10
10
|
|
11
11
|
def dbconsole(options = {})
|
12
12
|
args = []
|
13
|
-
args << "--host=#{
|
14
|
-
args << "--username=#{
|
15
|
-
args << "--dbname=#{
|
16
|
-
args << "--port=#{
|
13
|
+
args << "--host=#{ self.host }"
|
14
|
+
args << "--username=#{ self.username }"
|
15
|
+
args << "--dbname=#{ self.database }"
|
16
|
+
args << "--port=#{ self.port }"
|
17
17
|
|
18
|
-
ENV['PGPASSWORD'] =
|
18
|
+
ENV['PGPASSWORD'] = self.password
|
19
19
|
|
20
20
|
self.find_command_and_execute("psql", *args)
|
21
21
|
end
|
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -2,24 +2,36 @@ require_relative 'destination'
|
|
2
2
|
|
3
3
|
module DataDuck
|
4
4
|
class RedshiftDestination < DataDuck::Destination
|
5
|
+
attr_accessor :aws_key
|
6
|
+
attr_accessor :aws_secret
|
7
|
+
attr_accessor :s3_bucket
|
8
|
+
attr_accessor :s3_region
|
9
|
+
attr_accessor :host
|
10
|
+
attr_accessor :port
|
11
|
+
attr_accessor :database
|
12
|
+
attr_accessor :schema
|
13
|
+
attr_accessor :username
|
14
|
+
attr_accessor :password
|
15
|
+
|
5
16
|
def initialize(name, config)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
17
|
+
load_value('aws_key', name, config)
|
18
|
+
load_value('aws_secret', name, config)
|
19
|
+
load_value('s3_bucket', name, config)
|
20
|
+
load_value('s3_region', name, config)
|
21
|
+
load_value('host', name, config)
|
22
|
+
load_value('port', name, config)
|
23
|
+
load_value('database', name, config)
|
24
|
+
load_value('schema', name, config)
|
25
|
+
load_value('username', name, config)
|
26
|
+
load_value('password', name, config)
|
27
|
+
|
16
28
|
@redshift_connection = nil
|
17
29
|
|
18
30
|
super
|
19
31
|
end
|
20
32
|
|
21
33
|
def connection
|
22
|
-
@redshift_connection ||= Sequel.connect("redshift://#{
|
34
|
+
@redshift_connection ||= Sequel.connect("redshift://#{ self.username }:#{ self.password }@#{ self.host }:#{ self.port }/#{ self.database }" +
|
23
35
|
"?force_standard_strings=f",
|
24
36
|
:client_min_messages => '',
|
25
37
|
:force_standard_strings => false
|
@@ -31,8 +43,8 @@ module DataDuck
|
|
31
43
|
query_fragments = []
|
32
44
|
query_fragments << "COPY #{ table.staging_name } (#{ properties_joined_string })"
|
33
45
|
query_fragments << "FROM '#{ s3_path }'"
|
34
|
-
query_fragments << "CREDENTIALS 'aws_access_key_id=#{
|
35
|
-
query_fragments << "REGION '#{
|
46
|
+
query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
|
47
|
+
query_fragments << "REGION '#{ self.s3_region }'"
|
36
48
|
query_fragments << "CSV TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
|
37
49
|
query_fragments << "DATEFORMAT 'auto'"
|
38
50
|
return query_fragments.join(" ")
|
@@ -113,12 +125,12 @@ module DataDuck
|
|
113
125
|
|
114
126
|
def dbconsole(options = {})
|
115
127
|
args = []
|
116
|
-
args << "--host=#{
|
117
|
-
args << "--username=#{
|
118
|
-
args << "--dbname=#{
|
119
|
-
args << "--port=#{
|
128
|
+
args << "--host=#{ self.host }"
|
129
|
+
args << "--username=#{ self.username }"
|
130
|
+
args << "--dbname=#{ self.database }"
|
131
|
+
args << "--port=#{ self.port }"
|
120
132
|
|
121
|
-
ENV['PGPASSWORD'] =
|
133
|
+
ENV['PGPASSWORD'] = self.password
|
122
134
|
|
123
135
|
self.find_command_and_execute("psql", *args)
|
124
136
|
end
|
@@ -173,8 +185,8 @@ module DataDuck
|
|
173
185
|
|
174
186
|
table_csv = self.data_as_csv_string(table.data, table.output_column_names)
|
175
187
|
|
176
|
-
s3_obj = S3Object.new(filepath, table_csv,
|
177
|
-
|
188
|
+
s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
|
189
|
+
self.s3_bucket, self.s3_region)
|
178
190
|
s3_obj.upload!
|
179
191
|
return s3_obj
|
180
192
|
end
|
data/lib/dataduck/sql_db_source.rb
CHANGED
@@ -5,13 +5,20 @@ require 'sequel'
|
|
5
5
|
|
6
6
|
module DataDuck
|
7
7
|
class SqlDbSource < DataDuck::Source
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
8
|
+
attr_accessor :host
|
9
|
+
attr_accessor :port
|
10
|
+
attr_accessor :username
|
11
|
+
attr_accessor :password
|
12
|
+
attr_accessor :database
|
13
|
+
|
14
|
+
def initialize(name, config)
|
15
|
+
load_value('host', name, config)
|
16
|
+
load_value('port', name, config)
|
17
|
+
load_value('username', name, config)
|
18
|
+
load_value('password', name, config)
|
19
|
+
load_value('database', name, config)
|
20
|
+
|
21
|
+
@initialized_db_type = config['db_type']
|
15
22
|
|
16
23
|
super
|
17
24
|
end
|
@@ -19,11 +26,11 @@ module DataDuck
|
|
19
26
|
def connection
|
20
27
|
@connection ||= Sequel.connect(
|
21
28
|
adapter: self.db_type,
|
22
|
-
user:
|
23
|
-
host:
|
24
|
-
database:
|
25
|
-
password:
|
26
|
-
port:
|
29
|
+
user: self.username,
|
30
|
+
host: self.host,
|
31
|
+
database: self.database,
|
32
|
+
password: self.password,
|
33
|
+
port: self.port
|
27
34
|
)
|
28
35
|
end
|
29
36
|
|
data/lib/dataduck/util.rb
CHANGED
@@ -2,6 +2,11 @@ require 'fileutils'
|
|
2
2
|
|
3
3
|
module DataDuck
|
4
4
|
module Util
|
5
|
+
def Util.deep_merge(first, second)
|
6
|
+
merger = proc { |key, v1, v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : v2 }
|
7
|
+
first.merge(second, &merger)
|
8
|
+
end
|
9
|
+
|
5
10
|
def Util.ensure_path_exists!(full_path)
|
6
11
|
split_paths = full_path.split('/')
|
7
12
|
just_file_path = split_paths.pop
|
data/lib/dataduck/version.rb
CHANGED
data/lib/templates/quickstart/table.rb.erb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
class <%= table_name_camelcased %> < DataDuck::Table
|
2
|
-
source :
|
2
|
+
source :source1, :<%= table_name %>, ["<%= columns.map { |col| col[0] }.join('", "') %>"]
|
3
3
|
|
4
4
|
output({<% columns.each do |col| %>
|
5
5
|
<%= '# ' if col[2] %>:<%= col[0] %> => :<%= col[1] %>,<% end %>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -191,7 +191,6 @@ files:
|
|
191
191
|
- lib/dataduck/util.rb
|
192
192
|
- lib/dataduck/version.rb
|
193
193
|
- lib/helpers/module_vars.rb
|
194
|
-
- lib/templates/quickstart/main.rb.erb
|
195
194
|
- lib/templates/quickstart/table.rb.erb
|
196
195
|
- static/logo.png
|
197
196
|
homepage: http://dataducketl.com/
|