dataduck 0.5.5 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/docs/commands/show.md +1 -1
- data/docs/tables/README.md +1 -1
- data/lib/dataduck.rb +23 -5
- data/lib/dataduck/commands.rb +17 -23
- data/lib/dataduck/database.rb +12 -6
- data/lib/dataduck/mysql_source.rb +5 -5
- data/lib/dataduck/postgresql_source.rb +5 -5
- data/lib/dataduck/redshift_destination.rb +32 -20
- data/lib/dataduck/sql_db_source.rb +19 -12
- data/lib/dataduck/util.rb +5 -0
- data/lib/dataduck/version.rb +1 -1
- data/lib/templates/quickstart/table.rb.erb +1 -1
- metadata +2 -3
- data/lib/templates/quickstart/main.rb.erb +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c172e7a467c64f911c45129ee6cb4ec8cc862e9
|
4
|
+
data.tar.gz: 057a610687e9ecf15b2baeaa8dfe338c634205a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 327385731802b5932ba7c44b0cc596d2a412edf51100ee6df618905951b51b4a46c507aa44493ec43a1bc8f12c9ca1bcdc64722daf3ca2048b1ec8fa214a65f0
|
7
|
+
data.tar.gz: b65b340d598ee5302137bebf68a8172f33d346ddf792a9a8dcb8b7524288a1ae3f18d6e1eed6f8e7731e0cf77933c7931964b5c7b6830e85f3dfbaf8043cbecb
|
data/docs/commands/show.md
CHANGED
data/docs/tables/README.md
CHANGED
@@ -54,7 +54,7 @@ The following is an example table.
|
|
54
54
|
|
55
55
|
```ruby
|
56
56
|
class Decks < DataDuck::Table
|
57
|
-
source :
|
57
|
+
source :source1, ["id", "name", "user_id", "cards",
|
58
58
|
"num_wins", "num_losses", "created_at", "updated_at",
|
59
59
|
"is_drafted", "num_draft_wins", "num_draft_losses"]
|
60
60
|
|
data/lib/dataduck.rb
CHANGED
@@ -20,12 +20,30 @@ module DataDuck
|
|
20
20
|
spec = Gem::Specification.find_by_name("dataduck")
|
21
21
|
create_module_var("gem_root", spec.gem_dir)
|
22
22
|
|
23
|
-
|
24
|
-
|
23
|
+
detect_project_root = Dir.getwd
|
24
|
+
while true
|
25
|
+
if detect_project_root == ""
|
26
|
+
raise Exception.new("Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?")
|
27
|
+
end
|
28
|
+
|
29
|
+
if File.exist?(detect_project_root + '/Gemfile')
|
30
|
+
break
|
31
|
+
end
|
32
|
+
|
33
|
+
detect_project_root_splits = detect_project_root.split("/")
|
34
|
+
detect_project_root_splits = detect_project_root_splits[0..detect_project_root_splits.length - 2]
|
35
|
+
detect_project_root = detect_project_root_splits.join("/")
|
36
|
+
end
|
37
|
+
create_module_var("project_root", detect_project_root)
|
25
38
|
|
26
|
-
|
27
|
-
|
28
|
-
|
39
|
+
create_module_var("config", {})
|
40
|
+
configs_to_load = ["/config/base.yml", "/config/#{ ENV['DATADUCK_ENV'] }.yml",
|
41
|
+
"/config/secret/base.yml", "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"]
|
42
|
+
configs_to_load.each do |relative_path|
|
43
|
+
config_path = DataDuck.project_root + relative_path
|
44
|
+
loaded_config = File.exist?(config_path) ? YAML.load_file(config_path) : {}
|
45
|
+
DataDuck.config = Util.deep_merge(DataDuck.config, loaded_config)
|
46
|
+
end
|
29
47
|
|
30
48
|
create_module_var("sources", {})
|
31
49
|
create_module_var("destinations", {})
|
data/lib/dataduck/commands.rb
CHANGED
@@ -170,7 +170,7 @@ module DataDuck
|
|
170
170
|
postgresql: DataDuck::PostgresqlSource,
|
171
171
|
}[db_type]
|
172
172
|
|
173
|
-
db_source = db_class.new({
|
173
|
+
db_source = db_class.new("source1", {
|
174
174
|
'db_type' => db_type.to_s,
|
175
175
|
'host' => source_host,
|
176
176
|
'database' => source_database,
|
@@ -189,20 +189,17 @@ module DataDuck
|
|
189
189
|
|
190
190
|
config_obj = {
|
191
191
|
'sources' => {
|
192
|
-
'
|
192
|
+
'source1' => {
|
193
193
|
'type' => db_type.to_s,
|
194
194
|
'host' => source_host,
|
195
195
|
'database' => source_database,
|
196
196
|
'port' => source_port,
|
197
197
|
'username' => source_username,
|
198
|
-
'password' => source_password,
|
199
198
|
}
|
200
199
|
},
|
201
200
|
'destinations' => {
|
202
|
-
'
|
201
|
+
'destination1' => {
|
203
202
|
'type' => 'redshift',
|
204
|
-
'aws_key' => 'YOUR_AWS_KEY',
|
205
|
-
'aws_secret' => 'YOUR_AWS_SECRET',
|
206
203
|
's3_bucket' => 'YOUR_BUCKET',
|
207
204
|
's3_region' => 'YOUR_BUCKET_REGION',
|
208
205
|
'host' => 'redshift.somekeygoeshere.us-west-2.redshift.amazonaws.com',
|
@@ -210,28 +207,32 @@ module DataDuck
|
|
210
207
|
'database' => 'main',
|
211
208
|
'schema' => 'public',
|
212
209
|
'username' => 'YOUR_UESRNAME',
|
213
|
-
'password' => 'YOUR_PASSWORD',
|
214
210
|
}
|
215
211
|
}
|
216
212
|
}
|
213
|
+
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/base.yml", config_obj.to_yaml)
|
214
|
+
|
215
|
+
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/.env", """
|
216
|
+
destination1_aws_key=AWS_KEY_GOES_HERE
|
217
|
+
destination1_aws_secret=AWS_SECRET_GOES_HERE
|
218
|
+
destination1_password=REDSHIFT_PASSWORD_GOES_HERE
|
219
|
+
source1_password=#{ source_password }
|
220
|
+
""".strip)
|
217
221
|
|
218
|
-
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/secret/#{ DataDuck.environment }.yml", config_obj.to_yaml)
|
219
|
-
DataDuck::Commands.quickstart_save_main
|
220
222
|
DataDuck::Commands.quickstart_update_gitignore
|
221
223
|
|
222
224
|
puts "Quickstart complete!"
|
223
|
-
puts "You still need to edit your config/
|
224
|
-
puts "Run your ETL with:
|
225
|
+
puts "You still need to edit your .env and config/base.yml files with your AWS and Redshift credentials."
|
226
|
+
puts "Run your ETL with: dataduck etl all"
|
227
|
+
puts "For more help, visit http://dataducketl.com/docs"
|
225
228
|
end
|
226
229
|
|
227
230
|
def self.quickstart_update_gitignore
|
228
231
|
main_gitignore_path = "#{ DataDuck.project_root }/.gitignore"
|
229
232
|
FileUtils.touch(main_gitignore_path)
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
output = File.open(secret_gitignore_path, "w")
|
234
|
-
output << '[^.]*'
|
233
|
+
output = File.open(main_gitignore_path, "w")
|
234
|
+
output << ".DS_Store\n"
|
235
|
+
output << ".env\n"
|
235
236
|
output.close
|
236
237
|
end
|
237
238
|
|
@@ -264,12 +265,5 @@ module DataDuck
|
|
264
265
|
output << contents
|
265
266
|
output.close
|
266
267
|
end
|
267
|
-
|
268
|
-
def self.quickstart_save_main
|
269
|
-
namespace = Namespace.new
|
270
|
-
template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/main.rb.erb", 'r').read
|
271
|
-
result = ERB.new(template).result(namespace.get_binding)
|
272
|
-
DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/main.rb", result)
|
273
|
-
end
|
274
268
|
end
|
275
269
|
end
|
data/lib/dataduck/database.rb
CHANGED
@@ -20,13 +20,19 @@ module DataDuck
|
|
20
20
|
|
21
21
|
protected
|
22
22
|
|
23
|
+
def load_value(prop_name, db_name, config)
|
24
|
+
self.send("#{ prop_name }=", config[prop_name] || ENV["#{ db_name }_#{ prop_name }"])
|
25
|
+
end
|
26
|
+
|
23
27
|
def find_command_and_execute(commands, *args)
|
24
28
|
# This function was originally sourced from Rails
|
25
29
|
# https://github.com/rails/rails
|
26
30
|
#
|
27
|
-
#
|
31
|
+
# This function was licensed under the MIT license
|
28
32
|
# http://opensource.org/licenses/MIT
|
29
33
|
#
|
34
|
+
# The license asks to include the following with the source code:
|
35
|
+
#
|
30
36
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
31
37
|
# of this software and associated documentation files (the "Software"), to deal
|
32
38
|
# in the Software without restriction, including without limitation the rights
|
@@ -68,11 +74,11 @@ module DataDuck
|
|
68
74
|
# This method is not all exhaustive, and is not meant to be necessarily relied on, but is a
|
69
75
|
# sanity check that can be used to ensure certain sql is not mutating.
|
70
76
|
|
71
|
-
return true if sql.downcase.start_with?("drop table")
|
72
|
-
return true if sql.downcase.start_with?("create table")
|
73
|
-
return true if sql.downcase.start_with?("delete from")
|
74
|
-
return true if sql.downcase.start_with?("insert into")
|
75
|
-
return true if sql.downcase.start_with?("alter table")
|
77
|
+
return true if sql.downcase.strip.start_with?("drop table")
|
78
|
+
return true if sql.downcase.strip.start_with?("create table")
|
79
|
+
return true if sql.downcase.strip.start_with?("delete from")
|
80
|
+
return true if sql.downcase.strip.start_with?("insert into")
|
81
|
+
return true if sql.downcase.strip.start_with?("alter table")
|
76
82
|
|
77
83
|
false
|
78
84
|
end
|
data/lib/dataduck/mysql_source.rb
CHANGED
@@ -10,11 +10,11 @@ module DataDuck
|
|
10
10
|
|
11
11
|
def dbconsole(options = {})
|
12
12
|
args = []
|
13
|
-
args << "--host=#{
|
14
|
-
args << "--user=#{
|
15
|
-
args << "--database=#{
|
16
|
-
args << "--port=#{
|
17
|
-
args << "--password=#{
|
13
|
+
args << "--host=#{ self.host }"
|
14
|
+
args << "--user=#{ self.username }"
|
15
|
+
args << "--database=#{ self.database }"
|
16
|
+
args << "--port=#{ self.port }"
|
17
|
+
args << "--password=#{ self.password }"
|
18
18
|
|
19
19
|
self.find_command_and_execute("mysql", *args)
|
20
20
|
end
|
data/lib/dataduck/postgresql_source.rb
CHANGED
@@ -10,12 +10,12 @@ module DataDuck
|
|
10
10
|
|
11
11
|
def dbconsole(options = {})
|
12
12
|
args = []
|
13
|
-
args << "--host=#{
|
14
|
-
args << "--username=#{
|
15
|
-
args << "--dbname=#{
|
16
|
-
args << "--port=#{
|
13
|
+
args << "--host=#{ self.host }"
|
14
|
+
args << "--username=#{ self.username }"
|
15
|
+
args << "--dbname=#{ self.database }"
|
16
|
+
args << "--port=#{ self.port }"
|
17
17
|
|
18
|
-
ENV['PGPASSWORD'] =
|
18
|
+
ENV['PGPASSWORD'] = self.password
|
19
19
|
|
20
20
|
self.find_command_and_execute("psql", *args)
|
21
21
|
end
|
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -2,24 +2,36 @@ require_relative 'destination'
|
|
2
2
|
|
3
3
|
module DataDuck
|
4
4
|
class RedshiftDestination < DataDuck::Destination
|
5
|
+
attr_accessor :aws_key
|
6
|
+
attr_accessor :aws_secret
|
7
|
+
attr_accessor :s3_bucket
|
8
|
+
attr_accessor :s3_region
|
9
|
+
attr_accessor :host
|
10
|
+
attr_accessor :port
|
11
|
+
attr_accessor :database
|
12
|
+
attr_accessor :schema
|
13
|
+
attr_accessor :username
|
14
|
+
attr_accessor :password
|
15
|
+
|
5
16
|
def initialize(name, config)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
17
|
+
load_value('aws_key', name, config)
|
18
|
+
load_value('aws_secret', name, config)
|
19
|
+
load_value('s3_bucket', name, config)
|
20
|
+
load_value('s3_region', name, config)
|
21
|
+
load_value('host', name, config)
|
22
|
+
load_value('port', name, config)
|
23
|
+
load_value('database', name, config)
|
24
|
+
load_value('schema', name, config)
|
25
|
+
load_value('username', name, config)
|
26
|
+
load_value('password', name, config)
|
27
|
+
|
16
28
|
@redshift_connection = nil
|
17
29
|
|
18
30
|
super
|
19
31
|
end
|
20
32
|
|
21
33
|
def connection
|
22
|
-
@redshift_connection ||= Sequel.connect("redshift://#{
|
34
|
+
@redshift_connection ||= Sequel.connect("redshift://#{ self.username }:#{ self.password }@#{ self.host }:#{ self.port }/#{ self.database }" +
|
23
35
|
"?force_standard_strings=f",
|
24
36
|
:client_min_messages => '',
|
25
37
|
:force_standard_strings => false
|
@@ -31,8 +43,8 @@ module DataDuck
|
|
31
43
|
query_fragments = []
|
32
44
|
query_fragments << "COPY #{ table.staging_name } (#{ properties_joined_string })"
|
33
45
|
query_fragments << "FROM '#{ s3_path }'"
|
34
|
-
query_fragments << "CREDENTIALS 'aws_access_key_id=#{
|
35
|
-
query_fragments << "REGION '#{
|
46
|
+
query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
|
47
|
+
query_fragments << "REGION '#{ self.s3_region }'"
|
36
48
|
query_fragments << "CSV TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
|
37
49
|
query_fragments << "DATEFORMAT 'auto'"
|
38
50
|
return query_fragments.join(" ")
|
@@ -113,12 +125,12 @@ module DataDuck
|
|
113
125
|
|
114
126
|
def dbconsole(options = {})
|
115
127
|
args = []
|
116
|
-
args << "--host=#{
|
117
|
-
args << "--username=#{
|
118
|
-
args << "--dbname=#{
|
119
|
-
args << "--port=#{
|
128
|
+
args << "--host=#{ self.host }"
|
129
|
+
args << "--username=#{ self.username }"
|
130
|
+
args << "--dbname=#{ self.database }"
|
131
|
+
args << "--port=#{ self.port }"
|
120
132
|
|
121
|
-
ENV['PGPASSWORD'] =
|
133
|
+
ENV['PGPASSWORD'] = self.password
|
122
134
|
|
123
135
|
self.find_command_and_execute("psql", *args)
|
124
136
|
end
|
@@ -173,8 +185,8 @@ module DataDuck
|
|
173
185
|
|
174
186
|
table_csv = self.data_as_csv_string(table.data, table.output_column_names)
|
175
187
|
|
176
|
-
s3_obj = S3Object.new(filepath, table_csv,
|
177
|
-
|
188
|
+
s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
|
189
|
+
self.s3_bucket, self.s3_region)
|
178
190
|
s3_obj.upload!
|
179
191
|
return s3_obj
|
180
192
|
end
|
data/lib/dataduck/sql_db_source.rb
CHANGED
@@ -5,13 +5,20 @@ require 'sequel'
|
|
5
5
|
|
6
6
|
module DataDuck
|
7
7
|
class SqlDbSource < DataDuck::Source
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
8
|
+
attr_accessor :host
|
9
|
+
attr_accessor :port
|
10
|
+
attr_accessor :username
|
11
|
+
attr_accessor :password
|
12
|
+
attr_accessor :database
|
13
|
+
|
14
|
+
def initialize(name, config)
|
15
|
+
load_value('host', name, config)
|
16
|
+
load_value('port', name, config)
|
17
|
+
load_value('username', name, config)
|
18
|
+
load_value('password', name, config)
|
19
|
+
load_value('database', name, config)
|
20
|
+
|
21
|
+
@initialized_db_type = config['db_type']
|
15
22
|
|
16
23
|
super
|
17
24
|
end
|
@@ -19,11 +26,11 @@ module DataDuck
|
|
19
26
|
def connection
|
20
27
|
@connection ||= Sequel.connect(
|
21
28
|
adapter: self.db_type,
|
22
|
-
user:
|
23
|
-
host:
|
24
|
-
database:
|
25
|
-
password:
|
26
|
-
port:
|
29
|
+
user: self.username,
|
30
|
+
host: self.host,
|
31
|
+
database: self.database,
|
32
|
+
password: self.password,
|
33
|
+
port: self.port
|
27
34
|
)
|
28
35
|
end
|
29
36
|
|
data/lib/dataduck/util.rb
CHANGED
@@ -2,6 +2,11 @@ require 'fileutils'
|
|
2
2
|
|
3
3
|
module DataDuck
|
4
4
|
module Util
|
5
|
+
def Util.deep_merge(first, second)
|
6
|
+
merger = proc { |key, v1, v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : v2 }
|
7
|
+
first.merge(second, &merger)
|
8
|
+
end
|
9
|
+
|
5
10
|
def Util.ensure_path_exists!(full_path)
|
6
11
|
split_paths = full_path.split('/')
|
7
12
|
just_file_path = split_paths.pop
|
data/lib/dataduck/version.rb
CHANGED
data/lib/templates/quickstart/table.rb.erb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
class <%= table_name_camelcased %> < DataDuck::Table
|
2
|
-
source :
|
2
|
+
source :source1, :<%= table_name %>, ["<%= columns.map { |col| col[0] }.join('", "') %>"]
|
3
3
|
|
4
4
|
output({<% columns.each do |col| %>
|
5
5
|
<%= '# ' if col[2] %>:<%= col[0] %> => :<%= col[1] %>,<% end %>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.5
|
4
|
+
version: 0.5.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -191,7 +191,6 @@ files:
|
|
191
191
|
- lib/dataduck/util.rb
|
192
192
|
- lib/dataduck/version.rb
|
193
193
|
- lib/helpers/module_vars.rb
|
194
|
-
- lib/templates/quickstart/main.rb.erb
|
195
194
|
- lib/templates/quickstart/table.rb.erb
|
196
195
|
- static/logo.png
|
197
196
|
homepage: http://dataducketl.com/
|