dataduck 0.5.5 → 0.5.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 878a28b576d47ad2c2ba6a1df7d984e937bad5dd
4
- data.tar.gz: 73ef27e0862f684e079a040631fd001a8bade409
3
+ metadata.gz: 9c172e7a467c64f911c45129ee6cb4ec8cc862e9
4
+ data.tar.gz: 057a610687e9ecf15b2baeaa8dfe338c634205a5
5
5
  SHA512:
6
- metadata.gz: 39e8be0c1acbbd2819a9dad50107ad77d5142ed2fdb7f2b9e946a0ed4fb9d0061db630d7be2004872381c270390fb169033dbeb7a8b10c2db574f5cbe37e6d34
7
- data.tar.gz: 3b9fcbf4268df2af1d3f6131d4e986c7995b633c56d001578f6e332c95131e3cfea956a46ac9c997fde61c77bd092985d3e5120bfeb6fd585dda8fcd9969752b
6
+ metadata.gz: 327385731802b5932ba7c44b0cc596d2a412edf51100ee6df618905951b51b4a46c507aa44493ec43a1bc8f12c9ca1bcdc64722daf3ca2048b1ec8fa214a65f0
7
+ data.tar.gz: b65b340d598ee5302137bebf68a8172f33d346ddf792a9a8dcb8b7524288a1ae3f18d6e1eed6f8e7731e0cf77933c7931964b5c7b6830e85f3dfbaf8043cbecb
@@ -13,7 +13,7 @@ Usage to show info for just one table:
13
13
  $ dataduck show users
14
14
  Table users
15
15
 
16
- Sources from users on my_database
16
+ Sources from users on source1
17
17
  created_at
18
18
  updated_at
19
19
  id
@@ -54,7 +54,7 @@ The following is an example table.
54
54
 
55
55
  ```ruby
56
56
  class Decks < DataDuck::Table
57
- source :my_database, ["id", "name", "user_id", "cards",
57
+ source :source1, ["id", "name", "user_id", "cards",
58
58
  "num_wins", "num_losses", "created_at", "updated_at",
59
59
  "is_drafted", "num_draft_wins", "num_draft_losses"]
60
60
 
data/lib/dataduck.rb CHANGED
@@ -20,12 +20,30 @@ module DataDuck
20
20
  spec = Gem::Specification.find_by_name("dataduck")
21
21
  create_module_var("gem_root", spec.gem_dir)
22
22
 
23
- create_module_var("project_root", Dir.getwd)
24
- create_module_var("config", {})
23
+ detect_project_root = Dir.getwd
24
+ while true
25
+ if detect_project_root == ""
26
+ raise Exception.new("Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?")
27
+ end
28
+
29
+ if File.exist?(detect_project_root + '/Gemfile')
30
+ break
31
+ end
32
+
33
+ detect_project_root_splits = detect_project_root.split("/")
34
+ detect_project_root_splits = detect_project_root_splits[0..detect_project_root_splits.length - 2]
35
+ detect_project_root = detect_project_root_splits.join("/")
36
+ end
37
+ create_module_var("project_root", detect_project_root)
25
38
 
26
- dd_env_path = DataDuck.project_root + "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"
27
- env_config = File.exist?(dd_env_path) ? YAML.load_file(dd_env_path) : {}
28
- DataDuck.config.merge!(env_config)
39
+ create_module_var("config", {})
40
+ configs_to_load = ["/config/base.yml", "/config/#{ ENV['DATADUCK_ENV'] }.yml",
41
+ "/config/secret/base.yml", "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"]
42
+ configs_to_load.each do |relative_path|
43
+ config_path = DataDuck.project_root + relative_path
44
+ loaded_config = File.exist?(config_path) ? YAML.load_file(config_path) : {}
45
+ DataDuck.config = Util.deep_merge(DataDuck.config, loaded_config)
46
+ end
29
47
 
30
48
  create_module_var("sources", {})
31
49
  create_module_var("destinations", {})
@@ -170,7 +170,7 @@ module DataDuck
170
170
  postgresql: DataDuck::PostgresqlSource,
171
171
  }[db_type]
172
172
 
173
- db_source = db_class.new({
173
+ db_source = db_class.new("source1", {
174
174
  'db_type' => db_type.to_s,
175
175
  'host' => source_host,
176
176
  'database' => source_database,
@@ -189,20 +189,17 @@ module DataDuck
189
189
 
190
190
  config_obj = {
191
191
  'sources' => {
192
- 'my_database' => {
192
+ 'source1' => {
193
193
  'type' => db_type.to_s,
194
194
  'host' => source_host,
195
195
  'database' => source_database,
196
196
  'port' => source_port,
197
197
  'username' => source_username,
198
- 'password' => source_password,
199
198
  }
200
199
  },
201
200
  'destinations' => {
202
- 'my_destination' => {
201
+ 'destination1' => {
203
202
  'type' => 'redshift',
204
- 'aws_key' => 'YOUR_AWS_KEY',
205
- 'aws_secret' => 'YOUR_AWS_SECRET',
206
203
  's3_bucket' => 'YOUR_BUCKET',
207
204
  's3_region' => 'YOUR_BUCKET_REGION',
208
205
  'host' => 'redshift.somekeygoeshere.us-west-2.redshift.amazonaws.com',
@@ -210,28 +207,32 @@ module DataDuck
210
207
  'database' => 'main',
211
208
  'schema' => 'public',
212
209
  'username' => 'YOUR_UESRNAME',
213
- 'password' => 'YOUR_PASSWORD',
214
210
  }
215
211
  }
216
212
  }
213
+ DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/base.yml", config_obj.to_yaml)
214
+
215
+ DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/.env", """
216
+ destination1_aws_key=AWS_KEY_GOES_HERE
217
+ destination1_aws_secret=AWS_SECRET_GOES_HERE
218
+ destination1_password=REDSHIFT_PASSWORD_GOES_HERE
219
+ source1_password=#{ source_password }
220
+ """.strip)
217
221
 
218
- DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/secret/#{ DataDuck.environment }.yml", config_obj.to_yaml)
219
- DataDuck::Commands.quickstart_save_main
220
222
  DataDuck::Commands.quickstart_update_gitignore
221
223
 
222
224
  puts "Quickstart complete!"
223
- puts "You still need to edit your config/secret/*.yml file with your AWS and Redshift credentials."
224
- puts "Run your ETL with: ruby src/main.rb"
225
+ puts "You still need to edit your .env and config/base.yml files with your AWS and Redshift credentials."
226
+ puts "Run your ETL with: dataduck etl all"
227
+ puts "For more help, visit http://dataducketl.com/docs"
225
228
  end
226
229
 
227
230
  def self.quickstart_update_gitignore
228
231
  main_gitignore_path = "#{ DataDuck.project_root }/.gitignore"
229
232
  FileUtils.touch(main_gitignore_path)
230
-
231
- secret_gitignore_path = "#{ DataDuck.project_root }/config/secret/.gitignore"
232
- FileUtils.touch(secret_gitignore_path)
233
- output = File.open(secret_gitignore_path, "w")
234
- output << '[^.]*'
233
+ output = File.open(main_gitignore_path, "w")
234
+ output << ".DS_Store\n"
235
+ output << ".env\n"
235
236
  output.close
236
237
  end
237
238
 
@@ -264,12 +265,5 @@ module DataDuck
264
265
  output << contents
265
266
  output.close
266
267
  end
267
-
268
- def self.quickstart_save_main
269
- namespace = Namespace.new
270
- template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/main.rb.erb", 'r').read
271
- result = ERB.new(template).result(namespace.get_binding)
272
- DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/main.rb", result)
273
- end
274
268
  end
275
269
  end
@@ -20,13 +20,19 @@ module DataDuck
20
20
 
21
21
  protected
22
22
 
23
+ def load_value(prop_name, db_name, config)
24
+ self.send("#{ prop_name }=", config[prop_name] || ENV["#{ db_name }_#{ prop_name }"])
25
+ end
26
+
23
27
  def find_command_and_execute(commands, *args)
24
28
  # This function was originally sourced from Rails
25
29
  # https://github.com/rails/rails
26
30
  #
27
- # Licensed under the MIT license
31
+ # This function was licensed under the MIT license
28
32
  # http://opensource.org/licenses/MIT
29
33
  #
34
+ # The license asks to include the following with the source code:
35
+ #
30
36
  # Permission is hereby granted, free of charge, to any person obtaining a copy
31
37
  # of this software and associated documentation files (the "Software"), to deal
32
38
  # in the Software without restriction, including without limitation the rights
@@ -68,11 +74,11 @@ module DataDuck
68
74
  # This method is not all exhaustive, and is not meant to be necessarily relied on, but is a
69
75
  # sanity check that can be used to ensure certain sql is not mutating.
70
76
 
71
- return true if sql.downcase.start_with?("drop table")
72
- return true if sql.downcase.start_with?("create table")
73
- return true if sql.downcase.start_with?("delete from")
74
- return true if sql.downcase.start_with?("insert into")
75
- return true if sql.downcase.start_with?("alter table")
77
+ return true if sql.downcase.strip.start_with?("drop table")
78
+ return true if sql.downcase.strip.start_with?("create table")
79
+ return true if sql.downcase.strip.start_with?("delete from")
80
+ return true if sql.downcase.strip.start_with?("insert into")
81
+ return true if sql.downcase.strip.start_with?("alter table")
76
82
 
77
83
  false
78
84
  end
@@ -10,11 +10,11 @@ module DataDuck
10
10
 
11
11
  def dbconsole(options = {})
12
12
  args = []
13
- args << "--host=#{ @host }"
14
- args << "--user=#{ @username }"
15
- args << "--database=#{ @database }"
16
- args << "--port=#{ @port }"
17
- args << "--password=#{ @password }"
13
+ args << "--host=#{ self.host }"
14
+ args << "--user=#{ self.username }"
15
+ args << "--database=#{ self.database }"
16
+ args << "--port=#{ self.port }"
17
+ args << "--password=#{ self.password }"
18
18
 
19
19
  self.find_command_and_execute("mysql", *args)
20
20
  end
@@ -10,12 +10,12 @@ module DataDuck
10
10
 
11
11
  def dbconsole(options = {})
12
12
  args = []
13
- args << "--host=#{ @host }"
14
- args << "--username=#{ @username }"
15
- args << "--dbname=#{ @database }"
16
- args << "--port=#{ @port }"
13
+ args << "--host=#{ self.host }"
14
+ args << "--username=#{ self.username }"
15
+ args << "--dbname=#{ self.database }"
16
+ args << "--port=#{ self.port }"
17
17
 
18
- ENV['PGPASSWORD'] = @password
18
+ ENV['PGPASSWORD'] = self.password
19
19
 
20
20
  self.find_command_and_execute("psql", *args)
21
21
  end
@@ -2,24 +2,36 @@ require_relative 'destination'
2
2
 
3
3
  module DataDuck
4
4
  class RedshiftDestination < DataDuck::Destination
5
+ attr_accessor :aws_key
6
+ attr_accessor :aws_secret
7
+ attr_accessor :s3_bucket
8
+ attr_accessor :s3_region
9
+ attr_accessor :host
10
+ attr_accessor :port
11
+ attr_accessor :database
12
+ attr_accessor :schema
13
+ attr_accessor :username
14
+ attr_accessor :password
15
+
5
16
  def initialize(name, config)
6
- @aws_key = config['aws_key']
7
- @aws_secret = config['aws_secret']
8
- @s3_bucket = config['s3_bucket']
9
- @s3_region = config['s3_region']
10
- @host = config['host']
11
- @port = config['port']
12
- @database = config['database']
13
- @schema = config['schema']
14
- @username = config['username']
15
- @password = config['password']
17
+ load_value('aws_key', name, config)
18
+ load_value('aws_secret', name, config)
19
+ load_value('s3_bucket', name, config)
20
+ load_value('s3_region', name, config)
21
+ load_value('host', name, config)
22
+ load_value('port', name, config)
23
+ load_value('database', name, config)
24
+ load_value('schema', name, config)
25
+ load_value('username', name, config)
26
+ load_value('password', name, config)
27
+
16
28
  @redshift_connection = nil
17
29
 
18
30
  super
19
31
  end
20
32
 
21
33
  def connection
22
- @redshift_connection ||= Sequel.connect("redshift://#{ @username }:#{ @password }@#{ @host }:#{ @port }/#{ @database }" +
34
+ @redshift_connection ||= Sequel.connect("redshift://#{ self.username }:#{ self.password }@#{ self.host }:#{ self.port }/#{ self.database }" +
23
35
  "?force_standard_strings=f",
24
36
  :client_min_messages => '',
25
37
  :force_standard_strings => false
@@ -31,8 +43,8 @@ module DataDuck
31
43
  query_fragments = []
32
44
  query_fragments << "COPY #{ table.staging_name } (#{ properties_joined_string })"
33
45
  query_fragments << "FROM '#{ s3_path }'"
34
- query_fragments << "CREDENTIALS 'aws_access_key_id=#{ @aws_key };aws_secret_access_key=#{ @aws_secret }'"
35
- query_fragments << "REGION '#{ @s3_region }'"
46
+ query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
47
+ query_fragments << "REGION '#{ self.s3_region }'"
36
48
  query_fragments << "CSV TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
37
49
  query_fragments << "DATEFORMAT 'auto'"
38
50
  return query_fragments.join(" ")
@@ -113,12 +125,12 @@ module DataDuck
113
125
 
114
126
  def dbconsole(options = {})
115
127
  args = []
116
- args << "--host=#{ @host }"
117
- args << "--username=#{ @username }"
118
- args << "--dbname=#{ @database }"
119
- args << "--port=#{ @port }"
128
+ args << "--host=#{ self.host }"
129
+ args << "--username=#{ self.username }"
130
+ args << "--dbname=#{ self.database }"
131
+ args << "--port=#{ self.port }"
120
132
 
121
- ENV['PGPASSWORD'] = @password
133
+ ENV['PGPASSWORD'] = self.password
122
134
 
123
135
  self.find_command_and_execute("psql", *args)
124
136
  end
@@ -173,8 +185,8 @@ module DataDuck
173
185
 
174
186
  table_csv = self.data_as_csv_string(table.data, table.output_column_names)
175
187
 
176
- s3_obj = S3Object.new(filepath, table_csv, @aws_key, @aws_secret,
177
- @s3_bucket, @s3_region)
188
+ s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
189
+ self.s3_bucket, self.s3_region)
178
190
  s3_obj.upload!
179
191
  return s3_obj
180
192
  end
@@ -5,13 +5,20 @@ require 'sequel'
5
5
 
6
6
  module DataDuck
7
7
  class SqlDbSource < DataDuck::Source
8
- def initialize(name, data)
9
- @host = data['host']
10
- @port = data['port']
11
- @username = data['username']
12
- @password = data['password']
13
- @database = data['database']
14
- @initialized_db_type = data['db_type']
8
+ attr_accessor :host
9
+ attr_accessor :port
10
+ attr_accessor :username
11
+ attr_accessor :password
12
+ attr_accessor :database
13
+
14
+ def initialize(name, config)
15
+ load_value('host', name, config)
16
+ load_value('port', name, config)
17
+ load_value('username', name, config)
18
+ load_value('password', name, config)
19
+ load_value('database', name, config)
20
+
21
+ @initialized_db_type = config['db_type']
15
22
 
16
23
  super
17
24
  end
@@ -19,11 +26,11 @@ module DataDuck
19
26
  def connection
20
27
  @connection ||= Sequel.connect(
21
28
  adapter: self.db_type,
22
- user: @username,
23
- host: @host,
24
- database: @database,
25
- password: @password,
26
- port: @port
29
+ user: self.username,
30
+ host: self.host,
31
+ database: self.database,
32
+ password: self.password,
33
+ port: self.port
27
34
  )
28
35
  end
29
36
 
data/lib/dataduck/util.rb CHANGED
@@ -2,6 +2,11 @@ require 'fileutils'
2
2
 
3
3
  module DataDuck
4
4
  module Util
5
+ def Util.deep_merge(first, second)
6
+ merger = proc { |key, v1, v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : v2 }
7
+ first.merge(second, &merger)
8
+ end
9
+
5
10
  def Util.ensure_path_exists!(full_path)
6
11
  split_paths = full_path.split('/')
7
12
  just_file_path = split_paths.pop
@@ -1,6 +1,6 @@
1
1
  module DataDuck
2
2
  VERSION_MAJOR = 0
3
3
  VERSION_MINOR = 5
4
- VERSION_PATCH = 5
4
+ VERSION_PATCH = 6
5
5
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
6
6
  end
@@ -1,5 +1,5 @@
1
1
  class <%= table_name_camelcased %> < DataDuck::Table
2
- source :my_database, :<%= table_name %>, ["<%= columns.map { |col| col[0] }.join('", "') %>"]
2
+ source :source1, :<%= table_name %>, ["<%= columns.map { |col| col[0] }.join('", "') %>"]
3
3
 
4
4
  output({<% columns.each do |col| %>
5
5
  <%= '# ' if col[2] %>:<%= col[0] %> => :<%= col[1] %>,<% end %>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.5.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-26 00:00:00.000000000 Z
11
+ date: 2015-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -191,7 +191,6 @@ files:
191
191
  - lib/dataduck/util.rb
192
192
  - lib/dataduck/version.rb
193
193
  - lib/helpers/module_vars.rb
194
- - lib/templates/quickstart/main.rb.erb
195
194
  - lib/templates/quickstart/table.rb.erb
196
195
  - static/logo.png
197
196
  homepage: http://dataducketl.com/
@@ -1,10 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler/setup'
3
- Bundler.require
4
-
5
- class MyETL < DataDuck::ETL
6
- destination :my_destination
7
- end
8
-
9
- etl = MyETL.new
10
- etl.process!