dataduck 0.5.5 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 878a28b576d47ad2c2ba6a1df7d984e937bad5dd
4
- data.tar.gz: 73ef27e0862f684e079a040631fd001a8bade409
3
+ metadata.gz: 9c172e7a467c64f911c45129ee6cb4ec8cc862e9
4
+ data.tar.gz: 057a610687e9ecf15b2baeaa8dfe338c634205a5
5
5
  SHA512:
6
- metadata.gz: 39e8be0c1acbbd2819a9dad50107ad77d5142ed2fdb7f2b9e946a0ed4fb9d0061db630d7be2004872381c270390fb169033dbeb7a8b10c2db574f5cbe37e6d34
7
- data.tar.gz: 3b9fcbf4268df2af1d3f6131d4e986c7995b633c56d001578f6e332c95131e3cfea956a46ac9c997fde61c77bd092985d3e5120bfeb6fd585dda8fcd9969752b
6
+ metadata.gz: 327385731802b5932ba7c44b0cc596d2a412edf51100ee6df618905951b51b4a46c507aa44493ec43a1bc8f12c9ca1bcdc64722daf3ca2048b1ec8fa214a65f0
7
+ data.tar.gz: b65b340d598ee5302137bebf68a8172f33d346ddf792a9a8dcb8b7524288a1ae3f18d6e1eed6f8e7731e0cf77933c7931964b5c7b6830e85f3dfbaf8043cbecb
@@ -13,7 +13,7 @@ Usage to show info for just one table:
13
13
  $ dataduck show users
14
14
  Table users
15
15
 
16
- Sources from users on my_database
16
+ Sources from users on source1
17
17
  created_at
18
18
  updated_at
19
19
  id
@@ -54,7 +54,7 @@ The following is an example table.
54
54
 
55
55
  ```ruby
56
56
  class Decks < DataDuck::Table
57
- source :my_database, ["id", "name", "user_id", "cards",
57
+ source :source1, ["id", "name", "user_id", "cards",
58
58
  "num_wins", "num_losses", "created_at", "updated_at",
59
59
  "is_drafted", "num_draft_wins", "num_draft_losses"]
60
60
 
data/lib/dataduck.rb CHANGED
@@ -20,12 +20,30 @@ module DataDuck
20
20
  spec = Gem::Specification.find_by_name("dataduck")
21
21
  create_module_var("gem_root", spec.gem_dir)
22
22
 
23
- create_module_var("project_root", Dir.getwd)
24
- create_module_var("config", {})
23
+ detect_project_root = Dir.getwd
24
+ while true
25
+ if detect_project_root == ""
26
+ raise Exception.new("Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?")
27
+ end
28
+
29
+ if File.exist?(detect_project_root + '/Gemfile')
30
+ break
31
+ end
32
+
33
+ detect_project_root_splits = detect_project_root.split("/")
34
+ detect_project_root_splits = detect_project_root_splits[0..detect_project_root_splits.length - 2]
35
+ detect_project_root = detect_project_root_splits.join("/")
36
+ end
37
+ create_module_var("project_root", detect_project_root)
25
38
 
26
- dd_env_path = DataDuck.project_root + "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"
27
- env_config = File.exist?(dd_env_path) ? YAML.load_file(dd_env_path) : {}
28
- DataDuck.config.merge!(env_config)
39
+ create_module_var("config", {})
40
+ configs_to_load = ["/config/base.yml", "/config/#{ ENV['DATADUCK_ENV'] }.yml",
41
+ "/config/secret/base.yml", "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"]
42
+ configs_to_load.each do |relative_path|
43
+ config_path = DataDuck.project_root + relative_path
44
+ loaded_config = File.exist?(config_path) ? YAML.load_file(config_path) : {}
45
+ DataDuck.config = Util.deep_merge(DataDuck.config, loaded_config)
46
+ end
29
47
 
30
48
  create_module_var("sources", {})
31
49
  create_module_var("destinations", {})
@@ -170,7 +170,7 @@ module DataDuck
170
170
  postgresql: DataDuck::PostgresqlSource,
171
171
  }[db_type]
172
172
 
173
- db_source = db_class.new({
173
+ db_source = db_class.new("source1", {
174
174
  'db_type' => db_type.to_s,
175
175
  'host' => source_host,
176
176
  'database' => source_database,
@@ -189,20 +189,17 @@ module DataDuck
189
189
 
190
190
  config_obj = {
191
191
  'sources' => {
192
- 'my_database' => {
192
+ 'source1' => {
193
193
  'type' => db_type.to_s,
194
194
  'host' => source_host,
195
195
  'database' => source_database,
196
196
  'port' => source_port,
197
197
  'username' => source_username,
198
- 'password' => source_password,
199
198
  }
200
199
  },
201
200
  'destinations' => {
202
- 'my_destination' => {
201
+ 'destination1' => {
203
202
  'type' => 'redshift',
204
- 'aws_key' => 'YOUR_AWS_KEY',
205
- 'aws_secret' => 'YOUR_AWS_SECRET',
206
203
  's3_bucket' => 'YOUR_BUCKET',
207
204
  's3_region' => 'YOUR_BUCKET_REGION',
208
205
  'host' => 'redshift.somekeygoeshere.us-west-2.redshift.amazonaws.com',
@@ -210,28 +207,32 @@ module DataDuck
210
207
  'database' => 'main',
211
208
  'schema' => 'public',
212
209
  'username' => 'YOUR_UESRNAME',
213
- 'password' => 'YOUR_PASSWORD',
214
210
  }
215
211
  }
216
212
  }
213
+ DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/base.yml", config_obj.to_yaml)
214
+
215
+ DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/.env", """
216
+ destination1_aws_key=AWS_KEY_GOES_HERE
217
+ destination1_aws_secret=AWS_SECRET_GOES_HERE
218
+ destination1_password=REDSHIFT_PASSWORD_GOES_HERE
219
+ source1_password=#{ source_password }
220
+ """.strip)
217
221
 
218
- DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/secret/#{ DataDuck.environment }.yml", config_obj.to_yaml)
219
- DataDuck::Commands.quickstart_save_main
220
222
  DataDuck::Commands.quickstart_update_gitignore
221
223
 
222
224
  puts "Quickstart complete!"
223
- puts "You still need to edit your config/secret/*.yml file with your AWS and Redshift credentials."
224
- puts "Run your ETL with: ruby src/main.rb"
225
+ puts "You still need to edit your .env and config/base.yml files with your AWS and Redshift credentials."
226
+ puts "Run your ETL with: dataduck etl all"
227
+ puts "For more help, visit http://dataducketl.com/docs"
225
228
  end
226
229
 
227
230
  def self.quickstart_update_gitignore
228
231
  main_gitignore_path = "#{ DataDuck.project_root }/.gitignore"
229
232
  FileUtils.touch(main_gitignore_path)
230
-
231
- secret_gitignore_path = "#{ DataDuck.project_root }/config/secret/.gitignore"
232
- FileUtils.touch(secret_gitignore_path)
233
- output = File.open(secret_gitignore_path, "w")
234
- output << '[^.]*'
233
+ output = File.open(main_gitignore_path, "w")
234
+ output << ".DS_Store\n"
235
+ output << ".env\n"
235
236
  output.close
236
237
  end
237
238
 
@@ -264,12 +265,5 @@ module DataDuck
264
265
  output << contents
265
266
  output.close
266
267
  end
267
-
268
- def self.quickstart_save_main
269
- namespace = Namespace.new
270
- template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/main.rb.erb", 'r').read
271
- result = ERB.new(template).result(namespace.get_binding)
272
- DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/main.rb", result)
273
- end
274
268
  end
275
269
  end
@@ -20,13 +20,19 @@ module DataDuck
20
20
 
21
21
  protected
22
22
 
23
+ def load_value(prop_name, db_name, config)
24
+ self.send("#{ prop_name }=", config[prop_name] || ENV["#{ db_name }_#{ prop_name }"])
25
+ end
26
+
23
27
  def find_command_and_execute(commands, *args)
24
28
  # This function was originally sourced from Rails
25
29
  # https://github.com/rails/rails
26
30
  #
27
- # Licensed under the MIT license
31
+ # This function was licensed under the MIT license
28
32
  # http://opensource.org/licenses/MIT
29
33
  #
34
+ # The license asks to include the following with the source code:
35
+ #
30
36
  # Permission is hereby granted, free of charge, to any person obtaining a copy
31
37
  # of this software and associated documentation files (the "Software"), to deal
32
38
  # in the Software without restriction, including without limitation the rights
@@ -68,11 +74,11 @@ module DataDuck
68
74
  # This method is not all exhaustive, and is not meant to be necessarily relied on, but is a
69
75
  # sanity check that can be used to ensure certain sql is not mutating.
70
76
 
71
- return true if sql.downcase.start_with?("drop table")
72
- return true if sql.downcase.start_with?("create table")
73
- return true if sql.downcase.start_with?("delete from")
74
- return true if sql.downcase.start_with?("insert into")
75
- return true if sql.downcase.start_with?("alter table")
77
+ return true if sql.downcase.strip.start_with?("drop table")
78
+ return true if sql.downcase.strip.start_with?("create table")
79
+ return true if sql.downcase.strip.start_with?("delete from")
80
+ return true if sql.downcase.strip.start_with?("insert into")
81
+ return true if sql.downcase.strip.start_with?("alter table")
76
82
 
77
83
  false
78
84
  end
@@ -10,11 +10,11 @@ module DataDuck
10
10
 
11
11
  def dbconsole(options = {})
12
12
  args = []
13
- args << "--host=#{ @host }"
14
- args << "--user=#{ @username }"
15
- args << "--database=#{ @database }"
16
- args << "--port=#{ @port }"
17
- args << "--password=#{ @password }"
13
+ args << "--host=#{ self.host }"
14
+ args << "--user=#{ self.username }"
15
+ args << "--database=#{ self.database }"
16
+ args << "--port=#{ self.port }"
17
+ args << "--password=#{ self.password }"
18
18
 
19
19
  self.find_command_and_execute("mysql", *args)
20
20
  end
@@ -10,12 +10,12 @@ module DataDuck
10
10
 
11
11
  def dbconsole(options = {})
12
12
  args = []
13
- args << "--host=#{ @host }"
14
- args << "--username=#{ @username }"
15
- args << "--dbname=#{ @database }"
16
- args << "--port=#{ @port }"
13
+ args << "--host=#{ self.host }"
14
+ args << "--username=#{ self.username }"
15
+ args << "--dbname=#{ self.database }"
16
+ args << "--port=#{ self.port }"
17
17
 
18
- ENV['PGPASSWORD'] = @password
18
+ ENV['PGPASSWORD'] = self.password
19
19
 
20
20
  self.find_command_and_execute("psql", *args)
21
21
  end
@@ -2,24 +2,36 @@ require_relative 'destination'
2
2
 
3
3
  module DataDuck
4
4
  class RedshiftDestination < DataDuck::Destination
5
+ attr_accessor :aws_key
6
+ attr_accessor :aws_secret
7
+ attr_accessor :s3_bucket
8
+ attr_accessor :s3_region
9
+ attr_accessor :host
10
+ attr_accessor :port
11
+ attr_accessor :database
12
+ attr_accessor :schema
13
+ attr_accessor :username
14
+ attr_accessor :password
15
+
5
16
  def initialize(name, config)
6
- @aws_key = config['aws_key']
7
- @aws_secret = config['aws_secret']
8
- @s3_bucket = config['s3_bucket']
9
- @s3_region = config['s3_region']
10
- @host = config['host']
11
- @port = config['port']
12
- @database = config['database']
13
- @schema = config['schema']
14
- @username = config['username']
15
- @password = config['password']
17
+ load_value('aws_key', name, config)
18
+ load_value('aws_secret', name, config)
19
+ load_value('s3_bucket', name, config)
20
+ load_value('s3_region', name, config)
21
+ load_value('host', name, config)
22
+ load_value('port', name, config)
23
+ load_value('database', name, config)
24
+ load_value('schema', name, config)
25
+ load_value('username', name, config)
26
+ load_value('password', name, config)
27
+
16
28
  @redshift_connection = nil
17
29
 
18
30
  super
19
31
  end
20
32
 
21
33
  def connection
22
- @redshift_connection ||= Sequel.connect("redshift://#{ @username }:#{ @password }@#{ @host }:#{ @port }/#{ @database }" +
34
+ @redshift_connection ||= Sequel.connect("redshift://#{ self.username }:#{ self.password }@#{ self.host }:#{ self.port }/#{ self.database }" +
23
35
  "?force_standard_strings=f",
24
36
  :client_min_messages => '',
25
37
  :force_standard_strings => false
@@ -31,8 +43,8 @@ module DataDuck
31
43
  query_fragments = []
32
44
  query_fragments << "COPY #{ table.staging_name } (#{ properties_joined_string })"
33
45
  query_fragments << "FROM '#{ s3_path }'"
34
- query_fragments << "CREDENTIALS 'aws_access_key_id=#{ @aws_key };aws_secret_access_key=#{ @aws_secret }'"
35
- query_fragments << "REGION '#{ @s3_region }'"
46
+ query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
47
+ query_fragments << "REGION '#{ self.s3_region }'"
36
48
  query_fragments << "CSV TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
37
49
  query_fragments << "DATEFORMAT 'auto'"
38
50
  return query_fragments.join(" ")
@@ -113,12 +125,12 @@ module DataDuck
113
125
 
114
126
  def dbconsole(options = {})
115
127
  args = []
116
- args << "--host=#{ @host }"
117
- args << "--username=#{ @username }"
118
- args << "--dbname=#{ @database }"
119
- args << "--port=#{ @port }"
128
+ args << "--host=#{ self.host }"
129
+ args << "--username=#{ self.username }"
130
+ args << "--dbname=#{ self.database }"
131
+ args << "--port=#{ self.port }"
120
132
 
121
- ENV['PGPASSWORD'] = @password
133
+ ENV['PGPASSWORD'] = self.password
122
134
 
123
135
  self.find_command_and_execute("psql", *args)
124
136
  end
@@ -173,8 +185,8 @@ module DataDuck
173
185
 
174
186
  table_csv = self.data_as_csv_string(table.data, table.output_column_names)
175
187
 
176
- s3_obj = S3Object.new(filepath, table_csv, @aws_key, @aws_secret,
177
- @s3_bucket, @s3_region)
188
+ s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
189
+ self.s3_bucket, self.s3_region)
178
190
  s3_obj.upload!
179
191
  return s3_obj
180
192
  end
@@ -5,13 +5,20 @@ require 'sequel'
5
5
 
6
6
  module DataDuck
7
7
  class SqlDbSource < DataDuck::Source
8
- def initialize(name, data)
9
- @host = data['host']
10
- @port = data['port']
11
- @username = data['username']
12
- @password = data['password']
13
- @database = data['database']
14
- @initialized_db_type = data['db_type']
8
+ attr_accessor :host
9
+ attr_accessor :port
10
+ attr_accessor :username
11
+ attr_accessor :password
12
+ attr_accessor :database
13
+
14
+ def initialize(name, config)
15
+ load_value('host', name, config)
16
+ load_value('port', name, config)
17
+ load_value('username', name, config)
18
+ load_value('password', name, config)
19
+ load_value('database', name, config)
20
+
21
+ @initialized_db_type = config['db_type']
15
22
 
16
23
  super
17
24
  end
@@ -19,11 +26,11 @@ module DataDuck
19
26
  def connection
20
27
  @connection ||= Sequel.connect(
21
28
  adapter: self.db_type,
22
- user: @username,
23
- host: @host,
24
- database: @database,
25
- password: @password,
26
- port: @port
29
+ user: self.username,
30
+ host: self.host,
31
+ database: self.database,
32
+ password: self.password,
33
+ port: self.port
27
34
  )
28
35
  end
29
36
 
data/lib/dataduck/util.rb CHANGED
@@ -2,6 +2,11 @@ require 'fileutils'
2
2
 
3
3
  module DataDuck
4
4
  module Util
5
+ def Util.deep_merge(first, second)
6
+ merger = proc { |key, v1, v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : v2 }
7
+ first.merge(second, &merger)
8
+ end
9
+
5
10
  def Util.ensure_path_exists!(full_path)
6
11
  split_paths = full_path.split('/')
7
12
  just_file_path = split_paths.pop
@@ -1,6 +1,6 @@
1
1
  module DataDuck
2
2
  VERSION_MAJOR = 0
3
3
  VERSION_MINOR = 5
4
- VERSION_PATCH = 5
4
+ VERSION_PATCH = 6
5
5
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
6
6
  end
@@ -1,5 +1,5 @@
1
1
  class <%= table_name_camelcased %> < DataDuck::Table
2
- source :my_database, :<%= table_name %>, ["<%= columns.map { |col| col[0] }.join('", "') %>"]
2
+ source :source1, :<%= table_name %>, ["<%= columns.map { |col| col[0] }.join('", "') %>"]
3
3
 
4
4
  output({<% columns.each do |col| %>
5
5
  <%= '# ' if col[2] %>:<%= col[0] %> => :<%= col[1] %>,<% end %>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.5.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-26 00:00:00.000000000 Z
11
+ date: 2015-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -191,7 +191,6 @@ files:
191
191
  - lib/dataduck/util.rb
192
192
  - lib/dataduck/version.rb
193
193
  - lib/helpers/module_vars.rb
194
- - lib/templates/quickstart/main.rb.erb
195
194
  - lib/templates/quickstart/table.rb.erb
196
195
  - static/logo.png
197
196
  homepage: http://dataducketl.com/
@@ -1,10 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler/setup'
3
- Bundler.require
4
-
5
- class MyETL < DataDuck::ETL
6
- destination :my_destination
7
- end
8
-
9
- etl = MyETL.new
10
- etl.process!