RubyGems - dataduck - Versions diffs - 0.5.5 → 0.5.6 - Mend

dataduck 0.5.5 → 0.5.6

Files changed (15) hide show

checksums.yaml +4 -4
data/docs/commands/show.md +1 -1
data/docs/tables/README.md +1 -1
data/lib/dataduck.rb +23 -5
data/lib/dataduck/commands.rb +17 -23
data/lib/dataduck/database.rb +12 -6
data/lib/dataduck/mysql_source.rb +5 -5
data/lib/dataduck/postgresql_source.rb +5 -5
data/lib/dataduck/redshift_destination.rb +32 -20
data/lib/dataduck/sql_db_source.rb +19 -12
data/lib/dataduck/util.rb +5 -0
data/lib/dataduck/version.rb +1 -1
data/lib/templates/quickstart/table.rb.erb +1 -1
metadata +2 -3
data/lib/templates/quickstart/main.rb.erb +0 -10

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 878a28b576d47ad2c2ba6a1df7d984e937bad5dd
-  data.tar.gz: 73ef27e0862f684e079a040631fd001a8bade409
+  metadata.gz: 9c172e7a467c64f911c45129ee6cb4ec8cc862e9
+  data.tar.gz: 057a610687e9ecf15b2baeaa8dfe338c634205a5
 SHA512:
-  metadata.gz: 39e8be0c1acbbd2819a9dad50107ad77d5142ed2fdb7f2b9e946a0ed4fb9d0061db630d7be2004872381c270390fb169033dbeb7a8b10c2db574f5cbe37e6d34
-  data.tar.gz: 3b9fcbf4268df2af1d3f6131d4e986c7995b633c56d001578f6e332c95131e3cfea956a46ac9c997fde61c77bd092985d3e5120bfeb6fd585dda8fcd9969752b
+  metadata.gz: 327385731802b5932ba7c44b0cc596d2a412edf51100ee6df618905951b51b4a46c507aa44493ec43a1bc8f12c9ca1bcdc64722daf3ca2048b1ec8fa214a65f0
+  data.tar.gz: b65b340d598ee5302137bebf68a8172f33d346ddf792a9a8dcb8b7524288a1ae3f18d6e1eed6f8e7731e0cf77933c7931964b5c7b6830e85f3dfbaf8043cbecb

data/docs/commands/show.md CHANGED Viewed

@@ -13,7 +13,7 @@ Usage to show info for just one table:
 $ dataduck show users
 Table users
-Sources from users on my_database
+Sources from users on source1
   created_at
   updated_at
   id

data/docs/tables/README.md CHANGED Viewed

@@ -54,7 +54,7 @@ The following is an example table.
 ```ruby
 class Decks < DataDuck::Table
-  source :my_database, ["id", "name", "user_id", "cards",
+  source :source1, ["id", "name", "user_id", "cards",
       "num_wins", "num_losses", "created_at", "updated_at",
       "is_drafted", "num_draft_wins", "num_draft_losses"]

data/lib/dataduck.rb CHANGED Viewed

@@ -20,12 +20,30 @@ module DataDuck
   spec = Gem::Specification.find_by_name("dataduck")
   create_module_var("gem_root", spec.gem_dir)
-  create_module_var("project_root", Dir.getwd)
-  create_module_var("config", {})
+  detect_project_root = Dir.getwd
+  while true
+    if detect_project_root == ""
+      raise Exception.new("Could not find a Gemfile in the current working directory or any parent directories. Are you sure you're running this from the right place?")
+    end
+    if File.exist?(detect_project_root + '/Gemfile')
+      break
+    end
+    detect_project_root_splits = detect_project_root.split("/")
+    detect_project_root_splits = detect_project_root_splits[0..detect_project_root_splits.length - 2]
+    detect_project_root = detect_project_root_splits.join("/")
+  end
+  create_module_var("project_root", detect_project_root)
-  dd_env_path = DataDuck.project_root + "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"
-  env_config = File.exist?(dd_env_path) ? YAML.load_file(dd_env_path) : {}
-  DataDuck.config.merge!(env_config)
+  create_module_var("config", {})
+  configs_to_load = ["/config/base.yml", "/config/#{ ENV['DATADUCK_ENV'] }.yml",
+    "/config/secret/base.yml", "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"]
+  configs_to_load.each do |relative_path|
+    config_path = DataDuck.project_root + relative_path
+    loaded_config = File.exist?(config_path) ? YAML.load_file(config_path) : {}
+    DataDuck.config = Util.deep_merge(DataDuck.config, loaded_config)
+  end
   create_module_var("sources", {})
   create_module_var("destinations", {})

data/lib/dataduck/commands.rb CHANGED Viewed

@@ -170,7 +170,7 @@ module DataDuck
           postgresql: DataDuck::PostgresqlSource,
       }[db_type]
-      db_source = db_class.new({
+      db_source = db_class.new("source1", {
           'db_type' => db_type.to_s,
           'host' => source_host,
           'database' => source_database,
@@ -189,20 +189,17 @@ module DataDuck
       config_obj = {
         'sources' => {
-          'my_database' => {
+          'source1' => {
             'type' => db_type.to_s,
             'host' => source_host,
             'database' => source_database,
             'port' => source_port,
             'username' => source_username,
-            'password' => source_password,
           }
         },
         'destinations' => {
-          'my_destination' => {
+          'destination1' => {
             'type'  => 'redshift',
-            'aws_key'  => 'YOUR_AWS_KEY',
-            'aws_secret'  => 'YOUR_AWS_SECRET',
             's3_bucket'  => 'YOUR_BUCKET',
             's3_region'  => 'YOUR_BUCKET_REGION',
             'host'  => 'redshift.somekeygoeshere.us-west-2.redshift.amazonaws.com',
@@ -210,28 +207,32 @@ module DataDuck
             'database'  => 'main',
             'schema'  => 'public',
             'username'  => 'YOUR_UESRNAME',
-            'password'  => 'YOUR_PASSWORD',
           }
         }
       }
+      DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/base.yml", config_obj.to_yaml)
+      DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/.env", """
+destination1_aws_key=AWS_KEY_GOES_HERE
+destination1_aws_secret=AWS_SECRET_GOES_HERE
+destination1_password=REDSHIFT_PASSWORD_GOES_HERE
+source1_password=#{ source_password }
+""".strip)
-      DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/secret/#{ DataDuck.environment }.yml", config_obj.to_yaml)
-      DataDuck::Commands.quickstart_save_main
       DataDuck::Commands.quickstart_update_gitignore
       puts "Quickstart complete!"
-      puts "You still need to edit your config/secret/*.yml file with your AWS and Redshift credentials."
-      puts "Run your ETL with: ruby src/main.rb"
+      puts "You still need to edit your .env and config/base.yml files with your AWS and Redshift credentials."
+      puts "Run your ETL with: dataduck etl all"
+      puts "For more help, visit http://dataducketl.com/docs"
     end
     def self.quickstart_update_gitignore
       main_gitignore_path = "#{ DataDuck.project_root }/.gitignore"
       FileUtils.touch(main_gitignore_path)
-      secret_gitignore_path = "#{ DataDuck.project_root }/config/secret/.gitignore"
-      FileUtils.touch(secret_gitignore_path)
-      output = File.open(secret_gitignore_path, "w")
-      output << '[^.]*'
+      output = File.open(main_gitignore_path, "w")
+      output << ".DS_Store\n"
+      output << ".env\n"
       output.close
     end
@@ -264,12 +265,5 @@ module DataDuck
       output << contents
       output.close
     end
-    def self.quickstart_save_main
-      namespace = Namespace.new
-      template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/main.rb.erb", 'r').read
-      result = ERB.new(template).result(namespace.get_binding)
-      DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/main.rb", result)
-    end
   end
 end

data/lib/dataduck/database.rb CHANGED Viewed

@@ -20,13 +20,19 @@ module DataDuck
     protected
+      def load_value(prop_name, db_name, config)
+        self.send("#{ prop_name }=", config[prop_name] || ENV["#{ db_name }_#{ prop_name }"])
+      end
       def find_command_and_execute(commands, *args)
         # This function was originally sourced from Rails
         # https://github.com/rails/rails
         #
-        # Licensed under the MIT license
+        # This function was licensed under the MIT license
         # http://opensource.org/licenses/MIT
         #
+        # The license asks to include the following with the source code:
+        #
         # Permission is hereby granted, free of charge, to any person obtaining a copy
         # of this software and associated documentation files (the "Software"), to deal
         # in the Software without restriction, including without limitation the rights
@@ -68,11 +74,11 @@ module DataDuck
         # This method is not all exhaustive, and is not meant to be necessarily relied on, but is a
         # sanity check that can be used to ensure certain sql is not mutating.
-        return true if sql.downcase.start_with?("drop table")
-        return true if sql.downcase.start_with?("create table")
-        return true if sql.downcase.start_with?("delete from")
-        return true if sql.downcase.start_with?("insert into")
-        return true if sql.downcase.start_with?("alter table")
+        return true if sql.downcase.strip.start_with?("drop table")
+        return true if sql.downcase.strip.start_with?("create table")
+        return true if sql.downcase.strip.start_with?("delete from")
+        return true if sql.downcase.strip.start_with?("insert into")
+        return true if sql.downcase.strip.start_with?("alter table")
         false
       end

data/lib/dataduck/mysql_source.rb CHANGED Viewed

@@ -10,11 +10,11 @@ module DataDuck
     def dbconsole(options = {})
       args = []
-      args << "--host=#{ @host }"
-      args << "--user=#{ @username }"
-      args << "--database=#{ @database }"
-      args << "--port=#{ @port }"
-      args << "--password=#{ @password }"
+      args << "--host=#{ self.host }"
+      args << "--user=#{ self.username }"
+      args << "--database=#{ self.database }"
+      args << "--port=#{ self.port }"
+      args << "--password=#{ self.password }"
       self.find_command_and_execute("mysql", *args)
     end

data/lib/dataduck/postgresql_source.rb CHANGED Viewed

@@ -10,12 +10,12 @@ module DataDuck
     def dbconsole(options = {})
       args = []
-      args << "--host=#{ @host }"
-      args << "--username=#{ @username }"
-      args << "--dbname=#{ @database }"
-      args << "--port=#{ @port }"
+      args << "--host=#{ self.host }"
+      args << "--username=#{ self.username }"
+      args << "--dbname=#{ self.database }"
+      args << "--port=#{ self.port }"
-      ENV['PGPASSWORD'] = @password
+      ENV['PGPASSWORD'] = self.password
       self.find_command_and_execute("psql", *args)
     end

data/lib/dataduck/redshift_destination.rb CHANGED Viewed

@@ -2,24 +2,36 @@ require_relative 'destination'
 module DataDuck
   class RedshiftDestination < DataDuck::Destination
+    attr_accessor :aws_key
+    attr_accessor :aws_secret
+    attr_accessor :s3_bucket
+    attr_accessor :s3_region
+    attr_accessor :host
+    attr_accessor :port
+    attr_accessor :database
+    attr_accessor :schema
+    attr_accessor :username
+    attr_accessor :password
     def initialize(name, config)
-      @aws_key = config['aws_key']
-      @aws_secret = config['aws_secret']
-      @s3_bucket = config['s3_bucket']
-      @s3_region = config['s3_region']
-      @host = config['host']
-      @port = config['port']
-      @database = config['database']
-      @schema = config['schema']
-      @username = config['username']
-      @password = config['password']
+      load_value('aws_key', name, config)
+      load_value('aws_secret', name, config)
+      load_value('s3_bucket', name, config)
+      load_value('s3_region', name, config)
+      load_value('host', name, config)
+      load_value('port', name, config)
+      load_value('database', name, config)
+      load_value('schema', name, config)
+      load_value('username', name, config)
+      load_value('password', name, config)
       @redshift_connection = nil
       super
     end
     def connection
-      @redshift_connection ||= Sequel.connect("redshift://#{ @username }:#{ @password }@#{ @host }:#{ @port }/#{ @database }" +
+      @redshift_connection ||= Sequel.connect("redshift://#{ self.username }:#{ self.password }@#{ self.host }:#{ self.port }/#{ self.database }" +
               "?force_standard_strings=f",
           :client_min_messages => '',
           :force_standard_strings => false
@@ -31,8 +43,8 @@ module DataDuck
       query_fragments = []
       query_fragments << "COPY #{ table.staging_name } (#{ properties_joined_string })"
       query_fragments << "FROM '#{ s3_path }'"
-      query_fragments << "CREDENTIALS 'aws_access_key_id=#{ @aws_key };aws_secret_access_key=#{ @aws_secret }'"
-      query_fragments << "REGION '#{ @s3_region }'"
+      query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
+      query_fragments << "REGION '#{ self.s3_region }'"
       query_fragments << "CSV TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
       query_fragments << "DATEFORMAT 'auto'"
       return query_fragments.join(" ")
@@ -113,12 +125,12 @@ module DataDuck
     def dbconsole(options = {})
       args = []
-      args << "--host=#{ @host }"
-      args << "--username=#{ @username }"
-      args << "--dbname=#{ @database }"
-      args << "--port=#{ @port }"
+      args << "--host=#{ self.host }"
+      args << "--username=#{ self.username }"
+      args << "--dbname=#{ self.database }"
+      args << "--port=#{ self.port }"
-      ENV['PGPASSWORD'] = @password
+      ENV['PGPASSWORD'] = self.password
       self.find_command_and_execute("psql", *args)
     end
@@ -173,8 +185,8 @@ module DataDuck
       table_csv = self.data_as_csv_string(table.data, table.output_column_names)
-      s3_obj = S3Object.new(filepath, table_csv, @aws_key, @aws_secret,
-          @s3_bucket, @s3_region)
+      s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
+          self.s3_bucket, self.s3_region)
       s3_obj.upload!
       return s3_obj
     end

data/lib/dataduck/sql_db_source.rb CHANGED Viewed

@@ -5,13 +5,20 @@ require 'sequel'
 module DataDuck
   class SqlDbSource < DataDuck::Source
-    def initialize(name, data)
-      @host = data['host']
-      @port = data['port']
-      @username = data['username']
-      @password = data['password']
-      @database = data['database']
-      @initialized_db_type = data['db_type']
+    attr_accessor :host
+    attr_accessor :port
+    attr_accessor :username
+    attr_accessor :password
+    attr_accessor :database
+    def initialize(name, config)
+      load_value('host', name, config)
+      load_value('port', name, config)
+      load_value('username', name, config)
+      load_value('password', name, config)
+      load_value('database', name, config)
+      @initialized_db_type = config['db_type']
       super
     end
@@ -19,11 +26,11 @@ module DataDuck
     def connection
       @connection ||= Sequel.connect(
         adapter: self.db_type,
-        user: @username,
-        host: @host,
-        database: @database,
-        password: @password,
-        port: @port
+        user: self.username,
+        host: self.host,
+        database: self.database,
+        password: self.password,
+        port: self.port
       )
     end

data/lib/dataduck/util.rb CHANGED Viewed

@@ -2,6 +2,11 @@ require 'fileutils'
 module DataDuck
   module Util
+    def Util.deep_merge(first, second)
+      merger = proc { |key, v1, v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : v2 }
+      first.merge(second, &merger)
+    end
     def Util.ensure_path_exists!(full_path)
       split_paths = full_path.split('/')
       just_file_path = split_paths.pop

data/lib/dataduck/version.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module DataDuck
   VERSION_MAJOR = 0
   VERSION_MINOR = 5
-  VERSION_PATCH = 5
+  VERSION_PATCH = 6
   VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
 end

data/lib/templates/quickstart/table.rb.erb CHANGED Viewed

@@ -1,5 +1,5 @@
 class <%= table_name_camelcased %> < DataDuck::Table
-  source :my_database, :<%= table_name %>, ["<%= columns.map { |col| col[0] }.join('", "') %>"]
+  source :source1, :<%= table_name %>, ["<%= columns.map { |col| col[0] }.join('", "') %>"]
   output({<% columns.each do |col| %>
       <%= '# ' if col[2] %>:<%= col[0] %> => :<%= col[1] %>,<% end %>

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dataduck
 version: !ruby/object:Gem::Version
-  version: 0.5.5
+  version: 0.5.6
 platform: ruby
 authors:
 - Jeff Pickhardt
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-10-26 00:00:00.000000000 Z
+date: 2015-10-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -191,7 +191,6 @@ files:
 - lib/dataduck/util.rb
 - lib/dataduck/version.rb
 - lib/helpers/module_vars.rb
-- lib/templates/quickstart/main.rb.erb
 - lib/templates/quickstart/table.rb.erb
 - static/logo.png
 homepage: http://dataducketl.com/

data/lib/templates/quickstart/main.rb.erb DELETED Viewed

@@ -1,10 +0,0 @@
-require 'rubygems'
-require 'bundler/setup'
-Bundler.require
-class MyETL < DataDuck::ETL
-  destination :my_destination
-end
-etl = MyETL.new
-etl.process!