dataduck 0.2.0
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/README.md +87 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/dataduck +6 -0
- data/bin/setup +7 -0
- data/dataduck.gemspec +28 -0
- data/examples/example/.gitignore +5 -0
- data/examples/example/.ruby-version +1 -0
- data/examples/example/Gemfile +5 -0
- data/examples/example/README.md +11 -0
- data/examples/example/config/replace_me.yml +22 -0
- data/examples/example/src/main.rb +13 -0
- data/examples/example/src/tables/games.rb +10 -0
- data/examples/example/src/tables/users.rb +16 -0
- data/lib/dataduck.rb +29 -0
- data/lib/dataduck/commands.rb +165 -0
- data/lib/dataduck/destination.rb +40 -0
- data/lib/dataduck/etl.rb +49 -0
- data/lib/dataduck/mysql_source.rb +11 -0
- data/lib/dataduck/postgresql_source.rb +11 -0
- data/lib/dataduck/redshift_destination.rb +176 -0
- data/lib/dataduck/s3_object.rb +71 -0
- data/lib/dataduck/source.rb +46 -0
- data/lib/dataduck/sql_db_source.rb +41 -0
- data/lib/dataduck/table.rb +82 -0
- data/lib/dataduck/util.rb +15 -0
- data/lib/dataduck/version.rb +6 -0
- data/lib/helpers/module_vars.rb +19 -0
- data/lib/templates/quickstart/main.rb.erb +10 -0
- data/lib/templates/quickstart/table.rb.erb +7 -0
- data/static/logo.png +0 -0
- metadata +178 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 0f87bbaf674b1943242d3ea173a5e34fe00e0724
+  data.tar.gz: b8fbacadd9323ab917498712c8d4f39f1f5ca907
+SHA512:
+  metadata.gz: 40bbfce9c990d1542236c59967c31fe3fe5982c84bed12ccaf604c7ce15f2cebc5432b865dfedac5e95607dc37f20e0d681462ee9e7936e30ffdce8361688c96
+  data.tar.gz: fdc25e1ddf3a00faeceb13f11f4b7452f4085b3c0e5ca805137cc21174727aabec052dd335c371cd21c78db7ca0af7ef4e5a93f2876df9abfff531ad90bc8612
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
+ruby-2.1.2
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,87 @@
+# DataDuck ETL
+
+##### Set up in under 5 minutes
+
+DataDuck ETL is probably the quickest extract-transform-load (ETL) framework to set up. If you want to set up a data warehouse, give DataDuck ETL a try.
+
+##### Extract-transform-load to Amazon Redshift
+
+DataDuck ETL is currently focused on loading to Amazon Redshift (through Amazon S3).
+
+![DataDuck ETL](static/logo.png "DataDuck ETL")
+
+## Installation
+
+##### Example project
+
+See [https://github.com/DataDuckETL/DataDuck/tree/master/examples/example](https://github.com/DataDuckETL/DataDuck/tree/master/examples/example) for an example project setup.
+
+##### Instructions for using DataDuck ETL
+
+Create a new project, then add the following to your Gemfile:
+
+```ruby
+gem 'dataduck', :git => 'git://github.com/DataDuckETL/DataDuck.git'
+```
+
+Then execute:
+
+    $ bundle install
+
+Finally, run the quickstart command:
+
+    $ dataduck quickstart
+
+The quickstart wizard will ask you for credentials to your database, then create the basic setup for your project. After the setup, run your project's ETL with `ruby src/main.rb`.
+
+If you'd like to run the ETL regularly, such as every night, it's recommended to use the [whenever](https://github.com/javan/whenever) gem to manage a cron job that runs it.
+
+## Documentation
+
+Tables are defined in their own files under /src/tables. Here's an example table:
+
+```ruby
+class Decks < DataDuck::Table
+  source :my_database, ["id", "name", "user_id", "cards",
+      "num_wins", "num_losses", "created_at", "updated_at",
+      "is_drafted", "num_draft_wins", "num_draft_losses"]
+
+  transforms :calculate_num_totals
+
+  validates :validates_num_total
+
+  output({
+    :id => :integer,
+    :name => :string,
+    :user_id => :integer,
+    :num_wins => :integer,
+    :num_losses => :integer,
+    :num_total => :integer,
+    :num_draft_total => :integer,
+    :created_at => :datetime,
+    :updated_at => :datetime,
+    :is_drafted => :boolean,
+    # Note that num_draft_wins and num_draft_losses
+    # are not included in the output, but are used in
+    # the transformation.
+  })
+
+  def calculate_num_totals(row)
+    row[:num_total] = row[:num_wins] + row[:num_losses]
+    row[:num_draft_total] = row[:num_draft_wins] + row[:num_draft_losses]
+    row
+  end
+
+  def validates_num_total(row)
+    return "Deck id #{ row[:id] } has negative value #{ row[:num_total] } for num_total." if row[:num_total] < 0
+  end
+end
+```
+
+## Contributing
+
+To contribute, get in touch at http://DataDuckETL.com/ so that we can share the [Contributor License Agreement (CLA)](https://en.wikipedia.org/wiki/Contributor_License_Agreement) with you, then create a pull request.
+
+## License
+
+Get in touch or visit [http://dataducketl.com/licensing](http://dataducketl.com/licensing) for licensing details.
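For a concrete sense of what the `transforms` and `validates` hooks in the README example do per row, here is a minimal standalone sketch (illustration only, not gem code; the row hash is made up):

```ruby
# Plain-Ruby illustration of the Decks example's per-row hooks.
row = { id: 7, num_wins: 10, num_losses: 4, num_draft_wins: 2, num_draft_losses: 1 }

# calculate_num_totals: derive the totals that appear in the output schema.
row[:num_total] = row[:num_wins] + row[:num_losses]                    # => 14
row[:num_draft_total] = row[:num_draft_wins] + row[:num_draft_losses]  # => 3

# validates_num_total: return an error string for a bad row, nil otherwise.
error = "Deck id #{row[:id]} has negative value #{row[:num_total]} for num_total." if row[:num_total] < 0
puts error.nil? ? "row ok" : error  # => row ok
```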
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+
+require "bundler/setup"
+require "dataduck"
+
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+
+# (If you use this, don't forget to add pry to your Gemfile!)
+# require "pry"
+# Pry.start
+
+require "irb"
+IRB.start
data/bin/dataduck
ADDED
data/bin/setup
ADDED
data/dataduck.gemspec
ADDED
@@ -0,0 +1,28 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'dataduck/version'
+
+Gem::Specification.new do |spec|
+  spec.authors = ["Jeff Pickhardt"]
+  spec.description = "A straightforward, effective ETL framework."
+  spec.email = ["pickhardt@gmail.com", "admin@dataducketl.com"]
+  spec.executables = ["dataduck"]
+  spec.homepage = "http://dataducketl.com/"
+  spec.name = "dataduck"
+  spec.summary = "A straightforward, effective ETL framework."
+  spec.version = DataDuck::VERSION
+
+  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+  spec.bindir = "bin"
+  spec.require_paths = ["lib"]
+
+  spec.add_development_dependency "bundler", "~> 1.6"
+  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_development_dependency "rspec", "~> 3.3"
+
+  spec.add_runtime_dependency "sequel", '~> 4.19'
+  spec.add_runtime_dependency "pg", '~> 0.16'
+  spec.add_runtime_dependency "aws-sdk", "~> 2.0"
+  spec.add_runtime_dependency "sequel-redshift"
+end
data/examples/example/.ruby-version
ADDED
@@ -0,0 +1 @@
+ruby-2.1.2
data/examples/example/README.md
ADDED
@@ -0,0 +1,11 @@
+# DataDuck ETL Example
+
+This is an example project showing how to set up [DataDuck ETL](http://dataducketl.com/).
+
+# Instructions
+
+Copy /config/replace_me.yml to /config/secret/development.yml, then replace the secrets with your AWS and DB connection details.
+
+For each table you want to import, create a table file in /src/tables. You can use /src/tables/games.rb and /src/tables/users.rb as examples. (Be sure to delete, modify, or rename games.rb and users.rb, though; otherwise DataDuck ETL will try to load them.)
+
+For further help, reach out at [http://dataducketl.com/](http://dataducketl.com/).
data/examples/example/config/replace_me.yml
ADDED
@@ -0,0 +1,22 @@
+# Move this file to /config/secret/development.yml and /config/secret/production.yml
+destinations:
+  main_destination:
+    type: redshift
+    aws_key: YOUR_AWS_KEY
+    aws_secret: YOUR_AWS_SECRET
+    s3_bucket: YOUR_BUCKET
+    s3_region: YOUR_BUCKET_REGION
+    host: redshift.somekeygoeshere.us-west-2.redshift.amazonaws.com
+    port: 5439
+    database: main
+    schema: public
+    username: YOUR_USERNAME
+    password: YOUR_PASSWORD
+sources:
+  my_database:
+    type: postgresql
+    host: some.host.goes.here.com
+    database: db_name_goes_here
+    port: 5522
+    username: some_username_goes_here_probably_read_only
+    password: some_password_goes_here
data/examples/example/src/main.rb
ADDED
@@ -0,0 +1,13 @@
+require 'rubygems'
+require 'bundler/setup'
+Bundler.require
+
+require_relative "tables/games"
+require_relative "tables/users"
+
+class MyCompanyETL < DataDuck::ETL
+  destination :main_destination
+end
+
+etl = MyCompanyETL.new
+etl.process!
data/examples/example/src/tables/users.rb
ADDED
@@ -0,0 +1,16 @@
+class Users < DataDuck::Table
+  source :my_database, [:id, :username, :rating, :credits]
+
+  validate :non_negative_credits
+
+  output({  # DataDuck::Table defines output, not columns
+    :id => :integer,
+    :username => :string,
+    :rating => :integer,
+    :credits => :integer,
+  })
+
+  def non_negative_credits(row)
+    return "User id #{ row[:id] } has negative value of #{ row[:credits] } for credits." if row[:credits] < 0
+  end
+end
data/lib/dataduck.rb
ADDED
@@ -0,0 +1,29 @@
+Dir[File.dirname(__FILE__) + '/helpers/*.rb'].each do |file|
+  require file
+end
+
+Dir[File.dirname(__FILE__) + '/dataduck/*.rb'].each do |file|
+  require file
+end
+
+require 'yaml'
+
+module DataDuck
+  extend ModuleVars
+
+  ENV['DATADUCK_ENV'] ||= "development"
+  create_module_var("environment", ENV['DATADUCK_ENV'])
+
+  spec = Gem::Specification.find_by_name("dataduck")
+  create_module_var("gem_root", spec.gem_dir)
+
+  create_module_var("project_root", Dir.getwd)
+  create_module_var("config", {})
+
+  dd_env_path = DataDuck.project_root + "/config/secret/#{ ENV['DATADUCK_ENV'] }.yml"
+  env_config = File.exist?(dd_env_path) ? YAML.load_file(dd_env_path) : {}
+  DataDuck.config.merge!(env_config)
+
+  create_module_var("sources", {})
+  create_module_var("destinations", {})
+end
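lib/dataduck.rb keys everything off `DATADUCK_ENV` and loads `config/secret/<env>.yml` from the working directory. A minimal sketch of that lookup in isolation (hypothetical paths; no DataDuck required):

```ruby
require 'yaml'

# Same resolution order as lib/dataduck.rb: honor DATADUCK_ENV, default to development.
ENV['DATADUCK_ENV'] ||= 'development'
config_path = File.join(Dir.getwd, 'config', 'secret', "#{ENV['DATADUCK_ENV']}.yml")

config = File.exist?(config_path) ? YAML.load_file(config_path) : {}
puts "env=#{ENV['DATADUCK_ENV']} sources=#{config.fetch('sources', {}).keys.inspect}"
```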
data/lib/dataduck/commands.rb
ADDED
@@ -0,0 +1,165 @@
+require 'erb'
+require 'yaml'
+require 'fileutils'
+require 'io/console' # needed for STDIN.noecho below
+
+module DataDuck
+  class Commands
+    class Namespace
+      def initialize(hash = {})
+        hash.each do |key, value|
+          singleton_class.send(:define_method, key) { value }
+        end
+      end
+
+      def get_binding
+        binding
+      end
+    end
+
+    def self.acceptable_commands
+      ['console', 'quickstart']
+    end
+
+    def self.route_command(args)
+      if args.length == 0
+        return DataDuck::Commands.help
+      end
+
+      command = args[0]
+      if !Commands.acceptable_commands.include?(command)
+        puts "No such command: #{ command }"
+        return DataDuck::Commands.help
+      end
+
+      DataDuck::Commands.public_send(command)
+    end
+
+    def self.console
+      require "irb"
+      IRB.start
+    end
+
+    def self.help
+      puts "Usage: dataduck commandname"
+    end
+
+    def self.quickstart
+      puts "Welcome to DataDuck!"
+      puts "This quickstart wizard will create your application, assuming the source is a Postgres database and the destination is an Amazon Redshift data warehouse."
+
+      puts "Enter the source (Postgres database) hostname:"
+      source_host = STDIN.gets.strip
+
+      puts "Enter the name of the database when connecting to #{ source_host }:"
+      source_database = STDIN.gets.strip
+
+      puts "Enter the source's port:"
+      source_port = STDIN.gets.strip.to_i
+
+      puts "Enter the username:"
+      source_username = STDIN.gets.strip
+
+      puts "Enter the password:"
+      source_password = STDIN.noecho(&:gets).chomp
+
+      db_source = DataDuck::PostgresqlSource.new({
+        'type' => 'postgresql',
+        'host' => source_host,
+        'database' => source_database,
+        'port' => source_port,
+        'username' => source_username,
+        'password' => source_password,
+      })
+
+      puts "Connecting to source database..."
+      table_names = db_source.table_names
+      puts "Connection successful. Detected #{ table_names.length } tables."
+      puts "Creating scaffolding..."
+      table_names.each do |table_name|
+        DataDuck::Commands.quickstart_create_table(table_name, db_source)
+      end
+
+      config_obj = {
+        'sources' => {
+          'my_database' => {
+            'type' => 'postgresql',
+            'host' => source_host,
+            'database' => source_database,
+            'port' => source_port,
+            'username' => source_username,
+            'password' => source_password,
+          }
+        },
+        'destinations' => {
+          'my_destination' => {
+            'type' => 'redshift',
+            'aws_key' => 'YOUR_AWS_KEY',
+            'aws_secret' => 'YOUR_AWS_SECRET',
+            's3_bucket' => 'YOUR_BUCKET',
+            's3_region' => 'YOUR_BUCKET_REGION',
+            'host' => 'redshift.somekeygoeshere.us-west-2.redshift.amazonaws.com',
+            'port' => 5439,
+            'database' => 'main',
+            'schema' => 'public',
+            'username' => 'YOUR_USERNAME',
+            'password' => 'YOUR_PASSWORD',
+          }
+        }
+      }
+
+      DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/config/secret/#{ DataDuck.environment }.yml", config_obj.to_yaml)
+      DataDuck::Commands.quickstart_save_main
+      DataDuck::Commands.quickstart_update_gitignore
+
+      puts "Quickstart complete!"
+      puts "You still need to edit your config/secret/*.yml file with your AWS and Redshift credentials."
+      puts "Run your ETL with: ruby src/main.rb"
+    end
+
+    def self.quickstart_update_gitignore
+      main_gitignore_path = "#{ DataDuck.project_root }/.gitignore"
+      FileUtils.touch(main_gitignore_path)
+
+      secret_gitignore_path = "#{ DataDuck.project_root }/config/secret/.gitignore"
+      FileUtils.touch(secret_gitignore_path)
+      output = File.open(secret_gitignore_path, "w")
+      output << '[^.]*'
+      output.close
+    end
+
+    def self.quickstart_create_table(table_name, db)
+      columns = []
+      schema = db.schema(table_name)
+      schema.each do |property_schema|
+        property_name = property_schema[0]
+        property_type = property_schema[1][:type]
+        commented_out = ['ssn', 'socialsecurity', 'password', 'encrypted_password', 'salt', 'password_salt', 'pw'].include?(property_name.to_s.downcase)
+        columns << [property_name.to_s, property_type.to_s, commented_out]
+      end
+
+      table_name = table_name.to_s.downcase
+      table_name_camelcased = table_name.split('_').collect(&:capitalize).join
+      namespace = Namespace.new(table_name: table_name_camelcased, columns: columns)
+      template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/table.rb.erb", 'r').read
+      result = ERB.new(template).result(namespace.get_binding)
+      DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/tables/#{ table_name }.rb", result)
+    end
+
+    def self.quickstart_save_file(output_path_full, contents)
+      *output_path, output_filename = output_path_full.split('/')
+      output_path = output_path.join("/")
+      FileUtils::mkdir_p(output_path)
+
+      output = File.open(output_path_full, "w")
+      output << contents
+      output.close
+    end
+
+    def self.quickstart_save_main
+      namespace = Namespace.new
+      template = File.open("#{ DataDuck.gem_root }/lib/templates/quickstart/main.rb.erb", 'r').read
+      result = ERB.new(template).result(namespace.get_binding)
+      DataDuck::Commands.quickstart_save_file("#{ DataDuck.project_root }/src/main.rb", result)
+    end
+  end
+end
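The `Namespace` class is the standard trick for rendering ERB templates against an arbitrary hash: each key becomes a method on the object's singleton class, and `get_binding` hands ERB that scope. A self-contained sketch (hypothetical template string, not the gem's .erb files):

```ruby
require 'erb'

# Expose hash keys as methods, then give ERB the object's binding.
class Scope
  def initialize(hash = {})
    hash.each { |key, value| singleton_class.send(:define_method, key) { value } }
  end

  def get_binding
    binding
  end
end

template = ERB.new("class <%= table_name %> < DataDuck::Table\n<% columns.each do |col| %>  # <%= col %>\n<% end %>end\n")
scope = Scope.new(table_name: 'Users', columns: %w[id username])
puts template.result(scope.get_binding)
# class Users < DataDuck::Table
#   # id
#   # username
# end
```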
data/lib/dataduck/destination.rb
ADDED
@@ -0,0 +1,40 @@
+module DataDuck
+  class Destination
+    def self.destination_config(name)
+      if DataDuck.config['destinations'].nil? || DataDuck.config['destinations'][name.to_s].nil?
+        raise Exception.new("Could not find destination #{ name } in destinations configs.")
+      end
+
+      DataDuck.config['destinations'][name.to_s]
+    end
+
+    def load_tables!(tables)
+      raise Exception.new("Must implement load_tables! in subclass")
+    end
+
+    def before_all_loads!(tables)
+      # hook for subclasses; ETL#process! passes the table instances in
+    end
+
+    def after_all_loads!(tables)
+      # e.g. cleanup
+    end
+
+    def self.destination(destination_name)
+      destination_name = destination_name.to_s
+
+      if DataDuck.destinations[destination_name]
+        return DataDuck.destinations[destination_name]
+      end
+
+      destination_configuration = DataDuck::Destination.destination_config(destination_name)
+      destination_type = destination_configuration['type']
+      if destination_type == "redshift"
+        DataDuck.destinations[destination_name] = DataDuck::RedshiftDestination.new(destination_configuration)
+        return DataDuck.destinations[destination_name]
+      else
+        raise ArgumentError.new("Unknown type '#{ destination_type }' for destination #{ destination_name }.")
+      end
+    end
+  end
+end
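`Destination.destination` is a config-driven factory with memoization: the first lookup builds an instance from `DataDuck.config['destinations']`, and later lookups hit the `DataDuck.destinations` cache. Assuming a config with a `main_destination` entry of type redshift, usage looks roughly like:

```ruby
# Hypothetical usage; requires a main_destination entry in the loaded config.
dest = DataDuck::Destination.destination(:main_destination)  # builds a RedshiftDestination
same = DataDuck::Destination.destination(:main_destination)  # returns the cached instance
puts dest.equal?(same)  # => true
```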
data/lib/dataduck/etl.rb
ADDED
@@ -0,0 +1,49 @@
+require_relative 'redshift_destination.rb'
+
+module DataDuck
+  class ETL
+    class << self
+      attr_accessor :destinations
+    end
+
+    def self.destination(destination_name)
+      self.destinations ||= []
+      self.destinations << DataDuck::Destination.destination(destination_name)
+    end
+
+    def initialize(options = {})
+      @tables = options[:tables] || []
+
+      @autoload_tables = options[:autoload_tables].nil? ? true : options[:autoload_tables]
+      if @autoload_tables
+        Dir[DataDuck.project_root + "/src/tables/*.rb"].each do |file|
+          table_name_underscores = file.split("/").last.gsub(".rb", "")
+          table_name_camelized = DataDuck::Util.underscore_to_camelcase(table_name_underscores)
+          require file
+          table = Object.const_get(table_name_camelized)
+          if table <= DataDuck::Table
+            @tables << table
+          end
+        end
+      end
+    end
+
+    def process!
+      puts "Processing ETL..."
+
+      table_instances = []
+      @tables.each do |table_class|
+        table_instance = table_class.new
+        table_instances << table_instance
+        table_instance.extract!
+        table_instance.transform!
+      end
+
+      self.class.destinations.each do |destination|
+        destination.before_all_loads!(table_instances)
+        destination.load_tables!(table_instances)
+        destination.after_all_loads!(table_instances)
+      end
+    end
+  end
+end
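Because `initialize` autoloads every class in src/tables/ by default, a pipeline that wants explicit control can pass `tables:` and turn autoloading off. A sketch (Users and Games stand in for your own table classes, with a configured main_destination):

```ruby
# Hypothetical pipeline with an explicit table list instead of autoloading.
class NightlyETL < DataDuck::ETL
  destination :main_destination
end

etl = NightlyETL.new(tables: [Users, Games], autoload_tables: false)
etl.process!  # extract! and transform! each table, then load into each destination
```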
data/lib/dataduck/redshift_destination.rb
ADDED
@@ -0,0 +1,176 @@
+require_relative 'destination.rb'
+
+module DataDuck
+  class RedshiftDestination < DataDuck::Destination
+    def initialize(config)
+      @aws_key = config['aws_key']
+      @aws_secret = config['aws_secret']
+      @s3_bucket = config['s3_bucket']
+      @s3_region = config['s3_region']
+      @host = config['host']
+      @port = config['port']
+      @database = config['database']
+      @schema = config['schema']
+      @username = config['username']
+      @password = config['password']
+      @redshift_connection = nil
+    end
+
+    def connection
+      @redshift_connection ||= Sequel.connect("redshift://#{ @username }:#{ @password }@#{ @host }:#{ @port }/#{ @database }" +
+        "?force_standard_strings=f",
+        :client_min_messages => '',
+        :force_standard_strings => false
+      )
+    end
+
+    def copy_query(table, s3_path)
+      properties_joined_string = "\"#{ table.output_column_names.join('","') }\""
+      query_fragments = []
+      query_fragments << "COPY #{ self.staging_table_name(table) } (#{ properties_joined_string })"
+      query_fragments << "FROM '#{ s3_path }'"
+      query_fragments << "CREDENTIALS 'aws_access_key_id=#{ @aws_key };aws_secret_access_key=#{ @aws_secret }'"
+      query_fragments << "REGION '#{ @s3_region }'"
+      query_fragments << "CSV TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
+      query_fragments << "DATEFORMAT 'auto'"
+      return query_fragments.join(" ")
+    end
+
+    def create_columns_on_data_warehouse!(table)
+      columns = get_columns_in_data_warehouse(table)
+      column_names = columns.map { |col| col[:name].to_s }
+      table.output_schema.map do |name, data_type|
+        if !column_names.include?(name.to_s)
+          redshift_data_type = data_type.to_s
+          redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
+          self.run_query("ALTER TABLE #{ table.name } ADD #{ name } #{ redshift_data_type }")
+        end
+      end
+    end
+
+    def create_table_query(table, table_name = nil)
+      table_name ||= table.name
+      props_array = table.output_schema.map do |name, data_type|
+        redshift_data_type = data_type.to_s
+        redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
+        "\"#{ name }\" #{ redshift_data_type }"
+      end
+      props_string = props_array.join(', ')
+      "CREATE TABLE IF NOT EXISTS #{ table_name } (#{ props_string })"
+    end
+
+    def create_output_table_on_data_warehouse!(table)
+      self.run_query(self.create_table_query(table))
+      self.create_columns_on_data_warehouse!(table)
+    end
+
+    def create_staging_table!(table)
+      table_name = self.staging_table_name(table)
+      self.drop_staging_table!(table)
+      self.run_query(self.create_table_query(table, table_name))
+    end
+
+    def data_as_csv_string(data, property_names)
+      data_string_components = [] # for performance reasons, join strings this way
+      data.each do |result|
+        property_names.each_with_index do |property_name, index|
+          value = result[property_name.to_sym]
+
+          if index == 0
+            data_string_components << '"'
+          end
+
+          data_string_components << DataDuck::RedshiftDestination.value_to_string(value)
+
+          if index == property_names.length - 1
+            data_string_components << '"'
+          else
+            data_string_components << '","'
+          end
+        end
+        data_string_components << "\n"
+      end
+
+      return data_string_components.join
+    end
+
+    def drop_staging_table!(table)
+      self.run_query("DROP TABLE IF EXISTS #{ self.staging_table_name(table) }")
+    end
+
+    def get_columns_in_data_warehouse(table)
+      query = "SELECT pg_table_def.column as name, type as data_type, distkey, sortkey FROM pg_table_def WHERE tablename='#{ table.name }'"
+      results = self.run_query(query)
+
+      columns = []
+      results.each do |result|
+        columns << {
+          name: result[:name],
+          data_type: result[:data_type],
+          distkey: result[:distkey],
+          sortkey: result[:sortkey]
+        }
+      end
+
+      return columns
+    end
+
+    def merge_from_staging!(table)
+      # Following guidelines in http://docs.aws.amazon.com/redshift/latest/dg/merge-examples.html
+      staging_name = self.staging_table_name(table)
+      delete_query = "DELETE FROM #{ table.name } USING #{ staging_name } WHERE #{ table.name }.id = #{ staging_name }.id" # TODO: allow custom or multiple keys
+      self.run_query(delete_query)
+      insert_query = "INSERT INTO #{ table.name } (\"#{ table.output_column_names.join('","') }\") SELECT \"#{ table.output_column_names.join('","') }\" FROM #{ staging_name }"
+      self.run_query(insert_query)
+    end
+
+    def run_query(sql)
+      self.connection[sql].map { |elem| elem }
+    end
+
+    def staging_table_name(table)
+      "zz_dataduck_#{ table.name }"
+    end
+
+    def upload_table_to_s3!(table)
+      now_epoch = Time.now.to_i.to_s
+      filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv"
+
+      table_csv = self.data_as_csv_string(table.data, table.output_column_names)
+
+      s3_obj = S3Object.new(filepath, table_csv, @aws_key, @aws_secret,
+        @s3_bucket, @s3_region)
+      s3_obj.upload!
+      return s3_obj
+    end
+
+    def before_all_loads!(tables)
+      # no-op hook
+    end
+
+    def load_tables!(tables)
+      tables.each do |table|
+        puts "Loading table #{ table.name }..."
+        s3_object = self.upload_table_to_s3!(table)
+        self.create_staging_table!(table)
+        self.create_output_table_on_data_warehouse!(table)
+        self.run_query(self.copy_query(table, s3_object.s3_path))
+        self.merge_from_staging!(table)
+        self.drop_staging_table!(table)
+      end
+    end
+
+    def after_all_loads!(tables)
+      # no-op hook
+    end
+
+    def self.value_to_string(value)
+      string_value = ''
+      if value.respond_to? :to_s
+        string_value = value.to_s
+      end
+      string_value.gsub!('"', '""')
+      return string_value
+    end
+  end
+end
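To make the load path concrete: for a hypothetical `users` table with output columns id and username, the strings assembled by `copy_query` and `merge_from_staging!` come out approximately as below (bucket, timestamp, and credentials are placeholders):

```ruby
staging = 'zz_dataduck_users'  # staging_table_name(table)

copy_sql = "COPY #{staging} (\"id\",\"username\") " \
  "FROM 's3://YOUR_BUCKET/dataduck/pending/users_1444435200.csv' " \
  "CREDENTIALS 'aws_access_key_id=...;aws_secret_access_key=...' " \
  "REGION 'us-west-2' CSV TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL DATEFORMAT 'auto'"

delete_sql = "DELETE FROM users USING #{staging} WHERE users.id = #{staging}.id"
insert_sql = "INSERT INTO users (\"id\",\"username\") SELECT \"id\",\"username\" FROM #{staging}"
puts [copy_sql, delete_sql, insert_sql]
```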
data/lib/dataduck/s3_object.rb
ADDED
@@ -0,0 +1,71 @@
+require 'aws-sdk'
+
+module DataDuck
+  class S3Object
+    def initialize(path, contents, aws_key, aws_secret, bucket, region, options={})
+      @path = path
+      @contents = contents
+      @options = options
+      @aws_key = aws_key
+      @aws_secret = aws_secret
+      @bucket = bucket
+      @region = region
+    end
+
+    def upload!
+      s3 = Aws::S3::Client.new(
+        region: @region,
+        access_key_id: @aws_key,
+        secret_access_key: @aws_secret,
+      )
+
+      attempts = 0
+
+      while attempts <= S3Object.max_retries
+        attempts += 1
+        put_hash = @options.merge({
+          acl: 'private',
+          bucket: @bucket,
+          body: @contents,
+          key: self.full_path,
+          server_side_encryption: 'AES256',
+        })
+        begin
+          response = s3.put_object(put_hash)
+          break # success, so stop retrying
+        rescue StandardError => e
+          raise e if attempts == S3Object.max_retries # re-raise (not throw) once retries are exhausted
+        end
+      end
+
+      response
+    end
+
+    def full_path
+      'dataduck/' + @path
+    end
+
+    def s3_path
+      "s3://#{ @bucket }/#{ full_path }"
+    end
+
+    def self.max_retries
+      3
+    end
+
+    def self.regions
+      [
+        { name: 'US Standard - N. Virginia', region: 'us-east-1' },
+        { name: 'US West - N. California', region: 'us-west-1' },
+        { name: 'US West - Oregon', region: 'us-west-2' },
+        { name: 'EU - Ireland', region: 'eu-west-1' },
+        { name: 'EU - Frankfurt', region: 'eu-central-1' },
+        { name: 'Asia Pacific - Singapore', region: 'ap-southeast-1' },
+        { name: 'Asia Pacific - Sydney', region: 'ap-southeast-2' },
+        { name: 'Asia Pacific - Tokyo', region: 'ap-northeast-1' },
+        { name: 'South America - Sao Paulo', region: 'sa-east-1' },
+      ]
+    end
+  end
+end
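Uploads always land under the `dataduck/` key prefix (see `full_path`). A hypothetical upload of one CSV (bucket name and credentials are placeholders):

```ruby
obj = DataDuck::S3Object.new(
  'pending/users_1444435200.csv',      # path under the dataduck/ prefix
  "\"1\",\"alice\"\n\"2\",\"bob\"\n",  # CSV body
  ENV['AWS_KEY'], ENV['AWS_SECRET'],
  'my-bucket', 'us-west-2'
)
obj.upload!
puts obj.s3_path  # => s3://my-bucket/dataduck/pending/users_1444435200.csv
```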
data/lib/dataduck/source.rb
ADDED
@@ -0,0 +1,46 @@
+module DataDuck
+
+  class Source
+    def self.source_config(name)
+      if DataDuck.config['sources'].nil? || DataDuck.config['sources'][name.to_s].nil?
+        raise Exception.new("Could not find source #{ name } in source configs.")
+      end
+
+      DataDuck.config['sources'][name.to_s]
+    end
+
+    def self.source(name)
+      name = name.to_s
+
+      if DataDuck.sources[name]
+        return DataDuck.sources[name]
+      end
+
+      configuration = DataDuck::Source.source_config(name)
+      source_type = configuration['type']
+
+      if source_type == "postgresql"
+        DataDuck.sources[name] = DataDuck::PostgresqlSource.new(configuration)
+        return DataDuck.sources[name]
+      else
+        raise ArgumentError.new("Unknown type '#{ source_type }' for source #{ name }.")
+      end
+    end
+
+    def connection
+      raise Exception.new("Must implement connection in subclass.")
+    end
+
+    def query
+      raise Exception.new("Must implement query in subclass.")
+    end
+
+    def schema(table_name)
+      self.connection.schema(table_name)
+    end
+
+    def self.skip_these_table_names
+      [:delayed_jobs, :schema_migrations]
+    end
+  end
+end
data/lib/dataduck/sql_db_source.rb
ADDED
@@ -0,0 +1,41 @@
+require_relative 'source.rb'
+
+require 'sequel'
+
+module DataDuck
+  class SqlDbSource < DataDuck::Source
+    def initialize(data)
+      @host = data['host']
+      @port = data['port']
+      @username = data['username']
+      @password = data['password']
+      @database = data['database']
+      @initialized_db_type = data['db_type']
+    end
+
+    def connection
+      @connection ||= Sequel.connect(
+        adapter: self.db_type,
+        user: @username,
+        host: @host,
+        database: @database,
+        password: @password,
+        port: @port
+      )
+    end
+
+    def db_type
+      return @initialized_db_type if @initialized_db_type
+
+      raise Exception.new("Abstract method db_type must be overridden in a subclass, or passed as data when initializing.")
+    end
+
+    def table_names
+      self.connection.tables.reject { |table| DataDuck::Source.skip_these_table_names.include?(table) }
+    end
+
+    def query(sql)
+      self.connection.fetch(sql).all
+    end
+  end
+end
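lib/dataduck/postgresql_source.rb and mysql_source.rb are not expanded in this diff, but given `db_type` above, a concrete source subclass only needs to name its Sequel adapter; presumably something close to:

```ruby
# Sketch only; the real postgresql_source.rb (+11 lines) is collapsed in this diff.
module DataDuck
  class PostgresqlSource < DataDuck::SqlDbSource
    def db_type
      'postgres'  # Sequel's adapter name for PostgreSQL
    end
  end
end
```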
data/lib/dataduck/table.rb
ADDED
@@ -0,0 +1,82 @@
+module DataDuck
+  class Table
+    class << self
+      attr_accessor :sources
+      attr_accessor :output_schema
+      attr_accessor :actions
+      attr_accessor :errors
+    end
+
+    attr_accessor :data, :errors # errors needs an instance accessor for extract!/transform! below
+
+    def self.transforms(transformation_name)
+      self.actions ||= []
+      self.actions << [:transform, transformation_name]
+    end
+    singleton_class.send(:alias_method, :transform, :transforms)
+
+    def self.validates(validation_name)
+      self.actions ||= []
+      self.actions << [:validate, validation_name]
+    end
+    singleton_class.send(:alias_method, :validate, :validates)
+
+    def self.source(source_name, source_data = [])
+      self.sources ||= {}
+      source = DataDuck::Source.source(source_name)
+      self.sources[source] = source_data
+    end
+
+    def self.output(schema)
+      self.output_schema ||= {}
+      self.output_schema.merge!(schema)
+    end
+
+    def actions
+      self.class.actions
+    end
+
+    def output_schema
+      self.class.output_schema
+    end
+
+    def output_column_names
+      self.class.output_schema.keys.sort
+    end
+
+    def extract!
+      puts "Extracting table #{ self.name }..."
+
+      self.errors ||= []
+      self.data = []
+      self.class.sources.each_pair do |source, source_columns|
+        import_query = "SELECT \"#{ source_columns.sort.join('","') }\" FROM #{ self.name }"
+        results = source.query(import_query)
+        self.data = results
+      end
+      self.data
+    end
+
+    def transform!
+      puts "Transforming table #{ self.name }..."
+
+      self.errors ||= []
+      self.actions.each do |action|
+        action_type = action[0]
+        action_method_name = action[1]
+        if action_type == :transform
+          self.data.map! { |row| self.public_send(action_method_name, row) }
+        elsif action_type == :validate
+          self.data.each do |row|
+            error = self.public_send(action_method_name, row)
+            self.errors << error if error # validators return an error string or nil
+          end
+        end
+      end
+    end
+
+    def name
+      DataDuck::Util.camelcase_to_underscore(self.class.name)
+    end
+  end
+end
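The class-level DSL (`source`, `transform(s)`, `validate(s)`, `output`) only records declarations; `extract!` and `transform!` replay them later, row by row. A minimal sketch of what gets recorded, assuming a my_database source is configured (hypothetical table):

```ruby
class Scores < DataDuck::Table
  source :my_database, ['id', 'score']
  transform :double_score
  output({ :id => :integer, :score => :integer })

  def double_score(row)
    row[:score] *= 2
    row
  end
end

p Scores.actions                  # => [[:transform, :double_score]]
p Scores.new.output_column_names  # => [:id, :score]
```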
data/lib/dataduck/util.rb
ADDED
@@ -0,0 +1,15 @@
+module DataDuck
+  class Util
+    def self.underscore_to_camelcase(str)
+      str.split('_').map{ |chunk| chunk.capitalize }.join
+    end
+
+    def self.camelcase_to_underscore(str)
+      str.gsub(/::/, '/')
+        .gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
+        .gsub(/([a-z\d])([A-Z])/,'\1_\2')
+        .tr("-", "_")
+        .downcase
+    end
+  end
+end
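The two helpers are inverses for simple names; `camelcase_to_underscore` also maps `::` to `/`, which is how `Table#name` turns a class name into a warehouse table name:

```ruby
DataDuck::Util.underscore_to_camelcase('game_purchases')  # => "GamePurchases"
DataDuck::Util.camelcase_to_underscore('GamePurchases')   # => "game_purchases"
DataDuck::Util.camelcase_to_underscore('DataDuck::ETL')   # => "data_duck/etl"
```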
data/lib/helpers/module_vars.rb
ADDED
@@ -0,0 +1,19 @@
+module ModuleVars
+  def define_class_method(name, &block)
+    (class << self; self; end).instance_eval do
+      define_method(name, &block)
+    end
+  end
+
+  def create_module_var(name, val = nil)
+    class_variable_set("@@#{ name }", val)
+
+    define_class_method(name) do
+      class_variable_get("@@#{ name }")
+    end
+
+    define_class_method("#{name}=") do |set_to|
+      class_variable_set("@@#{ name }", set_to)
+    end
+  end
+end
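`create_module_var` backs a module-level reader/writer pair with a class variable. A self-contained demonstration (the Settings module is made up):

```ruby
module Settings
  extend ModuleVars
  create_module_var('environment', 'development')
end

puts Settings.environment           # => development
Settings.environment = 'production'
puts Settings.environment           # => production
```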
data/static/logo.png
ADDED
Binary file
metadata
ADDED
@@ -0,0 +1,178 @@
+--- !ruby/object:Gem::Specification
+name: dataduck
+version: !ruby/object:Gem::Version
+  version: 0.2.0
+platform: ruby
+authors:
+- Jeff Pickhardt
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-10-10 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.6'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.6'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.3'
+- !ruby/object:Gem::Dependency
+  name: sequel
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '4.19'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '4.19'
+- !ruby/object:Gem::Dependency
+  name: pg
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.16'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.16'
+- !ruby/object:Gem::Dependency
+  name: aws-sdk
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.0'
+- !ruby/object:Gem::Dependency
+  name: sequel-redshift
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: A straightforward, effective ETL framework.
+email:
+- pickhardt@gmail.com
+- admin@dataducketl.com
+executables:
+- dataduck
+extensions: []
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- ".rspec"
+- ".ruby-version"
+- Gemfile
+- README.md
+- Rakefile
+- bin/console
+- bin/dataduck
+- bin/setup
+- dataduck.gemspec
+- examples/example/.gitignore
+- examples/example/.ruby-version
+- examples/example/Gemfile
+- examples/example/README.md
+- examples/example/config/replace_me.yml
+- examples/example/src/main.rb
+- examples/example/src/tables/games.rb
+- examples/example/src/tables/users.rb
+- lib/dataduck.rb
+- lib/dataduck/commands.rb
+- lib/dataduck/destination.rb
+- lib/dataduck/etl.rb
+- lib/dataduck/mysql_source.rb
+- lib/dataduck/postgresql_source.rb
+- lib/dataduck/redshift_destination.rb
+- lib/dataduck/s3_object.rb
+- lib/dataduck/source.rb
+- lib/dataduck/sql_db_source.rb
+- lib/dataduck/table.rb
+- lib/dataduck/util.rb
+- lib/dataduck/version.rb
+- lib/helpers/module_vars.rb
+- lib/templates/quickstart/main.rb.erb
+- lib/templates/quickstart/table.rb.erb
+- static/logo.png
+homepage: http://dataducketl.com/
+licenses: []
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.8
+signing_key:
+specification_version: 4
+summary: A straightforward, effective ETL framework.
+test_files: []