dataduck 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/DEV_README.md +1 -1
- data/lib/dataduck/destination.rb +2 -0
- data/lib/dataduck/logs.rb +24 -0
- data/lib/dataduck/mysql_source.rb +4 -0
- data/lib/dataduck/postgresql_source.rb +4 -0
- data/lib/dataduck/redshift_destination.rb +18 -5
- data/lib/dataduck/source.rb +6 -0
- data/lib/dataduck/sql_db_source.rb +8 -2
- data/lib/dataduck/table.rb +7 -3
- data/lib/dataduck/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19296dd9ee329eee67c43509d2ab0012a9f4f47f
|
4
|
+
data.tar.gz: b4518be78d421506a7e1820056f4b5022b79eec7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c00321b4699cb60d9786c7808a0dbb10acfa4cab820df0394397289087308fb3e96c185b388334b3d6caa0b41920e146f07454fdbbf7b04b8b5f534579ef434f
|
7
|
+
data.tar.gz: db88b4fa5354387fdae08f1b49382ae1aa3cfa97eefa735fb2e619e5ea8d46c47b77d61a311dd4f9c801bdb773bc007edc1d30d19d0ecb98fef073a7674113fa
|
data/DEV_README.md
CHANGED
data/lib/dataduck/destination.rb
CHANGED
data/lib/dataduck/logs.rb
CHANGED
@@ -12,14 +12,26 @@ module DataDuck
|
|
12
12
|
@@logger ||= Logger.new(log_file_path, shift_age = 100, shift_size = 100 * @@ONE_MB_IN_BYTES)
|
13
13
|
end
|
14
14
|
|
15
|
+
def Logs.debug(message)
|
16
|
+
self.ensure_logger_exists!
|
17
|
+
message = Logs.sanitize_message(message)
|
18
|
+
|
19
|
+
puts "[DEBUG] #{ message }"
|
20
|
+
@@logger.debug(message)
|
21
|
+
end
|
22
|
+
|
15
23
|
def Logs.info(message)
|
16
24
|
self.ensure_logger_exists!
|
25
|
+
message = Logs.sanitize_message(message)
|
26
|
+
|
17
27
|
puts "[INFO] #{ message }"
|
18
28
|
@@logger.info(message)
|
19
29
|
end
|
20
30
|
|
21
31
|
def Logs.warn(message)
|
22
32
|
self.ensure_logger_exists!
|
33
|
+
message = Logs.sanitize_message(message)
|
34
|
+
|
23
35
|
puts "[WARN] #{ message }"
|
24
36
|
@@logger.warn(message)
|
25
37
|
end
|
@@ -27,8 +39,20 @@ module DataDuck
|
|
27
39
|
def Logs.error(err, message = nil)
|
28
40
|
self.ensure_logger_exists!
|
29
41
|
message = err.to_s unless message
|
42
|
+
message = Logs.sanitize_message(message)
|
43
|
+
|
30
44
|
puts "[ERROR] #{ message }"
|
31
45
|
@@logger.error(message)
|
32
46
|
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def Logs.sanitize_message(message)
|
51
|
+
message = message.gsub(/aws_access_key_id=[^';]+/, "aws_access_key_id=******")
|
52
|
+
message = message.gsub(/AWS_ACCESS_KEY_ID=[^';]+/, "AWS_ACCESS_KEY_ID=******")
|
53
|
+
message = message.gsub(/aws_secret_access_key=[^';]+/, "aws_secret_access_key=******")
|
54
|
+
message = message.gsub(/AWS_SECRET_ACCESS_KEY=[^';]+/, "AWS_SECRET_ACCESS_KEY=******")
|
55
|
+
message
|
56
|
+
end
|
33
57
|
end
|
34
58
|
end
|
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require_relative 'destination
|
1
|
+
require_relative 'destination'
|
2
2
|
|
3
3
|
module DataDuck
|
4
4
|
class RedshiftDestination < DataDuck::Destination
|
@@ -43,8 +43,7 @@ module DataDuck
|
|
43
43
|
column_names = columns.map { |col| col[:name].to_s }
|
44
44
|
table.output_schema.map do |name, data_type|
|
45
45
|
if !column_names.include?(name.to_s)
|
46
|
-
redshift_data_type = data_type
|
47
|
-
redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
|
46
|
+
redshift_data_type = self.type_to_redshift_type(data_type)
|
48
47
|
self.query("ALTER TABLE #{ table.building_name } ADD #{ name } #{ redshift_data_type }")
|
49
48
|
end
|
50
49
|
end
|
@@ -53,8 +52,7 @@ module DataDuck
|
|
53
52
|
def create_table_query(table, table_name = nil)
|
54
53
|
table_name ||= table.name
|
55
54
|
props_array = table.output_schema.map do |name, data_type|
|
56
|
-
redshift_data_type = data_type
|
57
|
-
redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
|
55
|
+
redshift_data_type = self.type_to_redshift_type(data_type)
|
58
56
|
"\"#{ name }\" #{ redshift_data_type }"
|
59
57
|
end
|
60
58
|
props_string = props_array.join(', ')
|
@@ -99,6 +97,20 @@ module DataDuck
|
|
99
97
|
return data_string_components.join
|
100
98
|
end
|
101
99
|
|
100
|
+
def type_to_redshift_type(which_type)
|
101
|
+
which_type = which_type.to_s
|
102
|
+
|
103
|
+
if ["string", "text", "bigtext"].include?(which_type)
|
104
|
+
{
|
105
|
+
"string" => "varchar(255)",
|
106
|
+
"text" => "varchar(8191)",
|
107
|
+
"bigtext" => "varchar(65535)", # Redshift maximum
|
108
|
+
}[which_type]
|
109
|
+
else
|
110
|
+
which_type
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
102
114
|
def dbconsole(options = {})
|
103
115
|
args = []
|
104
116
|
args << "--host=#{ @host }"
|
@@ -147,6 +159,7 @@ module DataDuck
|
|
147
159
|
end
|
148
160
|
|
149
161
|
def query(sql)
|
162
|
+
Logs.debug("SQL executing on #{ self.name }:\n " + sql)
|
150
163
|
self.connection[sql].map { |elem| elem }
|
151
164
|
end
|
152
165
|
|
data/lib/dataduck/source.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require_relative 'database'
|
2
|
+
|
1
3
|
module DataDuck
|
2
4
|
class Source < DataDuck::Database
|
3
5
|
def self.load_config!
|
@@ -47,6 +49,10 @@ module DataDuck
|
|
47
49
|
return DataDuck::Source.source(source_name)
|
48
50
|
end
|
49
51
|
|
52
|
+
def escape_char
|
53
|
+
'' # implement in subclass, e.g. " in postgresql and ` in mysql
|
54
|
+
end
|
55
|
+
|
50
56
|
def schema(table_name)
|
51
57
|
self.connection.schema(table_name)
|
52
58
|
end
|
data/lib/dataduck/sql_db_source.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
require_relative 'source
|
1
|
+
require_relative 'source'
|
2
|
+
require_relative 'logs'
|
2
3
|
|
3
4
|
require 'sequel'
|
4
5
|
|
@@ -29,7 +30,11 @@ module DataDuck
|
|
29
30
|
def db_type
|
30
31
|
return @initialized_db_type if @initialized_db_type
|
31
32
|
|
32
|
-
raise
|
33
|
+
raise NotImplementedError.new("Abstract method db_type must be overwritten by subclass, or passed as data when initializing.")
|
34
|
+
end
|
35
|
+
|
36
|
+
def escape_char
|
37
|
+
raise NotImplementedError.new("Abstract method escape_char must be overwritten by subclass.")
|
33
38
|
end
|
34
39
|
|
35
40
|
def table_names
|
@@ -41,6 +46,7 @@ module DataDuck
|
|
41
46
|
raise ArgumentError.new("Database #{ self.name } must not run mutating sql: #{ sql }")
|
42
47
|
end
|
43
48
|
|
49
|
+
Logs.debug("SQL executing on #{ self.name }:\n " + sql)
|
44
50
|
self.connection.fetch(sql).all
|
45
51
|
end
|
46
52
|
end
|
data/lib/dataduck/table.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require_relative 'logs'
|
2
|
+
|
1
3
|
module DataDuck
|
2
4
|
class Table
|
3
5
|
class << self
|
@@ -113,8 +115,10 @@ module DataDuck
|
|
113
115
|
end
|
114
116
|
|
115
117
|
def extract_query(source_spec, destination = nil)
|
118
|
+
escape_char = source_spec[:source].escape_char
|
119
|
+
|
116
120
|
base_query = source_spec.has_key?(:query) ? source_spec[:query] :
|
117
|
-
"SELECT
|
121
|
+
"SELECT #{ escape_char }#{ source_spec[:columns].sort.join(escape_char + ',' + escape_char) }#{ escape_char } FROM #{ source_spec[:table_name] }"
|
118
122
|
|
119
123
|
extract_by_clause = ""
|
120
124
|
limit_clause = ""
|
@@ -165,11 +169,11 @@ module DataDuck
|
|
165
169
|
end
|
166
170
|
|
167
171
|
def output_schema
|
168
|
-
self.class.output_schema
|
172
|
+
self.class.output_schema || {}
|
169
173
|
end
|
170
174
|
|
171
175
|
def output_column_names
|
172
|
-
self.
|
176
|
+
self.output_schema.keys.sort.map(&:to_s)
|
173
177
|
end
|
174
178
|
|
175
179
|
def show
|
data/lib/dataduck/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|