dataduck 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/DEV_README.md +1 -1
- data/lib/dataduck/destination.rb +2 -0
- data/lib/dataduck/logs.rb +24 -0
- data/lib/dataduck/mysql_source.rb +4 -0
- data/lib/dataduck/postgresql_source.rb +4 -0
- data/lib/dataduck/redshift_destination.rb +18 -5
- data/lib/dataduck/source.rb +6 -0
- data/lib/dataduck/sql_db_source.rb +8 -2
- data/lib/dataduck/table.rb +7 -3
- data/lib/dataduck/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19296dd9ee329eee67c43509d2ab0012a9f4f47f
|
4
|
+
data.tar.gz: b4518be78d421506a7e1820056f4b5022b79eec7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c00321b4699cb60d9786c7808a0dbb10acfa4cab820df0394397289087308fb3e96c185b388334b3d6caa0b41920e146f07454fdbbf7b04b8b5f534579ef434f
|
7
|
+
data.tar.gz: db88b4fa5354387fdae08f1b49382ae1aa3cfa97eefa735fb2e619e5ea8d46c47b77d61a311dd4f9c801bdb773bc007edc1d30d19d0ecb98fef073a7674113fa
|
data/DEV_README.md
CHANGED
data/lib/dataduck/destination.rb
CHANGED
data/lib/dataduck/logs.rb
CHANGED
@@ -12,14 +12,26 @@ module DataDuck
|
|
12
12
|
@@logger ||= Logger.new(log_file_path, shift_age = 100, shift_size = 100 * @@ONE_MB_IN_BYTES)
|
13
13
|
end
|
14
14
|
|
15
|
+
def Logs.debug(message)
|
16
|
+
self.ensure_logger_exists!
|
17
|
+
message = Logs.sanitize_message(message)
|
18
|
+
|
19
|
+
puts "[DEBUG] #{ message }"
|
20
|
+
@@logger.debug(message)
|
21
|
+
end
|
22
|
+
|
15
23
|
def Logs.info(message)
|
16
24
|
self.ensure_logger_exists!
|
25
|
+
message = Logs.sanitize_message(message)
|
26
|
+
|
17
27
|
puts "[INFO] #{ message }"
|
18
28
|
@@logger.info(message)
|
19
29
|
end
|
20
30
|
|
21
31
|
def Logs.warn(message)
|
22
32
|
self.ensure_logger_exists!
|
33
|
+
message = Logs.sanitize_message(message)
|
34
|
+
|
23
35
|
puts "[WARN] #{ message }"
|
24
36
|
@@logger.warn(message)
|
25
37
|
end
|
@@ -27,8 +39,20 @@ module DataDuck
|
|
27
39
|
def Logs.error(err, message = nil)
|
28
40
|
self.ensure_logger_exists!
|
29
41
|
message = err.to_s unless message
|
42
|
+
message = Logs.sanitize_message(message)
|
43
|
+
|
30
44
|
puts "[ERROR] #{ message }"
|
31
45
|
@@logger.error(message)
|
32
46
|
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def Logs.sanitize_message(message)
|
51
|
+
message = message.gsub(/aws_access_key_id=[^';]+/, "aws_access_key_id=******")
|
52
|
+
message = message.gsub(/AWS_ACCESS_KEY_ID=[^';]+/, "AWS_ACCESS_KEY_ID=******")
|
53
|
+
message = message.gsub(/aws_secret_access_key=[^';]+/, "aws_secret_access_key=******")
|
54
|
+
message = message.gsub(/AWS_SECRET_ACCESS_KEY=[^';]+/, "AWS_SECRET_ACCESS_KEY=******")
|
55
|
+
message
|
56
|
+
end
|
33
57
|
end
|
34
58
|
end
|
data/lib/dataduck/redshift_destination.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require_relative 'destination
|
1
|
+
require_relative 'destination'
|
2
2
|
|
3
3
|
module DataDuck
|
4
4
|
class RedshiftDestination < DataDuck::Destination
|
@@ -43,8 +43,7 @@ module DataDuck
|
|
43
43
|
column_names = columns.map { |col| col[:name].to_s }
|
44
44
|
table.output_schema.map do |name, data_type|
|
45
45
|
if !column_names.include?(name.to_s)
|
46
|
-
redshift_data_type = data_type
|
47
|
-
redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
|
46
|
+
redshift_data_type = self.type_to_redshift_type(data_type)
|
48
47
|
self.query("ALTER TABLE #{ table.building_name } ADD #{ name } #{ redshift_data_type }")
|
49
48
|
end
|
50
49
|
end
|
@@ -53,8 +52,7 @@ module DataDuck
|
|
53
52
|
def create_table_query(table, table_name = nil)
|
54
53
|
table_name ||= table.name
|
55
54
|
props_array = table.output_schema.map do |name, data_type|
|
56
|
-
redshift_data_type = data_type
|
57
|
-
redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
|
55
|
+
redshift_data_type = self.type_to_redshift_type(data_type)
|
58
56
|
"\"#{ name }\" #{ redshift_data_type }"
|
59
57
|
end
|
60
58
|
props_string = props_array.join(', ')
|
@@ -99,6 +97,20 @@ module DataDuck
|
|
99
97
|
return data_string_components.join
|
100
98
|
end
|
101
99
|
|
100
|
+
def type_to_redshift_type(which_type)
|
101
|
+
which_type = which_type.to_s
|
102
|
+
|
103
|
+
if ["string", "text", "bigtext"].include?(which_type)
|
104
|
+
{
|
105
|
+
"string" => "varchar(255)",
|
106
|
+
"text" => "varchar(8191)",
|
107
|
+
"bigtext" => "varchar(65535)", # Redshift maximum
|
108
|
+
}[which_type]
|
109
|
+
else
|
110
|
+
which_type
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
102
114
|
def dbconsole(options = {})
|
103
115
|
args = []
|
104
116
|
args << "--host=#{ @host }"
|
@@ -147,6 +159,7 @@ module DataDuck
|
|
147
159
|
end
|
148
160
|
|
149
161
|
def query(sql)
|
162
|
+
Logs.debug("SQL executing on #{ self.name }:\n " + sql)
|
150
163
|
self.connection[sql].map { |elem| elem }
|
151
164
|
end
|
152
165
|
|
data/lib/dataduck/source.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require_relative 'database'
|
2
|
+
|
1
3
|
module DataDuck
|
2
4
|
class Source < DataDuck::Database
|
3
5
|
def self.load_config!
|
@@ -47,6 +49,10 @@ module DataDuck
|
|
47
49
|
return DataDuck::Source.source(source_name)
|
48
50
|
end
|
49
51
|
|
52
|
+
def escape_char
|
53
|
+
'' # implement in subclass, e.g. " in postgresql and ` in mysql
|
54
|
+
end
|
55
|
+
|
50
56
|
def schema(table_name)
|
51
57
|
self.connection.schema(table_name)
|
52
58
|
end
|
data/lib/dataduck/sql_db_source.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
require_relative 'source
|
1
|
+
require_relative 'source'
|
2
|
+
require_relative 'logs'
|
2
3
|
|
3
4
|
require 'sequel'
|
4
5
|
|
@@ -29,7 +30,11 @@ module DataDuck
|
|
29
30
|
def db_type
|
30
31
|
return @initialized_db_type if @initialized_db_type
|
31
32
|
|
32
|
-
raise
|
33
|
+
raise NotImplementedError.new("Abstract method db_type must be overwritten by subclass, or passed as data when initializing.")
|
34
|
+
end
|
35
|
+
|
36
|
+
def escape_char
|
37
|
+
raise NotImplementedError.new("Abstract method escape_char must be overwritten by subclass.")
|
33
38
|
end
|
34
39
|
|
35
40
|
def table_names
|
@@ -41,6 +46,7 @@ module DataDuck
|
|
41
46
|
raise ArgumentError.new("Database #{ self.name } must not run mutating sql: #{ sql }")
|
42
47
|
end
|
43
48
|
|
49
|
+
Logs.debug("SQL executing on #{ self.name }:\n " + sql)
|
44
50
|
self.connection.fetch(sql).all
|
45
51
|
end
|
46
52
|
end
|
data/lib/dataduck/table.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require_relative 'logs'
|
2
|
+
|
1
3
|
module DataDuck
|
2
4
|
class Table
|
3
5
|
class << self
|
@@ -113,8 +115,10 @@ module DataDuck
|
|
113
115
|
end
|
114
116
|
|
115
117
|
def extract_query(source_spec, destination = nil)
|
118
|
+
escape_char = source_spec[:source].escape_char
|
119
|
+
|
116
120
|
base_query = source_spec.has_key?(:query) ? source_spec[:query] :
|
117
|
-
"SELECT
|
121
|
+
"SELECT #{ escape_char }#{ source_spec[:columns].sort.join(escape_char + ',' + escape_char) }#{ escape_char } FROM #{ source_spec[:table_name] }"
|
118
122
|
|
119
123
|
extract_by_clause = ""
|
120
124
|
limit_clause = ""
|
@@ -165,11 +169,11 @@ module DataDuck
|
|
165
169
|
end
|
166
170
|
|
167
171
|
def output_schema
|
168
|
-
self.class.output_schema
|
172
|
+
self.class.output_schema || {}
|
169
173
|
end
|
170
174
|
|
171
175
|
def output_column_names
|
172
|
-
self.
|
176
|
+
self.output_schema.keys.sort.map(&:to_s)
|
173
177
|
end
|
174
178
|
|
175
179
|
def show
|
data/lib/dataduck/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|