dataduck 0.5.0 → 0.5.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: de529cfe949f8c1fb4a4cb36129188636ffbcb74
4
- data.tar.gz: ebbcaa35d0babcbabdaef339f9ef72b061fc54d5
3
+ metadata.gz: 19296dd9ee329eee67c43509d2ab0012a9f4f47f
4
+ data.tar.gz: b4518be78d421506a7e1820056f4b5022b79eec7
5
5
  SHA512:
6
- metadata.gz: 2958e2909631c314c7104fa340f0a587b47ab172417aa792f2b1a31377b1c455188acb0554c84ab656d057c4c060f9d7110bb6971fe45cedc8d4d3a117339d1e
7
- data.tar.gz: 5e62d009d64ebe30b1ade7184c5e7f1041e8c58d3cbf5be39d3d1885e89a3126870197568694ead1673018985b079757ad00117d1aa5423d6413d027a292cd09
6
+ metadata.gz: c00321b4699cb60d9786c7808a0dbb10acfa4cab820df0394397289087308fb3e96c185b388334b3d6caa0b41920e146f07454fdbbf7b04b8b5f534579ef434f
7
+ data.tar.gz: db88b4fa5354387fdae08f1b49382ae1aa3cfa97eefa735fb2e619e5ea8d46c47b77d61a311dd4f9c801bdb773bc007edc1d30d19d0ecb98fef073a7674113fa
data/DEV_README.md CHANGED
@@ -14,4 +14,4 @@ gem push dataduck-VERSION.gem
14
14
 
15
15
  Use something like this:
16
16
 
17
- gem 'dataduck', '0.3.0', path: '/Users/jrp/projects/dataduck'
17
+ gem 'dataduck', '0.5.1', path: '/Users/jrp/projects/dataduck'
@@ -1,3 +1,5 @@
1
+ require_relative 'database'
2
+
1
3
  module DataDuck
2
4
  class Destination < DataDuck::Database
3
5
  def self.load_config!
data/lib/dataduck/logs.rb CHANGED
@@ -12,14 +12,26 @@ module DataDuck
12
12
  @@logger ||= Logger.new(log_file_path, shift_age = 100, shift_size = 100 * @@ONE_MB_IN_BYTES)
13
13
  end
14
14
 
15
+ def Logs.debug(message)
16
+ self.ensure_logger_exists!
17
+ message = Logs.sanitize_message(message)
18
+
19
+ puts "[DEBUG] #{ message }"
20
+ @@logger.debug(message)
21
+ end
22
+
15
23
  def Logs.info(message)
16
24
  self.ensure_logger_exists!
25
+ message = Logs.sanitize_message(message)
26
+
17
27
  puts "[INFO] #{ message }"
18
28
  @@logger.info(message)
19
29
  end
20
30
 
21
31
  def Logs.warn(message)
22
32
  self.ensure_logger_exists!
33
+ message = Logs.sanitize_message(message)
34
+
23
35
  puts "[WARN] #{ message }"
24
36
  @@logger.warn(message)
25
37
  end
@@ -27,8 +39,20 @@ module DataDuck
27
39
  def Logs.error(err, message = nil)
28
40
  self.ensure_logger_exists!
29
41
  message = err.to_s unless message
42
+ message = Logs.sanitize_message(message)
43
+
30
44
  puts "[ERROR] #{ message }"
31
45
  @@logger.error(message)
32
46
  end
47
+
48
+ private
49
+
50
+ def Logs.sanitize_message(message)
51
+ message = message.gsub(/aws_access_key_id=[^';]+/, "aws_access_key_id=******")
52
+ message = message.gsub(/AWS_ACCESS_KEY_ID=[^';]+/, "AWS_ACCESS_KEY_ID=******")
53
+ message = message.gsub(/aws_secret_access_key=[^';]+/, "aws_secret_access_key=******")
54
+ message = message.gsub(/AWS_SECRET_ACCESS_KEY=[^';]+/, "AWS_SECRET_ACCESS_KEY=******")
55
+ message
56
+ end
33
57
  end
34
58
  end
@@ -18,5 +18,9 @@ module DataDuck
18
18
 
19
19
  self.find_command_and_execute("mysql", *args)
20
20
  end
21
+
22
+ def escape_char
23
+ '`'
24
+ end
21
25
  end
22
26
  end
@@ -25,5 +25,9 @@ module DataDuck
25
25
  size_in_gb = size_in_bytes / 1_000_000_000.0
26
26
  size_in_gb
27
27
  end
28
+
29
+ def escape_char
30
+ '"'
31
+ end
28
32
  end
29
33
  end
@@ -1,4 +1,4 @@
1
- require_relative 'destination.rb'
1
+ require_relative 'destination'
2
2
 
3
3
  module DataDuck
4
4
  class RedshiftDestination < DataDuck::Destination
@@ -43,8 +43,7 @@ module DataDuck
43
43
  column_names = columns.map { |col| col[:name].to_s }
44
44
  table.output_schema.map do |name, data_type|
45
45
  if !column_names.include?(name.to_s)
46
- redshift_data_type = data_type.to_s
47
- redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
46
+ redshift_data_type = self.type_to_redshift_type(data_type)
48
47
  self.query("ALTER TABLE #{ table.building_name } ADD #{ name } #{ redshift_data_type }")
49
48
  end
50
49
  end
@@ -53,8 +52,7 @@ module DataDuck
53
52
  def create_table_query(table, table_name = nil)
54
53
  table_name ||= table.name
55
54
  props_array = table.output_schema.map do |name, data_type|
56
- redshift_data_type = data_type.to_s
57
- redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
55
+ redshift_data_type = self.type_to_redshift_type(data_type)
58
56
  "\"#{ name }\" #{ redshift_data_type }"
59
57
  end
60
58
  props_string = props_array.join(', ')
@@ -99,6 +97,20 @@ module DataDuck
99
97
  return data_string_components.join
100
98
  end
101
99
 
100
+ def type_to_redshift_type(which_type)
101
+ which_type = which_type.to_s
102
+
103
+ if ["string", "text", "bigtext"].include?(which_type)
104
+ {
105
+ "string" => "varchar(255)",
106
+ "text" => "varchar(8191)",
107
+ "bigtext" => "varchar(65535)", # Redshift maximum
108
+ }[which_type]
109
+ else
110
+ which_type
111
+ end
112
+ end
113
+
102
114
  def dbconsole(options = {})
103
115
  args = []
104
116
  args << "--host=#{ @host }"
@@ -147,6 +159,7 @@ module DataDuck
147
159
  end
148
160
 
149
161
  def query(sql)
162
+ Logs.debug("SQL executing on #{ self.name }:\n " + sql)
150
163
  self.connection[sql].map { |elem| elem }
151
164
  end
152
165
 
@@ -1,3 +1,5 @@
1
+ require_relative 'database'
2
+
1
3
  module DataDuck
2
4
  class Source < DataDuck::Database
3
5
  def self.load_config!
@@ -47,6 +49,10 @@ module DataDuck
47
49
  return DataDuck::Source.source(source_name)
48
50
  end
49
51
 
52
+ def escape_char
53
+ '' # implement in subclass, e.g. " in postgresql and ` in mysql
54
+ end
55
+
50
56
  def schema(table_name)
51
57
  self.connection.schema(table_name)
52
58
  end
@@ -1,4 +1,5 @@
1
- require_relative 'source.rb'
1
+ require_relative 'source'
2
+ require_relative 'logs'
2
3
 
3
4
  require 'sequel'
4
5
 
@@ -29,7 +30,11 @@ module DataDuck
29
30
  def db_type
30
31
  return @initialized_db_type if @initialized_db_type
31
32
 
32
- raise Exception.new("Abstract method db_type must be overwritten by subclass, or passed as data when initializing.")
33
+ raise NotImplementedError.new("Abstract method db_type must be overwritten by subclass, or passed as data when initializing.")
34
+ end
35
+
36
+ def escape_char
37
+ raise NotImplementedError.new("Abstract method escape_char must be overwritten by subclass.")
33
38
  end
34
39
 
35
40
  def table_names
@@ -41,6 +46,7 @@ module DataDuck
41
46
  raise ArgumentError.new("Database #{ self.name } must not run mutating sql: #{ sql }")
42
47
  end
43
48
 
49
+ Logs.debug("SQL executing on #{ self.name }:\n " + sql)
44
50
  self.connection.fetch(sql).all
45
51
  end
46
52
  end
@@ -1,3 +1,5 @@
1
+ require_relative 'logs'
2
+
1
3
  module DataDuck
2
4
  class Table
3
5
  class << self
@@ -113,8 +115,10 @@ module DataDuck
113
115
  end
114
116
 
115
117
  def extract_query(source_spec, destination = nil)
118
+ escape_char = source_spec[:source].escape_char
119
+
116
120
  base_query = source_spec.has_key?(:query) ? source_spec[:query] :
117
- "SELECT \"#{ source_spec[:columns].sort.join('","') }\" FROM #{ source_spec[:table_name] }"
121
+ "SELECT #{ escape_char }#{ source_spec[:columns].sort.join(escape_char + ',' + escape_char) }#{ escape_char } FROM #{ source_spec[:table_name] }"
118
122
 
119
123
  extract_by_clause = ""
120
124
  limit_clause = ""
@@ -165,11 +169,11 @@ module DataDuck
165
169
  end
166
170
 
167
171
  def output_schema
168
- self.class.output_schema
172
+ self.class.output_schema || {}
169
173
  end
170
174
 
171
175
  def output_column_names
172
- self.class.output_schema.keys.sort.map(&:to_s)
176
+ self.output_schema.keys.sort.map(&:to_s)
173
177
  end
174
178
 
175
179
  def show
@@ -1,6 +1,6 @@
1
1
  module DataDuck
2
2
  VERSION_MAJOR = 0
3
3
  VERSION_MINOR = 5
4
- VERSION_PATCH = 0
4
+ VERSION_PATCH = 1
5
5
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-19 00:00:00.000000000 Z
11
+ date: 2015-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler