dataduck 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: de529cfe949f8c1fb4a4cb36129188636ffbcb74
4
- data.tar.gz: ebbcaa35d0babcbabdaef339f9ef72b061fc54d5
3
+ metadata.gz: 19296dd9ee329eee67c43509d2ab0012a9f4f47f
4
+ data.tar.gz: b4518be78d421506a7e1820056f4b5022b79eec7
5
5
  SHA512:
6
- metadata.gz: 2958e2909631c314c7104fa340f0a587b47ab172417aa792f2b1a31377b1c455188acb0554c84ab656d057c4c060f9d7110bb6971fe45cedc8d4d3a117339d1e
7
- data.tar.gz: 5e62d009d64ebe30b1ade7184c5e7f1041e8c58d3cbf5be39d3d1885e89a3126870197568694ead1673018985b079757ad00117d1aa5423d6413d027a292cd09
6
+ metadata.gz: c00321b4699cb60d9786c7808a0dbb10acfa4cab820df0394397289087308fb3e96c185b388334b3d6caa0b41920e146f07454fdbbf7b04b8b5f534579ef434f
7
+ data.tar.gz: db88b4fa5354387fdae08f1b49382ae1aa3cfa97eefa735fb2e619e5ea8d46c47b77d61a311dd4f9c801bdb773bc007edc1d30d19d0ecb98fef073a7674113fa
data/DEV_README.md CHANGED
@@ -14,4 +14,4 @@ gem push dataduck-VERSION.gem
14
14
 
15
15
  Use something like this:
16
16
 
17
- gem 'dataduck', '0.3.0', path: '/Users/jrp/projects/dataduck'
17
+ gem 'dataduck', '0.5.1', path: '/Users/jrp/projects/dataduck'
@@ -1,3 +1,5 @@
1
+ require_relative 'database'
2
+
1
3
  module DataDuck
2
4
  class Destination < DataDuck::Database
3
5
  def self.load_config!
data/lib/dataduck/logs.rb CHANGED
@@ -12,14 +12,26 @@ module DataDuck
12
12
  @@logger ||= Logger.new(log_file_path, shift_age = 100, shift_size = 100 * @@ONE_MB_IN_BYTES)
13
13
  end
14
14
 
15
+ def Logs.debug(message)
16
+ self.ensure_logger_exists!
17
+ message = Logs.sanitize_message(message)
18
+
19
+ puts "[DEBUG] #{ message }"
20
+ @@logger.debug(message)
21
+ end
22
+
15
23
  def Logs.info(message)
16
24
  self.ensure_logger_exists!
25
+ message = Logs.sanitize_message(message)
26
+
17
27
  puts "[INFO] #{ message }"
18
28
  @@logger.info(message)
19
29
  end
20
30
 
21
31
  def Logs.warn(message)
22
32
  self.ensure_logger_exists!
33
+ message = Logs.sanitize_message(message)
34
+
23
35
  puts "[WARN] #{ message }"
24
36
  @@logger.warn(message)
25
37
  end
@@ -27,8 +39,20 @@ module DataDuck
27
39
  def Logs.error(err, message = nil)
28
40
  self.ensure_logger_exists!
29
41
  message = err.to_s unless message
42
+ message = Logs.sanitize_message(message)
43
+
30
44
  puts "[ERROR] #{ message }"
31
45
  @@logger.error(message)
32
46
  end
47
+
48
+ private
49
+
50
+ def Logs.sanitize_message(message)
51
+ message = message.gsub(/aws_access_key_id=[^';]+/, "aws_access_key_id=******")
52
+ message = message.gsub(/AWS_ACCESS_KEY_ID=[^';]+/, "AWS_ACCESS_KEY_ID=******")
53
+ message = message.gsub(/aws_secret_access_key=[^';]+/, "aws_secret_access_key=******")
54
+ message = message.gsub(/AWS_SECRET_ACCESS_KEY=[^';]+/, "AWS_SECRET_ACCESS_KEY=******")
55
+ message
56
+ end
33
57
  end
34
58
  end
@@ -18,5 +18,9 @@ module DataDuck
18
18
 
19
19
  self.find_command_and_execute("mysql", *args)
20
20
  end
21
+
22
+ def escape_char
23
+ '`'
24
+ end
21
25
  end
22
26
  end
@@ -25,5 +25,9 @@ module DataDuck
25
25
  size_in_gb = size_in_bytes / 1_000_000_000.0
26
26
  size_in_gb
27
27
  end
28
+
29
+ def escape_char
30
+ '"'
31
+ end
28
32
  end
29
33
  end
@@ -1,4 +1,4 @@
1
- require_relative 'destination.rb'
1
+ require_relative 'destination'
2
2
 
3
3
  module DataDuck
4
4
  class RedshiftDestination < DataDuck::Destination
@@ -43,8 +43,7 @@ module DataDuck
43
43
  column_names = columns.map { |col| col[:name].to_s }
44
44
  table.output_schema.map do |name, data_type|
45
45
  if !column_names.include?(name.to_s)
46
- redshift_data_type = data_type.to_s
47
- redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
46
+ redshift_data_type = self.type_to_redshift_type(data_type)
48
47
  self.query("ALTER TABLE #{ table.building_name } ADD #{ name } #{ redshift_data_type }")
49
48
  end
50
49
  end
@@ -53,8 +52,7 @@ module DataDuck
53
52
  def create_table_query(table, table_name = nil)
54
53
  table_name ||= table.name
55
54
  props_array = table.output_schema.map do |name, data_type|
56
- redshift_data_type = data_type.to_s
57
- redshift_data_type = 'varchar(255)' if redshift_data_type == 'string'
55
+ redshift_data_type = self.type_to_redshift_type(data_type)
58
56
  "\"#{ name }\" #{ redshift_data_type }"
59
57
  end
60
58
  props_string = props_array.join(', ')
@@ -99,6 +97,20 @@ module DataDuck
99
97
  return data_string_components.join
100
98
  end
101
99
 
100
+ def type_to_redshift_type(which_type)
101
+ which_type = which_type.to_s
102
+
103
+ if ["string", "text", "bigtext"].include?(which_type)
104
+ {
105
+ "string" => "varchar(255)",
106
+ "text" => "varchar(8191)",
107
+ "bigtext" => "varchar(65535)", # Redshift maximum
108
+ }[which_type]
109
+ else
110
+ which_type
111
+ end
112
+ end
113
+
102
114
  def dbconsole(options = {})
103
115
  args = []
104
116
  args << "--host=#{ @host }"
@@ -147,6 +159,7 @@ module DataDuck
147
159
  end
148
160
 
149
161
  def query(sql)
162
+ Logs.debug("SQL executing on #{ self.name }:\n " + sql)
150
163
  self.connection[sql].map { |elem| elem }
151
164
  end
152
165
 
@@ -1,3 +1,5 @@
1
+ require_relative 'database'
2
+
1
3
  module DataDuck
2
4
  class Source < DataDuck::Database
3
5
  def self.load_config!
@@ -47,6 +49,10 @@ module DataDuck
47
49
  return DataDuck::Source.source(source_name)
48
50
  end
49
51
 
52
+ def escape_char
53
+ '' # implement in subclass, e.g. " in postgresql and ` in mysql
54
+ end
55
+
50
56
  def schema(table_name)
51
57
  self.connection.schema(table_name)
52
58
  end
@@ -1,4 +1,5 @@
1
- require_relative 'source.rb'
1
+ require_relative 'source'
2
+ require_relative 'logs'
2
3
 
3
4
  require 'sequel'
4
5
 
@@ -29,7 +30,11 @@ module DataDuck
29
30
  def db_type
30
31
  return @initialized_db_type if @initialized_db_type
31
32
 
32
- raise Exception.new("Abstract method db_type must be overwritten by subclass, or passed as data when initializing.")
33
+ raise NotImplementedError.new("Abstract method db_type must be overwritten by subclass, or passed as data when initializing.")
34
+ end
35
+
36
+ def escape_char
37
+ raise NotImplementedError.new("Abstract method escape_char must be overwritten by subclass.")
33
38
  end
34
39
 
35
40
  def table_names
@@ -41,6 +46,7 @@ module DataDuck
41
46
  raise ArgumentError.new("Database #{ self.name } must not run mutating sql: #{ sql }")
42
47
  end
43
48
 
49
+ Logs.debug("SQL executing on #{ self.name }:\n " + sql)
44
50
  self.connection.fetch(sql).all
45
51
  end
46
52
  end
@@ -1,3 +1,5 @@
1
+ require_relative 'logs'
2
+
1
3
  module DataDuck
2
4
  class Table
3
5
  class << self
@@ -113,8 +115,10 @@ module DataDuck
113
115
  end
114
116
 
115
117
  def extract_query(source_spec, destination = nil)
118
+ escape_char = source_spec[:source].escape_char
119
+
116
120
  base_query = source_spec.has_key?(:query) ? source_spec[:query] :
117
- "SELECT \"#{ source_spec[:columns].sort.join('","') }\" FROM #{ source_spec[:table_name] }"
121
+ "SELECT #{ escape_char }#{ source_spec[:columns].sort.join(escape_char + ',' + escape_char) }#{ escape_char } FROM #{ source_spec[:table_name] }"
118
122
 
119
123
  extract_by_clause = ""
120
124
  limit_clause = ""
@@ -165,11 +169,11 @@ module DataDuck
165
169
  end
166
170
 
167
171
  def output_schema
168
- self.class.output_schema
172
+ self.class.output_schema || {}
169
173
  end
170
174
 
171
175
  def output_column_names
172
- self.class.output_schema.keys.sort.map(&:to_s)
176
+ self.output_schema.keys.sort.map(&:to_s)
173
177
  end
174
178
 
175
179
  def show
@@ -1,6 +1,6 @@
1
1
  module DataDuck
2
2
  VERSION_MAJOR = 0
3
3
  VERSION_MINOR = 5
4
- VERSION_PATCH = 0
4
+ VERSION_PATCH = 1
5
5
  VERSION = [VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH].join('.')
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataduck
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeff Pickhardt
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-19 00:00:00.000000000 Z
11
+ date: 2015-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler