dataduck 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dataduck/commands.rb +6 -2
- data/lib/dataduck/etl.rb +11 -5
- data/lib/dataduck/redshift_destination.rb +12 -4
- data/lib/dataduck/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b906e1a110fc140a059051ef242a79ef00bcbcd2
|
4
|
+
data.tar.gz: 9648034a52cf23fb544aaa3d66db86ee1a424c23
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c89642012f23632273c564ea4d09744304f053df3e4fe4ee0b9cb9734bd7403207246ae7f6109b6b53cd2d2b2c0361698aba92ccd6608934232b7ad4d6acc01a
|
7
|
+
data.tar.gz: 7f4c1682b11a97a9fe9777906f089a596e6022765ddafd4e2e38da42a534c9dadd81ee8af82d4dc7ca8d43ec2df643b791feec0eb76a0f5f5f79d877732a2a91
|
data/lib/dataduck/commands.rb
CHANGED
@@ -102,9 +102,9 @@ module DataDuck
|
|
102
102
|
|
103
103
|
only_destination = DataDuck::Destination.only_destination
|
104
104
|
|
105
|
+
etl = nil
|
105
106
|
if table_names_underscore.length == 1 && table_names_underscore[0] == "all"
|
106
107
|
etl = ETL.new(destinations: [only_destination], autoload_tables: true)
|
107
|
-
etl.process!
|
108
108
|
else
|
109
109
|
tables = []
|
110
110
|
table_names_underscore.each do |table_name|
|
@@ -122,7 +122,11 @@ module DataDuck
|
|
122
122
|
autoload_tables: false,
|
123
123
|
tables: tables
|
124
124
|
})
|
125
|
-
|
125
|
+
end
|
126
|
+
etl.process!
|
127
|
+
|
128
|
+
if etl.errored?
|
129
|
+
exit(1)
|
126
130
|
end
|
127
131
|
end
|
128
132
|
|
data/lib/dataduck/etl.rb
CHANGED
@@ -13,11 +13,13 @@ module DataDuck
|
|
13
13
|
|
14
14
|
attr_accessor :destinations
|
15
15
|
attr_accessor :tables
|
16
|
+
attr_accessor :errored_tables
|
16
17
|
|
17
18
|
def initialize(options = {})
|
18
19
|
self.class.destinations ||= []
|
19
20
|
@tables = options[:tables] || []
|
20
21
|
@destinations = options[:destinations] || []
|
22
|
+
@errored_tables = []
|
21
23
|
|
22
24
|
@autoload_tables = options[:autoload_tables].nil? ? true : options[:autoload_tables]
|
23
25
|
if @autoload_tables
|
@@ -32,6 +34,10 @@ module DataDuck
|
|
32
34
|
end
|
33
35
|
end
|
34
36
|
end
|
37
|
+
|
38
|
+
def errored?
|
39
|
+
@errored_tables.length > 0
|
40
|
+
end
|
35
41
|
|
36
42
|
def process!
|
37
43
|
Logs.info("Processing ETL on pid #{ Process.pid }...")
|
@@ -44,7 +50,7 @@ module DataDuck
|
|
44
50
|
destinations_to_use << DataDuck::Destination.only_destination
|
45
51
|
end
|
46
52
|
|
47
|
-
errored_tables = []
|
53
|
+
@errored_tables = []
|
48
54
|
|
49
55
|
@tables.each do |table_or_class|
|
50
56
|
table = table_or_class.kind_of?(DataDuck::Table) ? table_or_class : table_or_class.new
|
@@ -53,13 +59,13 @@ module DataDuck
|
|
53
59
|
table.etl!(destinations_to_use)
|
54
60
|
rescue => err
|
55
61
|
Logs.error("Error while processing table '#{ table.name }': #{ err.to_s }\n#{ err.backtrace.join("\n") }")
|
56
|
-
errored_tables << table
|
62
|
+
@errored_tables << table
|
57
63
|
end
|
58
64
|
end
|
59
65
|
|
60
|
-
Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - errored_tables.length } succeeded, #{ errored_tables.length } failed")
|
61
|
-
if errored_tables.length > 0
|
62
|
-
Logs.info("The following tables encountered errors: '#{ errored_tables.map(&:name).join("', '") }'")
|
66
|
+
Logs.info("Finished ETL processing for pid #{ Process.pid }, #{ @tables.length - @errored_tables.length } succeeded, #{ @errored_tables.length } failed")
|
67
|
+
if @errored_tables.length > 0
|
68
|
+
Logs.info("The following tables encountered errors: '#{ @errored_tables.map(&:name).join("', '") }'")
|
63
69
|
end
|
64
70
|
end
|
65
71
|
end
|
@@ -48,7 +48,7 @@ module DataDuck
|
|
48
48
|
query_fragments << "CREDENTIALS 'aws_access_key_id=#{ self.aws_key };aws_secret_access_key=#{ self.aws_secret }'"
|
49
49
|
query_fragments << "REGION '#{ self.s3_region }'"
|
50
50
|
query_fragments << "CSV IGNOREHEADER 1 TRUNCATECOLUMNS ACCEPTINVCHARS EMPTYASNULL"
|
51
|
-
query_fragments << "DATEFORMAT 'auto'"
|
51
|
+
query_fragments << "DATEFORMAT 'auto' GZIP"
|
52
52
|
return query_fragments.join(" ")
|
53
53
|
end
|
54
54
|
|
@@ -226,16 +226,24 @@ module DataDuck
|
|
226
226
|
self.query("SELECT DISTINCT(tablename) AS name FROM pg_table_def WHERE schemaname='public' ORDER BY name").map { |item| item[:name] }
|
227
227
|
end
|
228
228
|
|
229
|
+
def gzip(data)
|
230
|
+
sio = StringIO.new
|
231
|
+
gz = Zlib::GzipWriter.new(sio)
|
232
|
+
gz.write(data)
|
233
|
+
gz.close
|
234
|
+
sio.string
|
235
|
+
end
|
236
|
+
|
229
237
|
def upload_table_to_s3!(table)
|
230
238
|
now_epoch = Time.now.to_i.to_s
|
231
|
-
filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv"
|
239
|
+
filepath = "pending/#{ table.name.downcase }_#{ now_epoch }.csv.gz"
|
232
240
|
|
233
|
-
table_csv = self.data_as_csv_string(table.data, table.output_column_names)
|
241
|
+
table_csv = self.gzip(self.data_as_csv_string(table.data, table.output_column_names))
|
234
242
|
|
235
243
|
s3_obj = S3Object.new(filepath, table_csv, self.aws_key, self.aws_secret,
|
236
244
|
self.s3_bucket, self.s3_region)
|
237
245
|
s3_obj.upload!
|
238
|
-
|
246
|
+
s3_obj
|
239
247
|
end
|
240
248
|
|
241
249
|
def finish_fully_reloading_table!(table)
|
data/lib/dataduck/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataduck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.2
|
4
|
+
version: 1.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Pickhardt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|