RubyGems - redshifter - Versions diffs - 0.6.2 → 0.7.0 - Mend

redshifter 0.6.2 → 0.7.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/README.md +4 -0
data/lib/redshifter.rb +1 -0
data/lib/redshifter/extract_and_replace_redshift_table.rb +7 -6
data/lib/redshifter/extract_and_update_redshift_table.rb +9 -7
data/lib/redshifter/job/append_redshift_table_job.rb +15 -0
data/lib/redshifter/table.rb +2 -2
data/lib/redshifter/tasks.rb +12 -1
data/lib/redshifter/util/extract_and_transform_updates.rb +4 -3
data/lib/redshifter/util/redshift.rb +2 -0
data/lib/redshifter/version.rb +1 -1
metadata +4 -4

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: eb2c60961160a7bb4203faa3798ca442128db1fe
-  data.tar.gz: 60e54cd36a888621165e6544e8ea37b4c4d0b17a
+  metadata.gz: 99e068ccc5128f42b216208140cea7df8703f014
+  data.tar.gz: 93eb9194c02f2cc2800e54548052c4ab745226d7
 SHA512:
-  metadata.gz: 41f10099834f83582c5f9e67af1a088902c1143b75768adecded997658b3a8843fb5aaa4a02d934d51dbdd0e4e21a4ffa9c7580282ce4af156601ec950c8e869
-  data.tar.gz: e7e56defe7b36942aaa6e3ad9e27ed2eb2e3fc64cb3d25581656492b8b68f81399b101d526ceee641894d748c53144d0f2533e9f8176afeadb1704c33525804f
+  metadata.gz: 60c06243a08e41617ec8851baed2e79eb1ba4f88127ea1403197043201054dad59c9e270dff03db1126d5414fb14417b1ac28a65b008e8fb7bd48f4a464da8ed
+  data.tar.gz: 726b925959807d2108435e583742fd01db36cb2c19998797be8e709a294f40135d8854f510a42b6a0a7a28e1f44756e307e583df5e966713d886ac6e2459f85d

data/README.md CHANGED

@@ -81,6 +81,7 @@ Redshifter.config.tables = {
     redshift_columns: {
       'id' => 'INTEGER',
       'title' => 'VARCHAR(128)',
+      'json_value' => 'BOOLEAN',
       'published_at' => 'TIMESTAMP',
       'updated_at' => 'TIMESTAMP',
       'exported_at' => 'TIMESTAMP'
@@ -89,8 +90,11 @@ Redshifter.config.tables = {
     # source DB. By default, redshift columns will be populated from source
     # column with the same name. Column key must exist in redshift_columns.
     # If a matching source column does not exist you MUST specify it here.
+    #
+    # Extract json values as text (using ->>), then cast non-strings to appropriate type
     source_column_transforms: {
       'title' => "lower(title)",
+      'json_value' => "cast(json_column->>'json_key' as BOOLEAN)",
       'published_at' => 'first_edition_published_at',
       'exported_at' => 'now()'
     },

data/lib/redshifter.rb CHANGED

@@ -12,5 +12,6 @@ require 'redshifter/util/update_table'
 require 'redshifter/extract_and_replace_redshift_table'
 require 'redshifter/extract_and_update_redshift_table'
+require 'redshifter/job/append_redshift_table_job'
 require 'redshifter/job/replace_redshift_table_job'
 require 'redshifter/job/update_redshift_table_job'

data/lib/redshifter/extract_and_replace_redshift_table.rb CHANGED

@@ -1,17 +1,18 @@
 module Redshifter
   class ExtractAndReplaceRedshiftTable
-    def initialize(table, s3_util = Util::S3.new)
+    def initialize(table, s3_util: Util::S3.new)
       @table = table
       @s3_util = s3_util
     end
     def run
       Redshifter.config.logger.info "Extracting rows in batches ..."
-      extracted_s3_urls = Util::ExtractAndTransformUpdates
-        .new(table: table,
-             since: Table::EPOCH_TIMESTAMP,
-             s3_util: s3_util
-        ).run
+      extracted_s3_urls = Util::ExtractAndTransformUpdates.new(
+        table: table,
+        since: Table::EPOCH_TIMESTAMP,
+        s3_util: s3_util,
+        timestamp_column: 'updated_at'
+      ).run
       Redshifter.config.logger.info "Extracted #{extracted_s3_urls.size} batches. "

data/lib/redshifter/extract_and_update_redshift_table.rb CHANGED

@@ -1,17 +1,19 @@
 module Redshifter
   class ExtractAndUpdateRedshiftTable
-    def initialize(table, s3_util = Util::S3.new)
+    def initialize(table, timestamp_column:, s3_util: Util::S3.new)
       @table = table
       @s3_util = s3_util
+      @timestamp_column = timestamp_column
     end
     def run
       Redshifter.config.logger.info "Extracting rows in batches ..."
-      extracted_s3_urls = Util::ExtractAndTransformUpdates
-                            .new(table: table,
-                                 since: table.redshift_last_update,
-                                 s3_util: s3_util
-                            ).run
+      extracted_s3_urls = Util::ExtractAndTransformUpdates.new(
+        table: table,
+        since: table.redshift_max_timestamp(timestamp_column),
+        s3_util: s3_util,
+        timestamp_column: timestamp_column
+      ).run
       if extracted_s3_urls.any?
         Redshifter.config.logger.info "Writing manifest file to S3 ..."
@@ -33,6 +35,6 @@ module Redshifter
     private
-    attr_reader :table, :s3_util
+    attr_reader :s3_util, :table, :timestamp_column
   end
 end

data/lib/redshifter/job/append_redshift_table_job.rb ADDED

@@ -0,0 +1,15 @@
+require 'dynosaur'
+module Redshifter
+  module Job
+    class AppendRedshiftTableJob
+      @queue = :low
+      def self.perform(table_config_key)
+        dyno = Dynosaur::Process::Heroku
+                 .new(task: 'redshifter:append', args: [table_config_key])
+        dyno.start
+      end
+    end
+  end
+end

data/lib/redshifter/table.rb CHANGED

@@ -48,7 +48,7 @@ module Redshifter
     # Returns the unix epoch timestamp literal if the table does not exist or
     # exists with zero rows. Otherwise returns the timestamp literal of the most
     # recently updated row in the analytics table.
-    def redshift_last_update
+    def redshift_max_timestamp(timestamp_column)
       conn = Util::Redshift.connect
       table_presence_query = <<-QUERY.squish
@@ -67,7 +67,7 @@ module Redshifter
       if table_present == 't'
         conn.exec(
           <<-QUERY.squish
-              SELECT COALESCE(MAX(updated_at), TIMESTAMP '#{EPOCH_TIMESTAMP}')
+              SELECT COALESCE(MAX(#{timestamp_column}), TIMESTAMP '#{EPOCH_TIMESTAMP}')
               FROM #{redshift_schema}.#{redshift_table_name}
           QUERY
         ).getvalue(0, 0)

data/lib/redshifter/tasks.rb CHANGED

@@ -4,6 +4,15 @@ module Redshifter
     def install_tasks
       namespace :redshifter do
+        desc 'Append an extracted table to Redshift'
+        task :append, [:table_config_key] => :environment do |_task, args|
+          table_config = Redshifter.config.tables[args[:table_config_key]]
+          table = Redshifter::Table.new(table_config)
+          Redshifter::ExtractAndUpdateRedshiftTable
+            .new(table, timestamp_column: 'created_at')
+            .run
+        end
         desc 'Create or replace an extracted table in Redshift'
         task :replace, [:table_config_key] => :environment do |_task, args|
           table_config = Redshifter.config.tables[args[:table_config_key]]
@@ -15,7 +24,9 @@ module Redshifter
         task :update, [:table_config_key] => :environment do |_task, args|
           table_config = Redshifter.config.tables[args[:table_config_key]]
           table = Redshifter::Table.new(table_config)
-          Redshifter::ExtractAndUpdateRedshiftTable.new(table).run
+          Redshifter::ExtractAndUpdateRedshiftTable
+            .new(table, timestamp_column: 'updated_at')
+            .run
         end
       end
     end

data/lib/redshifter/util/extract_and_transform_updates.rb CHANGED

@@ -10,10 +10,11 @@ module Redshifter
       # the tables that are in the ETL process.
       NULL_CHARACTER = '∅'
-      def initialize(table:, since:, s3_util:)
+      def initialize(table:, since:, s3_util:, timestamp_column:)
         @table = table
         @since = since
         @s3_util = s3_util
+        @timestamp_column = timestamp_column
       end
       # Writes pipe delimited 'CSV' files to S3 of updated records.
@@ -37,7 +38,7 @@ module Redshifter
       private
-      attr_reader :table, :since, :s3_util
+      attr_reader :table, :since, :s3_util, :timestamp_column
       def csv_row(row)
         row.map! { |value| value.nil? ? NULL_CHARACTER : value }
@@ -76,7 +77,7 @@ module Redshifter
       end
       def select_batch_sql(columns:, batch_size:, start_id:)
-        "SELECT #{columns.join(', ')} FROM #{table.source_table_name} WHERE (#{table.source_table_filter}) AND updated_at >= '#{since}' AND id >= #{start_id} ORDER BY id ASC limit #{batch_size}"
+        "SELECT #{columns.join(', ')} FROM #{table.source_table_name} WHERE (#{table.source_table_filter}) AND #{timestamp_column} >= '#{since}' AND id >= #{start_id} ORDER BY id ASC limit #{batch_size}"
       end
     end
   end

data/lib/redshifter/util/redshift.rb CHANGED

@@ -1,3 +1,5 @@
+require 'pg'
 module Redshifter
   module Util
     module Redshift

data/lib/redshifter/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Redshifter
-  VERSION = "0.6.2"
+  VERSION = "0.7.0"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: redshifter
 version: !ruby/object:Gem::Version
-  version: 0.6.2
+  version: 0.7.0
 platform: ruby
 authors:
 - Justin Richard
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-07-07 00:00:00.000000000 Z
+date: 2017-06-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dynosaur
@@ -142,6 +142,7 @@ files:
 - lib/redshifter/config.rb
 - lib/redshifter/extract_and_replace_redshift_table.rb
 - lib/redshifter/extract_and_update_redshift_table.rb
+- lib/redshifter/job/append_redshift_table_job.rb
 - lib/redshifter/job/replace_redshift_table_job.rb
 - lib/redshifter/job/update_redshift_table_job.rb
 - lib/redshifter/table.rb
@@ -174,9 +175,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.8
+rubygems_version: 2.6.10
 signing_key:
 specification_version: 4
 summary: ETL processing jobs to exporting Rails model tables to Redshift
 test_files: []
-has_rdoc: