inst_data_shipper 0.1.0.beta1 → 0.2.0
- checksums.yaml +4 -4
- data/README.md +139 -1
- data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb} +6 -3
- data/lib/inst_data_shipper/basic_dumper.rb +2 -2
- data/lib/inst_data_shipper/concerns/hooks.rb +18 -4
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +58 -21
- data/lib/inst_data_shipper/data_sources/local_tables.rb +35 -7
- data/lib/inst_data_shipper/destinations/base.rb +33 -6
- data/lib/inst_data_shipper/destinations/hosted_data.rb +63 -19
- data/lib/inst_data_shipper/destinations/s3.rb +1 -1
- data/lib/inst_data_shipper/dumper.rb +158 -50
- data/lib/inst_data_shipper/engine.rb +6 -0
- data/lib/inst_data_shipper/jobs/async_caller.rb +10 -2
- data/lib/inst_data_shipper/schema_builder.rb +99 -37
- data/lib/inst_data_shipper/version.rb +1 -1
- data/lib/inst_data_shipper.rb +13 -3
- data/spec/spec_helper.rb +2 -2
- metadata +22 -9
- data/lib/inst_data_shipper/jobs/basic_dump_job.rb +0 -11
- data/app/models/{hosted_data_dumper → inst_data_shipper}/dump_batch.rb +0 -0

checksums.yaml CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f7909aa44e9dabd1d43d58a5a3c2c081891104d64336294dce287c06804804df
+  data.tar.gz: 5da874689ac1de3e016a7feefce5866b211e6f7595021b565564f796685ed104
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cd81e6c26e2416ce1a32de588e04f560496cfb7cfdac3f4c837828a1c65798bec405d98197032b0d8935a1ba2b24a291aa25f1b73a469ac7a9c6ef8d2286103f
+  data.tar.gz: 66c5ccfd82128e8c5dc39c7c937ee7f4f9412743b7202e221e53c575d4b0e572f0b014b4f41ae5924b1d2d119a05cd5de2acbae4eb81022df844a1fea181faec
```

data/README.md CHANGED

````diff
@@ -1,6 +1,6 @@
 # InstDataShipper
 
-This gem is intended to facilitate
+This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.
 
 ## Installation
 
@@ -16,6 +16,144 @@ Then run the migrations:
 bundle exec rake db:migrate
 ```
 
+## Usage
+
+### Dumper
+
+The main tool provided by this Gem is the `InstDataShipper::Dumper` class. It is used to define a "Dump", which is a combination of tasks and schema.
+
+Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+```ruby
+class HostedDataPushJob < ApplicationJob
+  # The schema serves two purposes: defining the schema and mapping data
+  SCHEMA = InstDataShipper::SchemaBuilder.build do
+    # You can augment the Table-builder DSL with custom methods like so:
+    extend_table_builder do
+      # It may be useful to define custom column definition helpers:
+      def custom_column(*args, from: nil, **kwargs, &blk)
+        # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+        # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+        from ||= args[0].to_s
+        from = ->(row) { row.data[from] } if from.is_a?(String)
+        column(*args, **kwargs, from: from, &blk)
+      end
+
+      # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+      include SomeConcern
+    end
+
+    table(ALocalModel, "<TABLE DESCRIPTION>") do
+      # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run.
+      # The first argument "scope" can be interpreted in different ways:
+      #   If exporting a local model it may be a: (default: `updated_at`)
+      #     Proc that will receive a Relation and return a Relation (use `incremental_since`)
+      #     String of a column to compare with `incremental_since`
+      #   If exporting a Canvas report it may be a: (default: `updated_after`)
+      #     Proc that will receive report params and return modified report params (use `incremental_since`)
+      #     String of a report param to set to `incremental_since`
+      # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite key.
+      # `if:` may be a Proc or a Symbol (of a method on the Dumper)
+      incremental "updated_at", on: [:id], if: ->() {}
+
+      column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+      # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
+      custom_column :name, :"varchar(128)"
+
+      # `from:` may be...
+      # A Symbol of a method to be called on the record
+      custom_column :sis_type, :"varchar(32)", from: :some_model_method
+      # A String of a column to read from the record
+      custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+      # A Proc to be called with each record
+      custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+      # Not specified. Will default to using the Schema Column Name as a String ("sis_type" in this case)
+      custom_column :sis_type, :"varchar(32)"
+    end
+
+    table("my_table", model: ALocalModel) do
+      # ...
+    end
+
+    table("proserv_student_submissions_csv") do
+      column :canvas_id, :bigint, from: "canvas user id"
+      column :sis_id, :"varchar(64)", from: "sis user id"
+      column :name, :"varchar(64)", from: "user name"
+      column :submission_id, :bigint, from: "submission id"
+    end
+  end
+
+  Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+    InstDataShipper::DataSources::LocalTables,
+    InstDataShipper::DataSources::CanvasReports,
+  ]) do
+    import_local_table(ALocalModel)
+    import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+    # If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
+    import_local_table(SomeModel, schema_name: "my_table")
+    import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+  end
+
+  def perform
+    Dumper.perform_dump([
+      "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+      "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+    ])
+  end
+end
+```
+
+`Dumper`s may also be formed as a normal Ruby subclass:
+```ruby
+class HostedDataPushJob < ApplicationJob
+  SCHEMA = InstDataShipper::SchemaBuilder.build do
+    # ...
+  end
+
+  class Dumper < InstDataShipper::Dumper
+    include InstDataShipper::DataSources::LocalTables
+    include InstDataShipper::DataSources::CanvasReports
+
+    def enqueue_tasks
+      import_local_table(ALocalModel)
+      import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+    end
+
+    def table_schemas
+      SCHEMA
+    end
+  end
+
+  def perform
+    Dumper.perform_dump([
+      "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+      "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+    ])
+  end
+end
+```
+
+### Destinations
+
+This Gem is mainly designed for use with Hosted Data, but it tries to abstract that a little to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 is included.
+
+Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination specific.
+
+Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally but will be made available as `dest.user_config`. Ideally these are in the same format as query parameters (`x=1&y=2`, which it will try to parse into a Hash), but it can be any string.
+
+#### Hosted Data
+`hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+##### Optional Parameters:
+- `table_prefix`: An optional string to prefix onto each table name in the schema when declaring the schema in Hosted Data
+
+#### S3
+`s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+##### Optional Parameters:
+_None_
+
 ## Development
 
 When adding to or updating this gem, make sure you do the following:
````
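
A concrete illustration of the fragment behavior the README describes: the parsing lives in `ConfigURI#hash_params` in the `base.rb` diff further down, and the bucket and keys here are invented:

```ruby
require "uri"
require "rack"

# Query-shaped fragments parse into a Hash; anything else passes through verbatim.
frag = URI.parse("s3://key:secret@us-east-1/my-bucket/dumps#label=nightly&owner=data-team").fragment
user_config = frag&.match?(/^\w+=/) ? Rack::Utils.parse_nested_query(frag) : frag
user_config # => { "label" => "nightly", "owner" => "data-team" }

frag = URI.parse("s3://key:secret@us-east-1/my-bucket/dumps#nightly-dump").fragment
user_config = frag&.match?(/^\w+=/) ? Rack::Utils.parse_nested_query(frag) : frag
user_config # => "nightly-dump"
```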

data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb} CHANGED

```diff
@@ -1,4 +1,4 @@
-class
+class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
   def change
     create_table :inst_data_shipper_dump_batches do |t|
       t.datetime :started_at
@@ -6,10 +6,13 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
       t.string :status
 
       t.string :job_class
+      t.string :genre
+      t.string :batch_id
+
       t.string :exception
       t.text :backtrace
-      t.text :metadata
-      t.text :job_arguments
+      # t.text :metadata
+      # t.text :job_arguments
 
       t.timestamps
     end
```

data/lib/inst_data_shipper/concerns/hooks.rb CHANGED

```diff
@@ -9,21 +9,35 @@ module InstDataShipper
     end
 
     def hook(name, prepend: false, &block)
+      _assert_hook_defined(name)
+      @hooks ||= {}
+      @hooks[name] ||= []
       hooks = @hooks[name]
       prepend ? hooks.unshift(block) : hooks << block
     end
+
+    def _assert_hook_defined(name)
+      return true if @hooks&.key?(name)
+      return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+      raise ArgumentError, "Hook #{name} is not defined"
+    end
+
+    def _list_hooks(name)
+      list = []
+      list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+      list.push(*@hooks[name]) if (@hooks || {})[name]
+      list
+    end
   end
 
   def run_hook(name, *args, **kwargs)
-
-    hooks.each do |blk|
+    self.class._list_hooks(name).each do |blk|
       instance_exec(*args, **kwargs, &blk)
     end
   end
 
   def run_hook_safe(name, *args, **kwargs)
-
-    hooks.each do |blk|
+    self.class._list_hooks(name).each do |blk|
       instance_exec(*args, **kwargs, &blk)
     rescue StandardError
     end
```
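
The rework above moves hook storage from a single flat list to per-class registries traversed through `superclass`, so a subclass runs its parent's hooks before its own. A minimal self-contained sketch of that behavior (hypothetical `BaseDumper`/`CsvDumper` classes, not the gem's API):

```ruby
class BaseDumper
  def self.hooks
    @hooks ||= {} # class-level ivar: each subclass gets its own registry
  end

  def self.hook(name, prepend: false, &block)
    list = (hooks[name] ||= [])
    prepend ? list.unshift(block) : list << block
  end

  def self._list_hooks(name)
    # Walk up the ancestry first, so inherited hooks run before our own
    inherited = superclass.respond_to?(:_list_hooks) ? superclass._list_hooks(name) : []
    inherited + (hooks[name] || [])
  end

  def run_hook(name, *args)
    # instance_exec runs each block in the instance's context, as in run_hook above
    self.class._list_hooks(name).each { |blk| instance_exec(*args, &blk) }
  end
end

class CsvDumper < BaseDumper
  hook(:before_dump) { |tag| puts "CsvDumper before_dump (#{tag})" }
end

BaseDumper.hook(:before_dump) { |tag| puts "BaseDumper before_dump (#{tag})" }

CsvDumper.new.run_hook(:before_dump, "demo")
# Prints:
#   BaseDumper before_dump (demo)
#   CsvDumper before_dump (demo)
```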

data/lib/inst_data_shipper/data_sources/canvas_reports.rb CHANGED

```diff
@@ -27,11 +27,25 @@ module InstDataShipper
       _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
     end
 
-    def import_canvas_report_by_terms(
+    def import_canvas_report_by_terms(*args, **kwargs)
+      _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+    end
+
+    def import_existing_report(report, **kwargs)
+      delayed(:_process_canvas_report, report: report, **kwargs)
+    end
+
+    private
+
+    def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
       term_ids = (terms || []).map do |term|
         term.is_a?(Term) ? term.canvas_id : term
       end
 
+      table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+      _resolve_report_incremenal_parameters(table_def, params)
+
       Sidekiq::Batch.new.tap do |b|
         b.description = "Term Scoped #{report_name} Runners"
         b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
         b.jobs do
           terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
           terms_query.find_each do |t|
-
+            _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
           end
         end
       end
     end
 
-    def
-
-    end
+    def _import_canvas_report(report_name, params: {}, **kwargs)
+      table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
 
-
+      _resolve_report_incremenal_parameters(table_def, params)
+
+      _trigger_canvas_report(report_name, params: params, **kwargs)
+    end
 
-    def
+    def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
       report = canvas_sync_client.start_report(
         'self', report_name,
         parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
       CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
         "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
         {
-
-
-          args: [target_table],
+          job: Jobs::AsyncCaller,
+          args: [origin_class, :_process_canvas_report],
           kwargs: kwargs,
         },
         on_failure: {
-
-
-          args: [target_table, report_name, kwargs],
+          job: Jobs::AsyncCaller,
+          args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
           kwargs: { retry_count: retry_count },
         },
         status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper
 
     def _in_canvas_report_pool(mthd, *args, **kwargs)
       pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
-      AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+      Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
     end
 
-    def _process_canvas_report(
-      table_def =
+    def _process_canvas_report(report:, schema_name: nil)
+      table_def = lookup_table_schema!(schema_name, report[:report])
 
-      IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}
+      IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")
 
       inner_block = ->(file) {
-        CSV.foreach("#{working_dir}
+        CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
           file << table_def[:columns].map do |c|
-
+            instance_exec(m, &c[:block])
           end
         end
       }
@@ -98,13 +112,36 @@ module InstDataShipper
       upload_data(table_def, extra: report['id'], &inner_block)
     end
 
-    def
+    def _resolve_report_incremenal_parameters(table_def, params)
+      if table_is_incremental?(table_def)
+        inc = table_def[:incremental]
+        scope = inc[:scope]
+
+        if scope != false
+          scope ||= "updated_after"
+
+          if scope.is_a?(Proc)
+            scope = instance_exec(params, &scope)
+            if scope.is_a?(Hash) && scope != params
+              params.merge!(scope)
+            end
+          elsif scope.is_a?(String) || scope.is_a?(Symbol)
+            params[scope] = incremental_since
+          end
+        end
+      end
+
+      params
+    end
+
+    def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
       if retry_count.positive?
         tbid = batch_context[:report_bid] || batch_context[:root_bid]
         Sidekiq::Batch.new(tbid).jobs do
-
+          _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
         end
       else
+        # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
        cleanup_fatal_error!
       end
     end
```
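
The new `_resolve_report_incremenal_parameters` (the spelling is the gem's own method name) is the report-side half of incremental support. A standalone sketch of its resolution rules, with `since` standing in for the Dumper's `incremental_since` and the incrementality check assumed to have already passed:

```ruby
def resolve_incremental_params(table_def, params, since)
  scope = table_def.dig(:incremental, :scope)
  return params if scope == false # incremental, but explicitly unscoped

  scope = "updated_after" if scope.nil? # the documented report default

  if scope.is_a?(Proc)
    # The gem instance_execs the Proc on the Dumper; a plain call suffices here
    result = scope.call(params)
    params.merge!(result) if result.is_a?(Hash) && result != params
  elsif scope.is_a?(String) || scope.is_a?(Symbol)
    params[scope] = since
  end

  params
end

since = Time.now - 86_400
resolve_incremental_params({ incremental: {} }, {}, since)
# => { "updated_after" => since }
resolve_incremental_params({ incremental: { scope: "created_after" } }, {}, since)
# => { "created_after" => since }
```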

data/lib/inst_data_shipper/data_sources/local_tables.rb CHANGED

```diff
@@ -12,22 +12,50 @@ module InstDataShipper
 
     private
 
-    def _import_local_table(
-
-
+    def _import_local_table(model, schema_name: nil)
+      model = model.safe_constantize if model.is_a?(String)
+
+      table_def = lookup_table_schema!(schema_name, { model: model })
 
       inner_block = ->(file) {
-        query = model
-        query = query
-
+        query = model.all
+        query = _resolve_model_query(query, table_def[:query])
+
+        if table_is_incremental?(table_def)
+          query = _resolve_model_query(
+            query,
+            table_def.dig(:incremental, :scope),
+            string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+            default: "updated_at",
+          )
+        end
+
+        query.find_each do |m|
           file << table_def[:columns].map do |c|
-
+            instance_exec(m, &c[:block])
           end
         end
       }
 
       upload_data(table_def, &inner_block)
     end
+
+    def _resolve_model_query(relation, query, string: nil, default: nil)
+      return relation if query == false
+      query = default if query.nil?
+      return relation if query.nil?
+
+      if query.is_a?(Symbol)
+        relation.send(query)
+      elsif query.is_a?(Proc)
+        instance_exec(relation, &query)
+      elsif query.is_a?(String) && string.present?
+        instance_exec(relation, query, &string)
+      else
+        raise "Invalid query: #{query.inspect}"
+      end
+    end
+
   end
  end
 end
```
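
`_resolve_model_query` gives table definitions a small vocabulary for shaping the export query. A sketch of the same resolution rules outside the gem (the model, scope name, and `since` are illustrative):

```ruby
def resolve_model_query(relation, query, string: nil, default: nil)
  return relation if query == false # explicitly disabled
  query = default if query.nil?     # fall back to the table's default
  return relation if query.nil?

  case query
  when Symbol then relation.send(query) # a named scope on the model
  when Proc   then query.call(relation) # arbitrary Relation -> Relation
  when String                           # meaning is supplied by the caller
    raise "Invalid query: #{query.inspect}" unless string
    string.call(relation, query)
  else
    raise "Invalid query: #{query.inspect}"
  end
end

# For an incremental local table, the String handler is a timestamp filter,
# mirroring the call site above (assumes an ActiveRecord model `User`):
#   resolve_model_query(User.all, "updated_at",
#     string: ->(rel, col) { rel.where("#{col} > ?", since) },
#     default: "updated_at")
```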

data/lib/inst_data_shipper/destinations/base.rb CHANGED

```diff
@@ -3,7 +3,7 @@ module InstDataShipper
     class Base
       attr_reader :dumper
 
-      delegate :tracker, :
+      delegate :tracker, :schema, :working_dir, to: :dumper
 
       def initialize(cache_key, config, dumper)
         @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
         @dumper = dumper
       end
 
+      # This method is called before taking any actions.
+      # It should be used to make any necessary state assumptions (eg, the HostedData destination checks for a previous dump to determine if it can use incremental_since)
+      def preinitialize_dump(context); end
+
       # This method is called before processing any data.
       # It should be used to initialize any external resources needed for the dump.
-      def initialize_dump; end
+      def initialize_dump(context); end
 
       # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
       #
@@ -50,7 +54,7 @@ module InstDataShipper
       end
 
       def user_config
-        config[:
+        config[:user_config]
       end
 
       def group_key
@@ -62,11 +66,11 @@ module InstDataShipper
       def parse_configuration(uri)
         if block_given?
           parsed = URI.parse(uri)
+          cparsed = ConfigURI.new(parsed)
           cfg = {
-
-            extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+            user_config: cparsed.hash_params,
           }
-          yield
+          yield cparsed, cfg
           cfg
         else
           raise NotImplementedError
@@ -100,5 +104,28 @@ module InstDataShipper
       end
 
     end
+
+    class ConfigURI
+      def initialize(uri)
+        @uri = uri
+      end
+
+      # delegate_missing_to :uri
+      delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+      def params
+        @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+      end
+
+      def hash_params
+        @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+      end
+
+      private
+
+      def uri
+        @uri
+      end
+    end
   end
 end
```
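
A standalone approximation of what `ConfigURI#params` yields for a destination string (the host is made up); `HostedData#parse_configuration` below reads `table_prefix` out of exactly this Hash:

```ruby
require "uri"
require "rack"

# The destination's query string becomes an options Hash.
uri = URI.parse("hosted-data://JWT@hosted.example.com?table_prefix=example")
params = uri.query ? Rack::Utils.parse_nested_query(uri.query) : {}
params # => { "table_prefix" => "example" }
```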

data/lib/inst_data_shipper/destinations/hosted_data.rb CHANGED

```diff
@@ -1,13 +1,47 @@
+require "faraday_middleware"
+
 module InstDataShipper
   module Destinations
     class HostedData < Base
       include Concerns::Chunking
 
-      def
+      def preinitialize_dump(context)
+        if context[:incremental_since].present?
+          begin
+            last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+              status: 'imported',
+              # schema_version: convert_schema[:version],
+              tags: [
+                "ids-schema=#{dumper.schema_digest}",
+                "ids-genre=#{dumper.export_genre}",
+              ],
+            }).body.with_indifferent_access
+
+            if last_dump[:created_at] < context[:incremental_since]
+              InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+              context[:incremental_since] = last_dump[:created_at]
+            end
+          rescue Faraday::ResourceNotFound
+            # TODO It'd be nice to make this per-table
+            InstDataShipper.logger.info("No Last successful HostedData dump of the same schema - not using incremental_since")
+            context[:incremental_since] = nil
+          end
+        end
+      end
+
+      def initialize_dump(context)
+        tags = [
+          "ids-schema=#{dumper.schema_digest}",
+          "ids-genre=#{dumper.export_genre}",
+        ]
+        tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+        tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
         dump = hosted_data_client.post(
           'api/v1/custom_dumps/',
           reference_id: tracker.id,
           schema: convert_schema,
+          tags: tags,
         ).body.with_indifferent_access
 
         redis.hset(rk(:state), :dump_id, dump[:id])
@@ -15,7 +49,7 @@ module InstDataShipper
       end
 
       def chunk_data(generator, table:, extra: nil)
-        warehouse_name =
+        warehouse_name = table[:warehouse_name]
 
         super(generator) do |batch, idx|
           bits = [warehouse_name, extra, idx].compact
@@ -36,18 +70,18 @@ module InstDataShipper
 
       def upload_data_chunk(table_def, chunk)
         hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
-          table_def
+          table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
         })
       end
 
       def finalize_dump
         hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
-        redis.
+        redis.del(rk(:state))
       end
 
       def cleanup_fatal_error
         hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
-        redis.
+        redis.del(rk(:state))
       end
 
       # TODO Support/allow single-table fatal errors?
@@ -59,39 +93,45 @@ module InstDataShipper
       end
 
       def convert_schema
-        table_prefix = config[:table_prefix]
-        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
         definititions = {}
+        table_schemas = schema[:tables]
         table_schemas.each do |ts|
           ts = ts.dup
+          tname = table_name(ts)
 
-
-          table_name = table_prefix + table_name if table_prefix.present?
-
-          definititions[ts[:warehouse_name]] = {
+          definititions[tname] = {
             dw_type: 'dimension',
             description: ts[:description],
-            incremental:
-            incremental_on: ts
+            incremental: dumper.table_is_incremental?(ts),
+            incremental_on: ts.dig(:incremental, :on),
             # indexed_columns
-            tableName:
+            tableName: tname,
             columns: ts[:columns].map do |col|
+              coltype = col[:type]
+              coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
               {
                 name: col[:warehouse_name],
                 description: col[:description],
-                type:
+                type: coltype,
               }
             end,
           }
         end
 
         {
-          version: "#{dumper.
+          version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
           definition: definititions,
         }
       end
 
+      def table_name(table_def)
+        table_prefix = config[:table_prefix]
+        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+        table_name = table_def[:warehouse_name]
+        table_name = table_prefix + table_name if table_prefix.present?
+        table_name
+      end
+
       def hosted_data_client
         @hosted_data_client ||= begin
           token = config[:token]
@@ -102,6 +142,8 @@ module InstDataShipper
            host = tok_content['host']
          end
 
+          host = "https://#{host}" unless host.include?('://')
+
          Faraday.new(url: host) do |faraday|
            faraday.request :multipart
            faraday.request :json
@@ -117,14 +159,16 @@ module InstDataShipper
 
       def parse_configuration(uri)
         super do |parsed_uri, cfg|
-          if parsed_uri.
+          if parsed_uri.user.present?
             # hosted-data://<JWT>:<hosted_data_domain>
-            cfg[:token] = parsed_uri.
+            cfg[:token] = parsed_uri.user
             cfg[:host] = parsed_uri.host
           else
             # hosted-data://<JWT>
             cfg[:token] = parsed_uri.host
           end
+
+          cfg[:table_prefix] = parsed_uri.params[:table_prefix]
         end
       end
 
```
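
Since `table_prefix` now flows through the new `table_name` helper into both `convert_schema` and `upload_data_chunk`, its effect is easy to show in isolation (a sketch with invented names):

```ruby
# An optional prefix from the destination config is prepended to each
# warehouse table name; with no prefix, the name passes through unchanged.
def table_name(table_def, table_prefix: nil)
  name = table_def[:warehouse_name]
  table_prefix.to_s.empty? ? name : "#{table_prefix}_#{name}"
end

table_name({ warehouse_name: "submissions" })                          # => "submissions"
table_name({ warehouse_name: "submissions" }, table_prefix: "example") # => "example_submissions"
```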