inst_data_shipper 0.1.0.beta1 → 0.1.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb} +3 -1
- data/lib/inst_data_shipper/basic_dumper.rb +1 -1
- data/lib/inst_data_shipper/concerns/hooks.rb +18 -4
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +58 -21
- data/lib/inst_data_shipper/data_sources/local_tables.rb +28 -7
- data/lib/inst_data_shipper/destinations/base.rb +27 -4
- data/lib/inst_data_shipper/destinations/hosted_data.rb +28 -17
- data/lib/inst_data_shipper/destinations/s3.rb +1 -1
- data/lib/inst_data_shipper/dumper.rb +128 -47
- data/lib/inst_data_shipper/engine.rb +6 -0
- data/lib/inst_data_shipper/jobs/async_caller.rb +10 -2
- data/lib/inst_data_shipper/jobs/basic_dump_job.rb +6 -2
- data/lib/inst_data_shipper/schema_builder.rb +85 -33
- data/lib/inst_data_shipper/version.rb +1 -1
- data/lib/inst_data_shipper.rb +11 -2
- data/spec/spec_helper.rb +2 -2
- metadata +20 -6
- /data/app/models/{hosted_data_dumper → inst_data_shipper}/dump_batch.rb +0 -0
    
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: 146f5b93819d7950f9bd256a99eb690a63d453b86be4ac6ac7cf4c5901724cdd
+  data.tar.gz: 2410298ebb3b1ddc565ca70d49a274a129e83087d461dcfae4d4981979795ea5
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: c6dc93902e0ef7a114d2434d3901c677021d57603b07c1122d72bd2f953184b207d0e57f5441fafb7035a00bf28f2e5035d34cd0edfb73da6d2f93d93874f344
+  data.tar.gz: 2e7babf6a2ed86f9a2e5769bfb393549fb07b771b700513efd640ba2b98ebc3eeadec99578e4c828738903ef121f8e3bebe8490f6f75de0ce9afac43ac28b8fa
data/db/migrate/20240301090836_create_canvas_sync_sync_batches.rb → data/db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb RENAMED

@@ -1,4 +1,4 @@
-class CreateCanvasSyncSyncBatches < CanvasSync::MiscHelper::MigrationClass
+class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
   def change
     create_table :inst_data_shipper_dump_batches do |t|
       t.datetime :started_at
@@ -6,6 +6,8 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
       t.string :status
 
       t.string :job_class
+      t.string :genre
+
       t.string :exception
       t.text :backtrace
       t.text :metadata
data/lib/inst_data_shipper/concerns/hooks.rb CHANGED

@@ -9,21 +9,35 @@ module InstDataShipper
       end
 
       def hook(name, prepend: false, &block)
+        _assert_hook_defined(name)
+        @hooks ||= {}
+        @hooks[name] ||= []
         hooks = @hooks[name]
         prepend ? hooks.unshift(block) : hooks << block
       end
+
+      def _assert_hook_defined(name)
+        return true if @hooks&.key?(name)
+        return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+        raise ArgumentError, "Hook #{name} is not defined"
+      end
+
+      def _list_hooks(name)
+        list = []
+        list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+        list.push(*@hooks[name]) if (@hooks || {})[name]
+        list
+      end
     end
 
     def run_hook(name, *args, **kwargs)
-      …
-      hooks.each do |blk|
+      self.class._list_hooks(name).each do |blk|
         instance_exec(*args, **kwargs, &blk)
       end
     end
 
     def run_hook_safe(name, *args, **kwargs)
-      …
-      hooks.each do |blk|
+      self.class._list_hooks(name).each do |blk|
         instance_exec(*args, **kwargs, &blk)
       rescue StandardError
       end
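With this change, `run_hook` gathers blocks from the whole ancestor chain via `_list_hooks` instead of reading only the current class's registry, and `hook` raises for names that were never declared. A minimal sketch of the resulting behavior (class names here are made up; `define_hook`, `hook`, and `run_hook` are the concern's methods as shown above and used in `dumper.rb` below):

```ruby
# Illustrative only: InstDataShipper::Dumper declares :initialize_dump_batch
# via define_hook (see dumper.rb below), so subclasses may register blocks for it.
class ParentDumper < InstDataShipper::Dumper
  hook(:initialize_dump_batch) { puts "parent hook" }
end

class ChildDumper < ParentDumper
  hook(:initialize_dump_batch) { puts "child hook" }
end

# _list_hooks walks the superclass chain, so both blocks run (the parent's first):
ChildDumper.new.run_hook(:initialize_dump_batch)

# Registering a block for an undeclared name now fails fast:
#   hook(:no_such_hook) { ... }  # => ArgumentError: Hook no_such_hook is not defined
```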
data/lib/inst_data_shipper/data_sources/canvas_reports.rb CHANGED

@@ -27,11 +27,25 @@ module InstDataShipper
        _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
      end
 
-      def import_canvas_report_by_terms(…
+      def import_canvas_report_by_terms(*args, **kwargs)
+        _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+      end
+
+      def import_existing_report(report, **kwargs)
+        delayed(:_process_canvas_report, report: report, **kwargs)
+      end
+
+      private
+
+      def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
        term_ids = (terms || []).map do |term|
          term.is_a?(Term) ? term.canvas_id : term
        end
 
+        table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+        _resolve_report_incremenal_parameters(table_def, params)
+
        Sidekiq::Batch.new.tap do |b|
          b.description = "Term Scoped #{report_name} Runners"
          b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
          b.jobs do
            terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
            terms_query.find_each do |t|
-              …
+              _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
            end
          end
        end
      end
 
-      def …
-        …
-      end
+      def _import_canvas_report(report_name, params: {}, **kwargs)
+        table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
 
-      …
+        _resolve_report_incremenal_parameters(table_def, params)
+
+        _trigger_canvas_report(report_name, params: params, **kwargs)
+      end
 
-      def …
+      def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
        report = canvas_sync_client.start_report(
          'self', report_name,
          parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
        CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
          "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
          {
-            …
-            …
-            args: [target_table],
+            job: Jobs::AsyncCaller,
+            args: [origin_class, :_process_canvas_report],
            kwargs: kwargs,
          },
          on_failure: {
-            …
-            …
-            args: [target_table, report_name, kwargs],
+            job: Jobs::AsyncCaller,
+            args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
            kwargs: { retry_count: retry_count },
          },
          status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper
 
      def _in_canvas_report_pool(mthd, *args, **kwargs)
        pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
-        AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+        Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
      end
 
-      def _process_canvas_report(…
-        table_def = …
+      def _process_canvas_report(report:, schema_name: nil)
+        table_def = lookup_table_schema!(schema_name, report[:report])
 
-        IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}…
+        IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")
 
        inner_block = ->(file) {
-          CSV.foreach("#{working_dir}…
+          CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
            file << table_def[:columns].map do |c|
-              …
+              instance_exec(m, &c[:block])
            end
          end
        }
@@ -98,13 +112,36 @@ module InstDataShipper
        upload_data(table_def, extra: report['id'], &inner_block)
      end
 
-      def …
+      def _resolve_report_incremenal_parameters(table_def, params)
+        if table_is_incremental?(table_def)
+          inc = table_def[:incremental]
+          scope = inc[:scope]
+
+          if scope != false
+            scope ||= "updated_after"
+
+            if scope.is_a?(Proc)
+              scope = instance_exec(params, &scope)
+              if scope.is_a?(Hash) && scope != params
+                params.merge!(scope)
+              end
+            elsif scope.is_a?(String) || scope.is_a?(Symbol)
+              params[scope] = incremental_since
+            end
+          end
+        end
+
+        params
+      end
+
+      def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
        if retry_count.positive?
          tbid = batch_context[:report_bid] || batch_context[:root_bid]
          Sidekiq::Batch.new(tbid).jobs do
-            …
+            _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
          end
        else
+          # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
          cleanup_fatal_error!
        end
      end
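The net effect is that report imports are now schema-driven: the entry points look up the table definition, fold in incremental parameters, and fan work out through the report processor pool. A hypothetical wiring, as a sketch only (the `import_canvas_report` name is inferred from the surrounding context, the module path from the file name, `Dumper.define` appears later in this diff, and the report name and params are made up):

```ruby
SCHEMA = InstDataShipper::SchemaBuilder.build do
  # table definitions go here; see schema_builder.rb at the end of this diff
end

ProvisioningDumper = InstDataShipper::Dumper.define(
  include: [InstDataShipper::DataSources::CanvasReports],
  schema: SCHEMA,
) do
  # Run one provisioning report for the whole account...
  import_canvas_report("provisioning_csv", params: { "users" => true })
  # ...or one report per term, each routed through the report processor pool.
  import_canvas_report_by_terms("provisioning_csv", terms: Term.all)
end
```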
data/lib/inst_data_shipper/data_sources/local_tables.rb CHANGED

@@ -12,22 +12,43 @@ module InstDataShipper
 
      private
 
-      def _import_local_table(…
-        …
-
+      def _import_local_table(model, schema_name: nil)
+        model = model.safe_constantize if model.is_a?(String)
+
+        table_def = lookup_table_schema!(schema_name, { model: model })
 
        inner_block = ->(file) {
-          query = model…
-          query = query…
-
+          query = model.all
+          query = _resolve_model_query(query, table_def[:query])
+
+          if table_is_incremental?(table_def)
+            query = _resolve_model_query(query, table_def.dig(:incremental, :scope), string: ->(r, c) { r.where("? > ?", c, incremental_since) })
+          end
+
+          query.find_each do |m|
            file << table_def[:columns].map do |c|
-              …
+              instance_exec(m, &c[:block])
            end
          end
        }
 
        upload_data(table_def, &inner_block)
      end
+
+      def _resolve_model_query(relation, query, string: nil)
+        return relation if query.nil?
+
+        if query.is_a?(Symbol)
+          relation.send(query)
+        elsif query.is_a?(Proc)
+          instance_exec(relation, &query)
+        elsif query.is_a?(String) && string.present?
+          instance_exec(relation, query, &string)
+        else
+          raise "Invalid query: #{query.inspect}"
+        end
+      end
+
    end
  end
end
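The local-table path now derives everything from the table definition: an optional `query:` refines the base relation, and an incremental `scope` narrows it to rows changed since the last completed dump. A hypothetical definition exercising both, as a sketch (the DSL comes from `schema_builder.rb` later in this diff; the model, scope, and column names are made up, and `SchemaBuilder.build` is assumed to evaluate the block and return the table definitions, as the `builder.tables` context there suggests):

```ruby
SCHEMA = InstDataShipper::SchemaBuilder.build do
  # Proc query: handed to _resolve_model_query and instance_exec'd on the relation.
  table(User, "Active users", query: ->(rel) { rel.where(active: true) }) do
    # String scope: applied via the :string handler shown above, using incremental_since.
    incremental "updated_at"

    column :id, :bigint
    column :email, :"varchar(128)", "Primary email address"
  end
end
```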
data/lib/inst_data_shipper/destinations/base.rb CHANGED

@@ -50,7 +50,7 @@ module InstDataShipper
      end
 
      def user_config
-        config[:…
+        config[:user_config]
      end
 
      def group_key
@@ -62,11 +62,11 @@ module InstDataShipper
      def parse_configuration(uri)
        if block_given?
          parsed = URI.parse(uri)
+          cparsed = ConfigURI.new(parsed)
          cfg = {
-            …
-            extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+            user_config: cparsed.hash_params,
          }
-          yield …
+          yield cparsed, cfg
          cfg
        else
          raise NotImplementedError
@@ -100,5 +100,28 @@ module InstDataShipper
      end
 
    end
+
+    class ConfigURI
+      def initialize(uri)
+        @uri = uri
+      end
+
+      # delegate_missing_to :uri
+      delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+      def params
+        @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+      end
+
+      def hash_params
+        @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+      end
+
+      private
+
+      def uri
+        @uri
+      end
+    end
  end
end
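A quick sketch of how the new ConfigURI wrapper reads a destination string (the namespace is inferred from this file; the URI itself is a made-up example): query parameters land in `params`, while a `key=value` fragment lands in `hash_params`, which now feeds `config[:user_config]`.

```ruby
require "uri"

uri = InstDataShipper::Destinations::ConfigURI.new(
  URI.parse("s3://example-bucket/exports?region=us-east-1#format=csv&compress=1")
)

uri.host         # => "example-bucket"  (plain URI readers are delegated to the wrapped URI)
uri.params       # => { "region" => "us-east-1" }              (query string, indifferent access)
uri.hash_params  # => { "format" => "csv", "compress" => "1" } (fragment parsed as key=value pairs)
```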
data/lib/inst_data_shipper/destinations/hosted_data.rb CHANGED

@@ -1,3 +1,5 @@
+require "faraday_middleware"
+
 module InstDataShipper
  module Destinations
    class HostedData < Base
@@ -15,7 +17,7 @@ module InstDataShipper
      end
 
      def chunk_data(generator, table:, extra: nil)
-        warehouse_name = …
+        warehouse_name = table[:warehouse_name]
 
        super(generator) do |batch, idx|
          bits = [warehouse_name, extra, idx].compact
@@ -36,18 +38,18 @@ module InstDataShipper
 
      def upload_data_chunk(table_def, chunk)
        hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
-          table_def…
+          table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
        })
      end
 
      def finalize_dump
        hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
-        redis.…
+        redis.del(rk(:state))
      end
 
      def cleanup_fatal_error
        hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
-        redis.…
+        redis.del(rk(:state))
      end
 
      # TODO Support/allow single-table fatal errors?
@@ -59,28 +61,25 @@ module InstDataShipper
      end
 
      def convert_schema
-        table_prefix = config[:table_prefix]
-        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
        definititions = {}
        table_schemas.each do |ts|
          ts = ts.dup
+          tname = table_name(ts)
 
-          …
-          table_name = table_prefix + table_name if table_prefix.present?
-
-          definititions[ts[:warehouse_name]] = {
+          definititions[tname] = {
            dw_type: 'dimension',
            description: ts[:description],
-            incremental: …
-            incremental_on: ts…
+            incremental: dumper.table_is_incremental?(ts),
+            incremental_on: ts.dig(:incremental, :on),
            # indexed_columns
-            tableName: …
+            tableName: tname,
            columns: ts[:columns].map do |col|
+              coltype = col[:type]
+              coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
              {
                name: col[:warehouse_name],
                description: col[:description],
-                type: …
+                type: coltype,
              }
            end,
          }
@@ -92,6 +91,14 @@ module InstDataShipper
        }
      end
 
+      def table_name(table_def)
+        table_prefix = config[:table_prefix]
+        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+        table_name = table_def[:warehouse_name]
+        table_name = table_prefix + table_name if table_prefix.present?
+        table_name
+      end
+
      def hosted_data_client
        @hosted_data_client ||= begin
          token = config[:token]
@@ -102,6 +109,8 @@ module InstDataShipper
            host = tok_content['host']
          end
 
+          host = "https://#{host}" unless host.include?('://')
+
          Faraday.new(url: host) do |faraday|
            faraday.request :multipart
            faraday.request :json
@@ -117,14 +126,16 @@ module InstDataShipper
 
      def parse_configuration(uri)
        super do |parsed_uri, cfg|
-          if parsed_uri.…
+          if parsed_uri.user.present?
            # hosted-data://<JWT>:<hosted_data_domain>
-            cfg[:token] = parsed_uri.…
+            cfg[:token] = parsed_uri.user
            cfg[:host] = parsed_uri.host
          else
            # hosted-data://<JWT>
            cfg[:token] = parsed_uri.host
          end
+
+          cfg[:table_prefix] = parsed_uri.params[:table_prefix]
        end
      end
data/lib/inst_data_shipper/destinations/s3.rb CHANGED

@@ -4,7 +4,7 @@ module InstDataShipper
      include Concerns::Chunking
 
      def chunk_data(generator, table:, extra: nil)
-        warehouse_name = …
+        warehouse_name = table[:warehouse_name]
 
        super(generator) do |batch, idx|
          bits = [warehouse_name, extra, idx].compact
data/lib/inst_data_shipper/dumper.rb CHANGED

@@ -5,7 +5,7 @@ module InstDataShipper
    define_hook :initialize_dump_batch
    define_hook :finalize_dump_batch
 
-    def self.perform_dump(destinations…
+    def self.perform_dump(destinations)
      raise "Must subclass Dumper to use perform_dump" if self == Dumper
 
      dumper = new(destinations)
@@ -14,48 +14,134 @@ module InstDataShipper
      dumper.tracker
    end
 
-    …
-    …
-    …
+    def self.define(include: [], schema: , &blk)
+      Class.new(self) do
+        include(*include)
 
-      …
-      …
-      …
+        define_method(:enqueue_tasks, &blk)
+        define_method(:table_schemas) { schema }
+      end
    end
 
-    …
-      raise NotImplementedError
-    end
+    public
 
    def begin_dump
      raise "Dump already begun" unless @raw_destinations.present?
 
-      @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, status: 'in_progress')
+      @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
 
-      …
-        …
+      @batch_context = context = {
+        # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
+        # TODO Consider behavior if last is still running
+        incremental_since: DumpBatch.where(genre: export_genre, status: 'completed').order(created_at: :desc).first&.created_at,
+      }
+
+      begin
+        begin
+          destinations.each do |dest|
+            dest.initialize_dump()
+          end
+
+          run_hook(:initialize_dump_batch, context)
+        ensure
+          @batch_context = nil
+          context[:tracker_id] = tracker.id
+          context[:origin_class] = batch_context[:origin_class] || self.class.to_s
+          context[:destinations] = @raw_destinations
+        end
+
+        Sidekiq::Batch.new.tap do |batch|
+          context[:root_bid] = batch.bid
+
+          batch.description = "HD #{export_genre} Export #{tracker.id} Root"
+          batch.context = context
+          batch.on(:success, "#{self.class}#finalize_dump")
+          batch.on(:death, "#{self.class}#cleanup_fatal_error!")
+          batch.jobs do
+            enqueue_tasks
+          rescue => ex
+            delayed :cleanup_fatal_error!
+            InstDataShipper.handle_suppressed_error(ex)
+          end
+        end
+      rescue => ex
+        if context
+          batch ||= Sidekiq::Batch.new.tap do |batch|
+            batch.description = "HD #{export_genre} Export #{tracker.id} Early Failure Cleanup"
+            batch.context = context
+            batch.jobs do
+              delayed :cleanup_fatal_error!
+            end
+          end
+        end
+        raise ex
      end
+    end
 
-    …
+    def tracker
+      @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+    end
+
+    def export_genre
+      self.class.to_s.gsub(/HD|ExportJob/, '')
+    end
+
+    def origin_class
+      batch_context[:origin_class]&.constantize || self.class
+    end
+
+    def table_is_incremental?(table_def)
+      if (inc = table_def[:incremental]).present?
+        differ = inc[:if]
+        return !!incremental_since if differ.nil?
+
+        differ = :"#{differ}".to_proc if differ.is_a?(Symbol)
+        differ = instance_exec(&differ) if differ.is_a?(Proc)
+        return !!differ
+      end
+
+      false
+    end
+
+    def incremental_since
+      batch_context[:incremental_since]
+    end
+
+    def lookup_table_schema(*identifiers)
+      identifiers.compact.each do |ident|
+        if ident.is_a?(Hash)
+          key = ident.keys.first
+          value = ident.values.first
+        else
+          key = :warehouse_name
+          value = ident
+        end
+
+        value = Array(value).compact
+
+        table_schemas.each do |ts|
+          return ts if value.include?(ts[key])
        end
      end
 
-      …
+      nil
+    end
+
+    def lookup_table_schema!(*identifiers)
+      lookup_table_schema(*identifiers) || raise("No table schema found for #{identifiers.inspect}")
+    end
+
+    protected
+
+    attr_reader :executor
+
+    def initialize(destinations = nil, executor: nil)
+      @raw_destinations = Array(destinations)
+      @executor = executor
+    end
+
+    def enqueue_tasks
+      raise NotImplementedError
    end
 
    def upload_data(table_def, extra: nil, &datagen)
@@ -96,7 +182,7 @@ module InstDataShipper
    def finalize_dump(_status, _opts)
      run_hook(:finalize_dump_batch)
 
-      …
+      destinations.each do |dest|
        dest.finalize_dump
      end
 
@@ -108,14 +194,15 @@ module InstDataShipper
 
      run_hook(:finalize_dump_batch)
 
-      …
+      destinations.each do |dest|
        dest.cleanup_fatal_error
-      rescue …
+      rescue => ex
+        InstDataShipper.handle_suppressed_error(ex)
      end
 
      DumpBatch.find(batch_context[:tracker_id]).update(status: 'failed')
 
-      CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid])
+      CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid]) if batch_context[:root_bid].present?
    end
 
    # Helper Methods
@@ -126,23 +213,17 @@ module InstDataShipper
    end
 
    def delayed(mthd, *args, **kwargs)
-      AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
-    end
-
-    def tracker
-      @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+      Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
    end
 
-    …
-      self.class.to_s.gsub(/HD|ExportJob/, '')
-    end
+    delegate :working_dir, to: :executor
 
-    def …
-      …
+    def batch
+      Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
    end
 
-    def …
-      …
+    def batch_context
+      @batch_context || batch&.context || {}
    end
 
    def destinations_for_table(table_def)
@@ -150,7 +231,7 @@ module InstDataShipper
    end
 
    def destinations
-      @destinations ||= (@raw_destinations || batch_context[:destinations]).map.with_index do |dest, i|
+      @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
        dcls = InstDataShipper.resolve_destination(dest)
        dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
      end
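Taken together, `perform_dump` now wraps the whole run in a Sidekiq batch tree, records a `DumpBatch` row tagged with a genre, and exposes `incremental_since` (the timestamp of the previous completed batch of the same genre) to the data sources. A hypothetical subclass sketch (the class name, `SCHEMA` constant, and destination string are placeholders):

```ruby
class HDAccountsExportJob < InstDataShipper::Dumper
  def table_schemas
    SCHEMA  # built with InstDataShipper::SchemaBuilder (see schema_builder.rb below)
  end

  def enqueue_tasks
    # enqueue data-source jobs here (see data_sources/canvas_reports.rb and local_tables.rb)
  end
end

HDAccountsExportJob.perform_dump(["s3://example-bucket/exports"])
# export_genre => "Accounts" (class name with HD/ExportJob stripped), and
# incremental_since comes from the last completed DumpBatch with that genre.
```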
data/lib/inst_data_shipper/engine.rb CHANGED

@@ -4,5 +4,11 @@ module InstDataShipper
  class Engine < ::Rails::Engine
    isolate_namespace InstDataShipper
 
+    initializer :append_migrations do |app|
+      config.paths["db/migrate"].expanded.each do |expanded_path|
+        app.config.paths["db/migrate"] << expanded_path
+      end
+      ActiveRecord::Migrator.migrations_paths = Rails.application.paths['db/migrate'].to_a
+    end
  end
end
data/lib/inst_data_shipper/jobs/async_caller.rb CHANGED

@@ -1,7 +1,14 @@
+
+require "sidekiq"
+
 module InstDataShipper
  module Jobs
    class AsyncCaller < InstDataShipper::Jobs::Base
-      sidekiq_options …
+      sidekiq_options(retry: 0) if defined?(sidekiq_options)
+
+      def self.get_sidekiq_options
+        { retry: 0 }
+      end
 
      def self.call_from_pool(pool, clazz, method, *args, **kwargs)
        pool.add_job(
@@ -12,7 +19,8 @@ module InstDataShipper
      end
 
      def perform(clazz, method, *args, **kwargs)
-        clazz.constantize…
+        clazz = clazz.constantize if clazz.is_a?(String)
+        clazz.new(executor: self).send(method.to_sym, *args, **kwargs)
      end
    end
  end
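The job now instantiates the target class with itself as the `executor` (which is what gives the dumper its `working_dir`, per `dumper.rb` above) before dispatching, rather than calling a class method. A sketch of the call path, with a placeholder dumper class name; `perform_later` is the same entry point `Dumper#delayed` uses:

```ruby
# "MyDumper" is constantized, then MyDumper.new(executor: <job instance>) receives the method call.
InstDataShipper::Jobs::AsyncCaller.perform_later("MyDumper", "cleanup_fatal_error!")
```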
data/lib/inst_data_shipper/jobs/basic_dump_job.rb CHANGED

@@ -3,9 +3,13 @@ module InstDataShipper
    class BasicDumpJob < InstDataShipper::Jobs::Base
      sidekiq_options retry: 3 if defined?(sidekiq_options)
 
-      def perform(endpoints…
-        …
+      def perform(endpoints)
+
      end
+
+      protected
+
+
    end
  end
end
| @@ -1,4 +1,6 @@ | |
| 1 1 | 
             
            module InstDataShipper
         | 
| 2 | 
            +
              # This class ends up fill two roles - Schema and Mapping.
         | 
| 3 | 
            +
              # It makes for a clean API, but it's a little less canonical since, (eg) the S3 destination doesn't need column type annotations.
         | 
| 2 4 | 
             
              class SchemaBuilder
         | 
| 3 5 | 
             
                attr_reader :tables
         | 
| 4 6 |  | 
| @@ -12,24 +14,40 @@ module InstDataShipper | |
| 12 14 | 
             
                  builder.tables
         | 
| 13 15 | 
             
     end
 
-    def 
-
-
+    def extend_table_builder(&block)
+      @table_builder_class ||= Class.new(TableSchemaBuilder)
+      @table_builder_class.class_eval(&block)
+    end
 
+    def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
       tdef = {
+        warehouse_name: nil,
         description: description,
-        model: model_or_name.is_a?(String) ? nil : model_or_name,
-        warehouse_name: as.to_s,
-        incremental: incremental,
         columns: [],
-
+
+        model: model,
+        query: query,
+        **extra,
       }
 
-
+      if model_or_name.is_a?(ActiveRecord::Relation)
+        raise "model specified twice" if model.present?
+        raise "query specified twice" if query.present?
+
+        tdef[:query] = model_or_name
+        tdef[:model] = model_or_name.model
+      elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
+        tdef[:warehouse_name] = model_or_name.table_name
+        tdef[:model] = model_or_name
+      else
+        tdef[:warehouse_name] = model_or_name
+      end
+
+      @table_builder_class.build(tdef, &block)
 
       @tables << tdef
 
-
+      tdef
     end
 
     class TableSchemaBuilder

@@ -46,48 +64,82 @@ module InstDataShipper
         builder.columns
       end
 
-      # 
-
-
-
+      # def annotate(key, value)
+      #   options[key] = value
+      # end
+
+      def incremental(scope="updated_at", **kwargs)
+        if (extras = kwargs.keys - %i[on if]).present?
+          raise ArgumentError, "Unsuppored options: #{extras.inspect}"
+        end
+
+        options[:incremental] = {
+          on: Array(kwargs[:on]),
+          scope: scope,
+          if: kwargs[:if],
+        }
+      end
+
+      def column(name, *args, refs: [], from: nil, **extra, &block)
+        from ||= name.to_s
 
         cdef = {
-
-
-
+          warehouse_name: name.to_s,
+          from: from,
+          **extra,
         }
 
-        [
-
-            k.each do |hk, hv|
-              cdef[hv] = kwargs.delete(hk) if kwargs.key?(hk)
-            end
-          elsif kwargs.key?(k)
-            cdef[k] = kwargs.delete(k)
-          end
+        if args[0].is_a?(Symbol)
+          cdef[:type] = args.shift()
         end
 
-
-
-
-          k = (a.is_a?(String) && :description) || (a.is_a?(Symbol) && :type) || nil
-          raise ArgumentError, 'Unsupported Argument' if k.nil?
-          raise ArgumentError, "Duplicate Argument for #{k}" if cdef.key?(k)
+        if args[0].is_a?(String)
+          cdef[:description] = args.shift()
+        end
 
-
+        if args.present?
+          raise ArgumentError, "Received unexpected arguments: #{args.inspect}"
         end
 
+        cdef[:references] = Array(refs)
+
         if options[:model].is_a?(Class) && cdef[:local_name].to_s.ends_with?('_id')
           rel_name = cdef[:local_name].to_s[0...-3]
           refl = options[:model].reflections[rel_name]
           cdef[:references] << "#{refl.klass}##{refl.options[:primary_key] || 'id'}" if refl.present? && !refl.polymorphic?
         end
 
+        compiled_from = compile_transformer(from)
+
+        cdef[:block] = ->(row) {
+          value = instance_exec(row, &compiled_from)
+          value = instance_exec(value, row, &block) if block.present?
+          value
+        }
+
         @columns << cdef
 
-
+        cdef
       end
-
+
+      protected
+
+      def compile_transformer(from)
+        if from.present?
+          if from.is_a?(Symbol)
+            ->(row) { row.send(from) }
+          elsif from.is_a?(Proc)
+            from
+          elsif from.is_a?(String)
+            ->(row) { row[from] }
+          else
+            raise ArgumentError, "Invalid transformer: #{from.inspect}"
+          end
+        else
+          ->(row) { row }
+        end
+      end
+
     end
   end
 end
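
For orientation, the reworked schema DSL above might be used along these lines. This is an illustrative sketch only: the InstDataShipper::SchemaBuilder.build entry point, the User model, and the standard_timestamps helper are assumptions made for the example, while the extend_table_builder, table, incremental, and column calls mirror the signatures introduced in this diff.

schema = InstDataShipper::SchemaBuilder.build do  # entry point name assumed
  extend_table_builder do
    # Helper made available inside every table block below (name is hypothetical)
    def standard_timestamps
      column :created_at
      column :updated_at
    end
  end

  # Passing an ActiveRecord class fills warehouse_name from the model's table_name
  table(User, "All users known to the application") do
    incremental "updated_at", on: [:id]

    column :id, :bigint
    column :name, :string, "Display name"
    column :sis_id, :string, from: :sis_user_id        # read from a differently named attribute
    column :account_id, :bigint, refs: ["Account#id"]  # reference format used by the builder
    standard_timestamps
  end

  # Passing a String names the warehouse table directly; model/query come from keywords
  table("user_notes", model: User, query: User.where.not(notes: nil)) do
    column :user_id, from: :id
    column :notes
  end
end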
    
        data/lib/inst_data_shipper.rb
    CHANGED
    
@@ -23,13 +23,20 @@ module InstDataShipper
         destination = @destination_aliases[type]
       end
 
-
+      destination.constantize
     end
 
     def start_basic_dump(*args, **kwargs, &block)
      BasicDumper.perform_dump(*args, **kwargs, &block)
     end
 
+    def handle_suppressed_error(ex)
+      logger.error "Suppressed Error: #{ex.message}"
+      logger.error ex.backtrace.join("\n")
+      Raven.capture_exception(ex) if defined?(Raven)
+      Sentry.capture_exception(ex) if defined?(Sentry)
+    end
+
     def logger
       return @logger if defined? @logger
       @logger = Logger.new(STDOUT)

@@ -66,6 +73,8 @@ Dir[File.dirname(__FILE__) + "/inst_data_shipper/destinations/*.rb"].each do |fi
   basename = File.basename(file, ".rb")
   next if basename == "base"
 
-  InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.
+  InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.camelize}")
 end
 
+require "inst_data_shipper/dumper"
+require "inst_data_shipper/basic_dumper"
    
        data/spec/spec_helper.rb
    CHANGED
    
@@ -7,7 +7,7 @@ require File.expand_path("../dummy/config/environment.rb", __FILE__)
 require "bundler/setup"
 require 'rspec/rails'
 require 'spec_helper'
-require '
+require 'factory_bot_rails'
 require 'timecop'
 require 'webmock/rspec'
 require 'support/fake_canvas'

@@ -29,7 +29,7 @@ ActiveRecord::Migration.maintain_test_schema!
 RSpec.configure do |config|
   config.extend WithModel
 
-  config.include 
+  config.include FactoryBot::Syntax::Methods
   config.use_transactional_fixtures = true
   config.infer_spec_type_from_file_location!
   config.filter_rails_from_backtrace!
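
The test suite now loads factory_bot_rails, and including FactoryBot::Syntax::Methods lets specs call factory helpers without the FactoryBot. prefix. A short sketch (the :dump_batch factory name is assumed):

RSpec.describe InstDataShipper::DumpBatch do
  it "creates records via the short factory syntax" do
    batch = create(:dump_batch)   # equivalent to FactoryBot.create(:dump_batch)
    expect(batch).to be_persisted
  end
end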
    
        metadata
    CHANGED
    
@@ -1,27 +1,27 @@
 --- !ruby/object:Gem::Specification
 name: inst_data_shipper
 version: !ruby/object:Gem::Version
-  version: 0.1.0.
+  version: 0.1.0.beta2
 platform: ruby
 authors:
 - Instructure CustomDev
 autorequire: 
 bindir: bin
 cert_chain: []
-date: 2024-03-
+date: 2024-03-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '6.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '6.0'
 - !ruby/object:Gem::Dependency

@@ -360,6 +360,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: faraday_middleware
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: 
 email:
 - pseng@instructure.com

@@ -369,8 +383,8 @@ extra_rdoc_files: []
 files:
 - README.md
 - Rakefile
-- app/models/
-- db/migrate/
+- app/models/inst_data_shipper/dump_batch.rb
+- db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb
 - lib/inst_data_shipper.rb
 - lib/inst_data_shipper/basic_dumper.rb
 - lib/inst_data_shipper/concerns/hooks.rb

File without changes