bricolage-streamingload 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/bin/bricolage-streaming-loader +2 -2
 - data/lib/bricolage/sqsmock.rb +0 -1
 - data/lib/bricolage/streamingload/dispatcher.rb +2 -1
 - data/lib/bricolage/streamingload/job.rb +387 -0
 - data/lib/bricolage/streamingload/{loaderparams.rb → jobparams.rb} +14 -39
 - data/lib/bricolage/streamingload/manifest.rb +7 -1
 - data/lib/bricolage/streamingload/objectbuffer.rb +0 -3
 - data/lib/bricolage/streamingload/task.rb +5 -68
 - data/lib/bricolage/streamingload/{loaderservice.rb → taskhandler.rb} +102 -61
 - data/lib/bricolage/streamingload/version.rb +1 -1
 - data/test/streamingload/test_dispatcher.rb +6 -6
 - data/test/streamingload/test_job.rb +438 -0
 - metadata +8 -9
 - data/lib/bricolage/nulllogger.rb +0 -20
 - data/lib/bricolage/snsdatasource.rb +0 -40
 - data/lib/bricolage/streamingload/loader.rb +0 -158
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 8a33b6a5561c4bf69b725a96bca4abf75f06fe8a
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 9c6357983ba1fea216fd3e0931d08f549f610320
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: e701e7aa67d9a9b6503b436342a5d2ef3b1edb49513fa6881bf4c9b758e5d44f50a7b444b950527518187f3c8d8da896ee7fc597d9d0878611c8c34c2dbb3a19
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 77b535d15e8e0724c054655e8f351b7620454f08e67a86a5c61f01b8bb623361b22344b93d590608538ee59e9809886e64ae48ec3b80f8cf61570897e3814e41
         
     | 
    
        data/lib/bricolage/sqsmock.rb
    CHANGED
    
    
| 
         @@ -27,7 +27,7 @@ module Bricolage 
     | 
|
| 
       27 
27 
     | 
    
         
             
                    end
         
     | 
| 
       28 
28 
     | 
    
         
             
                    config_path, * = opts.rest_arguments
         
     | 
| 
       29 
29 
     | 
    
         
             
                    config = YAML.load(File.read(config_path))
         
     | 
| 
       30 
     | 
    
         
            -
                    log = opts.log_file_path ? new_logger(opts.log_file_path, config) : nil
         
     | 
| 
      
 30 
     | 
    
         
            +
                    log = opts.log_file_path ? new_logger(File.expand_path(opts.log_file_path), config) : nil
         
     | 
| 
       31 
31 
     | 
    
         
             
                    ctx = Context.for_application('.', environment: opts.environment, logger: log)
         
     | 
| 
       32 
32 
     | 
    
         
             
                    logger = raw_logger = ctx.logger
         
     | 
| 
       33 
33 
     | 
    
         
             
                    event_queue = ctx.get_data_source('sqs', config.fetch('event-queue-ds', 'sqs_event'))
         
     | 
| 
         @@ -58,6 +58,7 @@ module Bricolage 
     | 
|
| 
       58 
58 
     | 
    
         | 
| 
       59 
59 
     | 
    
         
             
                    Process.daemon(true) if opts.daemon?
         
     | 
| 
       60 
60 
     | 
    
         
             
                    create_pid_file opts.pid_file_path if opts.pid_file_path
         
     | 
| 
      
 61 
     | 
    
         
            +
                    Dir.chdir '/'
         
     | 
| 
       61 
62 
     | 
    
         
             
                    dispatcher.event_loop
         
     | 
| 
       62 
63 
     | 
    
         
             
                  rescue Exception => e
         
     | 
| 
       63 
64 
     | 
    
         
             
                    logger.exception e
         
     | 
| 
         @@ -0,0 +1,387 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'bricolage/streamingload/jobparams'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'bricolage/streamingload/manifest'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'bricolage/sqlutils'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'socket'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'json'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            module Bricolage
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
              module StreamingLoad
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                class JobCancelled < ApplicationError; end
         
     | 
| 
      
 12 
     | 
    
         
            +
                class JobDefered < ApplicationError; end
         
     | 
| 
      
 13 
     | 
    
         
            +
                class JobDuplicated < ApplicationError; end
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
                class ControlConnectionFailed < JobFailure; end
         
     | 
| 
      
 16 
     | 
    
         
            +
                class DataConnectionFailed < JobFailure; end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                class Job
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                  def initialize(context:, ctl_ds:, task_id:, force: false, logger:)
         
     | 
| 
      
 22 
     | 
    
         
            +
                    @context = context
         
     | 
| 
      
 23 
     | 
    
         
            +
                    @ctl_ds = ctl_ds
         
     | 
| 
      
 24 
     | 
    
         
            +
                    @task_id = task_id
         
     | 
| 
      
 25 
     | 
    
         
            +
                    @force = force
         
     | 
| 
      
 26 
     | 
    
         
            +
                    @logger = logger
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                    @task = nil
         
     | 
| 
      
 29 
     | 
    
         
            +
                    @job_id = nil
         
     | 
| 
      
 30 
     | 
    
         
            +
                    @data_ds = nil
         
     | 
| 
      
 31 
     | 
    
         
            +
                    @manifest = nil
         
     | 
| 
      
 32 
     | 
    
         
            +
                  end
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                  # For tests
         
     | 
| 
      
 35 
     | 
    
         
            +
                  attr_reader :job_id
         
     | 
| 
      
 36 
     | 
    
         
            +
                  attr_reader :process_id
         
     | 
| 
      
 37 
     | 
    
         
            +
                  attr_reader :task
         
     | 
| 
      
 38 
     | 
    
         
            +
                  attr_reader :data_ds
         
     | 
| 
      
 39 
     | 
    
         
            +
                  attr_reader :manifest
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
                  # Returns true -> Deletes a SQS message
         
     | 
| 
      
 42 
     | 
    
         
            +
                  # Returns false -> Keeps a SQS message
         
     | 
| 
      
 43 
     | 
    
         
            +
                  def execute(fail_fast: false)
         
     | 
| 
      
 44 
     | 
    
         
            +
                    execute_task
         
     | 
| 
      
 45 
     | 
    
         
            +
                    return true
         
     | 
| 
      
 46 
     | 
    
         
            +
                  rescue JobCancelled
         
     | 
| 
      
 47 
     | 
    
         
            +
                    return true
         
     | 
| 
      
 48 
     | 
    
         
            +
                  rescue JobDuplicated
         
     | 
| 
      
 49 
     | 
    
         
            +
                    return true
         
     | 
| 
      
 50 
     | 
    
         
            +
                  rescue JobDefered
         
     | 
| 
      
 51 
     | 
    
         
            +
                    return false
         
     | 
| 
      
 52 
     | 
    
         
            +
                  rescue ControlConnectionFailed => ex
         
     | 
| 
      
 53 
     | 
    
         
            +
                    @logger.error ex.message
         
     | 
| 
      
 54 
     | 
    
         
            +
                    wait_for_connection('ctl', @ctl_ds) unless fail_fast
         
     | 
| 
      
 55 
     | 
    
         
            +
                    return false
         
     | 
| 
      
 56 
     | 
    
         
            +
                  rescue DataConnectionFailed
         
     | 
| 
      
 57 
     | 
    
         
            +
                    wait_for_connection('data', @data_ds) unless fail_fast
         
     | 
| 
      
 58 
     | 
    
         
            +
                    return false
         
     | 
| 
      
 59 
     | 
    
         
            +
                  rescue JobFailure
         
     | 
| 
      
 60 
     | 
    
         
            +
                    return false
         
     | 
| 
      
 61 
     | 
    
         
            +
                  rescue JobError
         
     | 
| 
      
 62 
     | 
    
         
            +
                    return true
         
     | 
| 
      
 63 
     | 
    
         
            +
                  rescue Exception => ex
         
     | 
| 
      
 64 
     | 
    
         
            +
                    @logger.exception ex
         
     | 
| 
      
 65 
     | 
    
         
            +
                    return true
         
     | 
| 
      
 66 
     | 
    
         
            +
                  end
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
                  MAX_RETRY = 5
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
                  def execute_task
         
     | 
| 
      
 71 
     | 
    
         
            +
                    @process_id = "#{Socket.gethostname}-#{$$}"
         
     | 
| 
      
 72 
     | 
    
         
            +
                    @logger.info "execute task: task_id=#{@task_id} force=#{@force} process_id=#{@process_id}"
         
     | 
| 
      
 73 
     | 
    
         
            +
                    ctl = ControlConnection.new(@ctl_ds, @logger)
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
                    ctl.open {
         
     | 
| 
      
 76 
     | 
    
         
            +
                      @task = ctl.load_task(@task_id)
         
     | 
| 
      
 77 
     | 
    
         
            +
                      @logger.info "task details: task_id=#{@task_id} table=#{@task.schema_name}.#{@task.table_name}"
         
     | 
| 
      
 78 
     | 
    
         
            +
                      if @task.disabled
         
     | 
| 
      
 79 
     | 
    
         
            +
                        # We do not record disabled job in the DB.
         
     | 
| 
      
 80 
     | 
    
         
            +
                        @logger.info "task is disabled; defer task: task_id=#{@task_id}"
         
     | 
| 
      
 81 
     | 
    
         
            +
                        raise JobDefered, "defered: task_id=#{@task_id}"
         
     | 
| 
      
 82 
     | 
    
         
            +
                      end
         
     | 
| 
      
 83 
     | 
    
         
            +
             
     | 
| 
      
 84 
     | 
    
         
            +
                      @job_id = ctl.begin_job(@task_id, @process_id, @force)
         
     | 
| 
      
 85 
     | 
    
         
            +
                      unless @job_id
         
     | 
| 
      
 86 
     | 
    
         
            +
                        @logger.warn "task is already succeeded and not forced; discard task: task_id=#{@task_id}"
         
     | 
| 
      
 87 
     | 
    
         
            +
                        ctl.commit_duplicated_job @task_id, @process_id
         
     | 
| 
      
 88 
     | 
    
         
            +
                        raise JobDuplicated, "duplicated: task_id=#{@task_id}"
         
     | 
| 
      
 89 
     | 
    
         
            +
                      end
         
     | 
| 
      
 90 
     | 
    
         
            +
                    }
         
     | 
| 
      
 91 
     | 
    
         
            +
             
     | 
| 
      
 92 
     | 
    
         
            +
                    begin
         
     | 
| 
      
 93 
     | 
    
         
            +
                      do_load @task, @job_id
         
     | 
| 
      
 94 
     | 
    
         
            +
                      ctl.open {
         
     | 
| 
      
 95 
     | 
    
         
            +
                        ctl.commit_job @job_id, (@force ? 'forced' : nil)
         
     | 
| 
      
 96 
     | 
    
         
            +
                      }
         
     | 
| 
      
 97 
     | 
    
         
            +
                    rescue ControlConnectionFailed
         
     | 
| 
      
 98 
     | 
    
         
            +
                      raise
         
     | 
| 
      
 99 
     | 
    
         
            +
                    rescue JobFailure => ex
         
     | 
| 
      
 100 
     | 
    
         
            +
                      @logger.error ex.message
         
     | 
| 
      
 101 
     | 
    
         
            +
                      ctl.open {
         
     | 
| 
      
 102 
     | 
    
         
            +
                        fail_count = ctl.fail_count(@task_id)
         
     | 
| 
      
 103 
     | 
    
         
            +
                        final_retry = (fail_count >= MAX_RETRY)
         
     | 
| 
      
 104 
     | 
    
         
            +
                        retry_msg = (fail_count > 0) ? "(retry\##{fail_count}#{final_retry ? ' FINAL' : ''}) " : ''
         
     | 
| 
      
 105 
     | 
    
         
            +
                        ctl.abort_job job_id, 'failure', retry_msg + ex.message.lines.first.strip
         
     | 
| 
      
 106 
     | 
    
         
            +
                        raise JobCancelled, "retry count exceeds limit: task_id=#{@task_id}" if final_retry
         
     | 
| 
      
 107 
     | 
    
         
            +
                      }
         
     | 
| 
      
 108 
     | 
    
         
            +
                      raise
         
     | 
| 
      
 109 
     | 
    
         
            +
                    rescue JobError => ex
         
     | 
| 
      
 110 
     | 
    
         
            +
                      @logger.error ex.message
         
     | 
| 
      
 111 
     | 
    
         
            +
                      ctl.open {
         
     | 
| 
      
 112 
     | 
    
         
            +
                        ctl.abort_job job_id, 'error', ex.message.lines.first.strip
         
     | 
| 
      
 113 
     | 
    
         
            +
                      }
         
     | 
| 
      
 114 
     | 
    
         
            +
                      raise
         
     | 
| 
      
 115 
     | 
    
         
            +
                    rescue Exception => ex
         
     | 
| 
      
 116 
     | 
    
         
            +
                      @logger.exception ex
         
     | 
| 
      
 117 
     | 
    
         
            +
                      ctl.open {
         
     | 
| 
      
 118 
     | 
    
         
            +
                        ctl.abort_job job_id, 'error', ex.message.lines.first.strip
         
     | 
| 
      
 119 
     | 
    
         
            +
                      }
         
     | 
| 
      
 120 
     | 
    
         
            +
                      raise JobError, "#{ex.class}: #{ex.message}"
         
     | 
| 
      
 121 
     | 
    
         
            +
                    end
         
     | 
| 
      
 122 
     | 
    
         
            +
                  end
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
                  def do_load(task, job_id)
         
     | 
| 
      
 125 
     | 
    
         
            +
                    params = JobParams.load(@context, task.task_class, task.schema_name, task.table_name)
         
     | 
| 
      
 126 
     | 
    
         
            +
                    @data_ds = params.ds
         
     | 
| 
      
 127 
     | 
    
         
            +
                    @manifest = ManifestFile.create(ds: params.ctl_bucket, job_id: job_id, object_urls: task.object_urls, logger: @logger)
         
     | 
| 
      
 128 
     | 
    
         
            +
                    DataConnection.open(params.ds, @logger) {|data|
         
     | 
| 
      
 129 
     | 
    
         
            +
                      if params.enable_work_table?
         
     | 
| 
      
 130 
     | 
    
         
            +
                        data.load_with_work_table params.work_table, @manifest, params.load_options_string, params.sql_source
         
     | 
| 
      
 131 
     | 
    
         
            +
                      else
         
     | 
| 
      
 132 
     | 
    
         
            +
                        data.load_objects params.dest_table, @manifest, params.load_options_string
         
     | 
| 
      
 133 
     | 
    
         
            +
                      end
         
     | 
| 
      
 134 
     | 
    
         
            +
                    }
         
     | 
| 
      
 135 
     | 
    
         
            +
                  end
         
     | 
| 
      
 136 
     | 
    
         
            +
             
     | 
| 
      
 137 
     | 
    
         
            +
                  def wait_for_connection(type, ds)
         
     | 
| 
      
 138 
     | 
    
         
            +
                    @logger.warn "loader: #{type} DB connection lost; polling..."
         
     | 
| 
      
 139 
     | 
    
         
            +
                    start_time = Time.now
         
     | 
| 
      
 140 
     | 
    
         
            +
                    n = 0
         
     | 
| 
      
 141 
     | 
    
         
            +
                    while true
         
     | 
| 
      
 142 
     | 
    
         
            +
                      begin
         
     | 
| 
      
 143 
     | 
    
         
            +
                        ds.open {}
         
     | 
| 
      
 144 
     | 
    
         
            +
                        @logger.warn "loader: #{type} DB connection recovered; return to normal state"
         
     | 
| 
      
 145 
     | 
    
         
            +
                        return true
         
     | 
| 
      
 146 
     | 
    
         
            +
                      rescue ConnectionError
         
     | 
| 
      
 147 
     | 
    
         
            +
                      end
         
     | 
| 
      
 148 
     | 
    
         
            +
                      sleep 15
         
     | 
| 
      
 149 
     | 
    
         
            +
                      n += 1
         
     | 
| 
      
 150 
     | 
    
         
            +
                      if n == 120  # 30 min
         
     | 
| 
      
 151 
     | 
    
         
            +
                        # Could not get a connection in 30 minutes, now we return to the queue loop.
         
     | 
| 
      
 152 
     | 
    
         
            +
                        # Next job may fail too, but we should not stop to receive the task queue too long,
         
     | 
| 
      
 153 
     | 
    
         
            +
                        # because it contains control tasks.
         
     | 
| 
      
 154 
     | 
    
         
            +
                        @logger.warn "loader: #{type} DB connection still failing (since #{start_time}); give up."
         
     | 
| 
      
 155 
     | 
    
         
            +
                        return false
         
     | 
| 
      
 156 
     | 
    
         
            +
                      end
         
     | 
| 
      
 157 
     | 
    
         
            +
                    end
         
     | 
| 
      
 158 
     | 
    
         
            +
                  end
         
     | 
| 
      
 159 
     | 
    
         
            +
             
     | 
| 
      
 160 
     | 
    
         
            +
             
     | 
| 
      
 161 
     | 
    
         
            +
                  class DataConnection
         
     | 
| 
      
 162 
     | 
    
         
            +
             
     | 
| 
      
 163 
     | 
    
         
            +
                    include SQLUtils
         
     | 
| 
      
 164 
     | 
    
         
            +
             
     | 
| 
      
 165 
     | 
    
         
            +
                    def DataConnection.open(ds, logger = ds.logger, &block)
         
     | 
| 
      
 166 
     | 
    
         
            +
                      new(ds, logger).open(&block)
         
     | 
| 
      
 167 
     | 
    
         
            +
                    end
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
                    def initialize(ds, logger = ds.logger)
         
     | 
| 
      
 170 
     | 
    
         
            +
                      @ds = ds
         
     | 
| 
      
 171 
     | 
    
         
            +
                      @connection = nil
         
     | 
| 
      
 172 
     | 
    
         
            +
                      @logger = logger
         
     | 
| 
      
 173 
     | 
    
         
            +
                    end
         
     | 
| 
      
 174 
     | 
    
         
            +
             
     | 
| 
      
 175 
     | 
    
         
            +
                    def open(&block)
         
     | 
| 
      
 176 
     | 
    
         
            +
                      @ds.open {|conn|
         
     | 
| 
      
 177 
     | 
    
         
            +
                        @connection = conn
         
     | 
| 
      
 178 
     | 
    
         
            +
                        yield self
         
     | 
| 
      
 179 
     | 
    
         
            +
                      }
         
     | 
| 
      
 180 
     | 
    
         
            +
                    rescue ConnectionError => ex
         
     | 
| 
      
 181 
     | 
    
         
            +
                      raise DataConnectionFailed, "data connection failed: #{ex.message}"
         
     | 
| 
      
 182 
     | 
    
         
            +
                    end
         
     | 
| 
      
 183 
     | 
    
         
            +
             
     | 
| 
      
 184 
     | 
    
         
            +
                    def load_with_work_table(work_table, manifest, options, sql_source)
         
     | 
| 
      
 185 
     | 
    
         
            +
                      @connection.transaction {|txn|
         
     | 
| 
      
 186 
     | 
    
         
            +
                        # NOTE: This transaction ends with truncation, this DELETE does nothing
         
     | 
| 
      
 187 
     | 
    
         
            +
                        # from the second time.  So don't worry about DELETE cost here.
         
     | 
| 
      
 188 
     | 
    
         
            +
                        @connection.execute("delete from #{work_table}")
         
     | 
| 
      
 189 
     | 
    
         
            +
                        load_objects work_table, manifest, options
         
     | 
| 
      
 190 
     | 
    
         
            +
                        @connection.execute sql_source
         
     | 
| 
      
 191 
     | 
    
         
            +
                        txn.truncate_and_commit work_table
         
     | 
| 
      
 192 
     | 
    
         
            +
                      }
         
     | 
| 
      
 193 
     | 
    
         
            +
                    end
         
     | 
| 
      
 194 
     | 
    
         
            +
             
     | 
| 
      
 195 
     | 
    
         
            +
                    def load_objects(dest_table, manifest, options)
         
     | 
| 
      
 196 
     | 
    
         
            +
                      @connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
         
     | 
| 
      
 197 
     | 
    
         
            +
                          copy #{dest_table}
         
     | 
| 
      
 198 
     | 
    
         
            +
                          from #{s manifest.url}
         
     | 
| 
      
 199 
     | 
    
         
            +
                          credentials #{s manifest.credential_string}
         
     | 
| 
      
 200 
     | 
    
         
            +
                          manifest
         
     | 
| 
      
 201 
     | 
    
         
            +
                          statupdate false
         
     | 
| 
      
 202 
     | 
    
         
            +
                          compupdate false
         
     | 
| 
      
 203 
     | 
    
         
            +
                          #{options}
         
     | 
| 
      
 204 
     | 
    
         
            +
                          ;
         
     | 
| 
      
 205 
     | 
    
         
            +
                      EndSQL
         
     | 
| 
      
 206 
     | 
    
         
            +
                      @logger.info "load succeeded: #{manifest.url}"
         
     | 
| 
      
 207 
     | 
    
         
            +
                    end
         
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
      
 209 
     | 
    
         
            +
                  end   # class DataConnection
         
     | 
| 
      
 210 
     | 
    
         
            +
             
     | 
| 
      
 211 
     | 
    
         
            +
             
     | 
| 
      
 212 
     | 
    
         
            +
                  class ControlConnection
         
     | 
| 
      
 213 
     | 
    
         
            +
             
     | 
| 
      
 214 
     | 
    
         
            +
                    include SQLUtils
         
     | 
| 
      
 215 
     | 
    
         
            +
             
     | 
| 
      
 216 
     | 
    
         
            +
                    def ControlConnection.open(ds, logger = ds.logger, &block)
         
     | 
| 
      
 217 
     | 
    
         
            +
                      new(ds, logger).open(&block)
         
     | 
| 
      
 218 
     | 
    
         
            +
                    end
         
     | 
| 
      
 219 
     | 
    
         
            +
             
     | 
| 
      
 220 
     | 
    
         
            +
                    def initialize(ds, logger = ds.logger)
         
     | 
| 
      
 221 
     | 
    
         
            +
                      @ds = ds
         
     | 
| 
      
 222 
     | 
    
         
            +
                      @connection = nil
         
     | 
| 
      
 223 
     | 
    
         
            +
                    end
         
     | 
| 
      
 224 
     | 
    
         
            +
             
     | 
| 
      
 225 
     | 
    
         
            +
                    def open(&block)
         
     | 
| 
      
 226 
     | 
    
         
            +
                      @ds.open {|conn|
         
     | 
| 
      
 227 
     | 
    
         
            +
                        @connection = conn
         
     | 
| 
      
 228 
     | 
    
         
            +
                        yield self
         
     | 
| 
      
 229 
     | 
    
         
            +
                      }
         
     | 
| 
      
 230 
     | 
    
         
            +
                    rescue ConnectionError => ex
         
     | 
| 
      
 231 
     | 
    
         
            +
                      raise ControlConnectionFailed, "control connection failed: #{ex.message}"
         
     | 
| 
      
 232 
     | 
    
         
            +
                    end
         
     | 
| 
      
 233 
     | 
    
         
            +
             
     | 
| 
      
 234 
     | 
    
         
            +
                    TaskInfo = Struct.new(:task_id, :task_class, :schema_name, :table_name, :disabled, :object_urls)
         
     | 
| 
      
 235 
     | 
    
         
            +
             
     | 
| 
      
 236 
     | 
    
         
            +
                    # Fetches the task row for +task_id+ (joined with its table row) and
                    # returns it as a TaskInfo that also carries the task's object URLs.
                    #
                    # Raises JobError when the query yields no row.
                    def load_task(task_id)
                      row = @connection.query_row(<<-EndSQL)
                        select
                            tsk.task_class
                            , tbl.schema_name
                            , tbl.table_name
                            , tbl.disabled
                        from
                            strload_tasks tsk
                            inner join strload_tables tbl using (table_id)
                        where
                            tsk.task_id = #{task_id}
                        ;
                      EndSQL
                      raise JobError, "no such task: #{task_id}" unless row

                      # Every value other than the string 'f' counts as "disabled".
                      is_disabled = (row['disabled'] != 'f')
                      TaskInfo.new(
                        task_id,
                        row['task_class'],
                        row['schema_name'],
                        row['table_name'],
                        is_disabled,
                        load_object_urls(task_id)
                      )
                    end
         
     | 
| 
      
 259 
     | 
    
         
            +
             
     | 
| 
      
 260 
     | 
    
         
            +
                    # Returns the object URLs linked to the task +task_id+, exactly as
                    # query_values on the control connection reports them.
                    def load_object_urls(task_id)
                      @connection.query_values(<<-EndSQL)
                        select
                            o.object_url
                        from
                            strload_tasks t
                            inner join strload_task_objects tob using (task_id)
                            inner join strload_objects o using (object_id)
                        where
                            t.task_id = #{task_id}
                        ;
                      EndSQL
                    end
         
     | 
| 
      
 274 
     | 
    
         
            +
             
     | 
| 
      
 275 
     | 
    
         
            +
                    # Inserts a 'running' row into strload_jobs for +task_id+ and returns
                    # the new job ID converted to an Integer.
                    #
                    # Unless +force+ is truthy, the insert only selects tasks that have no
                    # job with status 'success'.  When query_value hands back no job ID,
                    # nil is returned.
                    def begin_job(task_id, process_id, force)
                      force_literal = force ? 'true' : 'false'
                      new_job_id = @connection.query_value(<<-EndSQL)
                        insert into strload_jobs
                            ( task_id
                            , process_id
                            , status
                            , start_time
                            )
                        select
                            task_id
                            , #{s process_id}
                            , 'running'
                            , current_timestamp
                        from
                            strload_tasks
                        where
                            task_id = #{task_id}
                            and (#{force_literal} or task_id not in (select task_id from strload_jobs where status = 'success'))
                        returning job_id
                        ;
                      EndSQL
                      return nil unless new_job_id
                      new_job_id.to_i
                    end
         
     | 
| 
      
 298 
     | 
    
         
            +
             
     | 
| 
      
 299 
     | 
    
         
            +
                    # Counts the unbroken run of 'failure'/'error' statuses at the head of
                    # the task's job history, newest job first.  A leading 'running' entry
                    # (the current job) is dropped before counting.
                    def fail_count(task_id)
                      history = @connection.query_values(<<-EndSQL)
                        select
                            j.status
                        from
                            strload_tasks t
                            inner join strload_jobs j using (task_id)
                        where
                            t.task_id = #{task_id}
                        order by
                            j.job_id desc
                      EndSQL
                      if history.first == 'running'   # current job
                        history.shift
                      end
                      recent_failures = history.take_while {|status| status == 'failure' || status == 'error' }
                      recent_failures.size
                    end
         
     | 
| 
      
 314 
     | 
    
         
            +
             
     | 
| 
      
 315 
     | 
    
         
            +
                    # Marks the job +job_id+ as successful.  Inside one transaction on the
                    # control connection it writes the 'success' result (with +message+,
                    # or an empty string when none is given) and then updates the loaded
                    # flags for the job.
                    def commit_job(job_id, message = nil)
                      result_message = message || ''
                      @connection.transaction do |_txn|
                        write_job_result job_id, 'success', result_message
                        update_loaded_flag job_id
                      end
                    end
         
     | 
| 
      
 321 
     | 
    
         
            +
             
     | 
| 
      
 322 
     | 
    
         
            +
                    # Records the outcome of a job that did not succeed by passing
                    # +status+ and +message+ for +job_id+ straight to write_job_result.
                    def abort_job(job_id, status, message)
                      write_job_result job_id, status, message
                    end
         
     | 
| 
      
 325 
     | 
    
         
            +
             
     | 
| 
      
 326 
     | 
    
         
            +
                    # Maximum number of characters of a job message that write_job_result
                    # stores; longer messages are cut to this length.
                    MAX_MESSAGE_LENGTH = 1000
         
     | 
| 
      
 327 
     | 
    
         
            +
             
     | 
| 
      
 328 
     | 
    
         
            +
                    # Stores the final +status+, the finish time (current_timestamp) and
                    # +message+ on the strload_jobs row identified by +job_id+.
                    #
                    # The message is cut to MAX_MESSAGE_LENGTH characters.  A nil message
                    # is stored as an empty string instead of raising.
                    def write_job_result(job_id, status, message)
                      # message can arrive as nil (abort_job forwards it unchecked);
                      # to_s keeps the slice below from failing with NoMethodError.
                      stored_message = message.to_s[0, MAX_MESSAGE_LENGTH]
                      @connection.execute(<<-EndSQL)
                        update
                            strload_jobs
                        set
                            (status, finish_time, message) = (#{s status}, current_timestamp, #{s stored_message})
                        where
                            job_id = #{job_id}
                        ;
                      EndSQL
                    end
         
     | 
| 
      
 339 
     | 
    
         
            +
             
     | 
| 
      
 340 
     | 
    
         
            +
                    # Sets loaded = true on every strload_objects row that belongs to the
                    # task of the job +job_id+ (resolved through strload_task_objects).
                    def update_loaded_flag(job_id)
                      mark_loaded_sql = <<-EndSQL
                        update
                            strload_objects
                        set
                            loaded = true
                        where
                            object_id in (
                              select
                                  object_id
                              from
                                  strload_task_objects
                              where task_id = (select task_id from strload_jobs where job_id = #{job_id})
                            )
                        ;
                      EndSQL
                      @connection.execute(mark_loaded_sql)
                    end
         
     | 
| 
      
 357 
     | 
    
         
            +
             
     | 
| 
      
 358 
     | 
    
         
            +
                    # Inserts an already-finished strload_jobs row with status
                    # 'duplicated' for +task_id+ (start and finish time both
                    # current_timestamp, empty message).
                    #
                    # Returns the job_id exactly as query_value reports it; no Integer
                    # conversion is applied here.
                    def commit_duplicated_job(task_id, process_id)
                      @connection.query_value(<<-EndSQL)
                        insert into strload_jobs
                            ( task_id
                            , process_id
                            , status
                            , start_time
                            , finish_time
                            , message
                            )
                        select
                            #{task_id}
                            , #{s process_id}
                            , 'duplicated'
                            , current_timestamp
                            , current_timestamp
                            , ''
                        returning job_id
                        ;
                      EndSQL
                    end
         
     | 
| 
      
 380 
     | 
    
         
            +
             
     | 
| 
      
 381 
     | 
    
         
            +
                  end   # class ControlConnection
         
     | 
| 
      
 382 
     | 
    
         
            +
             
     | 
| 
      
 383 
     | 
    
         
            +
                end   # class Job
         
     | 
| 
      
 384 
     | 
    
         
            +
             
     | 
| 
      
 385 
     | 
    
         
            +
              end   # module StreamingLoad
         
     | 
| 
      
 386 
     | 
    
         
            +
             
     | 
| 
      
 387 
     | 
    
         
            +
            end   # module Bricolage
         
     | 
| 
         @@ -6,69 +6,44 @@ module Bricolage 
     | 
|
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
              module StreamingLoad
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
                class  
     | 
| 
      
 9 
     | 
    
         
            +
                class JobParams
         
     | 
| 
       10 
10 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
                  def  
     | 
| 
       12 
     | 
    
         
            -
                    job =  
     | 
| 
       13 
     | 
    
         
            -
                    schema = resolve_schema(ctx,  
     | 
| 
       14 
     | 
    
         
            -
                    job.provide_default 'dest-table', "#{schema}.#{ 
     | 
| 
      
 11 
     | 
    
         
            +
                  def JobParams.load(ctx, job_class, schema, table)
         
     | 
| 
      
 12 
     | 
    
         
            +
                    job = load_bricolage_job(ctx, job_class, schema, table)
         
     | 
| 
      
 13 
     | 
    
         
            +
                    schema = resolve_schema(ctx, schema)
         
     | 
| 
      
 14 
     | 
    
         
            +
                    job.provide_default 'dest-table', "#{schema}.#{table}"
         
     | 
| 
       15 
15 
     | 
    
         
             
                    #job.provide_sql_file_by_job_id   # FIXME: provide only when exist
         
     | 
| 
       16 
16 
     | 
    
         
             
                    job.compile
         
     | 
| 
       17 
     | 
    
         
            -
                    new( 
     | 
| 
      
 17 
     | 
    
         
            +
                    new(job)
         
     | 
| 
       18 
18 
     | 
    
         
             
                  end
         
     | 
| 
       19 
19 
     | 
    
         | 
| 
       20 
     | 
    
         
            -
                  def  
     | 
| 
       21 
     | 
    
         
            -
                    if job_file = find_job_file(ctx,  
     | 
| 
      
 20 
     | 
    
         
            +
                  def JobParams.load_bricolage_job(ctx, job_class, schema, table)
         
     | 
| 
      
 21 
     | 
    
         
            +
                    if job_file = find_job_file(ctx, schema, table)
         
     | 
| 
       22 
22 
     | 
    
         
             
                      ctx.logger.debug "using .job file: #{job_file}"
         
     | 
| 
       23 
     | 
    
         
            -
                      Job.load_file(job_file, ctx.subsystem( 
     | 
| 
      
 23 
     | 
    
         
            +
                      Bricolage::Job.load_file(job_file, ctx.subsystem(schema))
         
     | 
| 
       24 
24 
     | 
    
         
             
                    else
         
     | 
| 
       25 
25 
     | 
    
         
             
                      ctx.logger.debug "using default job parameters (no .job file)"
         
     | 
| 
       26 
     | 
    
         
            -
                      Job.instantiate( 
     | 
| 
      
 26 
     | 
    
         
            +
                      Bricolage::Job.instantiate(table, job_class, ctx).tap {|job|
         
     | 
| 
       27 
27 
     | 
    
         
             
                        job.bind_parameters({})
         
     | 
| 
       28 
28 
     | 
    
         
             
                      }
         
     | 
| 
       29 
29 
     | 
    
         
             
                    end
         
     | 
| 
       30 
30 
     | 
    
         
             
                  end
         
     | 
| 
       31 
31 
     | 
    
         | 
| 
       32 
     | 
    
         
            -
                  def  
     | 
| 
      
 32 
     | 
    
         
            +
                  # Globs "<ctx.home_path>/<schema>/<table>.*" and returns the first
                  # path in sort order whose extension is ".job", or nil if none matches.
                  def JobParams.find_job_file(ctx, schema, table)
                    candidates = Dir.glob("#{ctx.home_path}/#{schema}/#{table}.*")
                    candidates.select {|path| File.extname(path) == '.job' }.min
                  end
         
     | 
| 
       36 
36 
     | 
    
         | 
| 
       37 
     | 
    
         
            -
                  def  
     | 
| 
      
 37 
     | 
    
         
            +
                  # Returns the global variable "<schema>_schema" from +ctx+ when it is
                  # set to a truthy value; otherwise returns +schema+ unchanged.
                  def JobParams.resolve_schema(ctx, schema)
                    override = ctx.global_variables["#{schema}_schema"]
                    override || schema
                  end
         
     | 
| 
       40 
40 
     | 
    
         
             
                  private_class_method :resolve_schema
         
     | 
| 
       41 
41 
     | 
    
         | 
| 
       42 
     | 
    
         
            -
                  def initialize( 
     | 
| 
       43 
     | 
    
         
            -
                    @task = task
         
     | 
| 
      
 42 
     | 
    
         
            +
                  # Keeps the given job and its params (job.params) for the readers
                  # defined on this class.
                  def initialize(job)
                    @job, @params = job, job.params
                  end
         
     | 
| 
       47 
46 
     | 
    
         | 
| 
       48 
     | 
    
         
            -
                  def task_id
         
     | 
| 
       49 
     | 
    
         
            -
                    @task.id
         
     | 
| 
       50 
     | 
    
         
            -
                  end
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                  def task_id
         
     | 
| 
       53 
     | 
    
         
            -
                    @task.id
         
     | 
| 
       54 
     | 
    
         
            -
                  end
         
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                  def schema
         
     | 
| 
       57 
     | 
    
         
            -
                    @task.schema
         
     | 
| 
       58 
     | 
    
         
            -
                  end
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
                  def table
         
     | 
| 
       61 
     | 
    
         
            -
                    @task.table
         
     | 
| 
       62 
     | 
    
         
            -
                  end
         
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
                  def force?
         
     | 
| 
       65 
     | 
    
         
            -
                    @task.force?
         
     | 
| 
       66 
     | 
    
         
            -
                  end
         
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
                  def object_urls
         
     | 
| 
       69 
     | 
    
         
            -
                    @task.object_urls
         
     | 
| 
       70 
     | 
    
         
            -
                  end
         
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
47 
     | 
    
         
             
                  # Returns the job parameter stored under the key 'redshift-ds'.
                  def ds
                    return @params['redshift-ds']
                  end
         
     | 
| 
         @@ -101,7 +76,7 @@ module Bricolage 
     | 
|
| 
       101 
76 
     | 
    
         
             
                end
         
     | 
| 
       102 
77 
     | 
    
         | 
| 
       103 
78 
     | 
    
         | 
| 
       104 
     | 
    
         
            -
                class  
     | 
| 
      
 79 
     | 
    
         
            +
                class StreamingLoadV3Job < RubyJobClass
         
     | 
| 
       105 
80 
     | 
    
         | 
| 
       106 
81 
     | 
    
         
             
                  job_class_id 'streaming_load_v3'
         
     | 
| 
       107 
82 
     | 
    
         | 
| 
         @@ -4,7 +4,7 @@ module Bricolage 
     | 
|
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
                class ManifestFile
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
     | 
    
         
            -
                  def ManifestFile.create(ds 
     | 
| 
      
 7 
     | 
    
         
            +
                  def ManifestFile.create(ds:, job_id:, object_urls:, logger:, noop: false, &block)
         
     | 
| 
       8 
8 
     | 
    
         
             
                    manifest = new(ds, job_id, object_urls, logger: logger, noop: noop)
         
     | 
| 
       9 
9 
     | 
    
         
             
                    if block
         
     | 
| 
       10 
10 
     | 
    
         
             
                      manifest.create_temporary(&block)
         
     | 
| 
         @@ -49,11 +49,17 @@ module Bricolage 
     | 
|
| 
       49 
49 
     | 
    
         
             
                  # Logs the target URL and, unless @noop is set, uploads +content+ to
                  # the object +name+ of the data source.
                  #
                  # An Aws::S3::Errors::ServiceError is logged and re-raised wrapped by
                  # S3Exception.wrap.
                  def put
                    begin
                      @logger.info "s3: put: #{url}"
                      unless @noop
                        @ds.object(name).put(body: content)
                      end
                    rescue Aws::S3::Errors::ServiceError => ex
                      @logger.exception ex
                      raise S3Exception.wrap(ex)
                    end
                  end
         
     | 
| 
       53 
56 
     | 
    
         | 
| 
       54 
57 
     | 
    
         
             
                  # Logs the target URL and, unless @noop is set, deletes the object
                  # +name+ from the data source.
                  #
                  # An Aws::S3::Errors::ServiceError is logged and re-raised wrapped by
                  # S3Exception.wrap.
                  def delete
                    begin
                      @logger.info "s3: delete: #{url}"
                      unless @noop
                        @ds.object(name).delete
                      end
                    rescue Aws::S3::Errors::ServiceError => ex
                      @logger.exception ex
                      raise S3Exception.wrap(ex)
                    end
                  end
         
     | 
| 
       58 
64 
     | 
    
         | 
| 
       59 
65 
     | 
    
         
             
                  def create_temporary
         
     | 
| 
         @@ -1,5 +1,4 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'bricolage/sqsdatasource'
         
     | 
| 
       2 
     | 
    
         
            -
            require 'json'
         
     | 
| 
       3 
2 
     | 
    
         | 
| 
       4 
3 
     | 
    
         
             
            module Bricolage
         
     | 
| 
       5 
4 
     | 
    
         | 
| 
         @@ -38,85 +37,23 @@ module Bricolage 
     | 
|
| 
       38 
37 
     | 
    
         
             
                    }
         
     | 
| 
       39 
38 
     | 
    
         
             
                  end
         
     | 
| 
       40 
39 
     | 
    
         | 
| 
       41 
     | 
    
         
            -
                  def LoadTask.load(conn, task_id, force: false)
         
     | 
| 
       42 
     | 
    
         
            -
                    rec = conn.query_row(<<-EndSQL)
         
     | 
| 
       43 
     | 
    
         
            -
                      select
         
     | 
| 
       44 
     | 
    
         
            -
                          task_class
         
     | 
| 
       45 
     | 
    
         
            -
                          , tbl.schema_name
         
     | 
| 
       46 
     | 
    
         
            -
                          , tbl.table_name
         
     | 
| 
       47 
     | 
    
         
            -
                          , disabled
         
     | 
| 
       48 
     | 
    
         
            -
                      from
         
     | 
| 
       49 
     | 
    
         
            -
                          strload_tasks tsk
         
     | 
| 
       50 
     | 
    
         
            -
                          inner join strload_tables tbl
         
     | 
| 
       51 
     | 
    
         
            -
                              using(table_id)
         
     | 
| 
       52 
     | 
    
         
            -
                      where
         
     | 
| 
       53 
     | 
    
         
            -
                          task_id = #{task_id}
         
     | 
| 
       54 
     | 
    
         
            -
                      ;
         
     | 
| 
       55 
     | 
    
         
            -
                    EndSQL
         
     | 
| 
       56 
     | 
    
         
            -
                    object_urls = conn.query_values(<<-EndSQL)
         
     | 
| 
       57 
     | 
    
         
            -
                      select
         
     | 
| 
       58 
     | 
    
         
            -
                          object_url
         
     | 
| 
       59 
     | 
    
         
            -
                      from
         
     | 
| 
       60 
     | 
    
         
            -
                          strload_task_objects
         
     | 
| 
       61 
     | 
    
         
            -
                          inner join strload_objects
         
     | 
| 
       62 
     | 
    
         
            -
                          using (object_id)
         
     | 
| 
       63 
     | 
    
         
            -
                          inner join strload_tasks
         
     | 
| 
       64 
     | 
    
         
            -
                          using (task_id)
         
     | 
| 
       65 
     | 
    
         
            -
                      where
         
     | 
| 
       66 
     | 
    
         
            -
                          task_id = #{task_id}
         
     | 
| 
       67 
     | 
    
         
            -
                      ;
         
     | 
| 
       68 
     | 
    
         
            -
                    EndSQL
         
     | 
| 
       69 
     | 
    
         
            -
                    return nil unless rec
         
     | 
| 
       70 
     | 
    
         
            -
                    new(
         
     | 
| 
       71 
     | 
    
         
            -
                      name: rec['task_class'],
         
     | 
| 
       72 
     | 
    
         
            -
                      time: nil,
         
     | 
| 
       73 
     | 
    
         
            -
                      source: nil,
         
     | 
| 
       74 
     | 
    
         
            -
                      task_id: task_id,
         
     | 
| 
       75 
     | 
    
         
            -
                      schema: rec['schema_name'],
         
     | 
| 
       76 
     | 
    
         
            -
                      table: rec['table_name'],
         
     | 
| 
       77 
     | 
    
         
            -
                      object_urls: object_urls,
         
     | 
| 
       78 
     | 
    
         
            -
                      disabled: rec['disabled'] == 'f' ? false : true,
         
     | 
| 
       79 
     | 
    
         
            -
                      force: force
         
     | 
| 
       80 
     | 
    
         
            -
                    )
         
     | 
| 
       81 
     | 
    
         
            -
                  end
         
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
40 
     | 
    
         
             
                  alias message_type name
         
     | 
| 
       84 
41 
     | 
    
         | 
| 
       85 
     | 
    
         
            -
                  def init_message(task_id:,  
     | 
| 
       86 
     | 
    
         
            -
                    @ 
     | 
| 
      
 42 
     | 
    
         
            +
                  def init_message(task_id:, force: false)
         
     | 
| 
      
 43 
     | 
    
         
            +
                    @task_id = task_id
         
     | 
| 
       87 
44 
     | 
    
         
             
                    @force = force
         
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
                    # Effective only for queue reader process
         
     | 
| 
       90 
     | 
    
         
            -
                    @schema = schema
         
     | 
| 
       91 
     | 
    
         
            -
                    @table = table
         
     | 
| 
       92 
     | 
    
         
            -
                    @object_urls = object_urls
         
     | 
| 
       93 
     | 
    
         
            -
                    @disabled = disabled
         
     | 
| 
       94 
45 
     | 
    
         
             
                  end
         
     | 
| 
       95 
46 
     | 
    
         | 
| 
       96 
     | 
    
         
            -
                  attr_reader : 
     | 
| 
      
 47 
     | 
    
         
            +
                  attr_reader :task_id
         
     | 
| 
       97 
48 
     | 
    
         | 
| 
       98 
49 
     | 
    
         
             
                  def force?
         
     | 
| 
       99 
50 
     | 
    
         
             
                    !!@force
         
     | 
| 
       100 
51 
     | 
    
         
             
                  end
         
     | 
| 
       101 
52 
     | 
    
         | 
| 
       102 
     | 
    
         
            -
                  #
         
     | 
| 
       103 
     | 
    
         
            -
                  # For writer only
         
     | 
| 
       104 
     | 
    
         
            -
                  #
         
     | 
| 
       105 
     | 
    
         
            -
             
     | 
| 
       106 
     | 
    
         
            -
                  attr_reader :schema, :table, :object_urls, :disabled
         
     | 
| 
       107 
     | 
    
         
            -
             
     | 
| 
       108 
     | 
    
         
            -
                  def qualified_name
         
     | 
| 
       109 
     | 
    
         
            -
                    "#{@schema}.#{@table}"
         
     | 
| 
       110 
     | 
    
         
            -
                  end
         
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
       112 
53 
     | 
    
         
             
                  def body
         
     | 
| 
       113 
54 
     | 
    
         
             
                    obj = super
         
     | 
| 
       114 
     | 
    
         
            -
                    obj['taskId'] = @ 
     | 
| 
       115 
     | 
    
         
            -
                    obj[' 
     | 
| 
       116 
     | 
    
         
            -
                    obj['tableName'] = @table
         
     | 
| 
       117 
     | 
    
         
            -
                    obj['objectUrls'] = @object_urls
         
     | 
| 
       118 
     | 
    
         
            -
                    obj['disabled'] = @disabled
         
     | 
| 
       119 
     | 
    
         
            -
                    obj['force'] = @force
         
     | 
| 
      
 55 
     | 
    
         
            +
                    obj['taskId'] = @task_id
         
     | 
| 
      
 56 
     | 
    
         
            +
                    obj['force'] = true if @force
         
     | 
| 
       120 
57 
     | 
    
         
             
                    obj
         
     | 
| 
       121 
58 
     | 
    
         
             
                  end
         
     | 
| 
       122 
59 
     | 
    
         |