bricolage-streamingload 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/bin/bricolage-streaming-loader +2 -2
 - data/lib/bricolage/sqsmock.rb +0 -1
 - data/lib/bricolage/streamingload/dispatcher.rb +2 -1
 - data/lib/bricolage/streamingload/job.rb +387 -0
 - data/lib/bricolage/streamingload/{loaderparams.rb → jobparams.rb} +14 -39
 - data/lib/bricolage/streamingload/manifest.rb +7 -1
 - data/lib/bricolage/streamingload/objectbuffer.rb +0 -3
 - data/lib/bricolage/streamingload/task.rb +5 -68
 - data/lib/bricolage/streamingload/{loaderservice.rb → taskhandler.rb} +102 -61
 - data/lib/bricolage/streamingload/version.rb +1 -1
 - data/test/streamingload/test_dispatcher.rb +6 -6
 - data/test/streamingload/test_job.rb +438 -0
 - metadata +8 -9
 - data/lib/bricolage/nulllogger.rb +0 -20
 - data/lib/bricolage/snsdatasource.rb +0 -40
 - data/lib/bricolage/streamingload/loader.rb +0 -158
 
--- data/lib/bricolage/streamingload/loader.rb
+++ /dev/null
@@ -1,158 +0,0 @@
-require 'bricolage/streamingload/loaderparams'
-require 'bricolage/streamingload/manifest'
-require 'bricolage/sqlutils'
-require 'socket'
-require 'json'
-
-module Bricolage
-
-  module StreamingLoad
-
-    class Loader
-
-      include SQLUtils
-
-      def Loader.load_from_file(ctx, ctl_ds, task, logger:)
-        params = LoaderParams.load(ctx, task)
-        new(ctl_ds, params, logger: logger)
-      end
-
-      def initialize(ctl_ds, params, logger:)
-        @ctl_ds = ctl_ds
-        @params = params
-        @logger = logger
-        @process_id = "#{Socket.gethostname}-#{$$}"
-      end
-
-      def execute
-        @job_id = assign_task
-        return unless @job_id # task already executed by other loader
-        @params.ds.open {|conn|
-          @connection = conn
-          do_load
-        }
-      end
-
-      def assign_task
-        @ctl_ds.open {|conn|
-          job_id = conn.query_value(<<-EndSQL)
-            insert into strload_jobs
-                ( task_id
-                , process_id
-                , status
-                , start_time
-                )
-            select
-                task_id
-                , #{s @process_id}
-                , 'running'
-                , current_timestamp
-            from
-                strload_tasks
-            where
-                task_id = #{@params.task_id}
-                and (#{@params.force?} or task_id not in (select task_id from strload_jobs))
-            returning job_id
-            ;
-          EndSQL
-          return job_id
-        }
-      end
-
-      def do_load
-        manifest = ManifestFile.create(@params.ctl_bucket, job_id: @job_id, object_urls: @params.object_urls, logger: @logger)
-        if @params.enable_work_table?
-          @connection.transaction {|txn|
-            # NOTE: This transaction ends with truncation, this DELETE does nothing
-            # from the second time.  So don't worry about DELETE cost here.
-            @connection.execute("delete from #{@params.work_table}")
-            load_objects @params.work_table, manifest, @params.load_options_string
-            commit_work_table txn, @params
-          }
-          commit_job_result
-        else
-          load_objects @params.dest_table, manifest, @params.load_options_string
-          commit_job_result
-        end
-      rescue JobFailure => ex
-        write_job_error 'failure', ex.message
-        raise
-      rescue Exception => ex
-        write_job_error 'error', ex.message
-        raise
-      end
-
-      def load_objects(dest_table, manifest, options)
-        @connection.execute(<<-EndSQL.strip.gsub(/\s+/, ' '))
-            copy #{dest_table}
-            from #{s manifest.url}
-            credentials #{s manifest.credential_string}
-            manifest
-            statupdate false
-            compupdate false
-            #{options}
-            ;
-        EndSQL
-        @logger.info "load succeeded: #{manifest.url}"
-      end
-
-      def commit_work_table(txn, params)
-        @connection.execute(params.sql_source)
-        txn.truncate_and_commit(params.work_table)
-      end
-
-      def commit_job_result
-        @end_time = Time.now
-        @ctl_ds.open {|conn|
-          conn.transaction {
-            write_job_result conn, 'success', ''
-            update_loaded_flag conn
-          }
-        }
-      end
-
-      def update_loaded_flag(connection)
-        connection.execute(<<-EndSQL)
-          update
-              strload_objects
-          set
-              loaded = true
-          where
-              object_id in (
-                select
-                    object_id
-                from
-                    strload_task_objects
-                where task_id = (select task_id from strload_jobs where job_id = #{@job_id})
-              )
-          ;
-        EndSQL
-      end
-
-      MAX_MESSAGE_LENGTH = 1000
-
-      def write_job_error(status, message)
-        @end_time = Time.now
-        @logger.warn message.lines.first
-        @ctl_ds.open {|conn|
-          write_job_result conn, status, message.lines.first.strip[0, MAX_MESSAGE_LENGTH]
-        }
-      end
-
-      def write_job_result(connection, status, message)
-        connection.execute(<<-EndSQL)
-          update
-              strload_jobs
-          set
-              (status, finish_time, message) = (#{s status}, current_timestamp, #{s message})
-          where
-              job_id = #{@job_id}
-          ;
-        EndSQL
-      end
-
-    end
-
-  end
-
-end