rocketjob 6.0.0.rc2 → 6.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +164 -8
- data/lib/rocket_job/batch/categories.rb +26 -24
- data/lib/rocket_job/batch/io.rb +128 -128
- data/lib/rocket_job/batch/worker.rb +14 -12
- data/lib/rocket_job/batch.rb +0 -1
- data/lib/rocket_job/category/base.rb +10 -7
- data/lib/rocket_job/category/input.rb +61 -1
- data/lib/rocket_job/category/output.rb +9 -0
- data/lib/rocket_job/dirmon_entry.rb +1 -1
- data/lib/rocket_job/job_exception.rb +1 -1
- data/lib/rocket_job/jobs/conversion_job.rb +43 -0
- data/lib/rocket_job/jobs/dirmon_job.rb +24 -35
- data/lib/rocket_job/jobs/housekeeping_job.rb +4 -5
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -11
- data/lib/rocket_job/jobs/on_demand_job.rb +2 -2
- data/lib/rocket_job/jobs/upload_file_job.rb +4 -0
- data/lib/rocket_job/plugins/cron.rb +60 -20
- data/lib/rocket_job/plugins/job/persistence.rb +36 -0
- data/lib/rocket_job/plugins/restart.rb +3 -110
- data/lib/rocket_job/plugins/state_machine.rb +2 -2
- data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +10 -5
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
- data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
- data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
- data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
- data/lib/rocket_job/sliced/input.rb +42 -54
- data/lib/rocket_job/sliced/slice.rb +7 -3
- data/lib/rocket_job/sliced/slices.rb +12 -9
- data/lib/rocket_job/sliced/writer/input.rb +46 -18
- data/lib/rocket_job/sliced.rb +1 -19
- data/lib/rocket_job/subscribers/secret_config.rb +17 -0
- data/lib/rocket_job/supervisor.rb +10 -8
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocketjob.rb +4 -3
- metadata +12 -12
- data/lib/rocket_job/batch/tabular/input.rb +0 -133
- data/lib/rocket_job/batch/tabular/output.rb +0 -67
- data/lib/rocket_job/batch/tabular.rb +0 -58
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 10ef66caa28110774987fdeff06af1a4906ed724bc307bf55d012e1104a9eca8
         | 
| 4 | 
            +
              data.tar.gz: b0384cccd5d694a3431c6c9cb7d004603c4b79865929b8c2bb249b4b66e937a2
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 331a1c21ad9c2b481e67d5d3857e3158dd23a7bb6524dd872773e0aad0e0c5edb4e02d4a60be6498bc212e2517c81c027ce7f22d6b378e9d5772c391669c4347
         | 
| 7 | 
            +
              data.tar.gz: e46cdffa443cc75d5dbed5f396ec84fb19fd12bad5b48bbddad7d310526a941bd837f778da8391460e40c575caed62c6e0329adc99ae8c58b07ada694cb1684c
         | 
    
        data/README.md
    CHANGED
    
    | @@ -17,21 +17,177 @@ Checkout https://rocketjob.io/ | |
| 17 17 | 
             
            * Questions? Join the chat room on Gitter for [rocketjob support](https://gitter.im/rocketjob/support)
         | 
| 18 18 | 
             
            * [Report bugs](https://github.com/rocketjob/rocketjob/issues)
         | 
| 19 19 |  | 
| 20 | 
            -
            ## Rocket Job  | 
| 20 | 
            +
            ## Rocket Job v6
         | 
| 21 21 |  | 
| 22 22 | 
             
            - Support for Ruby v3 and Rails 6.
         | 
| 23 | 
            -
            -  | 
| 24 | 
            -
                -  | 
| 25 | 
            -
            -  | 
| 23 | 
            +
            - Major enhancements in Batch job support:
         | 
| 24 | 
            +
                - Direct built-in Tabular support for all input and output categories.
         | 
| 25 | 
            +
                - Multiple output file support, each with its own settings for:
         | 
| 26 | 
            +
                    - Compression
         | 
| 27 | 
            +
                        - GZip, Zip, BZip2 (Chunked for much faster loading into Apache Spark).
         | 
| 28 | 
            +
                    - Encryption
         | 
| 29 | 
            +
                        - PGP, Symmetric Encryption.
         | 
| 30 | 
            +
                    - File format
         | 
| 31 | 
            +
                        - CSV, PSV, JSON, Fixed Format, xlsx.
         | 
| 32 | 
            +
            - Significant error handling improvements, especially around throttle failures
         | 
| 33 | 
            +
              that used to result in "hanging" jobs.
         | 
| 34 | 
            +
            - Support AWS DocumentDB in addition to MongoDB as the data store.
         | 
| 26 35 | 
             
            - Removed use of Symbols to meet Symbol deprecation in MongoDB and Mongoid.
         | 
| 27 36 |  | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 37 | 
            +
            ### Upgrading to Rocket Job v6
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            The following plugins have been deprecated and are no longer loaded by default.
         | 
| 40 | 
            +
            - `RocketJob::Batch::Tabular::Input`
         | 
| 41 | 
            +
            - `RocketJob::Batch::Tabular::Output`
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            If your code relies on these plugins and you still want to upgrade to Rocket Job v6,
         | 
| 44 | 
            +
            add the following require statement to any jobs that still use them:
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            ~~~ruby
         | 
| 47 | 
            +
            require "rocket_job/batch/tabular"
         | 
| 48 | 
            +
            ~~~
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            It is important to migrate away from these plugins, since they will be removed in a future release.
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            #### Scheduled Jobs
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            For any scheduled jobs that include the `RocketJob::Plugins::Cron` plugin, the default behavior has changed
         | 
| 55 | 
            +
            so that the scheduled job instance is created immediately after the currently scheduled instance starts.
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            To maintain the old behavior of creating the job when it fails, aborts, or completes, add the following line
         | 
| 58 | 
            +
            to each of the applicable jobs:
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            ~~~ruby
         | 
| 61 | 
            +
            self.cron_after_start = false
         | 
| 62 | 
            +
            ~~~
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            Additionally, scheduled jobs will now prevent a new one from being created when another scheduled instance 
         | 
| 65 | 
            +
            of the same job is already queued, or running with the _same_ `cron_schedule`.
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            To maintain the old behavior of allowing multiple instances with the same cron schedule, add the following
         | 
| 68 | 
            +
            line to each of the applicable jobs:
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            ~~~ruby
         | 
| 71 | 
            +
            self.cron_singleton = false
         | 
| 72 | 
            +
            ~~~
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            ##### Singleton
         | 
| 75 | 
            +
             | 
| 76 | 
            +
            Since Scheduled jobs now implement their own singleton logic, remove the singleton plugin from any scheduled jobs.
         | 
| 77 | 
            +
             | 
| 78 | 
            +
            #### Upgrading Batch Jobs to Rocket Job v6
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            Rocket Job v6 replaces the array of symbol type for `input_categories` and `output_categories`
         | 
| 81 | 
            +
            with an array of `RocketJob::Category::Input` and `RocketJob::Category::Output`.
         | 
| 82 | 
            +
             | 
| 83 | 
            +
            Jobs that added or modified the input or output categories need to be upgraded. For example:
         | 
| 84 | 
            +
            ~~~ruby
         | 
| 85 | 
            +
            class MyJob < RocketJob::Job
         | 
| 86 | 
            +
              include RocketJob::Batch
         | 
| 87 | 
            +
              
         | 
| 88 | 
            +
              self.output_categories = [:main, :errors, :ignored]
         | 
| 89 | 
            +
            end
         | 
| 90 | 
            +
            ~~~
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            Needs to be changed to:
         | 
| 93 | 
            +
            ~~~ruby
         | 
| 94 | 
            +
            class MyJob < RocketJob::Job
         | 
| 95 | 
            +
              include RocketJob::Batch
         | 
| 96 | 
            +
              
         | 
| 97 | 
            +
              output_category name: :main
         | 
| 98 | 
            +
              output_category name: :errors
         | 
| 99 | 
            +
              output_category name: :ignored
         | 
| 100 | 
            +
            end
         | 
| 101 | 
            +
            ~~~
         | 
| 102 | 
            +
             | 
| 103 | 
            +
            ##### slice_size, encrypt, compress
         | 
| 104 | 
            +
             | 
| 105 | 
            +
            These fields have been removed from the job itself:
         | 
| 106 | 
            +
            ~~~ruby
         | 
| 107 | 
            +
            class MyJob < RocketJob::Job
         | 
| 108 | 
            +
              include RocketJob::Batch
         | 
| 109 | 
            +
              
         | 
| 110 | 
            +
              self.slice_size = 1_000
         | 
| 111 | 
            +
              self.encrypt    = true
         | 
| 112 | 
            +
              self.compress   = true
         | 
| 113 | 
            +
            end
         | 
| 114 | 
            +
            ~~~
         | 
| 115 | 
            +
             | 
| 116 | 
            +
            They are now specified on the `input_category` as follows:
         | 
| 117 | 
            +
            - `slice_size` just moves under `input_category`. 
         | 
| 118 | 
            +
            - `encrypt` becomes an option to `serializer`.
         | 
| 119 | 
            +
            - `compress` is now the default for all batch jobs so is not needed.
         | 
| 120 | 
            +
             | 
| 121 | 
            +
            If the serializer is set to `encrypt` then it is automatically compressed.
         | 
| 122 | 
            +
             | 
| 123 | 
            +
            ~~~ruby
         | 
| 124 | 
            +
            class MyJob < RocketJob::Job
         | 
| 125 | 
            +
              include RocketJob::Batch
         | 
| 126 | 
            +
              
         | 
| 127 | 
            +
              input_category slice_size: 1_000, serializer: :encrypt
         | 
| 128 | 
            +
            end
         | 
| 129 | 
            +
            ~~~
         | 
| 130 | 
            +
             | 
| 131 | 
            +
            ##### collect_output, collect_nil_output
         | 
| 132 | 
            +
             | 
| 133 | 
            +
            The following fields have been moved from the job itself:
         | 
| 134 | 
            +
            ~~~ruby
         | 
| 135 | 
            +
            class MyJob < RocketJob::Job
         | 
| 136 | 
            +
              include RocketJob::Batch
         | 
| 137 | 
            +
              
         | 
| 138 | 
            +
              self.collect_output     = true
         | 
| 139 | 
            +
              self.collect_nil_output = true
         | 
| 140 | 
            +
            end
         | 
| 141 | 
            +
            ~~~
         | 
| 142 | 
            +
             | 
| 143 | 
            +
            Into the corresponding `output_category`:
         | 
| 144 | 
            +
            - `collect_output` no longer has any meaning. Output is collected anytime an `output_category` is defined.
         | 
| 145 | 
            +
            - `collect_nil_output` is now the option `nils` on the `output_category`.
         | 
| 146 | 
            +
              It defaults to `false` so that by default any `nil` output from the `perform` method is not collected. 
         | 
| 147 | 
            +
            ~~~ruby
         | 
| 148 | 
            +
            class MyJob < RocketJob::Job
         | 
| 149 | 
            +
              include RocketJob::Batch
         | 
| 150 | 
            +
             | 
| 151 | 
            +
              output_category nils: true
         | 
| 152 | 
            +
            end
         | 
| 153 | 
            +
            ~~~
         | 
| 154 | 
            +
             | 
| 155 | 
            +
            ##### name
         | 
| 156 | 
            +
             | 
| 157 | 
            +
            For both `input_category` and `output_category`, when the `name` argument is not supplied
         | 
| 158 | 
            +
            it defaults to `:main`.
         | 
| 159 | 
            +
             | 
| 160 | 
            +
            For example:
         | 
| 161 | 
            +
            ~~~ruby
         | 
| 162 | 
            +
            class MyJob < RocketJob::Job
         | 
| 163 | 
            +
              include RocketJob::Batch
         | 
| 164 | 
            +
             | 
| 165 | 
            +
              input_category name: :main, serializer: :encrypt
         | 
| 166 | 
            +
              output_category name: :main
         | 
| 167 | 
            +
            end
         | 
| 168 | 
            +
            ~~~
         | 
| 169 | 
            +
             | 
| 170 | 
            +
            Is the same as:
         | 
| 171 | 
            +
            ~~~ruby
         | 
| 172 | 
            +
            class MyJob < RocketJob::Job
         | 
| 173 | 
            +
              include RocketJob::Batch
         | 
| 174 | 
            +
             | 
| 175 | 
            +
              input_category serializer: :encrypt
         | 
| 176 | 
            +
              output_category
         | 
| 177 | 
            +
            end
         | 
| 178 | 
            +
            ~~~
         | 
| 179 | 
            +
             | 
| 180 | 
            +
            ##### Existing and inflight jobs
         | 
| 181 | 
            +
             | 
| 182 | 
            +
            When migrating to Rocket Job 6, it is recommended to load every job and then save it back again as part of the 
         | 
| 183 | 
            +
            deployment. When the job loads it will automatically convert itself from the old schema to the new v6 schema.
         | 
| 184 | 
            +
             | 
| 185 | 
            +
            In-flight jobs should not be affected, but it is important to shut down all running batch
         | 
| 186 | 
            +
            servers _before_ running any new instances.
         | 
| 31 187 |  | 
| 32 188 | 
             
            ## Rocket Job v4
         | 
| 33 189 |  | 
| 34 | 
            -
            Rocket Job Pro is now open source and included in Rocket Job. 
         | 
| 190 | 
            +
            Rocket Job Pro is now fully open source and included in Rocket Job under the Apache License. 
         | 
| 35 191 |  | 
| 36 192 | 
             
            The `RocketJob::Batch` plugin now adds batch processing capabilities to break up a single task into many
         | 
| 37 193 | 
             
            concurrent workers processing slices of the entire job at the same time. 
         | 
| @@ -72,34 +72,37 @@ module RocketJob | |
| 72 72 | 
             
                  end
         | 
| 73 73 |  | 
| 74 74 | 
             
                  def input_category(category_name = :main)
         | 
| 75 | 
            +
                    return category_name if category_name.is_a?(Category::Input)
         | 
| 76 | 
            +
                    raise(ArgumentError, "Cannot supply Output Category to input category") if category_name.is_a?(Category::Output)
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                    # Initialize categories when this method is called before initialization is complete
         | 
| 79 | 
            +
                    rocketjob_categories_assign if input_categories.empty?
         | 
| 80 | 
            +
             | 
| 75 81 | 
             
                    category_name = category_name.to_sym
         | 
| 76 | 
            -
                     | 
| 77 | 
            -
                     | 
| 78 | 
            -
             | 
| 79 | 
            -
                     | 
| 80 | 
            -
                       | 
| 81 | 
            -
                       | 
| 82 | 
            -
             | 
| 83 | 
            -
                        self.input_categories = [category]
         | 
| 84 | 
            -
                      else
         | 
| 85 | 
            -
                        raise(ArgumentError,
         | 
| 86 | 
            -
                              "Unknown Input Category: #{category_name.inspect}. Registered categories: #{input_categories.collect(&:name).join(',')}")
         | 
| 87 | 
            -
                      end
         | 
| 88 | 
            -
                    end
         | 
| 89 | 
            -
                    category
         | 
| 82 | 
            +
                    # find does not work against this association
         | 
| 83 | 
            +
                    input_categories.each { |category| return category if category.name == category_name }
         | 
| 84 | 
            +
             | 
| 85 | 
            +
                    raise(
         | 
| 86 | 
            +
                      ArgumentError,
         | 
| 87 | 
            +
                      "Unknown Input Category: #{category_name.inspect}. Registered categories: #{input_categories.collect(&:name).join(',')}"
         | 
| 88 | 
            +
                    )
         | 
| 90 89 | 
             
                  end
         | 
| 91 90 |  | 
| 92 91 | 
             
                  def output_category(category_name = :main)
         | 
| 92 | 
            +
                    return category_name if category_name.is_a?(Category::Output)
         | 
| 93 | 
            +
                    raise(ArgumentError, "Cannot supply Input Category to output category") if category_name.is_a?(Category::Input)
         | 
| 94 | 
            +
             | 
| 95 | 
            +
                    # Initialize categories when this method is called before initialization is complete
         | 
| 96 | 
            +
                    rocketjob_categories_assign if output_categories.empty? && self.class.defined_output_categories
         | 
| 97 | 
            +
             | 
| 93 98 | 
             
                    category_name = category_name.to_sym
         | 
| 94 | 
            -
                    category      = nil
         | 
| 95 99 | 
             
                    # .find does not work against this association
         | 
| 96 | 
            -
                    output_categories.each { | | 
| 97 | 
            -
                    unless category
         | 
| 98 | 
            -
                      raise(ArgumentError,
         | 
| 99 | 
            -
                            "Unknown Output Category: #{category_name.inspect}. Registered categories: #{output_categories.collect(&:name).join(',')}")
         | 
| 100 | 
            -
                    end
         | 
| 100 | 
            +
                    output_categories.each { |category| return category if category.name == category_name }
         | 
| 101 101 |  | 
| 102 | 
            -
                     | 
| 102 | 
            +
                    raise(
         | 
| 103 | 
            +
                      ArgumentError,
         | 
| 104 | 
            +
                      "Unknown Output Category: #{category_name.inspect}. Registered categories: #{output_categories.collect(&:name).join(',')}"
         | 
| 105 | 
            +
                    )
         | 
| 103 106 | 
             
                  end
         | 
| 104 107 |  | 
| 105 108 | 
             
                  # Returns [true|false] whether the named category has already been defined
         | 
| @@ -150,7 +153,7 @@ module RocketJob | |
| 150 153 | 
             
                        end
         | 
| 151 154 | 
             
                    end
         | 
| 152 155 |  | 
| 153 | 
            -
                    return if ! | 
| 156 | 
            +
                    return if !output_categories.empty? || !self.class.defined_output_categories
         | 
| 154 157 |  | 
| 155 158 | 
             
                    # Output categories default to nil if none were set in the class
         | 
| 156 159 | 
             
                    self.output_categories = self.class.defined_output_categories.deep_dup
         | 
| @@ -160,7 +163,6 @@ module RocketJob | |
| 160 163 | 
             
                  def rocketjob_categories_output_render
         | 
| 161 164 | 
             
                    return if @rocket_job_output.nil?
         | 
| 162 165 |  | 
| 163 | 
            -
                    # TODO: ..
         | 
| 164 166 | 
             
                    return unless output_categories
         | 
| 165 167 | 
             
                    return if output_categories.empty?
         | 
| 166 168 |  | 
| @@ -214,7 +216,7 @@ module RocketJob | |
| 214 216 | 
             
                    category.tabular.render(row)
         | 
| 215 217 | 
             
                  end
         | 
| 216 218 |  | 
| 217 | 
            -
                  # Migrate existing  | 
| 219 | 
            +
                  # Migrate existing v5 batch jobs to v6
         | 
| 218 220 | 
             
                  def rocketjob_categories_migrate
         | 
| 219 221 | 
             
                    return unless attribute_present?(:input_categories) && self[:input_categories]&.first.is_a?(Symbol)
         | 
| 220 222 |  | 
    
        data/lib/rocket_job/batch/io.rb
    CHANGED
    
    | @@ -14,11 +14,9 @@ module RocketJob | |
| 14 14 | 
             
                  #     Default: None ( Uses the single default input collection for this job )
         | 
| 15 15 | 
             
                  #     Validates: This value must be one of those listed in #input_categories
         | 
| 16 16 | 
             
                  def input(category = :main)
         | 
| 17 | 
            -
                     | 
| 17 | 
            +
                    category = input_category(category)
         | 
| 18 18 |  | 
| 19 | 
            -
                     | 
| 20 | 
            -
             | 
| 21 | 
            -
                    (@inputs ||= {})[category.name] ||= RocketJob::Sliced.factory(:input, category, self)
         | 
| 19 | 
            +
                    (@inputs ||= {})[category.name] ||= category.data_store(self)
         | 
| 22 20 | 
             
                  end
         | 
| 23 21 |  | 
| 24 22 | 
             
                  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
         | 
| @@ -30,11 +28,9 @@ module RocketJob | |
| 30 28 | 
             
                  #     Default: None ( Uses the single default output collection for this job )
         | 
| 31 29 | 
             
                  #     Validates: This value must be one of those listed in #output_categories
         | 
| 32 30 | 
             
                  def output(category = :main)
         | 
| 33 | 
            -
                     | 
| 34 | 
            -
             | 
| 35 | 
            -
                    category = output_category(category) unless category.is_a?(Category::Output)
         | 
| 31 | 
            +
                    category = output_category(category)
         | 
| 36 32 |  | 
| 37 | 
            -
                    (@outputs ||= {})[category.name] ||=  | 
| 33 | 
            +
                    (@outputs ||= {})[category.name] ||= category.data_store(self)
         | 
| 38 34 | 
             
                  end
         | 
| 39 35 |  | 
| 40 36 | 
             
                  # Rapidly upload individual records in batches.
         | 
| @@ -59,19 +55,19 @@ module RocketJob | |
| 59 55 | 
             
                  #     The category or the name of the category to access or download data from
         | 
| 60 56 | 
             
                  #     Default: None ( Uses the single default output collection for this job )
         | 
| 61 57 | 
             
                  #     Validates: This value must be one of those listed in #input_categories
         | 
| 62 | 
            -
                  def lookup_collection(category = :main)
         | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
                  end
         | 
| 58 | 
            +
                  # def lookup_collection(category = :main)
         | 
| 59 | 
            +
                  #   category = input_category(category) unless category.is_a?(Category::Input)
         | 
| 60 | 
            +
                  #
         | 
| 61 | 
            +
                  #   collection = (@lookup_collections ||= {})[category.name]
         | 
| 62 | 
            +
                  #
         | 
| 63 | 
            +
                  #   unless collection
         | 
| 64 | 
            +
                  #     collection_name = "rocket_job.inputs.#{id}"
         | 
| 65 | 
            +
                  #     collection_name << ".#{category.name}" unless category.name == :main
         | 
| 66 | 
            +
                  #
         | 
| 67 | 
            +
                  #     @lookup_collections[category.name] ||=
         | 
| 68 | 
            +
                  #       LookupCollection.new(Sliced::Slice.collection.database, collection_name)
         | 
| 69 | 
            +
                  #   end
         | 
| 70 | 
            +
                  # end
         | 
| 75 71 |  | 
| 76 72 | 
             
                  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
         | 
| 77 73 | 
             
                  #
         | 
| @@ -154,53 +150,7 @@ module RocketJob | |
| 154 150 | 
             
                  # * If an io stream is supplied, it is read until it returns nil.
         | 
| 155 151 | 
             
                  # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
         | 
| 156 152 | 
             
                  # * CSV parsing is slow, so it is usually left for the workers to do.
         | 
| 157 | 
            -
                   | 
| 158 | 
            -
                    raise(ArgumentError, "Either stream, or a block must be supplied") unless stream || block
         | 
| 159 | 
            -
             | 
| 160 | 
            -
                    category = input_category(category) unless category.is_a?(Category::Input)
         | 
| 161 | 
            -
                    stream ||= category.file_name
         | 
| 162 | 
            -
                    path     = nil
         | 
| 163 | 
            -
             | 
| 164 | 
            -
                    if stream
         | 
| 165 | 
            -
                      path               = IOStreams.new(stream)
         | 
| 166 | 
            -
                      path.file_name     = file_name if file_name
         | 
| 167 | 
            -
                      category.file_name = path.file_name
         | 
| 168 | 
            -
             | 
| 169 | 
            -
                      # Auto detect the format based on the upload file name if present.
         | 
| 170 | 
            -
                      if category.format == :auto
         | 
| 171 | 
            -
                        format = path.format
         | 
| 172 | 
            -
                        if format
         | 
| 173 | 
            -
                          # Rebuild tabular with the above file name
         | 
| 174 | 
            -
                          category.reset_tabular
         | 
| 175 | 
            -
                          category.format = format
         | 
| 176 | 
            -
                        end
         | 
| 177 | 
            -
                      end
         | 
| 178 | 
            -
                    end
         | 
| 179 | 
            -
             | 
| 180 | 
            -
                    # Tabular transformations required for upload?
         | 
| 181 | 
            -
                    if category.tabular?
         | 
| 182 | 
            -
                      # Remove non-printable characters from tabular input formats
         | 
| 183 | 
            -
                      # Cannot change the length of fixed width lines
         | 
| 184 | 
            -
                      replace = category.format == :fixed ? " " : ""
         | 
| 185 | 
            -
                      path&.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
         | 
| 186 | 
            -
             | 
| 187 | 
            -
                      # Extract the header line during the file upload when needed.
         | 
| 188 | 
            -
                      on_first = rocket_job_upload_header_lambda(category, on_first) if category.tabular.header?
         | 
| 189 | 
            -
                    end
         | 
| 190 | 
            -
             | 
| 191 | 
            -
                    count =
         | 
| 192 | 
            -
                      if block
         | 
| 193 | 
            -
                        input(category).upload(on_first: on_first, &block)
         | 
| 194 | 
            -
                      else
         | 
| 195 | 
            -
                        input(category).upload(on_first: on_first) do |io|
         | 
| 196 | 
            -
                          path.each(stream_mode, **args) { |line| io << line }
         | 
| 197 | 
            -
                        end
         | 
| 198 | 
            -
                      end
         | 
| 199 | 
            -
             | 
| 200 | 
            -
                    self.record_count = (record_count || 0) + count
         | 
| 201 | 
            -
                    count
         | 
| 202 | 
            -
                  end
         | 
| 203 | 
            -
             | 
| 153 | 
            +
                  #
         | 
| 204 154 | 
             
                  # Upload results from an Arel into RocketJob::SlicedJob.
         | 
| 205 155 | 
             
                  #
         | 
| 206 156 | 
             
                  # Params
         | 
| @@ -227,18 +177,13 @@ module RocketJob | |
| 227 177 | 
             
                  #
         | 
| 228 178 | 
             
                  # Example: Upload user_name and zip_code
         | 
| 229 179 | 
             
                  #   arel = User.where(country_code: 'US')
         | 
| 230 | 
            -
                  #   job.upload_arel(arel, :user_name, :zip_code)
         | 
| 180 | 
            +
                  #   job.upload_arel(arel, columns: [:user_name, :zip_code])
         | 
| 231 181 | 
             
                  #
         | 
| 232 182 | 
             
                  # Notes:
         | 
| 233 183 | 
             
                  # * Only call from one thread at a time against a single instance of this job.
         | 
| 234 184 | 
             
                  # * The record_count for the job is set to the number of records returned by the arel.
         | 
| 235 185 | 
             
                  # * If an exception is raised while uploading data, the input collection is cleared out
         | 
| 236 186 | 
             
                  #   so that if a job is retried during an upload failure, data is not duplicated.
         | 
| 237 | 
            -
                  def upload_arel(arel, *column_names, category: :main, &block)
         | 
| 238 | 
            -
                    count             = input(category).upload_arel(arel, *column_names, &block)
         | 
| 239 | 
            -
                    self.record_count = (record_count || 0) + count
         | 
| 240 | 
            -
                    count
         | 
| 241 | 
            -
                  end
         | 
| 242 187 |  | 
| 243 188 | 
             
                  # Upload the result of a MongoDB query to the input collection for processing
         | 
| 244 189 | 
             
                  # Useful when an entire MongoDB collection, or part thereof, needs to be
         | 
| @@ -266,24 +211,19 @@ module RocketJob | |
| 266 211 | 
             
                  #   criteria = User.where(state: 'FL')
         | 
| 267 212 | 
             
                  #   job.record_count = job.upload_mongo_query(criteria)
         | 
| 268 213 | 
             
                  #
         | 
| 269 | 
            -
                  # Example: Upload  | 
| 214 | 
            +
                  # Example: Upload only the specified column(s)
         | 
| 270 215 | 
             
                  #   criteria = User.where(state: 'FL')
         | 
| 271 | 
            -
                  #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
         | 
| 216 | 
            +
                  #   job.record_count = job.upload_mongo_query(criteria, columns: [:zip_code])
         | 
| 272 217 | 
             
                  #
         | 
| 273 218 | 
             
                  # Notes:
         | 
| 274 219 | 
             
                  # * Only call from one thread at a time against a single instance of this job.
         | 
| 275 220 | 
             
                  # * The record_count for the job is set to the number of records returned by the mongo query.
         | 
| 276 221 | 
             
                  # * If an exception is raised while uploading data, the input collection is cleared out
         | 
| 277 222 | 
             
                  #   so that if a job is retried during an upload failure, data is not duplicated.
         | 
| 278 | 
            -
                  def upload_mongo_query(criteria, *column_names, category: :main, &block)
         | 
| 279 | 
            -
                    count             = input(category).upload_mongo_query(criteria, *column_names, &block)
         | 
| 280 | 
            -
                    self.record_count = (record_count || 0) + count
         | 
| 281 | 
            -
                    count
         | 
| 282 | 
            -
                  end
         | 
| 283 223 |  | 
| 284 224 | 
             
                  # Upload sliced range of integer requests as arrays of start and end ids.
         | 
| 285 225 | 
             
                  #
         | 
| 286 | 
            -
                  # Returns [Integer]  | 
| 226 | 
            +
                  # Returns [Integer] the number of slices uploaded.
         | 
| 287 227 | 
             
                  #
         | 
| 288 228 | 
             
                  # Uploads one range per slice so that the response can return multiple records
         | 
| 289 229 | 
             
                  # for each slice processed
         | 
| @@ -302,17 +242,11 @@ module RocketJob | |
| 302 242 | 
             
                  # * The record_count for the job is set to: last_id - start_id + 1.
         | 
| 303 243 | 
             
                  # * If an exception is raised while uploading data, the input collection is cleared out
         | 
| 304 244 | 
             
                  #   so that if a job is retried during an upload failure, data is not duplicated.
         | 
| 305 | 
            -
                  def upload_integer_range(start_id, last_id, category: :main)
         | 
| 306 | 
            -
                    input(category).upload_integer_range(start_id, last_id)
         | 
| 307 | 
            -
                    count             = last_id - start_id + 1
         | 
| 308 | 
            -
                    self.record_count = (record_count || 0) + count
         | 
| 309 | 
            -
                    count
         | 
| 310 | 
            -
                  end
         | 
| 311 245 |  | 
| 312 246 | 
             
                  # Upload sliced range of integer requests as arrays of start and end ids
         | 
| 313 247 | 
             
                  # starting with the last range first
         | 
| 314 248 | 
             
                  #
         | 
| 315 | 
            -
                  # Returns [Integer]  | 
| 249 | 
            +
                  # Returns [Integer] the number of slices uploaded.
         | 
| 316 250 | 
             
                  #
         | 
| 317 251 | 
             
                  # Uploads one range per slice so that the response can return multiple records
         | 
| 318 252 | 
             
                  # for each slice processed.
         | 
| @@ -334,14 +268,102 @@ module RocketJob | |
| 334 268 | 
             
                  # * The record_count for the job is set to: last_id - start_id + 1.
         | 
| 335 269 | 
             
                  # * If an exception is raised while uploading data, the input collection is cleared out
         | 
| 336 270 | 
             
                  #   so that if a job is retried during an upload failure, data is not duplicated.
         | 
| 337 | 
            -
             | 
| 338 | 
            -
             | 
| 339 | 
            -
                     | 
| 271 | 
            +
             | 
| 272 | 
            +
                  def upload(object = nil, category: :main, file_name: nil, stream_mode: nil, on_first: nil, columns: nil, slice_batch_size: nil, **args, &block)
         | 
| 273 | 
            +
                    input_collection = input(category)
         | 
| 274 | 
            +
             | 
| 275 | 
            +
                    if block
         | 
| 276 | 
            +
                      raise(ArgumentError, "Cannot supply both an object to upload, and a block.") if object
         | 
| 277 | 
            +
                      if stream_mode || columns || slice_batch_size || args.size > 0
         | 
| 278 | 
            +
                        raise(ArgumentError, "Unknown keyword arguments when uploading a block. Only accepts :category, :file_name, or :on_first")
         | 
| 279 | 
            +
                      end
         | 
| 280 | 
            +
             | 
| 281 | 
            +
                      category           = input_category(category)
         | 
| 282 | 
            +
                      category.file_name = file_name if file_name
         | 
| 283 | 
            +
             | 
| 284 | 
            +
                      # Extract the header line during the upload when applicable.
         | 
| 285 | 
            +
                      extract_header = category.extract_header_callback(on_first)
         | 
| 286 | 
            +
             | 
| 287 | 
            +
                      count             = input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size, &block)
         | 
| 288 | 
            +
                      self.record_count = (record_count || 0) + count
         | 
| 289 | 
            +
                      return count
         | 
| 290 | 
            +
                    end
         | 
| 291 | 
            +
             | 
| 292 | 
            +
                    count =
         | 
| 293 | 
            +
                      case object
         | 
| 294 | 
            +
                      when Range
         | 
| 295 | 
            +
                        if file_name || stream_mode || on_first || args.size > 0
         | 
| 296 | 
            +
                          raise(ArgumentError, "Unknown keyword arguments when uploading a Range. Only accepts :category, :columns, or :slice_batch_size")
         | 
| 297 | 
            +
                        end
         | 
| 298 | 
            +
             | 
| 299 | 
            +
                        first = object.first
         | 
| 300 | 
            +
                        last  = object.last
         | 
| 301 | 
            +
                        if first < last
         | 
| 302 | 
            +
                          input_collection.upload_integer_range(first, last, slice_batch_size: slice_batch_size || 1_000)
         | 
| 303 | 
            +
                        else
         | 
| 304 | 
            +
                          input_collection.upload_integer_range_in_reverse_order(last, first, slice_batch_size: slice_batch_size || 1_000)
         | 
| 305 | 
            +
                        end
         | 
| 306 | 
            +
                      when Mongoid::Criteria
         | 
| 307 | 
            +
                        if file_name || stream_mode || on_first || args.size > 0
         | 
| 308 | 
            +
                          raise(ArgumentError, "Unknown keyword arguments when uploading a Mongoid::Criteria. Only accepts :category, :columns, or :slice_batch_size")
         | 
| 309 | 
            +
                        end
         | 
| 310 | 
            +
             | 
| 311 | 
            +
                        input_collection.upload_mongo_query(object, columns: columns, slice_batch_size: slice_batch_size, &block)
         | 
| 312 | 
            +
                      when defined?(ActiveRecord::Relation) ? ActiveRecord::Relation : false
         | 
| 313 | 
            +
                        if file_name || stream_mode || on_first || args.size > 0
         | 
| 314 | 
            +
                          raise(ArgumentError, "Unknown keyword arguments when uploading an ActiveRecord::Relation. Only accepts :category, :columns, or :slice_batch_size")
         | 
| 315 | 
            +
                        end
         | 
| 316 | 
            +
             | 
| 317 | 
            +
                        input_collection.upload_arel(object, columns: columns, slice_batch_size: slice_batch_size, &block)
         | 
| 318 | 
            +
             | 
| 319 | 
            +
                      else
         | 
| 320 | 
            +
                        raise(ArgumentError, "Unknown keyword argument :columns when uploading a file") if columns
         | 
| 321 | 
            +
             | 
| 322 | 
            +
                        category = input_category(category)
         | 
| 323 | 
            +
             | 
| 324 | 
            +
                        # Extract the header line during the upload when applicable.
         | 
| 325 | 
            +
                        extract_header = category.extract_header_callback(on_first)
         | 
| 326 | 
            +
                        path = category.upload_path(object, original_file_name: file_name)
         | 
| 327 | 
            +
             | 
| 328 | 
            +
                        input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size) do |io|
         | 
| 329 | 
            +
                          path.each(stream_mode || :line, **args) { |line| io << line }
         | 
| 330 | 
            +
                        end
         | 
| 331 | 
            +
             | 
| 332 | 
            +
                      end
         | 
| 333 | 
            +
             | 
| 334 | 
            +
                    self.record_count = (record_count || 0) + count
         | 
| 335 | 
            +
                    count
         | 
| 336 | 
            +
                  end
         | 
| 337 | 
            +
             | 
| 338 | 
            +
                  # @deprecated
         | 
| 339 | 
            +
                  def upload_arel(arel, *column_names, category: :main, &block)
         | 
| 340 | 
            +
                    count             = input(category).upload_arel(arel, columns: column_names, &block)
         | 
| 340 341 | 
             
                    self.record_count = (record_count || 0) + count
         | 
| 341 342 | 
             
                    count
         | 
| 342 343 | 
             
                  end
         | 
| 343 344 |  | 
| 344 | 
            -
                  #  | 
| 345 | 
            +
                  # @deprecated
         | 
| 346 | 
            +
                  def upload_mongo_query(criteria, *column_names, category: :main, &block)
         | 
| 347 | 
            +
                    count             = input(category).upload_mongo_query(criteria, columns: column_names, &block)
         | 
| 348 | 
            +
                    self.record_count = (record_count || 0) + count
         | 
| 349 | 
            +
                    count
         | 
| 350 | 
            +
                  end
         | 
| 351 | 
            +
             | 
| 352 | 
            +
                  # @deprecated
         | 
| 353 | 
            +
                  def upload_integer_range(start_id, last_id, category: :main, slice_batch_size: 1_000)
         | 
| 354 | 
            +
                    count             = input(category).upload_integer_range(start_id, last_id, slice_batch_size: slice_batch_size)
         | 
| 355 | 
            +
                    self.record_count = (record_count || 0) + count
         | 
| 356 | 
            +
                    count
         | 
| 357 | 
            +
                  end
         | 
| 358 | 
            +
             | 
| 359 | 
            +
                  # @deprecated
         | 
| 360 | 
            +
                  def upload_integer_range_in_reverse_order(start_id, last_id, category: :main, slice_batch_size: 1_000)
         | 
| 361 | 
            +
                    count             = input(category).upload_integer_range_in_reverse_order(start_id, last_id, slice_batch_size: slice_batch_size)
         | 
| 362 | 
            +
                    self.record_count = (record_count || 0) + count
         | 
| 363 | 
            +
                    count
         | 
| 364 | 
            +
                  end
         | 
| 365 | 
            +
             | 
| 366 | 
            +
                  # Upload the supplied slice for processing by workers
         | 
| 345 367 | 
             
                  #
         | 
| 346 368 | 
             
                  # Updates the record_count after adding the records
         | 
| 347 369 | 
             
                  #
         | 
| @@ -427,50 +449,28 @@ module RocketJob | |
| 427 449 | 
             
                    # Store the output file name in the category
         | 
| 428 450 | 
             
                    category.file_name = stream if !block && (stream.is_a?(String) || stream.is_a?(IOStreams::Path))
         | 
| 429 451 |  | 
| 430 | 
            -
                     | 
| 431 | 
            -
                      raise(ArgumentError, "A `header_line` is not supported with binary output collections") if header_line
         | 
| 432 | 
            -
             | 
| 433 | 
            -
                      return output_collection.download(&block) if block
         | 
| 452 | 
            +
                    header_line ||= category.render_header
         | 
| 434 453 |  | 
| 435 | 
            -
             | 
| 436 | 
            -
                        output_collection.download { |record| io << record[:binary] }
         | 
| 437 | 
            -
                      end
         | 
| 438 | 
            -
                    else
         | 
| 439 | 
            -
                      header_line ||= category.render_header
         | 
| 454 | 
            +
                    return output_collection.download(header_line: header_line, &block) if block
         | 
| 440 455 |  | 
| 441 | 
            -
             | 
| 456 | 
            +
                    raise(ArgumentError, "Missing mandatory `stream` or `category.file_name`") unless stream || category.file_name
         | 
| 442 457 |  | 
| 443 | 
            -
             | 
| 458 | 
            +
                    if output_collection.slice_class.binary_format
         | 
| 459 | 
            +
                      binary_header_line = output_collection.slice_class.to_binary(header_line) if header_line
         | 
| 444 460 |  | 
| 461 | 
            +
                      # Don't overwrite supplied stream options if any
         | 
| 462 | 
            +
                      stream = stream&.is_a?(IOStreams::Stream) ? stream.dup : IOStreams.new(category.file_name)
         | 
| 463 | 
            +
                      stream.remove_from_pipeline(output_collection.slice_class.binary_format)
         | 
| 464 | 
            +
                      stream.writer(**args) do |io|
         | 
| 465 | 
            +
                        # TODO: Binary formats should return the record count, instead of the slice count.
         | 
| 466 | 
            +
                        output_collection.download(header_line: binary_header_line) { |record| io.write(record) }
         | 
| 467 | 
            +
                      end
         | 
| 468 | 
            +
                    else
         | 
| 445 469 | 
             
                      IOStreams.new(stream || category.file_name).writer(:line, **args) do |io|
         | 
| 446 470 | 
             
                        output_collection.download(header_line: header_line) { |record| io << record }
         | 
| 447 471 | 
             
                      end
         | 
| 448 472 | 
             
                    end
         | 
| 449 473 | 
             
                  end
         | 
| 450 | 
            -
             | 
| 451 | 
            -
                  private
         | 
| 452 | 
            -
             | 
| 453 | 
            -
                  # Return a lambda to extract the header row from the uploaded file.
         | 
| 454 | 
            -
                  def rocket_job_upload_header_lambda(category, on_first)
         | 
| 455 | 
            -
                    case category.mode
         | 
| 456 | 
            -
                    when :line
         | 
| 457 | 
            -
                      lambda do |line|
         | 
| 458 | 
            -
                        category.tabular.parse_header(line)
         | 
| 459 | 
            -
                        category.cleanse_header!
         | 
| 460 | 
            -
                        category.columns = category.tabular.header.columns
         | 
| 461 | 
            -
                        # Call chained on_first if present
         | 
| 462 | 
            -
                        on_first&.call(line)
         | 
| 463 | 
            -
                      end
         | 
| 464 | 
            -
                    when :array
         | 
| 465 | 
            -
                      lambda do |row|
         | 
| 466 | 
            -
                        category.tabular.header.columns = row
         | 
| 467 | 
            -
                        category.cleanse_header!
         | 
| 468 | 
            -
                        category.columns = category.tabular.header.columns
         | 
| 469 | 
            -
                        # Call chained on_first if present
         | 
| 470 | 
            -
                        on_first&.call(line)
         | 
| 471 | 
            -
                      end
         | 
| 472 | 
            -
                    end
         | 
| 473 | 
            -
                  end
         | 
| 474 474 | 
             
                end
         | 
| 475 475 | 
             
              end
         | 
| 476 476 | 
             
            end
         |