RubyGems - job-iteration - Versions diffs - 1.9.0 → 1.11.0 - Mend

job-iteration 1.9.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +28 -1
data/README.md +19 -1
data/job-iteration.gemspec +4 -4
data/lib/job-iteration/csv_enumerator.rb +6 -10
data/lib/job-iteration/interruption_adapters/delayed_job_adapter.rb +54 -0
data/lib/job-iteration/interruption_adapters.rb +1 -1
data/lib/job-iteration/version.rb +1 -1
data/lib/tapioca/dsl/compilers/job_iteration.rb +15 -6
metadata +7 -26
data/.github/dependabot.yml +0 -16
data/.github/workflows/ci.yml +0 -98
data/.github/workflows/cla.yml +0 -22
data/.gitignore +0 -11
data/.rubocop.yml +0 -16
data/.ruby-version +0 -1
data/.yardopts +0 -3
data/CODE_OF_CONDUCT.md +0 -74
data/Gemfile +0 -42
data/Gemfile.lock +0 -192
data/Rakefile +0 -12
data/bin/setup +0 -23
data/bin/test +0 -32
data/dev.yml +0 -54
data/gemfiles/rails_gems.gemfile +0 -18
data/guides/argument-semantics.md +0 -128
data/guides/best-practices.md +0 -108
data/guides/custom-enumerator.md +0 -140
data/guides/iteration-how-it-works.md +0 -51
data/guides/throttling.md +0 -68

data/Gemfile.lock DELETED Viewed

@@ -1,192 +0,0 @@
-GIT
-  remote: https://github.com/brianmario/mysql2
-  revision: 57b8df188c963ae0e4d4e1123d3e9de2bbcab637
-  specs:
-    mysql2 (0.5.6)
-      bigdecimal
-PATH
-  remote: .
-  specs:
-    job-iteration (1.9.0)
-      activejob (>= 5.2)
-GEM
-  remote: https://rubygems.org/
-  specs:
-    activejob (8.0.1)
-      activesupport (= 8.0.1)
-      globalid (>= 0.3.6)
-    activemodel (8.0.1)
-      activesupport (= 8.0.1)
-    activerecord (8.0.1)
-      activemodel (= 8.0.1)
-      activesupport (= 8.0.1)
-      timeout (>= 0.4.0)
-    activesupport (8.0.1)
-      base64
-      benchmark (>= 0.3)
-      bigdecimal
-      concurrent-ruby (~> 1.0, >= 1.3.1)
-      connection_pool (>= 2.2.5)
-      drb
-      i18n (>= 1.6, < 2)
-      logger (>= 1.4.2)
-      minitest (>= 5.1)
-      securerandom (>= 0.3)
-      tzinfo (~> 2.0, >= 2.0.5)
-      uri (>= 0.13.1)
-    ast (2.4.2)
-    base64 (0.2.0)
-    benchmark (0.4.0)
-    bigdecimal (3.1.9)
-    coderay (1.1.3)
-    concurrent-ruby (1.3.5)
-    connection_pool (2.5.0)
-    csv (3.3.2)
-    drb (2.2.1)
-    erubi (1.13.1)
-    globalid (1.2.1)
-      activesupport (>= 6.1)
-    i18n (1.14.7)
-      concurrent-ruby (~> 1.0)
-    json (2.9.1)
-    language_server-protocol (3.17.0.4)
-    logger (1.6.5)
-    method_source (1.1.0)
-    minitest (5.25.4)
-    mocha (2.7.1)
-      ruby2_keywords (>= 0.0.5)
-    mono_logger (1.1.2)
-    multi_json (1.15.0)
-    mustermann (3.0.3)
-      ruby2_keywords (~> 0.0.1)
-    netrc (0.11.0)
-    parallel (1.26.3)
-    parser (3.3.7.0)
-      ast (~> 2.4.1)
-      racc
-    prism (1.3.0)
-    pry (0.15.2)
-      coderay (~> 1.1)
-      method_source (~> 1.0)
-    racc (1.8.1)
-    rack (3.1.8)
-    rack-protection (4.1.1)
-      base64 (>= 0.1.0)
-      logger (>= 1.6.0)
-      rack (>= 3.0.0, < 4)
-    rack-session (2.1.0)
-      base64 (>= 0.1.0)
-      rack (>= 3.0.0)
-    rainbow (3.1.1)
-    rake (13.2.1)
-    rbi (0.2.4)
-      prism (~> 1.0)
-      sorbet-runtime (>= 0.5.9204)
-    redis (5.3.0)
-      redis-client (>= 0.22.0)
-    redis-client (0.23.2)
-      connection_pool
-    redis-namespace (1.11.0)
-      redis (>= 4)
-    regexp_parser (2.10.0)
-    resque (2.7.0)
-      mono_logger (~> 1)
-      multi_json (~> 1.0)
-      redis-namespace (~> 1.6)
-      sinatra (>= 0.9.2)
-    rubocop (1.71.0)
-      json (~> 2.3)
-      language_server-protocol (>= 3.17.0)
-      parallel (~> 1.10)
-      parser (>= 3.3.0.2)
-      rainbow (>= 2.2.2, < 4.0)
-      regexp_parser (>= 2.9.3, < 3.0)
-      rubocop-ast (>= 1.36.2, < 2.0)
-      ruby-progressbar (~> 1.7)
-      unicode-display_width (>= 2.4.0, < 4.0)
-    rubocop-ast (1.38.0)
-      parser (>= 3.3.1.0)
-    rubocop-shopify (2.15.1)
-      rubocop (~> 1.51)
-    ruby-progressbar (1.13.0)
-    ruby2_keywords (0.0.5)
-    securerandom (0.4.1)
-    sidekiq (7.3.8)
-      base64
-      connection_pool (>= 2.3.0)
-      logger
-      rack (>= 2.2.4)
-      redis-client (>= 0.22.2)
-    sinatra (4.1.1)
-      logger (>= 1.6.0)
-      mustermann (~> 3.0)
-      rack (>= 3.0.0, < 4)
-      rack-protection (= 4.1.1)
-      rack-session (>= 2.0.0, < 3)
-      tilt (~> 2.0)
-    sorbet (0.5.11787)
-      sorbet-static (= 0.5.11787)
-    sorbet-runtime (0.5.11787)
-    sorbet-static (0.5.11787-universal-darwin)
-    sorbet-static (0.5.11787-x86_64-linux)
-    sorbet-static-and-runtime (0.5.11787)
-      sorbet (= 0.5.11787)
-      sorbet-runtime (= 0.5.11787)
-    spoom (1.5.2)
-      erubi (>= 1.10.0)
-      prism (>= 0.28.0)
-      rbi (>= 0.2.3)
-      sorbet-static-and-runtime (>= 0.5.10187)
-      thor (>= 0.19.2)
-    tapioca (0.16.8)
-      benchmark
-      bundler (>= 2.2.25)
-      netrc (>= 0.11.0)
-      parallel (>= 1.21.0)
-      rbi (~> 0.2)
-      sorbet-static-and-runtime (>= 0.5.11087)
-      spoom (>= 1.2.0)
-      thor (>= 1.2.0)
-      yard-sorbet
-    thor (1.3.2)
-    tilt (2.6.0)
-    timeout (0.4.3)
-    tzinfo (2.0.6)
-      concurrent-ruby (~> 1.0)
-    unicode-display_width (3.1.4)
-      unicode-emoji (~> 4.0, >= 4.0.4)
-    unicode-emoji (4.0.4)
-    uri (1.0.2)
-    yard (0.9.37)
-    yard-sorbet (0.9.0)
-      sorbet-runtime
-      yard
-PLATFORMS
-  arm64-darwin
-  x86_64-darwin
-  x86_64-linux
-DEPENDENCIES
-  activerecord
-  csv
-  globalid
-  i18n
-  job-iteration!
-  logger
-  mocha
-  mysql2!
-  pry
-  rake
-  redis
-  resque
-  rubocop-shopify
-  sidekiq
-  sorbet-runtime
-  tapioca
-  yard
-BUNDLED WITH
-   2.6.1

data/Rakefile DELETED Viewed

@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-require "bundler/gem_tasks"
-require "rake/testtask"
-Rake::TestTask.new(:test) do |t|
-  t.libs << "test"
-  t.libs << "lib"
-  t.test_files = FileList["test/**/*_test.rb"]
-end
-task(default: :test)

data/bin/setup DELETED Viewed

@@ -1,23 +0,0 @@
-#!/bin/bash
-if ! [ -x "$(command -v mysql)" ];
-then
-  echo "Error: mysql is not installed." >&2
-  echo "You need to install mysql"
-  exit 1
-else
-  echo "Installing dependencies"
-  bundle install --quiet
-  mysql.server start > /dev/null 2>&1
-  mysql -uroot job_iteration_test -e exit > /dev/null 2>&1
-  if [ $? -eq 0 ];
-  then
-    echo "Setup completed!"
-  else
-    echo "Creating job_iteration_test database"
-    mysql -uroot -e "CREATE DATABASE job_iteration_test" > /dev/null 2>&1
-    echo "Setup completed!"
-  fi
-fi

data/bin/test DELETED Viewed

@@ -1,32 +0,0 @@
-#!/usr/bin/env ruby
-# frozen_string_literal: true
-def main
-  begin
-    command = create_command
-  rescue ArgumentError => e
-    abort(e.message)
-  end
-  puts "Running #{command.join(" ")}"
-  system(*command)
-end
-def create_command
-  case ARGV.length
-  when 0
-    ["bundle", "exec", "rake", "test"]
-  when 1
-    filename = ARGV[0]
-    ["bundle", "exec", "rake", "test", "TEST=#{filename}"]
-  when 2
-    filename = ARGV[0]
-    test_name = ARGV[1]
-    test_name_with_underscores = test_name.tr(" ", "_")
-    test_name_pattern = "/#{Regexp.escape(test_name_with_underscores)}/"
-    ["bundle", "exec", "rake", "test", "TEST=#{filename}", "TESTOPTS=\"--name=#{test_name_pattern} -v\""]
-  else
-    raise ArgumentError, "Too many arguments. Did you forget to put the test name in quotes?"
-  end
-end
-main

data/dev.yml DELETED Viewed

@@ -1,54 +0,0 @@
-# This file is for Shopify employees development environment.
-# If you are an external contributor you don't have to bother with it.
-name: job-iteration
-up:
-  - packages:
-      - mysql_client
-  - ruby
-  - bundler
-  - mysql
-  - redis
-  - custom:
-      name: Create Job Iteration database
-      meet: mysql -uroot -h $MYSQL_HOST -P $MYSQL_PORT -e "CREATE DATABASE job_iteration_test"
-      met?: mysql -uroot -h $MYSQL_HOST -P $MYSQL_PORT job_iteration_test -e "SELECT 1" &> /dev/null
-commands:
-  test:
-    run:  bin/test "$@"
-    syntax:
-      optional: filename testnamepattern
-    aliases: [t]
-    desc: run tests
-    long_desc: |
-      {{bold:Default}}
-      =======
-      Run the entire test suite.
-        Examples:
-        {{command:dev test}}
-        {{command:dev t}}
-      {{bold:Run all tests in a file}}
-      ========================
-      Include the file path.
-        Example:
-        {{command:dev test test/unit/iteration_test.rb}}
-      {{bold:Run a single test in a given file}}
-      ========================
-      Include the file path and the name of the test you'd like to run.
-        Example:
-        {{command:dev test test/unit/iteration_test.rb test_that_it_has_a_version_number}}
-      {{bold:Run all tests in a given file whose name contains a string}}
-      ========================
-      Include the file path and the string that the test names should contain.
-        Example:
-        {{command:dev test test/unit/iteration_test.rb version_number}}
-  style:
-    run: bundle exec rubocop -a

data/gemfiles/rails_gems.gemfile DELETED Viewed

@@ -1,18 +0,0 @@
-# frozen_string_literal: true
-rails_version = ENV.fetch("RAILS_VERSION")
-@rails_gems_requirements = case rails_version
-when "edge"         then { github: "rails/rails", branch: "main" }
-when /\A\d+\.\d+\z/ then "~> #{rails_version}.0"
-else                raise "Unsupported RAILS_VERSION: #{rails_version}"
-end
-eval_gemfile "../Gemfile"
-# https://github.com/rails/rails/pull/44083
-if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.1") &&
-    rails_version != "edge" && Gem::Version.new(rails_version) < Gem::Version.new("7")
-  gem "net-imap", require: false
-  gem "net-pop", require: false
-  gem "net-smtp", require: false
-end

data/guides/argument-semantics.md DELETED Viewed

@@ -1,128 +0,0 @@
-`job-iteration` overrides the `perform` method of `ActiveJob::Base` to allow for iteration. The `perform` method preserves all the standard calling conventions of the original, but the way the subsequent methods work might differ from what one expects from an ActiveJob subclass.
-The call sequence is usually 3 methods:
-`perform -> build_enumerator -> each_iteration|each_batch`
-In that sense `job-iteration` works like a framework (it calls your code) rather than like a library (that you call). When using jobs with parameters, the following rules of thumb are good to keep in mind.
-### Jobs without arguments
-Jobs without arguments do not pass anything into either `build_enumerator` or `each_iteration` except for the `cursor` which `job-iteration` persists by itself:
-```ruby
-class ArglessJob < ActiveJob::Base
-  include JobIteration::Iteration
-  def build_enumerator(cursor:)
-    # ...
-  end
-  def each_iteration(single_object_yielded_from_enumerator)
-    # ...
-  end
-end
-```
-To enqueue the job:
-```ruby
-ArglessJob.perform_later
-```
-### Jobs with positional arguments
-Jobs with positional arguments will have those arguments available to both `build_enumerator` and `each_iteration`:
-```ruby
-class ArgumentativeJob < ActiveJob::Base
-  include JobIteration::Iteration
-  def build_enumerator(arg1, arg2, arg3, cursor:)
-    # ...
-  end
-  def each_iteration(single_object_yielded_from_enumerator, arg1, arg2, arg3)
-    # ...
-  end
-end
-```
-To enqueue the job:
-```ruby
-ArgumentativeJob.perform_later(_arg1 = "One", _arg2 = "Two", _arg3 = "Three")
-```
-### Jobs with keyword arguments
-Jobs with keyword arguments will have the keyword arguments available to both `build_enumerator` and `each_iteration`, but these arguments come packaged into a Hash in both cases. You will need to `fetch` or `[]` your parameter from the `Hash` you get passed in:
-```ruby
-class ParameterizedJob < ActiveJob::Base
-  include JobIteration::Iteration
-  def build_enumerator(kwargs, cursor:)
-    name = kwargs.fetch(:name)
-    email = kwargs.fetch(:email)
-    # ...
-  end
-  def each_iteration(object_yielded_from_enumerator, kwargs)
-    name = kwargs.fetch(:name)
-    email = kwargs.fetch(:email)
-    # ...
-  end
-end
-```
-To enqueue the job:
-```ruby
-ParameterizedJob.perform_later(name: "Jane", email: "jane@host.example")
-```
-Note that you cannot use `ruby2_keywords` at present, and the keyword arguments syntax is not supported in `each_iteration` / `build_enumerator`.
-### Jobs with both positional and keyword arguments
-Jobs with both positional and keyword arguments will have all of those arguments available to both `build_enumerator` and `each_iteration`, but the keyword arguments come packaged into a Hash in both cases. You will need to `fetch` or `[]` your parameter from the `Hash` you get passed in. Positional arguments get passed first and "unsplatted" (not combined into an array); the `Hash` containing keyword arguments comes after:
-```ruby
-class HighlyConfigurableGreetingJob < ActiveJob::Base
-  include JobIteration::Iteration
-  def build_enumerator(subject_line, kwargs, cursor:)
-    name = kwargs.fetch(:sender_name)
-    email = kwargs.fetch(:sender_email)
-    # ...
-  end
-  def each_iteration(object_yielded_from_enumerator, subject_line, kwargs)
-    name = kwargs.fetch(:sender_name)
-    email = kwargs.fetch(:sender_email)
-    # ...
-  end
-end
-```
-To enqueue the job:
-```ruby
-HighlyConfigurableGreetingJob.perform_later(_subject_line = "Greetings everybody!", sender_name: "Jane", sender_email: "jane@host.example")
-```
-Note that you cannot use `ruby2_keywords` at present, and the keyword arguments syntax is not supported in `each_iteration` / `build_enumerator`.
-### Returning (yielding) from enumerators
-When defining a custom enumerator (see the [custom enumerator guide](custom-enumerator.md)) you need to yield two positional arguments from it: the object that will be the value for the current iteration (like a single ActiveModel instance, a single number...) and the value you want to be persisted as the `cursor` value should `job-iteration` decide to interrupt you after this iteration. Calling the enumerator with that cursor should return the next object after the one returned in this iteration. That new `cursor` value does not get passed to `each_iteration`:
-```ruby
-Enumerator.new do |yielder|
-  # In this case `cursor` is an Integer
-  cursor.upto(99999) do |offset|
-    yielder.yield(fetch_record_at(offset), offset)
-  end
-end
-```

data/guides/best-practices.md DELETED Viewed

@@ -1,108 +0,0 @@
-# Best practices
-## Batch iteration
-Regardless of the Active Record enumerator used in the job, the `job-iteration` gem loads records in batches of 100 (by default).
-The following two jobs produce equivalent database queries;
-however, the `RecordsJob` job allows for more frequent interruptions by doing just one thing in the `each_iteration` method.
-```ruby
-# bad
-class BatchesJob < ApplicationJob
-  include JobIteration::Iteration
-  def build_enumerator(product_id, cursor:)
-    enumerator_builder.active_record_on_batches(
-      Comment.where(product_id: product_id),
-      cursor: cursor,
-      batch_size: 5,
-    )
-  end
-  def each_iteration(batch_of_comments, product_id)
-    batch_of_comments.each(&:destroy)
-  end
-end
-# good
-class RecordsJob < ApplicationJob
-  include JobIteration::Iteration
-  def build_enumerator(product_id, cursor:)
-    enumerator_builder.active_record_on_records(
-      Comment.where(product_id: product_id),
-      cursor: cursor,
-      batch_size: 5,
-    )
-  end
-  def each_iteration(comment, product_id)
-    comment.destroy
-  end
-end
-```
-## Instrumentation
-Iteration leverages [`ActiveSupport::Notifications`](https://guides.rubyonrails.org/active_support_instrumentation.html)
-to notify you what it's doing. You can subscribe to the following events (listed in order of job lifecycle):
-- `build_enumerator.iteration`
-- `throttled.iteration` (when using ThrottleEnumerator)
-- `nil_enumerator.iteration`
-- `resumed.iteration`
-- `each_iteration.iteration`
-- `not_found.iteration`
-- `interrupted.iteration`
-- `completed.iteration`
-All events have tags including the job class name and cursor position; some also add the number of times the job was
-interrupted and/or the total time the job spent running across interruptions.
-```ruby
-# config/initializers/instrumentation.rb
-ActiveSupport::Notifications.monotonic_subscribe("each_iteration.iteration") do |_, started, finished, _, tags|
-  elapsed = finished - started
-  StatsD.distribution(
-    "iteration.each_iteration",
-    elapsed,
-    tags: { job_class: tags[:job_class]&.underscore }
-  )
-  if elapsed >= BackgroundQueue.max_iteration_runtime
-    Rails.logger.warn "[Iteration] job_class=#{tags[:job_class]} " \
-    "each_iteration runtime exceeded limit of #{BackgroundQueue.max_iteration_runtime}s"
-  end
-end
-```
-## Max iteration time
-As you may notice in the snippet above, at Shopify we enforce that `each_iteration` does not take longer than `BackgroundQueue.max_iteration_runtime`, which is set to `25` seconds.
-We discourage longer iterations because jobs with a long `each_iteration` make interruptibility somewhat useless, as the infrastructure will have to wait longer for the job to interrupt.
-## Max job runtime
-If a job is supposed to have millions of iterations and you expect it to run for hours or even days, it's still a good idea to sometimes interrupt the job even if there are no interruption signals coming from deploys or the infrastructure. At Shopify, we interrupt at least every 5 minutes to preserve **worker capacity**.
-```ruby
-JobIteration.max_job_runtime = 5.minutes # nil by default
-```
-Use this accessor to tweak how often you'd like the job to interrupt itself.
-### Per job max job runtime
-For more granular control, `job_iteration_max_job_runtime` can be set **per-job class**. This allows for incremental adoption, as well as for combining a conservative global setting with a more aggressive setting on a per-job basis.
-```ruby
-class MyJob < ApplicationJob
-  include JobIteration::Iteration
-  self.job_iteration_max_job_runtime = 3.minutes
-  # ...
-```
-This setting will be inherited by any child classes, although it can be further overridden. Note that no class can **increase** the `max_job_runtime` it has inherited; it can only be **decreased**. No job can increase its `max_job_runtime` beyond the global limit.