wurk 0.0.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/app/controllers/wurk/api/serializers.rb +48 -2
  4. data/app/controllers/wurk/api_controller.rb +216 -1
  5. data/app/controllers/wurk/dashboard_controller.rb +20 -2
  6. data/app/controllers/wurk/extensions_controller.rb +56 -0
  7. data/app/controllers/wurk/profiles_controller.rb +68 -0
  8. data/config/routes.rb +54 -1
  9. data/exe/sidekiqswarm +8 -0
  10. data/exe/wurkswarm +23 -0
  11. data/lib/active_job/queue_adapters/wurk_adapter.rb +35 -0
  12. data/lib/generators/wurk/install/templates/wurk.rb +14 -3
  13. data/lib/sidekiq/api.rb +4 -0
  14. data/lib/sidekiq/cli.rb +9 -0
  15. data/lib/sidekiq/client.rb +4 -0
  16. data/lib/sidekiq/job.rb +4 -0
  17. data/lib/sidekiq/launcher.rb +4 -0
  18. data/lib/sidekiq/middleware/chain.rb +4 -0
  19. data/lib/sidekiq/middleware/server/statsd.rb +12 -0
  20. data/lib/sidekiq/rails.rb +10 -0
  21. data/lib/sidekiq/redis_connection.rb +4 -0
  22. data/lib/sidekiq/scheduled.rb +4 -0
  23. data/lib/sidekiq/testing.rb +4 -0
  24. data/lib/sidekiq/version.rb +4 -0
  25. data/lib/sidekiq/web.rb +4 -0
  26. data/lib/sidekiq/worker.rb +4 -0
  27. data/lib/sidekiq.rb +16 -0
  28. data/lib/wurk/batch/callbacks.rb +103 -13
  29. data/lib/wurk/batch/death_handler.rb +5 -2
  30. data/lib/wurk/batch/server_middleware.rb +35 -3
  31. data/lib/wurk/batch/status.rb +9 -0
  32. data/lib/wurk/batch.rb +23 -1
  33. data/lib/wurk/capsule.rb +20 -1
  34. data/lib/wurk/cli.rb +84 -1
  35. data/lib/wurk/client.rb +20 -17
  36. data/lib/wurk/compat.rb +44 -2
  37. data/lib/wurk/component.rb +5 -4
  38. data/lib/wurk/configuration.rb +120 -3
  39. data/lib/wurk/cron.rb +51 -9
  40. data/lib/wurk/dead_set.rb +8 -3
  41. data/lib/wurk/deploy.rb +8 -4
  42. data/lib/wurk/encryption.rb +6 -1
  43. data/lib/wurk/fetcher/reaper.rb +78 -11
  44. data/lib/wurk/fetcher/reliable.rb +14 -4
  45. data/lib/wurk/heartbeat.rb +45 -0
  46. data/lib/wurk/history.rb +174 -0
  47. data/lib/wurk/iterable_job/active_record_enumerator.rb +71 -0
  48. data/lib/wurk/iterable_job/csv_enumerator.rb +51 -0
  49. data/lib/wurk/iterable_job.rb +41 -0
  50. data/lib/wurk/iterable_job_query.rb +75 -0
  51. data/lib/wurk/job.rb +8 -0
  52. data/lib/wurk/job_record.rb +16 -1
  53. data/lib/wurk/job_set.rb +4 -4
  54. data/lib/wurk/job_util.rb +15 -6
  55. data/lib/wurk/keys.rb +10 -0
  56. data/lib/wurk/launcher.rb +35 -1
  57. data/lib/wurk/leader.rb +15 -6
  58. data/lib/wurk/limiter/bucket.rb +14 -3
  59. data/lib/wurk/limiter/concurrent.rb +1 -1
  60. data/lib/wurk/limiter/window.rb +2 -1
  61. data/lib/wurk/limiter.rb +12 -0
  62. data/lib/wurk/lua/loader.rb +10 -0
  63. data/lib/wurk/lua.rb +106 -14
  64. data/lib/wurk/metrics/history.rb +5 -0
  65. data/lib/wurk/metrics/query.rb +39 -0
  66. data/lib/wurk/metrics/queue_rollup.rb +151 -0
  67. data/lib/wurk/metrics/statsd.rb +11 -0
  68. data/lib/wurk/middleware/current_attributes.rb +29 -6
  69. data/lib/wurk/middleware/interrupt_handler.rb +5 -0
  70. data/lib/wurk/middleware/poison_pill.rb +35 -5
  71. data/lib/wurk/processor.rb +17 -8
  72. data/lib/wurk/profile_set.rb +65 -0
  73. data/lib/wurk/profiler.rb +127 -0
  74. data/lib/wurk/railtie.rb +19 -5
  75. data/lib/wurk/redis_client_adapter.rb +72 -0
  76. data/lib/wurk/redis_connection.rb +30 -0
  77. data/lib/wurk/redis_pool.rb +5 -1
  78. data/lib/wurk/scheduled.rb +42 -0
  79. data/lib/wurk/sorted_entry.rb +13 -11
  80. data/lib/wurk/stats.rb +11 -4
  81. data/lib/wurk/swarm/child_boot.rb +26 -4
  82. data/lib/wurk/swarm.rb +1 -1
  83. data/lib/wurk/transaction_aware_client.rb +69 -0
  84. data/lib/wurk/unique.rb +49 -7
  85. data/lib/wurk/version.rb +1 -1
  86. data/lib/wurk/web/batch_status.rb +42 -0
  87. data/lib/wurk/web/config.rb +219 -17
  88. data/lib/wurk/web/enterprise.rb +14 -0
  89. data/lib/wurk/web/extension.rb +348 -0
  90. data/lib/wurk/web/rack_app.rb +77 -0
  91. data/lib/wurk/web.rb +2 -0
  92. data/lib/wurk/worker/setter.rb +5 -1
  93. data/lib/wurk/worker.rb +17 -6
  94. data/lib/wurk.rb +44 -0
  95. data/vendor/assets/dashboard/assets/fa-brands-400-BP5tdqmh.woff2 +0 -0
  96. data/vendor/assets/dashboard/assets/fa-regular-400-nyy7hhHF.woff2 +0 -0
  97. data/vendor/assets/dashboard/assets/fa-solid-900-DRAAbZTg.woff2 +0 -0
  98. data/vendor/assets/dashboard/assets/index-9CFRWpfG.js +77 -0
  99. data/vendor/assets/dashboard/assets/index-CW8AFQIv.css +2 -0
  100. data/vendor/assets/dashboard/assets/wurk-logo-Vy3xW4K0.png +0 -0
  101. data/vendor/assets/dashboard/favicon.png +0 -0
  102. data/vendor/assets/dashboard/index.html +10 -3
  103. data/vendor/assets/dashboard/wurk-manifest.json +2 -2
  104. metadata +42 -3
  105. data/vendor/assets/dashboard/assets/index-D2XR0iGw.js +0 -60
  106. data/vendor/assets/dashboard/assets/index-DlPr4YXw.css +0 -1
data/exe/wurkswarm ADDED
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Standalone multi-process runner — forks N worker children from a preloaded
5
+ # parent (fork-based real parallelism). Does NOT load the Rails engine.
6
+ # Usage: `exe/wurkswarm -C config/wurk.yml`. Drop-in alias: `sidekiqswarm`.
7
+
8
+ $stdout.sync = true
9
+ $LOAD_PATH.unshift(File.expand_path('../lib', __dir__))
10
+
11
+ require 'wurk'
12
+
13
+ begin
14
+ cli = Wurk::CLI.instance
15
+ cli.parse
16
+ cli.run_swarm
17
+ rescue StandardError => e
18
+ raise e if $DEBUG
19
+
20
+ warn "wurkswarm: #{e.message}"
21
+ warn e.backtrace.join("\n")
22
+ exit 1
23
+ end
@@ -89,6 +89,41 @@ begin
89
89
  Sidekiq::Client.push_bulk(items).compact.size
90
90
  end
91
91
  end
92
+
93
+ # Drop-in: a migrating app's config almost always still reads
94
+ # `queue_adapter = :sidekiq`, and pillar 1 says it must keep working on a
95
+ # one-line gem swap. Rails resolves `:sidekiq` by const_get'ing
96
+ # `SidekiqAdapter`, whose autoload target runs `gem "sidekiq"; require
97
+ # "sidekiq"` (activejob .../sidekiq_adapter.rb:3-4) — that raises at boot
98
+ # in a wurk-only app where no real sidekiq gem exists.
99
+ #
100
+ # When sidekiq is genuinely absent we pre-empt the autoload with a
101
+ # Wurk-backed adapter (a bare subclass — same enqueue path, same canonical
102
+ # wrapper payload, shared `@@stopping`, inherited `JobWrapper`). When the
103
+ # real sidekiq gem IS bundled (mixed mid-migration) we leave its adapter
104
+ # untouched so a genuine install is never clobbered.
105
+ #
106
+ # `gem "sidekiq"` activates without requiring; Bundler raises Gem::LoadError
107
+ # (or Gem::MissingSpecError, a subclass) when it's not in the bundle. The
108
+ # inline rescue keeps that probe from escaping to the outer handler.
109
+ sidekiq_gem_present =
110
+ begin
111
+ gem 'sidekiq'
112
+ true
113
+ rescue Gem::LoadError
114
+ false
115
+ end
116
+
117
+ unless sidekiq_gem_present
118
+ # remove_const drops Rails' pending autoload without firing it, so the
119
+ # `class` keyword below then defines fresh and never triggers the
120
+ # `require "sidekiq"`. Same remove-then-define dance as WurkAdapter.
121
+ remove_const(:SidekiqAdapter) if const_defined?(:SidekiqAdapter, false)
122
+
123
+ # Bare subclass: inherits the whole enqueue path, the shared @@stopping
124
+ # flag, and the JobWrapper constant from WurkAdapter unchanged.
125
+ class SidekiqAdapter < WurkAdapter; end
126
+ end
92
127
  end
93
128
  end
94
129
  rescue Gem::LoadError
@@ -1,14 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # Wurk configuration. Generated by `bin/rails g wurk:install`.
4
- # See docs/target/sidekiq-free.md for the full surface area.
4
+ # Full surface area: docs/target/sidekiq-free.md. Coming from Sidekiq?
5
+ # See docs/migrate-from-sidekiq.md.
5
6
 
6
7
  Wurk.configure_server do |config|
7
8
  # config.redis = { url: ENV.fetch("REDIS_URL", "redis://localhost:6379/0") }
8
- # config.workers = 2
9
+
10
+ # Threads per worker process:
9
11
  # config.concurrency = 10
12
+
13
+ # Queues this process pulls from (first = highest priority):
10
14
  # config.queues = %w[critical default low]
11
- # config.shutdown_timeout = 25
15
+
16
+ # Seconds to let in-flight jobs finish on shutdown (Sidekiq-compatible key):
17
+ # config[:timeout] = 25
18
+
19
+ # The default is a single worker process (fork). To run several, declare a
20
+ # topology — `flat` spawns N identical forks; use `slot`s for dedicated
21
+ # queues. See docs/idea/03-process-model.md.
22
+ # config.topology = Wurk::Topology.flat(count: 2, queues: %w[critical default low], concurrency: 10)
12
23
  end
13
24
 
14
25
  Wurk.configure_client do |config|
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). In Sidekiq, `Sidekiq.server?`
4
+ # is literally "has sidekiq/cli been required" — apps and ecosystem test
5
+ # helpers require this file to make `configure_server` blocks run. Wurk
6
+ # always loads its CLI class internally, so the passthrough carries the
7
+ # *semantic*: requiring it declares this process a server.
8
+ require 'wurk'
9
+ Wurk.enter_server_mode
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require shim for Sidekiq Pro's documented statsd setup:
4
+ #
5
+ # require "sidekiq/middleware/server/statsd"
6
+ # chain.add Sidekiq::Middleware::Server::Statsd
7
+ #
8
+ # The constant itself is defined by Wurk (lib/wurk/metrics/statsd.rb, aliased as
9
+ # Wurk::Middleware::Server::Statsd → Sidekiq::Middleware::Server::Statsd via the
10
+ # compat layer). This file just ensures Wurk is loaded so the verbatim Pro
11
+ # `require` resolves instead of raising LoadError. Spec: docs/target/sidekiq-pro.md §9.1.
12
+ require 'wurk' unless defined?(Sidekiq::Middleware::Server::Statsd)
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb): `require "sidekiq/rails"` in a
4
+ # Sidekiq app loads the Rails integration. Sidekiq's is railtie-grade and
5
+ # needs only railties — ecosystem test helpers load it with `rails/railtie`
6
+ # alone, no ActionDispatch — so this maps to the railtie, NOT the dashboard
7
+ # engine (a wurk-native extra; `require "wurk/rails"` for that). Sidekiq::Web
8
+ # stays mountable as a plain Rack app either way, exactly like upstream.
9
+ require 'wurk'
10
+ require 'wurk/railtie'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require path (see lib/sidekiq.rb). Wurk loads this surface whole.
4
+ require 'wurk'
data/lib/sidekiq.rb ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop-in require entrypoint (#204): `require "sidekiq"` loads Wurk and its
4
+ # Sidekiq::* alias layer (lib/wurk/compat.rb), so code and third-party gems
5
+ # written against Sidekiq load Wurk without changing a single require. The
6
+ # sibling files under lib/sidekiq/ cover the documented `require "sidekiq/…"`
7
+ # sub-paths the ecosystem uses; all are one-line passthroughs because Wurk
8
+ # loads its full surface (web included) from the single "wurk" entrypoint.
9
+ #
10
+ # Bundler-level substitution (a gem's `add_dependency "sidekiq"`) is handled
11
+ # by the companion shim gem in ecosystem/sidekiq-shim/.
12
+ require 'wurk'
13
+
14
+ # Upstream sidekiq.rb does exactly this: a Rails host that Bundler.requires
15
+ # the gem gets the Rails integration without a separate require.
16
+ require_relative 'sidekiq/rails' if defined?(Rails::Engine)
@@ -15,25 +15,45 @@ module Wurk
15
15
  module Callbacks
16
16
  module_function
17
17
 
18
- # Called from the server middleware after BATCH_ACK_SUCCESS. Fires
19
- # `:complete` when live jids hit 0; fires `:success` when pending
20
- # also hits 0 and there have been no deaths.
18
+ # Called from the server middleware after BATCH_ACK_SUCCESS (and from
19
+ # DeathHandler when a death drains the last live jid). Fires `:complete`
20
+ # when live jids hit 0; fires `:success` when pending also hits 0 and
21
+ # there have been no deaths.
22
+ #
23
+ # Both fires are additionally gated on `b-<bid>-pkids` — the set of
24
+ # children whose own subtree hasn't finished yet (#209) — being empty. Spec §2.4:
25
+ # child `:complete`/`:success` fire before the parent's, so when the
26
+ # parent's *own* last job acks while a child batch is still running,
27
+ # nothing fires here; the last child's propagate_to_parent re-invokes
28
+ # this and fires then. The SREM in pkids_drained? happens before that
29
+ # re-invocation, so exactly one of the racing paths fires (dedup_set
30
+ # absorbs the overlap).
21
31
  def maybe_fire(bid, pending:, live:)
22
32
  return unless live.zero?
33
+ return unless kids_finished?(bid)
23
34
 
24
35
  fire_complete(bid)
25
- fire_success(bid) if pending.zero? && !death_fired?(bid)
36
+ fire_success(bid) if pending.zero? && !subtree_dead?(bid)
26
37
  propagate_to_parent(bid)
27
38
  end
28
39
 
29
- # Fired from Wurk::Batch::DeathHandler on the FIRST permanent death
30
- # in the batch only. Subsequent deaths bump the counter but do not
31
- # re-enqueue the callback.
32
- def fire_death(bid)
33
- return unless dedup_set(bid, 'death')
40
+ def kids_finished?(bid)
41
+ Wurk.redis { |conn| conn.call('SCARD', "b-#{bid}-pkids") }.to_i.zero?
42
+ end
34
43
 
44
+ # Fired from Wurk::Batch::DeathHandler whenever a death makes the died
45
+ # set go non-empty: the first death, or the first re-death after every
46
+ # dead jid was manually retried back into the live set (#212 — that
47
+ # retry's BATCH_PUSH cleared the death mark). The mark — durable `death`
48
+ # flag, `death_at`, `dead-batches` membership — is (re-)applied before
49
+ # the dedup guard so it is restored on re-death; the callback enqueue
50
+ # and parent cascade stay behind the guard so `:death` is enqueued at
51
+ # most once per batch.
52
+ def fire_death(bid)
35
53
  record_event(bid, 'death_at')
36
54
  Wurk.redis { |conn| conn.call('ZADD', 'dead-batches', Time.now.to_f.to_s, bid) }
55
+ return unless dedup_set(bid, 'death')
56
+
37
57
  enqueue_callbacks(bid, 'death')
38
58
  cascade_death(bid)
39
59
  end
@@ -61,10 +81,30 @@ module Wurk
61
81
  return unless dedup_set(bid, 'success')
62
82
 
63
83
  record_event(bid, 'success_at')
84
+ emit_duration_metric(bid)
64
85
  enqueue_callbacks(bid, 'success')
65
86
  apply_linger(bid)
66
87
  end
67
88
 
89
+ # Pro statsd metric (spec §9.3): wall-clock seconds from batch creation to
90
+ # full success. `created_at` is a CLOCK_REALTIME epoch timestamp, the same
91
+ # clock read below. No-op without a dogstatsd client.
92
+ #
93
+ # Strictly best-effort: `fire_success` has already burned the `success`
94
+ # dedup key by the time we run, so a raise here (e.g. a Redis hiccup on the
95
+ # HGET) would permanently strand the success callbacks and linger that
96
+ # follow — a retry can't re-fire them. Swallow and log instead.
97
+ def emit_duration_metric(bid)
98
+ created = Wurk.redis { |conn| conn.call('HGET', "b-#{bid}", 'created_at') }
99
+ return if created.nil? || created.to_s.empty?
100
+
101
+ seconds = ::Process.clock_gettime(::Process::CLOCK_REALTIME) - created.to_f
102
+ Wurk::Metrics::Statsd.distribution('batch.duration_dist', seconds)
103
+ rescue StandardError => e
104
+ Wurk.logger.warn("batch #{bid}: duration metric emit failed: #{e.class}: #{e.message}")
105
+ nil
106
+ end
107
+
68
108
  # Post-success retention: a succeeded batch no longer coordinates any
69
109
  # jobs, so its keys expire after the per-batch `linger` override (else
70
110
  # 24h) instead of the 30d pending TTL. Mirrors Sidekiq Pro §2.8.
@@ -106,6 +146,50 @@ module Wurk
106
146
  Wurk.redis { |conn| conn.call('HGET', "b-#{bid}", 'death') } == '1'
107
147
  end
108
148
 
149
+ # A batch's subtree is still dead while it carries the durable death
150
+ # mark OR any direct child does — deaths cascade up the parent chain,
151
+ # so a dead descendant keeps every ancestor's child marked. This gates
152
+ # `:success`, which must never fire while a job in the subtree is
153
+ # terminally dead (spec §2.4). The child check matters for the brief
154
+ # window where a batch with both its own dead job and a dead child has
155
+ # its OWN dead job retried to success: BATCH_PUSH (#212) clears that
156
+ # batch's own mark when its died set drains, but the child subtree is
157
+ # still dead, so `death_fired?` alone would wrongly let `:success` fire.
158
+ def subtree_dead?(bid)
159
+ death_fired?(bid) || any_child_dead?(bid)
160
+ end
161
+
162
+ # Recovery counterpart to cascade_death (#226). When a descendant's
163
+ # last dead job is manually retried back to success, the descendant
164
+ # clears its OWN death mark (#212, in BATCH_PUSH) — but every ancestor
165
+ # was marked by the death *cascade*, not by a jid in its own died set,
166
+ # so nothing here ever cleared them and the ancestor's `:success`
167
+ # stayed suppressed forever. Re-evaluate this batch: drop its durable
168
+ # death mark and `dead-batches` membership once its own died set is
169
+ # empty AND no child still carries a death mark. The `b-<bid>-death`
170
+ # notify dedup key is deliberately left intact, so a later re-death
171
+ # re-marks the batch (fire_death restores the flag before its own
172
+ # dedup guard) without ever re-enqueuing `:death`.
173
+ def clear_death_on_recovery(bid)
174
+ return unless death_fired?(bid)
175
+ return if own_died_remaining?(bid)
176
+ return if any_child_dead?(bid)
177
+
178
+ Wurk.redis do |conn|
179
+ conn.call('HDEL', "b-#{bid}", 'death')
180
+ conn.call('ZREM', 'dead-batches', bid)
181
+ end
182
+ end
183
+
184
+ def own_died_remaining?(bid)
185
+ Wurk.redis { |conn| conn.call('SCARD', "b-#{bid}-died") }.to_i.positive?
186
+ end
187
+
188
+ def any_child_dead?(bid)
189
+ kids = Wurk.redis { |conn| conn.call('SMEMBERS', "b-#{bid}-kids") }
190
+ kids.any? { |kid| death_fired?(kid) }
191
+ end
192
+
109
193
  # Per-callback rescue: one bad spec or a transient enqueue failure must
110
194
  # not strand the batch with the remaining callbacks for this event
111
195
  # un-enqueued. Log and move on so every other callback still fires.
@@ -146,15 +230,21 @@ module Wurk
146
230
  )
147
231
  end
148
232
 
149
- # When a child batch's `:success` fires, decrement the parent's pkids
150
- # set so the parent's own `:success` waits on the full subtree. When
151
- # parent's pkids hits 0 *and* its own pending is 0, parent's success
152
- # fires too.
233
+ # When a child batch finishes (its live jids hit 0 — by success or
234
+ # death), remove it from the parent's pkids set so the parent's own
235
+ # callbacks wait on the full subtree. When the parent's pkids hits 0,
236
+ # re-run the parent's maybe_fire: if its own counts are already at
237
+ # zero (the parent-acks-first race), this is what finally fires it.
153
238
  def propagate_to_parent(bid)
154
239
  parent_bid = parent_bid_for(bid)
155
240
  return if parent_bid.nil? || parent_bid.empty?
156
241
  return unless pkids_drained?(parent_bid, bid)
157
242
 
243
+ # A recovered child may have lifted the last death from the parent's
244
+ # subtree — clear the parent's cascaded mark before its gate runs, so
245
+ # `:success` can fire. Harmless on the death path: the dying child
246
+ # still carries its mark, so any_child_dead? keeps the parent dead.
247
+ clear_death_on_recovery(parent_bid)
158
248
  maybe_fire(parent_bid, pending: pending_for(parent_bid), live: live_for(parent_bid))
159
249
  end
160
250
 
@@ -29,8 +29,11 @@ module Wurk
29
29
  Wurk::Batch::Callbacks.fire_death(bid) if first_death == 1
30
30
  return unless live.zero?
31
31
 
32
- Wurk::Batch::Callbacks.fire_complete(bid)
33
- Wurk::Batch::Callbacks.propagate_to_parent(bid)
32
+ # Through the gated maybe_fire, not a direct fire_complete: this batch
33
+ # may still have running child batches, and spec §2.4 ordering says
34
+ # its `:complete` must wait for theirs (#209). `:success` stays
35
+ # suppressed regardless — the death above set the durable death flag.
36
+ Wurk::Batch::Callbacks.maybe_fire(bid, pending: Wurk::Batch::Callbacks.pending_for(bid), live: 0)
34
37
  end
35
38
  end
36
39
  end
@@ -3,6 +3,8 @@
3
3
  require 'json'
4
4
  require_relative '../middleware'
5
5
  require_relative '../lua'
6
+ require_relative '../job'
7
+ require_relative '../job_retry'
6
8
 
7
9
  module Wurk
8
10
  class Batch
@@ -11,6 +13,13 @@ module Wurk
11
13
  # enqueue `:success` callback jobs; if live jids hit zero, enqueue
12
14
  # `:complete` callback jobs.
13
15
  #
16
+ # On a job raising (and thus heading to retry), records a transient
17
+ # failure → BATCH_ACK_FAILED → the jid joins `b-<bid>-failed` and
18
+ # `failures` reflects the count of currently-failing jobs. A later
19
+ # successful retry clears it; a terminal death moves it to `b-<bid>-died`.
20
+ # Clean handled exits (JobRetry::Skip from expiry/interrupt, cooperative
21
+ # IterableJob interruption) are re-raised without counting as failures.
22
+ #
14
23
  # Invalidated batches short-circuit: the job is skipped without
15
24
  # raising — counts as a "success" for batch purposes per spec §12.
16
25
  #
@@ -29,12 +38,24 @@ module Wurk
29
38
  return
30
39
  end
31
40
 
32
- yield
33
- ack_success(bid, job['jid'])
41
+ run_and_ack(bid, job['jid']) { yield }
34
42
  end
35
43
 
36
44
  private
37
45
 
46
+ # A handled/skip exit or a cooperative interruption is not a failure —
47
+ # re-raise it untouched. Any other exception means the job failed and
48
+ # will retry (or eventually die): record it before re-raising.
49
+ def run_and_ack(bid, jid)
50
+ yield
51
+ ack_success(bid, jid)
52
+ rescue Wurk::JobRetry::Handled, Wurk::Job::Interrupted
53
+ raise
54
+ rescue StandardError
55
+ ack_failed(bid, jid)
56
+ raise
57
+ end
58
+
38
59
  def invalidated?(bid)
39
60
  redis_pool.with { |conn| conn.call('HGET', "b-#{bid}", 'invalidated') } == '1'
40
61
  end
@@ -44,7 +65,7 @@ module Wurk
44
65
  Wurk::Lua::Loader.eval_cached(
45
66
  conn,
46
67
  :batch_ack_success,
47
- keys: ["b-#{bid}", "b-#{bid}-jids"],
68
+ keys: ["b-#{bid}", "b-#{bid}-jids", "b-#{bid}-failed"],
48
69
  argv: [jid]
49
70
  )
50
71
  end
@@ -53,6 +74,17 @@ module Wurk
53
74
 
54
75
  Wurk::Batch::Callbacks.maybe_fire(bid, pending: pending, live: live)
55
76
  end
77
+
78
+ def ack_failed(bid, jid)
79
+ redis_pool.with do |conn|
80
+ Wurk::Lua::Loader.eval_cached(
81
+ conn,
82
+ :batch_ack_failed,
83
+ keys: ["b-#{bid}", "b-#{bid}-failed"],
84
+ argv: [jid]
85
+ )
86
+ end
87
+ end
56
88
  end
57
89
  end
58
90
  end
@@ -54,6 +54,15 @@ module Wurk
54
54
  Wurk.redis { |conn| conn.call('SMEMBERS', "b-#{@bid}-failed") }
55
55
  end
56
56
 
57
+ # Deprecated pre-Pro8 surface (spec §2.5): an array of per-failure error
58
+ # detail. The Pro8 data model (§2.8) drops the `b-<bid>-failinfo` hash in
59
+ # favour of the `failed_jids` set, which Wurk tracks — so the per-jid
60
+ # error payload is intentionally not persisted and this returns []. Kept
61
+ # so drop-in callers referencing `#failure_info` don't raise NoMethodError.
62
+ def failure_info
63
+ []
64
+ end
65
+
57
66
  def dead_jids
58
67
  Wurk.redis { |conn| conn.call('SMEMBERS', "b-#{@bid}-died") }
59
68
  end
data/lib/wurk/batch.rb CHANGED
@@ -157,7 +157,9 @@ module Wurk
157
157
  raise ArgumentError, "invalid event #{event.inspect}" unless VALID_EVENTS.include?(sym)
158
158
  raise ArgumentError, 'callback options must be a Hash' unless options.is_a?(Hash)
159
159
 
160
- @callbacks << [sym.to_s, callback_target(callback), options]
160
+ entry = [sym.to_s, callback_target(callback), options]
161
+ @callbacks << entry
162
+ persist_callback!(entry) if @flushed_once
161
163
  self
162
164
  end
163
165
 
@@ -224,6 +226,24 @@ module Wurk
224
226
  Wurk::Client.new.flush_batched(payloads) unless payloads.empty?
225
227
  end
226
228
 
229
+ # Like `linger=`, anything registered after the first flush must reach
230
+ # Redis — `Callbacks.enqueue_callbacks` reads specs from the hash, so an
231
+ # in-memory-only append would silently never fire (#213). Covers both
232
+ # `on` after `#jobs` and batches reopened by bid. The append runs
233
+ # server-side (Lua) so concurrent registrations from different processes
234
+ # can't lose each other to a read-modify-write race.
235
+ def persist_callback!(entry)
236
+ event = entry[0]
237
+ fired = Wurk.redis do |conn|
238
+ Wurk::Lua::Loader.eval_cached(conn, :batch_append_callback,
239
+ keys: ["b-#{@bid}"], argv: [entry.to_json, event])
240
+ end
241
+ raise ArgumentError, "cannot register #{event} callback: batch #{@bid} no longer exists" if fired == -1
242
+ return unless fired == '1'
243
+
244
+ Wurk.logger.warn("batch #{@bid}: #{event} callback registered after #{event} already fired — it will never run")
245
+ end
246
+
227
247
  # First flush writes the core hash, registers in the global `batches`
228
248
  # zset, and links tag indexes. Subsequent #jobs invocations skip this
229
249
  # — `total` is already there and BATCH_PUSH only increments deltas.
@@ -234,6 +254,8 @@ module Wurk
234
254
  Wurk.redis { |conn| conn.pipelined { |pipe| pipelined_first_flush(pipe, now) } }
235
255
  @parent_bid = current_parent_bid
236
256
  @flushed_once = true
257
+ # Pro statsd metric (spec §9.3); no-op without a dogstatsd client.
258
+ Wurk::Metrics::Statsd.increment('batch.created')
237
259
  end
238
260
 
239
261
  def pipelined_first_flush(pipe, now)
data/lib/wurk/capsule.rb CHANGED
@@ -63,7 +63,7 @@ module Wurk
63
63
  # by hand; centralizing it here covers the standalone CLI and embedded
64
64
  # paths too (the bug behind a nil `fetcher` in `exe/wurk`). Idempotent.
65
65
  def prepare!
66
- @fetcher ||= Wurk::Fetcher::Reliable.new(self)
66
+ @fetcher ||= build_fetcher
67
67
  redis_pool
68
68
  local_redis_pool
69
69
  client_middleware
@@ -121,6 +121,12 @@ module Wurk
121
121
  @config.lookup(name)
122
122
  end
123
123
 
124
+ # Empty-poll BLMOVE backoff for this capsule's reliable fetcher (Pro
125
+ # super_fetch §3.3). nil → the fetcher falls back to its TIMEOUT default.
126
+ def fetch_poll_interval
127
+ @config[:fetch_poll_interval]
128
+ end
129
+
124
130
  def logger
125
131
  @config.logger
126
132
  end
@@ -134,6 +140,19 @@ module Wurk
134
140
 
135
141
  private
136
142
 
143
+ # Drop-in fetch pluggability (spec §5 / §4.1): instantiate
144
+ # `config[:fetch_class]` when the host set one — a custom or Pro fetcher —
145
+ # else the default reliable BLMOVE fetcher (`Sidekiq::BasicFetch`). A
146
+ # `config[:fetch_setup]` callable, if present, is handed the freshly built
147
+ # fetcher so it can configure it before the manager starts pulling work.
148
+ def build_fetcher
149
+ klass = @config[:fetch_class] || Wurk::Fetcher::Reliable
150
+ fetcher = klass.new(self)
151
+ setup = @config[:fetch_setup]
152
+ setup.call(fetcher) if setup.respond_to?(:call)
153
+ fetcher
154
+ end
155
+
137
156
  def parse_queue_entry(entry)
138
157
  qname, weight_str = entry.to_s.split(',', 2)
139
158
  qname = qname.to_s.strip