wurk 0.0.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/app/controllers/wurk/api/serializers.rb +48 -2
  4. data/app/controllers/wurk/api_controller.rb +216 -1
  5. data/app/controllers/wurk/dashboard_controller.rb +20 -2
  6. data/app/controllers/wurk/extensions_controller.rb +56 -0
  7. data/app/controllers/wurk/profiles_controller.rb +68 -0
  8. data/config/routes.rb +54 -1
  9. data/exe/sidekiqswarm +8 -0
  10. data/exe/wurkswarm +23 -0
  11. data/lib/active_job/queue_adapters/wurk_adapter.rb +35 -0
  12. data/lib/generators/wurk/install/templates/wurk.rb +14 -3
  13. data/lib/sidekiq/api.rb +4 -0
  14. data/lib/sidekiq/cli.rb +9 -0
  15. data/lib/sidekiq/client.rb +4 -0
  16. data/lib/sidekiq/job.rb +4 -0
  17. data/lib/sidekiq/launcher.rb +4 -0
  18. data/lib/sidekiq/middleware/chain.rb +4 -0
  19. data/lib/sidekiq/middleware/server/statsd.rb +12 -0
  20. data/lib/sidekiq/rails.rb +10 -0
  21. data/lib/sidekiq/redis_connection.rb +4 -0
  22. data/lib/sidekiq/scheduled.rb +4 -0
  23. data/lib/sidekiq/testing.rb +4 -0
  24. data/lib/sidekiq/version.rb +4 -0
  25. data/lib/sidekiq/web.rb +4 -0
  26. data/lib/sidekiq/worker.rb +4 -0
  27. data/lib/sidekiq.rb +16 -0
  28. data/lib/wurk/batch/callbacks.rb +103 -13
  29. data/lib/wurk/batch/death_handler.rb +5 -2
  30. data/lib/wurk/batch/server_middleware.rb +35 -3
  31. data/lib/wurk/batch/status.rb +9 -0
  32. data/lib/wurk/batch.rb +23 -1
  33. data/lib/wurk/capsule.rb +20 -1
  34. data/lib/wurk/cli.rb +84 -1
  35. data/lib/wurk/client.rb +20 -17
  36. data/lib/wurk/compat.rb +44 -2
  37. data/lib/wurk/component.rb +5 -4
  38. data/lib/wurk/configuration.rb +120 -3
  39. data/lib/wurk/cron.rb +51 -9
  40. data/lib/wurk/dead_set.rb +8 -3
  41. data/lib/wurk/deploy.rb +8 -4
  42. data/lib/wurk/encryption.rb +6 -1
  43. data/lib/wurk/fetcher/reaper.rb +78 -11
  44. data/lib/wurk/fetcher/reliable.rb +14 -4
  45. data/lib/wurk/heartbeat.rb +45 -0
  46. data/lib/wurk/history.rb +174 -0
  47. data/lib/wurk/iterable_job/active_record_enumerator.rb +71 -0
  48. data/lib/wurk/iterable_job/csv_enumerator.rb +51 -0
  49. data/lib/wurk/iterable_job.rb +41 -0
  50. data/lib/wurk/iterable_job_query.rb +75 -0
  51. data/lib/wurk/job.rb +8 -0
  52. data/lib/wurk/job_record.rb +16 -1
  53. data/lib/wurk/job_set.rb +4 -4
  54. data/lib/wurk/job_util.rb +15 -6
  55. data/lib/wurk/keys.rb +10 -0
  56. data/lib/wurk/launcher.rb +35 -1
  57. data/lib/wurk/leader.rb +15 -6
  58. data/lib/wurk/limiter/bucket.rb +14 -3
  59. data/lib/wurk/limiter/concurrent.rb +1 -1
  60. data/lib/wurk/limiter/window.rb +2 -1
  61. data/lib/wurk/limiter.rb +12 -0
  62. data/lib/wurk/lua/loader.rb +10 -0
  63. data/lib/wurk/lua.rb +106 -14
  64. data/lib/wurk/metrics/history.rb +5 -0
  65. data/lib/wurk/metrics/query.rb +39 -0
  66. data/lib/wurk/metrics/queue_rollup.rb +151 -0
  67. data/lib/wurk/metrics/statsd.rb +11 -0
  68. data/lib/wurk/middleware/current_attributes.rb +29 -6
  69. data/lib/wurk/middleware/interrupt_handler.rb +5 -0
  70. data/lib/wurk/middleware/poison_pill.rb +35 -5
  71. data/lib/wurk/processor.rb +17 -8
  72. data/lib/wurk/profile_set.rb +65 -0
  73. data/lib/wurk/profiler.rb +127 -0
  74. data/lib/wurk/railtie.rb +19 -5
  75. data/lib/wurk/redis_client_adapter.rb +72 -0
  76. data/lib/wurk/redis_connection.rb +30 -0
  77. data/lib/wurk/redis_pool.rb +5 -1
  78. data/lib/wurk/scheduled.rb +42 -0
  79. data/lib/wurk/sorted_entry.rb +13 -11
  80. data/lib/wurk/stats.rb +11 -4
  81. data/lib/wurk/swarm/child_boot.rb +26 -4
  82. data/lib/wurk/swarm.rb +1 -1
  83. data/lib/wurk/transaction_aware_client.rb +69 -0
  84. data/lib/wurk/unique.rb +49 -7
  85. data/lib/wurk/version.rb +1 -1
  86. data/lib/wurk/web/batch_status.rb +42 -0
  87. data/lib/wurk/web/config.rb +219 -17
  88. data/lib/wurk/web/enterprise.rb +14 -0
  89. data/lib/wurk/web/extension.rb +348 -0
  90. data/lib/wurk/web/rack_app.rb +77 -0
  91. data/lib/wurk/web.rb +2 -0
  92. data/lib/wurk/worker/setter.rb +5 -1
  93. data/lib/wurk/worker.rb +17 -6
  94. data/lib/wurk.rb +44 -0
  95. data/vendor/assets/dashboard/assets/fa-brands-400-BP5tdqmh.woff2 +0 -0
  96. data/vendor/assets/dashboard/assets/fa-regular-400-nyy7hhHF.woff2 +0 -0
  97. data/vendor/assets/dashboard/assets/fa-solid-900-DRAAbZTg.woff2 +0 -0
  98. data/vendor/assets/dashboard/assets/index-9CFRWpfG.js +77 -0
  99. data/vendor/assets/dashboard/assets/index-CW8AFQIv.css +2 -0
  100. data/vendor/assets/dashboard/assets/wurk-logo-Vy3xW4K0.png +0 -0
  101. data/vendor/assets/dashboard/favicon.png +0 -0
  102. data/vendor/assets/dashboard/index.html +10 -3
  103. data/vendor/assets/dashboard/wurk-manifest.json +2 -2
  104. metadata +42 -3
  105. data/vendor/assets/dashboard/assets/index-D2XR0iGw.js +0 -60
  106. data/vendor/assets/dashboard/assets/index-DlPr4YXw.css +0 -1
@@ -33,10 +33,16 @@ module Wurk
33
33
  # is killed into the dead set instead of re-queued, so a job that crashes
34
34
  # its worker every time can't loop forever.
35
35
  #
36
- # SCANs are scoped to the public queues this process serves and gated by
37
- # a cluster-wide `SET NX EX` lock, so across a fleet only one process
38
- # sweeps per interval ("1/min within process group" in the spec) and the
39
- # keyspace touched is bounded to known queues.
36
+ # The reaper runs two passes, exactly as super_fetch's sweeper does:
37
+ #
38
+ # * a *scoped* sweep every interval ("1/min within process group"): SCANs
39
+ # only the public queues this process serves, gated by a cluster `SET NX
40
+ # EX` lock so one process sweeps per interval. The cheap common path.
41
+ # * a *full* sweep at most once an hour ("full SCAN 1/hr"): SCANs the whole
42
+ # `queue:*|*` keyspace, gated by its own hourly lock, so private lists
43
+ # whose public queue no live process serves — a renamed/decommissioned
44
+ # queue, or a dead host's queue no survivor consumes — are recovered too,
45
+ # not stranded forever.
40
46
  #
41
47
  # Spec: docs/target/sidekiq-pro.md §3.2.
42
48
  class Reaper
@@ -47,16 +53,24 @@ module Wurk
47
53
  # floor below which cross-host orphans can't be detected anyway.
48
54
  DEFAULT_INTERVAL = 60
49
55
 
56
+ # Full-keyspace sweep cadence + its lock TTL: at most once per hour across
57
+ # the fleet, since a global SCAN is far costlier than the scoped pass.
58
+ FULL_INTERVAL = 3600
59
+
50
60
  LOCK_KEY = 'super_fetch:reaper'
61
+ FULL_LOCK_KEY = 'super_fetch:reaper:full'
51
62
  SCAN_COUNT = 100
52
63
  THREAD_NAME = 'wurk-reaper'
53
64
 
54
65
  attr_reader :interval
55
66
 
56
- def initialize(config, interval: DEFAULT_INTERVAL, lock_key: LOCK_KEY)
67
+ def initialize(config, interval: DEFAULT_INTERVAL, lock_key: LOCK_KEY,
68
+ full_interval: FULL_INTERVAL, full_lock_key: FULL_LOCK_KEY)
57
69
  @config = config
58
70
  @interval = interval
59
71
  @lock_key = lock_key
72
+ @full_interval = full_interval
73
+ @full_lock_key = full_lock_key
60
74
  @thread = nil
61
75
  @done = false
62
76
  @mutex = ::Mutex.new
@@ -89,12 +103,13 @@ module Wurk
89
103
  !@thread.nil? && @thread.alive?
90
104
  end
91
105
 
92
# One loop tick. Runs the scoped sweep when this process takes the
# per-interval lock, then the full-keyspace sweep when it also takes the
# hourly lock. Returns the combined number of jobs reclaimed by both passes
# (0 when neither lock is won).
def reap
  total = 0
  total += reclaim! if acquire_lock?
  total += reclaim_full! if acquire_full_lock?
  total
end
99
114
 
100
115
  # One unguarded sweep over every served queue. Returns the number of
@@ -105,6 +120,21 @@ module Wurk
105
120
  served_queues.sum { |public_q| reclaim_queue(public_q, prefixes) }
106
121
  end
107
122
 
123
# Unguarded full-keyspace sweep. Walks every private list yielded by
# each_full_private_list (including lists whose public queue this process
# does not serve) and drains those whose owner is not alive. Returns the
# number of jobs reclaimed. Public so boot paths and tests can run it
# without taking the hourly lock.
def reclaim_full!
  live = live_process_prefixes
  count = 0
  each_full_private_list do |list_key, public_q, host, pid|
    count += drain(list_key, public_q) unless owner_alive?(host, pid, live)
  end
  count
end
137
+
108
138
  private
109
139
 
110
140
  # Union of `queue:<name>` keys across every capsule this process serves.
@@ -143,6 +173,39 @@ module Wurk
143
173
  end
144
174
  end
145
175
 
176
# Yields [private_list_key, public_q, host, pid] for each private list found
# by a cursor-driven SCAN of the keyspace. The MATCH pattern is
# `<QUEUE_PREFIX>*|*`, which requires a `|` in the key; keys that
# parse_full_key rejects (returns nil for) are skipped. Iteration stops when
# the server hands back cursor '0'.
def each_full_private_list
  pattern = "#{Keys::QUEUE_PREFIX}*|*"
  cursor = '0'
  loop do
    cursor, batch = redis { |conn| conn.call('SCAN', cursor, 'MATCH', pattern, 'COUNT', SCAN_COUNT) }
    batch.each do |key|
      fields = parse_full_key(key)
      next unless fields

      yield key, *fields
    end
    break if cursor == '0'
  end
end
191
+
192
# `queue:<public>|<host>|<pid>|<idx>` → [public_q, host, pid] (pid as
# Integer). Parsed from the right — pid and idx must pass `integer?`, host
# precedes them — so a `|` inside the queue name is tolerated. Returns nil
# when the key is not a well-formed private list: fewer than four fields, a
# non-integer pid/idx, or a public part that is missing the queue prefix or
# is the bare prefix.
def parse_full_key(key)
  # A negative limit keeps trailing empty fields. Plain split('|') drops
  # them, which would let a malformed key such as `queue:q|host|1|0|` pass
  # as a well-formed one.
  parts = key.split('|', -1)
  return nil if parts.size < 4

  host, pid, idx = parts.last(3)
  return nil unless integer?(pid) && integer?(idx)

  public_q = parts[0...-3].join('|')
  return nil unless public_q.start_with?(Keys::QUEUE_PREFIX) && public_q != Keys::QUEUE_PREFIX

  [public_q, host, pid.to_i]
end
208
+
146
209
  # `<public_q>|<host>|<pid>|<idx>` → [host, pid] (pid as Integer), or
147
210
  # [nil, nil] when the suffix isn't a well-formed `host|pid|idx` triple.
148
211
  # Splitting the suffix off the known public-queue prefix tolerates a
@@ -219,7 +282,7 @@ module Wurk
219
282
  end
220
283
 
221
284
  def poison_off(public_q, job, queue_name)
222
- return unless Middleware::PoisonPill.track!(job, queue: queue_name) == :poison
285
+ return unless Middleware::PoisonPill.track!(job, queue: queue_name, config: @config) == :poison
223
286
 
224
287
  # track! already ZADDed the payload to the dead set; pull the copy we
225
288
  # just LMOVE'd onto the public tail so it isn't also re-run.
@@ -230,6 +293,10 @@ module Wurk
230
293
  redis { |c| c.call('SET', @lock_key, '1', 'NX', 'EX', @interval) } == 'OK'
231
294
  end
232
295
 
296
# Tries to take the full-sweep lock with `SET <key> 1 NX EX <full_interval>`.
# True only when Redis answers 'OK', i.e. this caller created the key.
def acquire_full_lock?
  reply = redis { |conn| conn.call('SET', @full_lock_key, '1', 'NX', 'EX', @full_interval) }
  reply == 'OK'
end
299
+
233
300
  def spawn_loop_thread
234
301
  t = Thread.new { run_loop }
235
302
  t.name = THREAD_NAME
@@ -15,15 +15,18 @@ module Wurk
15
15
  # next boot of this process reclaims it via bulk_requeue.
16
16
  #
17
17
  # Priority handling: iterate queues_cmd in order with non-blocking
18
- # LMOVE, then fall back to a 2s BLMOVE on the first queue so an
18
+ # LMOVE, then fall back to a blocking BLMOVE on the first queue so an
19
19
  # empty poll doesn't spin Redis. BLMOVE has no multi-key form, so
20
- # blocking on a single queue is the best Redis gives us.
20
+ # blocking on a single queue is the best Redis gives us. The block
21
+ # timeout defaults to TIMEOUT (2s) and is overridable per the Pro
22
+ # super_fetch §3.3 `config.fetch_poll_interval` knob.
21
23
  #
22
- # Spec: docs/target/sidekiq-pro.md §3 (super_fetch),
24
+ # Spec: docs/target/sidekiq-pro.md §3 (super_fetch, §3.3 poll interval),
23
25
  # docs/target/sidekiq-free.md §15 (TIMEOUT=2).
24
26
  class Reliable < Fetcher
25
27
  include Component
26
28
 
29
+ # Default BLMOVE block timeout; overridable via config.fetch_poll_interval.
27
30
  TIMEOUT = 2
28
31
 
29
32
  # Carries the public queue key, the raw (still-JSON) job payload,
@@ -126,13 +129,20 @@ module Wurk
126
129
 
127
130
  def blmove(public_q)
128
131
  priv = self.class.private_queue_name(public_q)
132
+ timeout = poll_interval
129
133
  # Extend the socket read-timeout past BLMOVE's own timeout so the
130
134
  # default 1s pool timeout doesn't fire before BLMOVE returns.
131
135
  job = config.redis do |conn|
132
- conn.blocking_call(TIMEOUT + 1, 'BLMOVE', public_q, priv, 'RIGHT', 'LEFT', TIMEOUT)
136
+ conn.blocking_call(timeout + 1, 'BLMOVE', public_q, priv, 'RIGHT', 'LEFT', timeout)
133
137
  end
134
138
  job ? UnitOfWork.new(queue: public_q, job: job, config: config) : nil
135
139
  end
140
+
141
# BLMOVE block timeout used for an empty poll: the configured
# `config.fetch_poll_interval` when set, otherwise the TIMEOUT default.
def poll_interval
  configured = config.fetch_poll_interval
  return configured if configured

  TIMEOUT
end
136
146
  end
137
147
  end
138
148
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'etc'
4
+
3
5
  require_relative 'component'
4
6
  require_relative 'keys'
5
7
  require_relative 'processor'
@@ -151,9 +153,52 @@ module Wurk
151
153
  'identity' => @identity,
152
154
  'version' => Wurk::VERSION,
153
155
  'embedded' => @embedded
156
+ }.merge(host_facts)
157
+ end
158
+
159
# Static hardware facts merged into the process `info` hash for the Busy
# page's per-host grouping. Computed once and cached on the instance. Each
# value is best-effort: the individual readers fall back to nil/0 when the
# platform offers no readable source.
def host_facts
  return @host_facts if @host_facts

  @host_facts = { 'cpu_model' => cpu_model, 'cores' => cores, 'memory_total_kb' => memory_total_kb }
end
156
170
 
171
# Processor count as reported by `etc.nprocessors` (defaults to the Etc
# module; injectable for tests). Returns nil when the call raises.
def cores(etc = Etc)
  count = etc.nprocessors
  count
# NotImplementedError descends from ScriptError, not StandardError, so it has
# to be listed explicitly; it is what Etc raises where sysconf is missing.
rescue NotImplementedError, StandardError
  nil
end
178
+
179
# CPU model string, best-effort. When /proc/cpuinfo exists, the value of its
# first `model name` line; otherwise the output of
# `sysctl -n machdep.cpu.brand_string`. Returns nil when nothing is found or
# any StandardError is raised along the way.
def cpu_model
  unless ::File.exist?('/proc/cpuinfo')
    brand = `sysctl -n machdep.cpu.brand_string 2>/dev/null`.strip
    return brand.empty? ? nil : brand
  end

  entry = ::File.foreach('/proc/cpuinfo').find { |l| l.start_with?('model name') }
  entry&.split(':', 2)&.last&.strip
rescue StandardError
  nil
end
190
+
191
# Total physical memory in KiB, best-effort. When /proc/meminfo exists, the
# first integer on its `MemTotal` line; otherwise `sysctl -n hw.memsize`
# (bytes) divided by 1024. Returns 0 when no value can be read or a
# StandardError is raised.
def memory_total_kb
  return `sysctl -n hw.memsize 2>/dev/null`.to_i / 1024 unless ::File.exist?('/proc/meminfo')

  entry = ::File.foreach('/proc/meminfo').find { |l| l.start_with?('MemTotal') }
  entry.to_s[/\d+/].to_i
rescue StandardError
  0
end
201
+
157
202
  def capsules_info
158
203
  @config.capsules.transform_values do |cap|
159
204
  { 'concurrency' => cap.concurrency, 'mode' => cap.mode.to_s, 'weights' => cap.weights }
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'component'
4
+ require_relative 'keys'
5
+ require_relative 'stats'
6
+ require_relative 'metrics/statsd'
7
+
8
+ module Wurk
9
# Sidekiq Enterprise §5 Historical Metrics snapshotter. A leader-gated
# background thread that, every `config.retain_history` seconds, emits a
# statsd-shaped snapshot to the configured dogstatsd client — either the
# default §5.2 gauge set or a user-supplied collector block.
#
# Configured in a server block:
#
#   Sidekiq.configure_server do |config|
#     config.dogstatsd = -> { Datadog::Statsd.new('localhost', 8125) }
#     config.retain_history(30) # default §5.2 gauges
#     # …or a custom collector:
#     config.retain_history(30) do |s|
#       Sidekiq::Queue.all.each do |q|
#         s.gauge("sidekiq.queue.size", q.size, tags: ["queue:#{q.name}"])
#       end
#     end
#   end
#
# The block receives the raw dogstatsd client `s` (quacks like
# `Datadog::Statsd`: gauge/count/histogram/batch) and writes fully-qualified
# `sidekiq.*` metric names itself, matching Sidekiq Ent. Leader-gated via the
# cluster `dear-leader` lock so exactly one process emits per cluster.
#
# Every snapshot is also appended to the capped Redis stream
# `history:metrics` (§5.3) — the same key a migrated Sidekiq Ent install
# uses — so the dashboard's Historical view has a data source independent of
# any external statsd, and pre-existing Ent stream data renders without
# rewrite. The stream write happens whenever the snapshotter runs; the
# dogstatsd emit is skipped only when no client is configured.
#
# Aliased as `Sidekiq::History` (drop-in contract).
# Spec: docs/target/sidekiq-ent.md §5.1–§5.3.
class History
  include Component

  # Stream field → Stats reader. Single source for both the `history:metrics`
  # stream entry and the default §5.2 statsd gauge set (which prefixes
  # `sidekiq.`). Order is the display order.
  SNAPSHOT_FIELDS = {
    'processed' => :processed,
    'failures' => :failed,
    'enqueued' => :enqueued,
    'retries' => :retry_size,
    'dead' => :dead_size,
    'scheduled' => :scheduled_size,
    'busy' => :workers_size
  }.freeze

  # Approximate cap on retained snapshots (XADD MAXLEN ~). At the default 30s
  # interval this is ~3.5 days of history; older points age out. `~` lets
  # Redis trim in whole macro-nodes, so the actual length can briefly exceed
  # the cap — matching Ent's best-effort retention.
  STREAM_CAP = 10_000
  STREAM_DEFAULT_LIMIT = 1000

  # Reads the snapshot interval, the optional collector and the optional
  # `:history_stream_cap` override (default STREAM_CAP) from `config`.
  def initialize(config)
    @config = config
    @interval = config.history_interval
    @collector = config.history_collector
    @stream_cap = config[:history_stream_cap] || STREAM_CAP
    @done = false
    @mutex = ::Mutex.new
    @sleeper = ::ConditionVariable.new
    @thread = nil
  end

  # Starts the snapshot thread once; later calls return the existing thread.
  # The loop waits one interval before the first tick, then alternates
  # tick/wait until terminate flips @done.
  def start
    @thread ||= safe_thread('history-snapshot') do # rubocop:disable Naming/MemoizedInstanceVariableName
      wait
      until @done
        tick
        wait
      end
    end
  end

  # Marks the loop as done and wakes the sleeper so the thread exits without
  # waiting out the current interval. Does not join the thread.
  def terminate
    @mutex.synchronize do
      @done = true
      @sleeper.signal
    end
  end

  # Leader-gated: only the elected leader emits, so N workers don't each
  # publish the same cluster-wide gauges every interval. Any StandardError
  # from the snapshot is routed to handle_exception rather than raised.
  def tick
    return unless leader?

    snapshot
  rescue StandardError => e
    handle_exception(e, { context: 'history-snapshot' })
  end

  # One snapshot, bypassing the leader gate and the sleep loop. Public so
  # deterministic specs and a manual "snapshot now" can drive it directly.
  # Always appends to the `history:metrics` stream (the dashboard's source);
  # additionally emits to dogstatsd when a client is configured. Returns nil.
  def snapshot
    values = collect_values
    record_stream(values)
    emit_statsd(values)
    nil
  end

  # Most-recent snapshots from the `history:metrics` stream, oldest→newest.
  # Each point is `{ at: <epoch seconds>, <field>: <numeric>, … }`. Fields are
  # read generically, so a migrated Sidekiq Ent install's entries render
  # without rewrite regardless of which fields they carry. `limit` is clamped
  # to 1..STREAM_CAP.
  def self.recent(limit: STREAM_DEFAULT_LIMIT)
    count = limit.to_i.clamp(1, STREAM_CAP)
    entries = Wurk.redis { |c| c.call('XREVRANGE', Keys::HISTORY_METRICS, '+', '-', 'COUNT', count) }
    entries.reverse.map { |entry_id, fields| parse_entry(entry_id, fields) }
  end

  # Builds one point from a stream entry. `fields` may be a flat
  # [field, value, …] Array or an already-paired Hash.
  def self.parse_entry(entry_id, fields)
    pairs = fields.is_a?(::Array) ? fields.each_slice(2).to_h : fields
    point = { at: stream_epoch(entry_id) }
    pairs.each { |field, value| point[field.to_sym] = numeric(value) }
    point
  end

  # Redis stream IDs are "<ms>-<seq>"; the ms half is the snapshot time.
  # Returns epoch seconds as a Float.
  def self.stream_epoch(entry_id)
    entry_id.to_s.split('-', 2).first.to_i / 1000.0
  end

  # Coerce a stream field to Integer/Float for charting; leave non-numeric Ent
  # fields (e.g. a label) untouched so nothing is silently dropped.
  def self.numeric(value)
    return value if value.is_a?(::Integer)

    # Parse whole numbers as Integer first: routing them through Float would
    # silently round anything above 2**53. Base 10 is explicit so prefixed
    # forms ("0x1A") still fall through to Float() exactly as before.
    whole = Integer(value, 10, exception: false) if value.is_a?(::String)
    return whole if whole

    float = Float(value)
    (float % 1).zero? ? float.to_i : float
  rescue ::ArgumentError, ::TypeError
    value
  end

  private

  # Field name → current value, read from a fresh Wurk::Stats via the
  # readers named in SNAPSHOT_FIELDS.
  def collect_values
    stats = Wurk::Stats.new
    SNAPSHOT_FIELDS.transform_values { |reader| stats.public_send(reader) }
  end

  # Appends the values as one stream entry, trimming approximately to
  # @stream_cap entries.
  def record_stream(values)
    fields = values.flat_map { |field, value| [field, value] }
    redis do |c|
      c.call('XADD', Keys::HISTORY_METRICS, 'MAXLEN', '~', @stream_cap, '*', *fields)
    end
  end

  # §5.2 default gauge set carries the `sidekiq.` prefix so a dashboard built
  # for Sidekiq Ent reads it unchanged. A custom collector replaces it. No-op
  # when no statsd client is configured.
  def emit_statsd(values)
    client = Wurk::Metrics::Statsd.client
    return if client.nil?
    return @collector.call(client) if @collector

    values.each { |field, value| client.gauge("sidekiq.#{field}", value) }
  end

  # Sleeps up to @interval seconds, or until terminate signals; skipped
  # entirely once @done is set.
  def wait
    @mutex.synchronize do
      @sleeper.wait(@mutex, @interval) unless @done
    end
  end
end
174
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module Wurk
  module IterableJob
    # Cursor-resumable ActiveRecord iteration helpers for
    # IterableJob#build_enumerator. Behavior parity with Sidekiq's
    # `Sidekiq::Job::Iterable::ActiveRecordEnumerator`: the cursor is the
    # primary key of the last-yielded record, threaded back through AR's
    # `start:` so iteration resumes after an interruption without re-scanning.
    #
    # ActiveRecord is NOT a wurk dependency — these methods simply call the
    # relation's batching API, so they work when the host app has AR and raise
    # a plain NoMethodError otherwise (you can't build a relation without AR).
    #
    # Spec: docs/target/sidekiq-free.md §6.4; Sidekiq wiki Iteration.
    class ActiveRecordEnumerator
      # Batch size assumed by `relations_size` when the caller gives none.
      DEFAULT_BATCH_SIZE = 1000

      # `relation` is the object whose batching API is called; `cursor` is
      # passed to it as `start:`; remaining keywords are forwarded as batching
      # options.
      def initialize(relation, cursor: nil, **options)
        @relation = relation
        @cursor = cursor
        @options = options
      end

      # `[record, record.id]` pairs.
      def records
        ::Enumerator.new(-> { @relation.count }) do |yielder|
          @relation.find_each(**@options, start: @cursor) do |record|
            yielder.yield(record, record.id)
          end
        end
      end

      # `[records_batch, batch.first.id]` pairs. The size lambda is the record
      # count, NOT the batch count — byte-for-byte with upstream Sidekiq's
      # `ActiveRecordEnumerator#batches`, so `enum.size` returns the same value
      # a drop-in app gets from Sidekiq. (Only the lazy `#size` differs from
      # `relations`; the run loop never calls it.)
      def batches
        ::Enumerator.new(-> { @relation.count }) do |yielder|
          @relation.find_in_batches(**@options, start: @cursor) do |batch|
            yielder.yield(batch, batch.first.id)
          end
        end
      end

      # `[relation, first_record.id]` pairs. `:batch_size` is normalized to
      # `:of` so callers use one option name across all three helpers; see
      # `relation_options` for the exact rules.
      def relations
        ::Enumerator.new(-> { relations_size }) do |yielder|
          @relation.in_batches(**relation_options, start: @cursor) do |relation|
            yielder.yield(relation, relation.first.id)
          end
        end
      end

      private

      # Options for `in_batches`. `:batch_size` is always removed (that method
      # has no such keyword) and used as `:of` unless the caller already gave
      # `:of`. When neither is given, `:of` is left out entirely so the
      # relation's own default applies — forwarding `of: nil` would replace
      # that default with nil.
      def relation_options
        options = @options.dup
        batch_size = options.delete(:batch_size)
        of = options.delete(:of) || batch_size
        options[:of] = of if of
        options
      end

      # Number of batches `relations` will yield, using the same precedence as
      # `relation_options` (`:of`, then `:batch_size`, then the default) so the
      # reported size matches the batch size actually requested. Honoring
      # `:of` here is a deliberate divergence from upstream's `#size`.
      def relations_size
        batch_size = @options[:of] || @options[:batch_size] || DEFAULT_BATCH_SIZE
        (@relation.count + batch_size - 1) / batch_size # ceiling division
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module Wurk
  module IterableJob
    # Cursor-resumable CSV iteration helper for IterableJob#build_enumerator.
    # Iteration parity with Sidekiq's `Sidekiq::Job::Iterable::CsvEnumerator`:
    # the cursor is the integer row (or batch) index, and resume drops that
    # many rows. Requires the host to have loaded `csv` (we don't force the
    # dependency).
    #
    # Spec: docs/target/sidekiq-free.md §6.4; Sidekiq wiki Iteration.
    class CsvEnumerator
      # Raises ArgumentError unless `csv` is exactly a `::CSV` instance (and
      # the `csv` library is loaded).
      def initialize(csv)
        raise ArgumentError, 'CsvEnumerator.new takes CSV object' unless defined?(::CSV) && csv.instance_of?(::CSV)

        @csv = csv
      end

      # Enumerator of `[row, index]` pairs, skipping the first `cursor` rows.
      # `#size` is the best-effort row count, or nil when it is unknown.
      def rows(cursor:)
        @csv.lazy
            .each_with_index
            .drop(cursor || 0)
            .to_enum { count_of_rows_in_file }
      end

      # Enumerator of `[rows_batch, batch_index]` pairs, skipping the first
      # `cursor` batches. `#size` is the best-effort batch count, or nil when
      # the row count is unknown — the same "unknown" answer `rows` gives.
      def batches(cursor:, batch_size: 100)
        @csv.lazy
            .each_slice(batch_size)
            .with_index
            .drop(cursor || 0)
            .to_enum { count_of_batches_in_file(batch_size) }
      end

      private

      # Row count rounded up to whole batches. Propagates nil instead of
      # coercing it through `nil.to_f`, which would report 0 batches for a
      # CSV whose size simply cannot be determined.
      def count_of_batches_in_file(batch_size)
        row_count = count_of_rows_in_file
        row_count && (row_count.to_f / batch_size).ceil
      end

      # Best-effort row count for the enumerator's `size` (progress display).
      # Only invoked if a caller asks for `#size`; the run loop never does.
      # Returns nil when the CSV has no path. Counts newlines with `wc -l`
      # and subtracts one when `@csv.headers` is truthy, never going below 0.
      def count_of_rows_in_file
        filepath = @csv.path
        return unless filepath

        count = ::IO.popen(['wc', '-l', filepath]) { |out| out.read.strip.to_i }
        count -= 1 if @csv.headers
        [count, 0].max
      end
    end
  end
end
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'json'
4
4
  require_relative 'job'
5
+ require_relative 'iterable_job/csv_enumerator'
6
+ require_relative 'iterable_job/active_record_enumerator'
5
7
 
6
8
  module Wurk
7
9
  # Iterable jobs split long-running work into small, idempotent chunks.
@@ -70,6 +72,39 @@ module Wurk
70
72
  raise NotImplementedError, "#{self.class} must override #each_iteration"
71
73
  end
72
74
 
75
+ # --- enumerator builders (§6.4) -------------------------------------
76
+ # Helpers user code calls from `#build_enumerator` to get a resumable
77
+ # enumerator of `[item, cursor]` pairs. Cursor parity with Sidekiq:
78
+ # array/CSV use the integer index; ActiveRecord uses the record's
79
+ # primary key.
80
+
81
# Resumable enumerator of `[item, index]` pairs over `array`, skipping the
# first `cursor` items (none when cursor is nil). Raises ArgumentError when
# `array` is not an Array. `#size` is the number of remaining pairs.
def array_enumerator(array, cursor:)
  raise ArgumentError, 'array must be an Array' unless array.is_a?(::Array)

  offset = cursor || 0
  remaining = array.each_with_index.drop(offset)
  remaining.to_enum { remaining.size }
end
87
+
88
# Row-wise resumable enumerator over `csv`: wraps it in a CsvEnumerator and
# returns its `rows` enumerator starting from `cursor`.
def csv_enumerator(csv, cursor:)
  wrapper = CsvEnumerator.new(csv)
  wrapper.rows(cursor: cursor)
end
91
+
92
# Batch-wise resumable enumerator over `csv`: wraps it in a CsvEnumerator and
# returns its `batches` enumerator, forwarding `cursor` and any extra
# keywords unchanged.
def csv_batches_enumerator(csv, cursor:, **options)
  CsvEnumerator.new(csv).batches(cursor: cursor, **options)
end
95
+
96
# Record-wise resumable enumerator: builds an ActiveRecordEnumerator for
# `relation` with `cursor` and any extra keywords, and returns its `records`
# enumerator.
def active_record_records_enumerator(relation, cursor:, **options)
  ActiveRecordEnumerator.new(relation, cursor: cursor, **options).records
end
99
+
100
# Batch-wise resumable enumerator: builds an ActiveRecordEnumerator for
# `relation` with `cursor` and any extra keywords, and returns its `batches`
# enumerator.
def active_record_batches_enumerator(relation, cursor:, **options)
  ActiveRecordEnumerator.new(relation, cursor: cursor, **options).batches
end
103
+
104
# Relation-wise resumable enumerator: builds an ActiveRecordEnumerator for
# `relation` with `cursor` and any extra keywords, and returns its
# `relations` enumerator.
def active_record_relations_enumerator(relation, cursor:, **options)
  ActiveRecordEnumerator.new(relation, cursor: cursor, **options).relations
end
107
+
73
108
  # --- lifecycle hooks (no-op defaults; users override as needed) -----
74
109
 
75
110
  def on_start; end
@@ -289,4 +324,10 @@ module Wurk
289
324
  end
290
325
  end
291
326
  end
327
+
328
+ # Sidekiq drop-in: upstream homes the iterable module (and its enumerator
329
+ # classes) under `Sidekiq::Job::Iterable`. Since `Sidekiq::Job == Wurk::Job`,
330
+ # mirror that so `Sidekiq::Job::Iterable::CsvEnumerator` /
331
+ # `…::ActiveRecordEnumerator` resolve for ported code.
332
+ Job::Iterable = IterableJob unless Job.const_defined?(:Iterable, false)
292
333
  end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
module Wurk
  # Read-side API for IterableJob progress. Fetches the `it-<jid>` HASH of
  # every requested job in one pipeline, so dashboards and tools can inspect
  # many iterable jobs without one round trip per job. The HASHes are written
  # by the iteration runtime; this class only reads them.
  #
  # Aliased Sidekiq::IterableJobQuery.
  #
  # Spec: docs/target/sidekiq-free.md §19.9.
  class IterableJobQuery
    include Enumerable

    # Iteration state of a single job. `raw` is the job's `it-<jid>` HASH;
    # the readers decode its wire fields (`ex`, `rt`, `c`, `cancelled`).
    State = Struct.new(:jid, :raw) do
      # `ex` field as an Integer.
      def executions
        raw['ex'].to_i
      end

      # `rt` field as a Float.
      def runtime
        raw['rt'].to_f
      end

      # JSON-decoded `c` field, or nil/false when the field is absent.
      def cursor
        encoded = raw['c']
        encoded && ::JSON.parse(encoded)
      end

      # Epoch-seconds timestamp the job was cancelled at, or nil when the
      # `cancelled` field is missing or blank.
      def cancelled
        stamp = raw['cancelled']
        return nil unless stamp
        return nil if stamp.to_s.empty?

        stamp.to_i
      end
    end

    # @param jids [Array<String>] job ids to query (nil or a single id is
    #   coerced with `Array()`).
    def initialize(jids)
      @states = fetch(Array(jids))
    end

    # @return [State, nil] state for jid, or nil when no `it-<jid>` HASH was
    #   found for it.
    def [](jid)
      @states[jid]
    end

    # Yields each State that was found, in the order its jid was supplied.
    # Jids without state are not yielded.
    def each(&block)
      @states.each_value(&block)
    end

    private

    # Issues one pipelined HGETALL per jid and turns the replies into the
    # jid → State map. Skips Redis entirely for an empty jid list.
    def fetch(jids)
      return {} if jids.empty?

      replies = Wurk.redis do |conn|
        conn.pipelined do |pipe|
          jids.each { |jid| pipe.call('HGETALL', "it-#{jid}") }
        end
      end
      build_states(jids, replies)
    end

    # Pairs each jid with the reply at the same position and keeps only the
    # jids whose normalized HASH is non-empty.
    def build_states(jids, raws)
      jids.each_with_index.each_with_object({}) do |(jid, position), states|
        fields = normalize_hgetall(raws[position])
        states[jid] = State.new(jid, fields) unless fields.empty?
      end
    end

    # An HGETALL reply may arrive as a Hash or as a flat [field, value, …]
    # Array. Returns a Hash either way; any other reply becomes an empty Hash.
    def normalize_hgetall(raw)
      if raw.is_a?(Hash)
        raw
      elsif raw.is_a?(Array)
        raw.each_slice(2).to_h
      else
        {}
      end
    end
  end
end