brainzlab 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,569 @@
1
+ # frozen_string_literal: true
2
+
3
require 'securerandom'
require 'time'

module BrainzLab
  module Instrumentation
    # Forwards ActiveJob's ActiveSupport::Notifications events to BrainzLab:
    # Reflex breadcrumbs and error capture, Pulse traces/spans and Recall logs.
    #
    # Every handler rescues StandardError and reports it via
    # BrainzLab.debug_log, so a failure inside the instrumentation is never
    # raised back into the code that enqueues or performs the job.
    class ActiveJob
      # Thresholds for slow job detection (in milliseconds)
      SLOW_JOB_THRESHOLD = 5000 # 5 seconds
      VERY_SLOW_JOB_THRESHOLD = 30_000 # 30 seconds

      class << self
        # Subscribes to all supported ActiveJob notifications.
        #
        # Does nothing when ::ActiveJob is not loaded or when the subscribers
        # were already installed by a previous call.
        def install!
          return unless defined?(::ActiveJob)
          return if @installed

          install_enqueue_subscriber!
          install_enqueue_at_subscriber!
          install_enqueue_all_subscriber!
          install_enqueue_retry_subscriber!
          install_perform_start_subscriber!
          install_perform_subscriber!
          install_retry_stopped_subscriber!
          install_discard_subscriber!

          @installed = true
          BrainzLab.debug_log('ActiveJob instrumentation installed')
        end

        # @return [Boolean] true once install! has completed successfully
        def installed?
          @installed == true
        end

        private

        # ============================================
        # Shared helpers
        # ============================================

        # Subscribes to +event_name+ and hands the block an
        # ActiveSupport::Notifications::Event built from the raw arguments.
        # The inner block stays variadic, exactly like the subscriptions it
        # replaces, so the notifier keeps passing the same argument list.
        def subscribe_to(event_name, &handler)
          ActiveSupport::Notifications.subscribe(event_name) do |*args|
            handler.call(ActiveSupport::Notifications::Event.new(*args))
          end
        end

        # Normalises a timestamp so it can be used in Time arithmetic.
        #
        # Job and event timestamps are not guaranteed to be Time objects:
        # depending on the Rails version they may be a Time, an epoch number or
        # an ISO-8601 string, and mixing those in a subtraction raises.
        # NOTE(review): numeric values are assumed to be seconds since the Unix
        # epoch - confirm against the ActiveSupport versions you support.
        #
        # @param value [Time, Numeric, String, #to_time, nil]
        # @return [Time, nil] nil when the value is missing or cannot be parsed
        def coerce_time(value)
          case value
          when nil, Time then value
          when Numeric then Time.at(value).utc
          when String then Time.iso8601(value)
          else value.to_time if value.respond_to?(:to_time)
          end
        rescue ArgumentError, RangeError
          # Unparseable string or non-finite number: treat as "unknown".
          nil
        end

        # Milliseconds between enqueue and start, rounded to 2 decimals.
        #
        # @return [Float, nil] nil when either timestamp is missing/unusable
        def queue_wait_ms_between(enqueued_at, started_at)
          enqueued = coerce_time(enqueued_at)
          started = coerce_time(started_at)
          return nil unless enqueued && started

          ((started - enqueued) * 1000).round(2)
        end

        # Class name of the queue adapter, or nil when the payload has none
        # (so that +.compact+ drops the field instead of reporting "NilClass").
        def adapter_name(adapter)
          adapter.class.name unless adapter.nil?
        end

        # ============================================
        # Enqueue (job added to queue)
        # ============================================
        def install_enqueue_subscriber!
          subscribe_to('enqueue.active_job') { |event| handle_enqueue(event) }
        end

        # Records a breadcrumb and, inside an active trace, a Pulse span.
        def handle_enqueue(event)
          payload = event.payload
          job = payload[:job]

          job_class = job.class.name
          job_id = job.job_id
          queue = job.queue_name

          # Record breadcrumb
          if BrainzLab.configuration.reflex_effectively_enabled?
            BrainzLab::Reflex.add_breadcrumb(
              "Job enqueued: #{job_class}",
              category: 'job.enqueue',
              level: :info,
              data: {
                job_class: job_class,
                job_id: job_id,
                queue: queue,
                adapter: adapter_name(payload[:adapter])
              }.compact
            )
          end

          # Add Pulse span if trace is active
          record_enqueue_span(event, job_class, job_id, queue)
        rescue StandardError => e
          BrainzLab.debug_log("ActiveJob enqueue instrumentation failed: #{e.message}")
        end

        # ============================================
        # Enqueue At (scheduled job)
        # ============================================
        def install_enqueue_at_subscriber!
          subscribe_to('enqueue_at.active_job') { |event| handle_enqueue_at(event) }
        end

        # Records a breadcrumb describing when the job is scheduled to run.
        def handle_enqueue_at(event)
          job = event.payload[:job]

          job_class = job.class.name
          job_id = job.job_id
          queue = job.queue_name
          raw_scheduled_at = job.scheduled_at

          # Record breadcrumb
          if BrainzLab.configuration.reflex_effectively_enabled?
            scheduled_at = coerce_time(raw_scheduled_at)
            scheduled_in = scheduled_at ? ((scheduled_at - Time.now) / 60).round(1) : nil
            delay_note = scheduled_in ? " (in #{scheduled_in}min)" : ''

            # Keep the job's own ISO-8601 rendering when it has one; fall back
            # to the normalised Time for epoch numbers and other inputs.
            scheduled_label =
              if raw_scheduled_at.respond_to?(:iso8601)
                raw_scheduled_at.iso8601
              else
                scheduled_at&.iso8601
              end

            BrainzLab::Reflex.add_breadcrumb(
              "Job scheduled: #{job_class}#{delay_note}",
              category: 'job.schedule',
              level: :info,
              data: {
                job_class: job_class,
                job_id: job_id,
                queue: queue,
                scheduled_at: scheduled_label
              }.compact
            )
          end
        rescue StandardError => e
          BrainzLab.debug_log("ActiveJob enqueue_at instrumentation failed: #{e.message}")
        end

        # ============================================
        # Enqueue All (bulk job enqueueing)
        # Fired when using ActiveJob.perform_all_later
        # ============================================
        def install_enqueue_all_subscriber!
          subscribe_to('enqueue_all.active_job') { |event| handle_enqueue_all(event) }
        end

        # Records a breadcrumb and span for the batch; batches of 10 or more
        # jobs are additionally logged to Recall.
        def handle_enqueue_all(event)
          payload = event.payload
          adapter = adapter_name(payload[:adapter])
          jobs = payload[:jobs] || []

          job_count = jobs.size
          job_classes = jobs.map { |j| j.class.name }.tally

          # Record breadcrumb
          if BrainzLab.configuration.reflex_effectively_enabled?
            class_summary = job_classes.map { |k, v| "#{k}(#{v})" }.join(', ')

            BrainzLab::Reflex.add_breadcrumb(
              "Bulk enqueue: #{job_count} jobs",
              category: 'job.enqueue_all',
              level: :info,
              data: {
                job_count: job_count,
                job_classes: class_summary,
                adapter: adapter
              }.compact
            )
          end

          # Add Pulse span if trace is active
          record_enqueue_all_span(event, job_count, job_classes)

          # Log to Recall for significant bulk operations
          if job_count >= 10 && BrainzLab.configuration.recall_effectively_enabled?
            BrainzLab::Recall.info(
              "Bulk job enqueue: #{job_count} jobs",
              job_count: job_count,
              job_classes: job_classes,
              adapter: adapter
            )
          end
        rescue StandardError => e
          BrainzLab.debug_log("ActiveJob enqueue_all instrumentation failed: #{e.message}")
        end

        # ============================================
        # Enqueue Retry (job retry scheduled)
        # ============================================
        def install_enqueue_retry_subscriber!
          subscribe_to('enqueue_retry.active_job') { |event| handle_enqueue_retry(event) }
        end

        # Records a warning breadcrumb and Recall entry for a scheduled retry.
        def handle_enqueue_retry(event)
          payload = event.payload
          job = payload[:job]
          error = payload[:error]
          wait = payload[:wait]

          job_class = job.class.name
          job_id = job.job_id
          executions = job.executions

          # Record breadcrumb
          if BrainzLab.configuration.reflex_effectively_enabled?
            BrainzLab::Reflex.add_breadcrumb(
              "Job retry scheduled: #{job_class} (attempt #{executions + 1})",
              category: 'job.retry',
              level: :warning,
              data: {
                job_class: job_class,
                job_id: job_id,
                executions: executions,
                wait_seconds: wait,
                error_class: error&.class&.name,
                error_message: error&.message&.slice(0, 200)
              }.compact
            )
          end

          # Log retry to Recall
          if BrainzLab.configuration.recall_effectively_enabled?
            BrainzLab::Recall.warn(
              "Job retry scheduled: #{job_class}",
              job_class: job_class,
              job_id: job_id,
              executions: executions,
              wait_seconds: wait,
              error_class: error&.class&.name,
              error_message: error&.message&.slice(0, 500)
            )
          end
        rescue StandardError => e
          BrainzLab.debug_log("ActiveJob enqueue_retry instrumentation failed: #{e.message}")
        end

        # ============================================
        # Perform Start (job execution begins)
        # ============================================
        def install_perform_start_subscriber!
          subscribe_to('perform_start.active_job') { |event| handle_perform_start(event) }
        end

        # Remembers the start/enqueue timestamps for the matching perform
        # event, records a breadcrumb and opens the Pulse trace for the job.
        def handle_perform_start(event)
          job = event.payload[:job]

          job_class = job.class.name
          job_id = job.job_id
          queue = job.queue_name
          executions = job.executions

          # Store start time for queue wait calculation
          Thread.current[:brainzlab_job_starts] ||= {}
          Thread.current[:brainzlab_job_starts][job_id] = {
            started_at: event.time,
            enqueued_at: job.enqueued_at
          }

          # nil when the job carries no (usable) enqueued_at
          queue_wait_ms = queue_wait_ms_between(job.enqueued_at, event.time)

          # Record breadcrumb
          if BrainzLab.configuration.reflex_effectively_enabled?
            BrainzLab::Reflex.add_breadcrumb(
              "Job started: #{job_class}#{executions > 1 ? " (attempt #{executions})" : ''}",
              category: 'job.start',
              level: :info,
              data: {
                job_class: job_class,
                job_id: job_id,
                queue: queue,
                executions: executions,
                queue_wait_ms: queue_wait_ms
              }.compact
            )
          end

          # Start Pulse trace for job
          start_job_trace(job, queue_wait_ms)
        rescue StandardError => e
          BrainzLab.debug_log("ActiveJob perform_start instrumentation failed: #{e.message}")
        end

        # ============================================
        # Perform (job execution complete)
        # ============================================
        def install_perform_subscriber!
          subscribe_to('perform.active_job') { |event| handle_perform(event) }
        end

        # Records the outcome breadcrumb, finishes the Pulse trace and logs
        # failures / slow runs to Recall.
        def handle_perform(event)
          payload = event.payload
          job = payload[:job]
          exception = payload[:exception_object]
          duration = event.duration.round(2)

          job_class = job.class.name
          job_id = job.job_id
          queue = job.queue_name
          executions = job.executions

          # Consume the entry stored by handle_perform_start (if any) so the
          # per-thread hash does not grow.
          start_info = (Thread.current[:brainzlab_job_starts] || {}).delete(job_id) || {}
          queue_wait_ms = queue_wait_ms_between(start_info[:enqueued_at], start_info[:started_at])

          # Determine level based on outcome and duration
          level = if exception
                    :error
                  elsif duration >= SLOW_JOB_THRESHOLD
                    :warning
                  else
                    :info
                  end

          # Record breadcrumb
          if BrainzLab.configuration.reflex_effectively_enabled?
            message = if exception
                        "Job failed: #{job_class} (#{duration}ms)"
                      else
                        "Job completed: #{job_class} (#{duration}ms)"
                      end

            BrainzLab::Reflex.add_breadcrumb(
              message,
              category: 'job.perform',
              level: level,
              data: {
                job_class: job_class,
                job_id: job_id,
                queue: queue,
                executions: executions,
                duration_ms: duration,
                queue_wait_ms: queue_wait_ms,
                error: exception ? true : false,
                error_class: exception&.class&.name,
                error_message: exception&.message&.slice(0, 200)
              }.compact
            )
          end

          # Finish Pulse trace
          finish_job_trace(exception)

          # Log to Recall
          log_job_completion(job_class, job_id, queue, duration, exception, queue_wait_ms)
        rescue StandardError => e
          BrainzLab.debug_log("ActiveJob perform instrumentation failed: #{e.message}")
        end

        # ============================================
        # Retry Stopped (all retries exhausted)
        # ============================================
        def install_retry_stopped_subscriber!
          subscribe_to('retry_stopped.active_job') { |event| handle_retry_stopped(event) }
        end

        # Records an error breadcrumb, a Recall error and captures the
        # exception in Reflex.
        def handle_retry_stopped(event)
          payload = event.payload
          job = payload[:job]
          error = payload[:error]

          job_class = job.class.name
          job_id = job.job_id
          executions = job.executions

          # Record breadcrumb
          if BrainzLab.configuration.reflex_effectively_enabled?
            BrainzLab::Reflex.add_breadcrumb(
              "Job retries exhausted: #{job_class} (#{executions} attempts)",
              category: 'job.retry_stopped',
              level: :error,
              data: {
                job_class: job_class,
                job_id: job_id,
                executions: executions,
                error_class: error&.class&.name,
                error_message: error&.message&.slice(0, 200)
              }.compact
            )
          end

          # Log to Recall - this is a critical event
          if BrainzLab.configuration.recall_effectively_enabled?
            BrainzLab::Recall.error(
              "Job retries exhausted: #{job_class}",
              job_class: job_class,
              job_id: job_id,
              executions: executions,
              error_class: error&.class&.name,
              error_message: error&.message
            )
          end

          # Capture error in Reflex
          if error && BrainzLab.configuration.reflex_effectively_enabled?
            BrainzLab::Reflex.capture(
              error,
              tags: { job_class: job_class, job_id: job_id },
              extra: { executions: executions, retry_stopped: true }
            )
          end
        rescue StandardError => e
          BrainzLab.debug_log("ActiveJob retry_stopped instrumentation failed: #{e.message}")
        end

        # ============================================
        # Discard (job discarded due to error)
        # ============================================
        def install_discard_subscriber!
          subscribe_to('discard.active_job') { |event| handle_discard(event) }
        end

        # Records an error breadcrumb, a Recall error and captures the
        # exception in Reflex.
        def handle_discard(event)
          payload = event.payload
          job = payload[:job]
          error = payload[:error]

          job_class = job.class.name
          job_id = job.job_id

          # Record breadcrumb
          if BrainzLab.configuration.reflex_effectively_enabled?
            BrainzLab::Reflex.add_breadcrumb(
              "Job discarded: #{job_class}",
              category: 'job.discard',
              level: :error,
              data: {
                job_class: job_class,
                job_id: job_id,
                error_class: error&.class&.name,
                error_message: error&.message&.slice(0, 200)
              }.compact
            )
          end

          # Log to Recall
          if BrainzLab.configuration.recall_effectively_enabled?
            BrainzLab::Recall.error(
              "Job discarded: #{job_class}",
              job_class: job_class,
              job_id: job_id,
              error_class: error&.class&.name,
              error_message: error&.message
            )
          end

          # Capture error in Reflex
          if error && BrainzLab.configuration.reflex_effectively_enabled?
            BrainzLab::Reflex.capture(
              error,
              tags: { job_class: job_class, job_id: job_id },
              extra: { discarded: true }
            )
          end
        rescue StandardError => e
          BrainzLab.debug_log("ActiveJob discard instrumentation failed: #{e.message}")
        end

        # ============================================
        # Pulse Trace Helpers
        # ============================================

        # Opens a Pulse trace named after the job class (no-op when Pulse is
        # not effectively enabled).
        def start_job_trace(job, queue_wait_ms)
          return unless BrainzLab.configuration.pulse_effectively_enabled?

          BrainzLab::Pulse.start_trace(
            job.class.name,
            kind: 'job',
            job_class: job.class.name,
            job_id: job.job_id,
            queue: job.queue_name,
            executions: job.executions,
            queue_wait_ms: queue_wait_ms
          )
        end

        # Finishes the current Pulse trace, flagging it as failed when the
        # job raised (no-op when Pulse is not effectively enabled).
        def finish_job_trace(exception)
          return unless BrainzLab.configuration.pulse_effectively_enabled?

          BrainzLab::Pulse.finish_trace(
            error: exception ? true : false,
            error_class: exception&.class&.name,
            error_message: exception&.message
          )
        end

        # Appends a 'job' span built from +event+ to the tracer's current
        # spans. Skipped when Pulse is disabled or no trace is active.
        def append_job_span(event, name, data)
          return unless BrainzLab.configuration.pulse_effectively_enabled?

          tracer = BrainzLab::Pulse.tracer
          return unless tracer.current_trace

          tracer.current_spans << {
            span_id: SecureRandom.uuid,
            name: name,
            kind: 'job',
            started_at: event.time,
            ended_at: event.end,
            duration_ms: event.duration.round(2),
            error: false,
            data: data
          }
        end

        # Span for a single enqueue inside the active trace.
        def record_enqueue_span(event, job_class, job_id, queue)
          append_job_span(
            event,
            "job.enqueue.#{job_class}",
            'job.class' => job_class,
            'job.id' => job_id,
            'job.queue' => queue,
            'job.operation' => 'enqueue'
          )
        end

        # Span for a bulk enqueue inside the active trace.
        def record_enqueue_all_span(event, job_count, job_classes)
          append_job_span(
            event,
            'job.enqueue_all',
            'job.operation' => 'enqueue_all',
            'job.count' => job_count,
            'job.classes' => job_classes.keys.join(', ')
          )
        end

        # ============================================
        # Logging Helpers
        # ============================================

        # Logs failed jobs as errors and slow successful jobs as warn/error
        # (error once VERY_SLOW_JOB_THRESHOLD is reached). Fast successful
        # jobs are not logged.
        def log_job_completion(job_class, job_id, queue, duration, exception, queue_wait_ms)
          return unless BrainzLab.configuration.recall_effectively_enabled?

          if exception
            BrainzLab::Recall.error(
              "Job failed: #{job_class}",
              job_class: job_class,
              job_id: job_id,
              queue: queue,
              duration_ms: duration,
              queue_wait_ms: queue_wait_ms,
              error_class: exception.class.name,
              error_message: exception.message
            )
          elsif duration >= SLOW_JOB_THRESHOLD
            critical = duration >= VERY_SLOW_JOB_THRESHOLD
            # public_send: only the logger's public API should be reachable.
            BrainzLab::Recall.public_send(
              critical ? :error : :warn,
              "Slow job: #{job_class} (#{duration}ms)",
              job_class: job_class,
              job_id: job_id,
              queue: queue,
              duration_ms: duration,
              queue_wait_ms: queue_wait_ms,
              threshold_exceeded: critical ? 'critical' : 'warning'
            )
          end
        end
      end
    end
  end
end