ruby_reactor 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/.rubocop.yml +1 -0
- data/CHANGELOG.md +7 -0
- data/README.md +8 -2
- data/lib/ruby_reactor/configuration.rb +6 -1
- data/lib/ruby_reactor/context.rb +2 -1
- data/lib/ruby_reactor/context_serializer.rb +1 -3
- data/lib/ruby_reactor/dsl/reactor.rb +6 -2
- data/lib/ruby_reactor/executor/compensation_manager.rb +75 -47
- data/lib/ruby_reactor/executor/retry_manager.rb +15 -5
- data/lib/ruby_reactor/executor/step_executor.rb +36 -18
- data/lib/ruby_reactor/executor.rb +112 -36
- data/lib/ruby_reactor/map/collector.rb +4 -4
- data/lib/ruby_reactor/map/element_executor.rb +15 -1
- data/lib/ruby_reactor/map/helpers.rb +17 -4
- data/lib/ruby_reactor/middleware.rb +13 -0
- data/lib/ruby_reactor/middleware_runner.rb +29 -0
- data/lib/ruby_reactor/open_telemetry.rb +647 -0
- data/lib/ruby_reactor/reactor.rb +1 -0
- data/lib/ruby_reactor/rspec/test_subject.rb +0 -1
- data/lib/ruby_reactor/sidekiq_adapter.rb +7 -21
- data/lib/ruby_reactor/step/map_step.rb +25 -33
- data/lib/ruby_reactor/version.rb +1 -1
- data/lib/ruby_reactor/web/coordination_serializer.rb +12 -18
- data/teley/Dockerfile +60 -0
- metadata +5 -3
- data/lib/ruby_reactor/map/execution.rb +0 -101
- data/lib/ruby_reactor/sidekiq_workers/map_execution_worker.rb +0 -15
|
@@ -0,0 +1,647 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
begin
|
|
4
|
+
require "opentelemetry-api"
|
|
5
|
+
rescue LoadError
|
|
6
|
+
# Optional load
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
module RubyReactor
|
|
10
|
+
# Middleware implementing OpenTelemetry instrumentation for RubyReactor execution
|
|
11
|
+
# rubocop:disable Metrics/ClassLength
|
|
12
|
+
class OpenTelemetry < Middleware
|
|
13
|
+
# Builds the middleware with empty span/token registries.
#
# The hash registries are keyed by step name by the hooks below; the reactor
# span and its context token are tracked on their own.
#
# @param options [Hash] keyword options, forwarded unchanged to the superclass
def initialize(**options)
  super

  # Reactor-level span and the token returned when its context was attached.
  @reactor_span = nil
  @reactor_token = nil

  # Step execution spans, their context tokens and the last retry error seen.
  @step_spans = {}
  @step_tokens = {}
  @retry_errors = {}

  # Compensation and undo spans with their context tokens.
  @compensation_spans = {}
  @compensation_tokens = {}
  @undo_spans = {}
  @undo_tokens = {}
end
|
|
25
|
+
|
|
26
|
+
# Opens the reactor-level span and attaches it as the current OpenTelemetry context.
#
# Each input is recorded as a `reactor.inputs.<name>` attribute. Inputs whose
# declaration on the reactor class has a truthy `:redact` entry are written as
# "[REDACTED]". A context whose status is anything other than "pending" is
# tagged `reactor.resumed`.
#
# @param reactor_name [String] span name, also stored as `reactor.name`
# @param inputs [Hash] reactor inputs, iterated as key/value pairs
# @param context [Object] execution context; `reactor_class`, `context_id` and `status` are read
# @raise [RuntimeError] when the OpenTelemetry API is not loaded
def on_start_reactor(reactor_name, inputs, context)
  ensure_opentelemetry_loaded!
  parent_ctx = extract_context(context)
  tracer = ::OpenTelemetry.tracer_provider.tracer("ruby_reactor")

  reactor_class = context.reactor_class
  redact_keys = []
  redact_keys = reactor_class.inputs.select { |_, cfg| cfg[:redact] }.keys if reactor_class.respond_to?(:inputs)

  base_attributes = { "reactor.name" => reactor_name, "reactor.context_id" => context.context_id }
  attributes = inputs.each_with_object(base_attributes) do |(key, value), attrs|
    attrs["reactor.inputs.#{key}"] = redact_keys.include?(key.to_sym) ? "[REDACTED]" : safe_value(value)
  end
  attributes["reactor.resumed"] = true unless context.status.to_s == "pending"

  # Prefer a propagated parent (extracted from the stored carrier); otherwise
  # nest under whatever context is current.
  span_parent = parent_ctx || ::OpenTelemetry::Context.current
  @reactor_span = tracer.start_span(reactor_name, attributes: attributes, with_parent: span_parent)
  @reactor_token = ::OpenTelemetry::Context.attach(::OpenTelemetry::Trace.context_with_span(@reactor_span))
end
|
|
53
|
+
|
|
54
|
+
# Closes the reactor span once an execution attempt returns a result.
#
# The reactor context token is detached, every per-step registry is emptied,
# and the span status is derived from the result before the span is finished.
#
# @param _reactor_name [String] unused
# @param result [Object] reactor result; a RetryQueuedResult is mapped as a failed attempt
# @param context [Object] execution context, passed on to the status mapper
def on_complete_reactor(_reactor_name, result, context)
  ::OpenTelemetry::Context.detach(@reactor_token) if @reactor_token
  @reactor_token = nil

  span = @reactor_span
  @reactor_span = nil

  [
    @step_spans, @step_tokens, @retry_errors,
    @compensation_spans, @compensation_tokens,
    @undo_spans, @undo_tokens
  ].each(&:clear)

  return unless span

  case result
  when RubyReactor::RetryQueuedResult
    # The attempt failed and was requeued for an async retry, so this span
    # stands for one failed attempt and is marked as an error.
    map_reactor_retry_queued_status(span, result)
  else
    map_reactor_result_status(span, result, context)
  end
  span.finish
end
|
|
83
|
+
|
|
84
|
+
# Closes the reactor span after the reactor failed.
#
# The reactor context token is detached and every per-step registry is
# emptied. The span is then marked according to the kind of `error`:
# a raised exception, a RetryQueuedResult, or any other result object.
#
# For raised exceptions the span also receives `reactor.status`,
# `error.class` and `error.message` attributes, matching what the Failure
# path and the compensation/undo failure hooks already emit.
#
# @param _reactor_name [String] unused
# @param error [Exception, Object] the exception or failure result
# @param context [Object] execution context, passed on to the status mapper
def on_failed_reactor(_reactor_name, error, context)
  ::OpenTelemetry::Context.detach(@reactor_token) if @reactor_token
  @reactor_token = nil

  span = @reactor_span
  @reactor_span = nil

  [
    @step_spans, @step_tokens, @retry_errors,
    @compensation_spans, @compensation_tokens,
    @undo_spans, @undo_tokens
  ].each(&:clear)

  return unless span

  case error
  when Exception
    span.set_attribute("reactor.status", "failed")
    span.status = ::OpenTelemetry::Trace::Status.error(error.message)
    span.record_exception(error)
    # Class#to_s never returns nil, even for anonymous exception classes.
    span.set_attribute("error.class", error.class.to_s)
    span.set_attribute("error.message", error.message)
  when RubyReactor::RetryQueuedResult
    map_reactor_retry_queued_status(span, error)
  else
    map_reactor_result_status(span, error, context)
  end
  span.finish
end
|
|
111
|
+
|
|
112
|
+
# Starts a `step.<name>` span and attaches it as the current context.
#
# Arguments become `step.arguments.<name>` attributes; an argument whose name
# matches a reactor input declared with a truthy `:redact` entry is written as
# "[REDACTED]". The span and its attach token are stored under `step_name`
# so the completion and failure hooks can close them.
#
# @param step_name [#to_s] step identifier, used as registry key
# @param arguments [Hash] step arguments, iterated as key/value pairs
# @param context [Object] execution context; only `reactor_class` is read
# @raise [RuntimeError] when the OpenTelemetry API is not loaded
def on_start_step(step_name, arguments, context)
  ensure_opentelemetry_loaded!
  tracer = ::OpenTelemetry.tracer_provider.tracer("ruby_reactor")

  reactor_class = context.reactor_class
  redact_keys = []
  redact_keys = reactor_class.inputs.select { |_, cfg| cfg[:redact] }.keys if reactor_class.respond_to?(:inputs)

  attributes = arguments.each_with_object({ "step.name" => step_name.to_s }) do |(key, value), attrs|
    attrs["step.arguments.#{key}"] = redact_keys.include?(key.to_sym) ? "[REDACTED]" : safe_value(value)
  end

  # No explicit parent is passed: the span nests under the current
  # OpenTelemetry context (the reactor span, or an enclosing step/element span
  # for composed and mapped reactors) instead of being flattened directly
  # under the reactor span.
  span = tracer.start_span("step.#{step_name}", attributes: attributes)
  token = ::OpenTelemetry::Context.attach(::OpenTelemetry::Trace.context_with_span(span))

  @step_spans[step_name] = span
  @step_tokens[step_name] = token
end
|
|
140
|
+
|
|
141
|
+
# Closes the span of a step that returned a result.
#
# Detaches the step context token, drops any remembered retry error, maps the
# result onto the span and finishes it. Does nothing further when no span is
# registered for `step_name`.
#
# @param step_name [#to_s] step identifier, used as registry key
# @param result [Object] step result (AsyncResult, RetryQueuedResult or any other result)
# @param _context [Object] unused
def on_complete_step(step_name, result, _context)
  token = @step_tokens.delete(step_name)
  ::OpenTelemetry::Context.detach(token) if token

  retry_error = @retry_errors.delete(step_name)
  span = @step_spans.delete(step_name)
  return unless span

  case result
  when RubyReactor::AsyncResult
    # The step was handed to a background worker and did not run here.
    # Rename the span so it is not mistaken for the real execution span
    # emitted later under the resumed reactor span.
    span.name = "step.#{step_name}.enqueue" if span.respond_to?(:name=)
    span.set_attribute("step.async", true)
    span.set_attribute("step.status", "handed_off")
    job_id = result.respond_to?(:job_id) ? result.job_id : nil
    span.set_attribute("step.async_job_id", job_id.to_s) if job_id
    span.status = ::OpenTelemetry::Trace::Status.ok
  when RubyReactor::RetryQueuedResult
    # One failed attempt that was requeued: mark this span as an error.
    map_retry_queued_status(span, result, retry_error)
  else
    map_step_result_status(span, result)
  end
  span.finish
end
|
|
170
|
+
|
|
171
|
+
# Closes the span of a step that failed.
#
# Detaches the step context token, discards any remembered retry error, marks
# the span as failed and finishes it. Does nothing further when no span is
# registered for `step_name`.
#
# For raised exceptions the span also receives `step.status`, `error.class`
# and `error.message` attributes, so that exception failures carry the same
# attributes as Failure results.
#
# @param step_name [#to_s] step identifier, used as registry key
# @param error [Exception, Object] the exception or failure result
# @param _context [Object] unused
def on_failed_step(step_name, error, _context)
  token = @step_tokens.delete(step_name)
  ::OpenTelemetry::Context.detach(token) if token
  @retry_errors.delete(step_name)

  span = @step_spans.delete(step_name)
  return unless span

  if error.is_a?(Exception)
    span.set_attribute("step.status", "failed")
    span.status = ::OpenTelemetry::Trace::Status.error(error.message)
    span.record_exception(error)
    # Class#to_s never returns nil, even for anonymous exception classes.
    span.set_attribute("error.class", error.class.to_s)
    span.set_attribute("error.message", error.message)
  else
    map_step_result_status(span, error)
  end
  span.finish
end
|
|
187
|
+
|
|
188
|
+
# Records a retry attempt for a step.
#
# The triggering error is remembered per step so it can be used to annotate
# the span if the step is later requeued for an async retry
# (RetryQueuedResult does not carry the error itself). When a span is open
# for the step, a `retry_attempt` event is added to it.
#
# @param step_name [Object] step identifier, used as registry key
# @param attempt [#to_i] attempt number
# @param error [Exception, Object] error that triggered the retry
# @param _context [Object] unused
def on_retry_attempt(step_name, attempt, error, _context)
  return unless defined?(::OpenTelemetry)

  @retry_errors[step_name] = error

  span = @step_spans[step_name]
  return unless span

  attempt_number = attempt.to_i
  message = error.respond_to?(:message) ? error.message : error.to_s
  error_class = error.is_a?(Exception) ? error.class.name : "RubyReactor::Failure"

  span.add_event(
    "retry_attempt",
    attributes: { "attempt" => attempt_number, "error.message" => message, "error.class" => error_class }
  )
end
|
|
205
|
+
|
|
206
|
+
# Starts a `compensate.<name>` span for a step being compensated.
#
# If the step's own span is still open it is closed first, marked with the
# triggering error. The compensation span is parented on the reactor span
# when one exists, otherwise on the current context, and is attached as the
# current context.
#
# @param step_name [#to_s] step identifier, used as registry key
# @param error [Exception, Object] error that triggered compensation
# @param arguments [Hash] step arguments, iterated as key/value pairs
# @param context [Object] execution context; only `reactor_class` is read
# @raise [RuntimeError] when the OpenTelemetry API is not loaded
def on_start_compensation(step_name, error, arguments, context) # rubocop:disable Metrics/MethodLength
  ensure_opentelemetry_loaded!

  # Compensation runs after the step has failed, so close the step span if
  # it is still open.
  step_span = @step_spans.delete(step_name)
  if step_span
    step_token = @step_tokens.delete(step_name)
    ::OpenTelemetry::Context.detach(step_token) if step_token

    if error.is_a?(Exception)
      step_span.status = ::OpenTelemetry::Trace::Status.error(error.message)
      step_span.record_exception(error)
    else
      map_step_result_status(step_span, error)
    end
    step_span.finish
  end

  tracer = ::OpenTelemetry.tracer_provider.tracer("ruby_reactor")

  reactor_class = context.reactor_class
  redact_keys = []
  redact_keys = reactor_class.inputs.select { |_, cfg| cfg[:redact] }.keys if reactor_class.respond_to?(:inputs)

  attributes = arguments.each_with_object({ "step.name" => step_name.to_s }) do |(key, value), attrs|
    attrs["step.arguments.#{key}"] = redact_keys.include?(key.to_sym) ? "[REDACTED]" : safe_value(value)
  end

  raised = error.is_a?(Exception)
  attributes["compensation.trigger_error.class"] = error.class.name if raised
  attributes["compensation.trigger_error.message"] = raised ? error.message : error.to_s

  parent_context =
    @reactor_span ? ::OpenTelemetry::Trace.context_with_span(@reactor_span) : ::OpenTelemetry::Context.current

  span = tracer.start_span("compensate.#{step_name}", attributes: attributes, with_parent: parent_context)
  token = ::OpenTelemetry::Context.attach(::OpenTelemetry::Trace.context_with_span(span))

  @compensation_spans[step_name] = span
  @compensation_tokens[step_name] = token
end
|
|
257
|
+
|
|
258
|
+
# Closes the compensation span of a step whose compensation returned a result.
#
# @param step_name [Object] step identifier, used as registry key
# @param result [Object] compensation result, mapped onto the span
# @param _context [Object] unused
def on_complete_compensation(step_name, result, _context)
  if (token = @compensation_tokens.delete(step_name))
    ::OpenTelemetry::Context.detach(token)
  end

  span = @compensation_spans.delete(step_name)
  return unless span

  map_compensation_result_status(span, result)
  span.finish
end
|
|
268
|
+
|
|
269
|
+
# Closes the compensation span of a step whose compensation failed.
#
# The span is always tagged `compensation.status = "failed"`. Raised
# exceptions are recorded together with `error.message` / `error.class`
# attributes; any other error object goes through the result mapper.
#
# @param step_name [Object] step identifier, used as registry key
# @param error [Exception, Object] the exception or failure result
# @param _context [Object] unused
def on_failed_compensation(step_name, error, _context)
  if (token = @compensation_tokens.delete(step_name))
    ::OpenTelemetry::Context.detach(token)
  end

  span = @compensation_spans.delete(step_name)
  return unless span

  span.set_attribute("compensation.status", "failed")
  case error
  when Exception
    span.status = ::OpenTelemetry::Trace::Status.error(error.message)
    span.record_exception(error)
    span.set_attribute("error.message", error.message)
    span.set_attribute("error.class", error.class.name)
  else
    map_compensation_result_status(span, error)
  end
  span.finish
end
|
|
287
|
+
|
|
288
|
+
# Starts an `undo.<name>` span for a step being undone.
#
# The span is parented on the reactor span when one exists, otherwise on the
# current context, and is attached as the current context. Besides the
# (possibly redacted) arguments, the original step result value is recorded
# when the result object exposes `value`.
#
# @param step_name [#to_s] step identifier, used as registry key
# @param step_result [Object] result originally produced by the step
# @param arguments [Hash] step arguments, iterated as key/value pairs
# @param context [Object] execution context; only `reactor_class` is read
# @raise [RuntimeError] when the OpenTelemetry API is not loaded
def on_start_undo(step_name, step_result, arguments, context)
  ensure_opentelemetry_loaded!
  tracer = ::OpenTelemetry.tracer_provider.tracer("ruby_reactor")

  reactor_class = context.reactor_class
  redact_keys = []
  redact_keys = reactor_class.inputs.select { |_, cfg| cfg[:redact] }.keys if reactor_class.respond_to?(:inputs)

  attributes = arguments.each_with_object({ "step.name" => step_name.to_s }) do |(key, value), attrs|
    attrs["step.arguments.#{key}"] = redact_keys.include?(key.to_sym) ? "[REDACTED]" : safe_value(value)
  end
  attributes["undo.original_result.value"] = safe_value(step_result.value) if step_result.respond_to?(:value)

  parent_context =
    @reactor_span ? ::OpenTelemetry::Trace.context_with_span(@reactor_span) : ::OpenTelemetry::Context.current

  span = tracer.start_span("undo.#{step_name}", attributes: attributes, with_parent: parent_context)
  token = ::OpenTelemetry::Context.attach(::OpenTelemetry::Trace.context_with_span(span))

  @undo_spans[step_name] = span
  @undo_tokens[step_name] = token
end
|
|
319
|
+
|
|
320
|
+
# Closes the undo span of a step whose undo returned a result.
#
# @param step_name [Object] step identifier, used as registry key
# @param result [Object] undo result, mapped onto the span
# @param _context [Object] unused
def on_complete_undo(step_name, result, _context)
  if (token = @undo_tokens.delete(step_name))
    ::OpenTelemetry::Context.detach(token)
  end

  span = @undo_spans.delete(step_name)
  return unless span

  map_undo_result_status(span, result)
  span.finish
end
|
|
330
|
+
|
|
331
|
+
# Closes the undo span of a step whose undo failed.
#
# The span is always tagged `undo.status = "failed"`. Raised exceptions are
# recorded together with `error.message` / `error.class` attributes; any
# other error object goes through the result mapper.
#
# @param step_name [Object] step identifier, used as registry key
# @param error [Exception, Object] the exception or failure result
# @param _context [Object] unused
def on_failed_undo(step_name, error, _context)
  if (token = @undo_tokens.delete(step_name))
    ::OpenTelemetry::Context.detach(token)
  end

  span = @undo_spans.delete(step_name)
  return unless span

  span.set_attribute("undo.status", "failed")
  case error
  when Exception
    span.status = ::OpenTelemetry::Trace::Status.error(error.message)
    span.record_exception(error)
    span.set_attribute("error.message", error.message)
    span.set_attribute("error.class", error.class.name)
  else
    map_undo_result_status(span, error)
  end
  span.finish
end
|
|
349
|
+
|
|
350
|
+
# Stores the current trace context on the reactor context before an async handoff.
#
# The *current* context (the span active at enqueue time) is injected rather
# than the reactor span, so the async work nests under the span that handed it
# off. Injection happens on every handoff, including chained ones. A carrier
# stored earlier is only replaced when the current span context is valid and
# the freshly injected carrier is non-empty, so a good parent is never
# overwritten by an empty one. Failures are logged as warnings, not raised.
#
# @param context [Object] execution context; `private_data[:trace_context]` is written
def on_before_async_enqueue(context)
  return unless defined?(::OpenTelemetry)
  return unless ::OpenTelemetry::Trace.current_span.context.valid?

  carrier = {}.tap { |target| ::OpenTelemetry.propagation.inject(target) }
  return if carrier.empty?

  context.private_data[:trace_context] = carrier
rescue StandardError => e
  RubyReactor.configuration.logger.warn("Telemetry context injection failed: #{e.message}")
end
|
|
373
|
+
|
|
374
|
+
# Notes on the reactor span that a lock was acquired.
#
# Sets `reactor.lock.key` and adds a `lock_acquired` event. No-op when no
# reactor span is open.
#
# @param key [#to_s] lock key
# @param _context [Object] unused
def on_lock_acquired(key, _context)
  return unless @reactor_span

  lock_key = key.to_s
  @reactor_span.set_attribute("reactor.lock.key", lock_key)
  @reactor_span.add_event("lock_acquired", attributes: { "lock.key" => lock_key })
end
|
|
381
|
+
|
|
382
|
+
# Adds a `lock_released` event to the reactor span; no-op when no span is open.
#
# @param key [#to_s] lock key
# @param _context [Object] unused
def on_lock_released(key, _context)
  return unless @reactor_span

  @reactor_span.add_event("lock_released", attributes: { "lock.key" => key.to_s })
end
|
|
388
|
+
|
|
389
|
+
# Adds a `lock_acquisition_failed` event to the reactor span.
#
# No-op when no reactor span is open. The error does not have to be an
# Exception: objects without `message` are described with `to_s`, so this
# hook cannot itself raise NoMethodError while reporting a failure.
#
# @param key [#to_s] lock key
# @param error [Exception, Object] reason the lock could not be acquired
# @param _context [Object] unused
def on_lock_failed(key, error, _context)
  span = @reactor_span
  return unless span

  span.add_event("lock_acquisition_failed", attributes: {
                   "lock.key" => key.to_s,
                   "error.message" => error.respond_to?(:message) ? error.message : error.to_s,
                   # Class#to_s never returns nil, even for anonymous classes.
                   "error.class" => error.class.to_s
                 })
end
|
|
399
|
+
|
|
400
|
+
# Notes on the reactor span that a semaphore slot was acquired.
#
# Sets `reactor.semaphore.key` / `reactor.semaphore.limit` and adds a
# `semaphore_acquired` event. No-op when no reactor span is open.
#
# @param key [#to_s] semaphore key
# @param limit [#to_i] semaphore limit
# @param _context [Object] unused
def on_semaphore_acquired(key, limit, _context)
  return unless @reactor_span

  semaphore_key = key.to_s
  semaphore_limit = limit.to_i
  @reactor_span.set_attribute("reactor.semaphore.key", semaphore_key)
  @reactor_span.set_attribute("reactor.semaphore.limit", semaphore_limit)
  @reactor_span.add_event(
    "semaphore_acquired",
    attributes: { "semaphore.key" => semaphore_key, "semaphore.limit" => semaphore_limit }
  )
end
|
|
409
|
+
|
|
410
|
+
# Adds a `semaphore_released` event to the reactor span; no-op when no span is open.
#
# @param key [#to_s] semaphore key
# @param _context [Object] unused
def on_semaphore_released(key, _context)
  return unless @reactor_span

  @reactor_span.add_event("semaphore_released", attributes: { "semaphore.key" => key.to_s })
end
|
|
416
|
+
|
|
417
|
+
# Adds a `semaphore_acquisition_failed` event to the reactor span.
#
# No-op when no reactor span is open. The error does not have to be an
# Exception: objects without `message` are described with `to_s`, so this
# hook cannot itself raise NoMethodError while reporting a failure.
#
# @param key [#to_s] semaphore key
# @param limit [#to_i] semaphore limit
# @param error [Exception, Object] reason the semaphore could not be acquired
# @param _context [Object] unused
def on_semaphore_failed(key, limit, error, _context)
  span = @reactor_span
  return unless span

  span.add_event("semaphore_acquisition_failed", attributes: {
                   "semaphore.key" => key.to_s,
                   "semaphore.limit" => limit.to_i,
                   "error.message" => error.respond_to?(:message) ? error.message : error.to_s,
                   # Class#to_s never returns nil, even for anonymous classes.
                   "error.class" => error.class.to_s
                 })
end
|
|
428
|
+
|
|
429
|
+
private
|
|
430
|
+
|
|
431
|
+
# Rebuilds a parent OpenTelemetry context from a stored trace carrier.
#
# @param context [Object] execution context used to look up the carrier
# @return [Object, nil] the extracted context; nil when OpenTelemetry is not
#   loaded, no carrier is found, or extraction raises (a warning is logged)
def extract_context(context)
  return nil unless defined?(::OpenTelemetry)

  carrier = fetch_trace_context(context)
  return nil if carrier.nil?

  # A carrier that round-tripped through a serializer may have symbol keys;
  # the propagator expects string keys.
  carrier = carrier.transform_keys(&:to_s) if carrier.respond_to?(:transform_keys)
  ::OpenTelemetry.propagation.extract(carrier)
rescue StandardError => e
  RubyReactor.configuration.logger.warn("Telemetry context extraction failed: #{e.message}")
  nil
end
|
|
445
|
+
|
|
446
|
+
# Finds the stored trace carrier for a context.
#
# Lookup order: the context's own private data (symbol key, then string key),
# then the in-memory parent context, then the parent context loaded from
# storage. Later sources are consulted only when earlier ones yield nothing.
#
# @param context [Object] execution context
# @return [Object, nil] the stored carrier, if any
def fetch_trace_context(context)
  own = context.private_data[:trace_context] || context.private_data["trace_context"]
  own || fetch_context_from_parent(context) || fetch_context_from_storage(context)
end
|
|
452
|
+
|
|
453
|
+
# Reads the trace carrier from the in-memory parent context, if there is one.
#
# @param context [Object] execution context; `parent_context` is read
# @return [Object, nil] the parent's carrier (symbol key first, then string key)
def fetch_context_from_parent(context)
  parent = context.parent_context
  return nil unless parent

  parent_data = parent.private_data
  parent_data[:trace_context] || parent_data["trace_context"]
end
|
|
459
|
+
|
|
460
|
+
# Loads the parent context from the configured storage adapter and reads its trace carrier.
#
# Best effort: any StandardError raised along the way yields nil.
#
# @param context [Object] execution context; `parent_context_id` and map metadata are read
# @return [Object, nil] the stored carrier; nil when there is no parent id, no
#   parent class name, no stored private data, or on error
def fetch_context_from_storage(context)
  return nil unless context.parent_context_id

  parent_class = map_metadata_parent_class(context)
  return nil unless parent_class

  parent_data = RubyReactor.configuration.storage_adapter.retrieve_context(context.parent_context_id, parent_class)
  serialized = parent_data && parent_data["private_data"]
  return nil unless serialized

  private_data = ContextSerializer.deserialize_value(serialized)
  private_data[:trace_context] || private_data["trace_context"]
rescue StandardError
  nil
end
|
|
475
|
+
|
|
476
|
+
# Reads the parent reactor class name from the context's map metadata.
#
# @param context [Object] execution context; `map_metadata` is read
# @return [Object, nil] value stored under `:parent_reactor_class_name` or
#   `"parent_reactor_class_name"`; nil when there is no metadata
def map_metadata_parent_class(context)
  meta = context.map_metadata
  return nil if meta.nil?

  meta.dig(:parent_reactor_class_name) || meta.dig("parent_reactor_class_name")
end
|
|
480
|
+
|
|
481
|
+
# Converts a value into a bounded string suitable for a span attribute.
#
# nil becomes "", strings are used as-is, everything else goes through
# `inspect`. Text longer than 256 characters is cut to its first 240
# characters followed by "... [truncated]". Any StandardError raised during
# conversion yields "<unserializable>".
#
# @param value [Object] value to render
# @return [String]
def safe_value(value)
  return "" if value.nil?

  text = value.is_a?(String) ? value : value.inspect
  text.length > 256 ? "#{text[0...240]}... [truncated]" : text
rescue StandardError
  "<unserializable>"
end
|
|
493
|
+
|
|
494
|
+
# Fails fast when the top-level OpenTelemetry constant is not defined.
#
# @return [nil]
# @raise [RuntimeError] when `::OpenTelemetry` is not defined
def ensure_opentelemetry_loaded!
  unless defined?(::OpenTelemetry)
    raise "OpenTelemetry is not loaded. Please make sure `opentelemetry-api` is installed and loaded."
  end
end
|
|
499
|
+
|
|
500
|
+
# Maps a reactor result onto the reactor span's status and attributes.
#
# Success: `reactor.status` is "skipped" (with the skip reason) or
# "completed"; for a completed reactor whose class declares `returns`, the
# return step name and its value from `intermediate_results` are recorded.
# Failure: `reactor.status` is "failed" and error details are recorded.
# Other result types and nil leave the span untouched.
#
# @param span [Object] span to annotate
# @param result [Object, nil] reactor result
# @param context [Object] execution context; `reactor_class` and `intermediate_results` are read
def map_reactor_result_status(span, result, context)
  return unless result

  case result
  when RubyReactor::Success
    if result.skipped?
      span.set_attribute("reactor.status", "skipped")
      span.set_attribute("reactor.skipped_reason", result.reason.to_s)
      span.status = ::OpenTelemetry::Trace::Status.ok
    else
      span.set_attribute("reactor.status", "completed")
      span.status = ::OpenTelemetry::Trace::Status.ok

      rs = context.reactor_class.respond_to?(:returns) ? context.reactor_class.returns : nil
      if rs
        results = context.intermediate_results
        # Fall back to the string key only when the symbol key yields nil.
        # Using `||` here would discard a legitimate `false` return value.
        val = results[rs.to_sym]
        val = results[rs.to_s] if val.nil?
        span.set_attribute("reactor.return_step", rs.to_s)
        span.set_attribute("reactor.return_value", safe_value(val))
      end
    end
  when RubyReactor::Failure
    span.set_attribute("reactor.status", "failed")
    msg = result.error.respond_to?(:message) ? result.error.message : result.error.to_s
    span.status = ::OpenTelemetry::Trace::Status.error(msg)
    span.set_attribute("error.class", result.exception_class.to_s) if result.exception_class
    span.set_attribute("error.message", msg)
    span.set_attribute("error.step_name", result.step_name.to_s) if result.step_name
    span.set_attribute("error.file_path", result.file_path) if result.file_path
    span.set_attribute("error.line_number", result.line_number) if result.line_number
    if result.validation_errors
      span.set_attribute("reactor.validation_errors", safe_value(result.validation_errors))
    end
  end
end
|
|
534
|
+
|
|
535
|
+
# Marks the reactor span as a failed attempt that was requeued for retry.
#
# Sets `reactor.status = "failed_will_retry"` and `retry.will_retry`, plus the
# retry step name, attempt number and next retry time when the result exposes
# them, then sets an ERROR status describing the requeue.
#
# @param span [Object] span to annotate
# @param result [Object] retry-queued result; optional readers are probed with `respond_to?`
def map_reactor_retry_queued_status(span, result)
  span.set_attribute("reactor.status", "failed_will_retry")
  span.set_attribute("retry.will_retry", true)

  step_name = result.respond_to?(:step_name) ? result.step_name : nil
  span.set_attribute("retry.step_name", step_name.to_s) if step_name
  span.set_attribute("retry.attempt", result.attempt_number.to_i) if result.respond_to?(:attempt_number)

  next_retry_at = result.respond_to?(:next_retry_at) ? result.next_retry_at : nil
  span.set_attribute("retry.next_retry_at", next_retry_at.to_s) if next_retry_at

  description =
    if step_name
      "Reactor execution requeued: step '#{step_name}' will retry"
    else
      "Reactor execution requeued for retry"
    end
  span.status = ::OpenTelemetry::Trace::Status.error(description)
end
|
|
551
|
+
|
|
552
|
+
# Marks a step span as a failed attempt that was requeued for retry.
#
# Sets `step.status = "failed_will_retry"`, retry attributes when the result
# exposes them, an ERROR status built from the triggering error, and
# `error.message`. Exceptions are also recorded with `error.class`.
#
# @param span [Object] span to annotate
# @param result [Object] retry-queued result; optional readers are probed with `respond_to?`
# @param error [Exception, Object, nil] error that triggered the retry, if known
def map_retry_queued_status(span, result, error)
  span.set_attribute("step.status", "failed_will_retry")
  span.set_attribute("retry.will_retry", true)
  span.set_attribute("retry.attempt", result.attempt_number.to_i) if result.respond_to?(:attempt_number)

  next_retry_at = result.respond_to?(:next_retry_at) ? result.next_retry_at : nil
  span.set_attribute("retry.next_retry_at", next_retry_at.to_s) if next_retry_at

  description =
    if error.respond_to?(:message)
      error.message
    elsif error
      error.to_s
    else
      "Step failed; retry queued"
    end
  span.status = ::OpenTelemetry::Trace::Status.error(description)

  if error.is_a?(Exception)
    span.record_exception(error)
    span.set_attribute("error.class", error.class.name)
  end
  span.set_attribute("error.message", description)
end
|
|
575
|
+
|
|
576
|
+
# Maps a step result onto a step span's status and attributes.
#
# Success: `step.status` is "skipped" (with the skip reason, status left
# unset) or "completed" (status OK). Failure: `step.status` is "failed",
# status ERROR, and error details are recorded. Other result types and nil
# leave the span untouched.
#
# @param span [Object] span to annotate
# @param result [Object, nil] step result
def map_step_result_status(span, result)
  return unless result

  if result.is_a?(RubyReactor::Success)
    if result.skipped?
      span.set_attribute("step.status", "skipped")
      span.set_attribute("step.skipped_reason", result.reason.to_s)
    else
      span.set_attribute("step.status", "completed")
      span.status = ::OpenTelemetry::Trace::Status.ok
    end
  elsif result.is_a?(RubyReactor::Failure)
    span.set_attribute("step.status", "failed")

    failure = result.error
    message = failure.respond_to?(:message) ? failure.message : failure.to_s
    span.status = ::OpenTelemetry::Trace::Status.error(message)

    exception_class = result.exception_class
    span.set_attribute("error.class", exception_class.to_s) if exception_class
    span.set_attribute("error.message", message)

    validation_errors = result.validation_errors
    span.set_attribute("step.validation_errors", safe_value(validation_errors)) if validation_errors
  end
end
|
|
597
|
+
|
|
598
|
+
# Maps a compensation result onto a compensation span's status and attributes.
#
# Success: `compensation.status` is "skipped" (with the skip reason, status
# left unset) or "completed" (status OK). Failure: `compensation.status` is
# "failed", status ERROR, and error details are recorded. Other result types
# and nil leave the span untouched.
#
# @param span [Object] span to annotate
# @param result [Object, nil] compensation result
def map_compensation_result_status(span, result)
  return unless result

  if result.is_a?(RubyReactor::Success)
    if result.skipped?
      span.set_attribute("compensation.status", "skipped")
      span.set_attribute("compensation.skipped_reason", result.reason.to_s)
    else
      span.set_attribute("compensation.status", "completed")
      span.status = ::OpenTelemetry::Trace::Status.ok
    end
  elsif result.is_a?(RubyReactor::Failure)
    span.set_attribute("compensation.status", "failed")

    failure = result.error
    message = failure.respond_to?(:message) ? failure.message : failure.to_s
    span.status = ::OpenTelemetry::Trace::Status.error(message)

    exception_class = result.exception_class
    span.set_attribute("error.class", exception_class.to_s) if exception_class
    span.set_attribute("error.message", message)
  end
end
|
|
618
|
+
|
|
619
|
+
# Maps an undo result onto an undo span's status and attributes.
#
# Success: `undo.status` is "skipped" (with the skip reason, status left
# unset) or "completed" (status OK). Failure: `undo.status` is "failed",
# status ERROR, and error details are recorded. Other result types and nil
# leave the span untouched.
#
# @param span [Object] span to annotate
# @param result [Object, nil] undo result
def map_undo_result_status(span, result)
  return unless result

  if result.is_a?(RubyReactor::Success)
    if result.skipped?
      span.set_attribute("undo.status", "skipped")
      span.set_attribute("undo.skipped_reason", result.reason.to_s)
    else
      span.set_attribute("undo.status", "completed")
      span.status = ::OpenTelemetry::Trace::Status.ok
    end
  elsif result.is_a?(RubyReactor::Failure)
    span.set_attribute("undo.status", "failed")

    failure = result.error
    message = failure.respond_to?(:message) ? failure.message : failure.to_s
    span.status = ::OpenTelemetry::Trace::Status.error(message)

    exception_class = result.exception_class
    span.set_attribute("error.class", exception_class.to_s) if exception_class
    span.set_attribute("error.message", message)
  end
end
|
|
639
|
+
end
|
|
640
|
+
# rubocop:enable Metrics/ClassLength
|
|
641
|
+
|
|
642
|
+
# Also expose it inside Middleware::OpenTelemetry namespace for compatibility
|
|
643
|
+
# Compatibility alias: reopens Middleware so the same middleware is also
# reachable as RubyReactor::Middleware::OpenTelemetry. The subclass adds no
# behaviour of its own.
class Middleware
  class OpenTelemetry < ::RubyReactor::OpenTelemetry; end
end
|
|
647
|
+
end
|
data/lib/ruby_reactor/reactor.rb
CHANGED
|
@@ -105,6 +105,7 @@ module RubyReactor
|
|
|
105
105
|
if self.class.async? && !@context.inline_async_execution
|
|
106
106
|
# For async reactors, queue a job for the whole reactor
|
|
107
107
|
@context.status = :running
|
|
108
|
+
Executor.middlewares_for(self.class).on(:before_async_enqueue, @context)
|
|
108
109
|
save_context
|
|
109
110
|
|
|
110
111
|
serialized_context = ContextSerializer.serialize(@context)
|