hatchet-sdk 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +5 -1
  3. data/CHANGELOG.md +30 -0
  4. data/lib/hatchet/clients/grpc/admin.rb +45 -2
  5. data/lib/hatchet/clients/grpc/dispatcher.rb +33 -8
  6. data/lib/hatchet/condition_converter.rb +20 -12
  7. data/lib/hatchet/context.rb +6 -1
  8. data/lib/hatchet/contracts/dispatcher/dispatcher_pb.rb +3 -1
  9. data/lib/hatchet/contracts/dispatcher/dispatcher_services_pb.rb +1 -0
  10. data/lib/hatchet/contracts/v1/dispatcher_pb.rb +23 -1
  11. data/lib/hatchet/contracts/v1/dispatcher_services_pb.rb +2 -0
  12. data/lib/hatchet/contracts/v1/shared/condition_pb.rb +3 -1
  13. data/lib/hatchet/contracts/v1/shared/trigger_pb.rb +17 -0
  14. data/lib/hatchet/contracts/v1/workflows_pb.rb +4 -3
  15. data/lib/hatchet/contracts/v1/workflows_services_pb.rb +1 -0
  16. data/lib/hatchet/contracts/workflows/workflows_pb.rb +2 -4
  17. data/lib/hatchet/contracts/workflows/workflows_services_pb.rb +1 -1
  18. data/lib/hatchet/durable_context.rb +102 -33
  19. data/lib/hatchet/engine_version.rb +50 -0
  20. data/lib/hatchet/eviction_policy.rb +60 -0
  21. data/lib/hatchet/exceptions.rb +26 -0
  22. data/lib/hatchet/features/cron.rb +2 -1
  23. data/lib/hatchet/task.rb +7 -0
  24. data/lib/hatchet/version.rb +1 -1
  25. data/lib/hatchet/worker/durable_event_listener.rb +735 -0
  26. data/lib/hatchet/worker/durable_eviction/cache.rb +205 -0
  27. data/lib/hatchet/worker/durable_eviction/manager.rb +233 -0
  28. data/lib/hatchet/worker/runner.rb +279 -53
  29. data/lib/hatchet/worker_obj.rb +60 -4
  30. data/lib/hatchet/workflow.rb +8 -4
  31. data/lib/hatchet-sdk.rb +13 -3
  32. data/sig/hatchet/clients/grpc/dispatcher.rbs +2 -0
  33. data/sig/hatchet/durable_context.rbs +8 -2
  34. data/sig/hatchet/engine_version.rbs +12 -0
  35. data/sig/hatchet/eviction_policy.rbs +14 -0
  36. data/sig/hatchet/exceptions.rbs +12 -0
  37. data/sig/hatchet/task.rbs +2 -0
  38. data/sig/hatchet/worker/durable_event_listener.rbs +31 -0
  39. data/sig/hatchet/worker/durable_eviction/cache.rbs +41 -0
  40. data/sig/hatchet/worker/durable_eviction/manager.rbs +37 -0
  41. data/sig/hatchet/worker/runner.rbs +7 -1
  42. data/sig/hatchet/worker_obj.rbs +3 -0
  43. data/sig/hatchet/workflow.rbs +1 -1
  44. data/sig/hatchet-sdk.rbs +1 -1
  45. metadata +15 -4
@@ -0,0 +1,735 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "monitor"
5
+ require "timeout"
6
+
7
+ module Hatchet
8
+ module WorkerRuntime
9
+ # Thread-safe multiplexer over the ``V1Dispatcher.DurableTask`` bidirectional
10
+ # gRPC stream.
11
+ #
12
+ # A single stream is shared across all durable task invocations running on
13
+ # the worker; callers send ``send_event`` / ``wait_for_callback``
14
+ # / ``send_evict_invocation`` requests and block on per-call Queues until the
15
+ # response-dispatch thread routes the matching ``DurableTaskResponse`` back.
16
+ #
17
+ # @example
18
+ # listener = DurableEventListener.new(config: config, channel: channel, logger: logger)
19
+ # listener.start("worker-id-123")
20
+ # ack = listener.send_event(task_id, invocation_count, wait_for_event)
21
+ # result = listener.wait_for_callback(task_id, invocation_count, branch_id, node_id)
22
+ class DurableEventListener
23
# Pause, in seconds, before the receive loop tries to re-open a dropped stream.
DEFAULT_RECONNECT_INTERVAL = 3
# Longest time, in seconds, ``send_evict_invocation`` waits for its ack.
EVICTION_ACK_TIMEOUT_SECONDS = 30.0
# Longest time, in seconds, ``start`` waits for the register_worker ack.
REGISTER_WORKER_ACK_TIMEOUT_SECONDS = 10.0

# Wait request handed to ``send_event``.
#
# @!attribute [r] wait_for_conditions
#   @return [V1::DurableEventListenerConditions]
# @!attribute [r] label
#   @return [String, nil]
WaitForEvent = Struct.new(:wait_for_conditions, :label, keyword_init: true)

# Memo request handed to ``send_event``: a ``bytes`` key plus an optional
# already-computed result.
MemoEvent = Struct.new(:memo_key, :result, keyword_init: true)
37
+
38
+ # @return [String, nil]
39
+ attr_reader :worker_id
40
+
41
# Build an idle listener. No stream is opened here; that happens in ``start``.
#
# @param config [Hatchet::Config] read later for ``host_port`` and ``auth_metadata``
# @param channel [GRPC::Core::Channel] handed to the dispatcher stub as ``channel_override``
# @param logger [Logger, nil] every log call in this class is nil-guarded
# @param on_server_evict [Proc, nil] Called with (durable_task_external_id, invocation_count)
#   when the server notifies about a stale invocation.
def initialize(config:, channel:, logger:, on_server_evict: nil)
  # Collaborators supplied by the caller.
  @config = config
  @channel = channel
  @logger = logger
  @on_server_evict = on_server_evict

  # Lifecycle flags and the locks that guard them.
  @running = false
  @worker_registered = false
  @start_mu = Mutex.new
  @registration_mu = Mutex.new
  @registration_cv = ConditionVariable.new

  # Connection state; populated by ``connect``.
  @worker_id = nil
  @stub = nil
  @request_queue = nil
  @receive_thread = nil
  @send_thread = nil

  # Routing tables, all guarded by @mu.
  @mu = Monitor.new
  # [task_external_id, invocation_count] => Queue receiving [:ok, ack] or [:err, exc]
  @pending_event_acks = {}
  # [task_external_id, invocation_count] => Queue receiving [:ok, nil] or [:err, exc]
  @pending_eviction_acks = {}
  # [task_external_id, invocation_count, branch_id, node_id] => Queue
  @pending_callbacks = {}
  # same 4-tuple key => [inserted_at, result], for completions that arrived
  # before anyone was waiting on them
  @buffered_completions = {}
end
76
+
77
# Start the listener if not already running. Idempotent.
#
# Opens the stream, spawns the receive and send threads, and blocks until
# the register_worker ack arrives. If any of those steps raises, the
# listener is rolled back to the stopped state so a later ``start`` can
# retry instead of silently returning on a half-initialised listener.
#
# @param worker_id [String]
# @raise [StandardError] whatever ``connect`` or the ack wait raised
def start(worker_id)
  @start_mu.synchronize do
    return if @running

    @worker_id = worker_id
    @running = true
    @registration_mu.synchronize { @worker_registered = false }

    begin
      connect

      @receive_thread = Thread.new { receive_loop }
      @send_thread = Thread.new { send_loop }
      wait_for_register_worker_ack
    rescue StandardError
      # Roll back: clearing @running lets both loops exit, and closing the
      # request queue ends the outgoing request enumerator.
      @running = false
      abandoned = @request_queue
      @request_queue = nil
      begin
        abandoned&.close
      rescue StandardError
        nil
      end
      raise
    end
  end
end
95
+
96
# Convenience wrapper: returns immediately when the listener is already
# running, otherwise delegates to ``start``.
#
# @param worker_id [String]
def ensure_started(worker_id)
  return if @running

  start(worker_id)
end
100
+
101
# Shut the listener down: clear the running flag, fail every waiter with a
# "stopped" error, close the outgoing request queue, then give each
# background thread up to five seconds to finish.
def stop
  @running = false

  stopped = Hatchet::Error.new("DurableListener stopped")
  fail_all_pending(stopped)

  @request_queue&.close

  rescue_thread(@receive_thread)
  rescue_thread(@send_thread)
end
111
+
112
# Send a ``DurableTask`` message and block for its ack.
#
# @param durable_task_external_id [String]
# @param invocation_count [Integer]
# @param event [WaitForEvent, MemoEvent] The event to send
# @return [Object] The parsed ack body (a simple Hash describing the ack)
# @raise [Hatchet::Error] on server-reported errors or listener disconnection
# @raise [ArgumentError] when ``event`` is neither a WaitForEvent nor a MemoEvent
def send_event(durable_task_external_id, invocation_count, event)
  # Snapshot the queue once: the receive thread sets @request_queue to nil
  # when the stream drops, so re-reading the ivar below could hit nil.
  outbound = @request_queue
  raise Hatchet::Error, "DurableEventListener not started" unless outbound

  # Build the request before registering the waiter so an invalid event
  # cannot leave an orphaned entry in @pending_event_acks.
  request = build_event_request(durable_task_external_id, invocation_count, event)

  key = [durable_task_external_id, invocation_count]
  ack_queue = Queue.new
  @mu.synchronize { @pending_event_acks[key] = ack_queue }

  @logger&.debug(
    "durable event listener send_event: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count} event=#{event.class}",
  )

  begin
    outbound << request
  rescue ClosedQueueError
    # The stream went away between the snapshot and the enqueue; drop our
    # waiter (only if it is still ours) and report a disconnection.
    @mu.synchronize do
      @pending_event_acks.delete(key) if @pending_event_acks[key].equal?(ack_queue)
    end
    raise Hatchet::Error, "durable stream disconnected"
  end

  ack = await_queue(ack_queue)
  @logger&.debug(
    "durable event listener send_event ack: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count} ack_type=#{ack[:ack_type]} " \
    "branch_id=#{ack[:branch_id]} node_id=#{ack[:node_id]}",
  )
  ack
end
142
+
143
# Block until the server delivers an ``entry_completed`` (or error) for
# this durable task / invocation / branch / node id tuple.
#
# A completion that arrived before this call is served from the buffer
# without blocking.
#
# @param durable_task_external_id [String]
# @param invocation_count [Integer]
# @param branch_id [Integer]
# @param node_id [Integer]
# @return [Hash] ``{ durable_task_external_id:, node_id:, payload: }``
# @raise [Hatchet::Error] when the wait is failed or the listener closes
def wait_for_callback(durable_task_external_id, invocation_count, branch_id, node_id)
  key = [durable_task_external_id, invocation_count, branch_id, node_id]

  # Check the buffer and register the waiter in ONE critical section.
  # Done separately, a completion arriving in between would be buffered
  # (no waiter registered yet) while this caller blocks on an empty queue.
  buffered, queue = @mu.synchronize do
    hit = @buffered_completions.delete(key)
    hit ? [hit, nil] : [nil, (@pending_callbacks[key] ||= Queue.new)]
  end

  if buffered
    @logger&.debug(
      "durable event listener wait_for_callback: buffered completion hit " \
      "task=#{durable_task_external_id} invocation=#{invocation_count} " \
      "branch_id=#{branch_id} node_id=#{node_id}",
    )
    # Buffer entries are [inserted_at, result].
    return buffered[1]
  end

  @logger&.debug(
    "durable event listener wait_for_callback: waiting " \
    "task=#{durable_task_external_id} invocation=#{invocation_count} " \
    "branch_id=#{branch_id} node_id=#{node_id}",
  )
  # Tell the server right away which entries this worker is waiting on.
  poll_worker_status

  result = await_queue(queue)
  @logger&.debug(
    "durable event listener wait_for_callback: completed " \
    "task=#{durable_task_external_id} invocation=#{invocation_count} " \
    "branch_id=#{branch_id} node_id=#{node_id}",
  )
  result
end
179
+
180
# Request eviction of a stale invocation from the server and block until ack.
#
# @param durable_task_external_id [String]
# @param invocation_count [Integer]
# @param reason [String, nil] Optional human-readable reason.
# @raise [Hatchet::Error] on timeout or listener disconnection
def send_evict_invocation(durable_task_external_id, invocation_count, reason: nil)
  # Snapshot the queue once: the receive thread sets @request_queue to nil
  # when the stream drops, so re-reading the ivar below could hit nil.
  outbound = @request_queue
  raise Hatchet::Error, "DurableEventListener not started" unless outbound

  args = {
    durable_task_external_id: durable_task_external_id,
    invocation_count: invocation_count,
  }
  args[:reason] = reason if reason
  req = ::V1::DurableTaskEvictInvocationRequest.new(**args)

  key = [durable_task_external_id, invocation_count]
  ack_queue = Queue.new
  @mu.synchronize { @pending_eviction_acks[key] = ack_queue }

  @logger&.debug(
    "durable event listener send_evict_invocation: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count} reason=#{reason}",
  )
  outbound << ::V1::DurableTaskRequest.new(evict_invocation: req)

  await_queue(ack_queue, timeout: EVICTION_ACK_TIMEOUT_SECONDS)
  @logger&.debug(
    "durable event listener send_evict_invocation ack: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count}",
  )
rescue ClosedQueueError
  # Only the enqueue above can raise this: the stream was torn down after
  # the snapshot. Remove our waiter (if still ours) and report it.
  @mu.synchronize do
    @pending_eviction_acks.delete(key) if @pending_eviction_acks[key].equal?(ack_queue)
  end
  raise Hatchet::Error,
        "durable stream disconnected before eviction request could be sent " \
        "for task #{durable_task_external_id} invocation #{invocation_count}"
rescue Timeout::Error
  @mu.synchronize do
    @pending_eviction_acks.delete(key) if @pending_eviction_acks[key].equal?(ack_queue)
  end
  raise Hatchet::Error,
        "Eviction ack timed out after #{EVICTION_ACK_TIMEOUT_SECONDS.to_i}s " \
        "for task #{durable_task_external_id} invocation #{invocation_count}"
end
217
+
218
# Fire-and-forget ``complete_memo`` notification: the request is enqueued
# on the outgoing stream and no ack is awaited.
#
# @param durable_task_external_id [String]
# @param node_id [Integer]
# @param branch_id [Integer]
# @param invocation_count [Integer]
# @param memo_key [String]
# @param memo_result_payload [String]
# @raise [Hatchet::Error] when the listener is not started or the stream
#   has been closed
def send_memo_completed_notification(durable_task_external_id:, node_id:, branch_id:, invocation_count:, memo_key:,
                                     memo_result_payload:)
  # Snapshot the queue once: the receive thread sets @request_queue to nil
  # when the stream drops, so re-reading the ivar below could hit nil.
  outbound = @request_queue
  raise Hatchet::Error, "DurableEventListener not started" unless outbound

  ref = ::V1::DurableEventLogEntryRef.new(
    durable_task_external_id: durable_task_external_id,
    node_id: node_id,
    invocation_count: invocation_count,
    branch_id: branch_id,
  )
  complete = ::V1::DurableTaskCompleteMemoRequest.new(
    ref: ref,
    memo_key: memo_key,
    payload: memo_result_payload,
  )

  begin
    outbound << ::V1::DurableTaskRequest.new(complete_memo: complete)
  rescue ClosedQueueError
    raise Hatchet::Error, "durable stream disconnected"
  end
end
236
+
237
# Drop pending callbacks / event acks / buffered completions whose
# invocation count is ``<= invocation_count`` for the given task id.
#
# Dropped waiter queues are closed, which makes a blocked ``await_queue``
# on them raise "listener closed". Pending eviction acks are not touched.
#
# @param durable_task_external_id [String]
# @param invocation_count [Integer]
def cleanup_task_state(durable_task_external_id, invocation_count)
  # Every key in these tables starts with [task_external_id, invocation_count, ...].
  stale = ->(key) { key[0] == durable_task_external_id && key[1] <= invocation_count }

  @mu.synchronize do
    # delete_if removes entries as part of the traversal, instead of
    # calling Hash#delete from inside an each_key loop over the same hash.
    [@pending_callbacks, @pending_event_acks].each do |waiters|
      waiters.delete_if do |key, queue|
        next false unless stale.call(key)

        queue&.close
        true
      end
    end

    @buffered_completions.delete_if { |key, _entry| stale.call(key) }
  end
end
260
+
261
# Test seam: feeds one response message straight into the private
# dispatcher, bypassing the network stream.
#
# @param response [Object] a ``DurableTaskResponse``-like message
def handle_response_for_test(response)
  send(:handle_response, response)
end
265
+
266
+ private
267
+
268
# Wrap an outgoing event in the ``DurableTaskRequest`` envelope.
#
# A WaitForEvent becomes a ``wait_for`` request (after a debug log
# summarising its conditions); a MemoEvent becomes a ``memo`` request whose
# payload is only set when the event carries a result.
#
# @param durable_task_external_id [String]
# @param invocation_count [Integer]
# @param event [WaitForEvent, MemoEvent]
# @return [V1::DurableTaskRequest]
# @raise [ArgumentError] for any other event class
def build_event_request(durable_task_external_id, invocation_count, event)
  if event.is_a?(WaitForEvent)
    conditions = event.wait_for_conditions

    if !conditions
      @logger&.debug(
        "durable event listener wait_for payload: task=#{durable_task_external_id} " \
        "invocation=#{invocation_count} wait_for_conditions=nil",
      )
    else
      sleeps = conditions.sleep_conditions || []
      user_events = conditions.user_event_conditions || []
      head = sleeps.first

      if head&.base
        # Include the first sleep condition's details in the log line.
        @logger&.debug(
          "durable event listener wait_for payload: task=#{durable_task_external_id} " \
          "invocation=#{invocation_count} sleep_count=#{sleeps.length} " \
          "event_count=#{user_events.length} " \
          "first_sleep_readable_key=#{head.base.readable_data_key} " \
          "first_sleep_for=#{head.sleep_for} " \
          "first_sleep_action=#{head.base.action} " \
          "first_sleep_or_group_id=#{head.base.or_group_id}",
        )
      else
        @logger&.debug(
          "durable event listener wait_for payload: task=#{durable_task_external_id} " \
          "invocation=#{invocation_count} sleep_count=#{sleeps.length} " \
          "event_count=#{user_events.length}",
        )
      end
    end

    wait_for = ::V1::DurableTaskWaitForRequest.new(
      durable_task_external_id: durable_task_external_id,
      invocation_count: invocation_count,
      wait_for_conditions: event.wait_for_conditions,
      label: event.label,
    )
    return ::V1::DurableTaskRequest.new(wait_for: wait_for)
  end

  if event.is_a?(MemoEvent)
    memo = ::V1::DurableTaskMemoRequest.new(
      durable_task_external_id: durable_task_external_id,
      invocation_count: invocation_count,
      key: event.memo_key,
    )
    # Leave the payload unset unless a result was supplied.
    memo.payload = event.result.to_s if event.result
    return ::V1::DurableTaskRequest.new(memo: memo)
  end

  raise ArgumentError, "Unknown durable task send event: #{event.class}"
end
318
+
319
# Wait for one message on a per-call queue and unwrap it.
#
# Messages are ``[:ok, payload]`` or ``[:err, exception]``; a ``nil``
# message means the queue was closed while empty.
#
# @param queue [Queue] the per-call reply queue
# @param timeout [Numeric, nil] seconds to wait; ``nil`` blocks indefinitely
# @return [Object] the payload of an ``:ok`` message
# @raise [Timeout::Error] when ``timeout`` elapses with no message
# @raise [Hatchet::Error] when the queue was closed without a message
# @raise [Exception] the exception carried by an ``:err`` message
def await_queue(queue, timeout: nil)
  msg =
    if timeout
      # Monotonic clock: the deadline must not move when wall time is adjusted.
      deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
      loop do
        break queue.pop(true)
      rescue ThreadError
        # Non-blocking pop raises ThreadError on an empty queue even when the
        # queue is closed, so closure must be checked explicitly; otherwise a
        # closed queue is only noticed once the full timeout has elapsed.
        break nil if queue.closed? && queue.empty?
        if Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
          raise Timeout::Error, "timed out waiting for queue"
        end

        sleep 0.05
      end
    else
      # Blocking pop returns nil once the queue is closed and drained.
      queue.pop
    end

  raise Hatchet::Error, "listener closed" if msg.nil?

  kind, payload = msg
  raise payload if kind == :err

  payload
end
340
+
341
# Open a fresh ``durable_task`` bidirectional stream on the configured
# channel, then enqueue the register_worker request and a worker status
# poll as the first outgoing messages.
#
# If any step fails, the half-built connection state is cleared before
# re-raising. Otherwise @request_queue would look usable while nothing
# consumes it, and senders would enqueue and then block forever.
#
# @raise [StandardError] whatever the stub, stream or registration raised
def connect
  @request_queue = Queue.new

  stub = ::V1::V1Dispatcher::Stub.new(
    @config.host_port,
    nil,
    channel_override: @channel,
  )
  @stub = stub

  @request_enum = build_request_enumerator

  @logger&.info("durable event listener connecting...")

  @stream = stub.durable_task(@request_enum, metadata: @config.auth_metadata)

  register_worker
  poll_worker_status

  @logger&.info("durable event listener connected")
rescue StandardError
  abandoned = @request_queue
  @request_queue = nil
  @stream = nil
  begin
    abandoned&.close
  rescue StandardError
    nil
  end
  raise
end
362
+
363
# Detach the current stream: clear the request queue and stream handles,
# close the old queue (best effort), and fail every pending event and
# eviction ack with ``error``. Pending callbacks are left in place.
#
# @param error [Exception] delivered to the failed ack waiters
def mark_stream_unavailable(error)
  stale_queue, @request_queue = @request_queue, nil
  @stream = nil

  begin
    stale_queue&.close
  rescue StandardError
    nil
  end

  fail_pending_acks(error)
end
376
+
377
# Block until ``handle_register_worker`` flags the worker as registered, or
# until REGISTER_WORKER_ACK_TIMEOUT_SECONDS have passed.
#
# @return [nil]
# @raise [Hatchet::Error] when no ack arrived in time
def wait_for_register_worker_ack
  deadline = Time.now + REGISTER_WORKER_ACK_TIMEOUT_SECONDS

  @registration_mu.synchronize do
    loop do
      break if @worker_registered

      left = deadline - Time.now
      break if left <= 0

      # Re-check the flag after every wake-up; the wait may return early.
      @registration_cv.wait(@registration_mu, left)
    end
  end

  acked = @registration_mu.synchronize { @worker_registered }
  unless acked
    raise Hatchet::Error,
          "durable event listener did not receive register_worker ack " \
          "within #{REGISTER_WORKER_ACK_TIMEOUT_SECONDS.to_i}s"
  end

  nil
end
394
+
395
# Build the outgoing side of the stream: an Enumerator that pops requests
# off the current request queue, logs which field each one carries, and
# yields it. Iteration ends when the queue yields nil (closed and drained)
# or raises ClosedQueueError.
#
# @return [Enumerator]
def build_request_enumerator
  source = @request_queue
  # Checked in this order; the first field that is present names the request.
  fields = %i[register_worker wait_for memo trigger_runs evict_invocation worker_status complete_memo]

  Enumerator.new do |out|
    loop do
      req =
        begin
          source.pop
        rescue ClosedQueueError
          nil
        end
      break if req.nil?

      field = fields.find { |name| req.respond_to?(name) && req.public_send(name) }
      request_kind = field ? field.to_s : "unknown"
      @logger&.debug("durable event listener stream write: kind=#{request_kind}")
      out << req
    end
  end
end
430
+
431
# Enqueue the ``register_worker`` request that announces this worker id on
# the stream.
#
# @raise [Hatchet::Error] when no worker id has been set yet
def register_worker
  raise Hatchet::Error, "Client not started" if @worker_id.nil?

  registration = ::V1::DurableTaskRequestRegisterWorker.new(worker_id: @worker_id)
  @request_queue << ::V1::DurableTaskRequest.new(register_worker: registration)
end
438
+
439
# Send a ``worker_status`` request listing every entry this worker is
# currently waiting on. Does nothing when the listener has no stream, no
# worker id, or no pending callbacks.
#
# The poll is best effort: if the stream was torn down concurrently it is
# skipped, since ``connect`` issues a fresh poll once a stream is re-opened.
def poll_worker_status
  # Snapshot the queue once: the receive thread sets @request_queue to nil
  # when the stream drops, so re-reading the ivar below could hit nil.
  outbound = @request_queue
  return if outbound.nil? || @worker_id.nil?

  pending = @mu.synchronize { @pending_callbacks.keys }
  return if pending.empty?

  # Callback keys are [task_external_id, invocation_count, branch_id, node_id].
  waiting = pending.map do |(task_ext_id, inv_count, branch_id, node_id)|
    ::V1::DurableTaskAwaitedCompletedEntry.new(
      durable_task_external_id: task_ext_id,
      invocation_count: inv_count,
      node_id: node_id,
      branch_id: branch_id,
    )
  end

  outbound << ::V1::DurableTaskRequest.new(
    worker_status: ::V1::DurableTaskWorkerStatusRequest.new(
      worker_id: @worker_id,
      waiting_entries: waiting,
    ),
  )
rescue ClosedQueueError
  @logger&.debug("durable event listener poll_worker_status skipped: request queue closed")
  nil
end
461
+
462
# Background loop: while the listener is running, sleep one second and then
# re-send the worker status. Errors from a poll are logged, never raised,
# so one bad poll cannot end the loop.
def send_loop
  loop do
    break unless @running

    sleep 1
    begin
      poll_worker_status
    rescue StandardError => e
      @logger&.error("durable event listener send_loop error: #{e.class}: #{e.message}")
    end
  end
end
472
+
473
# Background loop: read responses off the stream and dispatch each one to
# ``handle_response``. On EOF or error the stream is marked unavailable and
# a reconnect is attempted after DEFAULT_RECONNECT_INTERVAL seconds. A
# ``GRPC::Cancelled`` ends the loop.
#
# When there is no stream (a previous reconnect failed), a reconnect is
# retried every interval. Without that retry, one failed reconnect would
# leave the loop sleeping forever with no stream.
def receive_loop
  while @running
    # Work on a local handle so the stream cannot change under us mid-step.
    stream = @stream
    unless stream
      sleep DEFAULT_RECONNECT_INTERVAL
      safe_reconnect if @running
      next
    end

    begin
      stream.each { |response| handle_response(response) }

      if @running
        @logger&.warn(
          "durable event listener disconnected (EOF), reconnecting in #{DEFAULT_RECONNECT_INTERVAL}s...",
        )
        recover_stream(Hatchet::Error.new("durable stream disconnected"))
      end
    rescue ::GRPC::Cancelled
      break
    rescue ::GRPC::BadStatus => e
      @logger&.warn(
        "durable event listener disconnected: code=#{e.code}, " \
        "details=#{e.details}, reconnecting in #{DEFAULT_RECONNECT_INTERVAL}s...",
      )
      recover_stream(Hatchet::Error.new("durable stream error: #{e.code} #{e.details}")) if @running
    rescue StandardError => e
      @logger&.error("unexpected error in durable event listener: #{e.class}: #{e.message}")
      recover_stream(e) if @running
    end
  end
end

# Fail in-flight acks with ``error``, wait one reconnect interval, then try
# to open a new stream.
def recover_stream(error)
  mark_stream_unavailable(error)
  sleep DEFAULT_RECONNECT_INTERVAL
  safe_reconnect
end
513
+
514
# Attempt ``connect``; a failure is logged and swallowed so the caller's
# loop keeps running.
def safe_reconnect
  begin
    connect
  rescue StandardError => e
    @logger&.error("failed to reconnect durable event listener: #{e.class}: #{e.message}")
  end
end
519
+
520
# Route one stream response to the handler for whichever field is set.
# The fields are tested in a fixed order and only the first match is
# handled; a response with none of them set is ignored.
#
# @param response [Object] a ``DurableTaskResponse``-like message
def handle_response(response)
  @logger&.debug("durable event listener stream read: kind=#{response_kind(response)}")

  if response.has_register_worker?
    handle_register_worker
  elsif response.has_trigger_runs_ack?
    handle_trigger_runs_ack(response.trigger_runs_ack)
  elsif response.has_memo_ack?
    handle_memo_ack(response.memo_ack)
  elsif response.has_wait_for_ack?
    handle_wait_for_ack(response.wait_for_ack)
  elsif response.has_entry_completed?
    handle_entry_completed(response.entry_completed)
  elsif response.has_eviction_ack?
    handle_eviction_ack(response.eviction_ack)
  elsif response.has_server_evict?
    handle_server_evict(response.server_evict)
  elsif response.has_error?
    handle_error_response(response.error)
  end
end
533
+
534
# Name the field set on a response, for logging. Fields are probed in a
# fixed order via their ``has_<field>?`` predicates.
#
# @param response [Object] a ``DurableTaskResponse``-like message
# @return [String] the first field that is set, or "unknown"
def response_kind(response)
  candidates = %w[
    register_worker trigger_runs_ack memo_ack wait_for_ack
    entry_completed eviction_ack server_evict error
  ]
  candidates.find { |field| response.public_send("has_#{field}?") } || "unknown"
end
546
+
547
# Record that the server acknowledged worker registration and wake every
# thread blocked in ``wait_for_register_worker_ack``.
def handle_register_worker
  @registration_mu.synchronize do
    @worker_registered = true
    @registration_cv.broadcast
  end
end
553
+
554
# Convert a ``trigger_runs_ack`` into a plain Hash (``ack_type: :run``) and
# deliver it to the waiter registered for that task / invocation.
#
# @param ack [Object] the ``trigger_runs_ack`` message
def handle_trigger_runs_ack(ack)
  task_id = ack.durable_task_external_id
  invocation = ack.invocation_count

  entries = ack.run_entries.map do |entry|
    {
      node_id: entry.node_id,
      branch_id: entry.branch_id,
      workflow_run_external_id: entry.workflow_run_external_id,
    }
  end

  body = {
    ack_type: :run,
    invocation_count: invocation,
    durable_task_external_id: task_id,
    run_entries: entries,
  }
  deliver_event_ack([task_id, invocation], body)
end
571
+
572
# Convert a ``memo_ack`` into a plain Hash (``ack_type: :memo``) and deliver
# it to the waiter registered for the referenced task / invocation.
#
# @param ack [Object] the ``memo_ack`` message
def handle_memo_ack(ack)
  ref = ack.ref
  body = {
    ack_type: :memo,
    invocation_count: ref.invocation_count,
    durable_task_external_id: ref.durable_task_external_id,
    node_id: ref.node_id,
    branch_id: ref.branch_id,
    memo_already_existed: ack.memo_already_existed,
    memo_result_payload: ack.memo_result_payload,
  }
  deliver_event_ack([ref.durable_task_external_id, ref.invocation_count], body)
end
586
+
587
# Log a ``wait_for_ack``, convert it into a plain Hash (``ack_type: :wait``)
# and deliver it to the waiter registered for the referenced task /
# invocation.
#
# @param ack [Object] the ``wait_for_ack`` message
def handle_wait_for_ack(ack)
  ref = ack.ref
  @logger&.debug(
    "durable event listener recv wait_for_ack: task=#{ref.durable_task_external_id} " \
    "invocation=#{ref.invocation_count} branch_id=#{ref.branch_id} node_id=#{ref.node_id}",
  )

  body = {
    ack_type: :wait,
    invocation_count: ref.invocation_count,
    durable_task_external_id: ref.durable_task_external_id,
    node_id: ref.node_id,
    branch_id: ref.branch_id,
  }
  deliver_event_ack([ref.durable_task_external_id, ref.invocation_count], body)
end
603
+
604
# Route an ``entry_completed`` message: hand the parsed result to the waiter
# registered under its callback key, or, when nobody is waiting yet, buffer
# it (with its arrival time) for a later ``wait_for_callback``.
#
# @param completed [Object] the ``entry_completed`` message
def handle_entry_completed(completed)
  ref = completed.ref
  @logger&.debug(
    "durable event listener recv entry_completed: task=#{ref.durable_task_external_id} " \
    "invocation=#{ref.invocation_count} branch_id=#{ref.branch_id} node_id=#{ref.node_id}",
  )

  key = callback_key_for(ref)
  result = parse_entry_completed(completed)

  @mu.synchronize do
    waiter = @pending_callbacks.delete(key)
    if waiter.nil?
      @buffered_completions[key] = [Time.now, result]
    else
      waiter << [:ok, result]
    end
  end
end
621
+
622
# Complete the pending eviction wait, if there is one, for the acked
# task / invocation.
#
# @param ack [Object] the ``eviction_ack`` message
def handle_eviction_ack(ack)
  key = [ack.durable_task_external_id, ack.invocation_count]

  @mu.synchronize { @pending_eviction_acks.delete(key)&.push([:ok, nil]) }
end
630
+
631
# React to a server-initiated eviction: log it, drop local state for the
# task up to that invocation, then notify the ``on_server_evict`` callback
# if one was supplied.
#
# @param evict [Object] the ``server_evict`` message
def handle_server_evict(evict)
  task_id = evict.durable_task_external_id
  invocation = evict.invocation_count

  @logger&.info(
    "received server eviction notification for task #{task_id} " \
    "invocation #{invocation}: #{evict.reason}",
  )
  cleanup_task_state(task_id, invocation)
  @on_server_evict&.call(task_id, invocation)
end
639
+
640
# Build the 4-element key used by the pending-callback and
# buffered-completion tables from an entry reference.
#
# @param ref [Object] responds to the four accessors listed below
# @return [Array] ``[durable_task_external_id, invocation_count, branch_id, node_id]``
def callback_key_for(ref)
  %i[durable_task_external_id invocation_count branch_id node_id].map do |field|
    ref.public_send(field)
  end
end
648
+
649
# Turn a server error message into an exception and fail whichever waiters
# (event ack, callback, eviction ack) are registered for the entry it
# references.
#
# @param error [Object] the ``error`` message from the stream
def handle_error_response(error)
  exc =
    case error.error_type
    when :DURABLE_TASK_ERROR_TYPE_NONDETERMINISM
      Hatchet::NonDeterminismError.new(
        error.error_message,
        task_external_id: error.ref.durable_task_external_id,
        invocation_count: error.ref.invocation_count,
        node_id: error.ref.node_id,
      )
    else
      Hatchet::Error.new(
        "Unspecified durable task error: #{error.error_message} (type: #{error.error_type})",
      )
    end

  ref = error.ref
  event_key = [ref.durable_task_external_id, ref.invocation_count]
  callback_key = [*event_key, ref.branch_id, ref.node_id]

  @mu.synchronize do
    @pending_event_acks.delete(event_key)&.push([:err, exc])
    @pending_callbacks.delete(callback_key)&.push([:err, exc])
    @pending_eviction_acks.delete(event_key)&.push([:err, exc])
  end
end
682
+
683
# Hand an ack payload to the waiter registered under ``key``, removing the
# registration. Does nothing when no waiter is registered.
#
# @param key [Array] ``[task_external_id, invocation_count]``
# @param payload [Hash] the ack body to deliver
def deliver_event_ack(key, payload)
  @mu.synchronize { @pending_event_acks.delete(key)&.push([:ok, payload]) }
end
689
+
690
# Convert an ``entry_completed`` message into the Hash returned by
# ``wait_for_callback``.
#
# The payload bytes are decoded as UTF-8 JSON. An empty or missing payload
# yields ``payload: nil``. So does a payload that is not valid JSON; that
# case is logged as a warning (size only, never the payload content) so a
# dropped result can be diagnosed.
#
# @param completed [Object] the ``entry_completed`` message
# @return [Hash] ``{ durable_task_external_id:, node_id:, payload: }``
def parse_entry_completed(completed)
  ref = completed.ref
  raw = completed.payload
  payload = nil

  if raw && !raw.empty?
    begin
      # dup before force_encoding so the message's own bytes are not retagged.
      payload = JSON.parse(raw.dup.force_encoding("UTF-8"))
    rescue JSON::ParserError
      @logger&.warn(
        "durable event listener could not parse entry_completed payload as JSON " \
        "(#{raw.bytesize} bytes): task=#{ref.durable_task_external_id} node_id=#{ref.node_id}",
      )
      payload = nil
    end
  end

  {
    durable_task_external_id: ref.durable_task_external_id,
    node_id: ref.node_id,
    payload: payload,
  }
end
707
+
708
# Fail every pending event ack and eviction ack with ``exc`` and empty both
# tables. Pending callbacks are not touched.
#
# @param exc [Exception] delivered to each waiter as ``[:err, exc]``
def fail_pending_acks(exc)
  reject_all = lambda do |waiters|
    waiters.each_value { |waiter| waiter.push([:err, exc]) }
    waiters.clear
  end

  @mu.synchronize do
    reject_all.call(@pending_event_acks)
    reject_all.call(@pending_eviction_acks)
  end
end
716
+
717
# Fail every waiter of every kind with ``exc``: event and eviction acks
# first, then pending callbacks. Buffered completions are discarded too.
#
# @param exc [Exception] delivered to each waiter as ``[:err, exc]``
def fail_all_pending(exc)
  fail_pending_acks(exc)

  @mu.synchronize do
    @pending_callbacks.each_value { |waiter| waiter.push([:err, exc]) }
    @pending_callbacks.clear
    @buffered_completions.clear
  end
end
725
+
726
# Wait up to five seconds for ``thread`` to finish. A missing thread, or
# any StandardError raised while joining, yields nil.
#
# @param thread [Thread, nil]
# @return [Thread, nil] the thread if it finished in time, otherwise nil
def rescue_thread(thread)
  thread&.join(5)
rescue StandardError
  nil
end
733
+ end
734
+ end
735
+ end