hatchet-sdk 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -1
- data/CHANGELOG.md +30 -0
- data/lib/hatchet/clients/grpc/admin.rb +45 -2
- data/lib/hatchet/clients/grpc/dispatcher.rb +33 -8
- data/lib/hatchet/condition_converter.rb +20 -12
- data/lib/hatchet/context.rb +6 -1
- data/lib/hatchet/contracts/dispatcher/dispatcher_pb.rb +3 -1
- data/lib/hatchet/contracts/dispatcher/dispatcher_services_pb.rb +1 -0
- data/lib/hatchet/contracts/v1/dispatcher_pb.rb +23 -1
- data/lib/hatchet/contracts/v1/dispatcher_services_pb.rb +2 -0
- data/lib/hatchet/contracts/v1/shared/condition_pb.rb +3 -1
- data/lib/hatchet/contracts/v1/shared/trigger_pb.rb +17 -0
- data/lib/hatchet/contracts/v1/workflows_pb.rb +4 -3
- data/lib/hatchet/contracts/v1/workflows_services_pb.rb +1 -0
- data/lib/hatchet/contracts/workflows/workflows_pb.rb +2 -4
- data/lib/hatchet/contracts/workflows/workflows_services_pb.rb +1 -1
- data/lib/hatchet/durable_context.rb +102 -33
- data/lib/hatchet/engine_version.rb +50 -0
- data/lib/hatchet/eviction_policy.rb +60 -0
- data/lib/hatchet/exceptions.rb +26 -0
- data/lib/hatchet/features/cron.rb +2 -1
- data/lib/hatchet/task.rb +7 -0
- data/lib/hatchet/version.rb +1 -1
- data/lib/hatchet/worker/durable_event_listener.rb +735 -0
- data/lib/hatchet/worker/durable_eviction/cache.rb +205 -0
- data/lib/hatchet/worker/durable_eviction/manager.rb +233 -0
- data/lib/hatchet/worker/runner.rb +279 -53
- data/lib/hatchet/worker_obj.rb +60 -4
- data/lib/hatchet/workflow.rb +8 -4
- data/lib/hatchet-sdk.rb +13 -3
- data/sig/hatchet/clients/grpc/dispatcher.rbs +2 -0
- data/sig/hatchet/durable_context.rbs +8 -2
- data/sig/hatchet/engine_version.rbs +12 -0
- data/sig/hatchet/eviction_policy.rbs +14 -0
- data/sig/hatchet/exceptions.rbs +12 -0
- data/sig/hatchet/task.rbs +2 -0
- data/sig/hatchet/worker/durable_event_listener.rbs +31 -0
- data/sig/hatchet/worker/durable_eviction/cache.rbs +41 -0
- data/sig/hatchet/worker/durable_eviction/manager.rbs +37 -0
- data/sig/hatchet/worker/runner.rbs +7 -1
- data/sig/hatchet/worker_obj.rbs +3 -0
- data/sig/hatchet/workflow.rbs +1 -1
- data/sig/hatchet-sdk.rbs +1 -1
- metadata +15 -4
|
@@ -0,0 +1,735 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "monitor"
|
|
5
|
+
require "timeout"
|
|
6
|
+
|
|
7
|
+
module Hatchet
|
|
8
|
+
module WorkerRuntime
|
|
9
|
+
# Thread-safe multiplexer over the ``V1Dispatcher.DurableTask`` bidirectional
|
|
10
|
+
# gRPC stream.
|
|
11
|
+
#
|
|
12
|
+
# A single stream is shared across all durable task invocations running on
|
|
13
|
+
# the worker; callers send ``send_event`` / ``wait_for_callback``
|
|
14
|
+
# / ``send_evict_invocation`` requests and block on per-call Queues until the
|
|
15
|
+
# response-dispatch thread routes the matching ``DurableTaskResponse`` back.
|
|
16
|
+
#
|
|
17
|
+
# @example
|
|
18
|
+
# listener = DurableEventListener.new(config: config, channel: channel, logger: logger)
|
|
19
|
+
# listener.start("worker-id-123")
|
|
20
|
+
# ack = listener.send_event(task_id, invocation_count, wait_for_event)
|
|
21
|
+
# result = listener.wait_for_callback(task_id, invocation_count, branch_id, node_id)
|
|
22
|
+
class DurableEventListener
|
|
23
|
+
DEFAULT_RECONNECT_INTERVAL = 3 # seconds
|
|
24
|
+
EVICTION_ACK_TIMEOUT_SECONDS = 30.0
|
|
25
|
+
REGISTER_WORKER_ACK_TIMEOUT_SECONDS = 10.0
|
|
26
|
+
|
|
27
|
+
# Outgoing event sent via ``send_event``.
|
|
28
|
+
#
|
|
29
|
+
# @!attribute [r] wait_for_conditions
|
|
30
|
+
# @return [V1::DurableEventListenerConditions]
|
|
31
|
+
# @!attribute [r] label
|
|
32
|
+
# @return [String, nil]
|
|
33
|
+
WaitForEvent = Struct.new(:wait_for_conditions, :label, keyword_init: true)
|
|
34
|
+
|
|
35
|
+
# Memo event with a ``bytes`` key and an optional already-computed result.
|
|
36
|
+
MemoEvent = Struct.new(:memo_key, :result, keyword_init: true)
|
|
37
|
+
|
|
38
|
+
# @return [String, nil]
|
|
39
|
+
attr_reader :worker_id
|
|
40
|
+
|
|
41
|
+
# @param config [Hatchet::Config]
|
|
42
|
+
# @param channel [GRPC::Core::Channel]
|
|
43
|
+
# @param logger [Logger]
|
|
44
|
+
# @param on_server_evict [Proc, nil] Called with (durable_task_external_id, invocation_count)
|
|
45
|
+
# when the server notifies about a stale invocation.
|
|
46
|
+
def initialize(config:, channel:, logger:, on_server_evict: nil)
  # Collaborators handed in by the caller.
  @config = config
  @channel = channel
  @logger = logger
  @on_server_evict = on_server_evict

  # Connection state; filled in by #start / #connect.
  @worker_id = nil
  @stub = nil
  @request_queue = nil
  @receive_thread = nil
  @send_thread = nil
  @running = false

  # @mu guards the four routing tables below.
  @mu = Monitor.new
  # [task_external_id, invocation_count] => Queue (receives [:ok, ack] or [:err, exc])
  @pending_event_acks = {}
  # [task_external_id, invocation_count] => Queue (receives [:ok, nil] or [:err, exc])
  @pending_eviction_acks = {}
  # [task_external_id, invocation_count, branch_id, node_id] => Queue
  @pending_callbacks = {}
  # same 4-tuple key => [inserted_at, result]; completions that arrived before a waiter
  @buffered_completions = {}

  # Start-up / registration handshake.
  @start_mu = Mutex.new
  @registration_mu = Mutex.new
  @registration_cv = ConditionVariable.new
  @worker_registered = false
end
|
|
76
|
+
|
|
77
|
+
# Start the listener if not already running. Idempotent.
|
|
78
|
+
#
|
|
79
|
+
# @param worker_id [String]
|
|
80
|
+
def start(worker_id)
  @start_mu.synchronize do
    return if @running

    @worker_id = worker_id
    @running = true
    @registration_mu.synchronize { @worker_registered = false }

    begin
      connect

      @receive_thread = Thread.new { receive_loop }
      @send_thread = Thread.new { send_loop }
      wait_for_register_worker_ack
    rescue StandardError
      # A failed start must not leave @running set, otherwise every later
      # start / ensure_started call returns early and the listener stays dead.
      # stop clears the flag, closes the request queue and joins any threads
      # that were already spawned, so the caller can retry.
      stop
      raise
    end
  end
end
|
|
95
|
+
|
|
96
|
+
# Start the listener if not already running.
|
|
97
|
+
def ensure_started(worker_id)
  # Cheap unlocked pre-check; #start re-checks the flag under its own lock.
  return if @running

  start(worker_id)
end
|
|
100
|
+
|
|
101
|
+
# Stop the listener and release resources.
|
|
102
|
+
def stop
  @running = false

  # Close the outbound queue BEFORE failing the waiters. A caller racing with
  # stop either pushed before the close (so its waiter was registered before
  # fail_all_pending runs and gets failed) or its push raises on the closed
  # queue. Failing first left a window in which a caller could register and
  # push afterwards and then block forever on an ack that never arrives.
  @request_queue&.close

  fail_all_pending(Hatchet::Error.new("DurableListener stopped"))

  rescue_thread(@receive_thread)
  rescue_thread(@send_thread)
end
|
|
111
|
+
|
|
112
|
+
# Send a ``DurableTask`` message and block for its ack.
|
|
113
|
+
#
|
|
114
|
+
# @param durable_task_external_id [String]
|
|
115
|
+
# @param invocation_count [Integer]
|
|
116
|
+
# @param event [WaitForEvent, MemoEvent] The event to send
|
|
117
|
+
# @return [Object] The parsed ack body (a simple Hash describing the ack)
|
|
118
|
+
# @raise [Hatchet::Error] on server-reported errors or listener disconnection
|
|
119
|
+
def send_event(durable_task_external_id, invocation_count, event)
  # Snapshot the queue once: a concurrent reconnect may reset @request_queue
  # to nil between the "started?" check and the push.
  request_queue = @request_queue
  raise Hatchet::Error, "DurableEventListener not started" unless request_queue

  # Build the request before registering the waiter, so an unsupported event
  # (ArgumentError) cannot leave a dangling entry in @pending_event_acks.
  request = build_event_request(durable_task_external_id, invocation_count, event)

  key = [durable_task_external_id, invocation_count]
  queue = Queue.new
  @mu.synchronize { @pending_event_acks[key] = queue }

  @logger&.debug(
    "durable event listener send_event: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count} event=#{event.class}",
  )

  begin
    request_queue << request
  rescue ClosedQueueError
    # The stream was torn down after the snapshot. Drop our waiter (only if it
    # is still ours) and report it as the documented Hatchet::Error.
    @mu.synchronize { @pending_event_acks.delete(key) if @pending_event_acks[key].equal?(queue) }
    raise Hatchet::Error, "DurableEventListener stream is unavailable"
  end

  ack = await_queue(queue)
  @logger&.debug(
    "durable event listener send_event ack: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count} ack_type=#{ack[:ack_type]} " \
    "branch_id=#{ack[:branch_id]} node_id=#{ack[:node_id]}",
  )
  ack
end
|
|
142
|
+
|
|
143
|
+
# Block until the server delivers an ``entry_completed`` (or error) for
|
|
144
|
+
# this durable task / invocation / branch / node id tuple.
|
|
145
|
+
#
|
|
146
|
+
# @return [Hash] ``{ durable_task_external_id:, node_id:, payload: }``
|
|
147
|
+
def wait_for_callback(durable_task_external_id, invocation_count, branch_id, node_id)
  key = [durable_task_external_id, invocation_count, branch_id, node_id]

  # Look up a buffered completion and register the waiter in ONE critical
  # section. With two separate sections, handle_entry_completed could run in
  # between, find no waiter, buffer the result, and leave this caller waiting
  # on a queue nobody will push to.
  buffered = nil
  queue = nil
  @mu.synchronize do
    buffered = @buffered_completions.delete(key)
    queue = (@pending_callbacks[key] ||= Queue.new) unless buffered
  end

  if buffered
    @logger&.debug(
      "durable event listener wait_for_callback: buffered completion hit " \
      "task=#{durable_task_external_id} invocation=#{invocation_count} " \
      "branch_id=#{branch_id} node_id=#{node_id}",
    )
    # buffered is [inserted_at, result]
    return buffered[1]
  end

  @logger&.debug(
    "durable event listener wait_for_callback: waiting " \
    "task=#{durable_task_external_id} invocation=#{invocation_count} " \
    "branch_id=#{branch_id} node_id=#{node_id}",
  )
  # Announce the new waiter right away instead of waiting for the next send_loop tick.
  poll_worker_status

  result = await_queue(queue)
  @logger&.debug(
    "durable event listener wait_for_callback: completed " \
    "task=#{durable_task_external_id} invocation=#{invocation_count} " \
    "branch_id=#{branch_id} node_id=#{node_id}",
  )
  result
end
|
|
179
|
+
|
|
180
|
+
# Request eviction of a stale invocation from the server and block until ack.
|
|
181
|
+
#
|
|
182
|
+
# @param durable_task_external_id [String]
|
|
183
|
+
# @param invocation_count [Integer]
|
|
184
|
+
# @param reason [String, nil] Optional human-readable reason.
|
|
185
|
+
# @raise [Hatchet::Error] on timeout or listener disconnection
|
|
186
|
+
def send_evict_invocation(durable_task_external_id, invocation_count, reason: nil)
  # Snapshot once; a concurrent reconnect may nil out @request_queue.
  request_queue = @request_queue
  raise Hatchet::Error, "DurableEventListener not started" unless request_queue

  key = [durable_task_external_id, invocation_count]
  queue = Queue.new
  @mu.synchronize { @pending_eviction_acks[key] = queue }

  args = {
    durable_task_external_id: durable_task_external_id,
    invocation_count: invocation_count,
  }
  # Only set the optional field when a reason was actually given.
  args[:reason] = reason if reason
  req = ::V1::DurableTaskEvictInvocationRequest.new(**args)

  @logger&.debug(
    "durable event listener send_evict_invocation: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count} reason=#{reason}",
  )
  request_queue << ::V1::DurableTaskRequest.new(evict_invocation: req)

  await_queue(queue, timeout: EVICTION_ACK_TIMEOUT_SECONDS)
  @logger&.debug(
    "durable event listener send_evict_invocation ack: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count}",
  )
rescue Timeout::Error
  # Remove our waiter, but only if the slot still holds OUR queue, so a newer
  # request for the same key is not dropped.
  @mu.synchronize { @pending_eviction_acks.delete(key) if @pending_eviction_acks[key].equal?(queue) }
  raise Hatchet::Error,
        "Eviction ack timed out after #{EVICTION_ACK_TIMEOUT_SECONDS.to_i}s " \
        "for task #{durable_task_external_id} invocation #{invocation_count}"
rescue ClosedQueueError
  # The stream was torn down between the snapshot and the push: clean up and
  # report it as the documented Hatchet::Error.
  @mu.synchronize { @pending_eviction_acks.delete(key) if @pending_eviction_acks[key].equal?(queue) }
  raise Hatchet::Error, "DurableEventListener stream is unavailable"
end
|
|
217
|
+
|
|
218
|
+
# Fire-and-forget ``complete_memo`` notification.
|
|
219
|
+
def send_memo_completed_notification(durable_task_external_id:, node_id:, branch_id:, invocation_count:, memo_key:,
                                     memo_result_payload:)
  raise Hatchet::Error, "DurableEventListener not started" unless @request_queue

  # Identify the log entry the memo result belongs to.
  entry_ref = ::V1::DurableEventLogEntryRef.new(
    durable_task_external_id: durable_task_external_id,
    node_id: node_id,
    invocation_count: invocation_count,
    branch_id: branch_id,
  )
  memo_done = ::V1::DurableTaskCompleteMemoRequest.new(ref: entry_ref, memo_key: memo_key, payload: memo_result_payload)

  # Fire-and-forget: no waiter is registered for this message.
  @request_queue << ::V1::DurableTaskRequest.new(complete_memo: memo_done)
end
|
|
236
|
+
|
|
237
|
+
# Drop pending callbacks / acks / buffered completions whose invocation
|
|
238
|
+
# count is ``<= invocation_count`` for the given task id.
|
|
239
|
+
def cleanup_task_state(durable_task_external_id, invocation_count)
  # A key is stale when it belongs to this task and to this invocation or an earlier one.
  stale = ->(key) { key[0] == durable_task_external_id && key[1] <= invocation_count }

  @mu.synchronize do
    # Waiter tables: drop the entry and close its queue.
    [@pending_callbacks, @pending_event_acks].each do |table|
      table.delete_if do |key, waiter|
        next false unless stale.call(key)

        waiter&.close
        true
      end
    end

    # Buffered results have no waiter attached; just drop them.
    @buffered_completions.delete_if { |key, _entry| stale.call(key) }
  end
end
|
|
260
|
+
|
|
261
|
+
# Hook for tests: handle a single response message (bypassing the network).
|
|
262
|
+
def handle_response_for_test(response)
  # Public entry point into the private dispatcher; lets specs feed messages
  # without a live stream.
  handle_response(response)
end
|
|
265
|
+
|
|
266
|
+
private
|
|
267
|
+
|
|
268
|
+
# Translate a WaitForEvent / MemoEvent into the DurableTaskRequest envelope
# that goes onto the stream.
#
# @raise [ArgumentError] for any other event class
def build_event_request(durable_task_external_id, invocation_count, event)
  case event
  when WaitForEvent
    log_wait_for_payload(durable_task_external_id, invocation_count, event.wait_for_conditions)

    ::V1::DurableTaskRequest.new(
      wait_for: ::V1::DurableTaskWaitForRequest.new(
        durable_task_external_id: durable_task_external_id,
        invocation_count: invocation_count,
        wait_for_conditions: event.wait_for_conditions,
        label: event.label,
      ),
    )
  when MemoEvent
    memo = ::V1::DurableTaskMemoRequest.new(
      durable_task_external_id: durable_task_external_id,
      invocation_count: invocation_count,
      key: event.memo_key,
    )
    # The payload is only attached when a result is present.
    memo.payload = event.result.to_s if event.result
    ::V1::DurableTaskRequest.new(memo: memo)
  else
    raise ArgumentError, "Unknown durable task send event: #{event.class}"
  end
end

# Debug-log a summary of the wait conditions about to be sent.
def log_wait_for_payload(durable_task_external_id, invocation_count, conditions)
  unless conditions
    @logger&.debug(
      "durable event listener wait_for payload: task=#{durable_task_external_id} " \
      "invocation=#{invocation_count} wait_for_conditions=nil",
    )
    return
  end

  sleeps = conditions.sleep_conditions || []
  user_events = conditions.user_event_conditions || []
  head = sleeps.first

  unless head&.base
    @logger&.debug(
      "durable event listener wait_for payload: task=#{durable_task_external_id} " \
      "invocation=#{invocation_count} sleep_count=#{sleeps.length} " \
      "event_count=#{user_events.length}",
    )
    return
  end

  @logger&.debug(
    "durable event listener wait_for payload: task=#{durable_task_external_id} " \
    "invocation=#{invocation_count} sleep_count=#{sleeps.length} " \
    "event_count=#{user_events.length} " \
    "first_sleep_readable_key=#{head.base.readable_data_key} " \
    "first_sleep_for=#{head.sleep_for} " \
    "first_sleep_action=#{head.base.action} " \
    "first_sleep_or_group_id=#{head.base.or_group_id}",
  )
end
|
|
318
|
+
|
|
319
|
+
# Block until a waiter queue delivers a message and unwrap it.
#
# Messages are ``[:ok, payload]`` or ``[:err, exception]``.
#
# @param queue [Queue] per-call waiter queue
# @param timeout [Numeric, nil] seconds to wait; nil waits indefinitely
# @return [Object] the payload of an ``[:ok, payload]`` message
# @raise [Timeout::Error] when ``timeout`` elapses without a message
# @raise [Hatchet::Error] when the queue was closed without a message
# @raise [Exception] the exception carried by an ``[:err, exception]`` message
def await_queue(queue, timeout: nil)
  msg =
    if timeout
      # Monotonic clock: a wall-clock adjustment must not stretch or shrink the wait.
      deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
      loop do
        break queue.pop(true)
      rescue ThreadError
        # Non-blocking pop raises on an empty queue even when it is closed.
        # A closed, drained queue will never deliver: fail fast like the
        # untimed branch does instead of polling until the deadline.
        break nil if queue.closed? && queue.empty?
        raise Timeout::Error, "timed out waiting for queue" if Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline

        sleep 0.05
      end
    else
      # Blocking pop returns nil once the queue is closed and empty.
      queue.pop
    end

  raise Hatchet::Error, "listener closed" if msg.nil?

  kind, payload = msg
  raise payload if kind == :err

  payload
end
|
|
340
|
+
|
|
341
|
+
# Open a fresh DurableTask stream: new outbound queue, new stub, then enqueue
# the worker registration and an initial status poll.
def connect
  @request_queue = Queue.new
  @stub = ::V1::V1Dispatcher::Stub.new(@config.host_port, nil, channel_override: @channel)
  @request_enum = build_request_enumerator

  @logger&.info("durable event listener connecting...")

  @stream = @stub.durable_task(@request_enum, metadata: @config.auth_metadata)

  # Registration goes first on the new stream; the poll re-announces waiters
  # that are already registered in @pending_callbacks.
  register_worker
  poll_worker_status

  @logger&.info("durable event listener connected")
end
|
|
362
|
+
|
|
363
|
+
# Drop the current stream and outbound queue, then fail every pending
# event / eviction ack with +error+. Pending callbacks are left untouched.
def mark_stream_unavailable(error)
  stale_queue, @request_queue = @request_queue, nil
  @stream = nil

  begin
    stale_queue&.close
  rescue StandardError
    nil
  end

  fail_pending_acks(error)
end
|
|
376
|
+
|
|
377
|
+
# Block until handle_register_worker flags the worker as registered.
#
# @return [nil] once the ack has been observed
# @raise [Hatchet::Error] if no ack arrives within
#   REGISTER_WORKER_ACK_TIMEOUT_SECONDS
def wait_for_register_worker_ack
  # Monotonic clock so a wall-clock adjustment cannot distort the timeout.
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + REGISTER_WORKER_ACK_TIMEOUT_SECONDS

  registered = @registration_mu.synchronize do
    # Loop: ConditionVariable#wait can return before the flag is set.
    until @worker_registered
      remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
      break if remaining <= 0

      @registration_cv.wait(@registration_mu, remaining)
    end

    # Read the final state while still holding the lock.
    @worker_registered
  end
  return if registered

  raise Hatchet::Error,
        "durable event listener did not receive register_worker ack " \
        "within #{REGISTER_WORKER_ACK_TIMEOUT_SECONDS.to_i}s"
end
|
|
394
|
+
|
|
395
|
+
# Build the Enumerator that drains the current request queue. It ends when
# the queue is closed or yields nil.
def build_request_enumerator
  # Bind to the queue that is current now; a reconnect installs a new one.
  outbound = @request_queue
  payload_fields = %i[register_worker wait_for memo trigger_runs evict_invocation worker_status complete_memo]

  Enumerator.new do |yielder|
    loop do
      request =
        begin
          outbound.pop
        rescue ClosedQueueError
          break
        end
      break if request.nil?

      # The first populated field names the message kind, for logging only.
      populated = payload_fields.find { |field| request.respond_to?(field) && request.public_send(field) }
      @logger&.debug("durable event listener stream write: kind=#{populated || 'unknown'}")
      yielder << request
    end
  end
end
|
|
430
|
+
|
|
431
|
+
# Enqueue the register_worker message for the current worker id.
#
# @raise [Hatchet::Error] when no worker id has been set yet
def register_worker
  raise Hatchet::Error, "Client not started" if @worker_id.nil?

  registration = ::V1::DurableTaskRequestRegisterWorker.new(worker_id: @worker_id)
  @request_queue << ::V1::DurableTaskRequest.new(register_worker: registration)
end
|
|
438
|
+
|
|
439
|
+
# Send a worker_status message listing every entry a caller is currently
# waiting on. No-op when the stream is down, no worker id is set, or nothing
# is pending.
def poll_worker_status
  # Snapshot once: the receive thread may reset @request_queue to nil during a
  # reconnect, and re-reading the ivar after the check raised NoMethodError.
  queue = @request_queue
  return if queue.nil? || @worker_id.nil?

  pending = @mu.synchronize { @pending_callbacks.keys }
  return if pending.empty?

  waiting = pending.map do |(task_ext_id, inv_count, branch_id, node_id)|
    ::V1::DurableTaskAwaitedCompletedEntry.new(
      durable_task_external_id: task_ext_id,
      invocation_count: inv_count,
      node_id: node_id,
      branch_id: branch_id,
    )
  end

  queue << ::V1::DurableTaskRequest.new(
    worker_status: ::V1::DurableTaskWorkerStatusRequest.new(
      worker_id: @worker_id,
      waiting_entries: waiting,
    ),
  )
rescue ClosedQueueError
  # Queue closed under us. While the listener is running this is a reconnect in
  # progress, and the next connect / send_loop tick re-sends the status, so a
  # waiter should not be aborted for it. After stop there is no later tick,
  # so the error is propagated.
  raise unless @running

  nil
end
|
|
461
|
+
|
|
462
|
+
# Body of the send thread: once per second, re-announce pending waiters to the
# server until the listener stops. Poll failures are logged, not raised.
def send_loop
  loop do
    break unless @running

    sleep 1
    begin
      poll_worker_status
    rescue StandardError => e
      @logger&.error("durable event listener send_loop error: #{e.class}: #{e.message}")
    end
  end
end
|
|
472
|
+
|
|
473
|
+
# Body of the receive thread: dispatch every response from the stream and
# reconnect after EOF or an error, until the listener stops or the stream is
# cancelled.
def receive_loop
  while @running
    unless @stream
      # No stream means the previous reconnect attempt failed. Retry it;
      # otherwise the loop would sleep forever and never recover.
      sleep DEFAULT_RECONNECT_INTERVAL
      safe_reconnect if @running
      next
    end

    begin
      @stream.each { |response| handle_response(response) }

      # The server closed the stream cleanly.
      if @running
        @logger&.warn(
          "durable event listener disconnected (EOF), reconnecting in #{DEFAULT_RECONNECT_INTERVAL}s...",
        )
        reconnect_after_failure(Hatchet::Error.new("durable stream disconnected"))
      end
    rescue ::GRPC::Cancelled
      # Cancellation ends the loop without a reconnect.
      break
    rescue ::GRPC::BadStatus => e
      @logger&.warn(
        "durable event listener disconnected: code=#{e.code}, " \
        "details=#{e.details}, reconnecting in #{DEFAULT_RECONNECT_INTERVAL}s...",
      )
      reconnect_after_failure(Hatchet::Error.new("durable stream error: #{e.code} #{e.details}")) if @running
    rescue StandardError => e
      @logger&.error("unexpected error in durable event listener: #{e.class}: #{e.message}")
      reconnect_after_failure(e) if @running
    end
  end
end

# Fail in-flight acks with +error+, wait out the reconnect interval, then try
# to open a new stream.
def reconnect_after_failure(error)
  mark_stream_unavailable(error)
  sleep DEFAULT_RECONNECT_INTERVAL
  safe_reconnect
end
|
|
513
|
+
|
|
514
|
+
# Try to open a new stream. Failures are logged instead of raised, so the
# receive thread survives them.
def safe_reconnect
  connect
rescue StandardError => e
  @logger&.error("failed to reconnect durable event listener: #{e.class}: #{e.message}")
  # connect installs a fresh @request_queue before it can fail. Left in place,
  # callers would enqueue requests that nobody reads and then block on the ack.
  # Reset the half-initialised state so they fail fast with "not started".
  mark_stream_unavailable(e)
end
|
|
519
|
+
|
|
520
|
+
# Route one DurableTaskResponse to the handler for whichever field is set.
# Fields are tested in a fixed order; the first one present wins. A response
# with none of them set is ignored.
def handle_response(response)
  @logger&.debug("durable event listener stream read: kind=#{response_kind(response)}")

  if response.has_register_worker?
    handle_register_worker
  elsif response.has_trigger_runs_ack?
    handle_trigger_runs_ack(response.trigger_runs_ack)
  elsif response.has_memo_ack?
    handle_memo_ack(response.memo_ack)
  elsif response.has_wait_for_ack?
    handle_wait_for_ack(response.wait_for_ack)
  elsif response.has_entry_completed?
    handle_entry_completed(response.entry_completed)
  elsif response.has_eviction_ack?
    handle_eviction_ack(response.eviction_ack)
  elsif response.has_server_evict?
    handle_server_evict(response.server_evict)
  elsif response.has_error?
    handle_error_response(response.error)
  end
end
|
|
533
|
+
|
|
534
|
+
# Name of the first populated field of a response, or "unknown". Used for
# debug logging only.
def response_kind(response)
  kinds = %w[register_worker trigger_runs_ack memo_ack wait_for_ack entry_completed eviction_ack server_evict error]
  kinds.find { |kind| response.public_send("has_#{kind}?") } || "unknown"
end
|
|
546
|
+
|
|
547
|
+
# Record that the server acknowledged registration and wake every thread
# waiting on the registration condition variable.
def handle_register_worker
  @registration_mu.synchronize do
    @worker_registered = true
    # broadcast, not signal: wakes all waiters
    @registration_cv.broadcast
  end
end
|
|
553
|
+
|
|
554
|
+
# Convert a trigger_runs ack into a plain Hash and hand it to the caller
# waiting on [task id, invocation count].
def handle_trigger_runs_ack(ack)
  task_id = ack.durable_task_external_id
  invocation = ack.invocation_count

  entries = ack.run_entries.map do |entry|
    {
      node_id: entry.node_id,
      branch_id: entry.branch_id,
      workflow_run_external_id: entry.workflow_run_external_id,
    }
  end

  summary = {
    ack_type: :run,
    invocation_count: invocation,
    durable_task_external_id: task_id,
    run_entries: entries,
  }
  deliver_event_ack([task_id, invocation], summary)
end
|
|
571
|
+
|
|
572
|
+
# Convert a memo ack into a plain Hash and hand it to the caller waiting on
# [task id, invocation count].
def handle_memo_ack(ack)
  ref = ack.ref
  task_id = ref.durable_task_external_id
  invocation = ref.invocation_count

  summary = {
    ack_type: :memo,
    invocation_count: invocation,
    durable_task_external_id: task_id,
    node_id: ref.node_id,
    branch_id: ref.branch_id,
    memo_already_existed: ack.memo_already_existed,
    memo_result_payload: ack.memo_result_payload,
  }
  deliver_event_ack([task_id, invocation], summary)
end
|
|
586
|
+
|
|
587
|
+
# Log a wait_for ack, convert it into a plain Hash and hand it to the caller
# waiting on [task id, invocation count].
def handle_wait_for_ack(ack)
  @logger&.debug(
    "durable event listener recv wait_for_ack: task=#{ack.ref.durable_task_external_id} " \
    "invocation=#{ack.ref.invocation_count} branch_id=#{ack.ref.branch_id} node_id=#{ack.ref.node_id}",
  )

  ref = ack.ref
  waiter_key = [ref.durable_task_external_id, ref.invocation_count]
  summary = {
    ack_type: :wait,
    invocation_count: ref.invocation_count,
    durable_task_external_id: ref.durable_task_external_id,
    node_id: ref.node_id,
    branch_id: ref.branch_id,
  }
  deliver_event_ack(waiter_key, summary)
end
|
|
603
|
+
|
|
604
|
+
# Deliver an entry_completed result to the waiter for its
# (task, invocation, branch, node) key. If no waiter is registered yet, the
# result is buffered for a later wait_for_callback.
def handle_entry_completed(completed)
  @logger&.debug(
    "durable event listener recv entry_completed: task=#{completed.ref.durable_task_external_id} " \
    "invocation=#{completed.ref.invocation_count} branch_id=#{completed.ref.branch_id} node_id=#{completed.ref.node_id}",
  )

  waiter_key = callback_key_for(completed.ref)
  parsed = parse_entry_completed(completed)

  @mu.synchronize do
    waiter = @pending_callbacks.delete(waiter_key)
    next waiter << [:ok, parsed] if waiter

    # Stored with a timestamp: [inserted_at, result].
    @buffered_completions[waiter_key] = [Time.now, parsed]
  end
end
|
|
621
|
+
|
|
622
|
+
# Wake the caller waiting on this eviction ack, if there is one.
def handle_eviction_ack(ack)
  waiter_key = [ack.durable_task_external_id, ack.invocation_count]

  @mu.synchronize do
    waiter = @pending_eviction_acks.delete(waiter_key)
    waiter << [:ok, nil] if waiter
  end
end
|
|
630
|
+
|
|
631
|
+
# Handle a server-initiated eviction: log it, drop local state for that
# invocation and earlier ones, then notify the optional on_server_evict hook.
def handle_server_evict(evict)
  @logger&.info(
    "received server eviction notification for task #{evict.durable_task_external_id} " \
    "invocation #{evict.invocation_count}: #{evict.reason}",
  )

  task_id = evict.durable_task_external_id
  invocation = evict.invocation_count
  cleanup_task_state(task_id, invocation)
  @on_server_evict&.call(task_id, invocation)
end
|
|
639
|
+
|
|
640
|
+
# Key used by @pending_callbacks / @buffered_completions:
# [task id, invocation count, branch id, node id].
def callback_key_for(ref)
  %i[durable_task_external_id invocation_count branch_id node_id].map { |field| ref.public_send(field) }
end
|
|
648
|
+
|
|
649
|
+
# Turn a server error into an exception and deliver it to every waiter that
# matches the error's ref: the event ack, the callback and the eviction ack.
def handle_error_response(error)
  failure =
    case error.error_type
    when :DURABLE_TASK_ERROR_TYPE_NONDETERMINISM
      Hatchet::NonDeterminismError.new(
        error.error_message,
        task_external_id: error.ref.durable_task_external_id,
        invocation_count: error.ref.invocation_count,
        node_id: error.ref.node_id,
      )
    else
      Hatchet::Error.new(
        "Unspecified durable task error: #{error.error_message} (type: #{error.error_type})",
      )
    end

  ref = error.ref
  ack_key = [ref.durable_task_external_id, ref.invocation_count]
  callback_key = [*ack_key, ref.branch_id, ref.node_id]

  @mu.synchronize do
    event_waiter = @pending_event_acks.delete(ack_key)
    event_waiter << [:err, failure] if event_waiter

    callback_waiter = @pending_callbacks.delete(callback_key)
    callback_waiter << [:err, failure] if callback_waiter

    eviction_waiter = @pending_eviction_acks.delete(ack_key)
    eviction_waiter << [:err, failure] if eviction_waiter
  end
end
|
|
682
|
+
|
|
683
|
+
# Hand +payload+ to the send_event caller registered under +key+. Does nothing
# when no waiter is registered for that key.
def deliver_event_ack(key, payload)
  @mu.synchronize do
    waiter = @pending_event_acks.delete(key)
    waiter << [:ok, payload] if waiter
  end
end
|
|
689
|
+
|
|
690
|
+
# Build the result Hash handed to wait_for_callback callers.
#
# The payload bytes are decoded as UTF-8 JSON. An absent, empty or unparseable
# payload yields ``payload: nil``.
#
# @return [Hash] ``{ durable_task_external_id:, node_id:, payload: }``
def parse_entry_completed(completed)
  raw = completed.payload

  payload =
    if raw && !raw.empty?
      begin
        # dup before force_encoding so the message's own string is not mutated
        JSON.parse(raw.dup.force_encoding("UTF-8"))
      rescue JSON::ParserError => e
        # Still best-effort (payload stays nil), but leave a trace: a silently
        # dropped payload is indistinguishable from an empty result. Only
        # class and size are logged, never the payload content.
        @logger&.warn(
          "durable event listener could not parse entry_completed payload: " \
          "task=#{completed.ref.durable_task_external_id} node_id=#{completed.ref.node_id} " \
          "bytes=#{raw.bytesize} error=#{e.class}",
        )
        nil
      end
    end

  {
    durable_task_external_id: completed.ref.durable_task_external_id,
    node_id: completed.ref.node_id,
    payload: payload,
  }
end
|
|
707
|
+
|
|
708
|
+
# Fail every caller waiting on an event ack or eviction ack with +exc+ and
# forget those waiters. Pending callbacks are not touched.
def fail_pending_acks(exc)
  @mu.synchronize do
    reject_all = lambda do |waiters|
      waiters.each_value { |waiter| waiter << [:err, exc] }
      waiters.clear
    end

    reject_all.call(@pending_event_acks)
    reject_all.call(@pending_eviction_acks)
  end
end
|
|
716
|
+
|
|
717
|
+
# Fail every waiter of every kind with +exc+ and discard all buffered
# completions.
def fail_all_pending(exc)
  fail_pending_acks(exc)

  @mu.synchronize do
    # Drain in insertion order, failing each waiter as it is removed.
    until @pending_callbacks.empty?
      _key, waiter = @pending_callbacks.shift
      waiter << [:err, exc]
    end
    @pending_callbacks.clear
    @buffered_completions.clear
  end
end
|
|
725
|
+
|
|
726
|
+
# Join +thread+ for up to five seconds. An exception the thread died with is
# swallowed, so a crashed worker thread cannot break the caller.
def rescue_thread(thread)
  thread&.join(5)
rescue StandardError
  nil
end
|
|
733
|
+
end
|
|
734
|
+
end
|
|
735
|
+
end
|