hatchet-sdk 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -1
- data/CHANGELOG.md +30 -0
- data/lib/hatchet/clients/grpc/admin.rb +45 -2
- data/lib/hatchet/clients/grpc/dispatcher.rb +33 -8
- data/lib/hatchet/condition_converter.rb +20 -12
- data/lib/hatchet/context.rb +6 -1
- data/lib/hatchet/contracts/dispatcher/dispatcher_pb.rb +3 -1
- data/lib/hatchet/contracts/dispatcher/dispatcher_services_pb.rb +1 -0
- data/lib/hatchet/contracts/v1/dispatcher_pb.rb +23 -1
- data/lib/hatchet/contracts/v1/dispatcher_services_pb.rb +2 -0
- data/lib/hatchet/contracts/v1/shared/condition_pb.rb +3 -1
- data/lib/hatchet/contracts/v1/shared/trigger_pb.rb +17 -0
- data/lib/hatchet/contracts/v1/workflows_pb.rb +4 -3
- data/lib/hatchet/contracts/v1/workflows_services_pb.rb +1 -0
- data/lib/hatchet/contracts/workflows/workflows_pb.rb +2 -4
- data/lib/hatchet/contracts/workflows/workflows_services_pb.rb +1 -1
- data/lib/hatchet/durable_context.rb +102 -33
- data/lib/hatchet/engine_version.rb +50 -0
- data/lib/hatchet/eviction_policy.rb +60 -0
- data/lib/hatchet/exceptions.rb +26 -0
- data/lib/hatchet/features/cron.rb +2 -1
- data/lib/hatchet/task.rb +7 -0
- data/lib/hatchet/version.rb +1 -1
- data/lib/hatchet/worker/durable_event_listener.rb +735 -0
- data/lib/hatchet/worker/durable_eviction/cache.rb +205 -0
- data/lib/hatchet/worker/durable_eviction/manager.rb +233 -0
- data/lib/hatchet/worker/runner.rb +279 -53
- data/lib/hatchet/worker_obj.rb +60 -4
- data/lib/hatchet/workflow.rb +8 -4
- data/lib/hatchet-sdk.rb +13 -3
- data/sig/hatchet/clients/grpc/dispatcher.rbs +2 -0
- data/sig/hatchet/durable_context.rbs +8 -2
- data/sig/hatchet/engine_version.rbs +12 -0
- data/sig/hatchet/eviction_policy.rbs +14 -0
- data/sig/hatchet/exceptions.rbs +12 -0
- data/sig/hatchet/task.rbs +2 -0
- data/sig/hatchet/worker/durable_event_listener.rbs +31 -0
- data/sig/hatchet/worker/durable_eviction/cache.rbs +41 -0
- data/sig/hatchet/worker/durable_eviction/manager.rbs +37 -0
- data/sig/hatchet/worker/runner.rbs +7 -1
- data/sig/hatchet/worker_obj.rbs +3 -0
- data/sig/hatchet/workflow.rbs +1 -1
- data/sig/hatchet-sdk.rbs +1 -1
- metadata +15 -4
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "concurrent"
|
|
4
4
|
require "json"
|
|
5
|
+
require "monitor"
|
|
5
6
|
|
|
6
7
|
module Hatchet
|
|
7
8
|
module WorkerRuntime
|
|
@@ -10,7 +11,11 @@ module Hatchet
|
|
|
10
11
|
#
|
|
11
12
|
# The runner receives actions from the action listener, looks up the
|
|
12
13
|
# corresponding task block, sets up context variables, executes the task,
|
|
13
|
-
# and sends the result back to the dispatcher.
|
|
14
|
+
# and sends the result back to the dispatcher. For durable tasks, it wires
|
|
15
|
+
# the :class:`DurableContext` up to the shared
|
|
16
|
+
# :class:`DurableEventListener` and per-run
|
|
17
|
+
# :class:`DurableEviction::DurableEvictionManager` when the engine supports
|
|
18
|
+
# eviction.
|
|
14
19
|
#
|
|
15
20
|
# @example
|
|
16
21
|
# runner = Runner.new(
|
|
@@ -19,36 +24,76 @@ module Hatchet
|
|
|
19
24
|
# dispatcher_client: dispatcher_grpc,
|
|
20
25
|
# event_client: event_grpc,
|
|
21
26
|
# logger: logger,
|
|
22
|
-
# client: hatchet_client
|
|
27
|
+
# client: hatchet_client,
|
|
28
|
+
# engine_version: "v0.80.0",
|
|
29
|
+
# durable_slots: 10,
|
|
23
30
|
# )
|
|
24
31
|
# runner.execute(action)
|
|
25
32
|
class Runner
|
|
33
|
+
STARTED_EVENT_RETRY_COUNT = 5
|
|
34
|
+
STARTED_EVENT_STOP = Object.new
|
|
35
|
+
|
|
26
36
|
# @param workflows [Array<Workflow, Task>] Registered workflows
|
|
27
37
|
# @param slots [Integer] Maximum concurrent task slots
|
|
28
38
|
# @param dispatcher_client [Hatchet::Clients::Grpc::Dispatcher] gRPC dispatcher client
|
|
29
39
|
# @param event_client [Hatchet::Clients::Grpc::EventClient] gRPC event client
|
|
30
40
|
# @param logger [Logger] Logger instance
|
|
31
41
|
# @param client [Hatchet::Client] The Hatchet client
|
|
32
|
-
|
|
42
|
+
# @param engine_version [String, nil] Engine semantic version (from GetVersion)
|
|
43
|
+
# @param durable_slots [Integer, nil] Separate slot count for durable tasks; defaults to ``slots``.
|
|
44
|
+
# @param worker_id [String, nil] Worker ID from registration; stamped onto durable calls that need it.
|
|
45
|
+
def initialize(
|
|
46
|
+
workflows:,
|
|
47
|
+
slots:,
|
|
48
|
+
dispatcher_client:,
|
|
49
|
+
event_client:,
|
|
50
|
+
logger:,
|
|
51
|
+
client:,
|
|
52
|
+
engine_version: nil,
|
|
53
|
+
durable_slots: nil,
|
|
54
|
+
worker_id: nil
|
|
55
|
+
)
|
|
33
56
|
@workflows = workflows
|
|
34
57
|
@slots = slots
|
|
58
|
+
@durable_slots = durable_slots || slots
|
|
35
59
|
@dispatcher_client = dispatcher_client
|
|
36
60
|
@event_client = event_client
|
|
37
61
|
@logger = logger
|
|
38
62
|
@client = client
|
|
63
|
+
@engine_version = engine_version
|
|
64
|
+
@worker_id = worker_id
|
|
39
65
|
|
|
40
|
-
# Thread pool with semaphore for slot management
|
|
41
66
|
@pool = Concurrent::FixedThreadPool.new(slots)
|
|
42
67
|
@semaphore = Concurrent::Semaphore.new(slots)
|
|
43
68
|
|
|
44
|
-
# Build task lookup table
|
|
45
69
|
@task_map = build_task_map
|
|
70
|
+
|
|
71
|
+
@contexts_mu = Monitor.new
|
|
72
|
+
@contexts = {}
|
|
73
|
+
@task_threads = {}
|
|
74
|
+
@step_action_event_queue = Queue.new
|
|
75
|
+
@step_action_event_thread = Thread.new { process_step_action_events }
|
|
76
|
+
|
|
77
|
+
@has_durable_tasks = @task_map.values.any?(&:durable)
|
|
78
|
+
@supports_durable_eviction = supports_durable_eviction?
|
|
79
|
+
|
|
80
|
+
@durable_event_listener = build_durable_event_listener
|
|
81
|
+
@eviction_manager = nil
|
|
82
|
+
@eviction_manager_mu = Mutex.new
|
|
46
83
|
end
|
|
47
84
|
|
|
85
|
+
# @return [WorkerRuntime::DurableEviction::DurableEvictionManager, nil]
|
|
86
|
+
attr_reader :eviction_manager
|
|
87
|
+
|
|
88
|
+
# @return [WorkerRuntime::DurableEventListener, nil]
|
|
89
|
+
attr_reader :durable_event_listener
|
|
90
|
+
|
|
48
91
|
# Execute an action (task assignment) in the thread pool.
|
|
49
92
|
#
|
|
50
93
|
# @param action [AssignedAction] The action from the dispatcher
|
|
51
94
|
def execute(action)
|
|
95
|
+
ensure_eviction_manager_started(action)
|
|
96
|
+
|
|
52
97
|
@semaphore.acquire
|
|
53
98
|
|
|
54
99
|
@pool.post do
|
|
@@ -62,12 +107,97 @@ module Hatchet
|
|
|
62
107
|
#
|
|
63
108
|
# @param timeout [Integer] Seconds to wait for in-progress tasks
|
|
64
109
|
def shutdown(timeout: 30)
|
|
110
|
+
if @eviction_manager
|
|
111
|
+
begin
|
|
112
|
+
@eviction_manager.evict_all_waiting
|
|
113
|
+
rescue StandardError => e
|
|
114
|
+
@logger.warn("Runner: failed to evict waiting durable runs during shutdown: #{e.class}: #{e.message}")
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
|
65
118
|
@pool.shutdown
|
|
66
119
|
@pool.wait_for_termination(timeout)
|
|
120
|
+
stop_step_action_event_thread
|
|
121
|
+
|
|
122
|
+
@durable_event_listener&.stop
|
|
67
123
|
end
|
|
68
124
|
|
|
69
125
|
private
|
|
70
126
|
|
|
127
|
+
def supports_durable_eviction?
|
|
128
|
+
return false unless @engine_version
|
|
129
|
+
|
|
130
|
+
!Hatchet::EngineVersion.semver_less_than?(
|
|
131
|
+
@engine_version,
|
|
132
|
+
Hatchet::MinEngineVersion::DURABLE_EVICTION,
|
|
133
|
+
)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def build_durable_event_listener
|
|
137
|
+
return nil unless @has_durable_tasks && @supports_durable_eviction
|
|
138
|
+
|
|
139
|
+
DurableEventListener.new(
|
|
140
|
+
config: @client.config,
|
|
141
|
+
channel: @client.channel,
|
|
142
|
+
logger: @logger,
|
|
143
|
+
on_server_evict: method(:handle_server_evict),
|
|
144
|
+
)
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def ensure_eviction_manager_started(_action)
|
|
148
|
+
return unless @has_durable_tasks
|
|
149
|
+
return unless @supports_durable_eviction
|
|
150
|
+
|
|
151
|
+
@durable_event_listener&.ensure_started(@worker_id) if @worker_id
|
|
152
|
+
return if @eviction_manager
|
|
153
|
+
|
|
154
|
+
@eviction_manager_mu.synchronize do
|
|
155
|
+
return if @eviction_manager
|
|
156
|
+
|
|
157
|
+
@eviction_manager = DurableEviction::DurableEvictionManager.new(
|
|
158
|
+
durable_slots: @durable_slots,
|
|
159
|
+
cancel_local: method(:eviction_cancel_local),
|
|
160
|
+
request_eviction_with_ack: method(:eviction_request_with_ack),
|
|
161
|
+
logger: @logger,
|
|
162
|
+
)
|
|
163
|
+
@eviction_manager.start
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
def handle_server_evict(durable_task_external_id, invocation_count)
|
|
168
|
+
return unless @eviction_manager
|
|
169
|
+
|
|
170
|
+
@eviction_manager.handle_server_eviction(durable_task_external_id, invocation_count)
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def eviction_cancel_local(action_key)
|
|
174
|
+
thread, ctx = @contexts_mu.synchronize do
|
|
175
|
+
[@task_threads[action_key], @contexts[action_key]]
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
if @durable_event_listener && ctx.is_a?(DurableContext)
|
|
179
|
+
@durable_event_listener.cleanup_task_state(ctx.step_run_id, ctx.invocation_count || 1)
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
thread&.raise(Hatchet::DurableTaskEvictedError.new)
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def eviction_request_with_ack(action_key, rec)
|
|
186
|
+
return unless @durable_event_listener
|
|
187
|
+
|
|
188
|
+
invocation_count = 1
|
|
189
|
+
@contexts_mu.synchronize do
|
|
190
|
+
ctx = @contexts[action_key]
|
|
191
|
+
invocation_count = ctx.invocation_count if ctx.is_a?(DurableContext) && ctx.invocation_count
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
@durable_event_listener.send_evict_invocation(
|
|
195
|
+
rec.step_run_id,
|
|
196
|
+
invocation_count,
|
|
197
|
+
reason: rec.eviction_reason,
|
|
198
|
+
)
|
|
199
|
+
end
|
|
200
|
+
|
|
71
201
|
def build_task_map
|
|
72
202
|
map = {}
|
|
73
203
|
|
|
@@ -76,7 +206,6 @@ module Hatchet
|
|
|
76
206
|
service_name = @client.config.apply_namespace(wf.name.downcase)
|
|
77
207
|
|
|
78
208
|
wf.tasks.each do |name, task|
|
|
79
|
-
# TODO: is this what we do across sdks...
|
|
80
209
|
key = "#{service_name}:#{name}".downcase
|
|
81
210
|
map[key] = task
|
|
82
211
|
end
|
|
@@ -85,7 +214,6 @@ module Hatchet
|
|
|
85
214
|
|
|
86
215
|
map["#{service_name}:on_success"] = wf.on_success if wf.on_success
|
|
87
216
|
elsif wf.is_a?(Task)
|
|
88
|
-
# Standalone task -- the workflow wrapper has the same name
|
|
89
217
|
workflow = wf.workflow
|
|
90
218
|
if workflow
|
|
91
219
|
service_name = @client.config.apply_namespace(workflow.name.downcase)
|
|
@@ -98,35 +226,60 @@ module Hatchet
|
|
|
98
226
|
end
|
|
99
227
|
|
|
100
228
|
def execute_task(action)
|
|
101
|
-
|
|
229
|
+
action_key = nil
|
|
230
|
+
prepare_action_execution(action)
|
|
231
|
+
|
|
232
|
+
task = find_task(action)
|
|
233
|
+
return unless task
|
|
234
|
+
|
|
235
|
+
ctx = build_context(action, task)
|
|
236
|
+
action_key = action_key_for(action)
|
|
237
|
+
configure_durable_context(task, ctx, action, action_key)
|
|
238
|
+
track_action_context(action_key, ctx)
|
|
239
|
+
run_task(action, task, ctx)
|
|
240
|
+
rescue Hatchet::DurableTaskEvictedError => e
|
|
241
|
+
@logger.info("Durable task evicted: #{action.action_id}: #{e.message}")
|
|
242
|
+
rescue NonRetryableError => e
|
|
243
|
+
@logger.error("Non-retryable error in task #{action.action_id}: #{e.message}")
|
|
244
|
+
send_failure(action, e, retryable: false)
|
|
245
|
+
rescue StandardError => e
|
|
246
|
+
@logger.error("Error in task #{action.action_id}: #{e.message}")
|
|
247
|
+
send_failure(action, e, retryable: true)
|
|
248
|
+
ensure
|
|
249
|
+
# CRITICAL: Clean up context vars to prevent leaking to next task
|
|
250
|
+
cleanup_action(action_key) if action_key
|
|
251
|
+
ContextVars.clear
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
def prepare_action_execution(action)
|
|
255
|
+
@logger.debug(
|
|
256
|
+
"Runner: received action action_id=#{action.action_id} step_run_id=#{action.task_run_external_id} " \
|
|
257
|
+
"retry_count=#{action.retry_count} durable_invocation_count=#{extract_invocation_count(action)}",
|
|
258
|
+
)
|
|
102
259
|
ContextVars.set(
|
|
103
260
|
workflow_run_id: action.workflow_run_id,
|
|
104
261
|
step_run_id: action.task_run_external_id,
|
|
105
|
-
worker_id:
|
|
262
|
+
worker_id: worker_id,
|
|
106
263
|
action_key: action.action_id,
|
|
107
264
|
additional_metadata: parse_metadata(action),
|
|
108
265
|
retry_count: action.retry_count,
|
|
109
266
|
)
|
|
110
|
-
|
|
111
|
-
# Send STARTED event
|
|
112
267
|
send_started(action)
|
|
268
|
+
end
|
|
113
269
|
|
|
114
|
-
|
|
270
|
+
def find_task(action)
|
|
115
271
|
task_key = action.action_id.downcase
|
|
116
272
|
task = @task_map[task_key]
|
|
273
|
+
return task if task
|
|
117
274
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
# Parse parent outputs from the action payload
|
|
125
|
-
parent_outputs = parse_parent_outputs(action)
|
|
275
|
+
@logger.error("No task found for action: #{task_key}")
|
|
276
|
+
send_failure(action, StandardError.new("No task found for action: #{task_key}"), retryable: false)
|
|
277
|
+
nil
|
|
278
|
+
end
|
|
126
279
|
|
|
127
|
-
|
|
280
|
+
def build_context(action, task)
|
|
128
281
|
ctx_class = task.durable ? DurableContext : Context
|
|
129
|
-
|
|
282
|
+
ctx_class.new(
|
|
130
283
|
workflow_run_id: action.workflow_run_id,
|
|
131
284
|
step_run_id: action.task_run_external_id,
|
|
132
285
|
action: action,
|
|
@@ -135,43 +288,121 @@ module Hatchet
|
|
|
135
288
|
event_client: @event_client,
|
|
136
289
|
additional_metadata: ContextVars.additional_metadata,
|
|
137
290
|
retry_count: action.retry_count,
|
|
138
|
-
parent_outputs:
|
|
291
|
+
parent_outputs: parse_parent_outputs(action),
|
|
292
|
+
worker_id: ContextVars.worker_id,
|
|
139
293
|
)
|
|
294
|
+
end
|
|
140
295
|
|
|
141
|
-
|
|
142
|
-
|
|
296
|
+
def configure_durable_context(task, ctx, action, action_key)
|
|
297
|
+
return unless task.durable && ctx.is_a?(DurableContext)
|
|
143
298
|
|
|
144
|
-
|
|
145
|
-
ctx.
|
|
299
|
+
ctx.eviction_manager = @eviction_manager
|
|
300
|
+
ctx.action_key = action_key
|
|
301
|
+
ctx.durable_event_listener = @durable_event_listener
|
|
302
|
+
ctx.invocation_count = extract_invocation_count(action)
|
|
303
|
+
ctx.engine_version = @engine_version
|
|
304
|
+
register_durable_run(task, action, action_key, ctx)
|
|
305
|
+
end
|
|
146
306
|
|
|
147
|
-
|
|
148
|
-
|
|
307
|
+
def register_durable_run(task, action, action_key, ctx)
|
|
308
|
+
return unless @eviction_manager && task.eviction_policy
|
|
309
|
+
|
|
310
|
+
@eviction_manager.register_run(
|
|
311
|
+
action_key,
|
|
312
|
+
step_run_id: action.task_run_external_id,
|
|
313
|
+
invocation_count: ctx.invocation_count,
|
|
314
|
+
eviction_policy: task.eviction_policy,
|
|
315
|
+
)
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def track_action_context(action_key, ctx)
|
|
319
|
+
@contexts_mu.synchronize do
|
|
320
|
+
@contexts[action_key] = ctx
|
|
321
|
+
@task_threads[action_key] = Thread.current
|
|
322
|
+
end
|
|
323
|
+
end
|
|
149
324
|
|
|
150
|
-
|
|
325
|
+
def run_task(action, task, ctx)
|
|
326
|
+
input = parse_input(action)
|
|
327
|
+
ctx.deps = resolve_dependencies(task.deps, input, ctx) if task.deps && !task.deps.empty?
|
|
328
|
+
result = task.call(input, ctx)
|
|
151
329
|
send_result(action, result)
|
|
152
|
-
rescue NonRetryableError => e
|
|
153
|
-
@logger.error("Non-retryable error in task #{action.action_id}: #{e.message}")
|
|
154
|
-
send_failure(action, e, retryable: false)
|
|
155
|
-
rescue StandardError => e
|
|
156
|
-
@logger.error("Error in task #{action.action_id}: #{e.message}")
|
|
157
|
-
send_failure(action, e, retryable: true)
|
|
158
|
-
ensure
|
|
159
|
-
# CRITICAL: Clean up context vars to prevent leaking to next task
|
|
160
|
-
ContextVars.clear
|
|
161
330
|
end
|
|
162
331
|
|
|
163
|
-
|
|
332
|
+
def action_key_for(action)
|
|
333
|
+
"#{action.task_run_external_id}/#{action.retry_count}"
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
def extract_invocation_count(action)
|
|
337
|
+
value = action.respond_to?(:durable_task_invocation_count) ? action.durable_task_invocation_count : nil
|
|
338
|
+
value.nil? || value.zero? ? 1 : value
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
def worker_id
|
|
342
|
+
@dispatcher_client.respond_to?(:worker_id) ? @dispatcher_client.worker_id : ""
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
def cleanup_action(action_key)
|
|
346
|
+
@contexts_mu.synchronize do
|
|
347
|
+
@contexts.delete(action_key)
|
|
348
|
+
@task_threads.delete(action_key)
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
@eviction_manager&.unregister_run(action_key)
|
|
352
|
+
end
|
|
353
|
+
|
|
164
354
|
def send_started(action)
|
|
165
|
-
@
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
355
|
+
@step_action_event_queue << action
|
|
356
|
+
end
|
|
357
|
+
|
|
358
|
+
def process_step_action_events
|
|
359
|
+
loop do
|
|
360
|
+
action = @step_action_event_queue.pop
|
|
361
|
+
break if action.equal?(STARTED_EVENT_STOP)
|
|
362
|
+
|
|
363
|
+
send_started_with_retry(action)
|
|
364
|
+
end
|
|
365
|
+
rescue ClosedQueueError
|
|
366
|
+
nil
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
def send_started_with_retry(action)
|
|
370
|
+
attempt = 1
|
|
371
|
+
|
|
372
|
+
loop do
|
|
373
|
+
@dispatcher_client.send_step_action_event(
|
|
374
|
+
action: action,
|
|
375
|
+
event_type: :STEP_EVENT_TYPE_STARTED,
|
|
376
|
+
payload: "{}",
|
|
377
|
+
retry_count: action.retry_count,
|
|
378
|
+
)
|
|
379
|
+
return
|
|
380
|
+
rescue StandardError => e
|
|
381
|
+
@logger.warn(
|
|
382
|
+
"Failed to send STARTED event (#{attempt}/#{STARTED_EVENT_RETRY_COUNT}): #{e.message}",
|
|
383
|
+
)
|
|
384
|
+
raise e if attempt >= STARTED_EVENT_RETRY_COUNT
|
|
385
|
+
|
|
386
|
+
sleep started_event_backoff_seconds(attempt)
|
|
387
|
+
attempt += 1
|
|
388
|
+
end
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
def started_event_backoff_seconds(attempt)
|
|
392
|
+
base = 0.1
|
|
393
|
+
jitter = rand * base
|
|
394
|
+
[((base * (2**attempt)) + jitter), 1.0].min
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
def stop_step_action_event_thread
|
|
398
|
+
return unless @step_action_event_thread
|
|
399
|
+
|
|
400
|
+
@step_action_event_queue << STARTED_EVENT_STOP
|
|
401
|
+
@step_action_event_thread.join(5)
|
|
402
|
+
rescue StandardError
|
|
403
|
+
nil
|
|
172
404
|
end
|
|
173
405
|
|
|
174
|
-
# Send a COMPLETED event with the task result.
|
|
175
406
|
def send_result(action, result)
|
|
176
407
|
payload = result.nil? ? "{}" : JSON.generate(result)
|
|
177
408
|
|
|
@@ -183,7 +414,6 @@ module Hatchet
|
|
|
183
414
|
)
|
|
184
415
|
end
|
|
185
416
|
|
|
186
|
-
# Send a FAILED event with error details.
|
|
187
417
|
def send_failure(action, error, retryable:)
|
|
188
418
|
payload = JSON.generate({ "error" => error.message })
|
|
189
419
|
|
|
@@ -218,7 +448,6 @@ module Hatchet
|
|
|
218
448
|
resolved
|
|
219
449
|
end
|
|
220
450
|
|
|
221
|
-
# Parse additional metadata from the action.
|
|
222
451
|
def parse_metadata(action)
|
|
223
452
|
raw = action.respond_to?(:additional_metadata) ? action.additional_metadata : nil
|
|
224
453
|
return {} if raw.nil? || raw.to_s.empty?
|
|
@@ -228,7 +457,6 @@ module Hatchet
|
|
|
228
457
|
{}
|
|
229
458
|
end
|
|
230
459
|
|
|
231
|
-
# Parse parent task outputs from the action payload.
|
|
232
460
|
def parse_parent_outputs(action)
|
|
233
461
|
raw = action.respond_to?(:action_payload) ? action.action_payload : nil
|
|
234
462
|
return {} if raw.nil? || raw.to_s.empty?
|
|
@@ -239,13 +467,11 @@ module Hatchet
|
|
|
239
467
|
{}
|
|
240
468
|
end
|
|
241
469
|
|
|
242
|
-
# Parse task input from the action payload.
|
|
243
470
|
def parse_input(action)
|
|
244
471
|
raw = action.respond_to?(:action_payload) ? action.action_payload : nil
|
|
245
472
|
return {} if raw.nil? || raw.to_s.empty?
|
|
246
473
|
|
|
247
474
|
parsed = JSON.parse(raw)
|
|
248
|
-
# The input is typically stored under the "input" key in the payload
|
|
249
475
|
parsed.is_a?(Hash) && parsed.key?("input") ? parsed["input"] : parsed
|
|
250
476
|
rescue JSON::ParserError
|
|
251
477
|
{}
|
data/lib/hatchet/worker_obj.rb
CHANGED
|
@@ -30,19 +30,28 @@ module Hatchet
|
|
|
30
30
|
# @return [String, nil] Worker ID assigned by the server
|
|
31
31
|
attr_accessor :worker_id
|
|
32
32
|
|
|
33
|
+
# @return [Integer] Number of durable-task slots (defaults to ``slots``)
|
|
34
|
+
attr_reader :durable_slots
|
|
35
|
+
|
|
36
|
+
# @return [String, nil] Engine semantic version detected on ``start``
|
|
37
|
+
attr_reader :engine_version
|
|
38
|
+
|
|
33
39
|
# @param name [String] Worker name
|
|
34
40
|
# @param client [Hatchet::Client] The Hatchet client
|
|
35
41
|
# @param workflows [Array<Workflow, Task>] Workflows to register
|
|
36
42
|
# @param slots [Integer] Number of concurrent task slots (default: 10)
|
|
43
|
+
# @param durable_slots [Integer, nil] Number of durable-task slots; defaults to ``slots``
|
|
37
44
|
# @param labels [Hash] Worker labels (default: {})
|
|
38
|
-
def initialize(name:, client:, workflows: [], slots: 10, labels: {})
|
|
45
|
+
def initialize(name:, client:, workflows: [], slots: 10, durable_slots: nil, labels: {})
|
|
39
46
|
@name = name
|
|
40
47
|
@client = client
|
|
41
48
|
@workflows = workflows
|
|
42
49
|
@slots = slots
|
|
50
|
+
@durable_slots = durable_slots || slots
|
|
43
51
|
@labels = client.config.worker_preset_labels.merge(labels)
|
|
44
52
|
@worker_id = nil
|
|
45
53
|
@shutdown = false
|
|
54
|
+
@engine_version = nil
|
|
46
55
|
end
|
|
47
56
|
|
|
48
57
|
# Start the worker. This blocks until shutdown is requested.
|
|
@@ -58,7 +67,8 @@ module Hatchet
|
|
|
58
67
|
@client.config.logger.info("Starting worker '#{@name}' with #{@slots} slots")
|
|
59
68
|
@client.config.logger.info("Registering #{@workflows.length} workflow(s)")
|
|
60
69
|
|
|
61
|
-
|
|
70
|
+
check_engine_version
|
|
71
|
+
|
|
62
72
|
register_workflows
|
|
63
73
|
|
|
64
74
|
# Start the health check server if enabled
|
|
@@ -88,6 +98,46 @@ module Hatchet
|
|
|
88
98
|
|
|
89
99
|
private
|
|
90
100
|
|
|
101
|
+
def check_engine_version
|
|
102
|
+
@engine_version = @client.dispatcher_grpc.get_version
|
|
103
|
+
if @engine_version
|
|
104
|
+
@client.config.logger.info("Connected to Hatchet engine #{@engine_version}")
|
|
105
|
+
check_eviction_support
|
|
106
|
+
else
|
|
107
|
+
@client.config.logger.debug(
|
|
108
|
+
"Engine did not report a version (GetVersion unimplemented); assuming pre-eviction compatibility mode.",
|
|
109
|
+
)
|
|
110
|
+
end
|
|
111
|
+
rescue StandardError => e
|
|
112
|
+
@client.config.logger.debug("Failed to fetch engine version: #{e.class}: #{e.message}")
|
|
113
|
+
@engine_version = nil
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def check_eviction_support
|
|
117
|
+
return unless @workflows.any? { |wf| workflow_has_durable_task?(wf) }
|
|
118
|
+
return unless @engine_version
|
|
119
|
+
|
|
120
|
+
return unless Hatchet::EngineVersion.semver_less_than?(
|
|
121
|
+
@engine_version,
|
|
122
|
+
Hatchet::MinEngineVersion::DURABLE_EVICTION,
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
@client.config.logger.warn(
|
|
126
|
+
"Durable task eviction requires engine #{Hatchet::MinEngineVersion::DURABLE_EVICTION} or newer " \
|
|
127
|
+
"(engine reports #{@engine_version}). Falling back to legacy durable-event protocol; eviction policies will be ignored.",
|
|
128
|
+
)
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def workflow_has_durable_task?(workflow)
|
|
132
|
+
if workflow.is_a?(Workflow)
|
|
133
|
+
workflow.tasks.values.any?(&:durable)
|
|
134
|
+
elsif workflow.is_a?(Task)
|
|
135
|
+
workflow.durable
|
|
136
|
+
else
|
|
137
|
+
false
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
|
|
91
141
|
def setup_signal_handlers
|
|
92
142
|
@main_thread = Thread.current
|
|
93
143
|
|
|
@@ -126,9 +176,13 @@ module Hatchet
|
|
|
126
176
|
|
|
127
177
|
# Register the worker with the dispatcher
|
|
128
178
|
response = @client.dispatcher_grpc.register(
|
|
129
|
-
name: @name,
|
|
179
|
+
name: @client.config.apply_namespace(@name),
|
|
130
180
|
actions: action_ids,
|
|
131
181
|
slots: @slots,
|
|
182
|
+
slot_config: {
|
|
183
|
+
"default" => @slots,
|
|
184
|
+
"durable" => @durable_slots,
|
|
185
|
+
},
|
|
132
186
|
labels: @labels,
|
|
133
187
|
)
|
|
134
188
|
|
|
@@ -188,7 +242,6 @@ module Hatchet
|
|
|
188
242
|
def run_action_listener
|
|
189
243
|
@client.config.logger.info("Worker '#{@name}' is running. Press Ctrl+C to stop.")
|
|
190
244
|
|
|
191
|
-
# Create the runner for executing tasks
|
|
192
245
|
runner = WorkerRuntime::Runner.new(
|
|
193
246
|
workflows: @workflows,
|
|
194
247
|
slots: @slots,
|
|
@@ -196,6 +249,9 @@ module Hatchet
|
|
|
196
249
|
event_client: @client.event_grpc,
|
|
197
250
|
logger: @client.config.logger,
|
|
198
251
|
client: @client,
|
|
252
|
+
engine_version: @engine_version,
|
|
253
|
+
durable_slots: @durable_slots,
|
|
254
|
+
worker_id: @worker_id,
|
|
199
255
|
)
|
|
200
256
|
|
|
201
257
|
# Create the action listener with retry/reconnect logic
|
data/lib/hatchet/workflow.rb
CHANGED
|
@@ -110,14 +110,18 @@ module Hatchet
|
|
|
110
110
|
t
|
|
111
111
|
end
|
|
112
112
|
|
|
113
|
-
# Define a durable task within this workflow
|
|
113
|
+
# Define a durable task within this workflow.
|
|
114
114
|
#
|
|
115
115
|
# @param name [Symbol, String] Task name
|
|
116
|
-
# @param
|
|
116
|
+
# @param eviction_policy [Hatchet::EvictionPolicy, nil] Eviction policy for this
|
|
117
|
+
# durable task. Defaults to {Hatchet::DEFAULT_DURABLE_TASK_EVICTION_POLICY}
|
|
118
|
+
# (15-minute TTL, capacity-eviction enabled). Pass ``nil`` to disable
|
|
119
|
+
# eviction entirely for this task.
|
|
120
|
+
# @param opts [Hash] Other Task options forwarded to {#task}.
|
|
117
121
|
# @yield [input, ctx] The task execution block
|
|
118
122
|
# @return [Task] The created durable task
|
|
119
|
-
def durable_task(name, **opts, &)
|
|
120
|
-
task(name, durable: true, **opts, &)
|
|
123
|
+
def durable_task(name, eviction_policy: Hatchet::DEFAULT_DURABLE_TASK_EVICTION_POLICY, **opts, &)
|
|
124
|
+
task(name, durable: true, eviction_policy: eviction_policy, **opts, &)
|
|
121
125
|
end
|
|
122
126
|
|
|
123
127
|
# Define an on_failure task for this workflow
|
data/lib/hatchet-sdk.rb
CHANGED
|
@@ -25,6 +25,8 @@ require_relative "hatchet/features/scheduled"
|
|
|
25
25
|
|
|
26
26
|
# Core classes
|
|
27
27
|
require_relative "hatchet/exceptions"
|
|
28
|
+
require_relative "hatchet/engine_version"
|
|
29
|
+
require_relative "hatchet/eviction_policy"
|
|
28
30
|
require_relative "hatchet/concurrency"
|
|
29
31
|
require_relative "hatchet/conditions"
|
|
30
32
|
require_relative "hatchet/condition_converter"
|
|
@@ -52,6 +54,7 @@ require_relative "hatchet/contracts/events/events_services_pb"
|
|
|
52
54
|
require_relative "hatchet/contracts/workflows/workflows_pb"
|
|
53
55
|
require_relative "hatchet/contracts/workflows/workflows_services_pb"
|
|
54
56
|
require_relative "hatchet/contracts/v1/shared/condition_pb"
|
|
57
|
+
require_relative "hatchet/contracts/v1/shared/trigger_pb"
|
|
55
58
|
require_relative "hatchet/contracts/v1/dispatcher_pb"
|
|
56
59
|
require_relative "hatchet/contracts/v1/dispatcher_services_pb"
|
|
57
60
|
require_relative "hatchet/contracts/v1/workflows_pb"
|
|
@@ -65,6 +68,9 @@ require_relative "hatchet/clients/grpc/event_client"
|
|
|
65
68
|
# Worker runtime
|
|
66
69
|
require_relative "hatchet/worker/action_listener"
|
|
67
70
|
require_relative "hatchet/worker/workflow_run_listener"
|
|
71
|
+
require_relative "hatchet/worker/durable_eviction/cache"
|
|
72
|
+
require_relative "hatchet/worker/durable_eviction/manager"
|
|
73
|
+
require_relative "hatchet/worker/durable_event_listener"
|
|
68
74
|
require_relative "hatchet/worker/runner"
|
|
69
75
|
|
|
70
76
|
# Ruby SDK for Hatchet workflow engine
|
|
@@ -216,17 +222,21 @@ module Hatchet
|
|
|
216
222
|
wf.task(name, **opts, &block)
|
|
217
223
|
end
|
|
218
224
|
|
|
219
|
-
# Create a standalone durable task
|
|
225
|
+
# Create a standalone durable task.
|
|
220
226
|
#
|
|
221
227
|
# @param name [String] Task name
|
|
228
|
+
# @param eviction_policy [Hatchet::EvictionPolicy, nil] Eviction policy for this
|
|
229
|
+
# durable task. Defaults to {Hatchet::DEFAULT_DURABLE_TASK_EVICTION_POLICY}
|
|
230
|
+
# (15-minute TTL, capacity-eviction enabled). Pass ``nil`` to disable
|
|
231
|
+
# eviction entirely for this task.
|
|
222
232
|
# @param opts [Hash] Task options
|
|
223
233
|
# @yield [input, ctx] The task execution block
|
|
224
234
|
# @return [Hatchet::Task]
|
|
225
|
-
def durable_task(name:, **opts, &block)
|
|
235
|
+
def durable_task(name:, eviction_policy: Hatchet::DEFAULT_DURABLE_TASK_EVICTION_POLICY, **opts, &block)
|
|
226
236
|
wf = Workflow.new(name: name, client: self,
|
|
227
237
|
on_events: opts.delete(:on_events) || [],
|
|
228
238
|
default_filters: opts.delete(:default_filters) || [],)
|
|
229
|
-
wf.durable_task(name, **opts, &block)
|
|
239
|
+
wf.durable_task(name, eviction_policy: eviction_policy, **opts, &block)
|
|
230
240
|
end
|
|
231
241
|
|
|
232
242
|
# Create a new worker
|
|
@@ -10,6 +10,7 @@ module Hatchet
|
|
|
10
10
|
name: String,
|
|
11
11
|
actions: Array[String],
|
|
12
12
|
slots: Integer,
|
|
13
|
+
?slot_config: Hash[String, Integer]?,
|
|
13
14
|
?labels: Hash[String, String | Integer]
|
|
14
15
|
) -> untyped
|
|
15
16
|
|
|
@@ -28,6 +29,7 @@ module Hatchet
|
|
|
28
29
|
def release_slot: (step_run_id: String) -> untyped
|
|
29
30
|
def upsert_worker_labels: (worker_id: String, labels: Hash[String, String | Integer]) -> untyped
|
|
30
31
|
def subscribe_to_workflow_runs: (untyped request_enum) -> untyped
|
|
32
|
+
def get_version: () -> String?
|
|
31
33
|
def close: () -> void
|
|
32
34
|
end
|
|
33
35
|
end
|