grctl-sdk-python 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/PKG-INFO +1 -1
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/client/client.py +15 -1
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/__init__.py +14 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/command.py +45 -1
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/directive.py +11 -1
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/errors.py +4 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/history.py +18 -6
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/run_info.py +3 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/manifest.py +7 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/nats_manifest.yaml +8 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/subscriber.py +1 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/__init__.py +2 -1
- grctl_sdk_python-0.3.0/grctl/worker/child.py +40 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/context.py +58 -5
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/errors.py +8 -0
- grctl_sdk_python-0.3.0/grctl/worker/registration.py +117 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/run_manager.py +10 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/runner.py +54 -27
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/worker.py +23 -2
- grctl_sdk_python-0.3.0/grctl/worker/worker_cmd_subscriber.py +62 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/workflow/future.py +27 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/workflow/handle.py +20 -2
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/workflow/workflow.py +62 -16
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/pyproject.toml +1 -1
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/LICENSE +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/README.md +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/__init__.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/client/__init__.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/logging_config.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/api.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/common.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/run_info_helper.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/models/worker.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/__init__.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/connection.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/history_fetch.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/history_sub.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/kv_store.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/nats_client.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/nats/publisher.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/py.typed +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/settings.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/codec.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/logger.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/runtime.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/store.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/worker/task.py +0 -0
- {grctl_sdk_python-0.2.1 → grctl_sdk_python-0.3.0}/grctl/workflow/__init__.py +0 -0
|
@@ -4,6 +4,8 @@ Provides a simple interface for interacting with workflows.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
+
import secrets
|
|
8
|
+
import socket
|
|
7
9
|
from datetime import UTC, datetime, timedelta
|
|
8
10
|
from typing import Any, TypeVar, overload
|
|
9
11
|
|
|
@@ -12,7 +14,12 @@ from ulid import ULID
|
|
|
12
14
|
|
|
13
15
|
from grctl.models import DescribeCmd, GrctlAPIResponse, HistoryEvent, RunInfo
|
|
14
16
|
from grctl.models.command import CmdKind, Command
|
|
15
|
-
from grctl.models.errors import
|
|
17
|
+
from grctl.models.errors import (
|
|
18
|
+
WorkflowAlreadyRunningError,
|
|
19
|
+
WorkflowError,
|
|
20
|
+
WorkflowNotFoundError,
|
|
21
|
+
WorkflowTypeNotRegisteredError,
|
|
22
|
+
)
|
|
16
23
|
from grctl.nats.connection import Connection
|
|
17
24
|
from grctl.nats.history_fetch import fetch_run_history
|
|
18
25
|
from grctl.worker.codec import CodecRegistry
|
|
@@ -24,6 +31,7 @@ _T = TypeVar("_T")
|
|
|
24
31
|
|
|
25
32
|
ErrWorkflowAlreadyRunningCode = 4001
|
|
26
33
|
ErrWorkflowRunNotFoundCode = 4002
|
|
34
|
+
ErrWorkflowTypeNotRegisteredCode = 4004
|
|
27
35
|
|
|
28
36
|
|
|
29
37
|
class Client:
|
|
@@ -32,6 +40,7 @@ class Client:
|
|
|
32
40
|
def __init__(self, connection: Connection, codec: CodecRegistry | None = None) -> None:
|
|
33
41
|
self._connection = connection
|
|
34
42
|
self._codec = codec or CodecRegistry()
|
|
43
|
+
self.id = f"c_{secrets.token_hex(4)}@{socket.gethostname()}"
|
|
35
44
|
|
|
36
45
|
async def describe(self, wf_id: str) -> RunInfo:
|
|
37
46
|
"""Describe the latest run for a workflow ID."""
|
|
@@ -40,6 +49,7 @@ class Client:
|
|
|
40
49
|
kind=CmdKind.run_describe,
|
|
41
50
|
timestamp=datetime.now(UTC),
|
|
42
51
|
msg=DescribeCmd(wf_id=wf_id),
|
|
52
|
+
sender_id=self.id,
|
|
43
53
|
)
|
|
44
54
|
# Use a routing-only RunInfo — publish_cmd only needs wf_id for subject routing.
|
|
45
55
|
routing_info = RunInfo(id="", wf_type="", wf_id=wf_id)
|
|
@@ -103,6 +113,7 @@ class Client:
|
|
|
103
113
|
payload=None,
|
|
104
114
|
connection=self._connection,
|
|
105
115
|
codec=self._codec,
|
|
116
|
+
sender_id=self.id,
|
|
106
117
|
)
|
|
107
118
|
await handle.attach()
|
|
108
119
|
return handle
|
|
@@ -145,6 +156,7 @@ class Client:
|
|
|
145
156
|
connection=self._connection,
|
|
146
157
|
codec=self._codec,
|
|
147
158
|
return_type=return_type,
|
|
159
|
+
sender_id=self.id,
|
|
148
160
|
)
|
|
149
161
|
|
|
150
162
|
# Start the workflow future (subscribe to events and publish run command)
|
|
@@ -156,6 +168,8 @@ class Client:
|
|
|
156
168
|
error_code = response.error.code if response.error else 0
|
|
157
169
|
if error_code == ErrWorkflowAlreadyRunningCode:
|
|
158
170
|
raise WorkflowAlreadyRunningError(f"workflow '{id}' already has an active run: {error_msg}")
|
|
171
|
+
if error_code == ErrWorkflowTypeNotRegisteredCode:
|
|
172
|
+
raise WorkflowTypeNotRegisteredError(f"no worker registered for workflow type '{type}': {error_msg}")
|
|
159
173
|
raise WorkflowError(f"start_workflow failed (code={error_code}): {error_msg}")
|
|
160
174
|
|
|
161
175
|
return handle
|
|
@@ -11,7 +11,11 @@ from grctl.models.command import (
|
|
|
11
11
|
Command,
|
|
12
12
|
DescribeCmd,
|
|
13
13
|
EventCmd,
|
|
14
|
+
EventDef,
|
|
15
|
+
RegisterCmd,
|
|
14
16
|
StartCmd,
|
|
17
|
+
TerminateCmd,
|
|
18
|
+
WorkflowTypeDef,
|
|
15
19
|
command_decoder,
|
|
16
20
|
command_encoder,
|
|
17
21
|
)
|
|
@@ -26,6 +30,7 @@ from grctl.models.directive import (
|
|
|
26
30
|
FailStep,
|
|
27
31
|
Start,
|
|
28
32
|
Step,
|
|
33
|
+
StepPickedUp,
|
|
29
34
|
StepResult,
|
|
30
35
|
Wait,
|
|
31
36
|
directive_decoder,
|
|
@@ -40,11 +45,13 @@ from grctl.models.history import (
|
|
|
40
45
|
ParentEventSent,
|
|
41
46
|
RandomRecorded,
|
|
42
47
|
RunCancelled,
|
|
48
|
+
RunCancelReceived,
|
|
43
49
|
RunCompleted,
|
|
44
50
|
RunEvents,
|
|
45
51
|
RunFailed,
|
|
46
52
|
RunScheduled,
|
|
47
53
|
RunStarted,
|
|
54
|
+
RunTerminated,
|
|
48
55
|
RunTimeout,
|
|
49
56
|
SleepRecorded,
|
|
50
57
|
StepCancelled,
|
|
@@ -79,6 +86,10 @@ __all__ = [ # noqa: RUF022
|
|
|
79
86
|
"CancelCmd",
|
|
80
87
|
"DescribeCmd",
|
|
81
88
|
"EventCmd",
|
|
89
|
+
"RegisterCmd",
|
|
90
|
+
"TerminateCmd",
|
|
91
|
+
"WorkflowTypeDef",
|
|
92
|
+
"EventDef",
|
|
82
93
|
"CmdKind",
|
|
83
94
|
"command_decoder",
|
|
84
95
|
"command_encoder",
|
|
@@ -93,6 +104,7 @@ __all__ = [ # noqa: RUF022
|
|
|
93
104
|
"Fail",
|
|
94
105
|
"FailStep",
|
|
95
106
|
"Step",
|
|
107
|
+
"StepPickedUp",
|
|
96
108
|
"StepResult",
|
|
97
109
|
"Wait",
|
|
98
110
|
"directive_decoder",
|
|
@@ -105,7 +117,9 @@ __all__ = [ # noqa: RUF022
|
|
|
105
117
|
"RunCompleted",
|
|
106
118
|
"RunEvents",
|
|
107
119
|
"RunFailed",
|
|
120
|
+
"RunCancelReceived",
|
|
108
121
|
"RunCancelled",
|
|
122
|
+
"RunTerminated",
|
|
109
123
|
"RunTimeout",
|
|
110
124
|
"StepEvents",
|
|
111
125
|
"StepStarted",
|
|
@@ -13,6 +13,8 @@ class CmdKind(StrEnum):
|
|
|
13
13
|
run_describe = "run.describe"
|
|
14
14
|
run_terminate = "run.terminate"
|
|
15
15
|
run_event = "run.event"
|
|
16
|
+
worker_register = "worker.register"
|
|
17
|
+
worker_terminate_run = "worker.terminate_run"
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
class StartCmd(msgspec.Struct):
|
|
@@ -50,7 +52,41 @@ class TerminateCmd(msgspec.Struct):
|
|
|
50
52
|
reason: str | None
|
|
51
53
|
|
|
52
54
|
|
|
53
|
-
|
|
55
|
+
class WorkerTerminateRunCmd(msgspec.Struct):
|
|
56
|
+
"""Server→worker signal to cancel a specific in-flight run."""
|
|
57
|
+
|
|
58
|
+
run_id: str
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class EventDef(msgspec.Struct, kw_only=True):
|
|
62
|
+
"""Per-event timeout config carried through registration."""
|
|
63
|
+
|
|
64
|
+
name: str
|
|
65
|
+
timeout_ms: int = 0
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class WorkflowTypeDef(msgspec.Struct):
|
|
69
|
+
"""Structural definition of one workflow type reported at registration.
|
|
70
|
+
|
|
71
|
+
Field names and order mirror the Go WorkflowTypeDef msgpack tags.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
type: str
|
|
75
|
+
start_step: str
|
|
76
|
+
steps: list[str]
|
|
77
|
+
events: list[EventDef]
|
|
78
|
+
queries: list[str]
|
|
79
|
+
start_step_timeout_ms: int = 0
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class RegisterCmd(msgspec.Struct):
|
|
83
|
+
"""Worker startup sync of its workflow type catalog to the server."""
|
|
84
|
+
|
|
85
|
+
worker_id: str
|
|
86
|
+
types: list[WorkflowTypeDef]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
type CommandMessage = StartCmd | EventCmd | CancelCmd | DescribeCmd | TerminateCmd | RegisterCmd | WorkerTerminateRunCmd
|
|
54
90
|
|
|
55
91
|
|
|
56
92
|
class Command(msgspec.Struct):
|
|
@@ -58,6 +94,7 @@ class Command(msgspec.Struct):
|
|
|
58
94
|
kind: CmdKind
|
|
59
95
|
timestamp: datetime
|
|
60
96
|
msg: CommandMessage
|
|
97
|
+
sender_id: str = ""
|
|
61
98
|
|
|
62
99
|
|
|
63
100
|
# Factory map for kind-based deserialization
|
|
@@ -67,6 +104,8 @@ command_factories: dict[str, type] = {
|
|
|
67
104
|
"run.describe": DescribeCmd,
|
|
68
105
|
"run.terminate": TerminateCmd,
|
|
69
106
|
"run.event": EventCmd,
|
|
107
|
+
"worker.register": RegisterCmd,
|
|
108
|
+
"worker.terminate_run": WorkerTerminateRunCmd,
|
|
70
109
|
}
|
|
71
110
|
|
|
72
111
|
|
|
@@ -80,12 +119,15 @@ class CommandWire(msgspec.Struct):
|
|
|
80
119
|
k: CmdKind
|
|
81
120
|
m: bytes
|
|
82
121
|
t: datetime
|
|
122
|
+
s: str = ""
|
|
83
123
|
|
|
84
124
|
|
|
85
125
|
def command_encoder(cmd: Command) -> bytes:
|
|
86
126
|
"""Encode command to msgpack with compact wire format."""
|
|
87
127
|
if cmd.msg is None:
|
|
88
128
|
raise ValueError("Command message cannot be None")
|
|
129
|
+
if cmd.sender_id == "":
|
|
130
|
+
raise ValueError("Command sender ID cannot be empty")
|
|
89
131
|
|
|
90
132
|
msg_bytes = msgspec.msgpack.encode(cmd.msg)
|
|
91
133
|
|
|
@@ -94,6 +136,7 @@ def command_encoder(cmd: Command) -> bytes:
|
|
|
94
136
|
k=cmd.kind,
|
|
95
137
|
m=msg_bytes,
|
|
96
138
|
t=cmd.timestamp,
|
|
139
|
+
s=cmd.sender_id,
|
|
97
140
|
)
|
|
98
141
|
|
|
99
142
|
return msgspec.msgpack.encode(wire)
|
|
@@ -114,4 +157,5 @@ def command_decoder(data: bytes) -> Command:
|
|
|
114
157
|
kind=wire.k,
|
|
115
158
|
msg=msg, # ty:ignore[invalid-argument-type]
|
|
116
159
|
timestamp=wire.t,
|
|
160
|
+
sender_id=wire.s,
|
|
117
161
|
)
|
|
@@ -92,6 +92,15 @@ class DirectiveKind(StrEnum):
|
|
|
92
92
|
wait_timeout = "wait_timeout"
|
|
93
93
|
step_result = "step_result"
|
|
94
94
|
fail_step = "fail_step"
|
|
95
|
+
step_picked_up = "step_picked_up"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class StepPickedUp(msgspec.Struct):
|
|
99
|
+
"""Worker directive notifying the server that a step has been picked up for execution."""
|
|
100
|
+
|
|
101
|
+
step_name: str
|
|
102
|
+
worker_id: str
|
|
103
|
+
timestamp: datetime
|
|
95
104
|
|
|
96
105
|
|
|
97
106
|
class StepResult(msgspec.Struct):
|
|
@@ -108,7 +117,7 @@ class StepResult(msgspec.Struct):
|
|
|
108
117
|
duration_ms: int = 0
|
|
109
118
|
|
|
110
119
|
|
|
111
|
-
DirectiveMessage = Start | Cancel | Event | Complete | Fail | Step | Wait | StepResult
|
|
120
|
+
DirectiveMessage = Start | Cancel | Event | Complete | Fail | Step | Wait | StepResult | StepPickedUp
|
|
112
121
|
|
|
113
122
|
|
|
114
123
|
# Factory map for kind-based deserialization
|
|
@@ -121,6 +130,7 @@ directive_factories: dict[str, type[DirectiveMessage]] = {
|
|
|
121
130
|
"step": Step,
|
|
122
131
|
"wait": Wait,
|
|
123
132
|
"step_result": StepResult,
|
|
133
|
+
"step_picked_up": StepPickedUp,
|
|
124
134
|
}
|
|
125
135
|
|
|
126
136
|
|
|
@@ -8,3 +8,7 @@ class WorkflowNotFoundError(WorkflowError):
|
|
|
8
8
|
|
|
9
9
|
class WorkflowAlreadyRunningError(WorkflowError):
|
|
10
10
|
"""Raised when a workflow ID already has an active run."""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class WorkflowTypeNotRegisteredError(WorkflowError):
|
|
14
|
+
"""Raised when the server has no registered worker for the requested workflow type."""
|
|
@@ -12,8 +12,9 @@ class HistoryKind(StrEnum):
|
|
|
12
12
|
run_started = "run.started"
|
|
13
13
|
run_completed = "run.completed"
|
|
14
14
|
run_failed = "run.failed"
|
|
15
|
-
|
|
15
|
+
run_cancel_received = "run.cancel_received"
|
|
16
16
|
run_cancelled = "run.cancelled"
|
|
17
|
+
run_terminated = "run.terminated"
|
|
17
18
|
run_timeout = "run.timeout"
|
|
18
19
|
wait_started = "wait.started"
|
|
19
20
|
wait_timed_out = "wait.timed_out"
|
|
@@ -60,8 +61,8 @@ class RunFailed(msgspec.Struct):
|
|
|
60
61
|
duration_ms: int
|
|
61
62
|
|
|
62
63
|
|
|
63
|
-
class
|
|
64
|
-
"""Workflow cancellation
|
|
64
|
+
class RunCancelReceived(msgspec.Struct):
|
|
65
|
+
"""Workflow cancellation received while a step is in-flight; deferred until step completes."""
|
|
65
66
|
|
|
66
67
|
|
|
67
68
|
class RunCancelled(msgspec.Struct):
|
|
@@ -71,6 +72,13 @@ class RunCancelled(msgspec.Struct):
|
|
|
71
72
|
duration_ms: int
|
|
72
73
|
|
|
73
74
|
|
|
75
|
+
class RunTerminated(msgspec.Struct):
|
|
76
|
+
"""Workflow execution was forcefully terminated."""
|
|
77
|
+
|
|
78
|
+
reason: str
|
|
79
|
+
duration_ms: int
|
|
80
|
+
|
|
81
|
+
|
|
74
82
|
class RunTimeout(msgspec.Struct):
|
|
75
83
|
"""Workflow execution timed out."""
|
|
76
84
|
|
|
@@ -226,18 +234,21 @@ class ParentEventSent(msgspec.Struct):
|
|
|
226
234
|
parent_wf_id: str
|
|
227
235
|
|
|
228
236
|
|
|
229
|
-
RunEvents =
|
|
237
|
+
RunEvents = (
|
|
238
|
+
RunCancelReceived | RunCancelled | RunCompleted | RunFailed | RunScheduled | RunStarted | RunTerminated | RunTimeout
|
|
239
|
+
)
|
|
230
240
|
WaitEvents = WaitStarted | WaitTimedOut | EventReceived
|
|
231
241
|
StepEvents = StepStarted | StepCompleted | StepFailed | StepCancelled | StepTimeout
|
|
232
242
|
TaskEvents = TaskStarted | TaskCompleted | TaskFailed | TaskAttemptFailed | TaskCancelled
|
|
233
243
|
DeterministicEvents = TimestampRecorded | RandomRecorded | UuidRecorded | SleepRecorded
|
|
234
244
|
HistoryEvents = (
|
|
235
|
-
|
|
245
|
+
RunCancelReceived
|
|
236
246
|
| RunCancelled
|
|
237
247
|
| RunCompleted
|
|
238
248
|
| RunFailed
|
|
239
249
|
| RunScheduled
|
|
240
250
|
| RunStarted
|
|
251
|
+
| RunTerminated
|
|
241
252
|
| RunTimeout
|
|
242
253
|
| StepStarted
|
|
243
254
|
| StepCompleted
|
|
@@ -279,8 +290,9 @@ history_factories: dict[str, type] = {
|
|
|
279
290
|
"run.started": RunStarted,
|
|
280
291
|
"run.completed": RunCompleted,
|
|
281
292
|
"run.failed": RunFailed,
|
|
282
|
-
"run.
|
|
293
|
+
"run.cancel_received": RunCancelReceived,
|
|
283
294
|
"run.cancelled": RunCancelled,
|
|
295
|
+
"run.terminated": RunTerminated,
|
|
284
296
|
"run.timeout": RunTimeout,
|
|
285
297
|
"wait.started": WaitStarted,
|
|
286
298
|
"wait.timed_out": WaitTimedOut,
|
|
@@ -42,6 +42,9 @@ class RunInfo(msgspec.Struct, dict=True, omit_defaults=True):
|
|
|
42
42
|
parent_wf_id: str | None = None
|
|
43
43
|
parent_wf_type: str | None = None
|
|
44
44
|
parent_run_id: str | None = None
|
|
45
|
+
# Name of the parent step to trigger when this run reaches a terminal state.
|
|
46
|
+
# Set only for child runs started with a completion callback.
|
|
47
|
+
parent_callback_step: str | None = None
|
|
45
48
|
|
|
46
49
|
# Timing fields
|
|
47
50
|
timeout: int | None = None
|
|
@@ -142,6 +142,13 @@ class NatsManifest:
|
|
|
142
142
|
def api_listener_pattern(self) -> str:
|
|
143
143
|
return self._subject_pattern("api", "listen")
|
|
144
144
|
|
|
145
|
+
def worker_command_subject(self) -> str:
|
|
146
|
+
return self._subject_pattern("api", "worker")
|
|
147
|
+
|
|
148
|
+
def worker_cmd_subject(self, worker_id: str) -> str:
|
|
149
|
+
pattern = self._subject_pattern("worker_cmd", "publish")
|
|
150
|
+
return self._substitute_params(pattern, worker_id=worker_id)
|
|
151
|
+
|
|
145
152
|
def worker_task_filter_subject(self, wf_type: str) -> str:
|
|
146
153
|
pattern = self._subject_pattern("worker_task", "filter")
|
|
147
154
|
return self._substitute_params(pattern, wf_type=wf_type)
|
|
@@ -85,6 +85,7 @@ subjects:
|
|
|
85
85
|
subject_patterns:
|
|
86
86
|
publish: grctl_api.workflow.{wf_id} # Server
|
|
87
87
|
listen: grctl_api.> # Server
|
|
88
|
+
worker: grctl_api.worker # Worker -> Server commands
|
|
88
89
|
|
|
89
90
|
worker_task:
|
|
90
91
|
stream: grctl_state
|
|
@@ -93,3 +94,10 @@ subjects:
|
|
|
93
94
|
listen: grctl_worker_task.>
|
|
94
95
|
queue_group: grctl_worker_{wf_type}
|
|
95
96
|
filter: grctl_worker_task.{wf_type}.>
|
|
97
|
+
|
|
98
|
+
worker_cmd:
|
|
99
|
+
# Server -> Worker out-of-band signals. Core NATS request-reply, no stream.
|
|
100
|
+
stream: null
|
|
101
|
+
subject_patterns:
|
|
102
|
+
publish: grctl_worker_cmd.{worker_id}
|
|
103
|
+
listen: grctl_worker_cmd.{worker_id}
|
|
@@ -107,6 +107,7 @@ class Subscriber:
|
|
|
107
107
|
await msg.ack()
|
|
108
108
|
except asyncio.CancelledError:
|
|
109
109
|
logger.info("Task cancelled directive_id=%s", directive.id)
|
|
110
|
+
await msg.ack()
|
|
110
111
|
except Exception:
|
|
111
112
|
# Fail directive already published by workflow_error_handler inside the runner
|
|
112
113
|
logger.debug("Runner task raised exception directive_id=%s", directive.id)
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
"""Worker module."""
|
|
2
2
|
|
|
3
|
+
from grctl.worker.child import ChildOutcome
|
|
3
4
|
from grctl.worker.context import Context
|
|
4
5
|
from grctl.worker.store import StoreKeyNotFoundError
|
|
5
6
|
from grctl.worker.task import task
|
|
6
7
|
from grctl.worker.worker import Worker
|
|
7
8
|
|
|
8
|
-
__all__ = ["Context", "StoreKeyNotFoundError", "Worker", "task"]
|
|
9
|
+
__all__ = ["ChildOutcome", "Context", "StoreKeyNotFoundError", "Worker", "task"]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import msgspec
|
|
4
|
+
|
|
5
|
+
from grctl.models.common import ErrorDetails
|
|
6
|
+
from grctl.models.errors import WorkflowError
|
|
7
|
+
from grctl.models.run_info import RunStatus
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ChildOutcome[T](msgspec.Struct):
|
|
11
|
+
"""Terminal outcome of a child workflow, delivered to the parent's on_completed_step.
|
|
12
|
+
|
|
13
|
+
A single callback handles both success and failure: check `ok` (or `status`),
|
|
14
|
+
then read `result` for a completed child or `error` for a failed/cancelled one.
|
|
15
|
+
|
|
16
|
+
Parameterize the type to have the result decoded back into the child's return type,
|
|
17
|
+
e.g. `outcome: ChildOutcome[OrderResult]`. Left bare (`ChildOutcome`), the result
|
|
18
|
+
stays the raw msgpack builtins the child returned.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
status: RunStatus
|
|
22
|
+
result: T | None = None
|
|
23
|
+
error: ErrorDetails | None = None
|
|
24
|
+
|
|
25
|
+
@property
|
|
26
|
+
def ok(self) -> bool:
|
|
27
|
+
"""True when the child completed successfully."""
|
|
28
|
+
return self.status == RunStatus.completed
|
|
29
|
+
|
|
30
|
+
def unwrap(self) -> Any:
|
|
31
|
+
"""Return the result if the child completed, else raise WorkflowError.
|
|
32
|
+
|
|
33
|
+
Lets a parent that only cares about the happy path propagate child failures
|
|
34
|
+
without inspecting status explicitly.
|
|
35
|
+
"""
|
|
36
|
+
if self.ok:
|
|
37
|
+
return self.result
|
|
38
|
+
message = self.error.message if self.error else f"child workflow {self.status}"
|
|
39
|
+
error_type = self.error.type if self.error else "WorkflowError"
|
|
40
|
+
raise WorkflowError(f"{error_type}: {message}")
|
|
@@ -84,9 +84,6 @@ class NextBuilder:
|
|
|
84
84
|
)
|
|
85
85
|
|
|
86
86
|
def wait(self, timeout: timedelta | None = None, on_timeout: StepHandler | None = None) -> Directive:
|
|
87
|
-
if (timeout is None) != (on_timeout is None):
|
|
88
|
-
raise ValueError("timeout and on_timeout must both be provided or both omitted")
|
|
89
|
-
|
|
90
87
|
timeout_step_name = getattr(on_timeout, "__name__", "") if on_timeout is not None else ""
|
|
91
88
|
res = StepResult(
|
|
92
89
|
processed_msg_kind=self._current_directive.kind,
|
|
@@ -184,6 +181,10 @@ class Context:
|
|
|
184
181
|
self._next_builder = NextBuilder(run_info, worker_id, store, directive, step_configs)
|
|
185
182
|
self._parent_run = parent_run
|
|
186
183
|
self._workflow_logger = workflow_logger
|
|
184
|
+
# Child handles started during this step. They are single-step-scoped: cross-step
|
|
185
|
+
# coordination uses events/callbacks, not in-memory futures, so any handle still
|
|
186
|
+
# open when the step returns is abandoned and gets discarded.
|
|
187
|
+
self._started_handles: list[WorkflowHandle] = []
|
|
187
188
|
|
|
188
189
|
@property
|
|
189
190
|
def store(self) -> Store:
|
|
@@ -215,6 +216,7 @@ class Context:
|
|
|
215
216
|
id=str(ULID()),
|
|
216
217
|
kind=CmdKind.run_event,
|
|
217
218
|
timestamp=datetime.now(UTC),
|
|
219
|
+
sender_id=self._worker_id,
|
|
218
220
|
msg=EventCmd(
|
|
219
221
|
wf_id=self._parent_run.wf_id,
|
|
220
222
|
event_name=event_name,
|
|
@@ -233,14 +235,23 @@ class Context:
|
|
|
233
235
|
operation_id,
|
|
234
236
|
)
|
|
235
237
|
|
|
236
|
-
async def
|
|
238
|
+
async def start_child(
|
|
237
239
|
self,
|
|
238
240
|
workflow_type: str,
|
|
239
241
|
workflow_id: str,
|
|
240
242
|
workflow_input: dict[str, Any] | None = None,
|
|
241
243
|
workflow_timeout: timedelta | None = None,
|
|
244
|
+
on_completed_step: StepHandler | None = None,
|
|
242
245
|
) -> WorkflowHandle:
|
|
243
|
-
"""Start a child workflow and return its handle.
|
|
246
|
+
"""Start a child workflow and return its handle.
|
|
247
|
+
|
|
248
|
+
When on_completed_step is given, the server triggers that parent step once the
|
|
249
|
+
child reaches any terminal state, passing a ChildOutcome describing the result
|
|
250
|
+
(on success) or the error (on failure/cancellation). The parent typically parks
|
|
251
|
+
with ctx.next.wait() so the callback can wake it.
|
|
252
|
+
"""
|
|
253
|
+
callback_step_name = self._callback_step_name(on_completed_step)
|
|
254
|
+
|
|
244
255
|
runtime = get_step_runtime()
|
|
245
256
|
operation_id = runtime.generate_operation_id(
|
|
246
257
|
"start",
|
|
@@ -263,13 +274,16 @@ class Context:
|
|
|
263
274
|
parent_wf_id=self.run.wf_id,
|
|
264
275
|
parent_wf_type=self.run.wf_type,
|
|
265
276
|
parent_run_id=self.run.id,
|
|
277
|
+
parent_callback_step=callback_step_name,
|
|
266
278
|
created_at=datetime.now(UTC),
|
|
267
279
|
)
|
|
268
280
|
handle = WorkflowHandle(
|
|
269
281
|
run_info=run_info,
|
|
270
282
|
payload=workflow_input,
|
|
271
283
|
connection=runtime.connection,
|
|
284
|
+
sender_id=self._worker_id,
|
|
272
285
|
)
|
|
286
|
+
self._started_handles.append(handle)
|
|
273
287
|
|
|
274
288
|
if future is None:
|
|
275
289
|
await handle.start()
|
|
@@ -280,6 +294,45 @@ class Context:
|
|
|
280
294
|
)
|
|
281
295
|
return handle
|
|
282
296
|
|
|
297
|
+
async def run_child(
|
|
298
|
+
self,
|
|
299
|
+
workflow_type: str,
|
|
300
|
+
workflow_id: str,
|
|
301
|
+
workflow_input: dict[str, Any] | None = None,
|
|
302
|
+
workflow_timeout: timedelta | None = None,
|
|
303
|
+
timeout: float | None = None, # noqa: ASYNC109
|
|
304
|
+
) -> Any:
|
|
305
|
+
"""Start a child workflow and block until it completes, returning its result.
|
|
306
|
+
|
|
307
|
+
Raises WorkflowError on failure, asyncio.CancelledError on cancellation/termination,
|
|
308
|
+
and TimeoutError if the client-side timeout elapses.
|
|
309
|
+
timeout: client-side wait in seconds, independent of the server-side workflow_timeout.
|
|
310
|
+
"""
|
|
311
|
+
handle = await self.start_child(workflow_type, workflow_id, workflow_input, workflow_timeout)
|
|
312
|
+
if not handle.future.is_started:
|
|
313
|
+
await handle.future.start()
|
|
314
|
+
return await handle.result(timeout=timeout)
|
|
315
|
+
|
|
316
|
+
async def discard_started_handles(self) -> None:
|
|
317
|
+
"""Silently release child handles started during this step but not awaited.
|
|
318
|
+
|
|
319
|
+
Called by the runner once the step returns its directive, on both the success
|
|
320
|
+
and failure paths. Stops each handle's history subscription and swallows any
|
|
321
|
+
pending child outcome so an unawaited future does not warn or leak a subscription.
|
|
322
|
+
"""
|
|
323
|
+
for handle in self._started_handles:
|
|
324
|
+
await handle.future.discard()
|
|
325
|
+
self._started_handles.clear()
|
|
326
|
+
|
|
327
|
+
@staticmethod
|
|
328
|
+
def _callback_step_name(on_completed_step: StepHandler | None) -> str | None:
|
|
329
|
+
if on_completed_step is None:
|
|
330
|
+
return None
|
|
331
|
+
step_name = getattr(on_completed_step, "__name__", None)
|
|
332
|
+
if not step_name:
|
|
333
|
+
raise ValueError("on_completed_step must be a named handler function.")
|
|
334
|
+
return step_name
|
|
335
|
+
|
|
283
336
|
async def now(self) -> datetime:
|
|
284
337
|
runtime = get_step_runtime()
|
|
285
338
|
operation_id = runtime.generate_operation_id("now", {})
|
|
@@ -5,6 +5,14 @@ class WorkflowNotRegisteredError(Exception):
|
|
|
5
5
|
super().__init__(f"Workflow type '{workflow_type}' is not registered with the worker.")
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
class RegistrationError(Exception):
|
|
9
|
+
"""Raised when a worker cannot sync its workflow type catalog to the server.
|
|
10
|
+
|
|
11
|
+
A worker that fails to register must not begin processing work, so this
|
|
12
|
+
error propagates out of Worker.start() to fail the process fast.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
8
16
|
class WorkflowRunnerNotFoundError(Exception):
|
|
9
17
|
"""Exception raised when a WorkflowRunner is not found for a given workflow run ID."""
|
|
10
18
|
|