perago 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
perago/__init__.py ADDED
@@ -0,0 +1,144 @@
1
+ from perago.attempt import assert_current_attempt_snapshot
2
+ from perago.config import ConductorConfig, LakeFSConfig, RuntimeConfig, load_runtime_config
3
+ from perago.errors import (
4
+ GuardrailViolation,
5
+ PostGuardrailViolation,
6
+ PublishBudgetError,
7
+ PublishFenceError,
8
+ PreGuardrailViolation,
9
+ RuntimeConfigError,
10
+ StaleAttemptError,
11
+ TaskDefinitionError,
12
+ TaskInputError,
13
+ )
14
+ from perago.execution import (
15
+ StagedWorkspace,
16
+ build_workspace_free_task_output,
17
+ build_workspace_task_output,
18
+ invoke_workspace_free_task,
19
+ invoke_workspace_task_body,
20
+ run_workspace_free_task_attempt,
21
+ run_workspace_task_attempt,
22
+ )
23
+ from perago.guards import (
24
+ check_guardrails,
25
+ forbid_glob,
26
+ require_dir,
27
+ require_file,
28
+ require_glob,
29
+ )
30
+ from perago.metadata import (
31
+ WorkspacePublicationPlan,
32
+ build_workspace_publication_plan,
33
+ choose_publish_base,
34
+ confirm_metadata_extra,
35
+ find_matching_publication_commit,
36
+ logical_task_key,
37
+ metadata_value,
38
+ perago_metadata,
39
+ staging_branch_name,
40
+ )
41
+ from perago.models import (
42
+ ExecutionLimits,
43
+ PublishBudget,
44
+ RetryPolicy,
45
+ TaskControls,
46
+ TimeoutPolicy,
47
+ WorkspaceInput,
48
+ WorkspaceOutput,
49
+ WorkspaceSpec,
50
+ )
51
+ from perago.task import TaskDefinition, load_module_task, task
52
+ from perago.taskdef import build_taskdef, write_taskdef
53
+ from perago.result import (
54
+ RuntimeTaskResult,
55
+ completed_result,
56
+ failed_result,
57
+ result_for_exception,
58
+ terminal_failed_result,
59
+ )
60
+ from perago.supervisor import WorkerChildSpec, restart_backoff_seconds, worker_child_specs
61
+ from perago.worker_runtime import WorkerRuntime, prepare_worker_runtime
62
+ from perago.workspace import (
63
+ WorkspaceDownloadFile,
64
+ WorkspaceSyncPlan,
65
+ WorkspaceUploadFile,
66
+ assert_workspace_sync_plan_within_budget,
67
+ build_budgeted_workspace_sync_plan,
68
+ build_workspace_sync_plan,
69
+ workspace_delete_object_paths,
70
+ workspace_download_files,
71
+ workspace_local_path,
72
+ workspace_upload_files,
73
+ )
74
+
75
+ __all__ = [
76
+ "ExecutionLimits",
77
+ "ConductorConfig",
78
+ "GuardrailViolation",
79
+ "PostGuardrailViolation",
80
+ "PublishBudgetError",
81
+ "PublishFenceError",
82
+ "PreGuardrailViolation",
83
+ "PublishBudget",
84
+ "RetryPolicy",
85
+ "LakeFSConfig",
86
+ "RuntimeConfig",
87
+ "RuntimeConfigError",
88
+ "RuntimeTaskResult",
89
+ "StagedWorkspace",
90
+ "StaleAttemptError",
91
+ "TaskControls",
92
+ "TaskDefinition",
93
+ "TaskDefinitionError",
94
+ "TaskInputError",
95
+ "TimeoutPolicy",
96
+ "WorkerChildSpec",
97
+ "WorkerRuntime",
98
+ "WorkspaceInput",
99
+ "WorkspaceOutput",
100
+ "WorkspacePublicationPlan",
101
+ "WorkspaceSpec",
102
+ "WorkspaceDownloadFile",
103
+ "WorkspaceSyncPlan",
104
+ "WorkspaceUploadFile",
105
+ "assert_current_attempt_snapshot",
106
+ "assert_workspace_sync_plan_within_budget",
107
+ "build_budgeted_workspace_sync_plan",
108
+ "build_taskdef",
109
+ "build_workspace_free_task_output",
110
+ "build_workspace_publication_plan",
111
+ "build_workspace_sync_plan",
112
+ "build_workspace_task_output",
113
+ "check_guardrails",
114
+ "choose_publish_base",
115
+ "confirm_metadata_extra",
116
+ "completed_result",
117
+ "failed_result",
118
+ "find_matching_publication_commit",
119
+ "forbid_glob",
120
+ "invoke_workspace_free_task",
121
+ "invoke_workspace_task_body",
122
+ "load_module_task",
123
+ "load_runtime_config",
124
+ "logical_task_key",
125
+ "metadata_value",
126
+ "perago_metadata",
127
+ "prepare_worker_runtime",
128
+ "require_dir",
129
+ "require_file",
130
+ "require_glob",
131
+ "result_for_exception",
132
+ "run_workspace_free_task_attempt",
133
+ "run_workspace_task_attempt",
134
+ "restart_backoff_seconds",
135
+ "staging_branch_name",
136
+ "task",
137
+ "terminal_failed_result",
138
+ "worker_child_specs",
139
+ "workspace_delete_object_paths",
140
+ "workspace_download_files",
141
+ "workspace_local_path",
142
+ "workspace_upload_files",
143
+ "write_taskdef",
144
+ ]
perago/_segments.py ADDED
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+
6
def safe_segment(value: object) -> str:
    """Render *value* as a conservative, identifier-like text segment.

    The value is converted with ``str()``; every run of characters outside
    ``A-Za-z0-9_.=-`` collapses into a single underscore, and leading or
    trailing dots/underscores are trimmed. When nothing survives, the
    literal ``"unknown"`` is returned instead of an empty string.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_.=-]+", "_", str(value))
    trimmed = collapsed.strip("._")
    if not trimmed:
        return "unknown"
    return trimmed
perago/attempt.py ADDED
@@ -0,0 +1,22 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from perago.errors import StaleAttemptError
6
+
7
+
8
def assert_current_attempt_snapshot(task: object, fresh: object) -> None:
    """Verify that *fresh* still describes the same in-progress attempt as *task*.

    Args:
        task: The attempt snapshot the caller has been working on.
        fresh: A newly loaded snapshot to compare against.

    Raises:
        StaleAttemptError: Carrying ``task``'s ``task_id`` when *fresh* is not
            ``IN_PROGRESS`` or differs from *task* in ``workflow_instance_id``,
            ``task_id`` or ``retry_count``.
        AttributeError: When either snapshot lacks an attribute that is read.
    """
    identity_fields = ("workflow_instance_id", "task_id", "retry_count")
    # The status check comes first and short-circuits, so the identity
    # fields are only read for snapshots that are still in progress.
    stale = _task_attr(fresh, "status") != "IN_PROGRESS" or any(
        _task_attr(fresh, field) != _task_attr(task, field) for field in identity_fields
    )
    if stale:
        raise StaleAttemptError(_task_attr(task, "task_id"))
16
+
17
+
18
def _task_attr(task: object, name: str) -> Any:
    """Return ``task.<name>``, re-raising a missing attribute with a clearer message.

    Raises:
        AttributeError: When *task* has no attribute *name*; the original
            error is kept as the cause.
    """
    try:
        found = getattr(task, name)
    except AttributeError as missing:
        raise AttributeError(f"task is missing required attribute {name}") from missing
    else:
        return found
perago/cli.py ADDED
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+ from pydantic import ValidationError
7
+ from pydantic.errors import PydanticInvalidForJsonSchema
8
+
9
+ from perago.conductor_runtime import OrkesConductorRuntimeClient
10
+ from perago.config import load_runtime_config
11
+ from perago.errors import RuntimeConfigError, TaskDefinitionError
12
+ from perago.supervisor import run_worker_supervisor
13
+ from perago.task import load_module_task
14
+ from perago.taskdef import build_taskdef, write_taskdef
15
+
16
+
17
+ app = typer.Typer(no_args_is_help=True)
18
+
19
+
20
@app.command()
def check(module_target: str) -> None:
    """Validate one Perago task module and local runtime config."""
    # Loading the config, loading the task and building its TaskDef are done
    # purely for validation; recognised failures are routed through _fail,
    # which prints the message to stderr and exits with code 1.
    try:
        runtime = load_runtime_config(module_target)
        definition = load_module_task(module_target)
        build_taskdef(definition)
    except (TaskDefinitionError, RuntimeConfigError, ValidationError, PydanticInvalidForJsonSchema) as exc:
        _fail(str(exc))
    else:
        typer.echo(f"ok: {definition.name}")
        # Plain config values are echoed as "<field>: <value>".
        for field in ("workspace_root", "log_root", "worker_id_prefix"):
            typer.echo(f"{field}: {getattr(runtime, field)}")
        # Optional integrations are reported only as configured / not configured.
        for integration in ("conductor", "lakefs"):
            typer.echo(f"{integration}: {_configured(getattr(runtime, integration) is not None)}")
35
+
36
+
37
@app.command()
def extract(module_target: str, output: Path = typer.Option(..., "--output", "-o")) -> None:
    """Write generated Conductor TaskDef JSON for one task module."""
    # The runtime config is loaded only so that config errors surface here;
    # its value is not needed to write the TaskDef.
    try:
        load_runtime_config(module_target)
        definition = load_module_task(module_target)
        written = write_taskdef(definition, output)
    except (TaskDefinitionError, RuntimeConfigError, ValidationError, PydanticInvalidForJsonSchema, ValueError) as exc:
        _fail(str(exc))
    else:
        # Echo whatever write_taskdef returned (the written location).
        typer.echo(str(written))
47
+
48
+
49
@app.command()
def start(module_target: str, j: int = typer.Option(1, "-j", min=1)) -> None:
    """Start Conductor worker processes for one Perago task module."""
    # Pre-flight: both Conductor and LakeFS config must be present, the task
    # must load and produce a TaskDef, and that TaskDef must already be
    # registered with the Conductor server.
    try:
        config = load_runtime_config(module_target)
        if config.conductor is None:
            raise RuntimeConfigError("CONDUCTOR_SERVER_URL is required for perago start")
        if config.lakefs is None:
            raise RuntimeConfigError("LakeFS config is required for perago start")
        definition = load_module_task(module_target)
        build_taskdef(definition)
        client = OrkesConductorRuntimeClient.from_config(config.conductor)
        registered = client.taskdef_exists(definition.name)
        if not registered:
            raise RuntimeConfigError(
                f"Conductor TaskDef {definition.name!r} is not registered; run perago extract and register it before start"
            )
    except (TaskDefinitionError, RuntimeConfigError, ValidationError, PydanticInvalidForJsonSchema) as exc:
        _fail(str(exc))
    except Exception as exc:  # noqa: BLE001
        # Anything else raised during pre-flight is reported with this prefix.
        _fail(f"failed to validate Conductor TaskDef: {exc}")
    else:
        # The supervisor runs outside the try block, so its errors propagate.
        run_worker_supervisor(config=config, module_target=module_target, process_count=j)
70
+
71
+
72
def _fail(message: str) -> None:
    """Print ``error: <message>`` to stderr and abort the command with exit code 1.

    Raises:
        typer.Exit: Always, with ``code=1``.
    """
    line = "error: {}".format(message)
    typer.echo(line, err=True)
    raise typer.Exit(code=1)
75
+
76
+
77
def _configured(value: bool) -> str:
    """Return ``"configured"`` for a truthy *value*, else ``"not configured"``."""
    if value:
        return "configured"
    return "not configured"
79
+
80
+
81
+ if __name__ == "__main__":
82
+ app()
perago/conductor_runtime.py ADDED
@@ -0,0 +1,238 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from collections.abc import Mapping
5
+ from dataclasses import dataclass
6
+ from typing import Any, Protocol
7
+
8
+ from conductor.client.configuration.configuration import Configuration
9
+ from conductor.client.http.models.task_result import TaskResult
10
+ from conductor.client.http.models.task_result_status import TaskResultStatus
11
+ from conductor.client.orkes.orkes_metadata_client import OrkesMetadataClient
12
+ from conductor.client.orkes.orkes_task_client import OrkesTaskClient
13
+ from loguru import logger
14
+
15
+ from perago.config import ConductorConfig
16
+ from perago.execution import (
17
+ CleanupStaging,
18
+ DownloadWorkspace,
19
+ LoadCurrentAttempt,
20
+ PublishWorkspace,
21
+ StageWorkspace,
22
+ run_workspace_free_task_attempt,
23
+ run_workspace_task_attempt,
24
+ )
25
+ from perago.result import RuntimeTaskResult
26
+ from perago.task import TaskDefinition
27
+
28
+
29
+ POLL_EMPTY_SLEEP_SECONDS = 1.0
30
+ POLL_ERROR_BACKOFF_SECONDS = 5.0
31
+
32
+
33
@dataclass(frozen=True)
class ConductorTaskAttempt:
    """Immutable snapshot of one Conductor task as used by the worker runtime.

    Instances are frozen, so fields cannot be reassigned after construction.
    Field names mirror the same-named attributes read from the Conductor task.
    """

    # Identity of the attempt; the first two are sent back with the task result.
    workflow_instance_id: str
    task_id: str
    retry_count: int
    # Naming/ordering fields copied from the Conductor task.
    # NOTE(review): exact Conductor semantics of seq/iteration are not shown here — confirm.
    task_def_name: str
    reference_task_name: str
    seq: int
    iteration: int
    # Task status string as reported by Conductor.
    status: str
    # Task input payload; passed to the task runner as the task's input.
    input_data: Mapping[str, Any]
    # Optional; None when the source task carries no retried_task_id.
    retried_task_id: str | None = None
45
+
46
+
47
class ConductorRuntimeClient(Protocol):
    """Structural interface for the Conductor operations the worker runtime uses."""

    def taskdef_exists(self, task_name: str) -> bool:
        """Report whether a TaskDef named *task_name* is registered."""

    def poll_task(
        self,
        task_name: str,
        *,
        worker_id: str,
    ) -> ConductorTaskAttempt | None:
        """Poll for one task of *task_name*; ``None`` means nothing was returned."""

    def get_task(self, task_id: str) -> ConductorTaskAttempt:
        """Load the current snapshot of the task identified by *task_id*."""

    def update_task(
        self,
        attempt: ConductorTaskAttempt,
        result: RuntimeTaskResult,
        *,
        worker_id: str,
    ) -> None:
        """Report *result* for *attempt* on behalf of *worker_id*."""
55
+
56
+
57
class OrkesConductorRuntimeClient:
    """Conductor runtime client backed by the Orkes task and metadata clients."""

    def __init__(
        self,
        *,
        task_client: OrkesTaskClient,
        metadata_client: OrkesMetadataClient,
    ) -> None:
        """Store the two SDK clients this adapter delegates to."""
        self._task_client = task_client
        self._metadata_client = metadata_client

    @classmethod
    def from_config(cls, config: ConductorConfig) -> OrkesConductorRuntimeClient:
        """Build a client whose SDK configuration points at ``config.server_url``."""
        sdk_config = Configuration(server_api_url=config.server_url)
        tasks = OrkesTaskClient(sdk_config)
        metadata = OrkesMetadataClient(sdk_config)
        return cls(task_client=tasks, metadata_client=metadata)

    def taskdef_exists(self, task_name: str) -> bool:
        """Return True when the metadata client can fetch *task_name*.

        A lookup failure that looks like "not found" yields False; any other
        exception is re-raised unchanged.
        """
        try:
            self._metadata_client.get_task_def(task_name)
        except Exception as exc:  # noqa: BLE001
            if not _looks_like_not_found(exc):
                raise
            return False
        else:
            return True

    def poll_task(self, task_name: str, *, worker_id: str) -> ConductorTaskAttempt | None:
        """Poll once for *task_name*; return None when no usable task came back."""
        polled = self._task_client.poll_task(task_name, worker_id=worker_id)
        if polled is None:
            return None
        # A response without a task id (missing, None or empty) counts as an empty poll.
        if getattr(polled, "task_id", None) in {None, ""}:
            return None
        return conductor_task_to_attempt(polled)

    def get_task(self, task_id: str) -> ConductorTaskAttempt:
        """Fetch the task *task_id* and convert it to a ConductorTaskAttempt."""
        fetched = self._task_client.get_task(task_id)
        return conductor_task_to_attempt(fetched)

    def update_task(self, attempt: ConductorTaskAttempt, result: RuntimeTaskResult, *, worker_id: str) -> None:
        """Convert *result* to an SDK TaskResult and send it through the task client."""
        sdk_result = runtime_result_to_sdk_task_result(attempt, result, worker_id=worker_id)
        self._task_client.update_task(sdk_result)
95
+
96
+
97
def conductor_task_to_attempt(task: object) -> ConductorTaskAttempt:
    """Convert a Conductor task (object or mapping) into a ConductorTaskAttempt.

    Fields are read in declaration order, so the first missing required
    field is the one reported.

    Raises:
        AttributeError: When a required field is absent or None.
        TypeError: When ``input_data`` is not a mapping.
    """
    workflow_instance_id = str(_required_task_attr(task, "workflow_instance_id"))
    task_id = str(_required_task_attr(task, "task_id"))
    retry_count = int(_required_task_attr(task, "retry_count"))
    task_def_name = str(_required_task_attr(task, "task_def_name"))
    reference_task_name = str(_required_task_attr(task, "reference_task_name"))
    seq = int(_required_task_attr(task, "seq"))
    # iteration is optional: a missing or falsy value is normalised to 0.
    raw_iteration = _task_attr(task, "iteration", 0)
    iteration = int(raw_iteration or 0)
    status = str(_required_task_attr(task, "status"))
    input_data = _mapping_attr(task, "input_data")
    # retried_task_id is optional and stays None when absent.
    raw_retried = _task_attr(task, "retried_task_id", None)
    retried_task_id = None if raw_retried is None else str(raw_retried)
    return ConductorTaskAttempt(
        workflow_instance_id=workflow_instance_id,
        task_id=task_id,
        retry_count=retry_count,
        task_def_name=task_def_name,
        reference_task_name=reference_task_name,
        seq=seq,
        iteration=iteration,
        status=status,
        input_data=input_data,
        retried_task_id=retried_task_id,
    )
110
+
111
+
112
def runtime_result_to_sdk_task_result(
    attempt: ConductorTaskAttempt,
    result: RuntimeTaskResult,
    *,
    worker_id: str,
) -> TaskResult:
    """Build the SDK ``TaskResult`` that reports *result* for *attempt*.

    A ``COMPLETED`` result carries ``result.output`` as output data; every
    other status carries ``result.reason_for_incompletion`` instead.
    """
    sdk_result = TaskResult(
        workflow_instance_id=attempt.workflow_instance_id,
        task_id=attempt.task_id,
        worker_id=worker_id,
        status=TaskResultStatus(result.status),
    )
    if result.status != "COMPLETED":
        sdk_result.reason_for_incompletion = result.reason_for_incompletion
        return sdk_result
    sdk_result.output_data = result.output
    return sdk_result
129
+
130
+
131
def run_worker_poll_loop(
    *,
    task: TaskDefinition,
    client: ConductorRuntimeClient,
    worker_id: str,
    workspace_root: Any,
    should_stop: Any,
    download_workspace: DownloadWorkspace,
    stage_workspace: StageWorkspace,
    publish_workspace: PublishWorkspace,
    cleanup_staging: CleanupStaging,
    poll_empty_sleep_seconds: float = POLL_EMPTY_SLEEP_SECONDS,
    poll_error_backoff_seconds: float = POLL_ERROR_BACKOFF_SECONDS,
) -> None:
    """Poll, execute and report tasks until ``should_stop()`` returns true.

    Each iteration polls once for ``task.name``. A polling error is logged
    and followed by the error backoff; an empty poll is followed by the
    shorter empty-poll sleep. A polled attempt is executed and its result
    is reported; a failed report is logged and followed by the error
    backoff, after which the loop moves on.
    """

    def reload_attempt(current_attempt: ConductorTaskAttempt) -> ConductorTaskAttempt:
        # Lets the execution layer re-read the attempt from Conductor.
        return client.get_task(current_attempt.task_id)

    while not should_stop():
        try:
            polled = client.poll_task(task.name, worker_id=worker_id)
        except Exception as exc:  # noqa: BLE001
            logger.opt(exception=exc).error("failed to poll Conductor task")
            _sleep_until_stop(poll_error_backoff_seconds, should_stop)
            continue

        if polled is None:
            _sleep_until_stop(poll_empty_sleep_seconds, should_stop)
            continue

        outcome = execute_polled_task(
            task=task,
            attempt=polled,
            workspace_root=workspace_root,
            download_workspace=download_workspace,
            load_current_attempt=reload_attempt,
            stage_workspace=stage_workspace,
            publish_workspace=publish_workspace,
            cleanup_staging=cleanup_staging,
        )
        try:
            client.update_task(polled, outcome, worker_id=worker_id)
        except Exception as exc:  # noqa: BLE001
            bound_logger = logger.bind(
                task_id=polled.task_id,
                workflow_instance_id=polled.workflow_instance_id,
            )
            bound_logger.opt(exception=exc).error("failed to update Conductor task result")
            _sleep_until_stop(poll_error_backoff_seconds, should_stop)
174
+
175
+
176
def execute_polled_task(
    *,
    task: TaskDefinition,
    attempt: ConductorTaskAttempt,
    workspace_root: Any,
    download_workspace: DownloadWorkspace,
    load_current_attempt: LoadCurrentAttempt,
    stage_workspace: StageWorkspace,
    publish_workspace: PublishWorkspace,
    cleanup_staging: CleanupStaging,
) -> RuntimeTaskResult:
    """Run one polled attempt through the runner that matches the task.

    Tasks without a workspace only receive the attempt's input data; tasks
    with a workspace additionally receive the attempt, the workspace root
    and the workspace callbacks.
    """
    payload = attempt.input_data
    if not task.has_workspace:
        return run_workspace_free_task_attempt(task, payload)
    return run_workspace_task_attempt(
        task,
        payload,
        attempt,
        workspace_root,
        download_workspace=download_workspace,
        load_current_attempt=load_current_attempt,
        stage_workspace=stage_workspace,
        publish_workspace=publish_workspace,
        cleanup_staging=cleanup_staging,
    )
200
+
201
+
202
def _sleep_until_stop(seconds: float, should_stop: Any) -> None:
    """Sleep for up to *seconds*, waking early once ``should_stop()`` is true.

    The wait is sliced into naps of at most 0.1 s so a stop request is
    noticed promptly. A zero or negative *seconds* returns immediately.

    Args:
        seconds: Maximum time to wait.
        should_stop: Zero-argument callable; a truthy return ends the wait.
    """
    deadline = time.monotonic() + seconds
    while not should_stop():
        # Read the clock once per iteration: checking the deadline and then
        # recomputing the remainder could go negative if the deadline passed
        # in between, and time.sleep() raises ValueError on negative values.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(0.1, remaining))
206
+
207
+
208
def _required_task_attr(task: object, name: str) -> Any:
    """Return field *name* of *task*, rejecting a missing or None value.

    Raises:
        AttributeError: When the field is absent or its value is None.
    """
    found = _task_attr(task, name, None)
    if found is not None:
        return found
    raise AttributeError(f"Conductor task is missing required field {name}")
213
+
214
+
215
def _task_attr(task: object, name: str, default: Any) -> Any:
    """Read *name* from *task*, falling back to *default* when it is absent.

    Mappings are read by key; any other object is read by attribute.
    """
    return task.get(name, default) if isinstance(task, Mapping) else getattr(task, name, default)
219
+
220
+
221
def _mapping_attr(task: object, name: str) -> Mapping[str, Any]:
    """Return required field *name* of *task*, insisting that it is a mapping.

    Raises:
        AttributeError: When the field is absent or None.
        TypeError: When the field's value is not a ``Mapping``.
    """
    candidate = _required_task_attr(task, name)
    if isinstance(candidate, Mapping):
        return candidate
    raise TypeError(f"Conductor task field {name} must be a mapping")
226
+
227
+
228
def _optional_str(value: object) -> str | None:
    """Return ``str(value)``, passing ``None`` through unchanged."""
    return None if value is None else str(value)
232
+
233
+
234
def _looks_like_not_found(exc: Exception) -> bool:
    """Guess whether *exc* represents an HTTP 404 / "not found" failure.

    A ``status`` (or, failing that, ``status_code``) attribute equal to 404
    is decisive. Otherwise the exception text must contain ``"404"`` and,
    case-insensitively, ``"not"``.
    """
    code = getattr(exc, "status", None) or getattr(exc, "status_code", None)
    if code == 404:
        return True
    text = str(exc)
    return "404" in text and "not" in text.lower()