ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
- ai_pipeline_core/__init__.py +64 -158
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +11 -84
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +102 -90
- ai_pipeline_core/llm/client.py +229 -183
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/deployment/contract.py
@@ -1,7 +1,5 @@
 """Unified pipeline run response contract.
 
-@public
-
 Single source of truth for the response shape used by both
 webhook push (ai-pipeline-core) and polling pull (unified-middleware).
 """
@@ -16,12 +14,10 @@ from pydantic import BaseModel, ConfigDict, Discriminator
 class _RunBase(BaseModel):
     """Common fields on every run response variant."""
 
-    type: str
     flow_run_id: UUID
     project_name: str
     state: str  # PENDING, RUNNING, COMPLETED, FAILED, CRASHED, CANCELLED
     timestamp: datetime
-    storage_uri: str = ""
 
     model_config = ConfigDict(frozen=True)
 
@@ -29,19 +25,19 @@ class _RunBase(BaseModel):
 class PendingRun(_RunBase):
     """Pipeline queued or running but no progress reported yet."""
 
-    type: Literal["pending"] = "pending"
+    type: Literal["pending"] = "pending"
 
 
 class ProgressRun(_RunBase):
     """Pipeline running with step-level progress data."""
 
-    type: Literal["progress"] = "progress"
+    type: Literal["progress"] = "progress"
     step: int
     total_steps: int
     flow_name: str
     status: str  # "started", "completed", "cached"
-    progress: float  # overall 0.0
-    step_progress: float  # within step 0.0
+    progress: float  # overall 0.0-1.0
+    step_progress: float  # within step 0.0-1.0
     message: str
 
 
@@ -57,14 +53,14 @@ class DeploymentResultData(BaseModel):
 class CompletedRun(_RunBase):
     """Pipeline finished (Prefect COMPLETED). Check result.success for business outcome."""
 
-    type: Literal["completed"] = "completed"
+    type: Literal["completed"] = "completed"
     result: DeploymentResultData
 
 
 class FailedRun(_RunBase):
     """Pipeline crashed — execution error, not business logic."""
 
-    type: Literal["failed"] = "failed"
+    type: Literal["failed"] = "failed"
     error: str
     result: DeploymentResultData | None = None
 
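Note: each run variant carries a `type` Literal discriminator and the hunk context shows `Discriminator` imported from pydantic, so consumers can parse a webhook payload into the right variant. A minimal sketch of that pattern; the `RunResponse` alias and the sample payload values are assumptions for illustration, not names or data from the package:

from typing import Annotated, Union
from pydantic import Discriminator, TypeAdapter
from ai_pipeline_core.deployment.contract import CompletedRun, FailedRun, PendingRun, ProgressRun

# Hypothetical union alias; the package may expose its own name for this.
RunResponse = Annotated[
    Union[PendingRun, ProgressRun, CompletedRun, FailedRun],
    Discriminator("type"),
]

adapter = TypeAdapter(RunResponse)
run = adapter.validate_python({
    "type": "progress",
    "flow_run_id": "00000000-0000-0000-0000-000000000000",
    "project_name": "demo",
    "state": "RUNNING",
    "timestamp": "2024-01-01T00:00:00Z",
    "step": 2, "total_steps": 3, "flow_name": "analyze",
    "status": "started", "progress": 0.42, "step_progress": 0.5,
    "message": "working",
})
assert isinstance(run, ProgressRun)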
ai_pipeline_core/deployment/deploy.py (moved from ai_pipeline_core/utils/deploy.py)
@@ -13,7 +13,7 @@ Requirements:
 - Local package installed for flow metadata extraction
 
 Usage:
-    python -m ai_pipeline_core.
+    python -m ai_pipeline_core.deployment.deploy
 """
 
 import argparse
@@ -24,17 +24,17 @@ import sys
 import tempfile
 import tomllib
 import traceback
-from datetime import
+from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 from prefect.cli.deploy._storage import _PullStepStorage  # type: ignore
 from prefect.client.orchestration import get_client
 from prefect.deployments.runner import RunnerDeployment
 from prefect.flows import load_flow_from_entrypoint
+from prefect_gcp.cloud_storage import GcpCredentials, GcsBucket  # pyright: ignore[reportMissingTypeStubs]
 
 from ai_pipeline_core.settings import settings
-from ai_pipeline_core.storage import Storage
 
 # ============================================================================
 # Deployer Class
@@ -60,11 +60,7 @@ class Deployer:
             Configuration dictionary with project metadata and deployment settings.
         """
         if not settings.prefect_gcs_bucket:
-            self._die(
-                "PREFECT_GCS_BUCKET not configured in settings.\n"
-                "Configure via environment variable or .env file:\n"
-                "  PREFECT_GCS_BUCKET=your-bucket-name"
-            )
+            self._die("PREFECT_GCS_BUCKET not configured in settings.\nConfigure via environment variable or .env file:\n  PREFECT_GCS_BUCKET=your-bucket-name")
 
         pyproject_path = Path("pyproject.toml")
         if not pyproject_path.exists():
@@ -110,7 +106,7 @@ class Deployer:
                 "  PREFECT_API_URL=https://api.prefect.cloud/api/accounts/.../workspaces/..."
             )
 
-    def _run(self, cmd: str, check: bool = True) ->
+    def _run(self, cmd: str, *, check: bool = True) -> str | None:
         """Execute shell command and return output.
 
         Args:
@@ -120,22 +116,25 @@ class Deployer:
         Returns:
             Command stdout if successful, None if failed and check=False
         """
-        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=False)
 
         if check and result.returncode != 0:
             self._die(f"Command failed: {cmd}\n{result.stderr}")
 
         return result.stdout.strip() if result.returncode == 0 else None
 
-
+    @staticmethod
+    def _info(msg: str):
         """Print info message."""
         print(f"→ {msg}")
 
-
+    @staticmethod
+    def _success(msg: str):
         """Print success message."""
         print(f"✓ {msg}")
 
-
+    @staticmethod
+    def _die(msg: str):
        """Print error and exit."""
        print(f"✗ {msg}", file=sys.stderr)
        sys.exit(1)
@@ -156,11 +155,7 @@ class Deployer:
         # Verify tarball was created
         tarball_path = Path("dist") / self.config["tarball"]
         if not tarball_path.exists():
-            self._die(
-                f"Build artifact not found: {tarball_path}\n"
-                f"Expected tarball name: {self.config['tarball']}\n"
-                f"Check that pyproject.toml version matches."
-            )
+            self._die(f"Build artifact not found: {tarball_path}\nExpected tarball name: {self.config['tarball']}\nCheck that pyproject.toml version matches.")
 
         self._success(f"Built {tarball_path.name} ({tarball_path.stat().st_size // 1024} KB)")
         return tarball_path
@@ -199,6 +194,7 @@ class Deployer:
             cwd=source_dir,
             capture_output=True,
             text=True,
+            check=False,
         )
         if result.returncode != 0:
             self._die(f"Wheel build failed for {source_dir.name}:\n{result.stderr}")
@@ -214,7 +210,7 @@ class Deployer:
         output.write_bytes(wheels[0].read_bytes())
         return output
 
-    def _build_agents(self) -> dict[str, dict[str, Any]]:
+    def _build_agents(self) -> dict[str, dict[str, Any]]:  # noqa: PLR0914
         """Build agent wheels and manifests for all configured agents.
 
         Returns:
@@ -238,7 +234,7 @@ class Deployer:
         self._info(f"Building {len(agent_config)} agent(s): {', '.join(agent_config)}")
 
         # Build cli-agents wheel once (shared across all agents)
-        cli_agents_dir = Path(cli_agents_source).resolve()
+        cli_agents_dir = Path(cli_agents_source).resolve()  # pyright: ignore[reportArgumentType]
         if not (cli_agents_dir / "pyproject.toml").exists():
             self._die(f"cli-agents source not found at {cli_agents_dir}")
 
@@ -250,10 +246,7 @@ class Deployer:
         for agent_name, config in agent_config.items():
             agent_path = Path(config["path"]).resolve()
             if not (agent_path / "pyproject.toml").exists():
-                self._die(
-                    f"Agent '{agent_name}' path not found: {agent_path}\n"
-                    f"Check [tool.deploy.agents.{agent_name}].path in pyproject.toml"
-                )
+                self._die(f"Agent '{agent_name}' path not found: {agent_path}\nCheck [tool.deploy.agents.{agent_name}].path in pyproject.toml")
 
             # Read module_name from agent's pyproject.toml
             with open(agent_path / "pyproject.toml", "rb") as f:
@@ -298,9 +291,7 @@ class Deployer:
             # skipping packages already built from extra_vendor
             agent_vendor_dir = agent_path / "vendor"
             if agent_vendor_dir.exists():
-                for pkg in list(agent_vendor_dir.glob("*.whl")) + list(
-                    agent_vendor_dir.glob("*.tar.gz")
-                ):
+                for pkg in list(agent_vendor_dir.glob("*.whl")) + list(agent_vendor_dir.glob("*.tar.gz")):
                     pkg_base = pkg.name.split("-")[0].replace("-", "_")
                     if pkg.name not in files and pkg_base not in extra_built:
                         files[pkg.name] = pkg
@@ -312,7 +303,7 @@ class Deployer:
                 "agent_wheel": agent_wheel.name,
                 "cli_agents_wheel": cli_agents_wheel.name,
                 "vendor_packages": vendor_packages,
-                "built_at": datetime.now(
+                "built_at": datetime.now(UTC).isoformat(),
             }
             manifest_json = json.dumps(manifest, indent=2)
 
@@ -321,6 +312,17 @@ class Deployer:
 
         return builds
 
+    def _create_gcs_bucket(self, bucket_folder: str) -> Any:
+        """Create a GcsBucket instance for uploading files.
+
+        Args:
+            bucket_folder: Folder path within the bucket.
+        """
+        creds = GcpCredentials()
+        if hasattr(settings, "gcs_service_account_file") and settings.gcs_service_account_file:
+            creds = GcpCredentials(service_account_file=Path(settings.gcs_service_account_file))
+        return GcsBucket(bucket=self.config["bucket"], bucket_folder=bucket_folder, gcp_credentials=creds)
+
     async def _upload_agents(self, agent_builds: dict[str, dict[str, Any]]):
         """Upload agent bundles to GCS.
 
@@ -330,50 +332,38 @@ class Deployer:
         if not agent_builds:
             return
 
-        flow_folder = self.config["folder"]
-        base_uri = f"gs://{self.config['bucket']}/flows"
-        base_storage = await Storage.from_uri(base_uri)
-        base_storage = base_storage.with_base(flow_folder)
+        flow_folder = self.config["folder"]
 
         for agent_name, build_info in agent_builds.items():
-
-
+            agent_folder = f"{flow_folder}/agents/{agent_name}"
+            bucket = self._create_gcs_bucket(agent_folder)
+            self._info(f"Uploading agent '{agent_name}' bundle to gs://{self.config['bucket']}/{agent_folder}")
 
             # Upload manifest
-            await
-                "manifest.json",
-                build_info["manifest_json"].encode(),
-            )
+            await bucket.write_path("manifest.json", build_info["manifest_json"].encode())
 
             # Upload wheels
             for filename, filepath in build_info["files"].items():
-                await
+                await bucket.write_path(filename, filepath.read_bytes())
 
             self._success(f"Agent '{agent_name}' uploaded ({len(build_info['files'])} files)")
 
     async def _upload_package(self, tarball: Path):
-        """Upload package tarball to Google Cloud Storage
+        """Upload package tarball to Google Cloud Storage.
 
         Args:
             tarball: Path to the tarball to upload
         """
-
-
-        flow_folder = self.config["folder"].split("/", 1)[1] if "/" in self.config["folder"] else ""
-
-        # Initialize storage with gs://bucket-name/flows and set subfolder to flow_folder
-        base_uri = f"gs://{self.config['bucket']}/flows"
-        storage = await Storage.from_uri(base_uri)
-        storage = storage.with_base(flow_folder)
+        flow_folder = self.config["folder"]
+        bucket = self._create_gcs_bucket(flow_folder)
 
-        dest_uri =
+        dest_uri = f"gs://{self.config['bucket']}/{flow_folder}/{tarball.name}"
         self._info(f"Uploading to {dest_uri}")
 
-
-
-        await storage.write_bytes(tarball.name, tarball_bytes)
+        tarball_bytes = tarball.read_bytes()  # noqa: ASYNC240
+        await bucket.write_path(tarball.name, tarball_bytes)
 
-        self._success(f"Package uploaded to {
+        self._success(f"Package uploaded to {flow_folder}/{tarball.name}")
 
     async def _deploy_via_api(self, agent_builds: dict[str, dict[str, Any]] | None = None):
         """Create or update Prefect deployment using RunnerDeployment pattern.
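Note: the uploads above replace the removed ai_pipeline_core.storage.Storage abstraction with prefect_gcp's GcsBucket block. A minimal standalone sketch of the same upload pattern, assuming illustrative bucket, folder, and file names (none are values from the package):

import asyncio
from pathlib import Path

from prefect_gcp.cloud_storage import GcpCredentials, GcsBucket

async def upload_example() -> None:
    # With no service-account file, GcpCredentials falls back to
    # application-default auth, mirroring _create_gcs_bucket above.
    bucket = GcsBucket(
        bucket="my-prefect-bucket",      # assumed bucket name
        bucket_folder="flows/demo",      # assumed folder
        gcp_credentials=GcpCredentials(),
    )
    data = Path("dist/demo-0.1.0.tar.gz").read_bytes()
    await bucket.write_path("demo-0.1.0.tar.gz", data)

asyncio.run(upload_example())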
@@ -437,7 +427,7 @@ class Deployer:
 
         # Create RunnerDeployment
         # This is the official Prefect pattern that handles all the complexity
-        self._info(f"Creating deployment for flow '{flow.name}'")
+        self._info(f"Creating deployment for flow '{flow.name}'")  # pyright: ignore[reportPossiblyUnboundVariable]
 
         # Set AGENT_BUNDLES_URI env var if agents were built
         job_variables: dict[str, Any] = {}
@@ -448,14 +438,13 @@ class Deployer:
 
         deployment = RunnerDeployment(
             name=self.config["package"],
-            flow_name=flow.name,
+            flow_name=flow.name,  # pyright: ignore[reportPossiblyUnboundVariable]
             entrypoint=entrypoint,
             work_pool_name=self.config["work_pool"],
             work_queue_name=self.config["work_queue"],
             tags=[self.config["name"]],
             version=self.config["version"],
-            description=flow.description
-            or f"Deployment for {self.config['package']} v{self.config['version']}",
+            description=flow.description or f"Deployment for {self.config['package']} v{self.config['version']}",  # pyright: ignore[reportPossiblyUnboundVariable]
             storage=_PullStepStorage(pull_steps),
             parameters={},
             job_variables=job_variables,
@@ -466,14 +455,9 @@ class Deployer:
         async with get_client() as client:
             try:
                 work_pool = await client.read_work_pool(self.config["work_pool"])
-                self._success(
-                    f"Work pool '{self.config['work_pool']}' verified (type: {work_pool.type})"
-                )
+                self._success(f"Work pool '{self.config['work_pool']}' verified (type: {work_pool.type})")
             except Exception as e:
-                self._die(
-                    f"Work pool '{self.config['work_pool']}' not accessible: {e}\n"
-                    "Create it in the Prefect UI or with: prefect work-pool create"
-                )
+                self._die(f"Work pool '{self.config['work_pool']}' not accessible: {e}\nCreate it in the Prefect UI or with: prefect work-pool create")
 
             # Apply deployment
             # This automatically handles create vs update based on whether deployment exists
@@ -486,7 +470,7 @@ class Deployer:
             if self.api_url:
                 ui_url = self.api_url.replace("/api/", "/")
                 print(f"\n🌐 View deployment: {ui_url}/deployments/deployment/{deployment_id}")
-                print(f"🚀 Run now: prefect deployment run '{flow.name}/{self.config['package']}'")
+                print(f"🚀 Run now: prefect deployment run '{flow.name}/{self.config['package']}'")  # pyright: ignore[reportPossiblyUnboundVariable]
         except Exception as e:
             self._die(f"Failed to apply deployment: {e}")
 
@@ -530,9 +514,6 @@ def main():
         description="Deploy Prefect flows to GCP using the official RunnerDeployment pattern",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
-Example:
-  python -m ai_pipeline_core.utils.deploy
-
 Prerequisites:
   - Settings configured with PREFECT_API_URL (and optionally PREFECT_API_KEY)
   - Settings configured with PREFECT_GCS_BUCKET
ai_pipeline_core/deployment/helpers.py
@@ -7,12 +7,16 @@ from typing import Any, Literal, TypedDict
 import httpx
 
 from ai_pipeline_core.deployment.contract import CompletedRun, FailedRun, ProgressRun
-from ai_pipeline_core.documents import Document
+from ai_pipeline_core.documents import Document
 from ai_pipeline_core.logging import get_pipeline_logger
 
 logger = get_pipeline_logger(__name__)
 
 
+class DownloadedDocument(Document):
+    """Concrete document for downloaded content."""
+
+
 class StatusPayload(TypedDict):
     """Webhook payload for Prefect state transitions (sub-flow level)."""
 
@@ -22,24 +26,22 @@ class StatusPayload(TypedDict):
     step: int
     total_steps: int
     flow_name: str
-    state: str
+    state: str
     state_name: str
     timestamp: str
 
 
 def class_name_to_deployment_name(class_name: str) -> str:
-    """Convert PascalCase to kebab-case: ResearchPipeline
+    """Convert PascalCase to kebab-case: ResearchPipeline -> research-pipeline."""
     name = re.sub(r"(?<!^)(?=[A-Z])", "-", class_name)
     return name.lower()
 
 
-def extract_generic_params(cls: type) -> tuple[type | None, type | None]:
-    """Extract TOptions and TResult from
-    from ai_pipeline_core.deployment.base import PipelineDeployment  # noqa: PLC0415
-
+def extract_generic_params(cls: type, base_class: type) -> tuple[type | None, type | None]:
+    """Extract TOptions and TResult from a generic base class's args."""
     for base in getattr(cls, "__orig_bases__", []):
         origin = getattr(base, "__origin__", None)
-        if origin is
+        if origin is base_class:
             args = getattr(base, "__args__", ())
             if len(args) == 2:
                 return args[0], args[1]
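Note: a sketch of how these two helpers behave. The Base and MyDeployment classes are stand-ins defined here for illustration; in the package the base passed in would be PipelineDeployment:

from typing import Generic, TypeVar

from ai_pipeline_core.deployment.helpers import (
    class_name_to_deployment_name,
    extract_generic_params,
)

assert class_name_to_deployment_name("ResearchPipeline") == "research-pipeline"

TOptions = TypeVar("TOptions")
TResult = TypeVar("TResult")

class Base(Generic[TOptions, TResult]): ...      # stand-in for PipelineDeployment
class MyDeployment(Base[int, str]): ...          # hypothetical subclass

# Walks __orig_bases__ for the given generic base and returns its two type args.
assert extract_generic_params(MyDeployment, Base) == (int, str)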
@@ -47,22 +49,19 @@ def extract_generic_params(cls: type) -> tuple[type | None, type | None]
     return None, None
 
 
-async def download_documents(
-
-    document_type: type[FlowDocument],
-) -> DocumentList:
-    """Download documents from URLs and return as DocumentList."""
+async def download_documents(urls: list[str]) -> list[Document]:
+    """Download documents from URLs."""
     documents: list[Document] = []
     async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
         for url in urls:
             response = await client.get(url)
             response.raise_for_status()
             filename = url.split("/")[-1].split("?")[0] or "document"
-            documents.append(
-    return
+            documents.append(DownloadedDocument(name=filename, content=response.content))
+    return documents
 
 
-async def upload_documents(documents:
+async def upload_documents(documents: list[Document], url_mapping: dict[str, str]) -> None:
     """Upload documents to their mapped URLs."""
     async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
         for doc in documents:
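Note: download_documents no longer takes a document_type or returns a DocumentList; it returns plain Document instances (concretely DownloadedDocument). A brief usage sketch with a placeholder URL; the name/content attribute access is inferred from the constructor call above, not from documented API:

import asyncio

from ai_pipeline_core.deployment.helpers import download_documents

async def main() -> None:
    # Placeholder URL for illustration only.
    docs = await download_documents(["https://example.com/report.pdf"])
    for doc in docs:
        print(doc.name, len(doc.content))  # assumes Document exposes name/content

asyncio.run(main())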
@@ -94,5 +93,5 @@ async def send_webhook(
                 logger.warning(f"Webhook retry {attempt + 1}/{max_retries}: {e}")
                 await asyncio.sleep(retry_delay)
             else:
-                logger.
+                logger.exception(f"Webhook failed after {max_retries} attempts")
                 raise
ai_pipeline_core/deployment/progress.py (moved from ai_pipeline_core/progress.py)
@@ -1,16 +1,19 @@
-"""
+"""Intra-flow progress tracking with order-preserving webhook delivery."""
 
 import asyncio
+import contextlib
 from collections.abc import Generator
 from contextlib import contextmanager
 from contextvars import ContextVar
 from dataclasses import dataclass
-from datetime import
+from datetime import UTC, datetime
 from uuid import UUID
 
-from ai_pipeline_core.deployment.contract import ProgressRun
 from ai_pipeline_core.logging import get_pipeline_logger
 
+from .contract import ProgressRun
+from .helpers import send_webhook
+
 logger = get_pipeline_logger(__name__)
 
 
@@ -25,9 +28,9 @@ class ProgressContext:
     flow_name: str
     step: int
     total_steps: int
-
-
-
+    total_minutes: float
+    completed_minutes: float
+    current_flow_minutes: float
     queue: asyncio.Queue[ProgressRun | None]
 
 
@@ -35,16 +38,15 @@ _context: ContextVar[ProgressContext | None] = ContextVar("progress_context", de
 
 
 async def update(fraction: float, message: str = "") -> None:
-    """
+    """Report intra-flow progress (0.0-1.0). No-op without context."""
     ctx = _context.get()
     if ctx is None or not ctx.webhook_url:
         return
 
     fraction = max(0.0, min(1.0, fraction))
 
-
-
-    overall = (ctx.completed_weight + ctx.current_flow_weight * fraction) / total_weight
+    if ctx.total_minutes > 0:
+        overall = (ctx.completed_minutes + ctx.current_flow_minutes * fraction) / ctx.total_minutes
     else:
         overall = fraction
     overall = round(max(0.0, min(1.0, overall)), 4)
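Note: a worked example of the minutes-weighted calculation above, with made-up durations (three flows estimated at 10, 30, and 20 minutes; step 2 is halfway done):

# Illustrative numbers only.
completed_minutes, current_flow_minutes, total_minutes = 10.0, 30.0, 60.0
overall = round((completed_minutes + current_flow_minutes * 0.5) / total_minutes, 4)
assert overall == 0.4167  # (10 + 15) / 60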
@@ -53,7 +55,7 @@ async def update(fraction: float, message: str = "") -> None:
         flow_run_id=UUID(ctx.flow_run_id) if ctx.flow_run_id else UUID(int=0),
         project_name=ctx.project_name,
         state="RUNNING",
-        timestamp=datetime.now(
+        timestamp=datetime.now(UTC),
         step=ctx.step,
         total_steps=ctx.total_steps,
         flow_name=ctx.flow_name,
@@ -73,24 +75,20 @@ async def webhook_worker(
     retry_delay: float = 10.0,
 ) -> None:
     """Process webhooks sequentially with retries, preserving order."""
-    from ai_pipeline_core.deployment.helpers import send_webhook  # noqa: PLC0415
-
     while True:
         payload = await queue.get()
         if payload is None:
             queue.task_done()
             break
 
-
+        with contextlib.suppress(Exception):
             await send_webhook(webhook_url, payload, max_retries, retry_delay)
-        except Exception:
-            pass  # Already logged in send_webhook
 
         queue.task_done()
 
 
 @contextmanager
-def flow_context(
+def flow_context(  # noqa: PLR0917
     webhook_url: str,
     project_name: str,
     run_id: str,
@@ -98,12 +96,13 @@ def flow_context(
     flow_name: str,
     step: int,
     total_steps: int,
-
-
+    flow_minutes: tuple[float, ...],
+    completed_minutes: float,
     queue: asyncio.Queue[ProgressRun | None],
 ) -> Generator[None, None, None]:
     """Set up progress context for a flow. Framework internal use."""
-
+    current_flow_minutes = flow_minutes[step - 1] if step <= len(flow_minutes) else 1.0
+    total_minutes = sum(flow_minutes) if flow_minutes else current_flow_minutes
     ctx = ProgressContext(
         webhook_url=webhook_url,
         project_name=project_name,
@@ -112,9 +111,9 @@ def flow_context(
         flow_name=flow_name,
         step=step,
         total_steps=total_steps,
-
-
-
+        total_minutes=total_minutes,
+        completed_minutes=completed_minutes,
+        current_flow_minutes=current_flow_minutes,
        queue=queue,
    )
    token = _context.set(ctx)
@@ -124,4 +123,4 @@ def flow_context(
        _context.reset(token)
 
 
-__all__ = ["
+__all__ = ["ProgressContext", "flow_context", "update", "webhook_worker"]
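Note: flow_context and webhook_worker are described above as framework-internal plumbing; from user code only update() needs to be called, and it is a no-op when no context is active. A minimal sketch of the user-facing side, assuming the module is imported by its new path (the fraction and message are illustrative):

from ai_pipeline_core.deployment import progress

async def analyze_chunks(chunks: list[bytes]) -> None:
    for i, _chunk in enumerate(chunks, start=1):
        # ... do work on the chunk ...
        # Report intra-flow progress; silently ignored outside a flow_context.
        await progress.update(i / len(chunks), f"processed {i}/{len(chunks)} chunks")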
ai_pipeline_core/deployment/remote.py (moved from ai_pipeline_core/utils/remote_deployment.py)
@@ -1,8 +1,9 @@
-"""
+"""Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
 
 import inspect
+from collections.abc import Callable
 from functools import wraps
-from typing import Any,
+from typing import Any, ParamSpec, TypeVar, cast
 
 from prefect import get_client
 from prefect.client.orchestration import PrefectClient
@@ -12,9 +13,9 @@ from prefect.deployments.flow_runs import run_deployment
 from prefect.exceptions import ObjectNotFound
 
 from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
-from ai_pipeline_core.
+from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
+from ai_pipeline_core.pipeline.options import FlowOptions
 from ai_pipeline_core.settings import settings
-from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
 
 P = ParamSpec("P")
 TOptions = TypeVar("TOptions", bound=FlowOptions)
@@ -33,15 +34,13 @@ async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]
     """Run a remote Prefect deployment, trying local client first then remote."""
 
     async def _run(client: PrefectClient, as_subflow: bool) -> Any:
-        fr: FlowRun = await run_deployment(
-            client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow
-        )  # type: ignore
+        fr: FlowRun = await run_deployment(client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow)  # type: ignore
         return await fr.state.result()  # type: ignore
 
     async with get_client() as client:
         try:
             await client.read_deployment_by_name(name=deployment_name)
-            return await _run(client, True)
+            return await _run(client, True)  # noqa: FBT003
         except ObjectNotFound:
             pass
 
@@ -55,11 +54,9 @@ async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]
     ) as client:
         try:
             await client.read_deployment_by_name(name=deployment_name)
-            ctx = AsyncClientContext.model_construct(
-                client=client, _httpx_settings=None, _context_stack=0
-            )
+            ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
             with ctx:
-                return await _run(client, False)
+                return await _run(client, False)  # noqa: FBT003
         except ObjectNotFound:
             pass
 
@@ -74,7 +71,7 @@ def remote_deployment(
     trace_level: TraceLevel = "always",
     trace_cost: float | None = None,
 ) -> Callable[[Callable[P, TResult]], Callable[P, TResult]]:
-    """
+    """Decorator to call PipelineDeployment flows remotely with automatic serialization."""
 
     def decorator(func: Callable[P, TResult]) -> Callable[P, TResult]:
         fname = getattr(func, "__name__", deployment_class.name)
@@ -106,7 +103,7 @@ def remote_deployment(
         if isinstance(result, DeploymentResult):
             return cast(TResult, result)
         if isinstance(result, dict):
-            return cast(TResult, deployment_class.result_type(**result))
+            return cast(TResult, deployment_class.result_type(**cast(dict[str, Any], result)))
         raise TypeError(f"Expected DeploymentResult, got {type(result).__name__}")
 
         traced_wrapper = trace(
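Note: a hedged sketch of how the decorator might be applied. The project module, ResearchDeployment class, and option/result types are hypothetical; the diff only shows that a deployment_class is referenced inside the decorator (for its name and result_type), so passing it as the first argument and the wrapped signature shown here are assumptions:

from ai_pipeline_core.deployment.remote import remote_deployment

# Hypothetical project-specific classes; not part of ai-pipeline-core.
from my_project.deployments import ResearchDeployment, ResearchOptions, ResearchResult

@remote_deployment(ResearchDeployment, trace_level="always")
async def run_research(options: ResearchOptions) -> ResearchResult:
    # The body is presumably not executed locally; the generated wrapper forwards
    # the call to the named Prefect deployment and coerces the returned payload
    # (DeploymentResult or dict) into ResearchDeployment.result_type.
    ...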
|