ai-pipeline-core 0.3.4-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ai_pipeline_core/__init__.py +64 -158
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +11 -84
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +102 -90
  37. ai_pipeline_core/llm/client.py +229 -183
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
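Note: several modules moved in 0.4.0 (tracing.py → observability/tracing.py, progress.py → deployment/progress.py, utils/remote_deployment.py → deployment/remote.py; flow/options.py is removed and pipeline/options.py added). A hedged before/after import sketch, based only on the renames listed above and the hunks below:

    # 0.3.4 imports (paths removed in this release):
    #   from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
    #   from ai_pipeline_core.flow.options import FlowOptions
    # 0.4.0 equivalents, as used by deployment/remote.py below:
    from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
    from ai_pipeline_core.pipeline.options import FlowOptions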
ai_pipeline_core/deployment/contract.py
@@ -1,7 +1,5 @@
 """Unified pipeline run response contract.
 
-@public
-
 Single source of truth for the response shape used by both
 webhook push (ai-pipeline-core) and polling pull (unified-middleware).
 """
@@ -16,12 +14,10 @@ from pydantic import BaseModel, ConfigDict, Discriminator
 class _RunBase(BaseModel):
     """Common fields on every run response variant."""
 
-    type: str
     flow_run_id: UUID
     project_name: str
     state: str  # PENDING, RUNNING, COMPLETED, FAILED, CRASHED, CANCELLED
     timestamp: datetime
-    storage_uri: str = ""
 
     model_config = ConfigDict(frozen=True)
 
@@ -29,19 +25,19 @@ class _RunBase(BaseModel):
 class PendingRun(_RunBase):
     """Pipeline queued or running but no progress reported yet."""
 
-    type: Literal["pending"] = "pending"  # pyright: ignore[reportIncompatibleVariableOverride]
+    type: Literal["pending"] = "pending"
 
 
 class ProgressRun(_RunBase):
     """Pipeline running with step-level progress data."""
 
-    type: Literal["progress"] = "progress"  # pyright: ignore[reportIncompatibleVariableOverride]
+    type: Literal["progress"] = "progress"
     step: int
     total_steps: int
     flow_name: str
     status: str  # "started", "completed", "cached"
-    progress: float  # overall 0.0–1.0
-    step_progress: float  # within step 0.0–1.0
+    progress: float  # overall 0.0-1.0
+    step_progress: float  # within step 0.0-1.0
     message: str
 
 
@@ -57,14 +53,14 @@ class DeploymentResultData(BaseModel):
 class CompletedRun(_RunBase):
     """Pipeline finished (Prefect COMPLETED). Check result.success for business outcome."""
 
-    type: Literal["completed"] = "completed"  # pyright: ignore[reportIncompatibleVariableOverride]
+    type: Literal["completed"] = "completed"
     result: DeploymentResultData
 
 
 class FailedRun(_RunBase):
     """Pipeline crashed — execution error, not business logic."""
 
-    type: Literal["failed"] = "failed"  # pyright: ignore[reportIncompatibleVariableOverride]
+    type: Literal["failed"] = "failed"
     error: str
     result: DeploymentResultData | None = None
 
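Note: the variants above form a discriminated union keyed on the `type` field (the module imports pydantic's Discriminator, and the base class no longer declares a plain `type: str`). A minimal consumer-side sketch; the RunResponse alias and parse_run helper are illustrative, not names exported by the package:

    from typing import Annotated, Union

    from pydantic import Field, TypeAdapter

    from ai_pipeline_core.deployment.contract import (
        CompletedRun,
        FailedRun,
        PendingRun,
        ProgressRun,
    )

    # Hypothetical union alias; the package may expose its own.
    RunResponse = Annotated[
        Union[PendingRun, ProgressRun, CompletedRun, FailedRun],
        Field(discriminator="type"),
    ]
    _adapter = TypeAdapter(RunResponse)

    def parse_run(body: bytes | str) -> PendingRun | ProgressRun | CompletedRun | FailedRun:
        # pydantic dispatches on the "type" literal to pick the matching variant.
        return _adapter.validate_json(body)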
ai_pipeline_core/{utils → deployment}/deploy.py
@@ -13,7 +13,7 @@ Requirements:
 - Local package installed for flow metadata extraction
 
 Usage:
-    python -m ai_pipeline_core.utils.deploy
+    python -m ai_pipeline_core.deployment.deploy
 """
 
 import argparse
@@ -24,17 +24,17 @@ import sys
 import tempfile
 import tomllib
 import traceback
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any
 
 from prefect.cli.deploy._storage import _PullStepStorage  # type: ignore
 from prefect.client.orchestration import get_client
 from prefect.deployments.runner import RunnerDeployment
 from prefect.flows import load_flow_from_entrypoint
+from prefect_gcp.cloud_storage import GcpCredentials, GcsBucket  # pyright: ignore[reportMissingTypeStubs]
 
 from ai_pipeline_core.settings import settings
-from ai_pipeline_core.storage import Storage
 
 # ============================================================================
 # Deployer Class
@@ -60,11 +60,7 @@ class Deployer:
             Configuration dictionary with project metadata and deployment settings.
         """
         if not settings.prefect_gcs_bucket:
-            self._die(
-                "PREFECT_GCS_BUCKET not configured in settings.\n"
-                "Configure via environment variable or .env file:\n"
-                "  PREFECT_GCS_BUCKET=your-bucket-name"
-            )
+            self._die("PREFECT_GCS_BUCKET not configured in settings.\nConfigure via environment variable or .env file:\n  PREFECT_GCS_BUCKET=your-bucket-name")
 
         pyproject_path = Path("pyproject.toml")
         if not pyproject_path.exists():
@@ -110,7 +106,7 @@
                 "  PREFECT_API_URL=https://api.prefect.cloud/api/accounts/.../workspaces/..."
             )
 
-    def _run(self, cmd: str, check: bool = True) -> Optional[str]:
+    def _run(self, cmd: str, *, check: bool = True) -> str | None:
         """Execute shell command and return output.
 
         Args:
@@ -120,22 +116,25 @@
         Returns:
             Command stdout if successful, None if failed and check=False
         """
-        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=False)
 
         if check and result.returncode != 0:
             self._die(f"Command failed: {cmd}\n{result.stderr}")
 
         return result.stdout.strip() if result.returncode == 0 else None
 
-    def _info(self, msg: str):
+    @staticmethod
+    def _info(msg: str):
         """Print info message."""
         print(f"→ {msg}")
 
-    def _success(self, msg: str):
+    @staticmethod
+    def _success(msg: str):
         """Print success message."""
         print(f"✓ {msg}")
 
-    def _die(self, msg: str):
+    @staticmethod
+    def _die(msg: str):
         """Print error and exit."""
         print(f"✗ {msg}", file=sys.stderr)
         sys.exit(1)
@@ -156,11 +155,7 @@
         # Verify tarball was created
         tarball_path = Path("dist") / self.config["tarball"]
         if not tarball_path.exists():
-            self._die(
-                f"Build artifact not found: {tarball_path}\n"
-                f"Expected tarball name: {self.config['tarball']}\n"
-                f"Check that pyproject.toml version matches."
-            )
+            self._die(f"Build artifact not found: {tarball_path}\nExpected tarball name: {self.config['tarball']}\nCheck that pyproject.toml version matches.")
 
         self._success(f"Built {tarball_path.name} ({tarball_path.stat().st_size // 1024} KB)")
         return tarball_path
@@ -199,6 +194,7 @@
             cwd=source_dir,
             capture_output=True,
             text=True,
+            check=False,
         )
         if result.returncode != 0:
             self._die(f"Wheel build failed for {source_dir.name}:\n{result.stderr}")
@@ -214,7 +210,7 @@
         output.write_bytes(wheels[0].read_bytes())
         return output
 
-    def _build_agents(self) -> dict[str, dict[str, Any]]:
+    def _build_agents(self) -> dict[str, dict[str, Any]]:  # noqa: PLR0914
         """Build agent wheels and manifests for all configured agents.
 
         Returns:
@@ -238,7 +234,7 @@
         self._info(f"Building {len(agent_config)} agent(s): {', '.join(agent_config)}")
 
         # Build cli-agents wheel once (shared across all agents)
-        cli_agents_dir = Path(cli_agents_source).resolve()
+        cli_agents_dir = Path(cli_agents_source).resolve()  # pyright: ignore[reportArgumentType]
         if not (cli_agents_dir / "pyproject.toml").exists():
             self._die(f"cli-agents source not found at {cli_agents_dir}")
 
@@ -250,10 +246,7 @@
         for agent_name, config in agent_config.items():
             agent_path = Path(config["path"]).resolve()
             if not (agent_path / "pyproject.toml").exists():
-                self._die(
-                    f"Agent '{agent_name}' path not found: {agent_path}\n"
-                    f"Check [tool.deploy.agents.{agent_name}].path in pyproject.toml"
-                )
+                self._die(f"Agent '{agent_name}' path not found: {agent_path}\nCheck [tool.deploy.agents.{agent_name}].path in pyproject.toml")
 
             # Read module_name from agent's pyproject.toml
             with open(agent_path / "pyproject.toml", "rb") as f:
@@ -298,9 +291,7 @@
             # skipping packages already built from extra_vendor
             agent_vendor_dir = agent_path / "vendor"
             if agent_vendor_dir.exists():
-                for pkg in list(agent_vendor_dir.glob("*.whl")) + list(
-                    agent_vendor_dir.glob("*.tar.gz")
-                ):
+                for pkg in list(agent_vendor_dir.glob("*.whl")) + list(agent_vendor_dir.glob("*.tar.gz")):
                     pkg_base = pkg.name.split("-")[0].replace("-", "_")
                     if pkg.name not in files and pkg_base not in extra_built:
                         files[pkg.name] = pkg
@@ -312,7 +303,7 @@
                 "agent_wheel": agent_wheel.name,
                 "cli_agents_wheel": cli_agents_wheel.name,
                 "vendor_packages": vendor_packages,
-                "built_at": datetime.now(timezone.utc).isoformat(),
+                "built_at": datetime.now(UTC).isoformat(),
             }
             manifest_json = json.dumps(manifest, indent=2)
 
@@ -321,6 +312,17 @@
 
         return builds
 
+    def _create_gcs_bucket(self, bucket_folder: str) -> Any:
+        """Create a GcsBucket instance for uploading files.
+
+        Args:
+            bucket_folder: Folder path within the bucket.
+        """
+        creds = GcpCredentials()
+        if hasattr(settings, "gcs_service_account_file") and settings.gcs_service_account_file:
+            creds = GcpCredentials(service_account_file=Path(settings.gcs_service_account_file))
+        return GcsBucket(bucket=self.config["bucket"], bucket_folder=bucket_folder, gcp_credentials=creds)
+
     async def _upload_agents(self, agent_builds: dict[str, dict[str, Any]]):
         """Upload agent bundles to GCS.
 
@@ -330,50 +332,38 @@
         if not agent_builds:
             return
 
-        flow_folder = self.config["folder"].split("/", 1)[1] if "/" in self.config["folder"] else ""
-        base_uri = f"gs://{self.config['bucket']}/flows"
-        base_storage = await Storage.from_uri(base_uri)
-        base_storage = base_storage.with_base(flow_folder)
+        flow_folder = self.config["folder"]
 
         for agent_name, build_info in agent_builds.items():
-            agent_storage = base_storage.with_base(f"agents/{agent_name}")
-            self._info(f"Uploading agent '{agent_name}' bundle to {agent_storage.url_for('')}")
+            agent_folder = f"{flow_folder}/agents/{agent_name}"
+            bucket = self._create_gcs_bucket(agent_folder)
+            self._info(f"Uploading agent '{agent_name}' bundle to gs://{self.config['bucket']}/{agent_folder}")
 
             # Upload manifest
-            await agent_storage.write_bytes(
-                "manifest.json",
-                build_info["manifest_json"].encode(),
-            )
+            await bucket.write_path("manifest.json", build_info["manifest_json"].encode())
 
             # Upload wheels
             for filename, filepath in build_info["files"].items():
-                await agent_storage.write_bytes(filename, filepath.read_bytes())
+                await bucket.write_path(filename, filepath.read_bytes())
 
             self._success(f"Agent '{agent_name}' uploaded ({len(build_info['files'])} files)")
 
     async def _upload_package(self, tarball: Path):
-        """Upload package tarball to Google Cloud Storage using Storage abstraction.
+        """Upload package tarball to Google Cloud Storage.
 
         Args:
            tarball: Path to the tarball to upload
         """
-        # Extract flow_folder from the config folder path
-        # e.g., "flows/ai-document-writer" -> "ai-document-writer"
-        flow_folder = self.config["folder"].split("/", 1)[1] if "/" in self.config["folder"] else ""
-
-        # Initialize storage with gs://bucket-name/flows and set subfolder to flow_folder
-        base_uri = f"gs://{self.config['bucket']}/flows"
-        storage = await Storage.from_uri(base_uri)
-        storage = storage.with_base(flow_folder)
+        flow_folder = self.config["folder"]
+        bucket = self._create_gcs_bucket(flow_folder)
 
-        dest_uri = storage.url_for(tarball.name)
+        dest_uri = f"gs://{self.config['bucket']}/{flow_folder}/{tarball.name}"
         self._info(f"Uploading to {dest_uri}")
 
-        # Read and upload the tarball
-        tarball_bytes = tarball.read_bytes()
-        await storage.write_bytes(tarball.name, tarball_bytes)
+        tarball_bytes = tarball.read_bytes()  # noqa: ASYNC240
+        await bucket.write_path(tarball.name, tarball_bytes)
 
-        self._success(f"Package uploaded to {self.config['folder']}/{tarball.name}")
+        self._success(f"Package uploaded to {flow_folder}/{tarball.name}")
 
     async def _deploy_via_api(self, agent_builds: dict[str, dict[str, Any]] | None = None):
         """Create or update Prefect deployment using RunnerDeployment pattern.
@@ -437,7 +427,7 @@
 
         # Create RunnerDeployment
         # This is the official Prefect pattern that handles all the complexity
-        self._info(f"Creating deployment for flow '{flow.name}'")
+        self._info(f"Creating deployment for flow '{flow.name}'")  # pyright: ignore[reportPossiblyUnboundVariable]
 
         # Set AGENT_BUNDLES_URI env var if agents were built
         job_variables: dict[str, Any] = {}
@@ -448,14 +438,13 @@
 
         deployment = RunnerDeployment(
             name=self.config["package"],
-            flow_name=flow.name,
+            flow_name=flow.name,  # pyright: ignore[reportPossiblyUnboundVariable]
             entrypoint=entrypoint,
             work_pool_name=self.config["work_pool"],
             work_queue_name=self.config["work_queue"],
             tags=[self.config["name"]],
             version=self.config["version"],
-            description=flow.description
-            or f"Deployment for {self.config['package']} v{self.config['version']}",
+            description=flow.description or f"Deployment for {self.config['package']} v{self.config['version']}",  # pyright: ignore[reportPossiblyUnboundVariable]
             storage=_PullStepStorage(pull_steps),
             parameters={},
             job_variables=job_variables,
@@ -466,14 +455,9 @@
         async with get_client() as client:
             try:
                 work_pool = await client.read_work_pool(self.config["work_pool"])
-                self._success(
-                    f"Work pool '{self.config['work_pool']}' verified (type: {work_pool.type})"
-                )
+                self._success(f"Work pool '{self.config['work_pool']}' verified (type: {work_pool.type})")
             except Exception as e:
-                self._die(
-                    f"Work pool '{self.config['work_pool']}' not accessible: {e}\n"
-                    "Create it in the Prefect UI or with: prefect work-pool create"
-                )
+                self._die(f"Work pool '{self.config['work_pool']}' not accessible: {e}\nCreate it in the Prefect UI or with: prefect work-pool create")
 
             # Apply deployment
             # This automatically handles create vs update based on whether deployment exists
@@ -486,7 +470,7 @@
             if self.api_url:
                 ui_url = self.api_url.replace("/api/", "/")
                 print(f"\n🌐 View deployment: {ui_url}/deployments/deployment/{deployment_id}")
-            print(f"🚀 Run now: prefect deployment run '{flow.name}/{self.config['package']}'")
+            print(f"🚀 Run now: prefect deployment run '{flow.name}/{self.config['package']}'")  # pyright: ignore[reportPossiblyUnboundVariable]
         except Exception as e:
             self._die(f"Failed to apply deployment: {e}")
 
@@ -530,9 +514,6 @@ def main():
         description="Deploy Prefect flows to GCP using the official RunnerDeployment pattern",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
-Example:
-    python -m ai_pipeline_core.utils.deploy
-
 Prerequisites:
   - Settings configured with PREFECT_API_URL (and optionally PREFECT_API_KEY)
   - Settings configured with PREFECT_GCS_BUCKET
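Note: uploads now go through prefect_gcp blocks instead of the removed Storage abstraction (see _create_gcs_bucket and _upload_package above). A standalone sketch of that pattern, with placeholder bucket, folder, and file names; GcpCredentials() with no arguments falls back to ambient application-default credentials:

    import asyncio

    from prefect_gcp.cloud_storage import GcpCredentials, GcsBucket

    async def upload_artifact(bucket_name: str, folder: str, filename: str, data: bytes) -> None:
        # One GcsBucket block per destination folder, mirroring the Deployer.
        bucket = GcsBucket(
            bucket=bucket_name,
            bucket_folder=folder,
            gcp_credentials=GcpCredentials(),
        )
        await bucket.write_path(filename, data)

    # Placeholder values for illustration only.
    asyncio.run(upload_artifact("my-bucket", "flows/my-flow", "pkg-0.1.0.tar.gz", b"..."))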
ai_pipeline_core/deployment/helpers.py
@@ -7,12 +7,16 @@ from typing import Any, Literal, TypedDict
 import httpx
 
 from ai_pipeline_core.deployment.contract import CompletedRun, FailedRun, ProgressRun
-from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
+from ai_pipeline_core.documents import Document
 from ai_pipeline_core.logging import get_pipeline_logger
 
 logger = get_pipeline_logger(__name__)
 
 
+class DownloadedDocument(Document):
+    """Concrete document for downloaded content."""
+
+
 class StatusPayload(TypedDict):
     """Webhook payload for Prefect state transitions (sub-flow level)."""
 
@@ -22,24 +26,22 @@ class StatusPayload(TypedDict):
     step: int
     total_steps: int
     flow_name: str
-    state: str  # RUNNING, COMPLETED, FAILED, CRASHED, CANCELLED
+    state: str
     state_name: str
     timestamp: str
 
 
 def class_name_to_deployment_name(class_name: str) -> str:
-    """Convert PascalCase to kebab-case: ResearchPipeline → research-pipeline."""
+    """Convert PascalCase to kebab-case: ResearchPipeline -> research-pipeline."""
     name = re.sub(r"(?<!^)(?=[A-Z])", "-", class_name)
     return name.lower()
 
 
-def extract_generic_params(cls: type) -> tuple[type | None, type | None]:
-    """Extract TOptions and TResult from PipelineDeployment generic args."""
-    from ai_pipeline_core.deployment.base import PipelineDeployment  # noqa: PLC0415
-
+def extract_generic_params(cls: type, base_class: type) -> tuple[type | None, type | None]:
+    """Extract TOptions and TResult from a generic base class's args."""
     for base in getattr(cls, "__orig_bases__", []):
         origin = getattr(base, "__origin__", None)
-        if origin is PipelineDeployment:
+        if origin is base_class:
             args = getattr(base, "__args__", ())
             if len(args) == 2:
                 return args[0], args[1]
@@ -47,22 +49,19 @@ def extract_generic_params(cls: type) -> tuple[type | None, type | None]:
     return None, None
 
 
-async def download_documents(
-    urls: list[str],
-    document_type: type[FlowDocument],
-) -> DocumentList:
-    """Download documents from URLs and return as DocumentList."""
+async def download_documents(urls: list[str]) -> list[Document]:
+    """Download documents from URLs."""
     documents: list[Document] = []
     async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
         for url in urls:
             response = await client.get(url)
             response.raise_for_status()
             filename = url.split("/")[-1].split("?")[0] or "document"
-            documents.append(document_type(name=filename, content=response.content))
-    return DocumentList(documents)
+            documents.append(DownloadedDocument(name=filename, content=response.content))
+    return documents
 
 
-async def upload_documents(documents: DocumentList, url_mapping: dict[str, str]) -> None:
+async def upload_documents(documents: list[Document], url_mapping: dict[str, str]) -> None:
     """Upload documents to their mapped URLs."""
     async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
         for doc in documents:
@@ -94,5 +93,5 @@ async def send_webhook(
             logger.warning(f"Webhook retry {attempt + 1}/{max_retries}: {e}")
             await asyncio.sleep(retry_delay)
         else:
-            logger.error(f"Webhook failed after {max_retries} attempts: {e}")
+            logger.exception(f"Webhook failed after {max_retries} attempts")
             raise
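Note: download_documents no longer takes a document_type and both helpers work with plain lists of Document instead of DocumentList. A hedged usage sketch, assuming the helpers stay importable from ai_pipeline_core.deployment.helpers; the URL is a placeholder:

    import asyncio

    from ai_pipeline_core.deployment.helpers import (
        class_name_to_deployment_name,
        download_documents,
    )

    async def main() -> None:
        # Each URL becomes a DownloadedDocument named after the final path segment.
        docs = await download_documents(["https://example.com/report.pdf"])
        for doc in docs:
            print(doc.name)

    print(class_name_to_deployment_name("ResearchPipeline"))  # research-pipeline
    asyncio.run(main())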
ai_pipeline_core/{progress.py → deployment/progress.py}
@@ -1,16 +1,19 @@
-"""@public Intra-flow progress tracking with order-preserving webhook delivery."""
+"""Intra-flow progress tracking with order-preserving webhook delivery."""
 
 import asyncio
+import contextlib
 from collections.abc import Generator
 from contextlib import contextmanager
 from contextvars import ContextVar
 from dataclasses import dataclass
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from uuid import UUID
 
-from ai_pipeline_core.deployment.contract import ProgressRun
 from ai_pipeline_core.logging import get_pipeline_logger
 
+from .contract import ProgressRun
+from .helpers import send_webhook
+
 logger = get_pipeline_logger(__name__)
 
 
@@ -25,9 +28,9 @@ class ProgressContext:
     flow_name: str
     step: int
     total_steps: int
-    weights: tuple[float, ...]
-    completed_weight: float
-    current_flow_weight: float
+    total_minutes: float
+    completed_minutes: float
+    current_flow_minutes: float
     queue: asyncio.Queue[ProgressRun | None]
 
 
@@ -35,16 +38,15 @@ _context: ContextVar[ProgressContext | None] = ContextVar("progress_context", de
 
 
 async def update(fraction: float, message: str = "") -> None:
-    """@public Report intra-flow progress (0.0-1.0). No-op without context."""
+    """Report intra-flow progress (0.0-1.0). No-op without context."""
     ctx = _context.get()
     if ctx is None or not ctx.webhook_url:
         return
 
     fraction = max(0.0, min(1.0, fraction))
 
-    total_weight = sum(ctx.weights)
-    if total_weight > 0:
-        overall = (ctx.completed_weight + ctx.current_flow_weight * fraction) / total_weight
+    if ctx.total_minutes > 0:
+        overall = (ctx.completed_minutes + ctx.current_flow_minutes * fraction) / ctx.total_minutes
     else:
         overall = fraction
     overall = round(max(0.0, min(1.0, overall)), 4)
@@ -53,7 +55,7 @@ async def update(fraction: float, message: str = "") -> None:
         flow_run_id=UUID(ctx.flow_run_id) if ctx.flow_run_id else UUID(int=0),
         project_name=ctx.project_name,
         state="RUNNING",
-        timestamp=datetime.now(timezone.utc),
+        timestamp=datetime.now(UTC),
         step=ctx.step,
         total_steps=ctx.total_steps,
         flow_name=ctx.flow_name,
@@ -73,24 +75,20 @@ async def webhook_worker(
     retry_delay: float = 10.0,
 ) -> None:
     """Process webhooks sequentially with retries, preserving order."""
-    from ai_pipeline_core.deployment.helpers import send_webhook  # noqa: PLC0415
-
     while True:
         payload = await queue.get()
         if payload is None:
             queue.task_done()
             break
 
-        try:
+        with contextlib.suppress(Exception):
             await send_webhook(webhook_url, payload, max_retries, retry_delay)
-        except Exception:
-            pass  # Already logged in send_webhook
 
         queue.task_done()
 
 
 @contextmanager
-def flow_context(
+def flow_context(  # noqa: PLR0917
     webhook_url: str,
     project_name: str,
     run_id: str,
@@ -98,12 +96,13 @@ def flow_context(
     flow_name: str,
     step: int,
     total_steps: int,
-    weights: tuple[float, ...],
-    completed_weight: float,
+    flow_minutes: tuple[float, ...],
+    completed_minutes: float,
     queue: asyncio.Queue[ProgressRun | None],
 ) -> Generator[None, None, None]:
     """Set up progress context for a flow. Framework internal use."""
-    current_flow_weight = weights[step - 1] if step <= len(weights) else 1.0
+    current_flow_minutes = flow_minutes[step - 1] if step <= len(flow_minutes) else 1.0
+    total_minutes = sum(flow_minutes) if flow_minutes else current_flow_minutes
     ctx = ProgressContext(
         webhook_url=webhook_url,
         project_name=project_name,
@@ -112,9 +111,9 @@ def flow_context(
         flow_name=flow_name,
         step=step,
         total_steps=total_steps,
-        weights=weights,
-        completed_weight=completed_weight,
-        current_flow_weight=current_flow_weight,
+        total_minutes=total_minutes,
+        completed_minutes=completed_minutes,
+        current_flow_minutes=current_flow_minutes,
         queue=queue,
     )
     token = _context.set(ctx)
@@ -124,4 +123,4 @@ def flow_context(
         _context.reset(token)
 
 
-__all__ = ["update", "webhook_worker", "flow_context", "ProgressContext"]
+__all__ = ["ProgressContext", "flow_context", "update", "webhook_worker"]
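Note: per-flow weights are replaced by estimated minutes, so overall progress becomes (completed_minutes + current_flow_minutes * fraction) / total_minutes. A small worked example with made-up numbers:

    # Three flows estimated at 2, 4 and 4 minutes; the second flow (step=2)
    # has just reported update(0.5).
    completed_minutes = 2.0           # flows already finished
    current_flow_minutes = 4.0        # estimate for the running flow
    total_minutes = 2.0 + 4.0 + 4.0   # 10.0
    overall = (completed_minutes + current_flow_minutes * 0.5) / total_minutes
    print(round(overall, 4))  # 0.4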
ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py}
@@ -1,8 +1,9 @@
-"""@public Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
+"""Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
 
 import inspect
+from collections.abc import Callable
 from functools import wraps
-from typing import Any, Callable, ParamSpec, TypeVar, cast
+from typing import Any, ParamSpec, TypeVar, cast
 
 from prefect import get_client
 from prefect.client.orchestration import PrefectClient
@@ -12,9 +13,9 @@ from prefect.deployments.flow_runs import run_deployment
 from prefect.exceptions import ObjectNotFound
 
 from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
-from ai_pipeline_core.flow.options import FlowOptions
+from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
+from ai_pipeline_core.pipeline.options import FlowOptions
 from ai_pipeline_core.settings import settings
-from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
 
 P = ParamSpec("P")
 TOptions = TypeVar("TOptions", bound=FlowOptions)
@@ -33,15 +34,13 @@ async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]
     """Run a remote Prefect deployment, trying local client first then remote."""
 
     async def _run(client: PrefectClient, as_subflow: bool) -> Any:
-        fr: FlowRun = await run_deployment(
-            client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow
-        )  # type: ignore
+        fr: FlowRun = await run_deployment(client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow)  # type: ignore
        return await fr.state.result()  # type: ignore
 
     async with get_client() as client:
         try:
             await client.read_deployment_by_name(name=deployment_name)
-            return await _run(client, True)
+            return await _run(client, True)  # noqa: FBT003
         except ObjectNotFound:
             pass
 
@@ -55,11 +54,9 @@
     ) as client:
         try:
             await client.read_deployment_by_name(name=deployment_name)
-            ctx = AsyncClientContext.model_construct(
-                client=client, _httpx_settings=None, _context_stack=0
-            )
+            ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
             with ctx:
-                return await _run(client, False)
+                return await _run(client, False)  # noqa: FBT003
         except ObjectNotFound:
             pass
 
@@ -74,7 +71,7 @@ def remote_deployment(
     trace_level: TraceLevel = "always",
     trace_cost: float | None = None,
 ) -> Callable[[Callable[P, TResult]], Callable[P, TResult]]:
-    """@public Decorator to call PipelineDeployment flows remotely with automatic serialization."""
+    """Decorator to call PipelineDeployment flows remotely with automatic serialization."""
 
     def decorator(func: Callable[P, TResult]) -> Callable[P, TResult]:
         fname = getattr(func, "__name__", deployment_class.name)
@@ -106,7 +103,7 @@
             if isinstance(result, DeploymentResult):
                 return cast(TResult, result)
             if isinstance(result, dict):
-                return cast(TResult, deployment_class.result_type(**result))
+                return cast(TResult, deployment_class.result_type(**cast(dict[str, Any], result)))
             raise TypeError(f"Expected DeploymentResult, got {type(result).__name__}")
 
         traced_wrapper = trace(
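Note: a minimal call-site sketch for run_remote_deployment, assuming it stays importable from ai_pipeline_core.deployment.remote and a matching deployment already exists; the name and parameters below are placeholders:

    import asyncio

    from ai_pipeline_core.deployment.remote import run_remote_deployment

    async def main() -> None:
        # Tries the local Prefect client first, then the configured remote API.
        result = await run_remote_deployment(
            "my-flow/my-deployment",       # "<flow name>/<deployment name>" placeholder
            {"project_name": "demo"},      # parameters placeholder
        )
        print(result)

    asyncio.run(main())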