ai-pipeline-core 0.2.5 → 0.2.7 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,7 +118,7 @@ from .prompt_manager import PromptManager
  from .settings import Settings
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
 
- __version__ = "0.2.5"
+ __version__ = "0.2.7"
 
  __all__ = [
  # Config/Settings
@@ -260,11 +260,14 @@ class AIMessages(list[AIMessageType]):
 
  for message in self:
  if isinstance(message, str):
- messages.append({"role": "user", "content": message})
+ messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
  elif isinstance(message, Document):
  messages.append({"role": "user", "content": AIMessages.document_to_prompt(message)})
  elif isinstance(message, ModelResponse): # type: ignore
- messages.append({"role": "assistant", "content": message.content})
+ messages.append({
+ "role": "assistant",
+ "content": [{"type": "text", "text": message.content}],
+ })
  else:
  raise ValueError(f"Unsupported message type: {type(message)}")
 
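The change above makes every text part explicit. A minimal sketch of what a plain string now serializes to (import path and method name are taken from this diff; the exact public surface may differ):

    from ai_pipeline_core.llm import AIMessages  # import path assumed from the package layout

    messages = AIMessages(["What changed in 0.2.7?"])
    # to_prompt() now emits OpenAI-style content parts instead of a bare string:
    # [{"role": "user", "content": [{"type": "text", "text": "What changed in 0.2.7?"}]}]
    print(messages.to_prompt())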
@@ -17,7 +17,7 @@ from typing import Any, TypeVar
 
  from lmnr import Laminar
  from openai import AsyncOpenAI
- from openai.lib.streaming.chat import ContentDeltaEvent, ContentDoneEvent
+ from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDoneEvent
  from openai.types.chat import (
  ChatCompletionMessageParam,
  )
@@ -39,7 +39,7 @@ def _process_messages(
  context: AIMessages,
  messages: AIMessages,
  system_prompt: str | None = None,
- cache_ttl: str | None = "5m",
+ cache_ttl: str | None = "300s",
  ) -> list[ChatCompletionMessageParam]:
  """Process and format messages for LLM API consumption.
 
@@ -51,7 +51,7 @@ def _process_messages(
  context: Messages to be cached (typically expensive/static content).
  messages: Regular messages without caching (dynamic queries).
  system_prompt: Optional system instructions for the model.
- cache_ttl: Cache TTL for context messages (e.g. "120s", "5m", "1h").
+ cache_ttl: Cache TTL for context messages (e.g. "120s", "300s", "1h").
  Set to None or empty string to disable caching.
 
  Returns:
@@ -86,12 +86,17 @@ def _process_messages(
  # Use AIMessages.to_prompt() for context
  context_messages = context.to_prompt()
 
- # Apply caching to last context message if cache_ttl is set
+ # Apply caching to last context message and last content part if cache_ttl is set
  if cache_ttl:
  context_messages[-1]["cache_control"] = { # type: ignore
  "type": "ephemeral",
  "ttl": cache_ttl,
  }
+ assert isinstance(context_messages[-1]["content"], list) # type: ignore
+ context_messages[-1]["content"][-1]["cache_control"] = { # type: ignore
+ "type": "ephemeral",
+ "ttl": cache_ttl,
+ }
 
  processed_messages.extend(context_messages)
 
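For illustration, the final context message now carries cache_control both on the message and on its last content part. A hand-written rendering, assuming the default TTL:

    # Hypothetical shape of the last context message when cache_ttl="300s"
    cached_message = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "...expensive static context...",
                "cache_control": {"type": "ephemeral", "ttl": "300s"},
            },
        ],
        "cache_control": {"type": "ephemeral", "ttl": "300s"},
    }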
@@ -103,6 +108,42 @@ def _process_messages(
  return processed_messages
 
 
+ def _model_name_to_openrouter_model(model: ModelName) -> str:
+ """Convert a model name to an OpenRouter model name.
+
+ Args:
+ model: Model name to convert.
+
+ Returns:
+ OpenRouter model name.
+ """
+ if model == "gpt-4o-search":
+ return "openai/gpt-4o-search-preview"
+ if model == "gemini-2.5-flash-search":
+ return "google/gemini-2.5-flash:online"
+ if model == "grok-4-fast-search":
+ return "x-ai/grok-4-fast:online"
+ if model == "sonar-pro-search":
+ return "perplexity/sonar-reasoning-pro"
+ if model.startswith("gemini"):
+ return f"google/{model}"
+ elif model.startswith("gpt"):
+ return f"openai/{model}"
+ elif model.startswith("grok"):
+ return f"x-ai/{model}"
+ elif model.startswith("claude"):
+ return f"anthropic/{model}"
+ elif model.startswith("qwen3"):
+ return f"qwen/{model}"
+ elif model.startswith("deepseek-"):
+ return f"deepseek/{model}"
+ elif model.startswith("glm-"):
+ return f"z-ai/{model}"
+ elif model.startswith("kimi-"):
+ return f"moonshotai/{model}"
+ return model
+
+
  async def _generate(
  model: str, messages: list[ChatCompletionMessageParam], completion_kwargs: dict[str, Any]
  ) -> ModelResponse:
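A few mappings implied by the helper above (illustrative only; `_model_name_to_openrouter_model` is private to the client module, and the third model name below is a made-up example):

    assert _model_name_to_openrouter_model("gpt-5-mini") == "openai/gpt-5-mini"
    assert _model_name_to_openrouter_model("gemini-2.5-flash-search") == "google/gemini-2.5-flash:online"
    assert _model_name_to_openrouter_model("claude-example") == "anthropic/claude-example"  # hypothetical name
    assert _model_name_to_openrouter_model("unknown-model") == "unknown-model"  # unmatched names pass through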
@@ -128,11 +169,16 @@ async def _generate(
  - Captures response headers for cost tracking
  - Response includes model options for debugging
  """
+ if "openrouter" in settings.openai_base_url.lower():
+ model = _model_name_to_openrouter_model(model)
+
  async with AsyncOpenAI(
  api_key=settings.openai_api_key,
  base_url=settings.openai_base_url,
  ) as client:
- start_time, first_token_time = time.time(), None
+ start_time = time.time()
+ first_token_time = None
+ usage = None
  async with client.chat.completions.stream(
  model=model,
  messages=messages,
@@ -144,6 +190,9 @@ async def _generate(
  first_token_time = time.time()
  elif isinstance(event, ContentDoneEvent):
  pass
+ elif isinstance(event, ChunkEvent):
+ if event.chunk.usage: # used to fix a bug with missing usage data
+ usage = event.chunk.usage
  if not first_token_time:
  first_token_time = time.time()
  raw_response = await stream.get_final_completion()
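A condensed sketch of the streaming pattern used above (it mirrors the calls visible in this diff; retries, tracing, and timing are omitted):

    from openai import AsyncOpenAI
    from openai.lib.streaming.chat import ChunkEvent

    async def stream_and_capture_usage(client: AsyncOpenAI, model: str, messages):
        usage = None
        async with client.chat.completions.stream(model=model, messages=messages) as stream:
            async for event in stream:
                # Some providers only attach usage to the final chunk, so grab it here.
                if isinstance(event, ChunkEvent) and event.chunk.usage:
                    usage = event.chunk.usage
            final = await stream.get_final_completion()
        return final, usage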
@@ -156,6 +205,7 @@ async def _generate(
  raw_response,
  model_options=completion_kwargs,
  metadata=metadata,
+ usage=usage,
  )
  return response
 
@@ -192,6 +242,10 @@ async def _generate_with_retry(
  if not context and not messages:
  raise ValueError("Either context or messages must be provided")
 
+ if "gemini" in model.lower() and context.approximate_tokens_count < 5000:
+ # Bug fix for minimum explicit context size for Gemini models
+ options.cache_ttl = None
+
  processed_messages = _process_messages(
  context, messages, options.system_prompt, options.cache_ttl
  )
@@ -329,26 +383,11 @@ async def generate(
  ... ])
  >>> response = await llm.generate("gpt-5", messages=messages)
 
- Configuration via LiteLLM Proxy:
- >>> # Configure temperature in litellm_config.yaml:
- >>> # model_list:
- >>> # - model_name: gpt-5
- >>> # litellm_params:
- >>> # model: openai/gpt-4o
- >>> # temperature: 0.3
- >>> # max_tokens: 1000
- >>>
- >>> # Configure retry logic in proxy:
- >>> # general_settings:
- >>> # master_key: sk-1234
- >>> # max_retries: 5
- >>> # retry_delay: 15
-
  Performance:
  - Context caching saves ~50-90% tokens on repeated calls
  - First call: full token cost
  - Subsequent calls (within cache TTL): only messages tokens
- - Default cache TTL is 5m (production-optimized)
+ - Default cache TTL is 300s/5 minutes (production-optimized)
  - Default retry logic: 3 attempts with 10s delay (production-optimized)
 
  Caching:
@@ -45,7 +45,7 @@ class ModelOptions(BaseModel):
 
  timeout: Maximum seconds to wait for response (default: 300).
 
- cache_ttl: Cache TTL for context messages (default: "5m").
+ cache_ttl: Cache TTL for context messages (default: "300s").
  String format like "60s", "5m", or None to disable caching.
  Applied to the last context message for efficient token reuse.
 
@@ -165,7 +165,7 @@ class ModelOptions(BaseModel):
  - search_context_size only works with search models
  - reasoning_effort only works with models that support explicit reasoning
  - response_format is set internally by generate_structured()
- - cache_ttl accepts formats like "120s", "5m" (default), "1h" or None to disable caching
+ - cache_ttl accepts formats like "120s", "5m", "1h" or None (default: "300s")
  - stop sequences are limited to 4 by most providers
  - user identifier helps track costs per end-user (max 256 chars)
  - extra_body allows passing provider-specific parameters
@@ -179,7 +179,7 @@ class ModelOptions(BaseModel):
  retries: int = 3
  retry_delay_seconds: int = 20
  timeout: int = 600
- cache_ttl: str | None = "5m"
+ cache_ttl: str | None = "300s"
  service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
  max_completion_tokens: int | None = None
  stop: str | list[str] | None = None
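A hedged usage sketch of the new default (field names come from this diff; the import path is assumed from the wheel's file layout):

    from ai_pipeline_core.llm.model_options import ModelOptions  # import path assumed

    options = ModelOptions()                   # cache_ttl defaults to "300s" (5 minutes)
    long_lived = ModelOptions(cache_ttl="1h")  # longer reuse window
    uncached = ModelOptions(cache_ttl=None)    # disable context caching entirely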
@@ -11,6 +11,7 @@ from copy import deepcopy
  from typing import Any, Generic, TypeVar
 
  from openai.types.chat import ChatCompletion
+ from openai.types.completion_usage import CompletionUsage
  from pydantic import BaseModel
 
  T = TypeVar(
@@ -61,6 +62,7 @@ class ModelResponse(ChatCompletion):
  chat_completion: ChatCompletion,
  model_options: dict[str, Any],
  metadata: dict[str, Any],
+ usage: CompletionUsage | None = None,
  ) -> None:
  """Initialize ModelResponse from ChatCompletion.
 
@@ -73,6 +75,7 @@ class ModelResponse(ChatCompletion):
  Stored for metadata extraction and tracing.
  metadata: Custom metadata for tracking (time_taken, first_token_time, etc.).
  Includes timing information and custom tags.
+ usage: Optional usage information from streaming response.
 
  Example:
  >>> # Usually created internally by generate()
@@ -83,10 +86,19 @@ class ModelResponse(ChatCompletion):
  ... )
  """
  data = chat_completion.model_dump()
+
+ # fixes issue where the role is "assistantassistant" instead of "assistant"
+ for i in range(len(data["choices"])):
+ if role := data["choices"][i]["message"].get("role"):
+ if role.startswith("assistant") and role != "assistant":
+ data["choices"][i]["message"]["role"] = "assistant"
+
  super().__init__(**data)
 
  self._model_options = model_options
  self._metadata = metadata
+ if usage:
+ self.usage = usage
 
  @property
  def content(self) -> str:
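The role fix above, as a standalone sketch (same predicate, outside the ChatCompletion plumbing):

    def _normalize_role(role: str | None) -> str | None:
        # Collapse provider glitches like "assistantassistant" back to "assistant".
        if role and role.startswith("assistant") and role != "assistant":
            return "assistant"
        return role

    assert _normalize_role("assistantassistant") == "assistant"
    assert _normalize_role("user") == "user"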
@@ -254,7 +266,7 @@ class ModelResponse(ChatCompletion):
 
  other_fields = self.__dict__
  for key, value in other_fields.items():
- if key in ["_model_options", "_metadata", "choices", "usage"]:
+ if key in ["_model_options", "_metadata", "choices"]:
  continue
  try:
  metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
@@ -20,7 +20,7 @@ ModelName: TypeAlias = (
  "grok-4",
  # Small models
  "gemini-2.5-flash",
- "gpt-5-nano",
+ "gpt-5-mini",
  "grok-4-fast",
  # Search models
  "gemini-2.5-flash-search",
@@ -126,6 +126,10 @@ class Settings(BaseSettings):
  # Prefect Configuration
  prefect_api_url: str = ""
  prefect_api_key: str = ""
+ prefect_api_auth_string: str = ""
+ prefect_work_pool_name: str = "default"
+ prefect_work_queue_name: str = "default"
+ prefect_gcs_bucket: str = ""
 
  # Observability
  lmnr_project_api_key: str = ""
@@ -135,6 +139,4 @@ class Settings(BaseSettings):
  gcs_service_account_file: str = "" # Path to GCS service account JSON file
 
 
- # Legacy: Module-level instance for backwards compatibility
- # Applications should create their own settings instance
  settings = Settings()
@@ -1,7 +1,5 @@
  """Command-line interface for simple pipeline execution."""
 
- from __future__ import annotations
-
  import asyncio
  import os
  import sys
@@ -6,8 +6,6 @@ This module centralizes:
  ``observe`` instrumentation, and optional support for test runs.
  """
 
- from __future__ import annotations
-
  import inspect
  import json
  import os
@@ -0,0 +1,8 @@
+ """Experimental utilities for deployment and remote execution.
+
+ These features are experimental and subject to change.
+ """
+
+ from .remote_deployment import remote_deployment, run_remote_deployment
+
+ __all__ = ["remote_deployment", "run_remote_deployment"]
@@ -0,0 +1,358 @@
+ #!/usr/bin/env python3
+ """Universal Prefect deployment script using Python API.
+
+ This script:
+ 1. Builds a Python package from pyproject.toml
+ 2. Uploads it to Google Cloud Storage
+ 3. Creates/updates a Prefect deployment using the RunnerDeployment pattern
+
+ Requirements:
+     - Settings configured with PREFECT_API_URL and optionally PREFECT_API_KEY
+     - Settings configured with PREFECT_GCS_BUCKET
+     - pyproject.toml with project name and version
+     - Local package installed for flow metadata extraction
+
+ Usage:
+     python -m ai_pipeline_core.utils.deploy
+ """
+
+ import argparse
+ import asyncio
+ import subprocess
+ import sys
+ import tomllib
+ import traceback
+ from pathlib import Path
+ from typing import Any, Optional
+
+ from prefect.cli.deploy._storage import _PullStepStorage # type: ignore
+ from prefect.client.orchestration import get_client
+ from prefect.deployments.runner import RunnerDeployment
+ from prefect.flows import load_flow_from_entrypoint
+
+ from ai_pipeline_core.settings import settings
+ from ai_pipeline_core.storage import Storage
+
+ # ============================================================================
+ # Deployer Class
+ # ============================================================================
+
+
+ class Deployer:
+     """Deploy Prefect flows using the RunnerDeployment pattern.
+
+     This is the official Prefect approach that handles flow registration,
+     deployment creation/updates, and all edge cases automatically.
+     """
+
+     def __init__(self):
+         """Initialize deployer."""
+         self.config = self._load_config()
+         self._validate_prefect_settings()
+
+     def _load_config(self) -> dict[str, Any]:
+         """Load and normalize project configuration from pyproject.toml.
+
+         Returns:
+             Configuration dictionary with project metadata and deployment settings.
+         """
+         if not settings.prefect_gcs_bucket:
+             self._die(
+                 "PREFECT_GCS_BUCKET not configured in settings.\n"
+                 "Configure via environment variable or .env file:\n"
+                 " PREFECT_GCS_BUCKET=your-bucket-name"
+             )
+
+         pyproject_path = Path("pyproject.toml")
+         if not pyproject_path.exists():
+             self._die("pyproject.toml not found. Run from project root.")
+
+         with open(pyproject_path, "rb") as f:
+             data = tomllib.load(f)
+
+         project = data.get("project", {})
+         name = project.get("name")
+         version = project.get("version")
+
+         if not name:
+             self._die("Project name not found in pyproject.toml")
+         if not version:
+             self._die("Project version not found in pyproject.toml")
+
+         # Normalize naming conventions
+         # Hyphens in package names become underscores in Python imports
+         package_name = name.replace("-", "_")
+         flow_folder = name.replace("_", "-")
+
+         return {
+             "name": name,
+             "package": package_name,
+             "version": version,
+             "bucket": settings.prefect_gcs_bucket,
+             "folder": f"flows/{flow_folder}",
+             "tarball": f"{package_name}-{version}.tar.gz",
+             "work_pool": settings.prefect_work_pool_name,
+             "work_queue": settings.prefect_work_queue_name,
+         }
+
+     def _validate_prefect_settings(self):
+         """Validate that required Prefect settings are configured."""
+         self.api_url = settings.prefect_api_url
+         if not self.api_url:
+             self._die(
+                 "PREFECT_API_URL not configured in settings.\n"
+                 "Configure via environment variable or .env file:\n"
+                 " PREFECT_API_URL=https://api.prefect.cloud/api/accounts/.../workspaces/..."
+             )
+
+     def _run(self, cmd: str, check: bool = True) -> Optional[str]:
+         """Execute shell command and return output.
+
+         Args:
+             cmd: Shell command to execute
+             check: Whether to raise on non-zero exit code
+
+         Returns:
+             Command stdout if successful, None if failed and check=False
+         """
+         result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+
+         if check and result.returncode != 0:
+             self._die(f"Command failed: {cmd}\n{result.stderr}")
+
+         return result.stdout.strip() if result.returncode == 0 else None
+
+     def _info(self, msg: str):
+         """Print info message."""
+         print(f"→ {msg}")
+
+     def _success(self, msg: str):
+         """Print success message."""
+         print(f"✓ {msg}")
+
+     def _die(self, msg: str):
+         """Print error and exit."""
+         print(f"✗ {msg}", file=sys.stderr)
+         sys.exit(1)
+
+     def _build_package(self) -> Path:
+         """Build Python package using `python -m build`.
+
+         Returns:
+             Path to the built tarball
+         """
+         self._info(f"Building {self.config['name']} v{self.config['version']}")
+
+         # Build sdist (source distribution)
+         build_cmd = "python -m build --sdist"
+
+         self._run(build_cmd)
+
+         # Verify tarball was created
+         tarball_path = Path("dist") / self.config["tarball"]
+         if not tarball_path.exists():
+             self._die(
+                 f"Build artifact not found: {tarball_path}\n"
+                 f"Expected tarball name: {self.config['tarball']}\n"
+                 f"Check that pyproject.toml version matches."
+             )
+
+         self._success(f"Built {tarball_path.name} ({tarball_path.stat().st_size // 1024} KB)")
+         return tarball_path
+
+     async def _upload_package(self, tarball: Path):
+         """Upload package tarball to Google Cloud Storage using Storage abstraction.
+
+         Args:
+             tarball: Path to the tarball to upload
+         """
+         # Extract flow_folder from the config folder path
+         # e.g., "flows/ai-document-writer" -> "ai-document-writer"
+         flow_folder = self.config["folder"].split("/", 1)[1] if "/" in self.config["folder"] else ""
+
+         # Initialize storage with gs://bucket-name/flows and set subfolder to flow_folder
+         base_uri = f"gs://{self.config['bucket']}/flows"
+         storage = await Storage.from_uri(base_uri)
+         storage = storage.with_base(flow_folder)
+
+         dest_uri = storage.url_for(tarball.name)
+         self._info(f"Uploading to {dest_uri}")
+
+         # Read and upload the tarball
+         tarball_bytes = tarball.read_bytes()
+         await storage.write_bytes(tarball.name, tarball_bytes)
+
+         self._success(f"Package uploaded to {self.config['folder']}/{tarball.name}")
+
+     async def _deploy_via_api(self):
+         """Create or update Prefect deployment using RunnerDeployment pattern.
+
+         This is the official Prefect approach that:
+         1. Automatically creates/updates the flow registration
+         2. Handles deployment create vs update logic
+         3. Properly formats all parameters for the API
+         """
+         # Define entrypoint (assumes flow function has same name as package)
+         entrypoint = f"{self.config['package']}:{self.config['package']}"
+
+         # Load flow to get metadata
+         # This requires the package to be installed locally (typical dev workflow)
+         self._info(f"Loading flow from entrypoint: {entrypoint}")
+         try:
+             flow = load_flow_from_entrypoint(entrypoint)
+             self._success(f"Loaded flow: {flow.name}")
+         except ImportError as e:
+             self._die(
+                 f"Failed to import flow: {e}\n\n"
+                 f"The package must be installed locally to extract flow metadata.\n"
+                 f"Install it with: pip install -e .\n\n"
+                 f"Expected entrypoint: {entrypoint}\n"
+                 f"This means: Python package '{self.config['package']}' "
+                 f"with flow function '{self.config['package']}'"
+             )
+         except AttributeError as e:
+             self._die(
+                 f"Flow function not found: {e}\n\n"
+                 f"Expected flow function named '{self.config['package']}' "
+                 f"in package '{self.config['package']}'.\n"
+                 f"Check that your flow is decorated with @flow and named correctly."
+             )
+
+         # Define pull steps for workers
+         # These steps tell workers how to get and install the flow code
+         pull_steps = [
+             {
+                 "prefect_gcp.deployments.steps.pull_from_gcs": {
+                     "id": "pull_code",
+                     "requires": "prefect-gcp>=0.6",
+                     "bucket": self.config["bucket"],
+                     "folder": self.config["folder"],
+                 }
+             },
+             {
+                 "prefect.deployments.steps.run_shell_script": {
+                     "id": "install_project",
+                     "stream_output": True,
+                     "directory": "{{ pull_code.directory }}",
+                     # Use uv for fast installation (worker has it installed)
+                     "script": f"uv pip install --system ./{self.config['tarball']}",
+                 }
+             },
+         ]
+
+         # Create RunnerDeployment
+         # This is the official Prefect pattern that handles all the complexity
+         self._info(f"Creating deployment for flow '{flow.name}'")
+
+         deployment = RunnerDeployment(
+             name=self.config["package"],
+             flow_name=flow.name,
+             entrypoint=entrypoint,
+             work_pool_name=self.config["work_pool"],
+             work_queue_name=self.config["work_queue"],
+             tags=[self.config["name"]],
+             version=self.config["version"],
+             description=flow.description
+             or f"Deployment for {self.config['package']} v{self.config['version']}",
+             storage=_PullStepStorage(pull_steps),
+             parameters={},
+             job_variables={},
+             paused=False,
+         )
+
+         # Verify work pool exists before deploying
+         async with get_client() as client:
+             try:
+                 work_pool = await client.read_work_pool(self.config["work_pool"])
+                 self._success(
+                     f"Work pool '{self.config['work_pool']}' verified (type: {work_pool.type})"
+                 )
+             except Exception as e:
+                 self._die(
+                     f"Work pool '{self.config['work_pool']}' not accessible: {e}\n"
+                     "Create it in the Prefect UI or with: prefect work-pool create"
+                 )
+
+         # Apply deployment
+         # This automatically handles create vs update based on whether deployment exists
+         self._info("Applying deployment (create or update)...")
+         try:
+             deployment_id = await deployment.apply() # type: ignore
+             self._success(f"Deployment ID: {deployment_id}")
+
+             # Print helpful URLs
+             if self.api_url:
+                 ui_url = self.api_url.replace("/api/", "/")
+                 print(f"\n🌐 View deployment: {ui_url}/deployments/deployment/{deployment_id}")
+                 print(f"🚀 Run now: prefect deployment run '{flow.name}/{self.config['package']}'")
+         except Exception as e:
+             self._die(f"Failed to apply deployment: {e}")
+
+     async def run(self):
+         """Execute the complete deployment pipeline."""
+         print("=" * 70)
+         print(f"Prefect Deployment: {self.config['name']} v{self.config['version']}")
+         print(f"Target: gs://{self.config['bucket']}/{self.config['folder']}")
+         print("=" * 70)
+         print()
+
+         # Phase 1: Build
+         tarball = self._build_package()
+
+         # Phase 2: Upload
+         await self._upload_package(tarball)
+
+         # Phase 3: Deploy
+         await self._deploy_via_api()
+
+         print()
+         print("=" * 70)
+         self._success("Deployment complete!")
+         print("=" * 70)
+
+
+ # ============================================================================
+ # CLI Entry Point
+ # ============================================================================
+
+
+ def main():
+     """Command-line interface for deployment script."""
+     parser = argparse.ArgumentParser(
+         description="Deploy Prefect flows to GCP using the official RunnerDeployment pattern",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog="""
+ Example:
+ python -m ai_pipeline_core.utils.deploy
+
+ Prerequisites:
+ - Settings configured with PREFECT_API_URL (and optionally PREFECT_API_KEY)
+ - Settings configured with PREFECT_GCS_BUCKET
+ - pyproject.toml with project name and version
+ - Package installed locally: pip install -e .
+ - GCP authentication configured (via service account or default credentials)
+ - Work pool created in Prefect UI or CLI
+
+ Settings can be configured via:
+ - Environment variables (e.g., export PREFECT_API_URL=...)
+ - .env file in the current directory
+ """,
+     )
+
+     parser.parse_args()
+
+     try:
+         deployer = Deployer()
+         asyncio.run(deployer.run())
+     except KeyboardInterrupt:
+         print("\n✗ Deployment cancelled by user", file=sys.stderr)
+         sys.exit(1)
+     except Exception as e:
+         print(f"\n✗ Unexpected error: {e}", file=sys.stderr)
+
+         traceback.print_exc()
+         sys.exit(1)
+
+
+ if __name__ == "__main__":
+     main()
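Besides `python -m ai_pipeline_core.utils.deploy`, the class can be driven programmatically. A sketch, assuming the same prerequisites listed in the module docstring:

    import asyncio

    from ai_pipeline_core.utils.deploy import Deployer

    # Builds the sdist, uploads it to gs://<PREFECT_GCS_BUCKET>/flows/<project>, then
    # applies a RunnerDeployment against the configured work pool.
    asyncio.run(Deployer().run())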
@@ -0,0 +1,269 @@
+ """Experimental remote deployment utilities.
+
+ EXPERIMENTAL: This module provides utilities for calling remotely deployed Prefect flows.
+ Subject to change in future versions.
+ """
+
+ import inspect
+ from functools import wraps
+ from typing import Any, Callable, ParamSpec, Type, TypeVar
+
+ from prefect import get_client
+ from prefect.client.orchestration import PrefectClient
+ from prefect.client.schemas import FlowRun
+ from prefect.context import AsyncClientContext
+ from prefect.deployments.flow_runs import run_deployment
+ from prefect.exceptions import ObjectNotFound
+
+ from ai_pipeline_core import DocumentList, FlowDocument
+ from ai_pipeline_core.settings import settings
+ from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
+
+ # --------------------------------------------------------------------------- #
+ # Utility functions (copied from pipeline.py for consistency)
+ # --------------------------------------------------------------------------- #
+
+
+ def _callable_name(obj: Any, fallback: str) -> str:
+     """Safely extract callable's name for error messages.
+
+     Args:
+         obj: Any object that might have a __name__ attribute.
+         fallback: Default name if extraction fails.
+
+     Returns:
+         The callable's __name__ if available, fallback otherwise.
+
+     Note:
+         Internal helper that never raises exceptions.
+     """
+     try:
+         n = getattr(obj, "__name__", None)
+         return n if isinstance(n, str) else fallback
+     except Exception:
+         return fallback
+
+
+ def _is_already_traced(func: Callable[..., Any]) -> bool:
+     """Check if a function has already been wrapped by the trace decorator.
+
+     This checks both for the explicit __is_traced__ marker and walks
+     the __wrapped__ chain to detect nested trace decorations.
+
+     Args:
+         func: Function to check for existing trace decoration.
+
+     Returns:
+         True if the function is already traced, False otherwise.
+     """
+     # Check for explicit marker
+     if hasattr(func, "__is_traced__") and func.__is_traced__: # type: ignore[attr-defined]
+         return True
+
+     # Walk the __wrapped__ chain to detect nested traces
+     current = func
+     depth = 0
+     max_depth = 10 # Prevent infinite loops
+
+     while hasattr(current, "__wrapped__") and depth < max_depth:
+         wrapped = current.__wrapped__ # type: ignore[attr-defined]
+         # Check if the wrapped function has the trace marker
+         if hasattr(wrapped, "__is_traced__") and wrapped.__is_traced__: # type: ignore[attr-defined]
+             return True
+         current = wrapped
+         depth += 1
+
+     return False
+
+
+ # --------------------------------------------------------------------------- #
+ # Remote deployment execution
+ # --------------------------------------------------------------------------- #
+
+
+ async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]) -> Any:
+     """Run a remote Prefect deployment.
+
+     Args:
+         deployment_name: Name of the deployment to run.
+         parameters: Parameters to pass to the deployment.
+
+     Returns:
+         Result from the deployment execution.
+
+     Raises:
+         ValueError: If deployment is not found in local or remote Prefect API.
+     """
+
+     async def _run(client: PrefectClient, as_subflow: bool) -> Any:
+         fr: FlowRun = await run_deployment(
+             client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow
+         ) # type: ignore
+         return await fr.state.result() # type: ignore
+
+     async with get_client() as client:
+         try:
+             await client.read_deployment_by_name(name=deployment_name)
+             return await _run(client, True)
+         except ObjectNotFound:
+             pass
+
+     if not settings.prefect_api_url:
+         raise ValueError(f"{deployment_name} deployment not found, PREFECT_API_URL is not set")
+
+     async with PrefectClient(
+         api=settings.prefect_api_url,
+         api_key=settings.prefect_api_key,
+         auth_string=settings.prefect_api_auth_string,
+     ) as client:
+         try:
+             await client.read_deployment_by_name(name=deployment_name)
+             with AsyncClientContext.model_construct(
+                 client=client, _httpx_settings=None, _context_stack=0
+             ):
+                 return await _run(client, False)
+         except ObjectNotFound:
+             pass
+
+     raise ValueError(f"{deployment_name} deployment not found")
+
+
+ P = ParamSpec("P")
+ T = TypeVar("T")
+
+
+ def remote_deployment(
+     output_document_type: Type[FlowDocument],
+     *,
+     # tracing
+     name: str | None = None,
+     trace_level: TraceLevel = "always",
+     trace_ignore_input: bool = False,
+     trace_ignore_output: bool = False,
+     trace_ignore_inputs: list[str] | None = None,
+     trace_input_formatter: Callable[..., str] | None = None,
+     trace_output_formatter: Callable[..., str] | None = None,
+     trace_cost: float | None = None,
+     trace_trim_documents: bool = True,
+ ) -> Callable[[Callable[P, T]], Callable[P, T]]:
+     """Decorator for calling remote Prefect deployments with automatic tracing.
+
+     EXPERIMENTAL: Decorator for calling remote Prefect deployments with automatic
+     parameter serialization, result deserialization, and LMNR tracing.
+
+     IMPORTANT: Never combine with @trace decorator - this includes tracing automatically.
+     The framework will raise TypeError if you try to use both decorators together.
+
+     Best Practice - Use Defaults:
+         For most use cases, only specify output_document_type. The defaults provide
+         automatic tracing with optimal settings.
+
+     Args:
+         output_document_type: The FlowDocument type to deserialize results into.
+         name: Custom trace name (defaults to function name).
+         trace_level: When to trace ("always", "debug", "off").
+             - "always": Always trace (default)
+             - "debug": Only trace when LMNR_DEBUG="true"
+             - "off": Disable tracing
+         trace_ignore_input: Don't trace input arguments.
+         trace_ignore_output: Don't trace return value.
+         trace_ignore_inputs: List of parameter names to exclude from tracing.
+         trace_input_formatter: Custom formatter for input tracing.
+         trace_output_formatter: Custom formatter for output tracing.
+         trace_cost: Optional cost value to track in metadata. When provided and > 0,
+             sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
+         trace_trim_documents: Trim document content in traces to first 100 chars (default True).
+             Reduces trace size with large documents.
+
+     Returns:
+         Decorator function that wraps the target function.
+
+     Example:
+         >>> # RECOMMENDED - Minimal usage
+         >>> @remote_deployment(output_document_type=OutputDoc)
+         >>> async def process_remotely(
+         ...     project_name: str,
+         ...     documents: DocumentList,
+         ...     flow_options: FlowOptions
+         >>> ) -> DocumentList:
+         ...     pass # This stub is replaced by remote call
+         >>>
+         >>> # With custom tracing
+         >>> @remote_deployment(
+         ...     output_document_type=OutputDoc,
+         ...     trace_cost=0.05, # Track cost of remote execution
+         ...     trace_level="debug" # Only trace in debug mode
+         >>> )
+         >>> async def debug_remote_flow(...) -> DocumentList:
+         ...     pass
+
+     Note:
+         - Remote calls are automatically traced with LMNR
+         - The decorated function's body is never executed - it serves as a signature template
+         - Deployment name is auto-derived from function name
+         - DocumentList parameters are automatically serialized/deserialized
+
+     Raises:
+         TypeError: If function is already decorated with @trace.
+         ValueError: If deployment is not found.
+     """
+
+     def decorator(func: Callable[P, T]) -> Callable[P, T]:
+         fname = _callable_name(func, "remote_deployment")
+
+         # Check if function is already traced
+         if _is_already_traced(func):
+             raise TypeError(
+                 f"@remote_deployment target '{fname}' is already decorated "
+                 f"with @trace. Remove the @trace decorator - @remote_deployment includes "
+                 f"tracing automatically."
+             )
+
+         @wraps(func)
+         async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
+             sig = inspect.signature(func)
+             bound = sig.bind(*args, **kwargs)
+             bound.apply_defaults()
+
+             # Serialize parameters, converting DocumentList to list[dict]
+             parameters = {}
+             for pname, value in bound.arguments.items():
+                 if isinstance(value, DocumentList):
+                     parameters[pname] = [doc for doc in value]
+                 else:
+                     parameters[pname] = value
+
+             # Auto-derive deployment name
+             deployment_name = f"{func.__name__.replace('_', '-')}/{func.__name__}"
+
+             result = await run_remote_deployment(
+                 deployment_name=deployment_name, parameters=parameters
+             )
+
+             # Set trace cost if provided
+             if trace_cost is not None and trace_cost > 0:
+                 set_trace_cost(trace_cost)
+
+             assert isinstance(result, list), "Result must be a list"
+
+             # Auto-handle return type conversion from list[dict] to DocumentList
+             return_type = sig.return_annotation
+
+             assert return_type is DocumentList, "Return type must be a DocumentList"
+             return DocumentList([output_document_type(**item) for item in result]) # type: ignore
+
+         # Apply trace decorator
+         traced_wrapper = trace(
+             level=trace_level,
+             name=name or fname,
+             ignore_input=trace_ignore_input,
+             ignore_output=trace_ignore_output,
+             ignore_inputs=trace_ignore_inputs,
+             input_formatter=trace_input_formatter,
+             output_formatter=trace_output_formatter,
+             trim_documents=trace_trim_documents,
+         )(_wrapper)
+
+         return traced_wrapper # type: ignore
+
+     return decorator
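A minimal, hedged usage sketch of the two exports (deployment and parameter names below are placeholders):

    import asyncio

    from ai_pipeline_core.utils import run_remote_deployment

    # Direct call by "<flow name>/<deployment name>"; parameters are passed through as-is.
    result = asyncio.run(
        run_remote_deployment(
            deployment_name="my-flow/my_flow",    # placeholder
            parameters={"project_name": "demo"},  # placeholder
        )
    )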
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.2.5
+ Version: 0.2.7
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -1,11 +1,11 @@
- ai_pipeline_core/__init__.py,sha256=XjQnfQELFAbS9MOd41N59AQHkZuepKmHGeY_m8TzKSk,5720
+ ai_pipeline_core/__init__.py,sha256=SCyd40nB8yF10KylHMBhEdpF0slV35OTsIePEKi9GP8,5720
  ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
  ai_pipeline_core/pipeline.py,sha256=fWTVmrnOEIFge6o2NUYW2ndGef5UurpL8_fK5tkXbzI,28700
  ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
  ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
  ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ai_pipeline_core/settings.py,sha256=-a9jVGg77xifj2SagCR9shXfzXUd-2MlrlquEu4htG8,5035
- ai_pipeline_core/tracing.py,sha256=mmK64s1lw18EE_7PQgfZb0sJhAuhkVDxXw_wBpR7UGE,31530
+ ai_pipeline_core/settings.py,sha256=IMrFaX0i-WIlaOA5O53ipNSta6KQVSFHc1aJXmS3nSo,5078
+ ai_pipeline_core/tracing.py,sha256=HT8heSwsVot6D6u8dPi-BHVlaemkPsPs5aXtG-iIzNk,31494
  ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
  ai_pipeline_core/documents/document.py,sha256=hdTh36KGEcrDollTnQmTI66DJIqYfe4X42Y0q7Cm4fY,68153
  ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
@@ -18,21 +18,24 @@ ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVg
  ai_pipeline_core/flow/config.py,sha256=3PCDph2n8dj-txqAvd9Wflbi_6lmfXFR9rUhM-szGSQ,18887
  ai_pipeline_core/flow/options.py,sha256=2rKR2GifhXcyw8avI_oiEDMLC2jm5Qzpw8z56pbxUMo,2285
  ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
- ai_pipeline_core/llm/ai_messages.py,sha256=tseyncD-T1IjaXKzUkzEgS3CGvz-WEDsA6v8wt9Emx0,14295
- ai_pipeline_core/llm/client.py,sha256=6gedRSeReT0SIbFZfqe73zjeSfgIXhBDRqeXUEHq4x8,22177
- ai_pipeline_core/llm/model_options.py,sha256=5XTBDdGZMxAo8JvhfEBIrtTOJC6piaMsgJ_J2cGiByo,11775
- ai_pipeline_core/llm/model_response.py,sha256=KhdlgR1vL5LmM_HHWKBAgI0LaqLdyaZqahEvtItpBEM,12706
- ai_pipeline_core/llm/model_types.py,sha256=OCRdTbQ1ZZ95nT_2PgOm16n2et25QOQyBtB1zsqm_3U,2791
+ ai_pipeline_core/llm/ai_messages.py,sha256=Onin3UPdbJQNl3WfY3-_jE5KRmF-ciXsa5K6UPOiy5s,14410
+ ai_pipeline_core/llm/client.py,sha256=385nKrr5fbDha8lNe9AbGLJ9Eszzo3_ArC22WWd8T_s,23646
+ ai_pipeline_core/llm/model_options.py,sha256=uRNIHfVeh2sgt1mZBiOUx6hPQ6GKjB8b7TytZJ6afKg,11768
+ ai_pipeline_core/llm/model_response.py,sha256=6kEr9ss3UGlykvtWAvh1l55rGw2-wyVup3QJhm0Oggc,13264
+ ai_pipeline_core/llm/model_types.py,sha256=2J4Qsb1x21I4eo_VPeaMMOW8shOGPqzJuoGjTLcBFPM,2791
  ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
  ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
  ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
  ai_pipeline_core/logging/logging_mixin.py,sha256=OTye2pbUbG5oYZkI06TNkGCEa4y0ldePz5IAfdmNUPU,8090
  ai_pipeline_core/simple_runner/__init__.py,sha256=9krT-CcDAZ0jB2MjWqFYhaK5qtUDMpB5qWzjRLa4Zhk,322
- ai_pipeline_core/simple_runner/cli.py,sha256=yVyuxLY2RZvdNwmwT5LCe-km2nQJzWTPI0vSWn4_yms,9344
+ ai_pipeline_core/simple_runner/cli.py,sha256=p9Z1jtRMH10T5Bl3QfHPxyW6LL4qYvvXeOXbPGeeXeE,9308
  ai_pipeline_core/simple_runner/simple_runner.py,sha256=f6cIodYkul-Apu1d63T6kR5DZpiaCWpphUcEPp5XjFo,9102
  ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
  ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
- ai_pipeline_core-0.2.5.dist-info/METADATA,sha256=y6-nOwAztmZCHnCY4Z5tmrDmR7pQGMyxCj8OONVMz7E,15159
- ai_pipeline_core-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- ai_pipeline_core-0.2.5.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
- ai_pipeline_core-0.2.5.dist-info/RECORD,,
+ ai_pipeline_core/utils/__init__.py,sha256=TJSmEm1Quf-gKwXrxM96u2IGzVolUyeNNfLMPoLstXI,254
+ ai_pipeline_core/utils/deploy.py,sha256=rAtRuwkmGkc-fqvDMXpt08OzLrD7KTDMAmLDC9wYg7Y,13147
+ ai_pipeline_core/utils/remote_deployment.py,sha256=cPTgnS5InK08qiWnuPz3e8YKjoT3sPBloSaDfNTzghs,10137
+ ai_pipeline_core-0.2.7.dist-info/METADATA,sha256=Ec4yFJTtt8qFtQD4Hg7KXT2VO80_i8JmLbwfXNia2GE,15159
+ ai_pipeline_core-0.2.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ ai_pipeline_core-0.2.7.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
+ ai_pipeline_core-0.2.7.dist-info/RECORD,,