ai-pipeline-core 0.3.0__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ class ModelResponse(ChatCompletion):
 
  Primary usage is adding to AIMessages for multi-turn conversations:
 
- >>> response = await llm.generate("gpt-5", messages=messages)
+ >>> response = await llm.generate("gpt-5.1", messages=messages)
  >>> messages.append(response) # Add assistant response to conversation
  >>> print(response.content) # Access generated text
 
@@ -43,7 +43,7 @@ class ModelResponse(ChatCompletion):
  >>> from ai_pipeline_core import llm, AIMessages
  >>>
  >>> messages = AIMessages(["Explain quantum computing"])
- >>> response = await llm.generate("gpt-5", messages=messages)
+ >>> response = await llm.generate("gpt-5.1", messages=messages)
  >>>
  >>> # Primary usage: add to conversation
  >>> messages.append(response)
@@ -81,7 +81,7 @@ class ModelResponse(ChatCompletion):
  >>> # Usually created internally by generate()
  >>> response = ModelResponse(
  ... chat_completion=completion,
- ... model_options={"temperature": 0.7, "model": "gpt-4"},
+ ... model_options={"temperature": 0.7, "model": "gpt-5.1"},
  ... metadata={"time_taken": 1.5, "first_token_time": 0.3}
  ... )
  """
@@ -116,7 +116,7 @@ class ModelResponse(ChatCompletion):
  Generated text from the model, or empty string if none.
 
  Example:
- >>> response = await generate("gpt-5", messages="Hello")
+ >>> response = await generate("gpt-5.1", messages="Hello")
  >>> text = response.content # The generated response
  >>>
  >>> # Common pattern: add to messages then use content
@@ -185,7 +185,7 @@ class ModelResponse(ChatCompletion):
 
  Example:
  >>> response = await llm.generate(
- ... "gpt-5",
+ ... "gpt-5.1",
  ... context=large_doc,
  ... messages="Summarize this"
  ... )
@@ -15,17 +15,15 @@ from typing import Literal, TypeAlias
  ModelName: TypeAlias = (
  Literal[
  # Core models
- "gemini-2.5-pro",
- "gpt-5",
- "grok-4",
+ "gemini-3-pro",
+ "gpt-5.1",
  # Small models
- "gemini-2.5-flash",
+ "gemini-3-flash",
  "gpt-5-mini",
- "grok-4-fast",
+ "grok-4.1-fast",
  # Search models
- "gemini-2.5-flash-search",
+ "gemini-3-flash-search",
  "sonar-pro-search",
- "gpt-4o-search",
  ]
  | str
  )
@@ -38,15 +36,15 @@ string for custom models. The type is a union of predefined literals
  and str, giving you the best of both worlds: suggestions for known
  models and flexibility for custom ones.
 
- Note: These are example common model names as of Q3 2025. Actual availability
+ Note: These are example common model names as of Q1 2026. Actual availability
  depends on your LiteLLM proxy configuration and provider access.
 
  Model categories:
- Core models (gemini-2.5-pro, gpt-5, grok-4):
+ Core models (gemini-3-pro, gpt-5.1):
  High-capability models for complex tasks requiring deep reasoning,
  nuanced understanding, or creative generation.
 
- Small models (gemini-2.5-flash, gpt-5-mini, grok-4-fast):
+ Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
  Efficient models optimized for speed and cost, suitable for
  simpler tasks or high-volume processing.
 
@@ -64,7 +62,7 @@ Example:
  >>> from ai_pipeline_core import llm, ModelName
  >>>
  >>> # Predefined model with IDE autocomplete
- >>> model: ModelName = "gpt-5" # IDE suggests common models
+ >>> model: ModelName = "gpt-5.1" # IDE suggests common models
  >>> response = await llm.generate(model, messages="Hello")
  >>>
  >>> # Custom model works directly
@@ -72,7 +70,7 @@ Example:
  >>> response = await llm.generate(model, messages="Hello")
  >>>
  >>> # Both types work seamlessly
- >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
+ >>> models: list[ModelName] = ["gpt-5.1", "custom-llm", "gemini-3-pro"]
 
  Note:
  The ModelName type includes both predefined literals and str,
@@ -117,7 +117,7 @@ class StructuredLoggerMixin(LoggerMixin):
 
  Example:
  self.log_metric("processing_time", 1.23, "seconds",
- document_type="pdf", model="gpt-4")
+ document_type="pdf", model="gpt-5.1")
  """
  self.logger.info(
  f"Metric: {metric_name}",
@@ -140,7 +140,7 @@ class StructuredLoggerMixin(LoggerMixin):
 
  Example:
  self.log_span("llm_generation", 1234.5,
- model="gpt-4", tokens=500)
+ model="gpt-5.1", tokens=500)
  """
  self.logger.info(
  f"Span: {operation}",
@@ -144,7 +144,7 @@ class PromptBuilder(BaseModel):
  options.service_tier = None
  options.cache_ttl = None
  cache_lock = False
- if "grok-4-fast" in model:
+ if "grok-4.1-fast" in model:
  options.max_completion_tokens = 30000
 
  if self.mode == "test":
@@ -154,7 +154,7 @@ class PromptBuilder(BaseModel):
  options.reasoning_effort = "medium"
  options.verbosity = None
 
- if model.startswith("gpt-5"):
+ if model.startswith("gpt-5.1"):
  options.service_tier = "flex"
 
  return options, cache_lock
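
The switch from `startswith("gpt-5")` to `startswith("gpt-5.1")` narrows which models receive the flex service tier. A quick illustrative check, using model names taken from the `ModelName` literals elsewhere in this diff:

```python
# Which of the bundled model names match the updated prefix test?
for model in ["gpt-5.1", "gpt-5-mini", "gemini-3-pro"]:
    print(f"{model}: {model.startswith('gpt-5.1')}")
# gpt-5.1: True
# gpt-5-mini: False   (matched the old "gpt-5" prefix, no longer matches)
# gemini-3-pro: False
```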
@@ -224,7 +224,7 @@ class PromptBuilder(BaseModel):
  self, model: ModelName, prompt: str | AIMessages, options: ModelOptions | None = None
  ) -> str:
  options, _ = self._get_options(model, options)
- if "gpt-5" not in model and "grok-4" not in model and "openrouter/" not in model:
+ if "gpt-5.1" not in model and "grok-4.1-fast" not in model and "openrouter/" not in model:
  options.stop = "</document>"
 
  response = await self.call(model, prompt, options)
@@ -276,6 +276,9 @@ class TraceInfo(BaseModel):
  # ---------------------------------------------------------------------------
 
 
+ _debug_processor_initialized = False
+
+
  def _initialise_laminar() -> None:
  """Initialize Laminar SDK with project configuration.
 
@@ -287,17 +290,66 @@ def _initialise_laminar() -> None:
  - Uses settings.lmnr_project_api_key for authentication
  - Disables OPENAI instrument to prevent double-tracing
  - Called automatically by trace decorator on first use
+ - Optionally adds local debug processor if TRACE_DEBUG_PATH is set
 
  Note:
  This is an internal function called once per process.
  Multiple calls are safe (Laminar handles idempotency).
  """
+ global _debug_processor_initialized
+
  if settings.lmnr_project_api_key:
  Laminar.initialize(
  project_api_key=settings.lmnr_project_api_key,
  disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [],
  )
 
+ # Add local debug processor if configured (only once)
+ if not _debug_processor_initialized:
+ _debug_processor_initialized = True
+ debug_path = os.environ.get("TRACE_DEBUG_PATH")
+ if debug_path:
+ _setup_debug_processor(debug_path)
+
+
+ def _setup_debug_processor(debug_path: str) -> None:
+ """Set up local debug trace processor."""
+ try:
+ from pathlib import Path # noqa: PLC0415
+
+ from opentelemetry import trace # noqa: PLC0415
+
+ from ai_pipeline_core.debug import ( # noqa: PLC0415
+ LocalDebugSpanProcessor,
+ LocalTraceWriter,
+ TraceDebugConfig,
+ )
+
+ config = TraceDebugConfig(
+ path=Path(debug_path),
+ max_element_bytes=int(os.environ.get("TRACE_DEBUG_MAX_INLINE", 10000)),
+ max_traces=int(os.environ.get("TRACE_DEBUG_MAX_TRACES", 20)) or None,
+ )
+
+ writer = LocalTraceWriter(config)
+ processor = LocalDebugSpanProcessor(writer)
+
+ # Add to tracer provider
+ provider = trace.get_tracer_provider()
+ add_processor = getattr(provider, "add_span_processor", None)
+ if add_processor is not None:
+ add_processor(processor)
+
+ # Register shutdown
+ import atexit # noqa: PLC0415
+
+ atexit.register(processor.shutdown)
+
+ except Exception as e:
+ import logging # noqa: PLC0415
+
+ logging.getLogger(__name__).warning(f"Failed to setup debug trace processor: {e}")
+
 
  # Overload for calls like @trace(name="...", level="debug")
  @overload
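
The new debug-processor path is driven entirely by environment variables. A minimal sketch of using it, relying only on the variable names and defaults visible in the diff above; the flow code itself is illustrative, not part of the package:

```python
import os

# Set before the first traced call: the processor is attached inside
# _initialise_laminar(), which the trace decorator triggers on first use.
os.environ["TRACE_DEBUG_PATH"] = "/tmp/trace-debug"    # where span files are written
os.environ["TRACE_DEBUG_MAX_INLINE"] = "10000"         # inline size cap in bytes (diff default)
os.environ["TRACE_DEBUG_MAX_TRACES"] = "20"            # retained traces; "0" becomes unlimited via `or None`

from ai_pipeline_core import llm, AIMessages


async def demo() -> str:
    # Any traced work run after this point is mirrored under TRACE_DEBUG_PATH.
    response = await llm.generate("gpt-5.1", messages=AIMessages(["Hello"]))
    return response.content


if __name__ == "__main__":
    import asyncio

    print(asyncio.run(demo()))
```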
@@ -728,7 +780,7 @@ def set_trace_cost(cost: float | str) -> None:
  >>> @pipeline_task
  >>> async def enriched_generation(prompt: str) -> str:
  ... # LLM cost tracked automatically via ModelResponse
- ... response = await llm.generate("gpt-5", messages=prompt)
+ ... response = await llm.generate("gpt-5.1", messages=prompt)
  ...
  ... # Add cost for post-processing
  ... processing_cost = 0.02 # Fixed cost for enrichment
@@ -18,10 +18,13 @@ Usage:
 
  import argparse
  import asyncio
+ import json
  import subprocess
  import sys
+ import tempfile
  import tomllib
  import traceback
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import Any, Optional
 
@@ -70,6 +73,8 @@ class Deployer:
  with open(pyproject_path, "rb") as f:
  data = tomllib.load(f)
 
+ self._pyproject_data = data
+
  project = data.get("project", {})
  name = project.get("name")
  version = project.get("version")
@@ -160,6 +165,192 @@ class Deployer:
  self._success(f"Built {tarball_path.name} ({tarball_path.stat().st_size // 1024} KB)")
  return tarball_path
 
+ # -- Agent build/upload support --
+
+ def _load_agent_config(self) -> dict[str, dict[str, Any]]:
+ """Load [tool.deploy.agents] from pyproject.toml.
+
+ Returns:
+ Dict mapping agent name to config (path, extra_vendor).
+ Empty dict if no agents configured.
+ """
+ return self._pyproject_data.get("tool", {}).get("deploy", {}).get("agents", {})
+
+ def _get_cli_agents_source(self) -> str | None:
+ """Get cli_agents_source path from [tool.deploy]."""
+ return self._pyproject_data.get("tool", {}).get("deploy", {}).get("cli_agents_source")
+
+ def _build_wheel_from_source(self, source_dir: Path) -> Path:
+ """Build a wheel from a source directory.
+
+ Args:
+ source_dir: Directory containing pyproject.toml
+
+ Returns:
+ Path to built .whl file in a temp dist directory
+ """
+ if not (source_dir / "pyproject.toml").exists():
+ self._die(f"No pyproject.toml in {source_dir}")
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ tmp_dist = Path(tmpdir) / "dist"
+ result = subprocess.run(
+ [sys.executable, "-m", "build", "--wheel", "--outdir", str(tmp_dist)],
+ cwd=source_dir,
+ capture_output=True,
+ text=True,
+ )
+ if result.returncode != 0:
+ self._die(f"Wheel build failed for {source_dir.name}:\n{result.stderr}")
+
+ wheels = list(tmp_dist.glob("*.whl"))
+ if not wheels:
+ self._die(f"No wheel produced for {source_dir.name}")
+
+ # Copy to persistent dist/ under source_dir
+ dist_dir = source_dir / "dist"
+ dist_dir.mkdir(exist_ok=True)
+ output = dist_dir / wheels[0].name
+ output.write_bytes(wheels[0].read_bytes())
+ return output
+
+ def _build_agents(self) -> dict[str, dict[str, Any]]:
+ """Build agent wheels and manifests for all configured agents.
+
+ Returns:
+ Dict mapping agent name to build info:
+ {name: {"manifest_json": str, "files": {filename: Path}}}
+ Empty dict if no agents configured.
+ """
+ agent_config = self._load_agent_config()
+ if not agent_config:
+ return {}
+
+ cli_agents_source = self._get_cli_agents_source()
+ if not cli_agents_source:
+ self._die(
+ "Agents configured in [tool.deploy.agents] but "
+ "[tool.deploy].cli_agents_source is not set.\n"
+ "Add to pyproject.toml:\n"
+ ' [tool.deploy]\n cli_agents_source = "vendor/cli-agents"'
+ )
+
+ self._info(f"Building {len(agent_config)} agent(s): {', '.join(agent_config)}")
+
+ # Build cli-agents wheel once (shared across all agents)
+ cli_agents_dir = Path(cli_agents_source).resolve()
+ if not (cli_agents_dir / "pyproject.toml").exists():
+ self._die(f"cli-agents source not found at {cli_agents_dir}")
+
+ cli_agents_wheel = self._build_wheel_from_source(cli_agents_dir)
+ self._success(f"Built cli-agents wheel: {cli_agents_wheel.name}")
+
+ builds: dict[str, dict[str, Any]] = {}
+
+ for agent_name, config in agent_config.items():
+ agent_path = Path(config["path"]).resolve()
+ if not (agent_path / "pyproject.toml").exists():
+ self._die(
+ f"Agent '{agent_name}' path not found: {agent_path}\n"
+ f"Check [tool.deploy.agents.{agent_name}].path in pyproject.toml"
+ )
+
+ # Read module_name from agent's pyproject.toml
+ with open(agent_path / "pyproject.toml", "rb") as f:
+ agent_pyproject = tomllib.load(f)
+
+ module_name = agent_pyproject.get("tool", {}).get("agent", {}).get("module")
+ if not module_name:
+ self._die(
+ f"Agent '{agent_name}' missing [tool.agent].module in "
+ f"{agent_path / 'pyproject.toml'}\n"
+ f'Add:\n [tool.agent]\n module = "agent_{agent_name}"'
+ )
+
+ # Build agent wheel
+ agent_wheel = self._build_wheel_from_source(agent_path)
+ self._success(f"Built agent wheel: {agent_wheel.name}")
+
+ # Collect all files for this agent bundle
+ files: dict[str, Path] = {
+ agent_wheel.name: agent_wheel,
+ cli_agents_wheel.name: cli_agents_wheel,
+ }
+
+ # Build extra_vendor packages from repo root
+ vendor_packages: list[str] = []
+ extra_built: set[str] = set()
+ for vendor_name in config.get("extra_vendor", []):
+ extra_source_dir = Path(vendor_name).resolve()
+ if not (extra_source_dir / "pyproject.toml").exists():
+ self._die(
+ f"Extra vendor '{vendor_name}' for agent '{agent_name}' "
+ f"not found at {extra_source_dir}\n"
+ f"Ensure the directory exists at repo root with pyproject.toml"
+ )
+ vendor_wheel = self._build_wheel_from_source(extra_source_dir)
+ files[vendor_wheel.name] = vendor_wheel
+ vendor_packages.append(vendor_wheel.name)
+ extra_built.add(extra_source_dir.name.replace("-", "_"))
+ self._success(f"Built vendor wheel: {vendor_wheel.name}")
+
+ # Collect existing vendor/*.whl and vendor/*.tar.gz from agent directory,
+ # skipping packages already built from extra_vendor
+ agent_vendor_dir = agent_path / "vendor"
+ if agent_vendor_dir.exists():
+ for pkg in list(agent_vendor_dir.glob("*.whl")) + list(
+ agent_vendor_dir.glob("*.tar.gz")
+ ):
+ pkg_base = pkg.name.split("-")[0].replace("-", "_")
+ if pkg.name not in files and pkg_base not in extra_built:
+ files[pkg.name] = pkg
+ vendor_packages.append(pkg.name)
+
+ # Write manifest (plain JSON dict, compatible with AgentManifest schema)
+ manifest = {
+ "module_name": module_name,
+ "agent_wheel": agent_wheel.name,
+ "cli_agents_wheel": cli_agents_wheel.name,
+ "vendor_packages": vendor_packages,
+ "built_at": datetime.now(timezone.utc).isoformat(),
+ }
+ manifest_json = json.dumps(manifest, indent=2)
+
+ builds[agent_name] = {"manifest_json": manifest_json, "files": files}
+ self._success(f"Agent '{agent_name}' bundle ready ({module_name}, {len(files)} files)")
+
+ return builds
+
+ async def _upload_agents(self, agent_builds: dict[str, dict[str, Any]]):
+ """Upload agent bundles to GCS.
+
+ Args:
+ agent_builds: Output from _build_agents()
+ """
+ if not agent_builds:
+ return
+
+ flow_folder = self.config["folder"].split("/", 1)[1] if "/" in self.config["folder"] else ""
+ base_uri = f"gs://{self.config['bucket']}/flows"
+ base_storage = await Storage.from_uri(base_uri)
+ base_storage = base_storage.with_base(flow_folder)
+
+ for agent_name, build_info in agent_builds.items():
+ agent_storage = base_storage.with_base(f"agents/{agent_name}")
+ self._info(f"Uploading agent '{agent_name}' bundle to {agent_storage.url_for('')}")
+
+ # Upload manifest
+ await agent_storage.write_bytes(
+ "manifest.json",
+ build_info["manifest_json"].encode(),
+ )
+
+ # Upload wheels
+ for filename, filepath in build_info["files"].items():
+ await agent_storage.write_bytes(filename, filepath.read_bytes())
+
+ self._success(f"Agent '{agent_name}' uploaded ({len(build_info['files'])} files)")
+
  async def _upload_package(self, tarball: Path):
  """Upload package tarball to Google Cloud Storage using Storage abstraction.
 
@@ -184,13 +375,17 @@ class Deployer:
 
  self._success(f"Package uploaded to {self.config['folder']}/{tarball.name}")
 
- async def _deploy_via_api(self):
+ async def _deploy_via_api(self, agent_builds: dict[str, dict[str, Any]] | None = None):
  """Create or update Prefect deployment using RunnerDeployment pattern.
 
  This is the official Prefect approach that:
  1. Automatically creates/updates the flow registration
  2. Handles deployment create vs update logic
  3. Properly formats all parameters for the API
+
+ Args:
+ agent_builds: Output from _build_agents(). If non-empty, sets
+ AGENT_BUNDLES_URI env var on the deployment.
  """
  # Define entrypoint (assumes flow function has same name as package)
  entrypoint = f"{self.config['package']}:{self.config['package']}"
@@ -244,6 +439,13 @@ class Deployer:
  # This is the official Prefect pattern that handles all the complexity
  self._info(f"Creating deployment for flow '{flow.name}'")
 
+ # Set AGENT_BUNDLES_URI env var if agents were built
+ job_variables: dict[str, Any] = {}
+ if agent_builds:
+ bundles_uri = f"gs://{self.config['bucket']}/{self.config['folder']}/agents"
+ job_variables["env"] = {"AGENT_BUNDLES_URI": bundles_uri}
+ self._info(f"Setting AGENT_BUNDLES_URI={bundles_uri}")
+
  deployment = RunnerDeployment(
  name=self.config["package"],
  flow_name=flow.name,
@@ -256,7 +458,7 @@ class Deployer:
  or f"Deployment for {self.config['package']} v{self.config['version']}",
  storage=_PullStepStorage(pull_steps),
  parameters={},
- job_variables={},
+ job_variables=job_variables,
  paused=False,
  )
 
@@ -296,14 +498,20 @@ class Deployer:
  print("=" * 70)
  print()
 
- # Phase 1: Build
+ # Phase 1: Build flow package
  tarball = self._build_package()
 
- # Phase 2: Upload
+ # Phase 2: Build agent bundles (if configured)
+ agent_builds = self._build_agents()
+
+ # Phase 3: Upload flow package
  await self._upload_package(tarball)
 
- # Phase 3: Deploy
- await self._deploy_via_api()
+ # Phase 4: Upload agent bundles
+ await self._upload_agents(agent_builds)
+
+ # Phase 5: Create/update Prefect deployment
+ await self._deploy_via_api(agent_builds)
 
  print()
  print("=" * 70)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.3.0
+ Version: 0.3.3
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -22,6 +22,7 @@ Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
  Requires-Dist: lmnr>=0.7.18
  Requires-Dist: openai>=1.109.1
+ Requires-Dist: pillow>=10.0.0
  Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
  Requires-Dist: prefect>=3.4.21
  Requires-Dist: pydantic-settings>=2.10.1
@@ -124,7 +125,7 @@ async def analyze_flow(
  for doc in documents:
  # Use AIMessages for LLM interaction
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  messages=AIMessages([doc])
  )
 
@@ -151,7 +152,7 @@ class Analysis(BaseModel):
 
  # Generate structured output
  response = await llm.generate_structured(
- model="gpt-5",
+ model="gpt-5.1",
  response_format=Analysis,
  messages="Analyze this product review: ..."
  )
@@ -246,7 +247,7 @@ from ai_pipeline_core import llm, AIMessages, ModelOptions
 
  # Simple generation
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  messages="Explain quantum computing"
  )
  print(response.content)
@@ -256,21 +257,21 @@ static_context = AIMessages([large_document])
 
  # First call: caches context
  r1 = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context, # Cached for 120 seconds by default
  messages="Summarize" # Dynamic query
  )
 
  # Second call: reuses cache
  r2 = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context, # Reused from cache!
  messages="Key points?" # Different query
  )
 
  # Custom cache TTL
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context,
  messages="Analyze",
  options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
@@ -278,7 +279,7 @@ response = await llm.generate(
 
  # Disable caching for dynamic contexts
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=dynamic_context,
  messages="Process",
  options=ModelOptions(cache_ttl=None) # No caching
@@ -335,6 +336,68 @@ async def main_flow(
  return DocumentList(results)
  ```
 
+ ### Local Trace Debugging
+
+ Save all trace spans to the local filesystem for LLM-assisted debugging:
+
+ ```bash
+ export TRACE_DEBUG_PATH=/path/to/debug/output
+ ```
+
+ This creates a hierarchical directory structure that mirrors the execution flow with automatic deduplication:
+
+ ```
+ 20260128_152932_abc12345_my_flow/
+ ├── _trace.yaml                # Trace metadata
+ ├── _index.yaml                # Span ID → path mapping
+ ├── _summary.md                # Unified summary for human inspection and LLM debugging
+ ├── artifacts/                 # Deduplicated content storage
+ │   └── sha256/
+ │       └── ab/cd/             # Sharded by hash prefix
+ │           └── abcdef...1234.txt  # Large content (>10KB)
+ └── 0001_my_flow/              # Root span (numbered for execution order)
+     ├── _span.yaml             # Span metadata (timing, status, I/O refs)
+     ├── input.yaml             # Structured inputs (inline or refs)
+     ├── output.yaml            # Structured outputs (inline or refs)
+     ├── 0002_task_1/           # Child spans nested inside parent
+     │   ├── _span.yaml
+     │   ├── input.yaml
+     │   ├── output.yaml
+     │   └── 0003_llm_call/
+     │       ├── _span.yaml
+     │       ├── input.yaml     # LLM messages with inline/external content
+     │       └── output.yaml
+     └── 0004_task_2/
+         └── ...
+ ```
+
+ **Key Features:**
+ - **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
+ - **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
+ - **AI-Friendly**: Files capped at 50KB for easy LLM processing
+ - **Lossless**: Full content reconstruction via `content_ref` pointers
+
+ Example `input.yaml` with externalization:
+ ```yaml
+ format_version: 3
+ type: llm_messages
+ messages:
+   - role: system
+     parts:
+       - type: text
+         size_bytes: 28500
+         content_ref:  # Large content → artifact
+           hash: sha256:a1b2c3d4...
+           path: artifacts/sha256/a1/b2/a1b2c3d4...txt
+         excerpt: "You are a helpful assistant...\n[TRUNCATED]"
+   - role: user
+     parts:
+       - type: text
+         content: "Hello!"  # Small content stays inline
+ ```
+
+ Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance - it combines high-level overview with detailed navigation for comprehensive trace analysis.
+
  ## Configuration
 
  ### Environment Variables
@@ -348,6 +411,9 @@ OPENAI_API_KEY=your-api-key
  LMNR_PROJECT_API_KEY=your-lmnr-key
  LMNR_DEBUG=true # Enable debug traces
 
+ # Optional: Local Trace Debugging
+ TRACE_DEBUG_PATH=/path/to/trace/output # Save traces locally for LLM-assisted debugging
+
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
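
To close the loop on the Local Trace Debugging section above, a minimal sketch of reading an externalized message part back out of a trace directory. It assumes PyYAML is available (it is not a declared dependency of ai-pipeline-core) and uses only the fields shown in the example `input.yaml`; the trace path is hypothetical:

```python
from pathlib import Path

import yaml  # PyYAML, assumed available


def part_text(trace_dir: Path, part: dict) -> str:
    """Return the full text of one message part, following content_ref if present."""
    ref = part.get("content_ref")
    if ref is None:
        return part.get("content", "")            # small parts are stored inline
    return (trace_dir / ref["path"]).read_text()  # externalized parts live under artifacts/


trace_dir = Path("/path/to/debug/output/20260128_152932_abc12345_my_flow")  # hypothetical
data = yaml.safe_load((trace_dir / "0001_my_flow" / "input.yaml").read_text())
for message in data["messages"]:
    for part in message["parts"]:
        print(message["role"], part_text(trace_dir, part)[:80])
```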