ai-pipeline-core 0.3.0__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/PKG-INFO +74 -8
  2. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/README.md +72 -7
  3. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/__init__.py +19 -2
  4. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/__init__.py +26 -0
  5. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/config.py +91 -0
  6. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/content.py +705 -0
  7. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/processor.py +99 -0
  8. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/summary.py +236 -0
  9. ai_pipeline_core-0.3.3/ai_pipeline_core/debug/writer.py +913 -0
  10. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/flow/options.py +3 -3
  11. ai_pipeline_core-0.3.3/ai_pipeline_core/images/__init__.py +362 -0
  12. ai_pipeline_core-0.3.3/ai_pipeline_core/images/_processing.py +157 -0
  13. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/ai_messages.py +25 -4
  14. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/client.py +14 -16
  15. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/model_response.py +5 -5
  16. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/model_types.py +10 -12
  17. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/logging/logging_mixin.py +2 -2
  18. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prompt_builder/prompt_builder.py +3 -3
  19. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/tracing.py +53 -1
  20. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/utils/deploy.py +214 -6
  21. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/pyproject.toml +3 -2
  22. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/.gitignore +0 -0
  23. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/LICENSE +0 -0
  24. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/deployment/__init__.py +0 -0
  25. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/deployment/base.py +0 -0
  26. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/deployment/contract.py +0 -0
  27. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/deployment/helpers.py +0 -0
  28. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/__init__.py +0 -0
  29. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/document.py +0 -0
  30. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/document_list.py +0 -0
  31. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/flow_document.py +0 -0
  32. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/mime_type.py +0 -0
  33. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/task_document.py +0 -0
  34. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/temporary_document.py +0 -0
  35. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/documents/utils.py +0 -0
  36. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/exceptions.py +0 -0
  37. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/flow/__init__.py +0 -0
  38. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/flow/config.py +0 -0
  39. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/__init__.py +0 -0
  40. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/llm/model_options.py +0 -0
  41. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/logging/__init__.py +0 -0
  42. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/logging/logging.yml +0 -0
  43. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/logging/logging_config.py +0 -0
  44. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/pipeline.py +0 -0
  45. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prefect.py +0 -0
  46. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/progress.py +0 -0
  47. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prompt_builder/__init__.py +0 -0
  48. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -0
  49. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prompt_builder/global_cache.py +0 -0
  50. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -0
  51. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -0
  52. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/prompt_manager.py +0 -0
  53. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/py.typed +0 -0
  54. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/settings.py +0 -0
  55. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/storage/__init__.py +0 -0
  56. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/storage/storage.py +0 -0
  57. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/utils/__init__.py +0 -0
  58. {ai_pipeline_core-0.3.0 → ai_pipeline_core-0.3.3}/ai_pipeline_core/utils/remote_deployment.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.3.0
+ Version: 0.3.3
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -22,6 +22,7 @@ Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
  Requires-Dist: lmnr>=0.7.18
  Requires-Dist: openai>=1.109.1
+ Requires-Dist: pillow>=10.0.0
  Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
  Requires-Dist: prefect>=3.4.21
  Requires-Dist: pydantic-settings>=2.10.1
@@ -124,7 +125,7 @@ async def analyze_flow(
  for doc in documents:
  # Use AIMessages for LLM interaction
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  messages=AIMessages([doc])
  )

@@ -151,7 +152,7 @@ class Analysis(BaseModel):

  # Generate structured output
  response = await llm.generate_structured(
- model="gpt-5",
+ model="gpt-5.1",
  response_format=Analysis,
  messages="Analyze this product review: ..."
  )
@@ -246,7 +247,7 @@ from ai_pipeline_core import llm, AIMessages, ModelOptions

  # Simple generation
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  messages="Explain quantum computing"
  )
  print(response.content)
@@ -256,21 +257,21 @@ static_context = AIMessages([large_document])

  # First call: caches context
  r1 = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context, # Cached for 120 seconds by default
  messages="Summarize" # Dynamic query
  )

  # Second call: reuses cache
  r2 = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context, # Reused from cache!
  messages="Key points?" # Different query
  )

  # Custom cache TTL
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context,
  messages="Analyze",
  options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
@@ -278,7 +279,7 @@ response = await llm.generate(

  # Disable caching for dynamic contexts
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=dynamic_context,
  messages="Process",
  options=ModelOptions(cache_ttl=None) # No caching
@@ -335,6 +336,68 @@ async def main_flow(
  return DocumentList(results)
  ```

+ ### Local Trace Debugging
+
+ Save all trace spans to the local filesystem for LLM-assisted debugging:
+
+ ```bash
+ export TRACE_DEBUG_PATH=/path/to/debug/output
+ ```
+
+ This creates a hierarchical directory structure that mirrors the execution flow with automatic deduplication:
+
+ ```
+ 20260128_152932_abc12345_my_flow/
+ ├── _trace.yaml # Trace metadata
+ ├── _index.yaml # Span ID → path mapping
+ ├── _summary.md # Unified summary for human inspection and LLM debugging
+ ├── artifacts/ # Deduplicated content storage
+ │ └── sha256/
+ │ └── ab/cd/ # Sharded by hash prefix
+ │ └── abcdef...1234.txt # Large content (>10KB)
+ └── 0001_my_flow/ # Root span (numbered for execution order)
+ ├── _span.yaml # Span metadata (timing, status, I/O refs)
+ ├── input.yaml # Structured inputs (inline or refs)
+ ├── output.yaml # Structured outputs (inline or refs)
+ ├── 0002_task_1/ # Child spans nested inside parent
+ │ ├── _span.yaml
+ │ ├── input.yaml
+ │ ├── output.yaml
+ │ └── 0003_llm_call/
+ │ ├── _span.yaml
+ │ ├── input.yaml # LLM messages with inline/external content
+ │ └── output.yaml
+ └── 0004_task_2/
+ └── ...
+ ```
+
+ **Key Features:**
+ - **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
+ - **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
+ - **AI-Friendly**: Files capped at 50KB for easy LLM processing
+ - **Lossless**: Full content reconstruction via `content_ref` pointers
+
+ Example `input.yaml` with externalization:
+ ```yaml
+ format_version: 3
+ type: llm_messages
+ messages:
+ - role: system
+ parts:
+ - type: text
+ size_bytes: 28500
+ content_ref: # Large content → artifact
+ hash: sha256:a1b2c3d4...
+ path: artifacts/sha256/a1/b2/a1b2c3d4...txt
+ excerpt: "You are a helpful assistant...\n[TRUNCATED]"
+ - role: user
+ parts:
+ - type: text
+ content: "Hello!" # Small content stays inline
+ ```
+
+ Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance - it combines high-level overview with detailed navigation for comprehensive trace analysis.
+
  ## Configuration

  ### Environment Variables
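
The `content_ref` layout documented in the section above is enough to resolve externalized payloads by hand. Below is a minimal sketch of that lookup, assuming artifact paths are relative to the trace root; the `resolve_part` helper and the PyYAML-based parsing are illustrative only, not the library's own `reconstruct_span_content` (whose signature is not shown in this diff):

```python
from pathlib import Path

import yaml  # PyYAML, assumed available for reading the debug output


def resolve_part(trace_dir: Path, part: dict) -> str:
    """Return the full text of one message part from input.yaml / output.yaml.

    Small parts keep their text inline under "content"; large parts carry a
    "content_ref" whose "path" points into the sharded artifacts/ store.
    """
    if "content" in part:  # small content stays inline
        return part["content"]
    ref = part["content_ref"]  # e.g. path: artifacts/sha256/a1/b2/<hash>.txt
    return (trace_dir / ref["path"]).read_text()


# Usage sketch: print every part of the root span's input in full.
trace_dir = Path("/path/to/debug/output/20260128_152932_abc12345_my_flow")
doc = yaml.safe_load((trace_dir / "0001_my_flow" / "input.yaml").read_text())
for message in doc.get("messages", []):
    for part in message.get("parts", []):
        print(message["role"], resolve_part(trace_dir, part)[:200])
```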
@@ -348,6 +411,9 @@ OPENAI_API_KEY=your-api-key
  LMNR_PROJECT_API_KEY=your-lmnr-key
  LMNR_DEBUG=true # Enable debug traces

+ # Optional: Local Trace Debugging
+ TRACE_DEBUG_PATH=/path/to/trace/output # Save traces locally for LLM-assisted debugging
+
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
@@ -79,7 +79,7 @@ async def analyze_flow(
  for doc in documents:
  # Use AIMessages for LLM interaction
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  messages=AIMessages([doc])
  )

@@ -106,7 +106,7 @@ class Analysis(BaseModel):

  # Generate structured output
  response = await llm.generate_structured(
- model="gpt-5",
+ model="gpt-5.1",
  response_format=Analysis,
  messages="Analyze this product review: ..."
  )
@@ -201,7 +201,7 @@ from ai_pipeline_core import llm, AIMessages, ModelOptions

  # Simple generation
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  messages="Explain quantum computing"
  )
  print(response.content)
@@ -211,21 +211,21 @@ static_context = AIMessages([large_document])

  # First call: caches context
  r1 = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context, # Cached for 120 seconds by default
  messages="Summarize" # Dynamic query
  )

  # Second call: reuses cache
  r2 = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context, # Reused from cache!
  messages="Key points?" # Different query
  )

  # Custom cache TTL
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=static_context,
  messages="Analyze",
  options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
@@ -233,7 +233,7 @@ response = await llm.generate(

  # Disable caching for dynamic contexts
  response = await llm.generate(
- model="gpt-5",
+ model="gpt-5.1",
  context=dynamic_context,
  messages="Process",
  options=ModelOptions(cache_ttl=None) # No caching
@@ -290,6 +290,68 @@ async def main_flow(
  return DocumentList(results)
  ```

+ ### Local Trace Debugging
+
+ Save all trace spans to the local filesystem for LLM-assisted debugging:
+
+ ```bash
+ export TRACE_DEBUG_PATH=/path/to/debug/output
+ ```
+
+ This creates a hierarchical directory structure that mirrors the execution flow with automatic deduplication:
+
+ ```
+ 20260128_152932_abc12345_my_flow/
+ ├── _trace.yaml # Trace metadata
+ ├── _index.yaml # Span ID → path mapping
+ ├── _summary.md # Unified summary for human inspection and LLM debugging
+ ├── artifacts/ # Deduplicated content storage
+ │ └── sha256/
+ │ └── ab/cd/ # Sharded by hash prefix
+ │ └── abcdef...1234.txt # Large content (>10KB)
+ └── 0001_my_flow/ # Root span (numbered for execution order)
+ ├── _span.yaml # Span metadata (timing, status, I/O refs)
+ ├── input.yaml # Structured inputs (inline or refs)
+ ├── output.yaml # Structured outputs (inline or refs)
+ ├── 0002_task_1/ # Child spans nested inside parent
+ │ ├── _span.yaml
+ │ ├── input.yaml
+ │ ├── output.yaml
+ │ └── 0003_llm_call/
+ │ ├── _span.yaml
+ │ ├── input.yaml # LLM messages with inline/external content
+ │ └── output.yaml
+ └── 0004_task_2/
+ └── ...
+ ```
+
+ **Key Features:**
+ - **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
+ - **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
+ - **AI-Friendly**: Files capped at 50KB for easy LLM processing
+ - **Lossless**: Full content reconstruction via `content_ref` pointers
+
+ Example `input.yaml` with externalization:
+ ```yaml
+ format_version: 3
+ type: llm_messages
+ messages:
+ - role: system
+ parts:
+ - type: text
+ size_bytes: 28500
+ content_ref: # Large content → artifact
+ hash: sha256:a1b2c3d4...
+ path: artifacts/sha256/a1/b2/a1b2c3d4...txt
+ excerpt: "You are a helpful assistant...\n[TRUNCATED]"
+ - role: user
+ parts:
+ - type: text
+ content: "Hello!" # Small content stays inline
+ ```
+
+ Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance - it combines high-level overview with detailed navigation for comprehensive trace analysis.
+
  ## Configuration

  ### Environment Variables
@@ -303,6 +365,9 @@ OPENAI_API_KEY=your-api-key
  LMNR_PROJECT_API_KEY=your-lmnr-key
  LMNR_DEBUG=true # Enable debug traces

+ # Optional: Local Trace Debugging
+ TRACE_DEBUG_PATH=/path/to/trace/output # Save traces locally for LLM-assisted debugging
+
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
@@ -59,7 +59,7 @@ Quick Start:
  ... ) -> DocumentList:
  ... # Messages accept AIMessages or str. Wrap documents: AIMessages([doc])
  ... response = await llm.generate(
- ... "gpt-5",
+ ... "gpt-5.1",
  ... messages=AIMessages([documents[0]])
  ... )
  ... result = OutputDoc.create(
@@ -95,6 +95,15 @@ from .documents import (
  sanitize_url,
  )
  from .flow import FlowConfig, FlowOptions
+ from .images import (
+ ImagePart,
+ ImagePreset,
+ ImageProcessingConfig,
+ ImageProcessingError,
+ ProcessedImage,
+ process_image,
+ process_image_to_documents,
+ )
  from .llm import (
  AIMessages,
  AIMessageType,
@@ -121,7 +130,7 @@ from .settings import Settings
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
  from .utils.remote_deployment import remote_deployment

- __version__ = "0.3.0"
+ __version__ = "0.3.3"

  __all__ = [
  # Config/Settings
@@ -175,6 +184,14 @@ __all__ = [
  # Prompt Builder
  "PromptBuilder",
  "EnvironmentVariable",
+ # Images
+ "process_image",
+ "process_image_to_documents",
+ "ImagePreset",
+ "ImageProcessingConfig",
+ "ProcessedImage",
+ "ImagePart",
+ "ImageProcessingError",
  # Utils
  "PromptManager",
  ]
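
The new `images` exports above become importable straight from the package root. The snippet below is only a rough usage sketch: `process_image_to_documents` and `ImageProcessingError` are confirmed by this diff, but the function's parameters are not, so the single path argument is an assumption; see `ai_pipeline_core/images/__init__.py` (added in 0.3.3) for the real signature.

```python
from ai_pipeline_core import ImageProcessingError, process_image_to_documents

try:
    # Hypothetical call: the argument shape (a single image path) is NOT
    # confirmed by this diff; only the function and exception names are.
    docs = process_image_to_documents("screenshot.png")
except ImageProcessingError as exc:
    print(f"image could not be processed: {exc}")
```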
@@ -0,0 +1,26 @@
+ """Local trace debugging system for AI pipelines.
+
+ This module provides filesystem-based trace debugging that saves all spans
+ with their inputs/outputs for LLM-assisted debugging.
+
+ Enable by setting TRACE_DEBUG_PATH environment variable.
+ """
+
+ from .config import TraceDebugConfig
+ from .content import ArtifactStore, ContentRef, ContentWriter, reconstruct_span_content
+ from .processor import LocalDebugSpanProcessor
+ from .summary import generate_summary
+ from .writer import LocalTraceWriter, TraceState, WriteJob
+
+ __all__ = [
+ "TraceDebugConfig",
+ "ContentRef",
+ "ContentWriter",
+ "ArtifactStore",
+ "reconstruct_span_content",
+ "LocalDebugSpanProcessor",
+ "LocalTraceWriter",
+ "TraceState",
+ "WriteJob",
+ "generate_summary",
+ ]
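
For orientation, these exports map onto the new files listed at the top of this diff (`config.py`, `content.py`, `processor.py`, `summary.py`, `writer.py`). A quick sketch of the only wiring the docstring documents, setting `TRACE_DEBUG_PATH` before the pipeline runs; constructor signatures are not part of this hunk, so nothing is instantiated here.

```python
import os

# Per the module docstring, local trace debugging is switched on by this
# environment variable; set it before the pipeline starts.
os.environ["TRACE_DEBUG_PATH"] = "/tmp/ai-pipeline-traces"

from ai_pipeline_core.debug import (
    TraceDebugConfig,         # config.py  - frozen settings model
    LocalDebugSpanProcessor,  # processor.py - saves spans to the local filesystem
    LocalTraceWriter,         # writer.py - presumably performs the actual writes
    generate_summary,         # summary.py - builds _summary.md
)
```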
@@ -0,0 +1,91 @@
+ """Configuration for local trace debugging."""
+
+ from pathlib import Path
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+
+ class TraceDebugConfig(BaseModel):
+ """Configuration for local trace debugging.
+
+ Controls how traces are written to the local filesystem for debugging.
+ Enable by setting TRACE_DEBUG_PATH environment variable.
+ """
+
+ model_config = ConfigDict(frozen=True)
+
+ path: Path = Field(description="Directory for debug traces")
+ enabled: bool = Field(default=True, description="Whether debug tracing is enabled")
+
+ # Content size limits (Issue #2)
+ max_file_bytes: int = Field(
+ default=50_000,
+ description="Max bytes for input.yaml or output.yaml. Elements externalized to stay under.",
+ )
+ max_element_bytes: int = Field(
+ default=10_000,
+ description="Max bytes for single element. Above this, partial + artifact ref.",
+ )
+ element_excerpt_bytes: int = Field(
+ default=2_000,
+ description="Bytes of content to keep inline when element exceeds max_element_bytes.",
+ )
+ max_content_bytes: int = Field(
+ default=10_000_000,
+ description="Max bytes for any single artifact. Above this, truncate.",
+ )
+
+ # Image handling (Issue #7 - no changes per user)
+ extract_base64_images: bool = Field(
+ default=True,
+ description="Extract base64 images to artifact files",
+ )
+
+ # Span optimization (Issue #4)
+ merge_wrapper_spans: bool = Field(
+ default=True,
+ description="Merge Prefect wrapper spans with inner traced function spans",
+ )
+
+ # Events (Issue #12)
+ events_file_mode: str = Field(
+ default="errors_only",
+ description="When to write events.yaml: 'all', 'errors_only', 'none'",
+ )
+
+ # Indexes (Issue #1)
+ include_llm_index: bool = Field(
+ default=True,
+ description="Generate _llm_calls.yaml with LLM-specific details",
+ )
+ include_error_index: bool = Field(
+ default=True,
+ description="Generate _errors.yaml with failed span details",
+ )
+
+ # Cleanup
+ max_traces: int | None = Field(
+ default=None,
+ description="Max number of traces to keep. None for unlimited.",
+ )
+
+ # Security - default redaction patterns for common secrets
+ redact_patterns: tuple[str, ...] = Field(
+ default=(
+ r"sk-[a-zA-Z0-9]{20,}", # OpenAI API keys
+ r"sk-proj-[a-zA-Z0-9\-_]{20,}", # OpenAI project keys
+ r"AKIA[0-9A-Z]{16}", # AWS access keys
+ r"ghp_[a-zA-Z0-9]{36}", # GitHub personal tokens
+ r"gho_[a-zA-Z0-9]{36}", # GitHub OAuth tokens
+ r"xoxb-[a-zA-Z0-9\-]+", # Slack bot tokens
+ r"xoxp-[a-zA-Z0-9\-]+", # Slack user tokens
+ r"(?i)password\s*[:=]\s*['\"]?[^\s'\"]+", # Passwords
+ r"(?i)secret\s*[:=]\s*['\"]?[^\s'\"]+", # Secrets
+ r"(?i)api[_\-]?key\s*[:=]\s*['\"]?[^\s'\"]+", # API keys
+ r"(?i)bearer\s+[a-zA-Z0-9\-_\.]+", # Bearer tokens
+ ),
+ description="Regex patterns for secrets to redact",
+ )
+
+ # Summary
+ generate_summary: bool = Field(default=True, description="Generate _summary.md")
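
`TraceDebugConfig` is frozen, so thresholds are fixed at construction time; only `path` is required. Below is a small sketch built from the fields defined in this hunk. How the library itself applies `redact_patterns` is not shown here, so the `re.sub` loop is only an illustration of what the patterns are for.

```python
import re
from pathlib import Path

from ai_pipeline_core.debug import TraceDebugConfig

# Tighter externalization thresholds, keep only the 20 most recent traces.
config = TraceDebugConfig(
    path=Path("/tmp/ai-pipeline-traces"),
    max_element_bytes=5_000,      # externalize anything above 5 KB
    element_excerpt_bytes=1_000,  # keep a 1 KB inline preview
    max_traces=20,
)


def redact(text: str, patterns: tuple[str, ...] = config.redact_patterns) -> str:
    """Mask anything matching the configured secret patterns (illustrative only)."""
    for pattern in patterns:
        text = re.sub(pattern, "[REDACTED]", text)
    return text


print(redact("Authorization: Bearer abc123token"))  # the bearer token is masked
```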