ai-pipeline-core 0.1.13__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/.gitignore +1 -0
  2. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/PKG-INFO +60 -23
  3. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/README.md +58 -22
  4. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/__init__.py +25 -14
  5. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/__init__.py +2 -1
  6. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/document.py +317 -49
  7. ai_pipeline_core-0.2.0/ai_pipeline_core/documents/document_list.py +343 -0
  8. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/flow_document.py +8 -29
  9. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/task_document.py +6 -27
  10. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/temporary_document.py +6 -27
  11. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/utils.py +64 -1
  12. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/config.py +174 -5
  13. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/options.py +2 -2
  14. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/__init__.py +6 -1
  15. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/ai_messages.py +14 -7
  16. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/client.py +143 -55
  17. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/model_options.py +20 -5
  18. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/llm/model_response.py +77 -29
  19. ai_pipeline_core-0.2.0/ai_pipeline_core/llm/model_types.py +82 -0
  20. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/__init__.py +0 -2
  21. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging_config.py +0 -6
  22. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging_mixin.py +2 -10
  23. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/pipeline.py +68 -65
  24. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/prefect.py +12 -3
  25. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/prompt_manager.py +6 -7
  26. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/settings.py +13 -5
  27. ai_pipeline_core-0.2.0/ai_pipeline_core/simple_runner/__init__.py +14 -0
  28. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/simple_runner/cli.py +13 -12
  29. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/simple_runner/simple_runner.py +34 -172
  30. ai_pipeline_core-0.2.0/ai_pipeline_core/storage/__init__.py +8 -0
  31. ai_pipeline_core-0.2.0/ai_pipeline_core/storage/storage.py +628 -0
  32. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/tracing.py +110 -26
  33. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/pyproject.toml +4 -2
  34. ai_pipeline_core-0.1.13/ai_pipeline_core/documents/document_list.py +0 -240
  35. ai_pipeline_core-0.1.13/ai_pipeline_core/llm/model_types.py +0 -84
  36. ai_pipeline_core-0.1.13/ai_pipeline_core/simple_runner/__init__.py +0 -24
  37. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/LICENSE +0 -0
  38. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/documents/mime_type.py +0 -0
  39. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/exceptions.py +0 -0
  40. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/flow/__init__.py +0 -0
  41. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/logging/logging.yml +0 -0
  42. {ai_pipeline_core-0.1.13 → ai_pipeline_core-0.2.0}/ai_pipeline_core/py.typed +0 -0
@@ -112,6 +112,7 @@ venv/
  ENV/
  env.bak/
  venv.bak/
+ key.json

  # Spyder project settings
  .spyderproject
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.1.13
+ Version: 0.2.0
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -22,6 +22,7 @@ Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
  Requires-Dist: lmnr>=0.7.6
  Requires-Dist: openai>=1.99.9
+ Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
  Requires-Dist: prefect>=3.4.13
  Requires-Dist: pydantic-settings>=2.10.1
  Requires-Dist: pydantic>=2.11.7
@@ -57,11 +58,11 @@ AI Pipeline Core is a production-ready framework that combines document processi

  ### Key Features

- - **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection
- - **LLM Integration**: Unified interface to any model via LiteLLM proxy with intelligent context caching
+ - **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection and provenance tracking
+ - **LLM Integration**: Unified interface to any model via LiteLLM proxy with configurable context caching
  - **Structured Output**: Type-safe generation with Pydantic model validation
  - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
- - **Observability**: Built-in distributed tracing via Laminar (LMNR) for debugging and monitoring
+ - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
  - **Local Development**: Simple runner for testing pipelines without infrastructure

  ## Installation
@@ -111,15 +112,13 @@ class AnalysisConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [InputDoc]
      OUTPUT_DOCUMENT_TYPE = OutputDoc

- # Create pipeline flow
- @pipeline_flow
+ # Create pipeline flow with required config
+ @pipeline_flow(config=AnalysisConfig)
  async def analyze_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
-     config = AnalysisConfig()
-
      # Process documents
      outputs = []
      for doc in documents:
@@ -136,7 +135,7 @@ async def analyze_flow(
          outputs.append(output)

      # RECOMMENDED: Always validate output
-     return config.create_and_validate_output(outputs)
+     return AnalysisConfig.create_and_validate_output(outputs)
  ```

  ### Structured Output
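Read together, the two hunks above give the following flow skeleton in the updated README (a minimal sketch assembled from the visible context; AnalysisConfig, InputDoc, and OutputDoc are defined in the surrounding README, and the per-document processing between the hunks is not shown in the diff, so the loop body here is only a placeholder):

    from ai_pipeline_core import DocumentList, FlowOptions, pipeline_flow

    # The config class is now passed to the decorator instead of being
    # instantiated inside the flow body.
    @pipeline_flow(config=AnalysisConfig)
    async def analyze_flow(
        project_name: str,
        documents: DocumentList,
        flow_options: FlowOptions,
    ) -> DocumentList:
        outputs = []
        for doc in documents:
            ...  # per-document processing, elided in the diff
        # create_and_validate_output is now called on the config class itself
        return AnalysisConfig.create_and_validate_output(outputs)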
@@ -178,6 +177,19 @@ doc = MyDocument.create(
  # Parse back to original type
  data = doc.parse(dict) # Returns {"key": "value"}

+ # Document provenance tracking (new in v0.1.14)
+ doc_with_sources = MyDocument.create(
+     name="derived.json",
+     content={"result": "processed"},
+     sources=[source_doc.sha256, "https://api.example.com/data"]
+ )
+
+ # Check provenance
+ for hash in doc_with_sources.get_source_documents():
+     print(f"Derived from document: {hash}")
+ for ref in doc_with_sources.get_source_references():
+     print(f"External source: {ref}")
+
  # Temporary documents (never persisted)
  temp = TemporaryDocument.create(
      name="api_response.json",
@@ -211,6 +223,10 @@ if doc.is_text:

  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()
+
+ # Enhanced filtering (new in v0.1.14)
+ filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
+ named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
  ```

  ### LLM Integration
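A short usage sketch for the new filtering; the document classes, instances, and file names here are illustrative, and the single-argument form on the last line is assumed to keep working rather than shown in the diff:

    from ai_pipeline_core import DocumentList

    docs = DocumentList([report_doc, summary_doc, raw_doc])

    # New in this release: filter_by accepts a list of types or a list of names
    reports_and_summaries = docs.filter_by([ReportDoc, SummaryDoc])
    by_name = docs.filter_by(["report.md", "summary.md"])

    # Assumed, not shown above: the earlier single-type form
    only_reports = docs.filter_by(ReportDoc)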
@@ -233,7 +249,7 @@ static_context = AIMessages([large_document])
  # First call: caches context
  r1 = await llm.generate(
      model="gpt-5",
-     context=static_context, # Cached for 120 seconds
+     context=static_context, # Cached for 120 seconds by default
      messages="Summarize" # Dynamic query
  )

@@ -243,6 +259,22 @@ r2 = await llm.generate(
      context=static_context, # Reused from cache!
      messages="Key points?" # Different query
  )
+
+ # Custom cache TTL (new in v0.1.14)
+ response = await llm.generate(
+     model="gpt-5",
+     context=static_context,
+     messages="Analyze",
+     options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
+ )
+
+ # Disable caching for dynamic contexts
+ response = await llm.generate(
+     model="gpt-5",
+     context=dynamic_context,
+     messages="Process",
+     options=ModelOptions(cache_ttl=None) # No caching
+ )
  ```

  ### Flow Configuration
@@ -256,15 +288,15 @@ class ProcessingConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [RawDataDocument]
      OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!

- # Use in flows for validation
- @pipeline_flow
- async def process(
-     config: ProcessingConfig,
-     documents: DocumentList,
-     flow_options: FlowOptions
- ) -> DocumentList:
-     # ... processing logic ...
-     return config.create_and_validate_output(outputs)
+ # Use in flows for validation
+ @pipeline_flow(config=ProcessingConfig)
+ async def process(
+     project_name: str,
+     documents: DocumentList,
+     flow_options: FlowOptions
+ ) -> DocumentList:
+     # ... processing logic ...
+     return ProcessingConfig.create_and_validate_output(outputs)
  ```

  ### Pipeline Decorators
@@ -272,13 +304,15 @@ class ProcessingConfig(FlowConfig):
  Enhanced decorators with built-in tracing and monitoring:

  ```python
- from ai_pipeline_core import pipeline_flow, pipeline_task
+ from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost

  @pipeline_task # Automatic retry, tracing, and monitoring
  async def process_chunk(data: str) -> str:
-     return await transform(data)
+     result = await transform(data)
+     set_trace_cost(0.05) # Track costs (new in v0.1.14)
+     return result

- @pipeline_flow # Full observability and orchestration
+ @pipeline_flow(config=MyFlowConfig) # Full observability and orchestration
  async def main_flow(
      project_name: str,
      documents: DocumentList,
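The same set_trace_cost call in a self-contained form (the README's transform is replaced by a trivial stand-in here, and the diff does not state the unit of the cost value):

    from ai_pipeline_core import pipeline_task, set_trace_cost

    @pipeline_task  # still used without parameters, per the framework rules below
    async def process_chunk(data: str) -> str:
        result = data.upper()  # stand-in for the README's transform(data)
        set_trace_cost(0.05)   # attach a cost figure to the current trace
        return result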
@@ -304,6 +338,9 @@ LMNR_DEBUG=true # Enable debug traces
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
+
+ # Optional: Storage (for Google Cloud Storage)
+ GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
  ```

  ### Settings Management
@@ -331,7 +368,7 @@ print(settings.app_name)

  ### Framework Rules (90% Use Cases)

- 1. **Decorators**: Use `@trace`, `@pipeline_task`, `@pipeline_flow` WITHOUT parameters
+ 1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
  2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
  3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
  4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
@@ -13,11 +13,11 @@ AI Pipeline Core is a production-ready framework that combines document processi

  ### Key Features

- - **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection
- - **LLM Integration**: Unified interface to any model via LiteLLM proxy with intelligent context caching
+ - **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection and provenance tracking
+ - **LLM Integration**: Unified interface to any model via LiteLLM proxy with configurable context caching
  - **Structured Output**: Type-safe generation with Pydantic model validation
  - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
- - **Observability**: Built-in distributed tracing via Laminar (LMNR) for debugging and monitoring
+ - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
  - **Local Development**: Simple runner for testing pipelines without infrastructure

  ## Installation
@@ -67,15 +67,13 @@ class AnalysisConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [InputDoc]
      OUTPUT_DOCUMENT_TYPE = OutputDoc

- # Create pipeline flow
- @pipeline_flow
+ # Create pipeline flow with required config
+ @pipeline_flow(config=AnalysisConfig)
  async def analyze_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
-     config = AnalysisConfig()
-
      # Process documents
      outputs = []
      for doc in documents:
@@ -92,7 +90,7 @@ async def analyze_flow(
          outputs.append(output)

      # RECOMMENDED: Always validate output
-     return config.create_and_validate_output(outputs)
+     return AnalysisConfig.create_and_validate_output(outputs)
  ```

  ### Structured Output
@@ -134,6 +132,19 @@ doc = MyDocument.create(
  # Parse back to original type
  data = doc.parse(dict) # Returns {"key": "value"}

+ # Document provenance tracking (new in v0.1.14)
+ doc_with_sources = MyDocument.create(
+     name="derived.json",
+     content={"result": "processed"},
+     sources=[source_doc.sha256, "https://api.example.com/data"]
+ )
+
+ # Check provenance
+ for hash in doc_with_sources.get_source_documents():
+     print(f"Derived from document: {hash}")
+ for ref in doc_with_sources.get_source_references():
+     print(f"External source: {ref}")
+
  # Temporary documents (never persisted)
  temp = TemporaryDocument.create(
      name="api_response.json",
@@ -167,6 +178,10 @@ if doc.is_text:

  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()
+
+ # Enhanced filtering (new in v0.1.14)
+ filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
+ named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
  ```

  ### LLM Integration
@@ -189,7 +204,7 @@ static_context = AIMessages([large_document])
  # First call: caches context
  r1 = await llm.generate(
      model="gpt-5",
-     context=static_context, # Cached for 120 seconds
+     context=static_context, # Cached for 120 seconds by default
      messages="Summarize" # Dynamic query
  )

@@ -199,6 +214,22 @@ r2 = await llm.generate(
      context=static_context, # Reused from cache!
      messages="Key points?" # Different query
  )
+
+ # Custom cache TTL (new in v0.1.14)
+ response = await llm.generate(
+     model="gpt-5",
+     context=static_context,
+     messages="Analyze",
+     options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
+ )
+
+ # Disable caching for dynamic contexts
+ response = await llm.generate(
+     model="gpt-5",
+     context=dynamic_context,
+     messages="Process",
+     options=ModelOptions(cache_ttl=None) # No caching
+ )
  ```

  ### Flow Configuration
@@ -212,15 +243,15 @@ class ProcessingConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [RawDataDocument]
      OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!

- # Use in flows for validation
- @pipeline_flow
- async def process(
-     config: ProcessingConfig,
-     documents: DocumentList,
-     flow_options: FlowOptions
- ) -> DocumentList:
-     # ... processing logic ...
-     return config.create_and_validate_output(outputs)
+ # Use in flows for validation
+ @pipeline_flow(config=ProcessingConfig)
+ async def process(
+     project_name: str,
+     documents: DocumentList,
+     flow_options: FlowOptions
+ ) -> DocumentList:
+     # ... processing logic ...
+     return ProcessingConfig.create_and_validate_output(outputs)
  ```

  ### Pipeline Decorators
@@ -228,13 +259,15 @@ class ProcessingConfig(FlowConfig):
  Enhanced decorators with built-in tracing and monitoring:

  ```python
- from ai_pipeline_core import pipeline_flow, pipeline_task
+ from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost

  @pipeline_task # Automatic retry, tracing, and monitoring
  async def process_chunk(data: str) -> str:
-     return await transform(data)
+     result = await transform(data)
+     set_trace_cost(0.05) # Track costs (new in v0.1.14)
+     return result

- @pipeline_flow # Full observability and orchestration
+ @pipeline_flow(config=MyFlowConfig) # Full observability and orchestration
  async def main_flow(
      project_name: str,
      documents: DocumentList,
@@ -260,6 +293,9 @@ LMNR_DEBUG=true # Enable debug traces
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
+
+ # Optional: Storage (for Google Cloud Storage)
+ GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
  ```

  ### Settings Management
@@ -287,7 +323,7 @@ print(settings.app_name)

  ### Framework Rules (90% Use Cases)

- 1. **Decorators**: Use `@trace`, `@pipeline_task`, `@pipeline_flow` WITHOUT parameters
+ 1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
  2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
  3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
  4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
@@ -7,7 +7,7 @@ It combines document processing, LLM integration, and workflow orchestration int
  system designed for production use.

  The framework enforces best practices through strong typing (Pydantic), automatic retries,
- cost tracking, and distributed tracing. All I/O operations are async for maximum throughput.
+ and cost tracking. All I/O operations are async for maximum throughput.

  **CRITICAL IMPORT RULE**:
  Always import from the top-level package:
@@ -18,12 +18,12 @@ cost tracking, and distributed tracing. All I/O operations are async for maximum
      from ai_pipeline_core.llm import generate # NO!
      from ai_pipeline_core.documents import FlowDocument # NO!

- FRAMEWORK RULES (90% Use Cases):
- 1. Decorators: Use @trace, @pipeline_task, @pipeline_flow WITHOUT parameters
+ FRAMEWORK RULES (Use by default, unless instructed otherwise):
+ 1. Decorators: Use @pipeline_task WITHOUT parameters, @pipeline_flow WITH config
  2. Logging: Use get_pipeline_logger(__name__) - NEVER print() or logging module
  3. LLM calls: Use AIMessages or str. Wrap Documents in AIMessages; do not call .text yourself
- 4. Options: Omit ModelOptions unless specifically needed (defaults are optimal)
- 5. Documents: Create with just name and content - skip description
+ 4. Options: DO NOT use options parameter - omit it entirely (defaults are optimal)
+ 5. Documents: Create with just name and content - skip description unless needed
  6. FlowConfig: OUTPUT_DOCUMENT_TYPE must differ from all INPUT_DOCUMENT_TYPES
  7. Initialization: PromptManager and logger at module scope, not in functions
  8. DocumentList: Use default constructor - no validation flags needed
@@ -36,18 +36,22 @@ Core Capabilities:
  - **LLM Integration**: Unified interface to any model via LiteLLM with caching
  - **Structured Output**: Type-safe generation with Pydantic model validation
  - **Workflow Orchestration**: Prefect-based flows and tasks with retries
- - **Observability**: Distributed tracing via Laminar (LMNR) for debugging
+ - **Observability**: Built-in monitoring and debugging capabilities
  - **Local Development**: Simple runner for testing without infrastructure

  Quick Start:
      >>> from ai_pipeline_core import (
-     ...     pipeline_flow, FlowDocument, DocumentList, FlowOptions, llm, AIMessages
+     ...     pipeline_flow, FlowDocument, DocumentList, FlowOptions, FlowConfig, llm, AIMessages
      ... )
      >>>
      >>> class OutputDoc(FlowDocument):
      ...     '''Analysis result document.'''
      >>>
-     >>> @pipeline_flow
+     >>> class MyFlowConfig(FlowConfig):
+     ...     INPUT_DOCUMENT_TYPES = []
+     ...     OUTPUT_DOCUMENT_TYPE = OutputDoc
+     >>>
+     >>> @pipeline_flow(config=MyFlowConfig)
      >>> async def analyze_flow(
      ...     project_name: str,
      ...     documents: DocumentList,
@@ -55,7 +59,7 @@ Quick Start:
      ... ) -> DocumentList:
      ...     # Messages accept AIMessages or str. Wrap documents: AIMessages([doc])
      ...     response = await llm.generate(
-     ...         model="gpt-5",
+     ...         "gpt-5",
      ...         messages=AIMessages([documents[0]])
      ...     )
      ...     result = OutputDoc.create(
@@ -76,8 +80,6 @@ Optional Environment Variables:
  - PREFECT_API_KEY: Prefect API authentication key
  - LMNR_PROJECT_API_KEY: Laminar (LMNR) API key for tracing
  - LMNR_DEBUG: Set to "true" to enable debug-level traces
- - LMNR_SESSION_ID: Default session ID for traces
- - LMNR_USER_ID: Default user ID for traces
  """

  from . import llm
@@ -88,6 +90,7 @@ from .documents import (
      TaskDocument,
      TemporaryDocument,
      canonical_name_key,
+     is_document_sha256,
      sanitize_url,
  )
  from .flow import FlowConfig, FlowOptions
@@ -98,6 +101,8 @@ from .llm import (
      ModelOptions,
      ModelResponse,
      StructuredModelResponse,
+     generate,
+     generate_structured,
  )
  from .logging import (
      LoggerMixin,
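With generate and generate_structured re-exported here, calls no longer have to go through the llm namespace (which is kept, per the backward-compatibility comment in __all__ below). A small sketch assuming the top-level functions share llm.generate's signature, using the positional model argument from the Quick Start doctest above:

    from ai_pipeline_core import AIMessages, generate

    async def summarize(doc):
        # Pre-0.2.0 spelling: from ai_pipeline_core import llm; await llm.generate(...)
        return await generate("gpt-5", messages=AIMessages([doc]))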
@@ -111,9 +116,9 @@ from .pipeline import pipeline_flow, pipeline_task
  from .prefect import disable_run_logger, prefect_test_harness
  from .prompt_manager import PromptManager
  from .settings import Settings
- from .tracing import TraceInfo, TraceLevel, trace
+ from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace

- __version__ = "0.1.13"
+ __version__ = "0.2.0"

  __all__ = [
      # Config/Settings
@@ -132,6 +137,7 @@ __all__ = [
      "TaskDocument",
      "TemporaryDocument",
      "canonical_name_key",
+     "is_document_sha256",
      "sanitize_url",
      # Flow/Task
      "FlowConfig",
@@ -143,7 +149,9 @@ __all__ = [
      "prefect_test_harness",
      "disable_run_logger",
      # LLM
-     "llm",
+     "llm", # for backward compatibility
+     "generate",
+     "generate_structured",
      "ModelName",
      "ModelOptions",
      "ModelResponse",
@@ -154,6 +162,9 @@ __all__ = [
      "trace",
      "TraceLevel",
      "TraceInfo",
+     "set_trace_cost",
      # Utils
      "PromptManager",
+     "generate",
+     "generate_structured",
  ]
@@ -12,7 +12,7 @@ from .document_list import DocumentList
  from .flow_document import FlowDocument
  from .task_document import TaskDocument
  from .temporary_document import TemporaryDocument
- from .utils import canonical_name_key, sanitize_url
+ from .utils import canonical_name_key, is_document_sha256, sanitize_url

  __all__ = [
      "Document",
@@ -21,5 +21,6 @@ __all__ = [
      "TaskDocument",
      "TemporaryDocument",
      "canonical_name_key",
+     "is_document_sha256",
      "sanitize_url",
  ]