ai-pipeline-core 0.1.14.tar.gz → 0.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/.gitignore +1 -0
  2. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/PKG-INFO +35 -20
  3. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/README.md +30 -16
  4. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/__init__.py +21 -13
  5. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/document.py +202 -51
  6. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/document_list.py +148 -24
  7. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/flow_document.py +2 -6
  8. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/task_document.py +0 -4
  9. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/temporary_document.py +1 -8
  10. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/flow/config.py +174 -5
  11. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/__init__.py +1 -6
  12. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/ai_messages.py +137 -4
  13. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/client.py +118 -65
  14. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/model_options.py +6 -7
  15. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/model_response.py +17 -16
  16. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/llm/model_types.py +3 -7
  17. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/logging/__init__.py +0 -2
  18. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/logging/logging_config.py +0 -6
  19. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/logging/logging_mixin.py +2 -10
  20. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/pipeline.py +54 -68
  21. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/prefect.py +12 -3
  22. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/prompt_manager.py +14 -7
  23. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/settings.py +13 -5
  24. ai_pipeline_core-0.2.1/ai_pipeline_core/simple_runner/__init__.py +14 -0
  25. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/simple_runner/cli.py +13 -12
  26. ai_pipeline_core-0.2.1/ai_pipeline_core/simple_runner/simple_runner.py +247 -0
  27. ai_pipeline_core-0.2.1/ai_pipeline_core/storage/__init__.py +8 -0
  28. ai_pipeline_core-0.2.1/ai_pipeline_core/storage/storage.py +628 -0
  29. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/tracing.py +234 -30
  30. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/pyproject.toml +7 -5
  31. ai_pipeline_core-0.1.14/ai_pipeline_core/simple_runner/__init__.py +0 -24
  32. ai_pipeline_core-0.1.14/ai_pipeline_core/simple_runner/simple_runner.py +0 -402
  33. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/LICENSE +0 -0
  34. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/__init__.py +0 -0
  35. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/mime_type.py +0 -0
  36. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/documents/utils.py +0 -0
  37. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/exceptions.py +0 -0
  38. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/flow/__init__.py +0 -0
  39. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/flow/options.py +0 -0
  40. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/logging/logging.yml +0 -0
  41. {ai_pipeline_core-0.1.14 → ai_pipeline_core-0.2.1}/ai_pipeline_core/py.typed +0 -0
.gitignore

@@ -112,6 +112,7 @@ venv/
  ENV/
  env.bak/
  venv.bak/
+ key.json

  # Spyder project settings
  .spyderproject
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.1.14
+ Version: 0.2.1
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,9 +20,10 @@ Classifier: Typing :: Typed
  Requires-Python: >=3.12
  Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
- Requires-Dist: lmnr>=0.7.6
- Requires-Dist: openai>=1.99.9
- Requires-Dist: prefect>=3.4.13
+ Requires-Dist: lmnr>=0.7.13
+ Requires-Dist: openai>=1.108.1
+ Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
+ Requires-Dist: prefect>=3.4.19
  Requires-Dist: pydantic-settings>=2.10.1
  Requires-Dist: pydantic>=2.11.7
  Requires-Dist: python-magic>=0.4.27
@@ -111,15 +112,13 @@ class AnalysisConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [InputDoc]
      OUTPUT_DOCUMENT_TYPE = OutputDoc

- # Create pipeline flow
- @pipeline_flow
+ # Create pipeline flow with required config
+ @pipeline_flow(config=AnalysisConfig)
  async def analyze_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
-     config = AnalysisConfig()
-
      # Process documents
      outputs = []
      for doc in documents:
@@ -136,7 +135,7 @@ async def analyze_flow(
          outputs.append(output)

      # RECOMMENDED: Always validate output
-     return config.create_and_validate_output(outputs)
+     return AnalysisConfig.create_and_validate_output(outputs)
  ```

  ### Structured Output
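The two hunks above capture the headline API change in 0.2.1: `@pipeline_flow` now takes a `config=` argument, and output validation becomes a classmethod call on the `FlowConfig` subclass instead of on an instance. A minimal, self-contained sketch of the new style; the document classes, the `create(name=..., content=...)` keywords, and the `doc.text` access in the body are illustrative assumptions rather than content of this diff:

```python
from ai_pipeline_core import (
    DocumentList,
    FlowConfig,
    FlowDocument,
    FlowOptions,
    pipeline_flow,
)


class InputDoc(FlowDocument):
    """Input document (placeholder for illustration)."""


class OutputDoc(FlowDocument):
    """Output document (placeholder for illustration)."""


class AnalysisConfig(FlowConfig):
    INPUT_DOCUMENT_TYPES = [InputDoc]
    OUTPUT_DOCUMENT_TYPE = OutputDoc


# v0.2.1 style: the config is passed to the decorator, not instantiated in the body
@pipeline_flow(config=AnalysisConfig)
async def analyze_flow(
    project_name: str,
    documents: DocumentList,
    flow_options: FlowOptions,
) -> DocumentList:
    outputs = [
        # create() keywords and .text access are assumed for this sketch
        OutputDoc.create(name=f"analyzed_{doc.name}", content=doc.text)
        for doc in documents
    ]
    # Output validation is now a classmethod on the FlowConfig subclass
    return AnalysisConfig.create_and_validate_output(outputs)
```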
@@ -225,9 +224,17 @@ if doc.is_text:
  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()

+ # Convert between document types (new in v0.2.1)
+ task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
+ new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update
+
  # Enhanced filtering (new in v0.1.14)
  filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
  named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
+
+ # Immutable collections (new in v0.2.1)
+ frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
+ frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
  ```

  ### LLM Integration
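This hunk documents two 0.2.1 additions: `Document.model_convert` and `frozen=True` collections. A hedged sketch of how they fit together; `ReportDoc`/`ScratchDoc`, the `create()` keywords, and the top-level `TaskDocument` import are assumptions made for illustration:

```python
from ai_pipeline_core import AIMessages, DocumentList, FlowDocument, TaskDocument


class ReportDoc(FlowDocument):
    """Persistent flow output (illustrative)."""


class ScratchDoc(TaskDocument):
    """Intermediate working copy (illustrative)."""


report = ReportDoc.create(name="report.md", content="# Findings\n...")

# Re-type an existing document; optionally replace its content at the same time
scratch = report.model_convert(ScratchDoc)
draft = report.model_convert(ScratchDoc, content="# Findings (draft)\n...")

# frozen=True makes the collections read-only; later mutation attempts should fail
frozen_docs = DocumentList([report], frozen=True)
frozen_msgs = AIMessages([report], frozen=True)
```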
@@ -289,15 +296,15 @@ class ProcessingConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [RawDataDocument]
      OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!

- # Use in flows for validation
- @pipeline_flow
- async def process(
-     config: ProcessingConfig,
-     documents: DocumentList,
-     flow_options: FlowOptions
- ) -> DocumentList:
-     # ... processing logic ...
-     return config.create_and_validate_output(outputs)
+ # Use in flows for validation
+ @pipeline_flow(config=ProcessingConfig)
+ async def process(
+     project_name: str,
+     documents: DocumentList,
+     flow_options: FlowOptions
+ ) -> DocumentList:
+     # ... processing logic ...
+     return ProcessingConfig.create_and_validate_output(outputs)
  ```

  ### Pipeline Decorators
@@ -313,13 +320,18 @@ async def process_chunk(data: str) -> str:
      set_trace_cost(0.05) # Track costs (new in v0.1.14)
      return result

- @pipeline_flow # Full observability and orchestration
+ @pipeline_flow(
+     config=MyFlowConfig,
+     trace_trim_documents=True # Trim large documents in traces (new in v0.2.1)
+ )
  async def main_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
      # Your pipeline logic
+     # Large documents are automatically trimmed to 100 chars in traces
+     # for better observability without overwhelming the tracing UI
      return DocumentList(results)
  ```

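The unchanged context at the top of that hunk is still the canonical task pattern: `@pipeline_task` is applied without parameters and `set_trace_cost` attributes a cost to the current traced span. A tiny self-contained sketch of just that piece (the task body and the cost value are illustrative):

```python
from ai_pipeline_core import pipeline_task, set_trace_cost


@pipeline_task  # no parameters, per the framework rules
async def process_chunk(data: str) -> str:
    result = data.upper()  # placeholder processing step
    set_trace_cost(0.05)   # attribute an illustrative cost to this traced span
    return result
```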
@@ -339,6 +351,9 @@ LMNR_DEBUG=true # Enable debug traces
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
+
+ # Optional: Storage (for Google Cloud Storage)
+ GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
  ```

  ### Settings Management
@@ -366,7 +381,7 @@ print(settings.app_name)

  ### Framework Rules (90% Use Cases)

- 1. **Decorators**: Use `@trace`, `@pipeline_task`, `@pipeline_flow` WITHOUT parameters
+ 1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
  2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
  3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
  4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
README.md

@@ -67,15 +67,13 @@ class AnalysisConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [InputDoc]
      OUTPUT_DOCUMENT_TYPE = OutputDoc

- # Create pipeline flow
- @pipeline_flow
+ # Create pipeline flow with required config
+ @pipeline_flow(config=AnalysisConfig)
  async def analyze_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
-     config = AnalysisConfig()
-
      # Process documents
      outputs = []
      for doc in documents:
@@ -92,7 +90,7 @@ async def analyze_flow(
          outputs.append(output)

      # RECOMMENDED: Always validate output
-     return config.create_and_validate_output(outputs)
+     return AnalysisConfig.create_and_validate_output(outputs)
  ```

  ### Structured Output
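The `### Structured Output` section referenced in the trailing context is not shown in this diff, so the sketch below is purely hypothetical: the `response_format` keyword, the positional model argument, and the `.parsed` accessor on the structured response are all assumptions to be checked against the package documentation.

```python
from pydantic import BaseModel

from ai_pipeline_core import AIMessages, llm


class Summary(BaseModel):
    title: str
    key_points: list[str]


async def summarize(text: str) -> Summary:
    response = await llm.generate_structured(
        "gpt-5",                  # model passed positionally, mirroring generate() in 0.2.1
        response_format=Summary,  # keyword name is an assumption
        messages=AIMessages([text]),
    )
    return response.parsed        # accessor name is an assumption
```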
@@ -181,9 +179,17 @@ if doc.is_text:
  # Parse structured data
  data = doc.as_json() # or as_yaml(), as_pydantic_model()

+ # Convert between document types (new in v0.2.1)
+ task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
+ new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update
+
  # Enhanced filtering (new in v0.1.14)
  filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
  named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
+
+ # Immutable collections (new in v0.2.1)
+ frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
+ frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
  ```

  ### LLM Integration
@@ -245,15 +251,15 @@ class ProcessingConfig(FlowConfig):
      INPUT_DOCUMENT_TYPES = [RawDataDocument]
      OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!

- # Use in flows for validation
- @pipeline_flow
- async def process(
-     config: ProcessingConfig,
-     documents: DocumentList,
-     flow_options: FlowOptions
- ) -> DocumentList:
-     # ... processing logic ...
-     return config.create_and_validate_output(outputs)
+ # Use in flows for validation
+ @pipeline_flow(config=ProcessingConfig)
+ async def process(
+     project_name: str,
+     documents: DocumentList,
+     flow_options: FlowOptions
+ ) -> DocumentList:
+     # ... processing logic ...
+     return ProcessingConfig.create_and_validate_output(outputs)
  ```

  ### Pipeline Decorators
@@ -269,13 +275,18 @@ async def process_chunk(data: str) -> str:
      set_trace_cost(0.05) # Track costs (new in v0.1.14)
      return result

- @pipeline_flow # Full observability and orchestration
+ @pipeline_flow(
+     config=MyFlowConfig,
+     trace_trim_documents=True # Trim large documents in traces (new in v0.2.1)
+ )
  async def main_flow(
      project_name: str,
      documents: DocumentList,
      flow_options: FlowOptions
  ) -> DocumentList:
      # Your pipeline logic
+     # Large documents are automatically trimmed to 100 chars in traces
+     # for better observability without overwhelming the tracing UI
      return DocumentList(results)
  ```

@@ -295,6 +306,9 @@ LMNR_DEBUG=true # Enable debug traces
  # Optional: Orchestration
  PREFECT_API_URL=http://localhost:4200/api
  PREFECT_API_KEY=your-prefect-key
+
+ # Optional: Storage (for Google Cloud Storage)
+ GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
  ```

  ### Settings Management
@@ -322,7 +336,7 @@ print(settings.app_name)

  ### Framework Rules (90% Use Cases)

- 1. **Decorators**: Use `@trace`, `@pipeline_task`, `@pipeline_flow` WITHOUT parameters
+ 1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
  2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
  3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
  4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
ai_pipeline_core/__init__.py

@@ -7,7 +7,7 @@ It combines document processing, LLM integration, and workflow orchestration int
  system designed for production use.

  The framework enforces best practices through strong typing (Pydantic), automatic retries,
- cost tracking, and distributed tracing. All I/O operations are async for maximum throughput.
+ and cost tracking. All I/O operations are async for maximum throughput.

  **CRITICAL IMPORT RULE**:
  Always import from the top-level package:
@@ -18,12 +18,12 @@ cost tracking, and distributed tracing. All I/O operations are async for maximum
  from ai_pipeline_core.llm import generate # NO!
  from ai_pipeline_core.documents import FlowDocument # NO!

- FRAMEWORK RULES (90% Use Cases):
- 1. Decorators: Use @trace, @pipeline_task, @pipeline_flow WITHOUT parameters
+ FRAMEWORK RULES (Use by default, unless instructed otherwise):
+ 1. Decorators: Use @pipeline_task WITHOUT parameters, @pipeline_flow WITH config
  2. Logging: Use get_pipeline_logger(__name__) - NEVER print() or logging module
  3. LLM calls: Use AIMessages or str. Wrap Documents in AIMessages; do not call .text yourself
- 4. Options: Omit ModelOptions unless specifically needed (defaults are optimal)
- 5. Documents: Create with just name and content - skip description
+ 4. Options: DO NOT use options parameter - omit it entirely (defaults are optimal)
+ 5. Documents: Create with just name and content - skip description unless needed
  6. FlowConfig: OUTPUT_DOCUMENT_TYPE must differ from all INPUT_DOCUMENT_TYPES
  7. Initialization: PromptManager and logger at module scope, not in functions
  8. DocumentList: Use default constructor - no validation flags needed
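Rules 2 and 7 above are unchanged but easy to miss: the logger (and `PromptManager`) belong at module scope, and `print()` or the stdlib `logging` module are off limits. A small sketch of that convention; the top-level `get_pipeline_logger` import and the `logger.info` call are assumed to behave like a standard logger:

```python
from ai_pipeline_core import get_pipeline_logger

# Module scope, per rule 7; rule 2: use this logger, never print() or logging directly
logger = get_pipeline_logger(__name__)


async def do_work() -> None:
    logger.info("starting work")  # standard logger-style call (assumed)
```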
@@ -36,18 +36,22 @@ Core Capabilities:
  - **LLM Integration**: Unified interface to any model via LiteLLM with caching
  - **Structured Output**: Type-safe generation with Pydantic model validation
  - **Workflow Orchestration**: Prefect-based flows and tasks with retries
- - **Observability**: Distributed tracing via Laminar (LMNR) for debugging
+ - **Observability**: Built-in monitoring and debugging capabilities
  - **Local Development**: Simple runner for testing without infrastructure

  Quick Start:
  >>> from ai_pipeline_core import (
- ... pipeline_flow, FlowDocument, DocumentList, FlowOptions, llm, AIMessages
+ ... pipeline_flow, FlowDocument, DocumentList, FlowOptions, FlowConfig, llm, AIMessages
  ... )
  >>>
  >>> class OutputDoc(FlowDocument):
  ... '''Analysis result document.'''
  >>>
- >>> @pipeline_flow
+ >>> class MyFlowConfig(FlowConfig):
+ ... INPUT_DOCUMENT_TYPES = []
+ ... OUTPUT_DOCUMENT_TYPE = OutputDoc
+ >>>
+ >>> @pipeline_flow(config=MyFlowConfig)
  >>> async def analyze_flow(
  ... project_name: str,
  ... documents: DocumentList,
@@ -55,7 +59,7 @@ Quick Start:
  ... ) -> DocumentList:
  ... # Messages accept AIMessages or str. Wrap documents: AIMessages([doc])
  ... response = await llm.generate(
- ... model="gpt-5",
+ ... "gpt-5",
  ... messages=AIMessages([documents[0]])
  ... )
  ... result = OutputDoc.create(
@@ -76,8 +80,6 @@ Optional Environment Variables:
  - PREFECT_API_KEY: Prefect API authentication key
  - LMNR_PROJECT_API_KEY: Laminar (LMNR) API key for tracing
  - LMNR_DEBUG: Set to "true" to enable debug-level traces
- - LMNR_SESSION_ID: Default session ID for traces
- - LMNR_USER_ID: Default user ID for traces
  """

  from . import llm
@@ -99,6 +101,8 @@ from .llm import (
      ModelOptions,
      ModelResponse,
      StructuredModelResponse,
+     generate,
+     generate_structured,
  )
  from .logging import (
      LoggerMixin,
@@ -114,7 +118,7 @@ from .prompt_manager import PromptManager
  from .settings import Settings
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace

- __version__ = "0.1.14"
+ __version__ = "0.2.1"

  __all__ = [
      # Config/Settings
@@ -145,7 +149,9 @@ __all__ = [
      "prefect_test_harness",
      "disable_run_logger",
      # LLM
-     "llm",
+     "llm", # for backward compatibility
+     "generate",
+     "generate_structured",
      "ModelName",
      "ModelOptions",
      "ModelResponse",
@@ -159,4 +165,6 @@ __all__ = [
      "set_trace_cost",
      # Utils
      "PromptManager",
+     "generate",
+     "generate_structured",
  ]
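With `generate` and `generate_structured` re-exported from the package root (and `llm` kept "for backward compatibility"), both import styles below should resolve in 0.2.1. The `ModelResponse` return annotation and wrapping a plain string in `AIMessages` are assumptions; the positional model argument matches the Quick Start change above.

```python
from ai_pipeline_core import AIMessages, ModelResponse, generate, llm


async def ask() -> tuple[ModelResponse, ModelResponse]:
    # New in 0.2.1: call generate straight from the package root ...
    direct = await generate("gpt-5", messages=AIMessages(["Say hello"]))
    # ... while the llm namespace keeps working for existing code
    legacy = await llm.generate("gpt-5", messages=AIMessages(["Say hello"]))
    return direct, legacy
```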