ai-pipeline-core 0.1.14__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +21 -13
- ai_pipeline_core/documents/document.py +202 -51
- ai_pipeline_core/documents/document_list.py +148 -24
- ai_pipeline_core/documents/flow_document.py +2 -6
- ai_pipeline_core/documents/task_document.py +0 -4
- ai_pipeline_core/documents/temporary_document.py +1 -8
- ai_pipeline_core/flow/config.py +174 -5
- ai_pipeline_core/llm/__init__.py +1 -6
- ai_pipeline_core/llm/ai_messages.py +137 -4
- ai_pipeline_core/llm/client.py +118 -65
- ai_pipeline_core/llm/model_options.py +6 -7
- ai_pipeline_core/llm/model_response.py +17 -16
- ai_pipeline_core/llm/model_types.py +3 -7
- ai_pipeline_core/logging/__init__.py +0 -2
- ai_pipeline_core/logging/logging_config.py +0 -6
- ai_pipeline_core/logging/logging_mixin.py +2 -10
- ai_pipeline_core/pipeline.py +54 -68
- ai_pipeline_core/prefect.py +12 -3
- ai_pipeline_core/prompt_manager.py +14 -7
- ai_pipeline_core/settings.py +13 -5
- ai_pipeline_core/simple_runner/__init__.py +1 -11
- ai_pipeline_core/simple_runner/cli.py +13 -12
- ai_pipeline_core/simple_runner/simple_runner.py +34 -189
- ai_pipeline_core/storage/__init__.py +8 -0
- ai_pipeline_core/storage/storage.py +628 -0
- ai_pipeline_core/tracing.py +234 -30
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/METADATA +35 -20
- ai_pipeline_core-0.2.1.dist-info/RECORD +38 -0
- ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/tracing.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
"""Tracing utilities that integrate Laminar (``lmnr``) with our code-base.
|
|
2
2
|
|
|
3
|
-
@public
|
|
4
|
-
|
|
5
3
|
This module centralizes:
|
|
6
4
|
- ``TraceInfo`` - a small helper object for propagating contextual metadata.
|
|
7
5
|
- ``trace`` decorator - augments a callable with Laminar tracing, automatic
|
|
@@ -11,6 +9,7 @@ This module centralizes:
|
|
|
11
9
|
from __future__ import annotations
|
|
12
10
|
|
|
13
11
|
import inspect
|
|
12
|
+
import json
|
|
14
13
|
import os
|
|
15
14
|
from functools import wraps
|
|
16
15
|
from typing import Any, Callable, Literal, ParamSpec, TypeVar, cast, overload
|
|
@@ -18,6 +17,10 @@ from typing import Any, Callable, Literal, ParamSpec, TypeVar, cast, overload
|
|
|
18
17
|
from lmnr import Attributes, Instruments, Laminar, observe
|
|
19
18
|
from pydantic import BaseModel
|
|
20
19
|
|
|
20
|
+
# Import for document trimming - needed for isinstance checks
|
|
21
|
+
# These are regular module-level imports (not lazy); the types are only used when trim_documents is enabled
|
|
22
|
+
from ai_pipeline_core.documents import Document, DocumentList
|
|
23
|
+
from ai_pipeline_core.llm import AIMessages, ModelResponse
|
|
21
24
|
from ai_pipeline_core.settings import settings
|
|
22
25
|
|
|
23
26
|
# ---------------------------------------------------------------------------
|
|
@@ -29,8 +32,6 @@ R = TypeVar("R")
|
|
|
29
32
|
TraceLevel = Literal["always", "debug", "off"]
|
|
30
33
|
"""Control level for tracing activation.
|
|
31
34
|
|
|
32
|
-
@public
|
|
33
|
-
|
|
34
35
|
Values:
|
|
35
36
|
- "always": Always trace (default, production mode)
|
|
36
37
|
- "debug": Only trace when LMNR_DEBUG == "true"
|
|
@@ -38,6 +39,145 @@ Values:
|
|
|
38
39
|
"""
|
|
39
40
|
|
|
40
41
|
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Serialization helpers
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
def _serialize_for_tracing(obj: Any) -> Any:
    """Produce a JSON-friendly stand-in for ``obj`` in tracing payloads.

    The trimming formatters pass this function as the ``default=`` hook of
    ``json.dumps``, so it is only asked about values the encoder cannot
    handle on its own.

    Args:
        obj: The value to convert.

    Returns:
        - ``Document``: the result of ``serialize_model()``.
        - ``DocumentList``: a list of ``serialize_model()`` results.
        - ``AIMessages``: a list where ``Document`` entries are serialized
          and every other entry is passed through untouched.
        - ``ModelResponse``: the result of ``model_dump()``.
        - any other ``BaseModel``: a dict built from the instance
          ``__dict__`` (see below).
        - anything else: ``str(obj)``, or ``"<TypeName>"`` if ``str`` fails.
    """
    # Documents are checked ahead of the generic Pydantic branch so that
    # ``serialize_model`` is what ends up in the trace.
    if isinstance(obj, Document):
        return obj.serialize_model()

    if isinstance(obj, DocumentList):
        return [item.serialize_model() for item in obj]

    if isinstance(obj, AIMessages):
        return [
            entry.serialize_model() if isinstance(entry, Document) else entry
            for entry in obj
        ]

    # ModelResponse is dumped with the standard Pydantic serializer.
    if isinstance(obj, ModelResponse):
        return obj.model_dump()

    if isinstance(obj, BaseModel):

        def _convert_field(value: Any) -> Any:
            # Document fields keep their custom serialization, nested models
            # recurse, and everything else is left as-is for the encoder.
            if isinstance(value, Document):
                return value.serialize_model()
            if isinstance(value, BaseModel):
                return _serialize_for_tracing(value)
            return value

        return {name: _convert_field(value) for name, value in obj.__dict__.items()}

    # Last resort: a readable string, never an exception.
    try:
        return str(obj)
    except Exception:
        return f"<{type(obj).__name__}>"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
# Document trimming utilities
|
|
99
|
+
# ---------------------------------------------------------------------------
|
|
100
|
+
def _trim_document_content(doc_dict: dict[str, Any]) -> dict[str, Any]:
|
|
101
|
+
"""Trim document content based on document type and content type.
|
|
102
|
+
|
|
103
|
+
For non-FlowDocuments:
|
|
104
|
+
- Text content: Keep first 100 and last 100 chars (unless < 250 total)
|
|
105
|
+
- Binary content: Remove content entirely
|
|
106
|
+
|
|
107
|
+
For FlowDocuments:
|
|
108
|
+
- Text content: Keep full content
|
|
109
|
+
- Binary content: Remove content entirely
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
doc_dict: Document dictionary with base_type, content, and content_encoding
|
|
113
|
+
|
|
114
|
+
Returns:
|
|
115
|
+
Modified document dictionary with trimmed content
|
|
116
|
+
"""
|
|
117
|
+
# Check if this looks like a document (has required fields)
|
|
118
|
+
if not isinstance(doc_dict, dict): # type: ignore[reportUnknownArgumentType]
|
|
119
|
+
return doc_dict
|
|
120
|
+
|
|
121
|
+
if "base_type" not in doc_dict or "content" not in doc_dict:
|
|
122
|
+
return doc_dict
|
|
123
|
+
|
|
124
|
+
base_type = doc_dict.get("base_type")
|
|
125
|
+
content = doc_dict.get("content", "")
|
|
126
|
+
content_encoding = doc_dict.get("content_encoding", "utf-8")
|
|
127
|
+
|
|
128
|
+
# For binary content (base64 encoded), remove content
|
|
129
|
+
if content_encoding == "base64":
|
|
130
|
+
doc_dict = doc_dict.copy()
|
|
131
|
+
doc_dict["content"] = "[binary content removed]"
|
|
132
|
+
return doc_dict
|
|
133
|
+
|
|
134
|
+
# For FlowDocuments with text content, keep full content
|
|
135
|
+
if base_type == "flow":
|
|
136
|
+
return doc_dict
|
|
137
|
+
|
|
138
|
+
# For other documents (task, temporary), trim text content
|
|
139
|
+
if isinstance(content, str) and len(content) > 250:
|
|
140
|
+
doc_dict = doc_dict.copy()
|
|
141
|
+
# Keep first 100 and last 100 characters
|
|
142
|
+
trimmed_chars = len(content) - 200 # Number of characters removed
|
|
143
|
+
doc_dict["content"] = (
|
|
144
|
+
content[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + content[-100:]
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
return doc_dict
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _trim_documents_in_data(data: Any) -> Any:
|
|
151
|
+
"""Recursively trim document content in nested data structures.
|
|
152
|
+
|
|
153
|
+
Processes dictionaries, lists, and nested structures to find and trim
|
|
154
|
+
documents based on their type and content.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
data: Input data that may contain documents
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
Data with document content trimmed according to rules
|
|
161
|
+
"""
|
|
162
|
+
if isinstance(data, dict):
|
|
163
|
+
# Check if this is a document
|
|
164
|
+
if "base_type" in data and "content" in data:
|
|
165
|
+
# This is a document, trim it
|
|
166
|
+
return _trim_document_content(data)
|
|
167
|
+
else:
|
|
168
|
+
# Recursively process dictionary values
|
|
169
|
+
return {k: _trim_documents_in_data(v) for k, v in data.items()}
|
|
170
|
+
elif isinstance(data, list):
|
|
171
|
+
# Process each item in list
|
|
172
|
+
return [_trim_documents_in_data(item) for item in data]
|
|
173
|
+
elif isinstance(data, tuple):
|
|
174
|
+
# Process tuples
|
|
175
|
+
return tuple(_trim_documents_in_data(item) for item in data)
|
|
176
|
+
else:
|
|
177
|
+
# Return other types unchanged
|
|
178
|
+
return data
|
|
179
|
+
|
|
180
|
+
|
|
41
181
|
# ---------------------------------------------------------------------------
|
|
42
182
|
# ``TraceInfo`` – metadata container
|
|
43
183
|
# ---------------------------------------------------------------------------
|
|
@@ -51,16 +191,12 @@ class TraceInfo(BaseModel):
|
|
|
51
191
|
|
|
52
192
|
Attributes:
|
|
53
193
|
session_id: Unique identifier for the current session/conversation.
|
|
54
|
-
Falls back to LMNR_SESSION_ID environment variable.
|
|
55
194
|
user_id: Identifier for the user triggering the operation.
|
|
56
|
-
Falls back to LMNR_USER_ID environment variable.
|
|
57
195
|
metadata: Key-value pairs for additional trace context.
|
|
58
196
|
Useful for filtering and searching in LMNR dashboard.
|
|
59
197
|
tags: List of tags for categorizing traces (e.g., ["production", "v2"]).
|
|
60
198
|
|
|
61
199
|
Environment fallbacks:
|
|
62
|
-
- LMNR_SESSION_ID: Default session_id if not explicitly set
|
|
63
|
-
- LMNR_USER_ID: Default user_id if not explicitly set
|
|
64
200
|
- LMNR_DEBUG: Controls debug-level tracing when set to "true"
|
|
65
201
|
|
|
66
202
|
Note: These variables are read directly by the tracing layer and are
|
|
@@ -102,8 +238,8 @@ class TraceInfo(BaseModel):
|
|
|
102
238
|
|
|
103
239
|
Returns:
|
|
104
240
|
Dictionary with keys:
|
|
105
|
-
- session_id: From field or
|
|
106
|
-
- user_id: From field or
|
|
241
|
+
- session_id: From field or environment variable fallback
|
|
242
|
+
- user_id: From field or environment variable fallback
|
|
107
243
|
- metadata: Dictionary of custom metadata (if set)
|
|
108
244
|
- tags: List of tags (if set)
|
|
109
245
|
|
|
@@ -183,6 +319,7 @@ def trace(
|
|
|
183
319
|
output_formatter: Callable[..., str] | None = None,
|
|
184
320
|
ignore_exceptions: bool = False,
|
|
185
321
|
preserve_global_context: bool = True,
|
|
322
|
+
trim_documents: bool = True,
|
|
186
323
|
) -> Callable[[Callable[P, R]], Callable[P, R]]: ...
|
|
187
324
|
|
|
188
325
|
|
|
@@ -209,18 +346,17 @@ def trace(
|
|
|
209
346
|
output_formatter: Callable[..., str] | None = None,
|
|
210
347
|
ignore_exceptions: bool = False,
|
|
211
348
|
preserve_global_context: bool = True,
|
|
349
|
+
trim_documents: bool = True,
|
|
212
350
|
) -> Callable[[Callable[P, R]], Callable[P, R]] | Callable[P, R]:
|
|
213
351
|
"""Add Laminar observability tracing to any function.
|
|
214
352
|
|
|
215
|
-
@public
|
|
216
|
-
|
|
217
353
|
The trace decorator integrates functions with Laminar (LMNR) for
|
|
218
354
|
distributed tracing, performance monitoring, and debugging. It
|
|
219
355
|
automatically handles both sync and async functions, propagates
|
|
220
356
|
trace context, and provides fine-grained control over what gets traced.
|
|
221
357
|
|
|
222
358
|
USAGE GUIDELINE - Defaults First:
|
|
223
|
-
|
|
359
|
+
By default, use WITHOUT any parameters unless instructed otherwise.
|
|
224
360
|
The defaults are optimized for most use cases.
|
|
225
361
|
|
|
226
362
|
Args:
|
|
@@ -267,6 +403,12 @@ def trace(
|
|
|
267
403
|
preserve_global_context: Maintain Laminar's global context across
|
|
268
404
|
calls (default True). Set False for isolated traces.
|
|
269
405
|
|
|
406
|
+
trim_documents: Automatically trim document content in traces (default True).
|
|
407
|
+
When enabled, non-FlowDocument text content is trimmed to
|
|
408
|
+
first/last 100 chars, and all binary content is removed.
|
|
409
|
+
FlowDocuments keep full text content but binary is removed.
|
|
410
|
+
Helps reduce trace size for large documents.
|
|
411
|
+
|
|
270
412
|
Returns:
|
|
271
413
|
Decorated function with same signature but added tracing.
|
|
272
414
|
|
|
@@ -306,8 +448,6 @@ def trace(
|
|
|
306
448
|
|
|
307
449
|
Environment variables:
|
|
308
450
|
- LMNR_DEBUG: Set to "true" to enable debug-level traces
|
|
309
|
-
- LMNR_SESSION_ID: Default session ID if not in TraceInfo
|
|
310
|
-
- LMNR_USER_ID: Default user ID if not in TraceInfo
|
|
311
451
|
- LMNR_PROJECT_API_KEY: Required for trace submission
|
|
312
452
|
|
|
313
453
|
Performance:
|
|
@@ -320,11 +460,6 @@ def trace(
|
|
|
320
460
|
- Works with both sync and async functions
|
|
321
461
|
- Preserves function signature and metadata
|
|
322
462
|
- Thread-safe and async-safe
|
|
323
|
-
|
|
324
|
-
See Also:
|
|
325
|
-
- TraceInfo: Container for trace metadata
|
|
326
|
-
- pipeline_task: Task decorator with built-in tracing
|
|
327
|
-
- pipeline_flow: Flow decorator with built-in tracing
|
|
328
463
|
"""
|
|
329
464
|
if level == "off":
|
|
330
465
|
if func:
|
|
@@ -380,6 +515,72 @@ def trace(
|
|
|
380
515
|
_output_formatter = output_formatter
|
|
381
516
|
_ignore_exceptions = ignore_exceptions
|
|
382
517
|
_preserve_global_context = preserve_global_context
|
|
518
|
+
_trim_documents = trim_documents
|
|
519
|
+
|
|
520
|
+
# Create document trimming formatters if needed
|
|
521
|
+
def _create_trimming_input_formatter(*args, **kwargs) -> str:
    """Format the traced call's inputs with document content trimmed.

    With a custom ``_input_formatter`` configured, its output is trimmed:
    string output is parsed as JSON, trimmed and re-encoded (or returned
    verbatim when it is not valid JSON); non-string output is trimmed and
    JSON-encoded.

    Without a custom formatter, positional arguments are paired with the
    parameter names from ``sig``, keyword arguments are merged in, and the
    resulting mapping is serialized, trimmed and re-encoded.
    """
    if not _input_formatter:
        # Pair positionals with parameter names; ``zip`` stops at the shorter
        # side, so positionals without a matching name are left out.
        named_inputs = dict(zip(sig.parameters.keys(), args))
        named_inputs.update(kwargs)

        # Round-trip through JSON so the trimmer only sees plain containers.
        plain = json.loads(json.dumps(named_inputs, default=_serialize_for_tracing))
        return json.dumps(_trim_documents_in_data(plain))

    formatted = _input_formatter(*args, **kwargs)

    if not isinstance(formatted, str):  # type: ignore[reportUnknownArgumentType]
        # Formatter returned a dict/list style structure: trim it directly.
        reduced = _trim_documents_in_data(formatted)
        return reduced if isinstance(reduced, str) else json.dumps(reduced)

    try:
        return json.dumps(_trim_documents_in_data(json.loads(formatted)))
    except (json.JSONDecodeError, TypeError):
        # Not JSON (or not encodable): keep the formatter's text unchanged.
        return formatted
|
|
558
|
+
|
|
559
|
+
def _create_trimming_output_formatter(result: Any) -> str:
    """Format the traced call's result with document content trimmed.

    With a custom ``_output_formatter`` configured, its output is trimmed:
    string output is parsed as JSON, trimmed and re-encoded (or returned
    verbatim when it is not valid JSON); non-string output is trimmed and
    JSON-encoded.

    Without a custom formatter, ``result`` is serialized using
    ``_serialize_for_tracing`` as the fallback encoder, trimmed and
    re-encoded.

    Args:
        result: The value to format for the trace output.

    Returns:
        The string recorded as the span output.
    """
    if not _output_formatter:
        # Round-trip through JSON so the trimmer only sees plain containers.
        plain = json.loads(json.dumps(result, default=_serialize_for_tracing))
        return json.dumps(_trim_documents_in_data(plain))

    formatted = _output_formatter(result)

    if not isinstance(formatted, str):  # type: ignore[reportUnknownArgumentType]
        # Formatter returned a dict/list style structure: trim it directly.
        reduced = _trim_documents_in_data(formatted)
        return reduced if isinstance(reduced, str) else json.dumps(reduced)

    try:
        return json.dumps(_trim_documents_in_data(json.loads(formatted)))
    except (json.JSONDecodeError, TypeError):
        # Not JSON (or not encodable): keep the formatter's text unchanged.
        return formatted
|
|
383
584
|
|
|
384
585
|
# --- Helper function for runtime logic ---
|
|
385
586
|
def _prepare_and_get_observe_params(runtime_kwargs: dict[str, Any]) -> dict[str, Any]:
|
|
@@ -418,10 +619,19 @@ def trace(
|
|
|
418
619
|
observe_params["ignore_output"] = _ignore_output
|
|
419
620
|
if _ignore_inputs is not None:
|
|
420
621
|
observe_params["ignore_inputs"] = _ignore_inputs
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
if
|
|
424
|
-
|
|
622
|
+
|
|
623
|
+
# Use trimming formatters if trim_documents is enabled
|
|
624
|
+
if _trim_documents:
|
|
625
|
+
# Use the trimming formatters (which may wrap custom formatters)
|
|
626
|
+
observe_params["input_formatter"] = _create_trimming_input_formatter
|
|
627
|
+
observe_params["output_formatter"] = _create_trimming_output_formatter
|
|
628
|
+
else:
|
|
629
|
+
# Use custom formatters directly if provided
|
|
630
|
+
if _input_formatter is not None:
|
|
631
|
+
observe_params["input_formatter"] = _input_formatter
|
|
632
|
+
if _output_formatter is not None:
|
|
633
|
+
observe_params["output_formatter"] = _output_formatter
|
|
634
|
+
|
|
425
635
|
if _ignore_exceptions:
|
|
426
636
|
observe_params["ignore_exceptions"] = _ignore_exceptions
|
|
427
637
|
if _preserve_global_context:
|
|
@@ -541,12 +751,6 @@ def set_trace_cost(cost: float | str) -> None:
|
|
|
541
751
|
- Multiple calls overwrite the previous cost (not cumulative)
|
|
542
752
|
- If called outside a traced context (no active span), it has no effect
|
|
543
753
|
and does not raise an error
|
|
544
|
-
|
|
545
|
-
See Also:
|
|
546
|
-
- trace: Decorator for adding tracing to functions
|
|
547
|
-
- ModelResponse.get_laminar_metadata: Access LLM generation costs
|
|
548
|
-
- pipeline_task: Task decorator with built-in tracing and optional trace_cost parameter
|
|
549
|
-
- pipeline_flow: Flow decorator with built-in tracing and optional trace_cost parameter
|
|
550
754
|
"""
|
|
551
755
|
# Parse string format if provided
|
|
552
756
|
if isinstance(cost, str):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-pipeline-core
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Core utilities for AI-powered processing pipelines using prefect
|
|
5
5
|
Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
|
|
6
6
|
Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
|
|
@@ -20,9 +20,10 @@ Classifier: Typing :: Typed
|
|
|
20
20
|
Requires-Python: >=3.12
|
|
21
21
|
Requires-Dist: httpx>=0.28.1
|
|
22
22
|
Requires-Dist: jinja2>=3.1.6
|
|
23
|
-
Requires-Dist: lmnr>=0.7.
|
|
24
|
-
Requires-Dist: openai>=1.
|
|
25
|
-
Requires-Dist: prefect>=
|
|
23
|
+
Requires-Dist: lmnr>=0.7.13
|
|
24
|
+
Requires-Dist: openai>=1.108.1
|
|
25
|
+
Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
|
|
26
|
+
Requires-Dist: prefect>=3.4.19
|
|
26
27
|
Requires-Dist: pydantic-settings>=2.10.1
|
|
27
28
|
Requires-Dist: pydantic>=2.11.7
|
|
28
29
|
Requires-Dist: python-magic>=0.4.27
|
|
@@ -111,15 +112,13 @@ class AnalysisConfig(FlowConfig):
|
|
|
111
112
|
INPUT_DOCUMENT_TYPES = [InputDoc]
|
|
112
113
|
OUTPUT_DOCUMENT_TYPE = OutputDoc
|
|
113
114
|
|
|
114
|
-
# Create pipeline flow
|
|
115
|
-
@pipeline_flow
|
|
115
|
+
# Create pipeline flow with required config
|
|
116
|
+
@pipeline_flow(config=AnalysisConfig)
|
|
116
117
|
async def analyze_flow(
|
|
117
118
|
project_name: str,
|
|
118
119
|
documents: DocumentList,
|
|
119
120
|
flow_options: FlowOptions
|
|
120
121
|
) -> DocumentList:
|
|
121
|
-
config = AnalysisConfig()
|
|
122
|
-
|
|
123
122
|
# Process documents
|
|
124
123
|
outputs = []
|
|
125
124
|
for doc in documents:
|
|
@@ -136,7 +135,7 @@ async def analyze_flow(
|
|
|
136
135
|
outputs.append(output)
|
|
137
136
|
|
|
138
137
|
# RECOMMENDED: Always validate output
|
|
139
|
-
return
|
|
138
|
+
return AnalysisConfig.create_and_validate_output(outputs)
|
|
140
139
|
```
|
|
141
140
|
|
|
142
141
|
### Structured Output
|
|
@@ -225,9 +224,17 @@ if doc.is_text:
|
|
|
225
224
|
# Parse structured data
|
|
226
225
|
data = doc.as_json() # or as_yaml(), as_pydantic_model()
|
|
227
226
|
|
|
227
|
+
# Convert between document types (new in v0.2.1)
|
|
228
|
+
task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
|
|
229
|
+
new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update
|
|
230
|
+
|
|
228
231
|
# Enhanced filtering (new in v0.1.14)
|
|
229
232
|
filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
|
|
230
233
|
named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
|
|
234
|
+
|
|
235
|
+
# Immutable collections (new in v0.2.1)
|
|
236
|
+
frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
|
|
237
|
+
frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
|
|
231
238
|
```
|
|
232
239
|
|
|
233
240
|
### LLM Integration
|
|
@@ -289,15 +296,15 @@ class ProcessingConfig(FlowConfig):
|
|
|
289
296
|
INPUT_DOCUMENT_TYPES = [RawDataDocument]
|
|
290
297
|
OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!
|
|
291
298
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
299
|
+
# Use in flows for validation
|
|
300
|
+
@pipeline_flow(config=ProcessingConfig)
|
|
301
|
+
async def process(
|
|
302
|
+
project_name: str,
|
|
303
|
+
documents: DocumentList,
|
|
304
|
+
flow_options: FlowOptions
|
|
305
|
+
) -> DocumentList:
|
|
306
|
+
# ... processing logic ...
|
|
307
|
+
return ProcessingConfig.create_and_validate_output(outputs)
|
|
301
308
|
```
|
|
302
309
|
|
|
303
310
|
### Pipeline Decorators
|
|
@@ -313,13 +320,18 @@ async def process_chunk(data: str) -> str:
|
|
|
313
320
|
set_trace_cost(0.05) # Track costs (new in v0.1.14)
|
|
314
321
|
return result
|
|
315
322
|
|
|
316
|
-
@pipeline_flow
|
|
323
|
+
@pipeline_flow(
|
|
324
|
+
config=MyFlowConfig,
|
|
325
|
+
trace_trim_documents=True # Trim large documents in traces (new in v0.2.1)
|
|
326
|
+
)
|
|
317
327
|
async def main_flow(
|
|
318
328
|
project_name: str,
|
|
319
329
|
documents: DocumentList,
|
|
320
330
|
flow_options: FlowOptions
|
|
321
331
|
) -> DocumentList:
|
|
322
332
|
# Your pipeline logic
|
|
333
|
+
# Large non-flow documents are automatically trimmed to their first and last 100 chars in traces
|
|
334
|
+
# for better observability without overwhelming the tracing UI
|
|
323
335
|
return DocumentList(results)
|
|
324
336
|
```
|
|
325
337
|
|
|
@@ -339,6 +351,9 @@ LMNR_DEBUG=true # Enable debug traces
|
|
|
339
351
|
# Optional: Orchestration
|
|
340
352
|
PREFECT_API_URL=http://localhost:4200/api
|
|
341
353
|
PREFECT_API_KEY=your-prefect-key
|
|
354
|
+
|
|
355
|
+
# Optional: Storage (for Google Cloud Storage)
|
|
356
|
+
GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
|
|
342
357
|
```
|
|
343
358
|
|
|
344
359
|
### Settings Management
|
|
@@ -366,7 +381,7 @@ print(settings.app_name)
|
|
|
366
381
|
|
|
367
382
|
### Framework Rules (90% Use Cases)
|
|
368
383
|
|
|
369
|
-
1. **Decorators**: Use `@
|
|
384
|
+
1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
|
|
370
385
|
2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
|
|
371
386
|
3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
|
|
372
387
|
4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
ai_pipeline_core/__init__.py,sha256=xTgroRQcXnKqsZSr8XQp-Q7R8gzdIBppVYPxYh5c5uo,5720
|
|
2
|
+
ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
|
|
3
|
+
ai_pipeline_core/pipeline.py,sha256=_00Qctqd7QibyXaetZv6KfyWoW9KZIRdndkYItNHWWI,28921
|
|
4
|
+
ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
|
|
5
|
+
ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
|
|
6
|
+
ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
ai_pipeline_core/settings.py,sha256=-a9jVGg77xifj2SagCR9shXfzXUd-2MlrlquEu4htG8,5035
|
|
8
|
+
ai_pipeline_core/tracing.py,sha256=9RaJaAX5Vp2C8t73TaY-a9gpVy6a_VtSY0JPohIoQsc,31460
|
|
9
|
+
ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
|
|
10
|
+
ai_pipeline_core/documents/document.py,sha256=L3S_bfOiViMZLYRcmbV4-s3qO8HoGmqJ5g3bXNVs_3Q,67082
|
|
11
|
+
ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
|
|
12
|
+
ai_pipeline_core/documents/flow_document.py,sha256=g9wlRJRJgy4RsrrZ_P5Qu6kj0FuUFfhfUsRFgtq4NIM,3918
|
|
13
|
+
ai_pipeline_core/documents/mime_type.py,sha256=DkW88K95el5nAmhC00XLS0G3WpDXgs5IRsBWbKiqG3Y,7995
|
|
14
|
+
ai_pipeline_core/documents/task_document.py,sha256=40tFavBLX3FhK9-CRsuOH-3gUZ0zvEkqv9XcMFr8ySk,4077
|
|
15
|
+
ai_pipeline_core/documents/temporary_document.py,sha256=Sam344Mm5AlZTm3_l01YdDWeF26F6pR2tytGRL1doQY,2711
|
|
16
|
+
ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
|
|
17
|
+
ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
|
|
18
|
+
ai_pipeline_core/flow/config.py,sha256=3PCDph2n8dj-txqAvd9Wflbi_6lmfXFR9rUhM-szGSQ,18887
|
|
19
|
+
ai_pipeline_core/flow/options.py,sha256=2rKR2GifhXcyw8avI_oiEDMLC2jm5Qzpw8z56pbxUMo,2285
|
|
20
|
+
ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
|
|
21
|
+
ai_pipeline_core/llm/ai_messages.py,sha256=ML4rSCCEEu9_83Mnfn7r4yx0pUkarvnBsrxRZbO4ulw,13126
|
|
22
|
+
ai_pipeline_core/llm/client.py,sha256=3nN_QTW0R64PEvZsg9RxsYnxyq_hvYHYj-nBYG8KeDA,22773
|
|
23
|
+
ai_pipeline_core/llm/model_options.py,sha256=UFuI2drXNamA1G7OSwGHeXbOrkf2gGy2jpMpZEfK9i4,7789
|
|
24
|
+
ai_pipeline_core/llm/model_response.py,sha256=xKJPsqFHtOGfqpKlsGzyBHPbqjEjNfP-Ix3lGVdiTjQ,15289
|
|
25
|
+
ai_pipeline_core/llm/model_types.py,sha256=2J4Qsb1x21I4eo_VPeaMMOW8shOGPqzJuoGjTLcBFPM,2791
|
|
26
|
+
ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
|
|
27
|
+
ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
|
|
28
|
+
ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
|
|
29
|
+
ai_pipeline_core/logging/logging_mixin.py,sha256=OTye2pbUbG5oYZkI06TNkGCEa4y0ldePz5IAfdmNUPU,8090
|
|
30
|
+
ai_pipeline_core/simple_runner/__init__.py,sha256=9krT-CcDAZ0jB2MjWqFYhaK5qtUDMpB5qWzjRLa4Zhk,322
|
|
31
|
+
ai_pipeline_core/simple_runner/cli.py,sha256=yVyuxLY2RZvdNwmwT5LCe-km2nQJzWTPI0vSWn4_yms,9344
|
|
32
|
+
ai_pipeline_core/simple_runner/simple_runner.py,sha256=f6cIodYkul-Apu1d63T6kR5DZpiaCWpphUcEPp5XjFo,9102
|
|
33
|
+
ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
|
|
34
|
+
ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
|
|
35
|
+
ai_pipeline_core-0.2.1.dist-info/METADATA,sha256=OBa_0wjIopzJcEMRMgfkZgeTZ8Vk7kad3QuzM6Rg2x4,15159
|
|
36
|
+
ai_pipeline_core-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
37
|
+
ai_pipeline_core-0.2.1.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
|
|
38
|
+
ai_pipeline_core-0.2.1.dist-info/RECORD,,
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
ai_pipeline_core/__init__.py,sha256=jDITXj2wA7lQ46IT9TOvmjg7Ug2aY_QPkuLYfYQEd2E,5484
|
|
2
|
-
ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
|
|
3
|
-
ai_pipeline_core/pipeline.py,sha256=dq-v4IYEaBNt290y545E5JuUahe_k3ffI2_rrGjD-GQ,29384
|
|
4
|
-
ai_pipeline_core/prefect.py,sha256=CC8qeIpVqzNq8m6YWNIcRYeDEqkcAFiNjFwcuwwKO0k,2064
|
|
5
|
-
ai_pipeline_core/prompt_manager.py,sha256=XZwah5fp3GyZ0e0na_yOs6m4ngCcotysh-K_cU2U978,11572
|
|
6
|
-
ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
ai_pipeline_core/settings.py,sha256=X8m13zUHWte953l3or45wG8o7ZQ4X-XBe6umk4PlkMQ,4598
|
|
8
|
-
ai_pipeline_core/tracing.py,sha256=uXQdP7GIvTghiqyG3XIgES_DbWfkxU0guSqfiazkc0Q,22877
|
|
9
|
-
ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
|
|
10
|
-
ai_pipeline_core/documents/document.py,sha256=2UfLx4t7k0lV1v2R67mykeEwRAuyAPOCCjvfUOmPDiI,60450
|
|
11
|
-
ai_pipeline_core/documents/document_list.py,sha256=SnWzRqCloI8A6e1HIxaKKbaSkPbL2RrqtkjQKY8H6dI,11354
|
|
12
|
-
ai_pipeline_core/documents/flow_document.py,sha256=GbOa8mjo6xy5t6EUY7E857S0q5nFBgC1yYQdD7gr-Ls,4043
|
|
13
|
-
ai_pipeline_core/documents/mime_type.py,sha256=DkW88K95el5nAmhC00XLS0G3WpDXgs5IRsBWbKiqG3Y,7995
|
|
14
|
-
ai_pipeline_core/documents/task_document.py,sha256=h2jAE3k9-2MJ_MjDrH8UtJRXxNuoxACWBfHu2xEHsN4,4226
|
|
15
|
-
ai_pipeline_core/documents/temporary_document.py,sha256=1Jpi5ozEes4nudilptnXyXatImZDNAhGL7Jd3cQXM3g,2845
|
|
16
|
-
ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
|
|
17
|
-
ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
|
|
18
|
-
ai_pipeline_core/flow/config.py,sha256=4JVc30tztSW9sYufWLN3hx6qSeR1VX31H1aI9I2jIrA,12114
|
|
19
|
-
ai_pipeline_core/flow/options.py,sha256=2rKR2GifhXcyw8avI_oiEDMLC2jm5Qzpw8z56pbxUMo,2285
|
|
20
|
-
ai_pipeline_core/llm/__init__.py,sha256=kLMoOj_JQgvGZXZXU-5u9QzLAu2sq5ixMCyEqk2jKKc,857
|
|
21
|
-
ai_pipeline_core/llm/ai_messages.py,sha256=udzFwUFfQgJeu1JzpqVuBr0QHdWXvuhfJEEgedSHauY,8383
|
|
22
|
-
ai_pipeline_core/llm/client.py,sha256=PZkNaBe1x20ecPTI30PjUjuo26vB76Tam422X3bWhzk,19454
|
|
23
|
-
ai_pipeline_core/llm/model_options.py,sha256=_wAUM3d7b_1oBeSpDjcyyx_wZmvBMobGOyF0JEgLTPg,7660
|
|
24
|
-
ai_pipeline_core/llm/model_response.py,sha256=TUgEi8CLQ1Bw3vvQeNzX3j9YYjuToNZseaMJ7Uaf4GI,15224
|
|
25
|
-
ai_pipeline_core/llm/model_types.py,sha256=JjaJSDY3TTL-ifSLKVNBEV1KJtBIJTr1QwIh9ZnD-is,2895
|
|
26
|
-
ai_pipeline_core/logging/__init__.py,sha256=4iXN4jNiOXLfCYGH3wZB0-Zf-SlU-gQ07f1AyP2H5-s,660
|
|
27
|
-
ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
|
|
28
|
-
ai_pipeline_core/logging/logging_config.py,sha256=QYI-vz9BqNA02RxoIWPdKhomZpZJkXeFINIuu08O3hY,6242
|
|
29
|
-
ai_pipeline_core/logging/logging_mixin.py,sha256=UFd_CfyJ6YP_XVA-CrpAszOr8g1FH8RwRIwiY23kRG0,8131
|
|
30
|
-
ai_pipeline_core/simple_runner/__init__.py,sha256=OXKFOu3rRcqXCWwBBxnZ7Vz8KRFF5g-G3eJq-vm3CUY,521
|
|
31
|
-
ai_pipeline_core/simple_runner/cli.py,sha256=sbIvv_d401o8h-b5JlcIJQhwzte1sttdmUi2a3As-wY,9357
|
|
32
|
-
ai_pipeline_core/simple_runner/simple_runner.py,sha256=1FC1x0WlOUbOiMbiOtkDQdY0d5NswDbx0FSzrNmewCA,15067
|
|
33
|
-
ai_pipeline_core-0.1.14.dist-info/METADATA,sha256=vzEIlKku7IF-tY0ho30lP8yMaFBWHOASlLa7CaQRsjw,14351
|
|
34
|
-
ai_pipeline_core-0.1.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
35
|
-
ai_pipeline_core-0.1.14.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
|
|
36
|
-
ai_pipeline_core-0.1.14.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|