ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +78 -125
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +130 -81
- ai_pipeline_core/llm/client.py +327 -193
- ai_pipeline_core/llm/model_options.py +14 -86
- ai_pipeline_core/llm/model_response.py +60 -103
- ai_pipeline_core/llm/model_types.py +16 -34
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -483
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/simple_runner/__init__.py +0 -14
- ai_pipeline_core/simple_runner/cli.py +0 -254
- ai_pipeline_core/simple_runner/simple_runner.py +0 -247
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core/utils/deploy.py +0 -373
- ai_pipeline_core/utils/remote_deployment.py +0 -269
- ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
- ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
- {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/{tracing.py → observability/tracing.py}

@@ -6,18 +6,18 @@ This module centralizes:
 ``observe`` instrumentation, and optional support for test runs.
 """

+import contextlib
 import inspect
 import json
 import os
+from collections.abc import Callable
 from functools import wraps
-from typing import Any,
+from typing import Any, Literal, ParamSpec, TypeVar, cast, overload

 from lmnr import Attributes, Instruments, Laminar, observe
 from pydantic import BaseModel, Field

-
-# These are lazy imports only used when trim_documents is enabled
-from ai_pipeline_core.documents import Document, DocumentList
+from ai_pipeline_core.documents import Document
 from ai_pipeline_core.llm import AIMessages, ModelResponse
 from ai_pipeline_core.settings import settings

@@ -40,144 +40,112 @@ Values:
 # ---------------------------------------------------------------------------
 # Serialization helpers
 # ---------------------------------------------------------------------------
-def _serialize_for_tracing(obj: Any) -> Any:
-    """Convert objects to JSON-serializable format for tracing.
-
-    Handles Pydantic models, Documents, and other special types.
-    This is extracted for better testability.
-
-    Args:
-        obj: Object to serialize
-
-    Returns:
-        JSON-serializable representation of the object
-    """
-    # Our Document types - handle first to ensure serialize_model is used
+def _serialize_for_tracing(obj: Any) -> Any:  # noqa: PLR0911
+    """Convert objects to JSON-serializable format for tracing."""
     if isinstance(obj, Document):
         return obj.serialize_model()
-
-
-        return [doc.serialize_model() for doc in obj]
-    # AIMessages
+    if isinstance(obj, list) and obj and isinstance(obj[0], Document):
+        return [doc.serialize_model() for doc in cast(list[Document], obj)]
     if isinstance(obj, AIMessages):
-        result = []
+        result: list[Any] = []
         for msg in obj:
             if isinstance(msg, Document):
                 result.append(msg.serialize_model())
             else:
                 result.append(msg)
         return result
-    # ModelResponse (special Pydantic model) - use standard model_dump
     if isinstance(obj, ModelResponse):
         return obj.model_dump()
-    # Pydantic models - use custom serializer that respects Document.serialize_model()
     if isinstance(obj, BaseModel):
-
-        data = {}
+        data: dict[str, Any] = {}
         for field_name, field_value in obj.__dict__.items():
             if isinstance(field_value, Document):
-                # Use serialize_model for Documents to get base_type
                 data[field_name] = field_value.serialize_model()
             elif isinstance(field_value, BaseModel):
-                # Recursively handle nested Pydantic models
                 data[field_name] = _serialize_for_tracing(field_value)
             else:
-                # Let Pydantic handle other fields normally
                 data[field_name] = field_value
         return data
-    # Fallback to string representation
     try:
-        return str(obj)
+        return str(obj)  # pyright: ignore[reportUnknownArgumentType]
     except Exception:
-        return f"<{type(obj).__name__}>"
+        return f"<{type(obj).__name__}>"  # pyright: ignore[reportUnknownArgumentType]


 # ---------------------------------------------------------------------------
 # Document trimming utilities
 # ---------------------------------------------------------------------------
-def
-    """Trim
+def _trim_attachment_list(attachments: list[Any]) -> list[Any]:
+    """Trim attachment content in a serialized attachment list.

-
-    -
-    -
-
-
-
-
+    Always trims regardless of parent document type:
+    - Binary (base64): replace content with placeholder
+    - Text > 250 chars: keep first 100 + last 100
+    """
+    trimmed: list[Any] = []
+    for raw_att in attachments:
+        if not isinstance(raw_att, dict):
+            trimmed.append(raw_att)
+            continue
+        att: dict[str, Any] = cast(dict[str, Any], raw_att)
+        content_encoding: str = att.get("content_encoding", "utf-8")
+        if content_encoding == "base64":
+            att = att.copy()
+            att["content"] = "[binary content removed]"
+        elif isinstance(att.get("content"), str) and len(att["content"]) > 250:
+            att = att.copy()
+            c: str = att["content"]
+            trimmed_chars = len(c) - 200
+            att["content"] = c[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + c[-100:]
+        trimmed.append(att)
+    return trimmed

-    Args:
-        doc_dict: Document dictionary with base_type, content, and content_encoding

-
-
-
-
-    if not isinstance(doc_dict, dict):  # type: ignore[reportUnknownArgumentType]
-        return doc_dict
+def _trim_document_content(doc_dict: dict[str, Any]) -> dict[str, Any]:
+    """Trim document content for traces. All documents trimmed equally."""
+    if not isinstance(doc_dict, dict):  # pyright: ignore[reportUnnecessaryIsInstance]
+        return doc_dict  # pyright: ignore[reportUnreachable]

-    if "
+    if "content" not in doc_dict or "class_name" not in doc_dict:
         return doc_dict

-
+    doc_dict = doc_dict.copy()
     content = doc_dict.get("content", "")
     content_encoding = doc_dict.get("content_encoding", "utf-8")

-    #
+    # Trim attachments
+    if "attachments" in doc_dict and isinstance(doc_dict["attachments"], list):
+        doc_dict["attachments"] = _trim_attachment_list(cast(list[Any], doc_dict["attachments"]))
+
+    # Binary: remove content
     if content_encoding == "base64":
-        doc_dict = doc_dict.copy()
         doc_dict["content"] = "[binary content removed]"
         return doc_dict

-    #
-    if base_type == "flow":
-        return doc_dict
-
-    # For other documents (task, temporary), trim text content
+    # Text: trim if > 250 chars
     if isinstance(content, str) and len(content) > 250:
-
-
-        trimmed_chars = len(content) - 200  # Number of characters removed
-        doc_dict["content"] = (
-            content[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + content[-100:]
-        )
+        trimmed_chars = len(content) - 200
+        doc_dict["content"] = content[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + content[-100:]

     return doc_dict


 def _trim_documents_in_data(data: Any) -> Any:
-    """Recursively trim document content in nested data structures.
-
-    Processes dictionaries, lists, and nested structures to find and trim
-    documents based on their type and content.
-
-    Args:
-        data: Input data that may contain documents
-
-    Returns:
-        Data with document content trimmed according to rules
-    """
+    """Recursively trim document content in nested data structures."""
     if isinstance(data, dict):
-
-        if "
-
-
-
-
-
-
-        return [_trim_documents_in_data(item) for item in data]
-    elif isinstance(data, tuple):
-        # Process tuples
-        return tuple(_trim_documents_in_data(item) for item in data)
-    else:
-        # Return other types unchanged
-        return data
+        data_dict = cast(dict[str, Any], data)
+        if "class_name" in data_dict and "content" in data_dict:
+            return _trim_document_content(data_dict)
+        return {k: _trim_documents_in_data(v) for k, v in data_dict.items()}
+    if isinstance(data, list):
+        return [_trim_documents_in_data(item) for item in cast(list[Any], data)]
+    if isinstance(data, tuple):
+        return tuple(_trim_documents_in_data(item) for item in cast(tuple[Any, ...], data))
+    return data


 # ---------------------------------------------------------------------------
-# ``TraceInfo``
+# ``TraceInfo`` - metadata container
 # ---------------------------------------------------------------------------
 class TraceInfo(BaseModel):
     """Container for propagating trace context through the pipeline.
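A minimal, self-contained sketch of the trimming rule introduced above (it mirrors `_trim_document_content` rather than importing the private helper; `ExampleDocument` is a placeholder name):

    from typing import Any

    def trim_for_trace(doc: dict[str, Any]) -> dict[str, Any]:
        # Mirrors _trim_document_content: base64 content is replaced with a
        # placeholder; text longer than 250 chars keeps only the first and
        # last 100 characters around a "[trimmed N chars]" marker.
        doc = doc.copy()
        content = doc.get("content", "")
        if doc.get("content_encoding", "utf-8") == "base64":
            doc["content"] = "[binary content removed]"
        elif isinstance(content, str) and len(content) > 250:
            removed = len(content) - 200
            doc["content"] = content[:100] + f" ... [trimmed {removed} chars] ... " + content[-100:]
        return doc

    sample = {"class_name": "ExampleDocument", "content": "x" * 1000, "content_encoding": "utf-8"}
    print(len(trim_for_trace(sample)["content"]))  # 229: first 100 + 29-char marker + last 100
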
@@ -196,30 +164,12 @@ class TraceInfo(BaseModel):

     Environment fallbacks:
     - LMNR_DEBUG: Controls debug-level tracing when set to "true"
-
-    Note: These variables are read directly by the tracing layer and are
+    These variables are read directly by the tracing layer and are
     not part of the Settings configuration.

-
-
-
-        ... session_id="sess_123",
-        ... user_id="user_456",
-        ... metadata={"flow": "document_analysis", "version": "1.2"},
-        ... tags=["production", "high_priority"]
-        ... )
-        >>>
-        >>> # Pass through function calls
-        >>> @trace
-        >>> async def process(data, trace_info: TraceInfo):
-        ...     # TraceInfo automatically propagates to nested calls
-        ...     result = await analyze(data, trace_info=trace_info)
-        ...     return result
-
-    Note:
-        TraceInfo is typically created at the entry point of a flow
-        and passed through all subsequent function calls for
-        consistent tracing context.
+    TraceInfo is typically created at the entry point of a flow
+    and passed through all subsequent function calls for
+    consistent tracing context.
     """

     session_id: str | None = None
@@ -243,14 +193,8 @@ class TraceInfo(BaseModel):

         Only non-empty values are included in the output.

-
-
-            >>> kwargs = trace_info.get_observe_kwargs()
-            >>> # Returns: {"session_id": "sess_123", "tags": ["test"]}
-
-        Note:
-            This method is called internally by the trace decorator
-            to configure Laminar observation parameters.
+        Called internally by the trace decorator to configure Laminar
+        observation parameters.
         """
         kwargs: dict[str, Any] = {}

@@ -283,19 +227,11 @@ def _initialise_laminar() -> None:
     from settings. Disables automatic OpenAI instrumentation to avoid
     conflicts with our custom tracing.

-
-    - Uses settings.lmnr_project_api_key for authentication
-    - Disables OPENAI instrument to prevent double-tracing
-    - Called automatically by trace decorator on first use
-
-    Note:
-        This is an internal function called once per process.
-        Multiple calls are safe (Laminar handles idempotency).
+    Called once per process. Multiple calls are safe (Laminar handles idempotency).
     """
     if settings.lmnr_project_api_key:
         Laminar.initialize(
-            project_api_key=settings.lmnr_project_api_key,
-            disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [],
+            project_api_key=settings.lmnr_project_api_key, disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [], export_timeout_seconds=15
         )

@@ -323,11 +259,11 @@ def trace(

 # Overload for the bare @trace call
 @overload
-def trace(func: Callable[P, R]) -> Callable[P, R]: ...
+def trace(func: Callable[P, R]) -> Callable[P, R]: ...  # noqa: UP047


 # Actual implementation
-def trace(
+def trace(  # noqa: UP047
     func: Callable[P, R] | None = None,
     *,
     level: TraceLevel = "always",
@@ -402,9 +338,10 @@ def trace(
             calls (default True). Set False for isolated traces.

         trim_documents: Automatically trim document content in traces (default True).
-            When enabled,
+            When enabled, text content is trimmed to
             first/last 100 chars, and all binary content is removed.
-
+            Binary content is removed, text content is trimmed.
+            Attachment content follows the same trimming rules.
             Helps reduce trace size for large documents.

     Returns:
@@ -415,35 +352,6 @@ def trace(
     automatically creates or propagates a TraceInfo instance, ensuring
     consistent session/user tracking across the call chain.

-    Example:
-        >>> # RECOMMENDED - No parameters needed for most cases!
-        >>> @trace
-        >>> async def process_document(doc):
-        ...     return await analyze(doc)
-        >>>
-        >>> # With parameters (RARE - only when specifically needed):
-        >>> @trace(level="debug")  # Only for debug-specific tracing
-        >>> async def debug_operation():
-        ...     pass
-
-        >>> @trace(ignore_inputs=["api_key"])  # Only for sensitive data
-        >>> async def api_call(data, api_key):
-        ...     return await external_api(data, api_key)
-        >>>
-        >>> # AVOID unnecessary configuration - defaults handle:
-        >>> # - Automatic naming from function name
-        >>> # - Standard trace level ("always")
-        >>> # - Full input/output capture
-        >>> # - Proper span type inference
-        >>>
-        >>> # Custom formatting
-        >>> @trace(
-        ...     input_formatter=lambda doc: f"Document: {doc.id}",
-        ...     output_formatter=lambda res: f"Results: {len(res)} items"
-        >>> )
-        >>> def analyze(doc):
-        ...     return results
-
     Environment variables:
         - LMNR_DEBUG: Set to "true" to enable debug-level traces
         - LMNR_PROJECT_API_KEY: Required for trace submission
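The usage examples removed from this docstring still describe the supported call forms. A short sketch, assuming the module path from the rename above (the package may also re-export `trace` at its top level):

    import asyncio

    from ai_pipeline_core.observability.tracing import trace  # assumed path, see rename above

    @trace  # bare form: defaults handle naming, level, and input/output capture
    async def process_document(text: str) -> int:
        return len(text)

    @trace(ignore_inputs=["api_key"])  # keep sensitive arguments out of the trace
    async def api_call(data: str, api_key: str) -> str:
        return data.upper()

    asyncio.run(process_document("hello"))
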
@@ -453,11 +361,9 @@ def trace(
     - When level="off", decorator returns original function unchanged
     - Large inputs/outputs can be excluded with ignore_* parameters

-
-
-
-    - Preserves function signature and metadata
-    - Thread-safe and async-safe
+    Automatically initializes Laminar on first use. Works with both sync and
+    async functions. Preserves function signature and metadata. Thread-safe
+    and async-safe.
     """
     if level == "off":
         if func:
@@ -498,27 +404,27 @@ def trace(
         sig = inspect.signature(f)
         is_coroutine = inspect.iscoroutinefunction(f)
         observe_name = name or f.__name__
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        bound_observe = observe
+
+        bound_session_id = session_id
+        bound_user_id = user_id
+        bound_metadata = metadata if metadata is not None else {}
+        bound_tags = tags if tags is not None else []
+        bound_span_type = span_type
+        bound_ignore_input = ignore_input
+        bound_ignore_output = ignore_output
+        bound_ignore_inputs = ignore_inputs
+        bound_input_formatter = input_formatter
+        bound_output_formatter = output_formatter
+        bound_ignore_exceptions = ignore_exceptions
+        bound_preserve_global_context = preserve_global_context
+        bound_trim_documents = trim_documents

         # Create document trimming formatters if needed
-        def _create_trimming_input_formatter(*args, **kwargs) -> str:
+        def _create_trimming_input_formatter(*args: Any, **kwargs: Any) -> str:
             # First, let any custom formatter process the data
-            if
-                result =
+            if bound_input_formatter:
+                result = bound_input_formatter(*args, **kwargs)
             # If formatter returns string, try to parse and trim
             if isinstance(result, str):  # type: ignore[reportUnknownArgumentType]
                 try:
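The new `bound_*` locals pin the decorator arguments once so the nested formatters and wrappers close over stable, already-defaulted values. A generic sketch of that closure pattern (the `tag_calls` decorator is hypothetical, not part of the package):

    from functools import wraps
    from typing import Any, Callable

    def tag_calls(label: str | None = None):
        def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
            bound_label = label or f.__name__  # resolved once, closed over by the wrapper

            @wraps(f)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                print(f"[{bound_label}] calling")
                return f(*args, **kwargs)

            return wrapper
        return decorator

    @tag_calls()
    def add(a: int, b: int) -> int:
        return a + b

    add(2, 3)  # prints "[add] calling"
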
@@ -535,7 +441,7 @@ def trace(
             # No custom formatter - mimic Laminar's get_input_from_func_args
             # Build a dict with parameter names as keys (like Laminar does)
             params = list(sig.parameters.keys())
-            data = {}
+            data: dict[str, Any] = {}

             # Map args to parameter names
             for i, arg in enumerate(args):
@@ -555,8 +461,8 @@ def trace(

         def _create_trimming_output_formatter(result: Any) -> str:
             # First, let any custom formatter process the data
-            if
-                formatted =
+            if bound_output_formatter:
+                formatted = bound_output_formatter(result)
             # If formatter returns string, try to parse and trim
             if isinstance(formatted, str):  # type: ignore[reportUnknownArgumentType]
                 try:
@@ -598,41 +504,41 @@ def trace(
             observe_params["name"] = observe_name

             # Override with decorator-level session_id and user_id if provided
-            if
-                observe_params["session_id"] =
-            if
-                observe_params["user_id"] =
-            if
-                observe_params["metadata"] =
-            if
-                observe_params["tags"] = observe_params.get("tags", []) +
-            if
-                observe_params["span_type"] =
+            if bound_session_id:
+                observe_params["session_id"] = bound_session_id
+            if bound_user_id:
+                observe_params["user_id"] = bound_user_id
+            if bound_metadata:
+                observe_params["metadata"] = bound_metadata
+            if bound_tags:
+                observe_params["tags"] = observe_params.get("tags", []) + bound_tags
+            if bound_span_type:
+                observe_params["span_type"] = bound_span_type

             # Add the new Laminar parameters
-            if
-                observe_params["ignore_input"] =
-            if
-                observe_params["ignore_output"] =
-            if
-                observe_params["ignore_inputs"] =
+            if bound_ignore_input:
+                observe_params["ignore_input"] = bound_ignore_input
+            if bound_ignore_output:
+                observe_params["ignore_output"] = bound_ignore_output
+            if bound_ignore_inputs is not None:
+                observe_params["ignore_inputs"] = bound_ignore_inputs

             # Use trimming formatters if trim_documents is enabled
-            if
+            if bound_trim_documents:
                 # Use the trimming formatters (which may wrap custom formatters)
                 observe_params["input_formatter"] = _create_trimming_input_formatter
                 observe_params["output_formatter"] = _create_trimming_output_formatter
             else:
                 # Use custom formatters directly if provided
-                if
-                    observe_params["input_formatter"] =
-                if
-                    observe_params["output_formatter"] =
+                if bound_input_formatter is not None:
+                    observe_params["input_formatter"] = bound_input_formatter
+                if bound_output_formatter is not None:
+                    observe_params["output_formatter"] = bound_output_formatter

-            if
-                observe_params["ignore_exceptions"] =
-            if
-                observe_params["preserve_global_context"] =
+            if bound_ignore_exceptions:
+                observe_params["ignore_exceptions"] = bound_ignore_exceptions
+            if bound_preserve_global_context:
+                observe_params["preserve_global_context"] = bound_preserve_global_context

             return observe_params

@@ -645,7 +551,7 @@ def trace(
                 The result of the wrapped function.
             """
             observe_params = _prepare_and_get_observe_params(kwargs)
-            observed_func =
+            observed_func = bound_observe(**observe_params)(f)
             return observed_func(*args, **kwargs)

         @wraps(f)
@@ -656,8 +562,8 @@ def trace(
                 The result of the wrapped function.
             """
             observe_params = _prepare_and_get_observe_params(kwargs)
-            observed_func =
-            return await observed_func(*args, **kwargs)
+            observed_func = bound_observe(**observe_params)(f)
+            return await observed_func(*args, **kwargs)  # pyright: ignore[reportGeneralTypeIssues, reportUnknownVariableType]

         wrapper = async_wrapper if is_coroutine else sync_wrapper

@@ -665,17 +571,14 @@ def trace(
         wrapper.__is_traced__ = True  # type: ignore[attr-defined]

         # Preserve the original function signature
-
+        with contextlib.suppress(AttributeError, ValueError):
             wrapper.__signature__ = sig  # type: ignore[attr-defined]
-        except (AttributeError, ValueError):
-            pass

         return cast(Callable[P, R], wrapper)

     if func:
         return decorator(func)  # Called as @trace
-
-    return decorator  # Called as @trace(...)
+    return decorator  # Called as @trace(...)


 def set_trace_cost(cost: float | str) -> None:
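The former try/except/pass blocks become `contextlib.suppress`, which behaves the same:

    import contextlib

    with contextlib.suppress(ValueError):  # equivalent to try/except ValueError: pass
        int("not a number")
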
@@ -687,10 +590,10 @@ def set_trace_cost(cost: float | str) -> None:
     current operation. Particularly useful for tracking costs of external
     API calls, compute resources, or custom billing scenarios.

-    The cost is stored in three metadata fields for
-    - gen_ai.usage.output_cost:
-    - gen_ai.usage.cost:
-    - cost:
+    The cost is stored in three metadata fields for observability tool consumption:
+    - gen_ai.usage.output_cost: OpenTelemetry GenAI semantic convention
+    - gen_ai.usage.cost: Aggregated cost field
+    - cost: Short-form cost field

     Args:
         cost: The cost value to set. Can be:
@@ -698,56 +601,14 @@ def set_trace_cost(cost: float | str) -> None:
            - str: USD format with dollar sign (e.g., "$0.05" or "$1.25")
            Only positive values will be set; zero or negative values are ignored.

-    Example:
-        >>> # Track cost of external API call
-        >>> @trace
-        >>> async def call_translation_api(text: str) -> str:
-        ...     # External API charges per character
-        ...     char_count = len(text)
-        ...     cost_per_char = 0.00001  # $0.00001 per character
-        ...
-        ...     result = await external_api.translate(text)
-        ...
-        ...     # Set the cost for this operation
-        ...     set_trace_cost(char_count * cost_per_char)
-        ...     return result
-        >>>
-        >>> # Track compute resource costs
-        >>> @trace
-        >>> def process_video(video_path: str) -> dict:
-        ...     duration = get_video_duration(video_path)
-        ...     cost_per_minute = 0.10  # $0.10 per minute
-        ...
-        ...     result = process_video_content(video_path)
-        ...
-        ...     # Set cost using string format
-        ...     set_trace_cost(f"${duration * cost_per_minute:.2f}")
-        ...     return result
-        >>>
-        >>> # Combine with LLM costs in pipeline
-        >>> @pipeline_task
-        >>> async def enriched_generation(prompt: str) -> str:
-        ...     # LLM cost tracked automatically via ModelResponse
-        ...     response = await llm.generate("gpt-5", messages=prompt)
-        ...
-        ...     # Add cost for post-processing
-        ...     processing_cost = 0.02  # Fixed cost for enrichment
-        ...     set_trace_cost(processing_cost)
-        ...
-        ...     return enrich_response(response.content)
-
     Raises:
         ValueError: If string format is invalid (not a valid USD amount).

-
-
-
-
-
-    - The cost is added to the current span's attributes/metadata
-    - Multiple calls overwrite the previous cost (not cumulative)
-    - If called outside a traced context (no active span), it has no effect
-      and does not raise an error
+    Only works within a traced context (function decorated with @trace,
+    @pipeline_task, or @pipeline_flow). LLM costs are tracked automatically via
+    ModelResponse; use this for non-LLM costs. Multiple calls overwrite the
+    previous cost (not cumulative). If called outside a traced context, it has
+    no effect and does not raise an error.
     """
     # Parse string format if provided
     if isinstance(cost, str):
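The docstring examples are dropped here as well; the two accepted argument forms are unchanged. A short sketch, assuming the module path from the rename above:

    from ai_pipeline_core.observability.tracing import set_trace_cost  # assumed path

    set_trace_cost(0.0125)   # float: cost in USD
    set_trace_cost("$0.05")  # str: "$" prefix, parsed via float(value[1:])
    # Outside a traced span both calls are silently ignored, per the docstring above.
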
@@ -760,9 +621,7 @@ def set_trace_cost(cost: float | str) -> None:
             # Remove $ and convert to float
             cost_value = float(cost_str[1:])
         except ValueError as e:
-            raise ValueError(
-                f"Invalid USD format: {cost!r}. Must be a valid number after '$'"
-            ) from e
+            raise ValueError(f"Invalid USD format: {cost!r}. Must be a valid number after '$'") from e
     else:
         cost_value = cost

@@ -774,11 +633,8 @@ def set_trace_cost(cost: float | str) -> None:
         "cost": cost_value,
     }

-
+    with contextlib.suppress(Exception):
         Laminar.set_span_attributes(attributes)
-    except Exception:
-        # Silently ignore if not in a traced context
-        pass


-__all__ = ["
+__all__ = ["TraceInfo", "TraceLevel", "set_trace_cost", "trace"]
ai_pipeline_core/pipeline/__init__.py (new file)

@@ -0,0 +1,10 @@
+"""Pipeline framework primitives — decorators and flow options."""
+
+from ai_pipeline_core.pipeline.decorators import pipeline_flow, pipeline_task
+from ai_pipeline_core.pipeline.options import FlowOptions
+
+__all__ = [
+    "FlowOptions",
+    "pipeline_flow",
+    "pipeline_task",
+]
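Based on the new package's `__all__`, downstream imports would look like the following (decorator behavior lives in pipeline/decorators.py, which this view does not show):

    from ai_pipeline_core.pipeline import FlowOptions, pipeline_flow, pipeline_task

    print(FlowOptions, pipeline_flow, pipeline_task)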