ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +83 -119
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +14 -15
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +349 -1062
- ai_pipeline_core/documents/mime_type.py +40 -85
- ai_pipeline_core/documents/utils.py +62 -7
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +5 -3
- ai_pipeline_core/llm/ai_messages.py +284 -73
- ai_pipeline_core/llm/client.py +462 -209
- ai_pipeline_core/llm/model_options.py +86 -53
- ai_pipeline_core/llm/model_response.py +187 -241
- ai_pipeline_core/llm/model_types.py +34 -54
- ai_pipeline_core/logging/__init__.py +2 -9
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -43
- ai_pipeline_core/logging/logging_mixin.py +17 -51
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/observability/tracing.py +640 -0
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +26 -105
- ai_pipeline_core/settings.py +41 -32
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -240
- ai_pipeline_core/documents/flow_document.py +0 -128
- ai_pipeline_core/documents/task_document.py +0 -133
- ai_pipeline_core/documents/temporary_document.py +0 -95
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -314
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -717
- ai_pipeline_core/prefect.py +0 -54
- ai_pipeline_core/simple_runner/__init__.py +0 -24
- ai_pipeline_core/simple_runner/cli.py +0 -255
- ai_pipeline_core/simple_runner/simple_runner.py +0 -385
- ai_pipeline_core/tracing.py +0 -475
- ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
- ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,640 @@
|
|
|
1
|
+
"""Tracing utilities that integrate Laminar (``lmnr``) with our code-base.
|
|
2
|
+
|
|
3
|
+
This module centralizes:
|
|
4
|
+
- ``TraceInfo`` - a small helper object for propagating contextual metadata.
|
|
5
|
+
- ``trace`` decorator - augments a callable with Laminar tracing, automatic
|
|
6
|
+
``observe`` instrumentation, and optional support for test runs.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import contextlib
|
|
10
|
+
import inspect
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
from collections.abc import Callable
|
|
14
|
+
from functools import wraps
|
|
15
|
+
from typing import Any, Literal, ParamSpec, TypeVar, cast, overload
|
|
16
|
+
|
|
17
|
+
from lmnr import Attributes, Instruments, Laminar, observe
|
|
18
|
+
from pydantic import BaseModel, Field
|
|
19
|
+
|
|
20
|
+
from ai_pipeline_core.documents import Document
|
|
21
|
+
from ai_pipeline_core.llm import AIMessages, ModelResponse
|
|
22
|
+
from ai_pipeline_core.settings import settings
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Typing helpers
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Generic typing helpers: ``P`` captures the decorated callable's parameter
# specification and ``R`` its return type, so ``@trace`` preserves the exact
# signature for static type checkers.
P = ParamSpec("P")
R = TypeVar("R")

# Tri-state switch consumed by the ``trace`` decorator below.
TraceLevel = Literal["always", "debug", "off"]
"""Control level for tracing activation.

Values:
- "always": Always trace (default, production mode)
- "debug": Only trace when LMNR_DEBUG == "true"
- "off": Disable tracing completely
"""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# Serialization helpers
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
def _serialize_for_tracing(obj: Any) -> Any:  # noqa: PLR0911
    """Convert objects to JSON-serializable format for tracing.

    Intended as a ``json.dumps(default=...)`` hook: it is only invoked for
    values json cannot serialize natively. Handles Document, homogeneous
    Document lists, AIMessages, ModelResponse, and arbitrary pydantic
    models; anything else falls back to ``str``.
    """
    if isinstance(obj, Document):
        return obj.serialize_model()

    # A non-empty list whose first element is a Document is treated as a
    # document list (only the first element is inspected, matching callers).
    if isinstance(obj, list) and obj and isinstance(obj[0], Document):
        docs = cast(list[Document], obj)
        return [d.serialize_model() for d in docs]

    if isinstance(obj, AIMessages):
        return [
            item.serialize_model() if isinstance(item, Document) else item
            for item in obj
        ]

    if isinstance(obj, ModelResponse):
        return obj.model_dump()

    if isinstance(obj, BaseModel):
        # Shallow walk of the model's fields: Documents get their dedicated
        # serializer, nested models recurse, everything else passes through.
        def _convert(value: Any) -> Any:
            if isinstance(value, Document):
                return value.serialize_model()
            if isinstance(value, BaseModel):
                return _serialize_for_tracing(value)
            return value

        return {key: _convert(value) for key, value in obj.__dict__.items()}

    try:
        return str(obj)  # pyright: ignore[reportUnknownArgumentType]
    except Exception:
        # Last resort: even __str__ failed, so report only the type name.
        return f"<{type(obj).__name__}>"  # pyright: ignore[reportUnknownArgumentType]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
# Document trimming utilities
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
def _trim_attachment_list(attachments: list[Any]) -> list[Any]:
|
|
79
|
+
"""Trim attachment content in a serialized attachment list.
|
|
80
|
+
|
|
81
|
+
Always trims regardless of parent document type:
|
|
82
|
+
- Binary (base64): replace content with placeholder
|
|
83
|
+
- Text > 250 chars: keep first 100 + last 100
|
|
84
|
+
"""
|
|
85
|
+
trimmed: list[Any] = []
|
|
86
|
+
for raw_att in attachments:
|
|
87
|
+
if not isinstance(raw_att, dict):
|
|
88
|
+
trimmed.append(raw_att)
|
|
89
|
+
continue
|
|
90
|
+
att: dict[str, Any] = cast(dict[str, Any], raw_att)
|
|
91
|
+
content_encoding: str = att.get("content_encoding", "utf-8")
|
|
92
|
+
if content_encoding == "base64":
|
|
93
|
+
att = att.copy()
|
|
94
|
+
att["content"] = "[binary content removed]"
|
|
95
|
+
elif isinstance(att.get("content"), str) and len(att["content"]) > 250:
|
|
96
|
+
att = att.copy()
|
|
97
|
+
c: str = att["content"]
|
|
98
|
+
trimmed_chars = len(c) - 200
|
|
99
|
+
att["content"] = c[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + c[-100:]
|
|
100
|
+
trimmed.append(att)
|
|
101
|
+
return trimmed
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _trim_document_content(doc_dict: dict[str, Any]) -> dict[str, Any]:
|
|
105
|
+
"""Trim document content for traces. All documents trimmed equally."""
|
|
106
|
+
if not isinstance(doc_dict, dict): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
107
|
+
return doc_dict # pyright: ignore[reportUnreachable]
|
|
108
|
+
|
|
109
|
+
if "content" not in doc_dict or "class_name" not in doc_dict:
|
|
110
|
+
return doc_dict
|
|
111
|
+
|
|
112
|
+
doc_dict = doc_dict.copy()
|
|
113
|
+
content = doc_dict.get("content", "")
|
|
114
|
+
content_encoding = doc_dict.get("content_encoding", "utf-8")
|
|
115
|
+
|
|
116
|
+
# Trim attachments
|
|
117
|
+
if "attachments" in doc_dict and isinstance(doc_dict["attachments"], list):
|
|
118
|
+
doc_dict["attachments"] = _trim_attachment_list(cast(list[Any], doc_dict["attachments"]))
|
|
119
|
+
|
|
120
|
+
# Binary: remove content
|
|
121
|
+
if content_encoding == "base64":
|
|
122
|
+
doc_dict["content"] = "[binary content removed]"
|
|
123
|
+
return doc_dict
|
|
124
|
+
|
|
125
|
+
# Text: trim if > 250 chars
|
|
126
|
+
if isinstance(content, str) and len(content) > 250:
|
|
127
|
+
trimmed_chars = len(content) - 200
|
|
128
|
+
doc_dict["content"] = content[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + content[-100:]
|
|
129
|
+
|
|
130
|
+
return doc_dict
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _trim_documents_in_data(data: Any) -> Any:
|
|
134
|
+
"""Recursively trim document content in nested data structures."""
|
|
135
|
+
if isinstance(data, dict):
|
|
136
|
+
data_dict = cast(dict[str, Any], data)
|
|
137
|
+
if "class_name" in data_dict and "content" in data_dict:
|
|
138
|
+
return _trim_document_content(data_dict)
|
|
139
|
+
return {k: _trim_documents_in_data(v) for k, v in data_dict.items()}
|
|
140
|
+
if isinstance(data, list):
|
|
141
|
+
return [_trim_documents_in_data(item) for item in cast(list[Any], data)]
|
|
142
|
+
if isinstance(data, tuple):
|
|
143
|
+
return tuple(_trim_documents_in_data(item) for item in cast(tuple[Any, ...], data))
|
|
144
|
+
return data
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ---------------------------------------------------------------------------
|
|
148
|
+
# ``TraceInfo`` - metadata container
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
class TraceInfo(BaseModel):
    """Container for propagating trace context through the pipeline.

    Carries session/user identity plus free-form metadata and tags so that
    every span produced along a call chain shares consistent Laminar (LMNR)
    context. Typically created at the entry point of a flow and passed
    through all subsequent function calls.

    Attributes:
        session_id: Unique identifier for the current session/conversation.
        user_id: Identifier for the user triggering the operation.
        metadata: Key-value pairs for additional trace context. Useful for
            filtering and searching in the LMNR dashboard.
        tags: List of tags for categorizing traces (e.g., ["production", "v2"]).

    Environment fallbacks:
        - LMNR_SESSION_ID / LMNR_USER_ID: used by ``get_observe_kwargs``
          when the corresponding field is unset.
    """

    session_id: str | None = None
    user_id: str | None = None
    metadata: dict[str, str] = Field(default_factory=dict)
    tags: list[str] = Field(default_factory=list)

    def get_observe_kwargs(self) -> dict[str, Any]:
        """Build keyword arguments for ``lmnr.observe`` from this context.

        Applies the LMNR_SESSION_ID / LMNR_USER_ID environment variable
        fallbacks and includes only non-empty values, so the returned dict
        may contain any subset of: ``session_id``, ``user_id``,
        ``metadata``, ``tags``.

        Returns:
            Dictionary of observe parameters with only truthy entries.
        """
        params: dict[str, Any] = {}

        # Environment variables act as fallbacks, never overrides.
        resolved_session = self.session_id or os.getenv("LMNR_SESSION_ID")
        if resolved_session:
            params["session_id"] = resolved_session

        resolved_user = self.user_id or os.getenv("LMNR_USER_ID")
        if resolved_user:
            params["user_id"] = resolved_user

        if self.metadata:
            params["metadata"] = self.metadata
        if self.tags:
            params["tags"] = self.tags
        return params
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# ---------------------------------------------------------------------------
|
|
219
|
+
# ``trace`` decorator
|
|
220
|
+
# ---------------------------------------------------------------------------
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _initialise_laminar() -> None:
    """Initialize Laminar SDK with project configuration.

    Sets up the Laminar observability client with the project API key from
    settings, disabling automatic OpenAI instrumentation to avoid conflicts
    with our custom tracing. A missing API key makes this a no-op.

    Called once per process. Multiple calls are safe (Laminar handles
    idempotency).
    """
    api_key = settings.lmnr_project_api_key
    if not api_key:
        return
    # NOTE(review): ``Instruments.OPENAI`` is an enum member and therefore
    # always truthy, so the else-branch appears unreachable — preserved as-is.
    disabled = [Instruments.OPENAI] if Instruments.OPENAI else []
    Laminar.initialize(
        project_api_key=api_key,
        disabled_instruments=disabled,
        export_timeout_seconds=15,
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# Overload for calls like @trace(name="...", level="debug") — the
# parenthesized form returns a decorator to apply to the target callable.
@overload
def trace(
    *,
    level: TraceLevel = "always",
    name: str | None = None,
    session_id: str | None = None,
    user_id: str | None = None,
    metadata: dict[str, Any] | None = None,
    tags: list[str] | None = None,
    span_type: str | None = None,
    ignore_input: bool = False,
    ignore_output: bool = False,
    ignore_inputs: list[str] | None = None,
    input_formatter: Callable[..., str] | None = None,
    output_formatter: Callable[..., str] | None = None,
    ignore_exceptions: bool = False,
    preserve_global_context: bool = True,
    trim_documents: bool = True,
) -> Callable[[Callable[P, R]], Callable[P, R]]: ...
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# Overload for the bare @trace call — the function is decorated directly
# with all defaults and its signature is preserved.
@overload
def trace(func: Callable[P, R]) -> Callable[P, R]: ...  # noqa: UP047
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# Actual implementation
def trace(  # noqa: UP047
    func: Callable[P, R] | None = None,
    *,
    level: TraceLevel = "always",
    name: str | None = None,
    session_id: str | None = None,
    user_id: str | None = None,
    metadata: dict[str, Any] | None = None,
    tags: list[str] | None = None,
    span_type: str | None = None,
    ignore_input: bool = False,
    ignore_output: bool = False,
    ignore_inputs: list[str] | None = None,
    input_formatter: Callable[..., str] | None = None,
    output_formatter: Callable[..., str] | None = None,
    ignore_exceptions: bool = False,
    preserve_global_context: bool = True,
    trim_documents: bool = True,
) -> Callable[[Callable[P, R]], Callable[P, R]] | Callable[P, R]:
    """Add Laminar observability tracing to any function.

    The trace decorator integrates functions with Laminar (LMNR) for
    distributed tracing, performance monitoring, and debugging. It
    automatically handles both sync and async functions, propagates
    trace context, and provides fine-grained control over what gets traced.

    USAGE GUIDELINE - Defaults First:
        By default, use WITHOUT any parameters unless instructed otherwise.
        The defaults are optimized for most use cases.

    Args:
        func: Function to trace (when used without parentheses: @trace).

        level: Controls when tracing is active:
            - "always": Always trace (default, production mode)
            - "debug": Only trace when LMNR_DEBUG == "true"
            - "off": Disable tracing completely

        name: Custom span name in traces (defaults to function.__name__).
            Use descriptive names for better trace readability.

        session_id: Override session ID for this function's traces.
            Typically propagated via TraceInfo instead.

        user_id: Override user ID for this function's traces.
            Typically propagated via TraceInfo instead.

        metadata: Additional key-value metadata attached to spans.
            Searchable in LMNR dashboard. Merged with TraceInfo metadata.

        tags: List of tags for categorizing spans (e.g., ["api", "critical"]).
            Merged with TraceInfo tags.

        span_type: Semantic type of the span (e.g., "LLM", "CHAIN", "TOOL").
            Affects visualization in LMNR dashboard.

        ignore_input: Don't record function inputs in trace (privacy/size).

        ignore_output: Don't record function output in trace (privacy/size).

        ignore_inputs: List of parameter names to exclude from trace.
            Useful for sensitive data like API keys.

        input_formatter: Custom function to format inputs for tracing.
            Receives all function args, returns display string.

        output_formatter: Custom function to format output for tracing.
            Receives function result, returns display string.

        ignore_exceptions: Don't record exceptions in traces (default False).

        preserve_global_context: Maintain Laminar's global context across
            calls (default True). Set False for isolated traces.

        trim_documents: Automatically trim document content in traces
            (default True). Binary content is removed, text content is
            trimmed to first/last 100 chars; attachment content follows the
            same rules. Helps reduce trace size for large documents.

    Returns:
        Decorated function with same signature but added tracing.

    TraceInfo propagation:
        If the decorated function has a 'trace_info' parameter, the decorator
        automatically creates or propagates a TraceInfo instance, ensuring
        consistent session/user tracking across the call chain.

    Environment variables:
        - LMNR_DEBUG: Set to "true" to enable debug-level traces
        - LMNR_PROJECT_API_KEY: Required for trace submission

    Performance:
        - Tracing overhead is minimal (~1-2ms per call)
        - When level="off", decorator returns original function unchanged
        - Large inputs/outputs can be excluded with ignore_* parameters

    Automatically initializes Laminar on first use. Works with both sync and
    async functions. Preserves function signature and metadata. Thread-safe
    and async-safe.
    """
    # Fast path: tracing disabled entirely — return the target unchanged,
    # whether called bare (@trace) or with arguments (@trace(level="off")).
    if level == "off":
        if func:
            return func
        return lambda f: f

    def decorator(f: Callable[P, R]) -> Callable[P, R]:
        """Apply tracing to the target function.

        Returns:
            Wrapped function with LMNR observability.

        Raises:
            TypeError: If function is already decorated with @pipeline_task or @pipeline_flow.
        """
        # Check if this is already a traced pipeline_task or pipeline_flow.
        # This happens when @trace is applied after @pipeline_task/@pipeline_flow.
        if hasattr(f, "__is_traced__") and f.__is_traced__:  # type: ignore[attr-defined]
            # Check if it's a Prefect Task or Flow object: Prefect objects
            # carry attributes (fn/submit/map/serve) that plain functions don't.
            is_prefect_task = hasattr(f, "fn") and hasattr(f, "submit") and hasattr(f, "map")
            is_prefect_flow = hasattr(f, "fn") and hasattr(f, "serve")
            if is_prefect_task or is_prefect_flow:
                fname = getattr(f, "__name__", "function")
                raise TypeError(
                    f"Function '{fname}' is already decorated with @pipeline_task or "
                    f"@pipeline_flow. Remove the @trace decorator - pipeline decorators "
                    f"include tracing automatically."
                )

        # Handle 'debug' level logic - only trace when LMNR_DEBUG is "true"
        # (settings value takes precedence over the raw environment variable).
        debug_value = settings.lmnr_debug or os.getenv("LMNR_DEBUG", "")
        if level == "debug" and debug_value.lower() != "true":
            return f

        # --- Pre-computation (done once when the function is decorated) ---
        _initialise_laminar()
        sig = inspect.signature(f)
        is_coroutine = inspect.iscoroutinefunction(f)
        observe_name = name or f.__name__
        # Bind the observe callable locally so wrappers close over a stable name.
        bound_observe = observe

        # Snapshot decorator arguments into locals closed over by the wrappers.
        bound_session_id = session_id
        bound_user_id = user_id
        bound_metadata = metadata if metadata is not None else {}
        bound_tags = tags if tags is not None else []
        bound_span_type = span_type
        bound_ignore_input = ignore_input
        bound_ignore_output = ignore_output
        bound_ignore_inputs = ignore_inputs
        bound_input_formatter = input_formatter
        bound_output_formatter = output_formatter
        bound_ignore_exceptions = ignore_exceptions
        bound_preserve_global_context = preserve_global_context
        bound_trim_documents = trim_documents

        # Create document trimming formatters if needed. These wrap any
        # user-supplied formatter: its output is parsed (when JSON) so that
        # embedded serialized documents can still be trimmed.
        def _create_trimming_input_formatter(*args: Any, **kwargs: Any) -> str:
            # First, let any custom formatter process the data
            if bound_input_formatter:
                result = bound_input_formatter(*args, **kwargs)
                # If formatter returns string, try to parse and trim
                if isinstance(result, str):  # type: ignore[reportUnknownArgumentType]
                    try:
                        data = json.loads(result)
                        trimmed = _trim_documents_in_data(data)
                        return json.dumps(trimmed)
                    except (json.JSONDecodeError, TypeError):
                        # Not JSON — pass the formatter's string through untouched.
                        return result
                else:
                    # If formatter returns dict/list, trim it
                    trimmed = _trim_documents_in_data(result)
                    return json.dumps(trimmed) if not isinstance(trimmed, str) else trimmed
            else:
                # No custom formatter - mimic Laminar's get_input_from_func_args
                # Build a dict with parameter names as keys (like Laminar does)
                params = list(sig.parameters.keys())
                data: dict[str, Any] = {}

                # Map positional args to parameter names; extras are dropped.
                for i, arg in enumerate(args):
                    if i < len(params):
                        data[params[i]] = arg

                # Add kwargs
                data.update(kwargs)

                # Serialize with our helper function, then round-trip through
                # JSON so the trimming pass sees plain dicts/lists only.
                serialized = json.dumps(data, default=_serialize_for_tracing)
                parsed = json.loads(serialized)

                # Trim documents in the serialized data
                trimmed = _trim_documents_in_data(parsed)
                return json.dumps(trimmed)

        def _create_trimming_output_formatter(result: Any) -> str:
            # First, let any custom formatter process the data
            if bound_output_formatter:
                formatted = bound_output_formatter(result)
                # If formatter returns string, try to parse and trim
                if isinstance(formatted, str):  # type: ignore[reportUnknownArgumentType]
                    try:
                        data = json.loads(formatted)
                        trimmed = _trim_documents_in_data(data)
                        return json.dumps(trimmed)
                    except (json.JSONDecodeError, TypeError):
                        return formatted
                else:
                    # If formatter returns dict/list, trim it
                    trimmed = _trim_documents_in_data(formatted)
                    return json.dumps(trimmed) if not isinstance(trimmed, str) else trimmed
            else:
                # No custom formatter, serialize result with smart defaults
                # Serialize with our extracted helper function
                serialized = json.dumps(result, default=_serialize_for_tracing)
                parsed = json.loads(serialized)

                # Trim documents in the serialized data
                trimmed = _trim_documents_in_data(parsed)
                return json.dumps(trimmed)

        # --- Helper function for runtime logic ---
        def _prepare_and_get_observe_params(runtime_kwargs: dict[str, Any]) -> dict[str, Any]:
            """Inspects runtime args, manages TraceInfo, and returns params for lmnr.observe.

            Modifies runtime_kwargs in place to inject TraceInfo if the function expects it.

            Returns:
                Dictionary of parameters for lmnr.observe decorator.
            """
            trace_info = runtime_kwargs.get("trace_info")
            if not isinstance(trace_info, TraceInfo):
                trace_info = TraceInfo()
            # Inject only when the wrapped function declares a trace_info param.
            if "trace_info" in sig.parameters:
                runtime_kwargs["trace_info"] = trace_info

            observe_params = trace_info.get_observe_kwargs()
            observe_params["name"] = observe_name

            # Override with decorator-level session_id and user_id if provided
            if bound_session_id:
                observe_params["session_id"] = bound_session_id
            if bound_user_id:
                observe_params["user_id"] = bound_user_id
            if bound_metadata:
                observe_params["metadata"] = bound_metadata
            if bound_tags:
                # Decorator tags are appended to (not replacing) TraceInfo tags.
                observe_params["tags"] = observe_params.get("tags", []) + bound_tags
            if bound_span_type:
                observe_params["span_type"] = bound_span_type

            # Add the new Laminar parameters
            if bound_ignore_input:
                observe_params["ignore_input"] = bound_ignore_input
            if bound_ignore_output:
                observe_params["ignore_output"] = bound_ignore_output
            if bound_ignore_inputs is not None:
                observe_params["ignore_inputs"] = bound_ignore_inputs

            # Use trimming formatters if trim_documents is enabled
            if bound_trim_documents:
                # Use the trimming formatters (which may wrap custom formatters)
                observe_params["input_formatter"] = _create_trimming_input_formatter
                observe_params["output_formatter"] = _create_trimming_output_formatter
            else:
                # Use custom formatters directly if provided
                if bound_input_formatter is not None:
                    observe_params["input_formatter"] = bound_input_formatter
                if bound_output_formatter is not None:
                    observe_params["output_formatter"] = bound_output_formatter

            if bound_ignore_exceptions:
                observe_params["ignore_exceptions"] = bound_ignore_exceptions
            if bound_preserve_global_context:
                observe_params["preserve_global_context"] = bound_preserve_global_context

            return observe_params

        # --- The actual wrappers ---
        # NOTE: observe() is applied at call time (not decoration time) so
        # that per-call TraceInfo values reach the span configuration.
        @wraps(f)
        def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            """Synchronous wrapper for traced function.

            Returns:
                The result of the wrapped function.
            """
            observe_params = _prepare_and_get_observe_params(kwargs)
            observed_func = bound_observe(**observe_params)(f)
            return observed_func(*args, **kwargs)

        @wraps(f)
        async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            """Asynchronous wrapper for traced function.

            Returns:
                The result of the wrapped function.
            """
            observe_params = _prepare_and_get_observe_params(kwargs)
            observed_func = bound_observe(**observe_params)(f)
            return await observed_func(*args, **kwargs)  # pyright: ignore[reportGeneralTypeIssues, reportUnknownVariableType]

        wrapper = async_wrapper if is_coroutine else sync_wrapper

        # Mark function as traced for detection by pipeline decorators
        wrapper.__is_traced__ = True  # type: ignore[attr-defined]

        # Preserve the original function signature for introspection tools.
        with contextlib.suppress(AttributeError, ValueError):
            wrapper.__signature__ = sig  # type: ignore[attr-defined]

        return cast(Callable[P, R], wrapper)

    if func:
        return decorator(func)  # Called as @trace
    return decorator  # Called as @trace(...)
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def set_trace_cost(cost: float | str) -> None:
|
|
585
|
+
"""Set cost attributes for the current trace span.
|
|
586
|
+
|
|
587
|
+
Sets cost metadata in the current LMNR trace span for tracking expenses
|
|
588
|
+
of custom operations. This function should be called within a traced
|
|
589
|
+
function to dynamically set or update the cost associated with the
|
|
590
|
+
current operation. Particularly useful for tracking costs of external
|
|
591
|
+
API calls, compute resources, or custom billing scenarios.
|
|
592
|
+
|
|
593
|
+
The cost is stored in three metadata fields for observability tool consumption:
|
|
594
|
+
- gen_ai.usage.output_cost: OpenTelemetry GenAI semantic convention
|
|
595
|
+
- gen_ai.usage.cost: Aggregated cost field
|
|
596
|
+
- cost: Short-form cost field
|
|
597
|
+
|
|
598
|
+
Args:
|
|
599
|
+
cost: The cost value to set. Can be:
|
|
600
|
+
- float: Cost in dollars (e.g., 0.05 for 5 cents)
|
|
601
|
+
- str: USD format with dollar sign (e.g., "$0.05" or "$1.25")
|
|
602
|
+
Only positive values will be set; zero or negative values are ignored.
|
|
603
|
+
|
|
604
|
+
Raises:
|
|
605
|
+
ValueError: If string format is invalid (not a valid USD amount).
|
|
606
|
+
|
|
607
|
+
Only works within a traced context (function decorated with @trace,
|
|
608
|
+
@pipeline_task, or @pipeline_flow). LLM costs are tracked automatically via
|
|
609
|
+
ModelResponse; use this for non-LLM costs. Multiple calls overwrite the
|
|
610
|
+
previous cost (not cumulative). If called outside a traced context, it has
|
|
611
|
+
no effect and does not raise an error.
|
|
612
|
+
"""
|
|
613
|
+
# Parse string format if provided
|
|
614
|
+
if isinstance(cost, str):
|
|
615
|
+
# Remove dollar sign and any whitespace
|
|
616
|
+
cost_str = cost.strip()
|
|
617
|
+
if not cost_str.startswith("$"):
|
|
618
|
+
raise ValueError(f"Invalid USD format: {cost!r}. Must start with '$' (e.g., '$0.50')")
|
|
619
|
+
|
|
620
|
+
try:
|
|
621
|
+
# Remove $ and convert to float
|
|
622
|
+
cost_value = float(cost_str[1:])
|
|
623
|
+
except ValueError as e:
|
|
624
|
+
raise ValueError(f"Invalid USD format: {cost!r}. Must be a valid number after '$'") from e
|
|
625
|
+
else:
|
|
626
|
+
cost_value = cost
|
|
627
|
+
|
|
628
|
+
if cost_value > 0:
|
|
629
|
+
# Build the attributes dictionary with cost metadata
|
|
630
|
+
attributes: dict[Attributes | str, float] = {
|
|
631
|
+
"gen_ai.usage.output_cost": cost_value,
|
|
632
|
+
"gen_ai.usage.cost": cost_value,
|
|
633
|
+
"cost": cost_value,
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
with contextlib.suppress(Exception):
|
|
637
|
+
Laminar.set_span_attributes(attributes)
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
# Public API of this module.
__all__ = ["TraceInfo", "TraceLevel", "set_trace_cost", "trace"]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Pipeline framework primitives — decorators and flow options."""

from ai_pipeline_core.pipeline.decorators import pipeline_flow, pipeline_task
from ai_pipeline_core.pipeline.options import FlowOptions

# Explicit public API of the ``ai_pipeline_core.pipeline`` package.
__all__ = [
    "FlowOptions",
    "pipeline_flow",
    "pipeline_task",
]
|