ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +64 -158
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +11 -84
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +102 -90
- ai_pipeline_core/llm/client.py +229 -183
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/{tracing.py → observability/tracing.py}

@@ -6,18 +6,18 @@ This module centralizes:
 ``observe`` instrumentation, and optional support for test runs.
 """

+import contextlib
 import inspect
 import json
 import os
+from collections.abc import Callable
 from functools import wraps
-from typing import Any,
+from typing import Any, Literal, ParamSpec, TypeVar, cast, overload

 from lmnr import Attributes, Instruments, Laminar, observe
 from pydantic import BaseModel, Field

-
-# These are lazy imports only used when trim_documents is enabled
-from ai_pipeline_core.documents import Document, DocumentList
+from ai_pipeline_core.documents import Document
 from ai_pipeline_core.llm import AIMessages, ModelResponse
 from ai_pipeline_core.settings import settings

@@ -40,144 +40,112 @@ Values:
 # ---------------------------------------------------------------------------
 # Serialization helpers
 # ---------------------------------------------------------------------------
-def _serialize_for_tracing(obj: Any) -> Any:
-    """Convert objects to JSON-serializable format for tracing.
-
-    Handles Pydantic models, Documents, and other special types.
-    This is extracted for better testability.
-
-    Args:
-        obj: Object to serialize
-
-    Returns:
-        JSON-serializable representation of the object
-    """
-    # Our Document types - handle first to ensure serialize_model is used
+def _serialize_for_tracing(obj: Any) -> Any:  # noqa: PLR0911
+    """Convert objects to JSON-serializable format for tracing."""
     if isinstance(obj, Document):
         return obj.serialize_model()
-
-
-        return [doc.serialize_model() for doc in obj]
-    # AIMessages
+    if isinstance(obj, list) and obj and isinstance(obj[0], Document):
+        return [doc.serialize_model() for doc in cast(list[Document], obj)]
     if isinstance(obj, AIMessages):
-        result = []
+        result: list[Any] = []
         for msg in obj:
             if isinstance(msg, Document):
                 result.append(msg.serialize_model())
             else:
                 result.append(msg)
         return result
-    # ModelResponse (special Pydantic model) - use standard model_dump
     if isinstance(obj, ModelResponse):
         return obj.model_dump()
-    # Pydantic models - use custom serializer that respects Document.serialize_model()
     if isinstance(obj, BaseModel):
-
-        data = {}
+        data: dict[str, Any] = {}
         for field_name, field_value in obj.__dict__.items():
             if isinstance(field_value, Document):
-                # Use serialize_model for Documents to get base_type
                 data[field_name] = field_value.serialize_model()
             elif isinstance(field_value, BaseModel):
-                # Recursively handle nested Pydantic models
                 data[field_name] = _serialize_for_tracing(field_value)
             else:
-                # Let Pydantic handle other fields normally
                 data[field_name] = field_value
         return data
-    # Fallback to string representation
     try:
-        return str(obj)
+        return str(obj)  # pyright: ignore[reportUnknownArgumentType]
     except Exception:
-        return f"<{type(obj).__name__}>"
+        return f"<{type(obj).__name__}>"  # pyright: ignore[reportUnknownArgumentType]


 # ---------------------------------------------------------------------------
 # Document trimming utilities
 # ---------------------------------------------------------------------------
-def
-"""Trim
+def _trim_attachment_list(attachments: list[Any]) -> list[Any]:
+    """Trim attachment content in a serialized attachment list.

-
-
-
-
-
-
-
+    Always trims regardless of parent document type:
+    - Binary (base64): replace content with placeholder
+    - Text > 250 chars: keep first 100 + last 100
+    """
+    trimmed: list[Any] = []
+    for raw_att in attachments:
+        if not isinstance(raw_att, dict):
+            trimmed.append(raw_att)
+            continue
+        att: dict[str, Any] = cast(dict[str, Any], raw_att)
+        content_encoding: str = att.get("content_encoding", "utf-8")
+        if content_encoding == "base64":
+            att = att.copy()
+            att["content"] = "[binary content removed]"
+        elif isinstance(att.get("content"), str) and len(att["content"]) > 250:
+            att = att.copy()
+            c: str = att["content"]
+            trimmed_chars = len(c) - 200
+            att["content"] = c[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + c[-100:]
+        trimmed.append(att)
+    return trimmed

-    Args:
-        doc_dict: Document dictionary with base_type, content, and content_encoding

-
-
-
-
-    if not isinstance(doc_dict, dict):  # type: ignore[reportUnknownArgumentType]
-        return doc_dict
+def _trim_document_content(doc_dict: dict[str, Any]) -> dict[str, Any]:
+    """Trim document content for traces. All documents trimmed equally."""
+    if not isinstance(doc_dict, dict):  # pyright: ignore[reportUnnecessaryIsInstance]
+        return doc_dict  # pyright: ignore[reportUnreachable]

-    if "
+    if "content" not in doc_dict or "class_name" not in doc_dict:
         return doc_dict

-
+    doc_dict = doc_dict.copy()
     content = doc_dict.get("content", "")
     content_encoding = doc_dict.get("content_encoding", "utf-8")

-    #
+    # Trim attachments
+    if "attachments" in doc_dict and isinstance(doc_dict["attachments"], list):
+        doc_dict["attachments"] = _trim_attachment_list(cast(list[Any], doc_dict["attachments"]))
+
+    # Binary: remove content
     if content_encoding == "base64":
-        doc_dict = doc_dict.copy()
         doc_dict["content"] = "[binary content removed]"
         return doc_dict

-    #
-    if base_type == "flow":
-        return doc_dict
-
-    # For other documents (task, temporary), trim text content
+    # Text: trim if > 250 chars
     if isinstance(content, str) and len(content) > 250:
-
-
-        trimmed_chars = len(content) - 200  # Number of characters removed
-        doc_dict["content"] = (
-            content[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + content[-100:]
-        )
+        trimmed_chars = len(content) - 200
+        doc_dict["content"] = content[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + content[-100:]

     return doc_dict


 def _trim_documents_in_data(data: Any) -> Any:
-    """Recursively trim document content in nested data structures.
-
-    Processes dictionaries, lists, and nested structures to find and trim
-    documents based on their type and content.
-
-    Args:
-        data: Input data that may contain documents
-
-    Returns:
-        Data with document content trimmed according to rules
-    """
+    """Recursively trim document content in nested data structures."""
     if isinstance(data, dict):
-
-        if "
-
-
-
-
-
-
-
-        return [_trim_documents_in_data(item) for item in data]
-    elif isinstance(data, tuple):
-        # Process tuples
-        return tuple(_trim_documents_in_data(item) for item in data)
-    else:
-        # Return other types unchanged
-        return data
+        data_dict = cast(dict[str, Any], data)
+        if "class_name" in data_dict and "content" in data_dict:
+            return _trim_document_content(data_dict)
+        return {k: _trim_documents_in_data(v) for k, v in data_dict.items()}
+    if isinstance(data, list):
+        return [_trim_documents_in_data(item) for item in cast(list[Any], data)]
+    if isinstance(data, tuple):
+        return tuple(_trim_documents_in_data(item) for item in cast(tuple[Any, ...], data))
+    return data


 # ---------------------------------------------------------------------------
-# ``TraceInfo``
+# ``TraceInfo`` - metadata container
 # ---------------------------------------------------------------------------
 class TraceInfo(BaseModel):
     """Container for propagating trace context through the pipeline.
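The trimming helpers above replace the old per-base_type rules (flow documents were previously left untrimmed) with one uniform rule for every document and attachment. A minimal standalone sketch of that rule, using a hand-built dict rather than the library's real serialized document format:

```python
# Sketch only: mirrors the keys the new code checks for ("class_name",
# "content", "content_encoding"); not the package's actual serializer output.
from typing import Any


def trim_preview(doc: dict[str, Any]) -> dict[str, Any]:
    doc = doc.copy()
    content = doc.get("content", "")
    if doc.get("content_encoding") == "base64":
        doc["content"] = "[binary content removed]"
    elif isinstance(content, str) and len(content) > 250:
        # Keep first 100 + last 100 chars, note how many were dropped
        doc["content"] = f"{content[:100]} ... [trimmed {len(content) - 200} chars] ... {content[-100:]}"
    return doc


sample = {"class_name": "MyDocument", "content": "x" * 1000, "content_encoding": "utf-8"}
print(trim_preview(sample)["content"])  # xxx... ... [trimmed 800 chars] ... ...xxx
```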
@@ -196,30 +164,12 @@ class TraceInfo(BaseModel):

     Environment fallbacks:
         - LMNR_DEBUG: Controls debug-level tracing when set to "true"
-
-    Note: These variables are read directly by the tracing layer and are
+    These variables are read directly by the tracing layer and are
     not part of the Settings configuration.

-
-
-
-        ...     session_id="sess_123",
-        ...     user_id="user_456",
-        ...     metadata={"flow": "document_analysis", "version": "1.2"},
-        ...     tags=["production", "high_priority"]
-        ... )
-        >>>
-        >>> # Pass through function calls
-        >>> @trace
-        >>> async def process(data, trace_info: TraceInfo):
-        ...     # TraceInfo automatically propagates to nested calls
-        ...     result = await analyze(data, trace_info=trace_info)
-        ...     return result
-
-    Note:
-        TraceInfo is typically created at the entry point of a flow
-        and passed through all subsequent function calls for
-        consistent tracing context.
+    TraceInfo is typically created at the entry point of a flow
+    and passed through all subsequent function calls for
+    consistent tracing context.
     """

     session_id: str | None = None
@@ -243,14 +193,8 @@ class TraceInfo(BaseModel):

         Only non-empty values are included in the output.

-
-
-            >>> kwargs = trace_info.get_observe_kwargs()
-            >>> # Returns: {"session_id": "sess_123", "tags": ["test"]}
-
-        Note:
-            This method is called internally by the trace decorator
-            to configure Laminar observation parameters.
+        Called internally by the trace decorator to configure Laminar
+        observation parameters.
         """
         kwargs: dict[str, Any] = {}

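The hunks above compress the TraceInfo docstring; the removed doctest remains the clearest illustration of how it is constructed. A usage sketch based on that removed example, importing from the 0.4.0 module path (the top-level package may also re-export these names):

```python
from ai_pipeline_core.observability.tracing import TraceInfo

trace_info = TraceInfo(
    session_id="sess_123",
    user_id="user_456",
    metadata={"flow": "document_analysis", "version": "1.2"},
    tags=["production", "high_priority"],
)

# Only non-empty fields are forwarded to Laminar's observe() call.
print(trace_info.get_observe_kwargs())
```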
@@ -276,9 +220,6 @@ class TraceInfo(BaseModel):
 # ---------------------------------------------------------------------------


-_debug_processor_initialized = False
-
-
 def _initialise_laminar() -> None:
     """Initialize Laminar SDK with project configuration.

@@ -286,70 +227,13 @@ def _initialise_laminar() -> None:
     from settings. Disables automatic OpenAI instrumentation to avoid
     conflicts with our custom tracing.

-
-    - Uses settings.lmnr_project_api_key for authentication
-    - Disables OPENAI instrument to prevent double-tracing
-    - Called automatically by trace decorator on first use
-    - Optionally adds local debug processor if TRACE_DEBUG_PATH is set
-
-    Note:
-        This is an internal function called once per process.
-        Multiple calls are safe (Laminar handles idempotency).
+    Called once per process. Multiple calls are safe (Laminar handles idempotency).
     """
-    global _debug_processor_initialized
-
     if settings.lmnr_project_api_key:
         Laminar.initialize(
-            project_api_key=settings.lmnr_project_api_key,
-            disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [],
-        )
-
-        # Add local debug processor if configured (only once)
-        if not _debug_processor_initialized:
-            _debug_processor_initialized = True
-            debug_path = os.environ.get("TRACE_DEBUG_PATH")
-            if debug_path:
-                _setup_debug_processor(debug_path)
-
-
-def _setup_debug_processor(debug_path: str) -> None:
-    """Set up local debug trace processor."""
-    try:
-        from pathlib import Path  # noqa: PLC0415
-
-        from opentelemetry import trace  # noqa: PLC0415
-
-        from ai_pipeline_core.debug import (  # noqa: PLC0415
-            LocalDebugSpanProcessor,
-            LocalTraceWriter,
-            TraceDebugConfig,
-        )
-
-        config = TraceDebugConfig(
-            path=Path(debug_path),
-            max_element_bytes=int(os.environ.get("TRACE_DEBUG_MAX_INLINE", 10000)),
-            max_traces=int(os.environ.get("TRACE_DEBUG_MAX_TRACES", 20)) or None,
+            project_api_key=settings.lmnr_project_api_key, disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [], export_timeout_seconds=15
         )

-        writer = LocalTraceWriter(config)
-        processor = LocalDebugSpanProcessor(writer)
-
-        # Add to tracer provider
-        provider = trace.get_tracer_provider()
-        add_processor = getattr(provider, "add_span_processor", None)
-        if add_processor is not None:
-            add_processor(processor)
-
-        # Register shutdown
-        import atexit  # noqa: PLC0415
-
-        atexit.register(processor.shutdown)
-
-    except Exception as e:
-        import logging  # noqa: PLC0415
-
-        logging.getLogger(__name__).warning(f"Failed to setup debug trace processor: {e}")
-

 # Overload for calls like @trace(name="...", level="debug")
 @overload
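The removed _setup_debug_processor wired a local trace writer into the OpenTelemetry tracer provider; per the file listing, that machinery now lives under ai_pipeline_core/observability/_debug/. A generic sketch of the underlying OpenTelemetry pattern, using the stock console exporter rather than the package's own classes:

```python
# Generic OpenTelemetry span-processor attachment; ConsoleSpanExporter stands in
# for the package's LocalTraceWriter/LocalDebugSpanProcessor pair.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

with trace.get_tracer(__name__).start_as_current_span("debug-span"):
    pass  # span is printed to stdout when it ends
```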
@@ -375,11 +259,11 @@ def trace(

 # Overload for the bare @trace call
 @overload
-def trace(func: Callable[P, R]) -> Callable[P, R]: ...
+def trace(func: Callable[P, R]) -> Callable[P, R]: ...  # noqa: UP047


 # Actual implementation
-def trace(
+def trace(  # noqa: UP047
     func: Callable[P, R] | None = None,
     *,
     level: TraceLevel = "always",
@@ -454,9 +338,10 @@ def trace(
             calls (default True). Set False for isolated traces.

         trim_documents: Automatically trim document content in traces (default True).
-            When enabled,
+            When enabled, text content is trimmed to
             first/last 100 chars, and all binary content is removed.
-
+            Binary content is removed, text content is trimmed.
+            Attachment content follows the same trimming rules.
             Helps reduce trace size for large documents.

     Returns:
@@ -467,35 +352,6 @@ def trace(
     automatically creates or propagates a TraceInfo instance, ensuring
     consistent session/user tracking across the call chain.

-    Example:
-        >>> # RECOMMENDED - No parameters needed for most cases!
-        >>> @trace
-        >>> async def process_document(doc):
-        ...     return await analyze(doc)
-        >>>
-        >>> # With parameters (RARE - only when specifically needed):
-        >>> @trace(level="debug")  # Only for debug-specific tracing
-        >>> async def debug_operation():
-        ...     pass
-
-        >>> @trace(ignore_inputs=["api_key"])  # Only for sensitive data
-        >>> async def api_call(data, api_key):
-        ...     return await external_api(data, api_key)
-        >>>
-        >>> # AVOID unnecessary configuration - defaults handle:
-        >>> # - Automatic naming from function name
-        >>> # - Standard trace level ("always")
-        >>> # - Full input/output capture
-        >>> # - Proper span type inference
-        >>>
-        >>> # Custom formatting
-        >>> @trace(
-        ...     input_formatter=lambda doc: f"Document: {doc.id}",
-        ...     output_formatter=lambda res: f"Results: {len(res)} items"
-        >>> )
-        >>> def analyze(doc):
-        ...     return results
-
     Environment variables:
         - LMNR_DEBUG: Set to "true" to enable debug-level traces
         - LMNR_PROJECT_API_KEY: Required for trace submission
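The 0.4.0 docstring drops the usage examples removed above. A condensed, self-contained variant of those examples, assuming the 0.4.0 module path and that traces are only exported when LMNR_PROJECT_API_KEY is configured:

```python
import asyncio

from ai_pipeline_core.observability.tracing import trace


@trace
async def process_document(doc: str) -> str:
    # Bare @trace is the recommended default; the span name comes from the function.
    return doc.upper()


@trace(ignore_inputs=["api_key"])  # keep sensitive arguments out of the trace
async def api_call(data: str, api_key: str) -> str:
    return f"sent {len(data)} bytes"


print(asyncio.run(process_document("hello")))
```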
@@ -505,11 +361,9 @@ def trace(
         - When level="off", decorator returns original function unchanged
         - Large inputs/outputs can be excluded with ignore_* parameters

-
-
-
-        - Preserves function signature and metadata
-        - Thread-safe and async-safe
+    Automatically initializes Laminar on first use. Works with both sync and
+    async functions. Preserves function signature and metadata. Thread-safe
+    and async-safe.
     """
     if level == "off":
         if func:
@@ -550,27 +404,27 @@ def trace(
         sig = inspect.signature(f)
         is_coroutine = inspect.iscoroutinefunction(f)
         observe_name = name or f.__name__
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        bound_observe = observe
+
+        bound_session_id = session_id
+        bound_user_id = user_id
+        bound_metadata = metadata if metadata is not None else {}
+        bound_tags = tags if tags is not None else []
+        bound_span_type = span_type
+        bound_ignore_input = ignore_input
+        bound_ignore_output = ignore_output
+        bound_ignore_inputs = ignore_inputs
+        bound_input_formatter = input_formatter
+        bound_output_formatter = output_formatter
+        bound_ignore_exceptions = ignore_exceptions
+        bound_preserve_global_context = preserve_global_context
+        bound_trim_documents = trim_documents

         # Create document trimming formatters if needed
-        def _create_trimming_input_formatter(*args, **kwargs) -> str:
+        def _create_trimming_input_formatter(*args: Any, **kwargs: Any) -> str:
             # First, let any custom formatter process the data
-            if
-                result =
+            if bound_input_formatter:
+                result = bound_input_formatter(*args, **kwargs)
                 # If formatter returns string, try to parse and trim
                 if isinstance(result, str):  # type: ignore[reportUnknownArgumentType]
                     try:
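The block above replaces direct references to the decorator parameters with bound_* locals that the nested wrappers close over before calling observe(). A generic sketch of that bind-once-then-close-over pattern (the names make_decorator and bound_label are illustrative, not from the library):

```python
from functools import wraps
from typing import Any, Callable


def make_decorator(label: str | None = None) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        bound_label = label or f.__name__  # captured once at decoration time

        @wraps(f)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            print(f"[{bound_label}] calling {f.__name__}")
            return f(*args, **kwargs)

        return wrapper

    return decorator


@make_decorator(label="demo")
def add(a: int, b: int) -> int:
    return a + b


print(add(2, 3))
```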
@@ -587,7 +441,7 @@ def trace(
             # No custom formatter - mimic Laminar's get_input_from_func_args
             # Build a dict with parameter names as keys (like Laminar does)
             params = list(sig.parameters.keys())
-            data = {}
+            data: dict[str, Any] = {}

             # Map args to parameter names
             for i, arg in enumerate(args):
@@ -607,8 +461,8 @@ def trace(

         def _create_trimming_output_formatter(result: Any) -> str:
             # First, let any custom formatter process the data
-            if
-                formatted =
+            if bound_output_formatter:
+                formatted = bound_output_formatter(result)
                 # If formatter returns string, try to parse and trim
                 if isinstance(formatted, str):  # type: ignore[reportUnknownArgumentType]
                     try:
@@ -650,41 +504,41 @@ def trace(
             observe_params["name"] = observe_name

             # Override with decorator-level session_id and user_id if provided
-            if
-                observe_params["session_id"] =
-            if
-                observe_params["user_id"] =
-            if
-                observe_params["metadata"] =
-            if
-                observe_params["tags"] = observe_params.get("tags", []) +
-            if
-                observe_params["span_type"] =
+            if bound_session_id:
+                observe_params["session_id"] = bound_session_id
+            if bound_user_id:
+                observe_params["user_id"] = bound_user_id
+            if bound_metadata:
+                observe_params["metadata"] = bound_metadata
+            if bound_tags:
+                observe_params["tags"] = observe_params.get("tags", []) + bound_tags
+            if bound_span_type:
+                observe_params["span_type"] = bound_span_type

             # Add the new Laminar parameters
-            if
-                observe_params["ignore_input"] =
-            if
-                observe_params["ignore_output"] =
-            if
-                observe_params["ignore_inputs"] =
+            if bound_ignore_input:
+                observe_params["ignore_input"] = bound_ignore_input
+            if bound_ignore_output:
+                observe_params["ignore_output"] = bound_ignore_output
+            if bound_ignore_inputs is not None:
+                observe_params["ignore_inputs"] = bound_ignore_inputs

             # Use trimming formatters if trim_documents is enabled
-            if
+            if bound_trim_documents:
                 # Use the trimming formatters (which may wrap custom formatters)
                 observe_params["input_formatter"] = _create_trimming_input_formatter
                 observe_params["output_formatter"] = _create_trimming_output_formatter
             else:
                 # Use custom formatters directly if provided
-                if
-                    observe_params["input_formatter"] =
-                if
-                    observe_params["output_formatter"] =
+                if bound_input_formatter is not None:
+                    observe_params["input_formatter"] = bound_input_formatter
+                if bound_output_formatter is not None:
+                    observe_params["output_formatter"] = bound_output_formatter

-            if
-                observe_params["ignore_exceptions"] =
-            if
-                observe_params["preserve_global_context"] =
+            if bound_ignore_exceptions:
+                observe_params["ignore_exceptions"] = bound_ignore_exceptions
+            if bound_preserve_global_context:
+                observe_params["preserve_global_context"] = bound_preserve_global_context

             return observe_params

@@ -697,7 +551,7 @@ def trace(
                 The result of the wrapped function.
             """
             observe_params = _prepare_and_get_observe_params(kwargs)
-            observed_func =
+            observed_func = bound_observe(**observe_params)(f)
             return observed_func(*args, **kwargs)

         @wraps(f)
@@ -708,8 +562,8 @@ def trace(
                 The result of the wrapped function.
             """
             observe_params = _prepare_and_get_observe_params(kwargs)
-            observed_func =
-            return await observed_func(*args, **kwargs)  # pyright: ignore[reportGeneralTypeIssues]
+            observed_func = bound_observe(**observe_params)(f)
+            return await observed_func(*args, **kwargs)  # pyright: ignore[reportGeneralTypeIssues, reportUnknownVariableType]

         wrapper = async_wrapper if is_coroutine else sync_wrapper

@@ -717,17 +571,14 @@ def trace(
         wrapper.__is_traced__ = True  # type: ignore[attr-defined]

         # Preserve the original function signature
-
+        with contextlib.suppress(AttributeError, ValueError):
             wrapper.__signature__ = sig  # type: ignore[attr-defined]
-        except (AttributeError, ValueError):
-            pass

         return cast(Callable[P, R], wrapper)

     if func:
         return decorator(func)  # Called as @trace
-
-    return decorator  # Called as @trace(...)
+    return decorator  # Called as @trace(...)


 def set_trace_cost(cost: float | str) -> None:
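This hunk, like the final one below, swaps a try/except/pass block for contextlib.suppress. A tiny standalone demonstration of the equivalence:

```python
import contextlib

d: dict[str, int] = {}

# Old form
try:
    d["missing"] += 1
except KeyError:
    pass

# 0.4.0 form: same behavior, one statement
with contextlib.suppress(KeyError):
    d["missing"] += 1
```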
@@ -739,10 +590,10 @@ def set_trace_cost(cost: float | str) -> None:
     current operation. Particularly useful for tracking costs of external
     API calls, compute resources, or custom billing scenarios.

-    The cost is stored in three metadata fields for
-    - gen_ai.usage.output_cost:
-    - gen_ai.usage.cost:
-    - cost:
+    The cost is stored in three metadata fields for observability tool consumption:
+    - gen_ai.usage.output_cost: OpenTelemetry GenAI semantic convention
+    - gen_ai.usage.cost: Aggregated cost field
+    - cost: Short-form cost field

     Args:
         cost: The cost value to set. Can be:
@@ -750,56 +601,14 @@ def set_trace_cost(cost: float | str) -> None:
             - str: USD format with dollar sign (e.g., "$0.05" or "$1.25")
         Only positive values will be set; zero or negative values are ignored.

-    Example:
-        >>> # Track cost of external API call
-        >>> @trace
-        >>> async def call_translation_api(text: str) -> str:
-        ...     # External API charges per character
-        ...     char_count = len(text)
-        ...     cost_per_char = 0.00001  # $0.00001 per character
-        ...
-        ...     result = await external_api.translate(text)
-        ...
-        ...     # Set the cost for this operation
-        ...     set_trace_cost(char_count * cost_per_char)
-        ...     return result
-        >>>
-        >>> # Track compute resource costs
-        >>> @trace
-        >>> def process_video(video_path: str) -> dict:
-        ...     duration = get_video_duration(video_path)
-        ...     cost_per_minute = 0.10  # $0.10 per minute
-        ...
-        ...     result = process_video_content(video_path)
-        ...
-        ...     # Set cost using string format
-        ...     set_trace_cost(f"${duration * cost_per_minute:.2f}")
-        ...     return result
-        >>>
-        >>> # Combine with LLM costs in pipeline
-        >>> @pipeline_task
-        >>> async def enriched_generation(prompt: str) -> str:
-        ...     # LLM cost tracked automatically via ModelResponse
-        ...     response = await llm.generate("gpt-5.1", messages=prompt)
-        ...
-        ...     # Add cost for post-processing
-        ...     processing_cost = 0.02  # Fixed cost for enrichment
-        ...     set_trace_cost(processing_cost)
-        ...
-        ...     return enrich_response(response.content)
-
     Raises:
         ValueError: If string format is invalid (not a valid USD amount).

-
-
-
-
-
-        - The cost is added to the current span's attributes/metadata
-        - Multiple calls overwrite the previous cost (not cumulative)
-        - If called outside a traced context (no active span), it has no effect
-          and does not raise an error
+    Only works within a traced context (function decorated with @trace,
+    @pipeline_task, or @pipeline_flow). LLM costs are tracked automatically via
+    ModelResponse; use this for non-LLM costs. Multiple calls overwrite the
+    previous cost (not cumulative). If called outside a traced context, it has
+    no effect and does not raise an error.
     """
     # Parse string format if provided
     if isinstance(cost, str):
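The removed docstring examples above remain the clearest illustration of set_trace_cost. A condensed, runnable variant assuming the 0.4.0 import path; the per-character and per-minute rates are carried over from the removed examples:

```python
from ai_pipeline_core.observability.tracing import set_trace_cost, trace


@trace
def process_video(minutes: float) -> str:
    set_trace_cost(f"${minutes * 0.10:.2f}")  # string form: USD with "$" prefix
    return "done"


@trace
def translate(text: str) -> str:
    set_trace_cost(len(text) * 0.00001)  # numeric form: cost in dollars
    return text[::-1]


# Outside a traced context set_trace_cost() is a silent no-op, so this is safe to run.
print(process_video(12.5), translate("hello"))
```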
@@ -812,9 +621,7 @@ def set_trace_cost(cost: float | str) -> None:
             # Remove $ and convert to float
             cost_value = float(cost_str[1:])
         except ValueError as e:
-            raise ValueError(
-                f"Invalid USD format: {cost!r}. Must be a valid number after '$'"
-            ) from e
+            raise ValueError(f"Invalid USD format: {cost!r}. Must be a valid number after '$'") from e
     else:
         cost_value = cost

@@ -826,11 +633,8 @@ def set_trace_cost(cost: float | str) -> None:
         "cost": cost_value,
     }

-
+    with contextlib.suppress(Exception):
         Laminar.set_span_attributes(attributes)
-    except Exception:
-        # Silently ignore if not in a traced context
-        pass


-__all__ = ["
+__all__ = ["TraceInfo", "TraceLevel", "set_trace_cost", "trace"]