ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,640 @@
+ """Tracing utilities that integrate Laminar (``lmnr``) with our code-base.
+
+ This module centralizes:
+ - ``TraceInfo`` - a small helper object for propagating contextual metadata.
+ - ``trace`` decorator - augments a callable with Laminar tracing, automatic
+   ``observe`` instrumentation, and optional support for test runs.
+ """
+
+ import contextlib
+ import inspect
+ import json
+ import os
+ from collections.abc import Callable
+ from functools import wraps
+ from typing import Any, Literal, ParamSpec, TypeVar, cast, overload
+
+ from lmnr import Attributes, Instruments, Laminar, observe
+ from pydantic import BaseModel, Field
+
+ from ai_pipeline_core.documents import Document
+ from ai_pipeline_core.llm import AIMessages, ModelResponse
+ from ai_pipeline_core.settings import settings
+
+ # ---------------------------------------------------------------------------
+ # Typing helpers
+ # ---------------------------------------------------------------------------
+ P = ParamSpec("P")
+ R = TypeVar("R")
+
+ TraceLevel = Literal["always", "debug", "off"]
+ """Control level for tracing activation.
+
+ Values:
+     - "always": Always trace (default, production mode)
+     - "debug": Only trace when LMNR_DEBUG == "true"
+     - "off": Disable tracing completely
+ """
+
+
+ # ---------------------------------------------------------------------------
+ # Serialization helpers
+ # ---------------------------------------------------------------------------
+ def _serialize_for_tracing(obj: Any) -> Any:  # noqa: PLR0911
+     """Convert objects to JSON-serializable format for tracing."""
+     if isinstance(obj, Document):
+         return obj.serialize_model()
+     if isinstance(obj, list) and obj and isinstance(obj[0], Document):
+         return [doc.serialize_model() for doc in cast(list[Document], obj)]
+     if isinstance(obj, AIMessages):
+         result: list[Any] = []
+         for msg in obj:
+             if isinstance(msg, Document):
+                 result.append(msg.serialize_model())
+             else:
+                 result.append(msg)
+         return result
+     if isinstance(obj, ModelResponse):
+         return obj.model_dump()
+     if isinstance(obj, BaseModel):
+         data: dict[str, Any] = {}
+         for field_name, field_value in obj.__dict__.items():
+             if isinstance(field_value, Document):
+                 data[field_name] = field_value.serialize_model()
+             elif isinstance(field_value, BaseModel):
+                 data[field_name] = _serialize_for_tracing(field_value)
+             else:
+                 data[field_name] = field_value
+         return data
+     try:
+         return str(obj)  # pyright: ignore[reportUnknownArgumentType]
+     except Exception:
+         return f"<{type(obj).__name__}>"  # pyright: ignore[reportUnknownArgumentType]
+
+
+ # ---------------------------------------------------------------------------
+ # Document trimming utilities
+ # ---------------------------------------------------------------------------
+ def _trim_attachment_list(attachments: list[Any]) -> list[Any]:
+     """Trim attachment content in a serialized attachment list.
+
+     Always trims regardless of parent document type:
+     - Binary (base64): replace content with placeholder
+     - Text > 250 chars: keep first 100 + last 100
+     """
+     trimmed: list[Any] = []
+     for raw_att in attachments:
+         if not isinstance(raw_att, dict):
+             trimmed.append(raw_att)
+             continue
+         att: dict[str, Any] = cast(dict[str, Any], raw_att)
+         content_encoding: str = att.get("content_encoding", "utf-8")
+         if content_encoding == "base64":
+             att = att.copy()
+             att["content"] = "[binary content removed]"
+         elif isinstance(att.get("content"), str) and len(att["content"]) > 250:
+             att = att.copy()
+             c: str = att["content"]
+             trimmed_chars = len(c) - 200
+             att["content"] = c[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + c[-100:]
+         trimmed.append(att)
+     return trimmed
+
+
+ def _trim_document_content(doc_dict: dict[str, Any]) -> dict[str, Any]:
+     """Trim document content for traces. All documents trimmed equally."""
+     if not isinstance(doc_dict, dict):  # pyright: ignore[reportUnnecessaryIsInstance]
+         return doc_dict  # pyright: ignore[reportUnreachable]
+
+     if "content" not in doc_dict or "class_name" not in doc_dict:
+         return doc_dict
+
+     doc_dict = doc_dict.copy()
+     content = doc_dict.get("content", "")
+     content_encoding = doc_dict.get("content_encoding", "utf-8")
+
+     # Trim attachments
+     if "attachments" in doc_dict and isinstance(doc_dict["attachments"], list):
+         doc_dict["attachments"] = _trim_attachment_list(cast(list[Any], doc_dict["attachments"]))
+
+     # Binary: remove content
+     if content_encoding == "base64":
+         doc_dict["content"] = "[binary content removed]"
+         return doc_dict
+
+     # Text: trim if > 250 chars
+     if isinstance(content, str) and len(content) > 250:
+         trimmed_chars = len(content) - 200
+         doc_dict["content"] = content[:100] + f" ... [trimmed {trimmed_chars} chars] ... " + content[-100:]
+
+     return doc_dict
+
+
+ def _trim_documents_in_data(data: Any) -> Any:
+     """Recursively trim document content in nested data structures."""
+     if isinstance(data, dict):
+         data_dict = cast(dict[str, Any], data)
+         if "class_name" in data_dict and "content" in data_dict:
+             return _trim_document_content(data_dict)
+         return {k: _trim_documents_in_data(v) for k, v in data_dict.items()}
+     if isinstance(data, list):
+         return [_trim_documents_in_data(item) for item in cast(list[Any], data)]
+     if isinstance(data, tuple):
+         return tuple(_trim_documents_in_data(item) for item in cast(tuple[Any, ...], data))
+     return data
+
+
+ # ---------------------------------------------------------------------------
+ # ``TraceInfo`` - metadata container
+ # ---------------------------------------------------------------------------
+ class TraceInfo(BaseModel):
+     """Container for propagating trace context through the pipeline.
+
+     TraceInfo provides a structured way to pass tracing metadata through
+     function calls, ensuring consistent observability across the entire
+     execution flow. It integrates with Laminar (LMNR) for distributed
+     tracing and debugging.
+
+     Attributes:
+         session_id: Unique identifier for the current session/conversation.
+         user_id: Identifier for the user triggering the operation.
+         metadata: Key-value pairs for additional trace context.
+             Useful for filtering and searching in LMNR dashboard.
+         tags: List of tags for categorizing traces (e.g., ["production", "v2"]).
+
+     Environment fallbacks:
+         - LMNR_DEBUG: Controls debug-level tracing when set to "true"
+         - LMNR_SESSION_ID / LMNR_USER_ID: Fallbacks for session_id / user_id when unset
+         These variables are read directly by the tracing layer and are
+         not part of the Settings configuration.
+
+     TraceInfo is typically created at the entry point of a flow
+     and passed through all subsequent function calls for
+     consistent tracing context.
+     """
+
+     session_id: str | None = None
+     user_id: str | None = None
+     metadata: dict[str, str] = Field(default_factory=dict)
+     tags: list[str] = Field(default_factory=list)
+
+     def get_observe_kwargs(self) -> dict[str, Any]:
+         """Convert TraceInfo to kwargs for Laminar's observe decorator.
+
+         Transforms the TraceInfo fields into the format expected by
+         the lmnr.observe() decorator, applying environment variable
+         fallbacks for session_id and user_id.
+
+         Returns:
+             Dictionary with keys:
+             - session_id: From field or environment variable fallback
+             - user_id: From field or environment variable fallback
+             - metadata: Dictionary of custom metadata (if set)
+             - tags: List of tags (if set)
+
+             Only non-empty values are included in the output.
+
+         Called internally by the trace decorator to configure Laminar
+         observation parameters.
+         """
+         kwargs: dict[str, Any] = {}
+
+         # Use environment variable fallback for session_id
+         session_id = self.session_id or os.getenv("LMNR_SESSION_ID")
+         if session_id:
+             kwargs["session_id"] = session_id
+
+         # Use environment variable fallback for user_id
+         user_id = self.user_id or os.getenv("LMNR_USER_ID")
+         if user_id:
+             kwargs["user_id"] = user_id
+
+         if self.metadata:
+             kwargs["metadata"] = self.metadata
+         if self.tags:
+             kwargs["tags"] = self.tags
+         return kwargs
+
+
+ # ---------------------------------------------------------------------------
+ # ``trace`` decorator
+ # ---------------------------------------------------------------------------
+
+
+ def _initialise_laminar() -> None:
+     """Initialize Laminar SDK with project configuration.
+
+     Sets up the Laminar observability client with the project API key
+     from settings. Disables automatic OpenAI instrumentation to avoid
+     conflicts with our custom tracing.
+
+     Called once per process. Multiple calls are safe (Laminar handles idempotency).
+     """
+     if settings.lmnr_project_api_key:
+         Laminar.initialize(
+             project_api_key=settings.lmnr_project_api_key, disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [], export_timeout_seconds=15
+         )
+
+
+ # Overload for calls like @trace(name="...", level="debug")
+ @overload
+ def trace(
+     *,
+     level: TraceLevel = "always",
+     name: str | None = None,
+     session_id: str | None = None,
+     user_id: str | None = None,
+     metadata: dict[str, Any] | None = None,
+     tags: list[str] | None = None,
+     span_type: str | None = None,
+     ignore_input: bool = False,
+     ignore_output: bool = False,
+     ignore_inputs: list[str] | None = None,
+     input_formatter: Callable[..., str] | None = None,
+     output_formatter: Callable[..., str] | None = None,
+     ignore_exceptions: bool = False,
+     preserve_global_context: bool = True,
+     trim_documents: bool = True,
+ ) -> Callable[[Callable[P, R]], Callable[P, R]]: ...
+
+
+ # Overload for the bare @trace call
+ @overload
+ def trace(func: Callable[P, R]) -> Callable[P, R]: ...  # noqa: UP047
+
+
+ # Actual implementation
+ def trace(  # noqa: UP047
+     func: Callable[P, R] | None = None,
+     *,
+     level: TraceLevel = "always",
+     name: str | None = None,
+     session_id: str | None = None,
+     user_id: str | None = None,
+     metadata: dict[str, Any] | None = None,
+     tags: list[str] | None = None,
+     span_type: str | None = None,
+     ignore_input: bool = False,
+     ignore_output: bool = False,
+     ignore_inputs: list[str] | None = None,
+     input_formatter: Callable[..., str] | None = None,
+     output_formatter: Callable[..., str] | None = None,
+     ignore_exceptions: bool = False,
+     preserve_global_context: bool = True,
+     trim_documents: bool = True,
+ ) -> Callable[[Callable[P, R]], Callable[P, R]] | Callable[P, R]:
+     """Add Laminar observability tracing to any function.
+
+     The trace decorator integrates functions with Laminar (LMNR) for
+     distributed tracing, performance monitoring, and debugging. It
+     automatically handles both sync and async functions, propagates
+     trace context, and provides fine-grained control over what gets traced.
+
+     USAGE GUIDELINE - Defaults First:
+         By default, use WITHOUT any parameters unless instructed otherwise.
+         The defaults are optimized for most use cases.
+
+     Args:
+         func: Function to trace (when used without parentheses: @trace).
+
+         level: Controls when tracing is active:
+             - "always": Always trace (default, production mode)
+             - "debug": Only trace when LMNR_DEBUG == "true"
+             - "off": Disable tracing completely
+
+         name: Custom span name in traces (defaults to function.__name__).
+             Use descriptive names for better trace readability.
+
+         session_id: Override session ID for this function's traces.
+             Typically propagated via TraceInfo instead.
+
+         user_id: Override user ID for this function's traces.
+             Typically propagated via TraceInfo instead.
+
+         metadata: Additional key-value metadata attached to spans.
+             Searchable in LMNR dashboard. Merged with TraceInfo metadata.
+
+         tags: List of tags for categorizing spans (e.g., ["api", "critical"]).
+             Merged with TraceInfo tags.
+
+         span_type: Semantic type of the span (e.g., "LLM", "CHAIN", "TOOL").
+             Affects visualization in LMNR dashboard.
+
+         ignore_input: Don't record function inputs in trace (privacy/size).
+
+         ignore_output: Don't record function output in trace (privacy/size).
+
+         ignore_inputs: List of parameter names to exclude from trace.
+             Useful for sensitive data like API keys.
+
+         input_formatter: Custom function to format inputs for tracing.
+             Receives all function args, returns display string.
+
+         output_formatter: Custom function to format output for tracing.
+             Receives function result, returns display string.
+
+         ignore_exceptions: Don't record exceptions in traces (default False).
+
+         preserve_global_context: Maintain Laminar's global context across
+             calls (default True). Set False for isolated traces.
+
+         trim_documents: Automatically trim document content in traces (default True).
+             When enabled, binary content is removed and text content longer than
+             250 chars is trimmed to its first and last 100 chars.
+             Attachment content follows the same trimming rules.
+             Helps reduce trace size for large documents.
+
+     Returns:
+         Decorated function with same signature but added tracing.
+
+     TraceInfo propagation:
+         If the decorated function has a 'trace_info' parameter, the decorator
+         automatically creates or propagates a TraceInfo instance, ensuring
+         consistent session/user tracking across the call chain.
+
+     Environment variables:
+         - LMNR_DEBUG: Set to "true" to enable debug-level traces
+         - LMNR_PROJECT_API_KEY: Required for trace submission
+
+     Performance:
+         - Tracing overhead is minimal (~1-2ms per call)
+         - When level="off", decorator returns original function unchanged
+         - Large inputs/outputs can be excluded with ignore_* parameters
+
+     Automatically initializes Laminar on first use. Works with both sync and
+     async functions. Preserves function signature and metadata. Thread-safe
+     and async-safe.
+     """
+     if level == "off":
+         if func:
+             return func
+         return lambda f: f
+
+     def decorator(f: Callable[P, R]) -> Callable[P, R]:
+         """Apply tracing to the target function.
+
+         Returns:
+             Wrapped function with LMNR observability.
+
+         Raises:
+             TypeError: If function is already decorated with @pipeline_task or @pipeline_flow.
+         """
+         # Check if this is already a traced pipeline_task or pipeline_flow
+         # This happens when @trace is applied after @pipeline_task/@pipeline_flow
+         if hasattr(f, "__is_traced__") and f.__is_traced__:  # type: ignore[attr-defined]
+             # Check if it's a Prefect Task or Flow object (they have specific attributes)
+             # Prefect objects have certain attributes that regular functions don't
+             is_prefect_task = hasattr(f, "fn") and hasattr(f, "submit") and hasattr(f, "map")
+             is_prefect_flow = hasattr(f, "fn") and hasattr(f, "serve")
+             if is_prefect_task or is_prefect_flow:
+                 fname = getattr(f, "__name__", "function")
+                 raise TypeError(
+                     f"Function '{fname}' is already decorated with @pipeline_task or "
+                     f"@pipeline_flow. Remove the @trace decorator - pipeline decorators "
+                     f"include tracing automatically."
+                 )
+
+         # Handle 'debug' level logic - only trace when LMNR_DEBUG is "true"
+         debug_value = settings.lmnr_debug or os.getenv("LMNR_DEBUG", "")
+         if level == "debug" and debug_value.lower() != "true":
+             return f
+
+         # --- Pre-computation (done once when the function is decorated) ---
+         _initialise_laminar()
+         sig = inspect.signature(f)
+         is_coroutine = inspect.iscoroutinefunction(f)
+         observe_name = name or f.__name__
+         bound_observe = observe
+
+         bound_session_id = session_id
+         bound_user_id = user_id
+         bound_metadata = metadata if metadata is not None else {}
+         bound_tags = tags if tags is not None else []
+         bound_span_type = span_type
+         bound_ignore_input = ignore_input
+         bound_ignore_output = ignore_output
+         bound_ignore_inputs = ignore_inputs
+         bound_input_formatter = input_formatter
+         bound_output_formatter = output_formatter
+         bound_ignore_exceptions = ignore_exceptions
+         bound_preserve_global_context = preserve_global_context
+         bound_trim_documents = trim_documents
+
+         # Create document trimming formatters if needed
+         def _create_trimming_input_formatter(*args: Any, **kwargs: Any) -> str:
+             # First, let any custom formatter process the data
+             if bound_input_formatter:
+                 result = bound_input_formatter(*args, **kwargs)
+                 # If formatter returns string, try to parse and trim
+                 if isinstance(result, str):  # type: ignore[reportUnknownArgumentType]
+                     try:
+                         data = json.loads(result)
+                         trimmed = _trim_documents_in_data(data)
+                         return json.dumps(trimmed)
+                     except (json.JSONDecodeError, TypeError):
+                         return result
+                 else:
+                     # If formatter returns dict/list, trim it
+                     trimmed = _trim_documents_in_data(result)
+                     return json.dumps(trimmed) if not isinstance(trimmed, str) else trimmed
+             else:
+                 # No custom formatter - mimic Laminar's get_input_from_func_args
+                 # Build a dict with parameter names as keys (like Laminar does)
+                 params = list(sig.parameters.keys())
+                 data: dict[str, Any] = {}
+
+                 # Map args to parameter names
+                 for i, arg in enumerate(args):
+                     if i < len(params):
+                         data[params[i]] = arg
+
+                 # Add kwargs
+                 data.update(kwargs)
+
+                 # Serialize with our helper function
+                 serialized = json.dumps(data, default=_serialize_for_tracing)
+                 parsed = json.loads(serialized)
+
+                 # Trim documents in the serialized data
+                 trimmed = _trim_documents_in_data(parsed)
+                 return json.dumps(trimmed)
+
+         def _create_trimming_output_formatter(result: Any) -> str:
+             # First, let any custom formatter process the data
+             if bound_output_formatter:
+                 formatted = bound_output_formatter(result)
+                 # If formatter returns string, try to parse and trim
+                 if isinstance(formatted, str):  # type: ignore[reportUnknownArgumentType]
+                     try:
+                         data = json.loads(formatted)
+                         trimmed = _trim_documents_in_data(data)
+                         return json.dumps(trimmed)
+                     except (json.JSONDecodeError, TypeError):
+                         return formatted
+                 else:
+                     # If formatter returns dict/list, trim it
+                     trimmed = _trim_documents_in_data(formatted)
+                     return json.dumps(trimmed) if not isinstance(trimmed, str) else trimmed
+             else:
+                 # No custom formatter, serialize result with smart defaults
+                 # Serialize with our extracted helper function
+                 serialized = json.dumps(result, default=_serialize_for_tracing)
+                 parsed = json.loads(serialized)
+
+                 # Trim documents in the serialized data
+                 trimmed = _trim_documents_in_data(parsed)
+                 return json.dumps(trimmed)
+
+         # --- Helper function for runtime logic ---
+         def _prepare_and_get_observe_params(runtime_kwargs: dict[str, Any]) -> dict[str, Any]:
+             """Inspects runtime args, manages TraceInfo, and returns params for lmnr.observe.
+
+             Modifies runtime_kwargs in place to inject TraceInfo if the function expects it.
+
+             Returns:
+                 Dictionary of parameters for lmnr.observe decorator.
+             """
+             trace_info = runtime_kwargs.get("trace_info")
+             if not isinstance(trace_info, TraceInfo):
+                 trace_info = TraceInfo()
+             if "trace_info" in sig.parameters:
+                 runtime_kwargs["trace_info"] = trace_info
+
+             observe_params = trace_info.get_observe_kwargs()
+             observe_params["name"] = observe_name
+
+             # Override with decorator-level session_id and user_id if provided
+             if bound_session_id:
+                 observe_params["session_id"] = bound_session_id
+             if bound_user_id:
+                 observe_params["user_id"] = bound_user_id
+             if bound_metadata:
+                 observe_params["metadata"] = bound_metadata
+             if bound_tags:
+                 observe_params["tags"] = observe_params.get("tags", []) + bound_tags
+             if bound_span_type:
+                 observe_params["span_type"] = bound_span_type
+
+             # Add the new Laminar parameters
+             if bound_ignore_input:
+                 observe_params["ignore_input"] = bound_ignore_input
+             if bound_ignore_output:
+                 observe_params["ignore_output"] = bound_ignore_output
+             if bound_ignore_inputs is not None:
+                 observe_params["ignore_inputs"] = bound_ignore_inputs
+
+             # Use trimming formatters if trim_documents is enabled
+             if bound_trim_documents:
+                 # Use the trimming formatters (which may wrap custom formatters)
+                 observe_params["input_formatter"] = _create_trimming_input_formatter
+                 observe_params["output_formatter"] = _create_trimming_output_formatter
+             else:
+                 # Use custom formatters directly if provided
+                 if bound_input_formatter is not None:
+                     observe_params["input_formatter"] = bound_input_formatter
+                 if bound_output_formatter is not None:
+                     observe_params["output_formatter"] = bound_output_formatter
+
+             if bound_ignore_exceptions:
+                 observe_params["ignore_exceptions"] = bound_ignore_exceptions
+             if bound_preserve_global_context:
+                 observe_params["preserve_global_context"] = bound_preserve_global_context
+
+             return observe_params
+
+         # --- The actual wrappers ---
+         @wraps(f)
+         def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+             """Synchronous wrapper for traced function.
+
+             Returns:
+                 The result of the wrapped function.
+             """
+             observe_params = _prepare_and_get_observe_params(kwargs)
+             observed_func = bound_observe(**observe_params)(f)
+             return observed_func(*args, **kwargs)
+
+         @wraps(f)
+         async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+             """Asynchronous wrapper for traced function.
+
+             Returns:
+                 The result of the wrapped function.
+             """
+             observe_params = _prepare_and_get_observe_params(kwargs)
+             observed_func = bound_observe(**observe_params)(f)
+             return await observed_func(*args, **kwargs)  # pyright: ignore[reportGeneralTypeIssues, reportUnknownVariableType]
+
+         wrapper = async_wrapper if is_coroutine else sync_wrapper
+
+         # Mark function as traced for detection by pipeline decorators
+         wrapper.__is_traced__ = True  # type: ignore[attr-defined]
+
+         # Preserve the original function signature
+         with contextlib.suppress(AttributeError, ValueError):
+             wrapper.__signature__ = sig  # type: ignore[attr-defined]
+
+         return cast(Callable[P, R], wrapper)
+
+     if func:
+         return decorator(func)  # Called as @trace
+     return decorator  # Called as @trace(...)
+
+
+ def set_trace_cost(cost: float | str) -> None:
+     """Set cost attributes for the current trace span.
+
+     Sets cost metadata in the current LMNR trace span for tracking expenses
+     of custom operations. This function should be called within a traced
+     function to dynamically set or update the cost associated with the
+     current operation. Particularly useful for tracking costs of external
+     API calls, compute resources, or custom billing scenarios.
+
+     The cost is stored in three metadata fields for observability tool consumption:
+     - gen_ai.usage.output_cost: OpenTelemetry GenAI semantic convention
+     - gen_ai.usage.cost: Aggregated cost field
+     - cost: Short-form cost field
+
+     Args:
+         cost: The cost value to set. Can be:
+             - float: Cost in dollars (e.g., 0.05 for 5 cents)
+             - str: USD format with dollar sign (e.g., "$0.05" or "$1.25")
+             Only positive values will be set; zero or negative values are ignored.
+
+     Raises:
+         ValueError: If string format is invalid (not a valid USD amount).
+
+     Only works within a traced context (function decorated with @trace,
+     @pipeline_task, or @pipeline_flow). LLM costs are tracked automatically via
+     ModelResponse; use this for non-LLM costs. Multiple calls overwrite the
+     previous cost (not cumulative). If called outside a traced context, it has
+     no effect and does not raise an error.
+     """
+     # Parse string format if provided
+     if isinstance(cost, str):
+         # Remove dollar sign and any whitespace
+         cost_str = cost.strip()
+         if not cost_str.startswith("$"):
+             raise ValueError(f"Invalid USD format: {cost!r}. Must start with '$' (e.g., '$0.50')")
+
+         try:
+             # Remove $ and convert to float
+             cost_value = float(cost_str[1:])
+         except ValueError as e:
+             raise ValueError(f"Invalid USD format: {cost!r}. Must be a valid number after '$'") from e
+     else:
+         cost_value = cost
+
+     if cost_value > 0:
+         # Build the attributes dictionary with cost metadata
+         attributes: dict[Attributes | str, float] = {
+             "gen_ai.usage.output_cost": cost_value,
+             "gen_ai.usage.cost": cost_value,
+             "cost": cost_value,
+         }
+
+         with contextlib.suppress(Exception):
+             Laminar.set_span_attributes(attributes)
+
+
+ __all__ = ["TraceInfo", "TraceLevel", "set_trace_cost", "trace"]
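
A minimal usage sketch of the module above, assuming the public names are imported directly from ai_pipeline_core.observability.tracing (the file added in this hunk). The function names, URL, session id, and cost value are illustrative, not taken from the package:

# Illustrative sketch only; summarize/normalize and the values used are made up.
import asyncio

from ai_pipeline_core.observability.tracing import TraceInfo, set_trace_cost, trace


@trace  # bare form: defaults apply (level="always", document trimming enabled)
async def summarize(url: str, trace_info: TraceInfo | None = None) -> str:
    # Attach a non-LLM cost to the current span; LLM costs are tracked via ModelResponse.
    set_trace_cost("$0.002")
    return f"summary of {url}"


@trace(level="debug", name="normalize-text")  # traced only when LMNR_DEBUG == "true"
def normalize(text: str) -> str:
    return text.strip().lower()


async def main() -> None:
    # TraceInfo is created once at the entry point and propagated via the trace_info kwarg.
    info = TraceInfo(session_id="session-123", tags=["example"])
    print(normalize(await summarize("https://example.com", trace_info=info)))


asyncio.run(main())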
@@ -0,0 +1,10 @@
+ """Pipeline framework primitives — decorators and flow options."""
+
+ from ai_pipeline_core.pipeline.decorators import pipeline_flow, pipeline_task
+ from ai_pipeline_core.pipeline.options import FlowOptions
+
+ __all__ = [
+     "FlowOptions",
+     "pipeline_flow",
+     "pipeline_task",
+ ]
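
The pipeline package's decorators are only re-exported here; their definitions live in ai_pipeline_core/pipeline/decorators.py, which this excerpt does not show. Below is a heavily hedged sketch of how they relate to @trace, assuming pipeline_task accepts the bare-decorator form (an assumption, not confirmed by this diff); the one rule confirmed by tracing.py above is that stacking @trace on a pipeline-decorated callable raises TypeError, because the pipeline decorators already include tracing.

# Hedged sketch: the bare @pipeline_task usage is assumed; only the import path and
# the "do not stack @trace on top" rule come from the code shown in this diff.
from ai_pipeline_core.observability.tracing import trace
from ai_pipeline_core.pipeline import pipeline_task


@pipeline_task  # assumed usage; tracing is built in, per the TypeError message in tracing.py
async def load_text(path: str) -> str:
    return f"contents of {path}"


# Re-tracing an already-traced pipeline task is rejected at decoration time:
# trace(load_text)  # would raise TypeError ("...Remove the @trace decorator...")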