ai-pipeline-core 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in their public registry.
Files changed (36)
  1. ai_pipeline_core/__init__.py +84 -4
  2. ai_pipeline_core/documents/__init__.py +9 -0
  3. ai_pipeline_core/documents/document.py +1044 -152
  4. ai_pipeline_core/documents/document_list.py +147 -38
  5. ai_pipeline_core/documents/flow_document.py +112 -11
  6. ai_pipeline_core/documents/mime_type.py +173 -15
  7. ai_pipeline_core/documents/task_document.py +117 -12
  8. ai_pipeline_core/documents/temporary_document.py +84 -5
  9. ai_pipeline_core/documents/utils.py +41 -9
  10. ai_pipeline_core/exceptions.py +47 -11
  11. ai_pipeline_core/flow/__init__.py +2 -0
  12. ai_pipeline_core/flow/config.py +236 -27
  13. ai_pipeline_core/flow/options.py +50 -1
  14. ai_pipeline_core/llm/__init__.py +6 -0
  15. ai_pipeline_core/llm/ai_messages.py +125 -27
  16. ai_pipeline_core/llm/client.py +278 -26
  17. ai_pipeline_core/llm/model_options.py +130 -1
  18. ai_pipeline_core/llm/model_response.py +239 -35
  19. ai_pipeline_core/llm/model_types.py +67 -0
  20. ai_pipeline_core/logging/__init__.py +13 -0
  21. ai_pipeline_core/logging/logging_config.py +72 -20
  22. ai_pipeline_core/logging/logging_mixin.py +38 -32
  23. ai_pipeline_core/pipeline.py +363 -60
  24. ai_pipeline_core/prefect.py +48 -1
  25. ai_pipeline_core/prompt_manager.py +209 -24
  26. ai_pipeline_core/settings.py +108 -4
  27. ai_pipeline_core/simple_runner/__init__.py +5 -0
  28. ai_pipeline_core/simple_runner/cli.py +96 -11
  29. ai_pipeline_core/simple_runner/simple_runner.py +237 -4
  30. ai_pipeline_core/tracing.py +253 -30
  31. ai_pipeline_core-0.1.12.dist-info/METADATA +450 -0
  32. ai_pipeline_core-0.1.12.dist-info/RECORD +36 -0
  33. ai_pipeline_core-0.1.10.dist-info/METADATA +0 -538
  34. ai_pipeline_core-0.1.10.dist-info/RECORD +0 -36
  35. {ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.12.dist-info}/WHEEL +0 -0
  36. {ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.12.dist-info}/licenses/LICENSE +0 -0
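
The source diffs reproduced in full below come from ai_pipeline_core/simple_runner/simple_runner.py (file 29) and ai_pipeline_core/tracing.py (file 30); the other files are only listed above. Both diffs consist mostly of expanded docstrings, plus two behavior changes in the trace decorator covered further down.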
@@ -1,3 +1,40 @@
+ """Simple pipeline runner for local flow execution.
+
+ This module provides the core functionality for running AI pipeline flows
+ locally without full Prefect orchestration. It handles document I/O,
+ flow sequencing, and error management.
+
+ Key components:
+ - Document I/O from/to filesystem directories
+ - Single and multi-flow execution
+ - Automatic document validation and passing between flows
+ - Step-based execution control (start/end steps)
+
+ Directory structure:
+ working_dir/
+ ├── InputDocument/ # Documents of type InputDocument
+ │ ├── file1.txt
+ │ └── file1.txt.description.md # Optional description
+ └── OutputDocument/ # Documents of type OutputDocument
+ └── result.json
+
+ Example:
+ >>> from ai_pipeline_core.simple_runner import run_pipeline
+ >>>
+ >>> # Run single flow
+ >>> results = await run_pipeline(
+ ... flow_func=MyFlow,
+ ... config=MyConfig,
+ ... project_name="test",
+ ... output_dir=Path("./output"),
+ ... flow_options=options
+ ... )
+
+ Note:
+ Document directories are named using the canonical_name() method
+ of each document type for consistent organization.
+ """
+
 from pathlib import Path
 from typing import Any, Callable, Sequence, Type
 
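The new module docstring fixes a directory convention keyed to each document type's canonical_name(). Below is a minimal sketch of the document and config classes that convention presupposes; the import paths and class names are assumptions for illustration (only INPUT_DOCUMENT_TYPES, OUTPUT_DOCUMENT_TYPE, and canonical_name() appear in the diff itself).

    # Hypothetical project code, not part of the diff.
    from ai_pipeline_core.documents import FlowDocument  # import path assumed
    from ai_pipeline_core.flow import FlowConfig          # import path assumed

    class InputDocument(FlowDocument):
        """Loaded from working_dir/InputDocument/ (folder name = canonical_name())."""

    class OutputDocument(FlowDocument):
        """Written to working_dir/OutputDocument/ by save_documents_to_directory."""

    class MyConfig(FlowConfig):
        # Attribute names as referenced in the run_pipeline docstring below.
        INPUT_DOCUMENT_TYPES = [InputDocument]
        OUTPUT_DOCUMENT_TYPE = OutputDocument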
@@ -9,13 +46,58 @@ from ai_pipeline_core.logging import get_pipeline_logger
 logger = get_pipeline_logger(__name__)

 FlowSequence = Sequence[Callable[..., Any]]
+ """Type alias for a sequence of flow functions."""
+
 ConfigSequence = Sequence[Type[FlowConfig]]
+ """Type alias for a sequence of flow configuration classes."""


 def load_documents_from_directory(
 base_dir: Path, document_types: Sequence[Type[FlowDocument]]
 ) -> DocumentList:
- """Loads documents using canonical_name."""
+ """Load documents from filesystem directories by type.
+
+ Scans subdirectories of base_dir for documents matching the provided
+ types. Each document type has its own subdirectory named after its
+ canonical_name().
+
+ Args:
+ base_dir: Base directory containing document subdirectories.
+ document_types: Sequence of FlowDocument subclasses to load.
+ Each type corresponds to a subdirectory.
+
+ Returns:
+ DocumentList containing all successfully loaded documents.
+ Empty list if no documents found or directories don't exist.
+
+ Directory structure:
+ base_dir/
+ ├── DocumentTypeA/ # canonical_name() of DocumentTypeA
+ │ ├── doc1.txt
+ │ ├── doc1.txt.description.md # Optional description file
+ │ └── doc2.json
+ └── DocumentTypeB/
+ └── data.csv
+
+ File handling:
+ - Document content is read as bytes
+ - Optional .description.md files provide document descriptions
+ - Failed loads are logged but don't stop processing
+ - Non-file entries are skipped
+
+ Example:
+ >>> from my_docs import InputDoc, ConfigDoc
+ >>> docs = load_documents_from_directory(
+ ... Path("./data"),
+ ... [InputDoc, ConfigDoc]
+ ... )
+ >>> print(f"Loaded {len(docs)} documents")
+
+ Note:
+ - Uses canonical_name() for directory names (e.g., "InputDocument")
+ - Descriptions are loaded from "{filename}.description.md" files
+ - All file types are supported (determined by document class)
+ """
 documents = DocumentList()

  for doc_class in document_types:
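A small usage sketch of the documented layout, assuming the hypothetical InputDocument class from the earlier sketch and that its canonical_name() is "InputDocument", as in the module docstring's tree:

    from pathlib import Path

    data = Path("./data")
    (data / "InputDocument").mkdir(parents=True, exist_ok=True)
    (data / "InputDocument" / "file1.txt").write_text("hello")
    # Optional side-car description, per the "{filename}.description.md" convention
    (data / "InputDocument" / "file1.txt.description.md").write_text("Example input")

    docs = load_documents_from_directory(data, [InputDocument])
    print(f"Loaded {len(docs)} documents")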
@@ -49,7 +131,44 @@ def load_documents_from_directory(


 def save_documents_to_directory(base_dir: Path, documents: DocumentList) -> None:
- """Saves documents using canonical_name."""
+ """Save documents to filesystem directories by type.
+
+ Creates subdirectories under base_dir for each document type and
+ saves documents with their original filenames. Only FlowDocument
+ instances are saved (temporary documents are skipped).
+
+ Args:
+ base_dir: Base directory for saving document subdirectories.
+ Created if it doesn't exist.
+ documents: DocumentList containing documents to save.
+ Non-FlowDocument instances are silently skipped.
+
+ Side effects:
+ - Creates base_dir and subdirectories as needed
+ - Overwrites existing files with the same name
+ - Logs each saved document
+ - Creates .description.md files for documents with descriptions
+
+ Directory structure created:
+ base_dir/
+ └── DocumentType/ # canonical_name() of document
+ ├── output.json # Document content
+ └── output.json.description.md # Optional description
+
+ Example:
+ >>> docs = DocumentList([
+ ... OutputDoc(name="result.txt", content=b"data"),
+ ... OutputDoc(name="stats.json", content=b'{...}')
+ ... ])
+ >>> save_documents_to_directory(Path("./output"), docs)
+ >>> # Creates ./output/OutputDocument/result.txt
+ >>> # and ./output/OutputDocument/stats.json
+
+ Note:
+ - Only FlowDocument subclasses are saved
+ - TaskDocument and other temporary documents are skipped
+ - Descriptions are saved as separate .description.md files
+ """
 for document in documents:
 if not isinstance(document, FlowDocument):
  continue
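A matching save-side sketch; OutputDocument is the hypothetical class from the first sketch, and the constructor keywords follow the docstring's own example:

    from pathlib import Path

    docs = DocumentList([OutputDocument(name="result.txt", content=b"data")])
    save_documents_to_directory(Path("./output"), docs)

    # Expected layout, with the folder named after OutputDocument.canonical_name():
    #   ./output/OutputDocument/result.txt
    for path in sorted(Path("./output").rglob("*")):
        print(path)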
@@ -75,7 +194,61 @@ async def run_pipeline(
 flow_options: FlowOptions,
 flow_name: str | None = None,
 ) -> DocumentList:
- """Execute a single pipeline flow."""
+ """Execute a single pipeline flow with document I/O.
+
+ Runs a flow function with automatic document loading, validation,
+ and saving. The flow receives input documents from the filesystem
+ and saves its output for subsequent flows.
+
+ The execution proceeds through these steps:
+ 1. Load input documents from output_dir subdirectories
+ 2. Validate input documents against config requirements
+ 3. Execute flow function with documents and options
+ 4. Validate output documents match config.OUTPUT_DOCUMENT_TYPE
+ 5. Save output documents to output_dir subdirectories
+
+ Args:
+ flow_func: Async flow function decorated with @pipeline_flow.
+ Must accept (project_name, documents, flow_options).
+
+ config: FlowConfig subclass defining input/output document types.
+ Used for validation and directory organization.
+
+ project_name: Name of the project/pipeline for logging and tracking.
+
+ output_dir: Directory for loading input and saving output documents.
+ Document subdirectories are created as needed.
+
+ flow_options: Configuration options passed to the flow function.
+ Can be FlowOptions or any subclass.
+
+ flow_name: Optional display name for logging. If None, uses
+ flow_func.name or flow_func.__name__.
+
+ Returns:
+ DocumentList containing the flow's output documents.
+
+ Raises:
+ RuntimeError: If required input documents are missing.
+
+ Example:
+ >>> from my_flows import AnalysisFlow, AnalysisConfig
+ >>>
+ >>> results = await run_pipeline(
+ ... flow_func=AnalysisFlow,
+ ... config=AnalysisConfig,
+ ... project_name="analysis_001",
+ ... output_dir=Path("./results"),
+ ... flow_options=FlowOptions(temperature=0.7)
+ ... )
+ >>> print(f"Generated {len(results)} documents")
+
+ Note:
+ - Flow must be async (decorated with @pipeline_flow)
+ - Input documents are loaded based on config.INPUT_DOCUMENT_TYPES
+ - Output is validated against config.OUTPUT_DOCUMENT_TYPE
+ - All I/O is logged for debugging
+ """
 if flow_name is None:
 # For Prefect Flow objects, use their name attribute
  # For regular functions, fall back to __name__
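Putting the pieces together, a single-flow run might look like the sketch below; MyFlow and MyConfig are hypothetical, and whether FlowOptions can be constructed without arguments is an assumption:

    import asyncio
    from pathlib import Path

    async def main() -> None:
        results = await run_pipeline(
            flow_func=MyFlow,             # async flow decorated with @pipeline_flow
            config=MyConfig,              # declares INPUT_DOCUMENT_TYPES / OUTPUT_DOCUMENT_TYPE
            project_name="test",
            output_dir=Path("./output"),  # inputs are read from and outputs written here
            flow_options=FlowOptions(),   # or any FlowOptions subclass
        )
        print(f"Generated {len(results)} documents")

    asyncio.run(main())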
@@ -108,7 +281,67 @@ async def run_pipelines(
 start_step: int = 1,
 end_step: int | None = None,
 ) -> None:
- """Executes multiple pipeline flows sequentially."""
+ """Execute multiple pipeline flows in sequence.
+
+ Runs a series of flows where each flow's output becomes the input
+ for the next flow. Supports partial execution with start/end steps
+ for debugging and resuming failed pipelines.
+
+ Execution proceeds by:
+ 1. Validating step indices and sequence lengths
+ 2. For each flow in range [start_step, end_step]:
+ a. Loading input documents from output_dir
+ b. Executing flow with documents
+ c. Saving output documents to output_dir
+ d. Output becomes input for next flow
+ 3. Logging progress and any failures
+
+ Steps are 1-based for user convenience. Step 1 is the first flow,
+ Step N is the Nth flow. Use start_step > 1 to skip initial flows
+ and end_step < N to stop early.
+
+ Args:
+ project_name: Name of the overall pipeline/project.
+ output_dir: Directory for document I/O between flows.
+ Shared by all flows in the sequence.
+ flows: Sequence of flow functions to execute in order.
+ Must all be async functions decorated with @pipeline_flow.
+ flow_configs: Sequence of FlowConfig classes corresponding to flows.
+ Must have same length as flows sequence.
+ flow_options: Options passed to all flows in the sequence.
+ Individual flows can use different fields.
+ start_step: First flow to execute (1-based index).
+ Default 1 starts from the beginning.
+ end_step: Last flow to execute (1-based index).
+ None runs through the last flow.
+
+ Raises:
+ ValueError: If flows and configs have different lengths, or if
+ start_step or end_step are out of range.
+
+ Example:
+ >>> # Run full pipeline
+ >>> await run_pipelines(
+ ... project_name="analysis",
+ ... output_dir=Path("./work"),
+ ... flows=[ExtractFlow, AnalyzeFlow, SummarizeFlow],
+ ... flow_configs=[ExtractConfig, AnalyzeConfig, SummaryConfig],
+ ... flow_options=options
+ ... )
+ >>>
+ >>> # Run only steps 2-3 (skip extraction)
+ >>> await run_pipelines(
+ ... ...,
+ ... start_step=2,
+ ... end_step=3
+ ... )
+
+ Note:
+ - Each flow's output must match the next flow's input types
+ - Failed flows stop the entire pipeline
+ - Progress is logged with step numbers for debugging
+ - Documents persist in output_dir between runs
+ """
 if len(flows) != len(flow_configs):
 raise ValueError("The number of flows and flow configs must match.")
 
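A multi-flow sketch following the docstring above; flow and config names are hypothetical, and the calls are assumed to run inside an async context. The remaining hunks in this diff are from ai_pipeline_core/tracing.py.

    # Full pipeline, then a resumed run of steps 2-3 against the same output_dir.
    await run_pipelines(
        project_name="analysis",
        output_dir=Path("./work"),
        flows=[ExtractFlow, AnalyzeFlow, SummarizeFlow],
        flow_configs=[ExtractConfig, AnalyzeConfig, SummaryConfig],
        flow_options=options,
    )

    # Step 1's documents persist in ./work, so the resumed run can skip extraction.
    await run_pipelines(
        project_name="analysis",
        output_dir=Path("./work"),
        flows=[ExtractFlow, AnalyzeFlow, SummarizeFlow],
        flow_configs=[ExtractConfig, AnalyzeConfig, SummaryConfig],
        flow_options=options,
        start_step=2,
        end_step=3,
    )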
@@ -1,9 +1,11 @@
 """Tracing utilities that integrate Laminar (``lmnr``) with our code-base.

- This module centralises:
- • ``TraceInfo`` - a small helper object for propagating contextual metadata.
- ``trace`` decorator - augments a callable with Laminar tracing, automatic
- ``observe`` instrumentation, and optional support for test runs.
+ @public
+
+ This module centralizes:
+ - ``TraceInfo`` - a small helper object for propagating contextual metadata.
+ - ``trace`` decorator - augments a callable with Laminar tracing, automatic
+ ``observe`` instrumentation, and optional support for test runs.
 """

 from __future__ import annotations
@@ -25,13 +27,66 @@ P = ParamSpec("P")
 R = TypeVar("R")

 TraceLevel = Literal["always", "debug", "off"]
+ """Control level for tracing activation.
+
+ @public
+
+ Values:
+ - "always": Always trace (default, production mode)
+ - "debug": Only trace when LMNR_DEBUG == "true"
+ - "off": Disable tracing completely
+ """


 # ---------------------------------------------------------------------------
 # ``TraceInfo`` – metadata container
 # ---------------------------------------------------------------------------
 class TraceInfo(BaseModel):
- """A container that holds contextual metadata for the current trace."""
+ """Container for propagating trace context through the pipeline.
+
+ TraceInfo provides a structured way to pass tracing metadata through
+ function calls, ensuring consistent observability across the entire
+ execution flow. It integrates with Laminar (LMNR) for distributed
+ tracing and debugging.
+
+ Attributes:
+ session_id: Unique identifier for the current session/conversation.
+ Falls back to LMNR_SESSION_ID environment variable.
+ user_id: Identifier for the user triggering the operation.
+ Falls back to LMNR_USER_ID environment variable.
+ metadata: Key-value pairs for additional trace context.
+ Useful for filtering and searching in LMNR dashboard.
+ tags: List of tags for categorizing traces (e.g., ["production", "v2"]).
+
+ Environment fallbacks:
+ - LMNR_SESSION_ID: Default session_id if not explicitly set
+ - LMNR_USER_ID: Default user_id if not explicitly set
+ - LMNR_DEBUG: Controls debug-level tracing when set to "true"
+
+ Note: These variables are read directly by the tracing layer and are
+ not part of the Settings configuration.
+
+ Example:
+ >>> # Create trace context
+ >>> trace_info = TraceInfo(
+ ... session_id="sess_123",
+ ... user_id="user_456",
+ ... metadata={"flow": "document_analysis", "version": "1.2"},
+ ... tags=["production", "high_priority"]
+ ... )
+ >>>
+ >>> # Pass through function calls
+ >>> @trace
+ >>> async def process(data, trace_info: TraceInfo):
+ ... # TraceInfo automatically propagates to nested calls
+ ... result = await analyze(data, trace_info=trace_info)
+ ... return result
+
+ Note:
+ TraceInfo is typically created at the entry point of a flow
+ and passed through all subsequent function calls for
+ consistent tracing context.
+ """

 session_id: str | None = None
 user_id: str | None = None
@@ -39,7 +94,30 @@ class TraceInfo(BaseModel):
 tags: list[str] = []

 def get_observe_kwargs(self) -> dict[str, Any]:
- """Return kwargs suitable for passing to the observe decorator."""
+ """Convert TraceInfo to kwargs for Laminar's observe decorator.
+
+ Transforms the TraceInfo fields into the format expected by
+ the lmnr.observe() decorator, applying environment variable
+ fallbacks for session_id and user_id.
+
+ Returns:
+ Dictionary with keys:
+ - session_id: From field or LMNR_SESSION_ID env var
+ - user_id: From field or LMNR_USER_ID env var
+ - metadata: Dictionary of custom metadata (if set)
+ - tags: List of tags (if set)
+
+ Only non-empty values are included in the output.
+
+ Example:
+ >>> trace_info = TraceInfo(session_id="sess_123", tags=["test"])
+ >>> kwargs = trace_info.get_observe_kwargs()
+ >>> # Returns: {"session_id": "sess_123", "tags": ["test"]}
+
+ Note:
+ This method is called internally by the trace decorator
+ to configure Laminar observation parameters.
+ """
 kwargs: dict[str, Any] = {}

  # Use environment variable fallback for session_id
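A sketch of the documented environment fallback; the explicit result mirrors the docstring's example, and the fallback result is an expectation rather than something verified here:

    import os

    explicit = TraceInfo(session_id="sess_123", tags=["test"])
    print(explicit.get_observe_kwargs())  # {"session_id": "sess_123", "tags": ["test"]}

    os.environ["LMNR_SESSION_ID"] = "sess_from_env"
    fallback = TraceInfo()                # no session_id set on the model
    print(fallback.get_observe_kwargs())  # expected to contain "sess_from_env"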
@@ -65,7 +143,21 @@ class TraceInfo(BaseModel):


 def _initialise_laminar() -> None:
- """Ensure Laminar is initialised once per process."""
+ """Initialize Laminar SDK with project configuration.
+
+ Sets up the Laminar observability client with the project API key
+ from settings. Disables automatic OpenAI instrumentation to avoid
+ conflicts with our custom tracing.
+
+ Configuration:
+ - Uses settings.lmnr_project_api_key for authentication
+ - Disables OPENAI instrument to prevent double-tracing
+ - Called automatically by trace decorator on first use
+
+ Note:
+ This is an internal function called once per process.
+ Multiple calls are safe (Laminar handles idempotency).
+ """
 if settings.lmnr_project_api_key:
 Laminar.initialize(
  project_api_key=settings.lmnr_project_api_key,
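Tracing setup is gated on the project key. A minimal sketch of that gate; the settings import path is an assumption, while the field name comes from the hunk above:

    from ai_pipeline_core.settings import settings  # import path assumed

    if settings.lmnr_project_api_key:
        print("Laminar will be initialized on the first traced call")
    else:
        print("No LMNR project key configured; spans will not be exported")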
@@ -118,38 +210,153 @@ def trace(
 ignore_exceptions: bool = False,
 preserve_global_context: bool = True,
 ) -> Callable[[Callable[P, R]], Callable[P, R]] | Callable[P, R]:
- """Decorator that wires Laminar tracing and observation into a function.
+ """Add Laminar observability tracing to any function.
+
+ @public
+
+ The trace decorator integrates functions with Laminar (LMNR) for
+ distributed tracing, performance monitoring, and debugging. It
+ automatically handles both sync and async functions, propagates
+ trace context, and provides fine-grained control over what gets traced.
+
+ USAGE GUIDELINE - Defaults First:
+ In 90% of cases, use WITHOUT any parameters.
+ The defaults are optimized for most use cases.

 Args:
- func: The function to be traced (when used as @trace)
- level: Trace level control:
- - "always": Always trace (default)
- - "debug": Only trace when LMNR_DEBUG environment variable is NOT set to "true"
- - "off": Never trace
- name: Custom name for the observation (defaults to function name)
- metadata: Additional metadata for the trace
- tags: Additional tags for the trace
- span_type: Type of span for the trace
- ignore_input: Ignore all inputs in the trace
- ignore_output: Ignore the output in the trace
- ignore_inputs: List of specific input parameter names to ignore
- input_formatter: Custom formatter for inputs (takes any arguments, returns string)
- output_formatter: Custom formatter for outputs (takes any arguments, returns string)
- ignore_exceptions: Whether to ignore exceptions in tracing
- preserve_global_context: Whether to preserve global context
+ func: Function to trace (when used without parentheses: @trace).
+
+ level: Controls when tracing is active:
+ - "always": Always trace (default, production mode)
+ - "debug": Only trace when LMNR_DEBUG == "true"
+ - "off": Disable tracing completely
+
+ name: Custom span name in traces (defaults to function.__name__).
+ Use descriptive names for better trace readability.
+
+ session_id: Override session ID for this function's traces.
+ Typically propagated via TraceInfo instead.
+
+ user_id: Override user ID for this function's traces.
+ Typically propagated via TraceInfo instead.
+
+ metadata: Additional key-value metadata attached to spans.
+ Searchable in LMNR dashboard. Merged with TraceInfo metadata.
+
+ tags: List of tags for categorizing spans (e.g., ["api", "critical"]).
+ Merged with TraceInfo tags.
+
+ span_type: Semantic type of the span (e.g., "LLM", "CHAIN", "TOOL").
+ Affects visualization in LMNR dashboard.
+
+ ignore_input: Don't record function inputs in trace (privacy/size).
+
+ ignore_output: Don't record function output in trace (privacy/size).
+
+ ignore_inputs: List of parameter names to exclude from trace.
+ Useful for sensitive data like API keys.
+
+ input_formatter: Custom function to format inputs for tracing.
+ Receives all function args, returns display string.
+
+ output_formatter: Custom function to format output for tracing.
+ Receives function result, returns display string.
+
+ ignore_exceptions: Don't record exceptions in traces (default False).
+
+ preserve_global_context: Maintain Laminar's global context across
+ calls (default True). Set False for isolated traces.

 Returns:
- The decorated function with Laminar tracing enabled
+ Decorated function with same signature but added tracing.
+
+ TraceInfo propagation:
+ If the decorated function has a 'trace_info' parameter, the decorator
+ automatically creates or propagates a TraceInfo instance, ensuring
+ consistent session/user tracking across the call chain.
+
+ Example:
+ >>> # RECOMMENDED - No parameters needed for most cases!
+ >>> @trace
+ >>> async def process_document(doc):
+ ... return await analyze(doc)
+ >>>
+ >>> # With parameters (RARE - only when specifically needed):
+ >>> @trace(level="debug") # Only for debug-specific tracing
+ >>> async def debug_operation():
+ ... pass
+
+ >>> @trace(ignore_inputs=["api_key"]) # Only for sensitive data
+ >>> async def api_call(data, api_key):
+ ... return await external_api(data, api_key)
+ >>>
+ >>> # AVOID unnecessary configuration - defaults handle:
+ >>> # - Automatic naming from function name
+ >>> # - Standard trace level ("always")
+ >>> # - Full input/output capture
+ >>> # - Proper span type inference
+ >>>
+ >>> # Custom formatting
+ >>> @trace(
+ ... input_formatter=lambda doc: f"Document: {doc.id}",
+ ... output_formatter=lambda res: f"Results: {len(res)} items"
+ >>> )
+ >>> def analyze(doc):
+ ... return results
+
+ Environment variables:
+ - LMNR_DEBUG: Set to "true" to enable debug-level traces
+ - LMNR_SESSION_ID: Default session ID if not in TraceInfo
+ - LMNR_USER_ID: Default user ID if not in TraceInfo
+ - LMNR_PROJECT_API_KEY: Required for trace submission
+
+ Performance:
+ - Tracing overhead is minimal (~1-2ms per call)
+ - When level="off", decorator returns original function unchanged
+ - Large inputs/outputs can be excluded with ignore_* parameters
+
+ Note:
+ - Automatically initializes Laminar on first use
+ - Works with both sync and async functions
+ - Preserves function signature and metadata
+ - Thread-safe and async-safe
+
+ See Also:
+ - TraceInfo: Container for trace metadata
+ - pipeline_task: Task decorator with built-in tracing
+ - pipeline_flow: Flow decorator with built-in tracing
 """
-
 if level == "off":
 if func:
 return func
 return lambda f: f

 def decorator(f: Callable[P, R]) -> Callable[P, R]:
- # Handle 'debug' level logic - only trace when LMNR_DEBUG is NOT "true"
- if level == "debug" and os.getenv("LMNR_DEBUG", "").lower() == "true":
+ """Apply tracing to the target function.
+
+ Returns:
+ Wrapped function with LMNR observability.
+
+ Raises:
+ TypeError: If function is already decorated with @pipeline_task or @pipeline_flow.
+ """
+ # Check if this is already a traced pipeline_task or pipeline_flow
+ # This happens when @trace is applied after @pipeline_task/@pipeline_flow
+ if hasattr(f, "__is_traced__") and f.__is_traced__: # type: ignore[attr-defined]
+ # Check if it's a Prefect Task or Flow object (they have specific attributes)
+ # Prefect objects have certain attributes that regular functions don't
+ is_prefect_task = hasattr(f, "fn") and hasattr(f, "submit") and hasattr(f, "map")
+ is_prefect_flow = hasattr(f, "fn") and hasattr(f, "serve")
+ if is_prefect_task or is_prefect_flow:
+ fname = getattr(f, "__name__", "function")
+ raise TypeError(
+ f"Function '{fname}' is already decorated with @pipeline_task or "
+ f"@pipeline_flow. Remove the @trace decorator - pipeline decorators "
+ f"include tracing automatically."
+ )
+
+ # Handle 'debug' level logic - only trace when LMNR_DEBUG is "true"
+ if level == "debug" and os.getenv("LMNR_DEBUG", "").lower() != "true":
 return f

  # --- Pre-computation (done once when the function is decorated) ---
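This hunk carries the two behavior changes in 0.1.12. A sketch of both is below; names other than trace and pipeline_flow are hypothetical, and the TypeError shown is the expected outcome per the new guard:

    import os

    # 1) level="debug" now traces only when LMNR_DEBUG == "true". In 0.1.10 the
    #    condition was inverted: the raw function was returned exactly when the
    #    variable was "true", the opposite of the documented behavior.
    os.environ["LMNR_DEBUG"] = "true"

    @trace(level="debug")
    async def debug_only_operation(data):
        return data  # wrapped and traced; left undecorated if LMNR_DEBUG != "true"

    # 2) Stacking @trace on top of a pipeline decorator is rejected explicitly.
    #    The guard checks the __is_traced__ marker plus Prefect Task/Flow attributes.
    try:
        @trace
        @pipeline_flow  # hypothetical flow decorator from this package
        async def my_flow(project_name, documents, flow_options): ...
    except TypeError as exc:
        print(exc)  # expected: "...Remove the @trace decorator - pipeline decorators include tracing automatically."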
@@ -175,9 +382,12 @@ def trace(

 # --- Helper function for runtime logic ---
 def _prepare_and_get_observe_params(runtime_kwargs: dict[str, Any]) -> dict[str, Any]:
- """
- Inspects runtime args, manages TraceInfo, and returns params for lmnr.observe.
+ """Inspects runtime args, manages TraceInfo, and returns params for lmnr.observe.
+
 Modifies runtime_kwargs in place to inject TraceInfo if the function expects it.
+
+ Returns:
+ Dictionary of parameters for lmnr.observe decorator.
 """
 trace_info = runtime_kwargs.get("trace_info")
 if not isinstance(trace_info, TraceInfo):
@@ -223,18 +433,31 @@ def trace(
 # --- The actual wrappers ---
 @wraps(f)
 def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+ """Synchronous wrapper for traced function.
+
+ Returns:
+ The result of the wrapped function.
+ """
 observe_params = _prepare_and_get_observe_params(kwargs)
 observed_func = _observe(**observe_params)(f)
 return observed_func(*args, **kwargs)

 @wraps(f)
 async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+ """Asynchronous wrapper for traced function.
+
+ Returns:
+ The result of the wrapped function.
+ """
 observe_params = _prepare_and_get_observe_params(kwargs)
 observed_func = _observe(**observe_params)(f)
 return await observed_func(*args, **kwargs)

 wrapper = async_wrapper if is_coroutine else sync_wrapper

+ # Mark function as traced for detection by pipeline decorators
+ wrapper.__is_traced__ = True # type: ignore[attr-defined]
+
 # Preserve the original function signature
 try:
  wrapper.__signature__ = sig # type: ignore[attr-defined]
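
The counterpart of the guard above: the wrapper now advertises itself via __is_traced__, which is what downstream decorators look for. A quick sketch of the observable effect:

    @trace
    def plain_function(x):
        return x

    # The marker added in this hunk lets pipeline decorators detect prior tracing.
    print(getattr(plain_function, "__is_traced__", False))  # expected: True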