ai-pipeline-core 0.4.7.tar.gz → 0.4.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/PKG-INFO +2 -1
  2. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/deployment/base.py +26 -2
  4. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/llm/client.py +8 -0
  5. ai_pipeline_core-0.4.9/ai_pipeline_core/llm/validation.py +176 -0
  6. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_initialization.py +4 -3
  7. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/tracing.py +39 -7
  8. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/pyproject.toml +2 -1
  9. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/.gitignore +0 -0
  10. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/LICENSE +0 -0
  11. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/README.md +0 -0
  12. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/deployment/__init__.py +0 -0
  13. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/deployment/contract.py +0 -0
  14. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/deployment/deploy.py +0 -0
  15. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/deployment/helpers.py +0 -0
  16. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/deployment/progress.py +0 -0
  17. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/deployment/remote.py +0 -0
  18. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/docs_generator/__init__.py +0 -0
  19. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/docs_generator/__main__.py +0 -0
  20. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/docs_generator/cli.py +0 -0
  21. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/docs_generator/extractor.py +0 -0
  22. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/docs_generator/guide_builder.py +0 -0
  23. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/docs_generator/trimmer.py +0 -0
  24. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/docs_generator/validator.py +0 -0
  25. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/document_store/__init__.py +0 -0
  26. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/document_store/_summary.py +0 -0
  27. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/document_store/_summary_worker.py +0 -0
  28. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/document_store/clickhouse.py +0 -0
  29. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/document_store/factory.py +0 -0
  30. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/document_store/local.py +0 -0
  31. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/document_store/memory.py +0 -0
  32. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/document_store/protocol.py +0 -0
  33. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/documents/__init__.py +0 -0
  34. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/documents/_context_vars.py +0 -0
  35. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/documents/_hashing.py +0 -0
  36. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/documents/attachment.py +0 -0
  37. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/documents/context.py +0 -0
  38. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/documents/document.py +0 -0
  39. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/documents/mime_type.py +0 -0
  40. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/documents/utils.py +0 -0
  41. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/exceptions.py +0 -0
  42. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/images/__init__.py +0 -0
  43. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/images/_processing.py +0 -0
  44. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/llm/__init__.py +0 -0
  45. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/llm/ai_messages.py +0 -0
  46. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/llm/model_options.py +0 -0
  47. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/llm/model_response.py +0 -0
  48. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/llm/model_types.py +0 -0
  49. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/logging/__init__.py +0 -0
  50. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/logging/logging.yml +0 -0
  51. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/logging/logging_config.py +0 -0
  52. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  53. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/__init__.py +0 -0
  54. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_debug/__init__.py +0 -0
  55. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_debug/_auto_summary.py +0 -0
  56. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_debug/_config.py +0 -0
  57. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_debug/_content.py +0 -0
  58. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_debug/_processor.py +0 -0
  59. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_debug/_summary.py +0 -0
  60. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_debug/_types.py +0 -0
  61. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_debug/_writer.py +0 -0
  62. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_document_tracking.py +0 -0
  63. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_logging_bridge.py +0 -0
  64. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_summary.py +0 -0
  65. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_tracking/__init__.py +0 -0
  66. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_tracking/_client.py +0 -0
  67. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_tracking/_internal.py +0 -0
  68. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_tracking/_models.py +0 -0
  69. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_tracking/_processor.py +0 -0
  70. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_tracking/_service.py +0 -0
  71. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/observability/_tracking/_writer.py +0 -0
  72. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/pipeline/__init__.py +0 -0
  73. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/pipeline/decorators.py +0 -0
  74. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/pipeline/options.py +0 -0
  75. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/prompt_manager.py +0 -0
  76. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/py.typed +0 -0
  77. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/settings.py +0 -0
  78. {ai_pipeline_core-0.4.7 → ai_pipeline_core-0.4.9}/ai_pipeline_core/testing.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.4.7
+Version: 0.4.9
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -29,6 +29,7 @@ Requires-Dist: prefect-gcp>=0.6.15
 Requires-Dist: prefect>=3.6.15
 Requires-Dist: pydantic-settings>=2.12.0
 Requires-Dist: pydantic>=2.12.5
+Requires-Dist: pypdf>=5.0.0
 Requires-Dist: python-magic>=0.4.27
 Requires-Dist: ruamel-yaml>=0.19.1
 Requires-Dist: tiktoken>=0.12.0

ai_pipeline_core/__init__.py
@@ -64,7 +64,7 @@ from .prompt_manager import PromptManager
 from .settings import Settings
 from .testing import disable_run_logger, prefect_test_harness
 
-__version__ = "0.4.6"
+__version__ = "0.4.9"
 
 __all__ = [
     "AIMessageType",

ai_pipeline_core/deployment/base.py
@@ -661,7 +661,10 @@ class PipelineDeployment(Generic[TOptions, TResult]):
         except Exception as e:
             logger.warning(f"Failed to initialize observability: {e}")
         with contextlib.suppress(Exception):
-            Laminar.initialize(export_timeout_seconds=15)
+            # Use canonical initializer to ensure consistent Laminar setup
+            from ai_pipeline_core.observability import tracing
+
+            tracing._initialise_laminar()
 
         deployment = self
 
@@ -882,13 +885,34 @@ class PipelineDeployment(Generic[TOptions, TResult]):
            options: FlowOptions,
            context: DeploymentContext,
        ) -> DeploymentResult:
+           # Initialize observability for remote workers
+           try:
+               initialize_observability()
+           except Exception as e:
+               logger.warning(f"Failed to initialize observability: {e}")
+           with contextlib.suppress(Exception):
+               # Use canonical initializer to ensure consistent Laminar setup
+               from ai_pipeline_core.observability import tracing
+
+               tracing._initialise_laminar()
+
+           # Set session ID from Prefect flow run for trace grouping
+           flow_run_id = str(runtime.flow_run.get_id()) if runtime.flow_run else str(uuid4())  # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownArgumentType]
+           os.environ["LMNR_SESSION_ID"] = flow_run_id
+
            store = create_document_store(
                settings,
                summary_generator=_build_summary_generator(),
            )
            set_document_store(store)
            try:
-               return await deployment.run(project_name, documents, cast(Any, options), context)
+               # Create parent span to group all traces under a single deployment trace
+               with Laminar.start_as_current_span(
+                   name=f"{deployment.name}-{project_name}",
+                   input={"project_name": project_name, "options": options.model_dump()},
+                   session_id=flow_run_id,
+               ):
+                   return await deployment.run(project_name, documents, cast(Any, options), context)
            finally:
                store.shutdown()
                set_document_store(None)
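
Note: the hunk above groups every remote run under a single Laminar trace keyed by the Prefect flow run. A condensed sketch of that pattern follows (a hedged illustration, not the library's code: the deployment and project names are placeholders, and only calls that appear in the hunk are used):

    import os
    from uuid import uuid4

    from lmnr import Laminar
    from prefect import runtime

    # Key every span from this process to the Prefect flow run so spans from
    # different processes land in one Laminar session (random id outside a flow run).
    flow_run_id = str(runtime.flow_run.get_id()) if runtime.flow_run else str(uuid4())
    os.environ["LMNR_SESSION_ID"] = flow_run_id

    # Parent span that groups everything executed inside it under one deployment trace.
    with Laminar.start_as_current_span(
        name="my-deployment-my-project",          # placeholder name
        input={"project_name": "my-project"},     # placeholder input
        session_id=flow_run_id,
    ):
        ...  # the real code awaits deployment.run(project_name, documents, options, context)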

ai_pipeline_core/llm/client.py
@@ -38,6 +38,7 @@ from .ai_messages import AIMessages, AIMessageType
 from .model_options import ModelOptions
 from .model_response import ModelResponse, StructuredModelResponse
 from .model_types import ModelName
+from .validation import validate_messages
 
 logger = get_pipeline_logger(__name__)
 
@@ -399,6 +400,11 @@ async def _generate_with_retry(  # noqa: PLR0917
     if not context and not messages:
         raise ValueError("Either context or messages must be provided")
 
+    # Validate inputs - filter out empty/corrupted documents and attachments
+    context, ctx_warnings = validate_messages(context)
+    messages, msg_warnings = validate_messages(messages)
+    validation_warnings = ctx_warnings + msg_warnings
+
     # Auto-split large images based on model-specific constraints
     context = _prepare_images_for_model(context, model)
     messages = _prepare_images_for_model(messages, model)
@@ -424,6 +430,8 @@ async def _generate_with_retry(  # noqa: PLR0917
             laminar_metadata["purpose"] = purpose
         if expected_cost is not None:
             laminar_metadata["expected_cost"] = expected_cost
+        if validation_warnings:
+            response._metadata["validation_warnings"] = validation_warnings
         span.set_attributes(laminar_metadata)  # pyright: ignore[reportArgumentType]
         Laminar.set_span_output([r for r in (response.reasoning_content, response.content) if r])
         response.validate_output()

ai_pipeline_core/llm/validation.py (new file)
@@ -0,0 +1,176 @@
+"""Validation for LLM inputs.
+
+Validates documents and attachments before sending to LLM to catch
+empty, corrupted, or invalid content early. Filters invalid content
+and logs warnings instead of failing the entire request.
+"""
+
+from io import BytesIO
+
+from PIL import Image
+from pypdf import PdfReader
+
+from ai_pipeline_core.documents import Document
+from ai_pipeline_core.documents.attachment import Attachment
+from ai_pipeline_core.logging import get_pipeline_logger
+
+from .ai_messages import AIMessages, AIMessageType
+
+logger = get_pipeline_logger(__name__)
+
+
+def _validate_image_content(content: bytes, name: str) -> str | None:
+    """Validate image content. Returns error message or None if valid."""
+    if not content:
+        return f"empty image content in '{name}'"
+    try:
+        with Image.open(BytesIO(content)) as img:
+            img.verify()
+        return None
+    except Exception as e:
+        return f"invalid image in '{name}': {e}"
+
+
+def _validate_pdf_content(content: bytes, name: str) -> str | None:
+    """Validate PDF content. Returns error message or None if valid."""
+    if not content:
+        return f"empty PDF content in '{name}'"
+
+    # Check PDF header signature
+    if not content.lstrip().startswith(b"%PDF-"):
+        return f"invalid PDF header in '{name}' (missing %PDF- signature)"
+
+    # Check page count - catches 0-page and corrupted PDFs
+    try:
+        reader = PdfReader(BytesIO(content))
+        if len(reader.pages) == 0:
+            return f"PDF has no pages in '{name}'"
+    except Exception as e:
+        return f"corrupted PDF in '{name}': {e}"
+
+    return None
+
+
+def _validate_text_content(content: bytes, name: str) -> str | None:
+    """Validate text content. Returns error message or None if valid."""
+    if not content:
+        return f"empty text content in '{name}'"
+
+    # Check for null bytes (indicates binary content)
+    if b"\x00" in content:
+        return f"binary content (null bytes) in text '{name}'"
+
+    # Check UTF-8 encoding
+    try:
+        content.decode("utf-8")
+    except UnicodeDecodeError as e:
+        return f"invalid UTF-8 encoding in '{name}': {e}"
+
+    return None
+
+
+def _validate_attachment(att: Attachment, parent_name: str) -> str | None:
+    """Validate a single attachment. Returns error message or None if valid."""
+    att_name = f"attachment '{att.name}' of '{parent_name}'"
+
+    if att.is_image:
+        return _validate_image_content(att.content, att_name)
+    if att.is_pdf:
+        return _validate_pdf_content(att.content, att_name)
+    if att.is_text:
+        return _validate_text_content(att.content, att_name)
+
+    # Unknown type - let it through, document_to_prompt will handle/skip it
+    return None
+
+
+def _validate_document(doc: Document) -> tuple[Document | None, list[str]]:
+    """Validate a document and its attachments.
+
+    Returns:
+        Tuple of (validated_document_or_None, list_of_error_messages).
+        Returns None for document if main content is invalid.
+        Filters out invalid attachments but keeps the document.
+    """
+    errors: list[str] = []
+
+    # Validate main content based on type
+    err: str | None = None
+    if doc.is_image:
+        err = _validate_image_content(doc.content, doc.name)
+    elif doc.is_pdf:
+        err = _validate_pdf_content(doc.content, doc.name)
+    elif doc.is_text:
+        err = _validate_text_content(doc.content, doc.name)
+    # else: unknown type - let document_to_prompt handle it
+
+    if err:
+        errors.append(err)
+        return None, errors
+
+    # Validate attachments
+    if not doc.attachments:
+        return doc, errors
+
+    valid_attachments: list[Attachment] = []
+    attachments_changed = False
+
+    for att in doc.attachments:
+        if err := _validate_attachment(att, doc.name):
+            errors.append(err)
+            attachments_changed = True
+        else:
+            valid_attachments.append(att)
+
+    if attachments_changed:
+        # Return document with filtered attachments
+        return doc.model_copy(update={"attachments": tuple(valid_attachments)}), errors
+
+    return doc, errors
+
+
+def validate_messages(messages: AIMessages) -> tuple[AIMessages, list[str]]:
+    """Validate all documents in messages and filter out invalid content.
+
+    Validates documents and their attachments. Invalid documents are removed
+    entirely, invalid attachments are filtered from their parent documents.
+    All validation errors are logged as warnings.
+
+    Args:
+        messages: AIMessages to validate.
+
+    Returns:
+        Tuple of (validated_messages, list_of_warning_messages).
+        The validated_messages has invalid documents removed and invalid
+        attachments filtered from remaining documents.
+    """
+    if not messages:
+        return messages, []
+
+    # Quick check: if no documents, nothing to validate
+    has_documents = any(isinstance(m, Document) for m in messages)
+    if not has_documents:
+        return messages, []
+
+    valid_msgs: list[AIMessageType] = []
+    warnings: list[str] = []
+
+    for msg in messages:
+        if isinstance(msg, Document):
+            valid_doc, doc_errors = _validate_document(msg)
+
+            for err in doc_errors:
+                warning_msg = f"LLM input validation: filtering {err}"
+                warnings.append(warning_msg)
+                logger.warning(warning_msg)
+
+            if valid_doc is not None:
+                valid_msgs.append(valid_doc)
+        else:
+            valid_msgs.append(msg)
+
+    # Return original if nothing changed (preserve identity for caching)
+    if len(valid_msgs) == len(messages) and not warnings:
+        return messages, []
+
+    return AIMessages(valid_msgs), warnings
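
Note: the helpers in the new validation module operate on raw bytes, so they can be sanity-checked directly without constructing Document objects. A minimal sketch, using only functions defined above (the file names are placeholders):

    from ai_pipeline_core.llm.validation import _validate_pdf_content, _validate_text_content

    # Valid UTF-8 text passes (None); empty content is reported by name.
    assert _validate_text_content(b"hello", "note.txt") is None
    assert _validate_text_content(b"", "empty.txt") == "empty text content in 'empty.txt'"

    # Bytes without the %PDF- signature are rejected before pypdf is even invoked.
    assert _validate_pdf_content(b"not a pdf", "bad.pdf") == (
        "invalid PDF header in 'bad.pdf' (missing %PDF- signature)"
    )

In `_generate_with_retry` (see the client.py hunks above), `validate_messages` applies these checks to both `context` and `messages`, and any warnings are attached to the response metadata as `validation_warnings`.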

ai_pipeline_core/observability/_initialization.py
@@ -8,7 +8,6 @@ import importlib
 from typing import Any, Protocol
 from uuid import UUID
 
-from lmnr import Laminar
 from opentelemetry import trace as otel_trace
 from pydantic import BaseModel, ConfigDict
 
@@ -180,10 +179,12 @@ def initialize_observability(config: ObservabilityConfig | None = None) -> None:
     if config is None:
         config = _build_config_from_settings()
 
-    # 1. Laminar
+    # 1. Laminar - use canonical initializer from tracing module
     if config.has_lmnr:
         try:
-            Laminar.initialize(project_api_key=config.lmnr_project_api_key, export_timeout_seconds=15)
+            from ai_pipeline_core.observability import tracing  # noqa: PLC0415
+
+            tracing._initialise_laminar()
             logger.info("Laminar initialized")
         except Exception as e:
             logger.warning(f"Laminar initialization failed: {e}")

ai_pipeline_core/observability/tracing.py
@@ -10,6 +10,7 @@ import contextlib
 import inspect
 import json
 import os
+import threading
 from collections.abc import Callable
 from functools import wraps
 from typing import Any, Literal, ParamSpec, TypeVar, cast, overload
@@ -220,19 +221,42 @@ class TraceInfo(BaseModel):
 # ---------------------------------------------------------------------------
 
 
+_laminar_initialized = False
+_laminar_init_lock = threading.Lock()
+
+
 def _initialise_laminar() -> None:
-    """Initialize Laminar SDK with project configuration.
+    """Initialize Laminar SDK with project configuration (lazy, once per process).
 
     Sets up the Laminar observability client with the project API key
     from settings. Disables automatic OpenAI instrumentation to avoid
     conflicts with our custom tracing.
 
-    Called once per process. Multiple calls are safe (Laminar handles idempotency).
+    IMPORTANT: This is called lazily at first trace execution (not at decoration time)
+    to allow LMNR_SPAN_CONTEXT environment variable to be set before initialization.
+    Laminar reads LMNR_SPAN_CONTEXT during initialize() to establish parent context
+    for cross-process tracing.
+
+    Uses double-checked locking pattern for thread safety. The flag is set AFTER
+    successful initialization to prevent permanently disabled tracing on init failure.
     """
-    if settings.lmnr_project_api_key:
-        Laminar.initialize(
-            project_api_key=settings.lmnr_project_api_key, disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [], export_timeout_seconds=15
-        )
+    global _laminar_initialized  # noqa: PLW0603
+
+    # Fast path: already initialized (no lock needed)
+    if _laminar_initialized:
+        return
+
+    with _laminar_init_lock:
+        # Double-check inside lock
+        if _laminar_initialized:
+            return
+
+        if settings.lmnr_project_api_key:
+            disabled = [Instruments.OPENAI] if Instruments.OPENAI else []
+            Laminar.initialize(project_api_key=settings.lmnr_project_api_key, disabled_instruments=disabled, export_timeout_seconds=15)
+
+        # Set flag AFTER successful initialization
+        _laminar_initialized = True
 
 
 # Overload for calls like @trace(name="...", level="debug")
@@ -400,7 +424,9 @@ def trace(  # noqa: UP047
         return f
 
     # --- Pre-computation (done once when the function is decorated) ---
-    _initialise_laminar()
+    # NOTE: _initialise_laminar() is NOT called here (at decoration/import time)
+    # to allow LMNR_SPAN_CONTEXT to be set before Laminar.initialize() runs.
+    # It's called lazily in the wrapper functions at first execution.
    sig = inspect.signature(f)
    is_coroutine = inspect.iscoroutinefunction(f)
    observe_name = name or f.__name__
@@ -550,6 +576,9 @@ def trace(  # noqa: UP047
         Returns:
             The result of the wrapped function.
         """
+        # Lazy initialization: called at first execution, not at decoration time.
+        # This allows LMNR_SPAN_CONTEXT to be set before Laminar.initialize().
+        _initialise_laminar()
         observe_params = _prepare_and_get_observe_params(kwargs)
         observed_func = bound_observe(**observe_params)(f)
         return observed_func(*args, **kwargs)
@@ -561,6 +590,9 @@ def trace(  # noqa: UP047
         Returns:
             The result of the wrapped function.
         """
+        # Lazy initialization: called at first execution, not at decoration time.
+        # This allows LMNR_SPAN_CONTEXT to be set before Laminar.initialize().
+        _initialise_laminar()
         observe_params = _prepare_and_get_observe_params(kwargs)
         observed_func = bound_observe(**observe_params)(f)
         return await observed_func(*args, **kwargs)  # pyright: ignore[reportGeneralTypeIssues, reportUnknownVariableType]
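
Note: the tracing changes above move `_initialise_laminar()` from decoration time to first execution so that a parent span context can be injected via LMNR_SPAN_CONTEXT after import but before the first traced call. A minimal sketch of that ordering (hedged illustration; the function and environment value below are placeholders):

    import os

    from ai_pipeline_core.observability.tracing import trace

    @trace(name="worker-step")  # decorating no longer calls Laminar.initialize()
    def step() -> str:
        return "ok"

    # An orchestrator can inject the serialized parent span context after import...
    os.environ["LMNR_SPAN_CONTEXT"] = "<serialized parent span context>"

    # ...and Laminar.initialize() only runs here, on the first traced call, so it can
    # pick up LMNR_SPAN_CONTEXT and attach this span to the cross-process parent trace.
    step()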

pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ai-pipeline-core"
-version = "0.4.7"
+version = "0.4.9"
 description = "Core utilities for AI-powered processing pipelines using prefect"
 readme = "README.md"
 license = {text = "MIT"}
@@ -28,6 +28,7 @@ dependencies = [
     "prefect>=3.6.15",
     "pydantic-settings>=2.12.0",
     "pydantic>=2.12.5",
+    "pypdf>=5.0.0",
     "python-magic>=0.4.27",
     "ruamel.yaml>=0.19.1",
     "tiktoken>=0.12.0",