ai-pipeline-core 0.4.6__tar.gz → 0.4.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/PKG-INFO +1 -1
  2. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/deployment/base.py +26 -2
  4. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/llm/ai_messages.py +47 -4
  5. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_initialization.py +4 -3
  6. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/tracing.py +39 -7
  7. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/pyproject.toml +1 -1
  8. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/.gitignore +0 -0
  9. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/LICENSE +0 -0
  10. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/README.md +0 -0
  11. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/deployment/__init__.py +0 -0
  12. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/deployment/contract.py +0 -0
  13. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/deployment/deploy.py +0 -0
  14. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/deployment/helpers.py +0 -0
  15. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/deployment/progress.py +0 -0
  16. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/deployment/remote.py +0 -0
  17. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/docs_generator/__init__.py +0 -0
  18. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/docs_generator/__main__.py +0 -0
  19. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/docs_generator/cli.py +0 -0
  20. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/docs_generator/extractor.py +0 -0
  21. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/docs_generator/guide_builder.py +0 -0
  22. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/docs_generator/trimmer.py +0 -0
  23. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/docs_generator/validator.py +0 -0
  24. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/document_store/__init__.py +0 -0
  25. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/document_store/_summary.py +0 -0
  26. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/document_store/_summary_worker.py +0 -0
  27. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/document_store/clickhouse.py +0 -0
  28. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/document_store/factory.py +0 -0
  29. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/document_store/local.py +0 -0
  30. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/document_store/memory.py +0 -0
  31. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/document_store/protocol.py +0 -0
  32. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/documents/__init__.py +0 -0
  33. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/documents/_context_vars.py +0 -0
  34. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/documents/_hashing.py +0 -0
  35. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/documents/attachment.py +0 -0
  36. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/documents/context.py +0 -0
  37. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/documents/document.py +0 -0
  38. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/documents/mime_type.py +0 -0
  39. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/documents/utils.py +0 -0
  40. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/exceptions.py +0 -0
  41. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/images/__init__.py +0 -0
  42. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/images/_processing.py +0 -0
  43. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/llm/__init__.py +0 -0
  44. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/llm/client.py +0 -0
  45. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/llm/model_options.py +0 -0
  46. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/llm/model_response.py +0 -0
  47. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/llm/model_types.py +0 -0
  48. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/logging/__init__.py +0 -0
  49. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/logging/logging.yml +0 -0
  50. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/logging/logging_config.py +0 -0
  51. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  52. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/__init__.py +0 -0
  53. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_debug/__init__.py +0 -0
  54. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_debug/_auto_summary.py +0 -0
  55. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_debug/_config.py +0 -0
  56. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_debug/_content.py +0 -0
  57. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_debug/_processor.py +0 -0
  58. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_debug/_summary.py +0 -0
  59. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_debug/_types.py +0 -0
  60. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_debug/_writer.py +0 -0
  61. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_document_tracking.py +0 -0
  62. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_logging_bridge.py +0 -0
  63. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_summary.py +0 -0
  64. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_tracking/__init__.py +0 -0
  65. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_tracking/_client.py +0 -0
  66. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_tracking/_internal.py +0 -0
  67. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_tracking/_models.py +0 -0
  68. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_tracking/_processor.py +0 -0
  69. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_tracking/_service.py +0 -0
  70. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_tracking/_writer.py +0 -0
  71. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/pipeline/__init__.py +0 -0
  72. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/pipeline/decorators.py +0 -0
  73. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/pipeline/options.py +0 -0
  74. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/prompt_manager.py +0 -0
  75. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/py.typed +0 -0
  76. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/settings.py +0 -0
  77. {ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/testing.py +0 -0
{ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/PKG-INFO +1 -1
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.4.6
+Version: 0.4.8
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
{ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/__init__.py +1 -1
@@ -64,7 +64,7 @@ from .prompt_manager import PromptManager
 from .settings import Settings
 from .testing import disable_run_logger, prefect_test_harness
 
-__version__ = "0.4.6"
+__version__ = "0.4.8"
 
 __all__ = [
     "AIMessageType",
{ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/deployment/base.py +26 -2
@@ -661,7 +661,10 @@ class PipelineDeployment(Generic[TOptions, TResult]):
         except Exception as e:
             logger.warning(f"Failed to initialize observability: {e}")
             with contextlib.suppress(Exception):
-                Laminar.initialize(export_timeout_seconds=15)
+                # Use canonical initializer to ensure consistent Laminar setup
+                from ai_pipeline_core.observability import tracing
+
+                tracing._initialise_laminar()
 
         deployment = self
 
@@ -882,13 +885,34 @@ class PipelineDeployment(Generic[TOptions, TResult]):
         options: FlowOptions,
         context: DeploymentContext,
     ) -> DeploymentResult:
+        # Initialize observability for remote workers
+        try:
+            initialize_observability()
+        except Exception as e:
+            logger.warning(f"Failed to initialize observability: {e}")
+            with contextlib.suppress(Exception):
+                # Use canonical initializer to ensure consistent Laminar setup
+                from ai_pipeline_core.observability import tracing
+
+                tracing._initialise_laminar()
+
+        # Set session ID from Prefect flow run for trace grouping
+        flow_run_id = str(runtime.flow_run.get_id()) if runtime.flow_run else str(uuid4())  # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownArgumentType]
+        os.environ["LMNR_SESSION_ID"] = flow_run_id
+
         store = create_document_store(
             settings,
             summary_generator=_build_summary_generator(),
         )
         set_document_store(store)
         try:
-            return await deployment.run(project_name, documents, cast(Any, options), context)
+            # Create parent span to group all traces under a single deployment trace
+            with Laminar.start_as_current_span(
+                name=f"{deployment.name}-{project_name}",
+                input={"project_name": project_name, "options": options.model_dump()},
+                session_id=flow_run_id,
+            ):
+                return await deployment.run(project_name, documents, cast(Any, options), context)
         finally:
             store.shutdown()
             set_document_store(None)
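Note on the change above: the remote-worker bootstrap now keys every span to the Prefect flow-run id, so all traces from one deployment run share a session and sit under a single parent span. A minimal standalone sketch of that grouping pattern (illustrative script, not part of the package; it reuses only the Laminar calls shown in the hunk and assumes LMNR_PROJECT_API_KEY is set in the environment):

    import os
    from uuid import uuid4

    from lmnr import Laminar

    flow_run_id = str(uuid4())                   # stand-in for runtime.flow_run.get_id()
    os.environ["LMNR_SESSION_ID"] = flow_run_id  # child processes inherit the session id

    Laminar.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], export_timeout_seconds=15)

    with Laminar.start_as_current_span(
        name="my-deployment-my-project",         # mirrors f"{deployment.name}-{project_name}"
        input={"project_name": "my-project"},
        session_id=flow_run_id,
    ):
        pass  # deployment.run(...) executes here in the real code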
{ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/llm/ai_messages.py +47 -4
@@ -38,6 +38,34 @@ def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes,
     return buf.getvalue(), "image/png"
 
 
+def _looks_like_text(content: bytes) -> bool:
+    """Check if content is valid UTF-8 text (not binary).
+
+    Uses heuristics: must decode as UTF-8 and have no null bytes.
+    Null bytes are common in binary files but rare in text.
+    """
+    if not content:
+        return True
+    # Null bytes indicate binary content
+    if b"\x00" in content:
+        return False
+    try:
+        content.decode("utf-8")
+        return True
+    except UnicodeDecodeError:
+        return False
+
+
+def _has_pdf_signature(content: bytes) -> bool:
+    """Check if content starts with PDF magic bytes (%PDF-).
+
+    Real PDFs start with %PDF- (possibly after whitespace).
+    This prevents false positives when a real PDF happens to be
+    partly UTF-8 decodable (e.g., ASCII-heavy PDF metadata).
+    """
+    return content.lstrip().startswith(b"%PDF-")
+
+
 AIMessageType = str | Document | ModelResponse
 """Type for messages in AIMessages container.
 
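Taken together, the two new helpers encode a single reclassification rule: a payload labeled as PDF that lacks the %PDF- signature but still decodes as UTF-8 is treated as text. A self-contained sketch of that rule (the pdf_named_but_text name is hypothetical, used only for illustration):

    def pdf_named_but_text(content: bytes) -> bool:
        # Null bytes mean binary content; never reclassify.
        if b"\x00" in content:
            return False
        # A genuine PDF announces itself with %PDF- magic bytes.
        if content.lstrip().startswith(b"%PDF-"):
            return False
        # Otherwise UTF-8 decodability is the deciding heuristic.
        try:
            content.decode("utf-8")
        except UnicodeDecodeError:
            return False
        return True

    assert pdf_named_but_text(b"<!doctype html><p>Not found</p>")  # misnamed text served as .pdf
    assert not pdf_named_but_text(b"%PDF-1.7 ...")                 # genuine PDF header
    assert not pdf_named_but_text(bytes([0, 1, 2, 255]))           # binary blob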
@@ -350,7 +378,7 @@ class AIMessages(list[AIMessageType]):  # noqa: PLR0904
         return count
 
     @staticmethod
-    def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:  # noqa: PLR0912, PLR0914
+    def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:  # noqa: C901, PLR0912, PLR0914, PLR0915
         """Convert a document to prompt format for LLM consumption.
 
         Renders the document as XML with text/image/PDF content, followed by any
@@ -368,8 +396,15 @@ class AIMessages(list[AIMessageType]):  # noqa: PLR0904
         description = f"<description>{document.description}</description>\n" if document.description else ""
         header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
 
+        # Check if "PDF" is actually text (misnamed file from URL ending in .pdf)
+        # Real PDFs start with %PDF- magic bytes; if missing and content is UTF-8, it's text
+        is_text = document.is_text
+        if not is_text and document.is_pdf and _looks_like_text(document.content) and not _has_pdf_signature(document.content):
+            is_text = True
+            logger.debug(f"Document '{document.name}' has PDF extension but contains text content - sending as text")
+
         # Handle text documents
-        if document.is_text:
+        if is_text:
             text_content = document.content.decode("utf-8")
             content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
             prompt.append({"type": "text", "text": content_text})
@@ -407,8 +442,16 @@ class AIMessages(list[AIMessageType]):  # noqa: PLR0904
             desc_attr = f' description="{att.description}"' if att.description else ""
             att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
 
-            if att.is_text:
-                prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
+            # Check if "PDF" attachment is actually text (same logic as document)
+            att_is_text = att.is_text
+            if not att_is_text and att.is_pdf and _looks_like_text(att.content) and not _has_pdf_signature(att.content):
+                att_is_text = True
+                logger.debug(f"Attachment '{att.name}' has PDF extension but contains text content - sending as text")
+
+            if att_is_text:
+                # Use content.decode() directly - att.text property raises ValueError if is_text is False
+                att_text = att.content.decode("utf-8")
+                prompt.append({"type": "text", "text": f"{att_open}{att_text}\n</attachment>\n"})
             elif att.is_image or att.is_pdf:
                 prompt.append({"type": "text", "text": att_open})
 
{ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/_initialization.py +4 -3
@@ -8,7 +8,6 @@ import importlib
 from typing import Any, Protocol
 from uuid import UUID
 
-from lmnr import Laminar
 from opentelemetry import trace as otel_trace
 from pydantic import BaseModel, ConfigDict
 
@@ -180,10 +179,12 @@ def initialize_observability(config: ObservabilityConfig | None = None) -> None:
     if config is None:
         config = _build_config_from_settings()
 
-    # 1. Laminar
+    # 1. Laminar - use canonical initializer from tracing module
    if config.has_lmnr:
         try:
-            Laminar.initialize(project_api_key=config.lmnr_project_api_key, export_timeout_seconds=15)
+            from ai_pipeline_core.observability import tracing  # noqa: PLC0415
+
+            tracing._initialise_laminar()
             logger.info("Laminar initialized")
         except Exception as e:
             logger.warning(f"Laminar initialization failed: {e}")
{ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/ai_pipeline_core/observability/tracing.py +39 -7
@@ -10,6 +10,7 @@ import contextlib
 import inspect
 import json
 import os
+import threading
 from collections.abc import Callable
 from functools import wraps
 from typing import Any, Literal, ParamSpec, TypeVar, cast, overload
@@ -220,19 +221,42 @@ class TraceInfo(BaseModel):
 # ---------------------------------------------------------------------------
 
 
+_laminar_initialized = False
+_laminar_init_lock = threading.Lock()
+
+
 def _initialise_laminar() -> None:
-    """Initialize Laminar SDK with project configuration.
+    """Initialize Laminar SDK with project configuration (lazy, once per process).
 
     Sets up the Laminar observability client with the project API key
     from settings. Disables automatic OpenAI instrumentation to avoid
     conflicts with our custom tracing.
 
-    Called once per process. Multiple calls are safe (Laminar handles idempotency).
+    IMPORTANT: This is called lazily at first trace execution (not at decoration time)
+    to allow LMNR_SPAN_CONTEXT environment variable to be set before initialization.
+    Laminar reads LMNR_SPAN_CONTEXT during initialize() to establish parent context
+    for cross-process tracing.
+
+    Uses double-checked locking pattern for thread safety. The flag is set AFTER
+    successful initialization to prevent permanently disabled tracing on init failure.
     """
-    if settings.lmnr_project_api_key:
-        Laminar.initialize(
-            project_api_key=settings.lmnr_project_api_key, disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [], export_timeout_seconds=15
-        )
+    global _laminar_initialized  # noqa: PLW0603
+
+    # Fast path: already initialized (no lock needed)
+    if _laminar_initialized:
+        return
+
+    with _laminar_init_lock:
+        # Double-check inside lock
+        if _laminar_initialized:
+            return
+
+        if settings.lmnr_project_api_key:
+            disabled = [Instruments.OPENAI] if Instruments.OPENAI else []
+            Laminar.initialize(project_api_key=settings.lmnr_project_api_key, disabled_instruments=disabled, export_timeout_seconds=15)
+
+        # Set flag AFTER successful initialization
+        _laminar_initialized = True
 
 
 # Overload for calls like @trace(name="...", level="debug")
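The rewritten initializer follows the classic double-checked locking shape, and because the flag is set only after the guarded step succeeds, a failed attempt is retried on a later call instead of silently disabling tracing. A generic, self-contained sketch of that shape (illustrative only; the names are made up):

    import threading

    _done = False
    _lock = threading.Lock()

    def init_once(setup) -> None:
        global _done
        if _done:              # fast path: no lock once initialized
            return
        with _lock:
            if _done:          # re-check: another thread may have finished first
                return
            setup()            # may raise; _done stays False so later calls retry
            _done = True

    init_once(lambda: print("runs exactly once"))
    init_once(lambda: print("never printed"))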
@@ -400,7 +424,9 @@ def trace(  # noqa: UP047
         return f
 
     # --- Pre-computation (done once when the function is decorated) ---
-    _initialise_laminar()
+    # NOTE: _initialise_laminar() is NOT called here (at decoration/import time)
+    # to allow LMNR_SPAN_CONTEXT to be set before Laminar.initialize() runs.
+    # It's called lazily in the wrapper functions at first execution.
     sig = inspect.signature(f)
     is_coroutine = inspect.iscoroutinefunction(f)
     observe_name = name or f.__name__
@@ -550,6 +576,9 @@ def trace(  # noqa: UP047
         Returns:
             The result of the wrapped function.
         """
+        # Lazy initialization: called at first execution, not at decoration time.
+        # This allows LMNR_SPAN_CONTEXT to be set before Laminar.initialize().
+        _initialise_laminar()
         observe_params = _prepare_and_get_observe_params(kwargs)
         observed_func = bound_observe(**observe_params)(f)
         return observed_func(*args, **kwargs)
@@ -561,6 +590,9 @@ def trace(  # noqa: UP047
         Returns:
             The result of the wrapped function.
         """
+        # Lazy initialization: called at first execution, not at decoration time.
+        # This allows LMNR_SPAN_CONTEXT to be set before Laminar.initialize().
+        _initialise_laminar()
         observe_params = _prepare_and_get_observe_params(kwargs)
         observed_func = bound_observe(**observe_params)(f)
         return await observed_func(*args, **kwargs)  # pyright: ignore[reportGeneralTypeIssues, reportUnknownVariableType]
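Moving the initializer call out of decoration time and into the wrappers changes when the environment is read: by the time the first traced call runs, the orchestrator has had a chance to export the parent span context. A hypothetical worker entrypoint illustrating that ordering (the import path, decorator usage, and LMNR_SPAN_CONTEXT value are assumptions for illustration, not taken verbatim from the package API):

    import os

    # In a real deployment the orchestrator supplies a serialized parent span context.
    os.environ["LMNR_SPAN_CONTEXT"] = "<serialized-parent-span-context>"

    from ai_pipeline_core.observability.tracing import trace  # assumed module path

    @trace(name="worker-step")
    def worker_step() -> str:
        return "done"

    worker_step()  # first execution triggers _initialise_laminar(), which now sees LMNR_SPAN_CONTEXT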
{ai_pipeline_core-0.4.6 → ai_pipeline_core-0.4.8}/pyproject.toml +1 -1
@@ -1,6 +1,6 @@
 [project]
 name = "ai-pipeline-core"
-version = "0.4.6"
+version = "0.4.8"
 description = "Core utilities for AI-powered processing pipelines using prefect"
 readme = "README.md"
 license = {text = "MIT"}