ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- ai_pipeline_core/__init__.py +70 -144
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +106 -81
- ai_pipeline_core/llm/client.py +267 -158
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_options.py

@@ -41,9 +41,9 @@ class ModelOptions(BaseModel):
         retries: Number of retry attempts on failure (default: 3).
-        retry_delay_seconds: Seconds to wait between retries (default:
+        retry_delay_seconds: Seconds to wait between retries (default: 20).
-        timeout: Maximum seconds to wait for response (default:
+        timeout: Maximum seconds to wait for response (default: 600).
         cache_ttl: Cache TTL for context messages (default: "300s").
             String format like "60s", "5m", or None to disable caching.
@@ -99,77 +99,11 @@ class ModelOptions(BaseModel):
             Merged with usage_tracking if both are set.
             Useful for beta features or provider-specific capabilities.

-        ... )
-        >>>
-        >>> # With system prompt
-        >>> options = ModelOptions(
-        ...     system_prompt="You are a helpful coding assistant",
-        ...     temperature=0.3  # Lower for code generation
-        ... )
-        >>>
-        >>> # With custom cache TTL
-        >>> options = ModelOptions(
-        ...     cache_ttl="300s",  # Cache context for 5 minutes
-        ...     max_completion_tokens=1000
-        ... )
-        >>>
-        >>> # Disable caching
-        >>> options = ModelOptions(
-        ...     cache_ttl=None,  # No context caching
-        ...     temperature=0.5
-        ... )
-        >>>
-        >>> # For search-enabled models
-        >>> options = ModelOptions(
-        ...     search_context_size="high",  # Get more search results
-        ...     max_completion_tokens=2000
-        ... )
-        >>>
-        >>> # For reasoning models
-        >>> options = ModelOptions(
-        ...     reasoning_effort="high",  # Deep reasoning
-        ...     timeout=600  # More time for complex reasoning
-        ... )
-        >>>
-        >>> # With stop sequences
-        >>> options = ModelOptions(
-        ...     stop=["STOP", "END", "\n\n"],  # Stop on these sequences
-        ...     temperature=0.7
-        ... )
-        >>>
-        >>> # With custom extra_body parameters
-        >>> options = ModelOptions(
-        ...     extra_body={"custom_param": "value", "beta_feature": True},
-        ...     usage_tracking=True  # Still tracks usage alongside custom params
-        ... )
-        >>>
-        >>> # With user tracking for cost monitoring
-        >>> options = ModelOptions(
-        ...     user="user_12345",  # Track costs per user
-        ...     temperature=0.7
-        ... )
-        >>>
-        >>> # With metadata for tracking and observability
-        >>> options = ModelOptions(
-        ...     metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
-        ...     temperature=0.7
-        ... )
-
-    Note:
-        - Not all options apply to all models
-        - search_context_size only works with search models
-        - reasoning_effort only works with models that support explicit reasoning
-        - response_format is set internally by generate_structured()
-        - cache_ttl accepts formats like "120s", "5m", "1h" or None (default: "300s")
-        - stop sequences are limited to 4 by most providers
-        - user identifier helps track costs per end-user (max 256 chars)
-        - extra_body allows passing provider-specific parameters
-        - usage_tracking is enabled by default for cost monitoring
+    Not all options apply to all models. search_context_size only works with search models,
+    reasoning_effort only works with models that support explicit reasoning, and
+    response_format is set internally by generate_structured(). cache_ttl accepts formats
+    like "120s", "5m", "1h" or None (default: "300s"). Stop sequences are limited to 4 by
+    most providers.
     """

     temperature: float | None = None
@@ -185,12 +119,13 @@ class ModelOptions(BaseModel):
     stop: str | list[str] | None = None
     response_format: type[BaseModel] | None = None
     verbosity: Literal["low", "medium", "high"] | None = None
+    stream: bool = False
     usage_tracking: bool = True
     user: str | None = None
     metadata: dict[str, str] | None = None
     extra_body: dict[str, Any] | None = None

-    def to_openai_completion_kwargs(self) -> dict[str, Any]:
+    def to_openai_completion_kwargs(self) -> dict[str, Any]:  # noqa: C901
         """Convert options to OpenAI API completion parameters.

         Transforms ModelOptions fields into the format expected by
@@ -221,16 +156,9 @@ class ModelOptions(BaseModel):
             {"web_search_options": {"search_context_size": "low|medium|high"}}
             Non-search models silently ignore this parameter.

-            >>> kwargs
-            {'timeout': 60, 'extra_body': {}, 'temperature': 0.5}
-
-        Note:
-            - system_prompt is handled separately in _process_messages()
-            - retries and retry_delay_seconds are used by retry logic
-            - extra_body always includes usage tracking for cost monitoring
+            system_prompt is handled separately in _process_messages().
+            retries and retry_delay_seconds are used by retry logic.
+            extra_body always includes usage tracking for cost monitoring.
         """
         kwargs: dict[str, Any] = {
             "timeout": self.timeout,
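The long docstring example block is replaced in 0.4.0 by the short prose note above, so here is a minimal usage sketch assembled from the fields visible in these hunks (including the new `stream` field and the documented 20s/600s defaults). The import path, and treating `retry_delay_seconds`/`timeout` as constructor fields, are assumptions inferred from the docstring rather than confirmed by this diff.

```python
# Sketch only: built from the ModelOptions fields and defaults shown in the
# hunks above. The import path is assumed from the file location in this diff.
from ai_pipeline_core.llm.model_options import ModelOptions

options = ModelOptions(
    temperature=0.3,
    retry_delay_seconds=20,   # documented 0.4.0 default
    timeout=600,              # documented 0.4.0 default
    cache_ttl="300s",         # "60s", "5m", ... or None to disable caching
    stream=False,             # field added in 0.4.0
)

# Converts the options into the kwargs dict passed to the OpenAI-compatible
# completion endpoint; timeout is always included per the hunk above.
kwargs = options.to_openai_completion_kwargs()
assert kwargs["timeout"] == 600
```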
ai_pipeline_core/llm/model_response.py

@@ -1,13 +1,12 @@
 """Model response structures for LLM interactions.

-@public
-
 Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """

 import json
 from copy import deepcopy
+from dataclasses import dataclass
 from typing import Any, Generic, TypeVar

 from openai.types.chat import ChatCompletion
@@ -21,11 +20,17 @@ T = TypeVar(
 """Type parameter for structured response Pydantic models."""


+@dataclass(frozen=True)
+class Citation:
+    """A URL citation returned by search-enabled models (e.g. sonar-pro-search, gemini-3-flash-search)."""
+
+    title: str
+    url: str
+
+
 class ModelResponse(ChatCompletion):
     """Response wrapper for LLM text generation.

-    @public
-
     Primary usage is adding to AIMessages for multi-turn conversations:

     >>> response = await llm.generate("gpt-5.1", messages=messages)
@@ -39,22 +44,9 @@ class ModelResponse(ChatCompletion):
     Almost all use cases are covered by these two patterns. Advanced features
     like token usage and cost tracking are available but rarely needed.

-        >>> messages = AIMessages(["Explain quantum computing"])
-        >>> response = await llm.generate("gpt-5.1", messages=messages)
-        >>>
-        >>> # Primary usage: add to conversation
-        >>> messages.append(response)
-        >>>
-        >>> # Access generated text
-        >>> print(response.content)
-
-    Note:
-        Inherits from OpenAI's ChatCompletion for compatibility.
-        Other properties (usage, model, id) should only be accessed
-        when absolutely necessary.
+    Inherits from OpenAI's ChatCompletion for compatibility.
+    Other properties (usage, model, id) should only be accessed
+    when absolutely necessary.
     """

     def __init__(
@@ -77,13 +69,6 @@ class ModelResponse(ChatCompletion):
                 Includes timing information and custom tags.
             usage: Optional usage information from streaming response.

-        Example:
-            >>> # Usually created internally by generate()
-            >>> response = ModelResponse(
-            ...     chat_completion=completion,
-            ...     model_options={"temperature": 0.7, "model": "gpt-5.1"},
-            ...     metadata={"time_taken": 1.5, "first_token_time": 0.3}
-            ... )
         """
         data = chat_completion.model_dump()

@@ -95,6 +80,10 @@ class ModelResponse(ChatCompletion):
             current_finish_reason = data["choices"][i].get("finish_reason")
             if current_finish_reason not in valid_finish_reasons:
                 data["choices"][i]["finish_reason"] = "stop"
+            # Strip annotations with unsupported types (e.g. Grok returns type="file" for PDFs,
+            # but OpenAI's ChatCompletion only accepts type="url_citation")
+            if annotations := data["choices"][i]["message"].get("annotations"):
+                data["choices"][i]["message"]["annotations"] = [a for a in annotations if a.get("type") == "url_citation"]

         super().__init__(**data)

@@ -107,22 +96,12 @@ class ModelResponse(ChatCompletion):
     def content(self) -> str:
         """Get the generated text content.

-        @public
-
         Primary property for accessing the LLM's response text.
         This is the main property you'll use with ModelResponse.

         Returns:
             Generated text from the model, or empty string if none.

-        Example:
-            >>> response = await generate("gpt-5.1", messages="Hello")
-            >>> text = response.content  # The generated response
-            >>>
-            >>> # Common pattern: add to messages then use content
-            >>> messages.append(response)
-            >>> if "error" in response.content.lower():
-            ...     # Handle error case
         """
         content = self.choices[0].message.content or ""
         return content.split("</think>")[-1].strip()
@@ -131,8 +110,6 @@ class ModelResponse(ChatCompletion):
     def reasoning_content(self) -> str:
         """Get the reasoning content.

-        @public
-
         Returns:
             The reasoning content from the model, or empty string if none.
         """
@@ -143,7 +120,19 @@ class ModelResponse(ChatCompletion):
             return ""
         return message.content.split("</think>")[0].strip()

-
+    @property
+    def citations(self) -> list[Citation]:
+        """Get URL citations from search-enabled models.
+
+        Returns:
+            List of Citation objects with title and url. Empty list for non-search models.
+        """
+        annotations = self.choices[0].message.annotations
+        if not annotations:
+            return []
+        return [Citation(title=a.url_citation.title, url=a.url_citation.url) for a in annotations if a.url_citation]
+
+    def get_laminar_metadata(self) -> dict[str, str | int | float]:  # noqa: C901
         """Extract metadata for LMNR (Laminar) observability including cost tracking.

         Collects comprehensive metadata about the generation for tracing,
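The new `Citation` dataclass and `citations` property land together, so a hedged sketch of how they might be consumed follows. The `llm.generate(...)` call shape is taken from the ModelResponse docstring above; the `from ai_pipeline_core import llm` import path is an assumption.

```python
from ai_pipeline_core import llm  # assumed import path

async def list_sources() -> None:
    # citations returns [] for models that attach no url_citation annotations,
    # so this loop is safe for non-search models too.
    response = await llm.generate("sonar-pro-search", messages="Latest Python release notes?")
    for citation in response.citations:
        print(f"{citation.title}: {citation.url}")
```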
ai_pipeline_core/llm/model_response.py (continued)

@@ -178,56 +167,26 @@ class ModelResponse(ChatCompletion):
             1. x-litellm-response-cost header (primary)
             2. usage.cost attribute (fallback)

-            Cost is stored in three fields for
-            - gen_ai.usage.output_cost (
-            - gen_ai.usage.cost (
-            - gen_ai.cost (
-
-            ...     context=large_doc,
-            ...     messages="Summarize this"
-            ... )
-            >>>
-            >>> # Get comprehensive metadata
-            >>> metadata = response.get_laminar_metadata()
-            >>>
-            >>> # Track generation cost
-            >>> cost = metadata.get('gen_ai.usage.output_cost', 0)
-            >>> if cost > 0:
-            ...     print(f"Generation cost: ${cost:.4f}")
-            >>>
-            >>> # Monitor token usage
-            >>> print(f"Input: {metadata.get('gen_ai.usage.prompt_tokens', 0)} tokens")
-            >>> print(f"Output: {metadata.get('gen_ai.usage.completion_tokens', 0)} tokens")
-            >>> print(f"Total: {metadata.get('gen_ai.usage.total_tokens', 0)} tokens")
-            >>>
-            >>> # Check cache effectiveness
-            >>> cached = metadata.get('gen_ai.usage.cached_tokens', 0)
-            >>> if cached > 0:
-            ...     total = metadata.get('gen_ai.usage.total_tokens', 1)
-            ...     savings = (cached / total) * 100
-            ...     print(f"Cache hit: {cached} tokens ({savings:.1f}% savings)")
-            >>>
-            >>> # Calculate cost per token
-            >>> if cost > 0 and metadata.get('gen_ai.usage.total_tokens'):
-            ...     cost_per_1k = (cost / metadata['gen_ai.usage.total_tokens']) * 1000
-            ...     print(f"Cost per 1K tokens: ${cost_per_1k:.4f}")
-
-        Note:
-            - Cost availability depends on LiteLLM proxy configuration
-            - Not all providers return cost information
-            - Cached tokens reduce actual cost but may not be reflected
-            - Used internally by tracing but accessible for cost analysis
+            Cost is stored in three fields for observability tool consumption:
+            - gen_ai.usage.output_cost (OpenTelemetry GenAI semantic convention)
+            - gen_ai.usage.cost (aggregated cost)
+            - gen_ai.cost (short-form)
+
+            Cost availability depends on LiteLLM proxy configuration. Not all providers
+            return cost information. Cached tokens reduce actual cost but may not be reflected.
+            Used internally by tracing but accessible for cost analysis.
         """
         metadata: dict[str, str | int | float] = deepcopy(self._metadata)

         # Add base metadata
+        # NOTE: gen_ai.response.model is intentionally omitted — Laminar's UI uses it
+        # to override the span display name in the tree view, hiding the actual span name
+        # (set via `purpose` parameter). Tracked upstream: Laminar's getSpanDisplayName()
+        # in frontend/components/traces/trace-view/utils.ts prefers model over span name
+        # for LLM spans. Restore once Laminar shows both or prefers span name.
         metadata.update({
             "gen_ai.response.id": self.id,
-            "gen_ai.
-            "get_ai.system": "litellm",
+            "gen_ai.system": "litellm",
         })

         # Add usage metadata if available
@@ -245,21 +204,19 @@ class ModelResponse(ChatCompletion):
             cost = float(self.usage.cost)  # type: ignore[attr-defined]

         # Add reasoning tokens if available
-        if completion_details := self.usage.completion_tokens_details:
-
-            metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
+        if (completion_details := self.usage.completion_tokens_details) and (reasoning_tokens := completion_details.reasoning_tokens):
+            metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens

         # Add cached tokens if available
-        if prompt_details := self.usage.prompt_tokens_details:
-
-            metadata["gen_ai.usage.cached_tokens"] = cached_tokens
+        if (prompt_details := self.usage.prompt_tokens_details) and (cached_tokens := prompt_details.cached_tokens):
+            metadata["gen_ai.usage.cached_tokens"] = cached_tokens

         # Add cost metadata if available
         if cost and cost > 0:
             metadata.update({
                 "gen_ai.usage.output_cost": cost,
                 "gen_ai.usage.cost": cost,
-                "
+                "gen_ai.cost": cost,
             })

         for key, value in self._model_options.items():
@@ -269,7 +226,7 @@ class ModelResponse(ChatCompletion):

         other_fields = self.__dict__
         for key, value in other_fields.items():
-            if key in
+            if key in {"_model_options", "_metadata", "choices"}:
                 continue
             try:
                 metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
@@ -278,7 +235,7 @@ class ModelResponse(ChatCompletion):

         message = self.choices[0].message
         for key, value in message.__dict__.items():
-            if key in
+            if key in {"content"}:
                 continue
             metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)

@@ -297,16 +254,13 @@ class ModelResponse(ChatCompletion):
         if not self.content:
             raise ValueError("Empty response content")

-        if response_format := self._model_options.get("response_format"):
-
-            response_format.model_validate_json(self.content)
+        if (response_format := self._model_options.get("response_format")) and isinstance(response_format, BaseModel):
+            response_format.model_validate_json(self.content)


-class StructuredModelResponse(ModelResponse, Generic[T]):
+class StructuredModelResponse(ModelResponse, Generic[T]):  # noqa: UP046
     """Response wrapper for structured/typed LLM output.

-    @public
-
     Primary usage is accessing the .parsed property for the structured data.
     """
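The 0.4.0 docstring drops the long cost-tracking example; a condensed sketch of the same idea follows, using only metadata keys that appear in these hunks. The import path is assumed from the file location in this diff.

```python
from ai_pipeline_core.llm.model_response import ModelResponse  # assumed import path

def report_usage(response: ModelResponse) -> None:
    # Keys below (gen_ai.usage.output_cost, gen_ai.usage.cached_tokens,
    # gen_ai.usage.reasoning_tokens, gen_ai.system) are the ones named in
    # the get_laminar_metadata() hunks above.
    metadata = response.get_laminar_metadata()

    cost = metadata.get("gen_ai.usage.output_cost", 0)
    if cost:
        print(f"Generation cost: ${float(cost):.4f}")

    cached = metadata.get("gen_ai.usage.cached_tokens", 0)
    reasoning = metadata.get("gen_ai.usage.reasoning_tokens", 0)
    print(f"cached={cached} reasoning={reasoning} system={metadata.get('gen_ai.system')}")
```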
ai_pipeline_core/llm/model_types.py

@@ -10,9 +10,9 @@ Model categories:
 - Search models: Models with web search capabilities
 """

-from typing import Literal
+from typing import Literal

-ModelName
+type ModelName = (
     Literal[
         # Core models
         "gemini-3-pro",
@@ -23,20 +23,20 @@ ModelName: TypeAlias = (
         "grok-4.1-fast",
         # Search models
         "gemini-3-flash-search",
+        "gpt-5-mini-search",
+        "grok-4.1-fast-search",
         "sonar-pro-search",
     ]
     | str
 )
 """Type-safe model name identifiers with support for custom models.

-@public
-
 Provides IDE autocompletion for common model names while allowing any
 string for custom models. The type is a union of predefined literals
 and str, giving you the best of both worlds: suggestions for known
 models and flexibility for custom ones.

-
+These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.

 Model categories:
@@ -58,22 +58,7 @@ Using custom models:
 - Custom models work seamlessly as strings
 - No need for Union types or additional type aliases

-    >>> # Predefined model with IDE autocomplete
-    >>> model: ModelName = "gpt-5.1"  # IDE suggests common models
-    >>> response = await llm.generate(model, messages="Hello")
-    >>>
-    >>> # Custom model works directly
-    >>> model: ModelName = "custom-model-v2"  # Any string is valid
-    >>> response = await llm.generate(model, messages="Hello")
-    >>>
-    >>> # Both types work seamlessly
-    >>> models: list[ModelName] = ["gpt-5.1", "custom-llm", "gemini-3-pro"]
-
-Note:
-    The ModelName type includes both predefined literals and str,
-    allowing full flexibility while maintaining IDE support for
-    common models.
+The ModelName type includes both predefined literals and str,
+allowing full flexibility while maintaining IDE support for
+common models.
 """
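The removed doctest block boils down to this: predefined literals get autocompletion, arbitrary strings still type-check against the PEP 695 `type ModelName = Literal[...] | str` alias. A short sketch; the import path is assumed from the file location, and the custom model name is a placeholder.

```python
from ai_pipeline_core.llm.model_types import ModelName  # assumed import path

# Both assignments type-check because ModelName is Literal[...] | str.
model: ModelName = "gemini-3-flash-search"   # predefined literal, autocompleted
custom: ModelName = "my-proxy-model-v2"      # any custom string is also valid

models: list[ModelName] = [model, custom, "sonar-pro-search"]
```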
ai_pipeline_core/logging/__init__.py

@@ -2,11 +2,6 @@

 Provides a Prefect-integrated logging facade for unified logging across pipelines.
 Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
-
-Example:
-    >>> from ai_pipeline_core import get_pipeline_logger
-    >>> logger = get_pipeline_logger(__name__)
-    >>> logger.info("Processing started")
 """

 from .logging_config import LoggingConfig, get_pipeline_logger, setup_logging
@@ -14,8 +9,8 @@ from .logging_mixin import LoggerMixin, StructuredLoggerMixin

 __all__ = [
     "LoggerMixin",
-    "StructuredLoggerMixin",
     "LoggingConfig",
-    "
+    "StructuredLoggerMixin",
     "get_pipeline_logger",
+    "setup_logging",
 ]
ai_pipeline_core/logging/logging_config.py

@@ -6,7 +6,7 @@ Provides logging configuration management that integrates with Prefect's logging
 import logging.config
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any

 import yaml
 from prefect.logging import get_logger
@@ -16,7 +16,7 @@ DEFAULT_LOG_LEVELS = {
     "ai_pipeline_core": "INFO",
     "ai_pipeline_core.documents": "INFO",
     "ai_pipeline_core.llm": "INFO",
-    "ai_pipeline_core.
+    "ai_pipeline_core.pipeline": "INFO",
     "ai_pipeline_core.testing": "DEBUG",
 }

@@ -32,22 +32,19 @@ class LoggingConfig:
     3. PREFECT_LOGGING_SETTINGS_PATH environment variable
     4. Default configuration

-    Example:
-        >>> config = LoggingConfig()
-        >>> config.apply()
     """

-    def __init__(self, config_path:
+    def __init__(self, config_path: Path | None = None):
         """Initialize logging configuration.

         Args:
             config_path: Optional path to YAML configuration file.
         """
         self.config_path = config_path or self._get_default_config_path()
-        self._config:
+        self._config: dict[str, Any] | None = None

     @staticmethod
-    def _get_default_config_path() ->
+    def _get_default_config_path() -> Path | None:
         """Get default config path from environment variables.

         Returns:
@@ -63,7 +60,7 @@ class LoggingConfig:

         return None

-    def load_config(self) ->
+    def load_config(self) -> dict[str, Any]:
         """Load logging configuration from file or defaults.

         Returns:
@@ -71,7 +68,7 @@ class LoggingConfig:
         """
         if self._config is None:
             if self.config_path and self.config_path.exists():
-                with open(self.config_path, "
+                with open(self.config_path, encoding="utf-8") as f:
                     self._config = yaml.safe_load(f)
             else:
                 self._config = self._get_default_config()
@@ -80,7 +77,7 @@ class LoggingConfig:
         return self._config

     @staticmethod
-    def _get_default_config() ->
+    def _get_default_config() -> dict[str, Any]:
         """Get default logging configuration.

         Returns:
@@ -95,10 +92,7 @@ class LoggingConfig:
                     "datefmt": "%H:%M:%S",
                 },
                 "detailed": {
-                    "format": (
-                        "%(asctime)s | %(levelname)-7s | %(name)s | "
-                        "%(funcName)s:%(lineno)d - %(message)s"
-                    ),
+                    "format": ("%(asctime)s | %(levelname)-7s | %(name)s | %(funcName)s:%(lineno)d - %(message)s"),
                     "datefmt": "%Y-%m-%d %H:%M:%S",
                 },
             },
@@ -134,10 +128,10 @@


 # Global configuration instance
-_logging_config:
+_logging_config: LoggingConfig | None = None


-def setup_logging(config_path:
+def setup_logging(config_path: Path | None = None, level: str | None = None):
     """Setup logging for the AI Pipeline Core library.

     Initializes logging configuration for the pipeline system.
@@ -149,18 +143,8 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = Non
         config_path: Optional path to YAML logging configuration file.
         level: Optional log level override (INFO, DEBUG, WARNING, etc.).

-    Example:
-        >>> # In your main.py or application entry point:
-        >>> def main():
-        ...     setup_logging()  # Call once at startup
-        ...     # Your application code here
-        ...
-        >>> # Or with custom level:
-        >>> if __name__ == "__main__":
-        ...     setup_logging(level="DEBUG")
-        ...     run_application()
     """
-    global _logging_config
+    global _logging_config  # noqa: PLW0603

     _logging_config = LoggingConfig(config_path)
     _logging_config.apply()
@@ -179,22 +163,28 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = Non
 def get_pipeline_logger(name: str):
     """Get a logger for pipeline components.

-
+    Returns a Prefect-integrated logger with the OTel span-event bridge
+    attached. Any log record at INFO+ emitted while an OTel span is
+    recording will be captured as a span event in the trace.

     Args:
         name: Logger name, typically __name__.

     Returns:
-        Prefect logger instance.
+        Prefect logger instance with bridge handler.

-    Example:
-        >>> logger = get_pipeline_logger(__name__)
-        >>> logger.info("Module initialized")
     """
-    # Ensure logging is setup
     if _logging_config is None:
         setup_logging()

-
+    logger = get_logger(name)
+
+    # Attach the singleton bridge handler so log records become OTel span events.
+    # The handler is a no-op when no span is recording, so early attachment is safe.
+    from ai_pipeline_core.observability._logging_bridge import get_bridge_handler  # noqa: PLC0415
+
+    handler = get_bridge_handler()
+    if handler not in logger.handlers:
+        logger.addHandler(handler)
+
+    return logger