PyPI - ai-pipeline-core - Versions diffs - 0.4.5__tar.gz → 0.4.7__tar.gz - Mend

ai-pipeline-core 0.4.5.tar.gz → 0.4.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

{ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.4.5
+Version: 0.4.7
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core

{ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/__init__.py RENAMED Viewed

@@ -64,7 +64,7 @@ from .prompt_manager import PromptManager
 from .settings import Settings
 from .testing import disable_run_logger, prefect_test_harness
-__version__ = "0.4.5"
+__version__ = "0.4.6"
 __all__ = [
     "AIMessageType",

{ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/deployment/deploy.py RENAMED Viewed

@@ -383,7 +383,7 @@ class Deployer:
         dest_uri = f"gs://{self.config['bucket']}/{flow_folder}/{tarball.name}"
         self._info(f"Uploading to {dest_uri}")
-        tarball_bytes = tarball.read_bytes()  # noqa: ASYNC240
+        tarball_bytes = tarball.read_bytes()
         await bucket.write_path(tarball.name, tarball_bytes)
         self._success(f"Package uploaded to {flow_folder}/{tarball.name}")
@@ -527,15 +527,16 @@ class Deployer:
         # Phase 3: Build vendor packages from [tool.deploy].vendor_packages
         vendor_wheels = self._build_vendor_packages()
-        # Also include cli_agents wheels from agent builds
-        if agent_builds:
-            seen_agent: set[str] = set()
-            for build_info in agent_builds.values():
-                for filename, filepath in build_info["files"].items():
-                    if filename.endswith(".whl") and filename not in seen_agent and "cli_agents" in filename:
-                        if filename not in {w.name for w in vendor_wheels}:
-                            vendor_wheels.append(filepath)
-                        seen_agent.add(filename)
+        # Build cli-agents wheel if source is configured — it's a private package
+        # not on PyPI, so the worker needs the wheel even when no agents are deployed
+        cli_agents_source = self._get_cli_agents_source()
+        if cli_agents_source:
+            cli_dir = Path(cli_agents_source).resolve()
+            if (cli_dir / "pyproject.toml").exists():
+                cli_wheel = self._build_wheel_from_source(cli_dir)
+                if cli_wheel.name not in {w.name for w in vendor_wheels}:
+                    vendor_wheels.append(cli_wheel)
+                    self._success(f"Built cli-agents vendor wheel: {cli_wheel.name}")
         # Phase 4: Upload flow package + vendor wheels
         await self._upload_package(tarball, vendor_wheels)

ai_pipeline_core-0.4.7/ai_pipeline_core/deployment/remote.py ADDED Viewed

@@ -0,0 +1,192 @@
+"""Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
+import asyncio
+from collections.abc import Awaitable, Callable, Coroutine
+from functools import wraps
+from typing import Any, TypeVar, cast
+from uuid import UUID
+from prefect import get_client
+from prefect.client.orchestration import PrefectClient
+from prefect.client.schemas import FlowRun
+from prefect.context import AsyncClientContext
+from prefect.deployments.flow_runs import run_deployment
+from prefect.exceptions import ObjectNotFound
+from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
+from ai_pipeline_core.documents import Document
+from ai_pipeline_core.logging import get_pipeline_logger
+from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
+from ai_pipeline_core.pipeline.options import FlowOptions
+from ai_pipeline_core.settings import settings
+logger = get_pipeline_logger(__name__)
+TOptions = TypeVar("TOptions", bound=FlowOptions)
+TResult = TypeVar("TResult", bound=DeploymentResult)
+ProgressCallback = Callable[[float, str], Awaitable[None]]
+"""Signature for remote deployment progress callbacks: (fraction, message) -> None."""
+def _is_already_traced(func: Callable[..., Any]) -> bool:
+    """Check if function or its __wrapped__ has __is_traced__ attribute."""
+    if getattr(func, "__is_traced__", False):
+        return True
+    wrapped = getattr(func, "__wrapped__", None)
+    return getattr(wrapped, "__is_traced__", False) if wrapped else False
+_POLL_INTERVAL = 5.0
+async def _poll_remote_flow_run(
+    client: PrefectClient,
+    flow_run_id: UUID,
+    deployment_name: str,
+    poll_interval: float = _POLL_INTERVAL,
+    on_progress: ProgressCallback | None = None,
+) -> Any:
+    """Poll a remote flow run until final, invoking on_progress callback with progress.
+    Reads the remote flow run's progress labels on each poll cycle and calls
+    on_progress(fraction, message) if provided. Without a callback, no progress
+    is reported. Only sends 1.0 on successful completion (not failure).
+    """
+    last_fraction = 0.0
+    while True:
+        try:
+            flow_run = await client.read_flow_run(flow_run_id)
+        except Exception:
+            logger.warning("Failed to poll remote flow run %s", flow_run_id, exc_info=True)
+            await asyncio.sleep(poll_interval)
+            continue
+        state = flow_run.state
+        if state and state.is_final():
+            if on_progress and state.is_completed():
+                await on_progress(1.0, f"[{deployment_name}] Completed")
+            return await state.result()  # type: ignore[union-attr]
+        if on_progress:
+            labels: dict[str, Any] = flow_run.labels or {}
+            progress_val = labels.get("progress.progress")
+            if progress_val is not None:
+                fraction = max(float(progress_val), last_fraction)
+                last_fraction = fraction
+                flow_name = str(labels.get("progress.flow_name", ""))
+                message = str(labels.get("progress.message", ""))
+                display = f"[{deployment_name}] {flow_name}: {message}" if flow_name else f"[{deployment_name}] Running"
+                await on_progress(fraction, display)
+            else:
+                await on_progress(last_fraction, f"[{deployment_name}] Waiting to start")
+        await asyncio.sleep(poll_interval)
+async def run_remote_deployment(
+    deployment_name: str,
+    parameters: dict[str, Any],
+    on_progress: ProgressCallback | None = None,
+) -> Any:
+    """Run a remote Prefect deployment with optional progress callback.
+    Creates the remote flow run immediately (timeout=0) then polls its state,
+    invoking on_progress(fraction, message) on each poll cycle if provided.
+    """
+    async def _create_and_poll(client: PrefectClient, as_subflow: bool) -> Any:
+        fr: FlowRun = await run_deployment(
+            client=client,
+            name=deployment_name,
+            parameters=parameters,
+            as_subflow=as_subflow,
+            timeout=0,
+        )  # type: ignore
+        return await _poll_remote_flow_run(client, fr.id, deployment_name, on_progress=on_progress)
+    async with get_client() as client:
+        try:
+            await client.read_deployment_by_name(name=deployment_name)
+            return await _create_and_poll(client, True)  # noqa: FBT003
+        except ObjectNotFound:
+            pass
+    if not settings.prefect_api_url:
+        raise ValueError(f"{deployment_name} not found, PREFECT_API_URL not set")
+    async with PrefectClient(
+        api=settings.prefect_api_url,
+        api_key=settings.prefect_api_key,
+        auth_string=settings.prefect_api_auth_string,
+    ) as client:
+        try:
+            await client.read_deployment_by_name(name=deployment_name)
+            ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
+            with ctx:
+                return await _create_and_poll(client, False)  # noqa: FBT003
+        except ObjectNotFound:
+            pass
+    raise ValueError(f"{deployment_name} deployment not found")
+def remote_deployment(
+    deployment_class: type[PipelineDeployment[TOptions, TResult]],
+    *,
+    deployment_name: str | None = None,
+    name: str | None = None,
+    trace_level: TraceLevel = "always",
+    trace_cost: float | None = None,
+) -> Callable[[Callable[..., Any]], Callable[..., Coroutine[Any, Any, TResult]]]:
+    """Decorator to call PipelineDeployment flows remotely with automatic serialization.
+    The decorated function's body is never executed — it serves as a typed stub.
+    The wrapper enforces the deployment contract: (project_name, documents, options, context).
+    """
+    def decorator(func: Callable[..., Any]) -> Callable[..., Coroutine[Any, Any, TResult]]:
+        fname = getattr(func, "__name__", deployment_class.name)
+        if _is_already_traced(func):
+            raise TypeError(f"@remote_deployment target '{fname}' already has @trace")
+        @wraps(func)
+        async def _wrapper(
+            project_name: str,
+            documents: list[Document],
+            options: TOptions,
+            context: DeploymentContext | None = None,
+            *,
+            on_progress: ProgressCallback | None = None,
+        ) -> TResult:
+            parameters: dict[str, Any] = {
+                "project_name": project_name,
+                "documents": documents,
+                "options": options,
+                "context": context if context is not None else DeploymentContext(),
+            }
+            full_name = f"{deployment_class.name}/{deployment_name or deployment_class.name.replace('-', '_')}"
+            result = await run_remote_deployment(full_name, parameters, on_progress=on_progress)
+            if trace_cost is not None and trace_cost > 0:
+                set_trace_cost(trace_cost)
+            if isinstance(result, DeploymentResult):
+                return cast(TResult, result)
+            if isinstance(result, dict):
+                return cast(TResult, deployment_class.result_type(**cast(dict[str, Any], result)))
+            raise TypeError(f"Expected DeploymentResult, got {type(result).__name__}")
+        traced_wrapper = trace(
+            level=trace_level,
+            name=name or deployment_class.name,
+        )(_wrapper)
+        return traced_wrapper
+    return decorator

{ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/llm/ai_messages.py RENAMED Viewed

@@ -38,6 +38,34 @@ def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes,
     return buf.getvalue(), "image/png"
+def _looks_like_text(content: bytes) -> bool:
+    """Check if content is valid UTF-8 text (not binary).
+    Uses heuristics: must decode as UTF-8 and have no null bytes.
+    Null bytes are common in binary files but rare in text.
+    """
+    if not content:
+        return True
+    # Null bytes indicate binary content
+    if b"\x00" in content:
+        return False
+    try:
+        content.decode("utf-8")
+        return True
+    except UnicodeDecodeError:
+        return False
+def _has_pdf_signature(content: bytes) -> bool:
+    """Check if content starts with PDF magic bytes (%PDF-).
+    Real PDFs start with %PDF- (possibly after whitespace).
+    This prevents false positives when a real PDF happens to be
+    partly UTF-8 decodable (e.g., ASCII-heavy PDF metadata).
+    """
+    return content.lstrip().startswith(b"%PDF-")
 AIMessageType = str | Document | ModelResponse
 """Type for messages in AIMessages container.
@@ -350,7 +378,7 @@ class AIMessages(list[AIMessageType]):  # noqa: PLR0904
         return count
     @staticmethod
-    def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:  # noqa: PLR0912, PLR0914
+    def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:  # noqa: C901, PLR0912, PLR0914, PLR0915
         """Convert a document to prompt format for LLM consumption.
         Renders the document as XML with text/image/PDF content, followed by any
@@ -368,8 +396,15 @@ class AIMessages(list[AIMessageType]):  # noqa: PLR0904
         description = f"<description>{document.description}</description>\n" if document.description else ""
         header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
+        # Check if "PDF" is actually text (misnamed file from URL ending in .pdf)
+        # Real PDFs start with %PDF- magic bytes; if missing and content is UTF-8, it's text
+        is_text = document.is_text
+        if not is_text and document.is_pdf and _looks_like_text(document.content) and not _has_pdf_signature(document.content):
+            is_text = True
+            logger.debug(f"Document '{document.name}' has PDF extension but contains text content - sending as text")
         # Handle text documents
-        if document.is_text:
+        if is_text:
             text_content = document.content.decode("utf-8")
             content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
             prompt.append({"type": "text", "text": content_text})
@@ -407,8 +442,16 @@ class AIMessages(list[AIMessageType]):  # noqa: PLR0904
             desc_attr = f' description="{att.description}"' if att.description else ""
             att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
-            if att.is_text:
-                prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
+            # Check if "PDF" attachment is actually text (same logic as document)
+            att_is_text = att.is_text
+            if not att_is_text and att.is_pdf and _looks_like_text(att.content) and not _has_pdf_signature(att.content):
+                att_is_text = True
+                logger.debug(f"Attachment '{att.name}' has PDF extension but contains text content - sending as text")
+            if att_is_text:
+                # Use content.decode() directly - att.text property raises ValueError if is_text is False
+                att_text = att.content.decode("utf-8")
+                prompt.append({"type": "text", "text": f"{att_open}{att_text}\n</attachment>\n"})
             elif att.is_image or att.is_pdf:
                 prompt.append({"type": "text", "text": att_open})

{ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "ai-pipeline-core"
-version = "0.4.5"
+version = "0.4.7"
 description = "Core utilities for AI-powered processing pipelines using prefect"
 readme = "README.md"
 license = {text = "MIT"}
@@ -142,7 +142,7 @@ convention = "google"
 ".vulture_whitelist.py" = ["B018", "E402", "F401", "D"]
 "ai_pipeline_core/__init__.py" = ["RUF067"]
 "ai_pipeline_core/deployment/base.py" = ["T20", "C901", "PLR0912", "PLR0914", "PLR0915", "PLR0917", "PLR6301", "PLC0415", "UP046"]
-"ai_pipeline_core/deployment/deploy.py" = ["T20", "PLC2701", "S404", "S602", "S603"]
+"ai_pipeline_core/deployment/deploy.py" = ["T20", "PLC2701", "S404", "S602", "S603", "ASYNC"]
 "ai_pipeline_core/deployment/remote.py" = ["UP047"]
 "ai_pipeline_core/deployment/progress.py" = ["RUF029"]
 "ai_pipeline_core/documents/document.py" = ["PLR0904"]

ai_pipeline_core-0.4.5/ai_pipeline_core/deployment/remote.py DELETED Viewed

@@ -1,116 +0,0 @@
-"""Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
-import inspect
-from collections.abc import Callable
-from functools import wraps
-from typing import Any, ParamSpec, TypeVar, cast
-from prefect import get_client
-from prefect.client.orchestration import PrefectClient
-from prefect.client.schemas import FlowRun
-from prefect.context import AsyncClientContext
-from prefect.deployments.flow_runs import run_deployment
-from prefect.exceptions import ObjectNotFound
-from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
-from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
-from ai_pipeline_core.pipeline.options import FlowOptions
-from ai_pipeline_core.settings import settings
-P = ParamSpec("P")
-TOptions = TypeVar("TOptions", bound=FlowOptions)
-TResult = TypeVar("TResult", bound=DeploymentResult)
-def _is_already_traced(func: Callable[..., Any]) -> bool:
-    """Check if function or its __wrapped__ has __is_traced__ attribute."""
-    if getattr(func, "__is_traced__", False):
-        return True
-    wrapped = getattr(func, "__wrapped__", None)
-    return getattr(wrapped, "__is_traced__", False) if wrapped else False
-async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]) -> Any:
-    """Run a remote Prefect deployment, trying local client first then remote."""
-    async def _run(client: PrefectClient, as_subflow: bool) -> Any:
-        fr: FlowRun = await run_deployment(client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow)  # type: ignore
-        return await fr.state.result()  # type: ignore
-    async with get_client() as client:
-        try:
-            await client.read_deployment_by_name(name=deployment_name)
-            return await _run(client, True)  # noqa: FBT003
-        except ObjectNotFound:
-            pass
-    if not settings.prefect_api_url:
-        raise ValueError(f"{deployment_name} not found, PREFECT_API_URL not set")
-    async with PrefectClient(
-        api=settings.prefect_api_url,
-        api_key=settings.prefect_api_key,
-        auth_string=settings.prefect_api_auth_string,
-    ) as client:
-        try:
-            await client.read_deployment_by_name(name=deployment_name)
-            ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
-            with ctx:
-                return await _run(client, False)  # noqa: FBT003
-        except ObjectNotFound:
-            pass
-    raise ValueError(f"{deployment_name} deployment not found")
-def remote_deployment(
-    deployment_class: type[PipelineDeployment[TOptions, TResult]],
-    *,
-    deployment_name: str | None = None,
-    name: str | None = None,
-    trace_level: TraceLevel = "always",
-    trace_cost: float | None = None,
-) -> Callable[[Callable[P, TResult]], Callable[P, TResult]]:
-    """Decorator to call PipelineDeployment flows remotely with automatic serialization."""
-    def decorator(func: Callable[P, TResult]) -> Callable[P, TResult]:
-        fname = getattr(func, "__name__", deployment_class.name)
-        if _is_already_traced(func):
-            raise TypeError(f"@remote_deployment target '{fname}' already has @trace")
-        @wraps(func)
-        async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> TResult:
-            sig = inspect.signature(func)
-            bound = sig.bind(*args, **kwargs)
-            bound.apply_defaults()
-            # Pass parameters with proper types - Prefect handles Pydantic serialization
-            parameters: dict[str, Any] = {}
-            for pname, value in bound.arguments.items():
-                if value is None and pname == "context":
-                    parameters[pname] = DeploymentContext()
-                else:
-                    parameters[pname] = value
-            full_name = f"{deployment_class.name}/{deployment_name or deployment_class.name.replace('-', '_')}"
-            result = await run_remote_deployment(full_name, parameters)
-            if trace_cost is not None and trace_cost > 0:
-                set_trace_cost(trace_cost)
-            if isinstance(result, DeploymentResult):
-                return cast(TResult, result)
-            if isinstance(result, dict):
-                return cast(TResult, deployment_class.result_type(**cast(dict[str, Any], result)))
-            raise TypeError(f"Expected DeploymentResult, got {type(result).__name__}")
-        traced_wrapper = trace(
-            level=trace_level,
-            name=name or deployment_class.name,
-        )(_wrapper)
-        return traced_wrapper  # type: ignore[return-value]
-    return decorator