PyPI - prehend - Versions diffs - 0.2.0__py3-none-any.whl - Mend

prehend 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

prehend/__init__.py +19 -0
prehend/clients/__init__.py +59 -0
prehend/clients/anthropic.py +120 -0
prehend/clients/azure_openai.py +152 -0
prehend/clients/base_lm.py +43 -0
prehend/clients/coordination.py +164 -0
prehend/clients/gemini.py +172 -0
prehend/clients/openai.py +564 -0
prehend/clients/portkey.py +104 -0
prehend/clients/scheduler.py +321 -0
prehend/core/__init__.py +0 -0
prehend/core/comms_utils.py +270 -0
prehend/core/lm_handler.py +430 -0
prehend/core/rlm.py +1270 -0
prehend/core/srlm.py +459 -0
prehend/core/types.py +303 -0
prehend/core/verifier.py +215 -0
prehend/environments/__init__.py +82 -0
prehend/environments/base_env.py +388 -0
prehend/environments/constants.py +32 -0
prehend/environments/daytona_repl.py +708 -0
prehend/environments/docker_repl.py +355 -0
prehend/environments/e2b_repl.py +515 -0
prehend/environments/ipython_repl.py +1521 -0
prehend/environments/local_repl.py +765 -0
prehend/environments/modal_repl.py +518 -0
prehend/environments/prime_repl.py +604 -0
prehend/logger/__init__.py +4 -0
prehend/logger/rlm_logger.py +91 -0
prehend/logger/verbose.py +538 -0
prehend/memory/__init__.py +54 -0
prehend/memory/bank.py +95 -0
prehend/memory/distill.py +147 -0
prehend/memory/embed.py +67 -0
prehend/memory/embed_openai.py +35 -0
prehend/memory/factory.py +94 -0
prehend/memory/harness.py +116 -0
prehend/memory/inject.py +56 -0
prehend/memory/pruning_rules.py +57 -0
prehend/memory/reflect.py +62 -0
prehend/memory/retrieve.py +102 -0
prehend/memory/tagger.py +25 -0
prehend/metrics.py +404 -0
prehend/utils/__init__.py +0 -0
prehend/utils/exceptions.py +73 -0
prehend/utils/parsing.py +122 -0
prehend/utils/prompts.py +195 -0
prehend/utils/rlm_utils.py +12 -0
prehend/utils/token_utils.py +143 -0
prehend-0.2.0.dist-info/METADATA +229 -0
prehend-0.2.0.dist-info/RECORD +54 -0
prehend-0.2.0.dist-info/WHEEL +5 -0
prehend-0.2.0.dist-info/licenses/LICENSE +21 -0
prehend-0.2.0.dist-info/top_level.txt +1 -0

prehend/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+from prehend.core.rlm import RLM
+from prehend.core.srlm import SRLM
+from prehend.utils.exceptions import (
+    BudgetExceededError,
+    CancellationError,
+    ErrorThresholdExceededError,
+    TimeoutExceededError,
+    TokenLimitExceededError,
+)
+__all__ = [
+    "RLM",
+    "SRLM",
+    "BudgetExceededError",
+    "TimeoutExceededError",
+    "TokenLimitExceededError",
+    "ErrorThresholdExceededError",
+    "CancellationError",
+]

prehend/clients/__init__.py ADDED Viewed

@@ -0,0 +1,59 @@
+from typing import Any
+from dotenv import load_dotenv
+from prehend.clients.base_lm import BaseLM
+from prehend.core.types import ClientBackend
+load_dotenv()
+def get_client(
+    backend: ClientBackend,
+    backend_kwargs: dict[str, Any],
+) -> BaseLM:
+    """
+    Routes a specific backend and the args (as a dict) to the appropriate client if supported.
+    Currently supported backends: ['openai']
+    """
+    if backend == "openai":
+        from prehend.clients.openai import OpenAIClient
+        return OpenAIClient(**backend_kwargs)
+    elif backend == "vllm":
+        from prehend.clients.openai import OpenAIClient
+        assert "base_url" in backend_kwargs, (
+            "base_url is required to be set to local vLLM server address for vLLM"
+        )
+        return OpenAIClient(**backend_kwargs)
+    elif backend == "portkey":
+        from prehend.clients.portkey import PortkeyClient
+        return PortkeyClient(**backend_kwargs)
+    elif backend == "openrouter":
+        from prehend.clients.openai import OpenAIClient
+        backend_kwargs.setdefault("base_url", "https://openrouter.ai/api/v1")
+        return OpenAIClient(**backend_kwargs)
+    elif backend == "vercel":
+        from prehend.clients.openai import OpenAIClient
+        backend_kwargs.setdefault("base_url", "https://ai-gateway.vercel.sh/v1")
+        return OpenAIClient(**backend_kwargs)
+    elif backend == "anthropic":
+        from prehend.clients.anthropic import AnthropicClient
+        return AnthropicClient(**backend_kwargs)
+    elif backend == "gemini":
+        from prehend.clients.gemini import GeminiClient
+        return GeminiClient(**backend_kwargs)
+    elif backend == "azure_openai":
+        from prehend.clients.azure_openai import AzureOpenAIClient
+        return AzureOpenAIClient(**backend_kwargs)
+    else:
+        raise ValueError(
+            f"Unknown backend: {backend}. Supported backends: ['openai', 'vllm', 'portkey', 'openrouter', 'anthropic', 'azure_openai', 'gemini', 'vercel']"
+        )

prehend/clients/anthropic.py ADDED Viewed

@@ -0,0 +1,120 @@
+from collections import defaultdict
+from typing import Any
+import anthropic
+from prehend.clients.base_lm import BaseLM
+from prehend.core.types import ModelUsageSummary, UsageSummary
+class AnthropicClient(BaseLM):
+    """
+    LM Client for running models with the Anthropic API.
+    """
+    def __init__(
+        self,
+        api_key: str,
+        model_name: str | None = None,
+        max_tokens: int = 32768,
+        **kwargs,
+    ):
+        super().__init__(model_name=model_name, **kwargs)
+        self.client = anthropic.Anthropic(api_key=api_key, timeout=self.timeout)
+        self.async_client = anthropic.AsyncAnthropic(api_key=api_key, timeout=self.timeout)
+        self.model_name = model_name
+        self.max_tokens = max_tokens
+        # Per-model usage tracking
+        self.model_call_counts: dict[str, int] = defaultdict(int)
+        self.model_input_tokens: dict[str, int] = defaultdict(int)
+        self.model_output_tokens: dict[str, int] = defaultdict(int)
+        self.model_total_tokens: dict[str, int] = defaultdict(int)
+    def completion(
+        self,
+        prompt: str | list[dict[str, Any]],
+        model: str | None = None,
+        priority: str | int | None = None,  # accepted for interface parity; no scheduler here
+    ) -> str:
+        messages, system = self._prepare_messages(prompt)
+        model = model or self.model_name
+        if not model:
+            raise ValueError("Model name is required for Anthropic client.")
+        kwargs = {"model": model, "max_tokens": self.max_tokens, "messages": messages}
+        if system:
+            kwargs["system"] = system
+        response = self.client.messages.create(**kwargs)
+        self._track_cost(response, model)
+        return response.content[0].text
+    async def acompletion(
+        self,
+        prompt: str | list[dict[str, Any]],
+        model: str | None = None,
+        priority: str | int | None = None,
+    ) -> str:
+        messages, system = self._prepare_messages(prompt)
+        model = model or self.model_name
+        if not model:
+            raise ValueError("Model name is required for Anthropic client.")
+        kwargs = {"model": model, "max_tokens": self.max_tokens, "messages": messages}
+        if system:
+            kwargs["system"] = system
+        response = await self.async_client.messages.create(**kwargs)
+        self._track_cost(response, model)
+        return response.content[0].text
+    def _prepare_messages(
+        self, prompt: str | list[dict[str, Any]]
+    ) -> tuple[list[dict[str, Any]], str | None]:
+        """Prepare messages and extract system prompt for Anthropic API."""
+        system = None
+        if isinstance(prompt, str):
+            messages = [{"role": "user", "content": prompt}]
+        elif isinstance(prompt, list) and all(isinstance(item, dict) for item in prompt):
+            # Extract system message if present (Anthropic handles system separately)
+            messages = []
+            for msg in prompt:
+                if msg.get("role") == "system":
+                    system = msg.get("content")
+                else:
+                    messages.append(msg)
+        else:
+            raise ValueError(f"Invalid prompt type: {type(prompt)}")
+        return messages, system
+    def _track_cost(self, response: anthropic.types.Message, model: str):
+        self.model_call_counts[model] += 1
+        self.model_input_tokens[model] += response.usage.input_tokens
+        self.model_output_tokens[model] += response.usage.output_tokens
+        self.model_total_tokens[model] += response.usage.input_tokens + response.usage.output_tokens
+        # Track last call for handler to read
+        self.last_prompt_tokens = response.usage.input_tokens
+        self.last_completion_tokens = response.usage.output_tokens
+    def get_usage_summary(self) -> UsageSummary:
+        model_summaries = {}
+        for model in self.model_call_counts:
+            model_summaries[model] = ModelUsageSummary(
+                total_calls=self.model_call_counts[model],
+                total_input_tokens=self.model_input_tokens[model],
+                total_output_tokens=self.model_output_tokens[model],
+            )
+        return UsageSummary(model_usage_summaries=model_summaries)
+    def get_last_usage(self) -> ModelUsageSummary:
+        return ModelUsageSummary(
+            total_calls=1,
+            total_input_tokens=self.last_prompt_tokens,
+            total_output_tokens=self.last_completion_tokens,
+        )

prehend/clients/azure_openai.py ADDED Viewed

@@ -0,0 +1,152 @@
+import os
+from collections import defaultdict
+from typing import Any
+import openai
+from dotenv import load_dotenv
+from prehend.clients.base_lm import BaseLM
+from prehend.core.types import ModelUsageSummary, UsageSummary
+load_dotenv()
+# Load API key from environment variable
+DEFAULT_AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
+class AzureOpenAIClient(BaseLM):
+    """
+    LM Client for running models with the Azure OpenAI API.
+    """
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model_name: str | None = None,
+        azure_endpoint: str | None = None,
+        api_version: str | None = None,
+        azure_deployment: str | None = None,
+        **kwargs,
+    ):
+        super().__init__(model_name=model_name, **kwargs)
+        if api_key is None:
+            api_key = DEFAULT_AZURE_OPENAI_API_KEY
+        if azure_endpoint is None:
+            azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+        if api_version is None:
+            api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-01")
+        if azure_deployment is None:
+            azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
+        if azure_endpoint is None:
+            raise ValueError(
+                "azure_endpoint is required for Azure OpenAI client. "
+                "Set it via argument or AZURE_OPENAI_ENDPOINT environment variable."
+            )
+        self.client = openai.AzureOpenAI(
+            api_key=api_key,
+            azure_endpoint=azure_endpoint,
+            api_version=api_version,
+            azure_deployment=azure_deployment,
+            timeout=self.timeout,
+        )
+        self.async_client = openai.AsyncAzureOpenAI(
+            api_key=api_key,
+            azure_endpoint=azure_endpoint,
+            api_version=api_version,
+            azure_deployment=azure_deployment,
+            timeout=self.timeout,
+        )
+        self.model_name = model_name
+        self.azure_deployment = azure_deployment
+        # Per-model usage tracking
+        self.model_call_counts: dict[str, int] = defaultdict(int)
+        self.model_input_tokens: dict[str, int] = defaultdict(int)
+        self.model_output_tokens: dict[str, int] = defaultdict(int)
+        self.model_total_tokens: dict[str, int] = defaultdict(int)
+    def completion(
+        self,
+        prompt: str | list[dict[str, Any]],
+        model: str | None = None,
+        priority: str | int | None = None,  # accepted for interface parity; no scheduler here
+    ) -> str:
+        if isinstance(prompt, str):
+            messages = [{"role": "user", "content": prompt}]
+        elif isinstance(prompt, list) and all(isinstance(item, dict) for item in prompt):
+            messages = prompt
+        else:
+            raise ValueError(f"Invalid prompt type: {type(prompt)}")
+        model = model or self.model_name
+        if not model:
+            raise ValueError("Model name is required for Azure OpenAI client.")
+        response = self.client.chat.completions.create(
+            model=model,
+            messages=messages,
+        )
+        self._track_cost(response, model)
+        return response.choices[0].message.content
+    async def acompletion(
+        self,
+        prompt: str | list[dict[str, Any]],
+        model: str | None = None,
+        priority: str | int | None = None,
+    ) -> str:
+        if isinstance(prompt, str):
+            messages = [{"role": "user", "content": prompt}]
+        elif isinstance(prompt, list) and all(isinstance(item, dict) for item in prompt):
+            messages = prompt
+        else:
+            raise ValueError(f"Invalid prompt type: {type(prompt)}")
+        model = model or self.model_name
+        if not model:
+            raise ValueError("Model name is required for Azure OpenAI client.")
+        response = await self.async_client.chat.completions.create(
+            model=model,
+            messages=messages,
+        )
+        self._track_cost(response, model)
+        return response.choices[0].message.content
+    def _track_cost(self, response: openai.ChatCompletion, model: str):
+        self.model_call_counts[model] += 1
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            raise ValueError("No usage data received. Tracking tokens not possible.")
+        self.model_input_tokens[model] += usage.prompt_tokens
+        self.model_output_tokens[model] += usage.completion_tokens
+        self.model_total_tokens[model] += usage.total_tokens
+        # Track last call for handler to read
+        self.last_prompt_tokens = usage.prompt_tokens
+        self.last_completion_tokens = usage.completion_tokens
+    def get_usage_summary(self) -> UsageSummary:
+        model_summaries = {}
+        for model in self.model_call_counts:
+            model_summaries[model] = ModelUsageSummary(
+                total_calls=self.model_call_counts[model],
+                total_input_tokens=self.model_input_tokens[model],
+                total_output_tokens=self.model_output_tokens[model],
+            )
+        return UsageSummary(model_usage_summaries=model_summaries)
+    def get_last_usage(self) -> ModelUsageSummary:
+        return ModelUsageSummary(
+            total_calls=1,
+            total_input_tokens=self.last_prompt_tokens,
+            total_output_tokens=self.last_completion_tokens,
+        )

prehend/clients/base_lm.py ADDED Viewed

@@ -0,0 +1,43 @@
+from abc import ABC, abstractmethod
+from typing import Any
+from prehend.core.types import ModelUsageSummary, UsageSummary
+# Default timeout for LM API calls (in seconds)
+DEFAULT_TIMEOUT: float = 300.0
+class BaseLM(ABC):
+    """
+    Base class for all language model routers / clients. When the RLM makes sub-calls, it currently
+    does so in a model-agnostic way, so this class provides a base interface for all language models.
+    Subclasses must implement completion(), acompletion(), get_usage_summary()
+    and get_last_usage().
+    """
+    def __init__(self, model_name: str | None, timeout: float = DEFAULT_TIMEOUT, **kwargs):
+        """Store the default model name, the request timeout in seconds, and any extra kwargs."""
+        self.model_name = model_name
+        self.timeout = timeout
+        # Extra keyword arguments are kept as given; nothing in this class reads them.
+        self.kwargs = kwargs
+    # NOTE(review): the prompt annotation below says `dict`, but the Anthropic and
+    # Azure clients accept `str | list[dict[str, Any]]` (and an extra `model`
+    # argument) and reject a bare dict -- confirm which contract is intended.
+    @abstractmethod
+    def completion(
+        self, prompt: str | dict[str, Any], priority: str | int | None = None
+    ) -> str:
+        """Run a completion. priority is a scheduling hint ("high"/"low"/"normal" or 1-5);
+        backends without a request scheduler may ignore it."""
+        raise NotImplementedError
+    @abstractmethod
+    async def acompletion(
+        self, prompt: str | dict[str, Any], priority: str | int | None = None
+    ) -> str:
+        """Async variant of completion(); takes the same arguments."""
+        raise NotImplementedError
+    @abstractmethod
+    def get_usage_summary(self) -> UsageSummary:
+        """Get cost summary for all model calls."""
+        raise NotImplementedError
+    @abstractmethod
+    def get_last_usage(self) -> ModelUsageSummary:
+        """Get the last cost summary of the model."""
+        raise NotImplementedError

prehend/clients/coordination.py ADDED Viewed

@@ -0,0 +1,164 @@
+"""Cross-process admission gate for RequestScheduler (two-flock gate+pool).
+Design: docs/superpowers/specs/2026-06-10-cross-process-coordination-design.md.
+Two lock files per server key in a shared coordination directory:
+    <dir>/<key>.gate  - doorway. Normal requests hold SH momentarily on the
+                        way in; a p1 holds EX for its whole run, which freezes
+                        new admissions machine-wide (the cross-process
+                        _waiting_p1 rule) and serializes p1s globally.
+    <dir>/<key>.pool  - the in-flight set. Normal requests hold SH for the
+                        request duration; a p1 takes EX, granted only when
+                        every holder drains (the cross-process _active == 0
+                        rule).
+Crash cleanup is the kernel's: flock drops when an fd closes, including on
+process death. The gate distinguishes only p1 vs everything else; p2-p5
+ordering stays in-process. Same-host processes only (flock does not span
+machines, and network filesystems are explicitly out of scope).
+"""
+import asyncio
+import fcntl
+import logging
+import os
+import threading
+from pathlib import Path
+from prehend.clients.scheduler import Priority
+log = logging.getLogger(__name__)
+# Async acquisition polls LOCK_NB at this interval instead of blocking a
+# thread: a cancelled task cannot interrupt a blocking flock in an executor
+# thread, and that thread would eventually acquire a lock nobody releases.
+POLL_INTERVAL = 0.025
+class CrossProcessGate:
+    """Two-flock readers-writer gate with writer preference.
+    enter()/aenter() acquire for one request; exit() releases one acquisition
+    (non-blocking fd closes, so both sync and async paths use it). Normal
+    requests' pool fds are fungible: exit(NORMAL) closes any one of this
+    process's SH holds, which the kernel treats identically.
+    """
+    def __init__(self, coordination_dir: str | Path, server_key: str):
+        """Set up the lock-file paths and probe that flock works on them.
+        Args:
+            coordination_dir: Directory holding the lock files; created if missing.
+            server_key: Base name of the two lock files, ``<key>.gate`` and ``<key>.pool``.
+        Raises:
+            RuntimeError: If the directory or lock files cannot be created or
+                opened, or the flock probe fails with an OSError other than
+                BlockingIOError.
+        """
+        self._dir = Path(coordination_dir)
+        self._gate_path = self._dir / f"{server_key}.gate"
+        self._pool_path = self._dir / f"{server_key}.pool"
+        # Guards the two fd bookkeeping fields below.
+        self._mu = threading.Lock()
+        self._pool_fds: list[int] = []  # one SH fd per in-flight normal request
+        self._p1_fds: tuple[int, int] | None = None  # (gate_fd, pool_fd) of the active p1
+        # Fail fast: surface an unwritable dir or a no-flock filesystem at
+        # construction, not on request N.
+        try:
+            self._dir.mkdir(parents=True, exist_ok=True)
+            for path in (self._gate_path, self._pool_path):
+                fd = self._open(path)
+                try:
+                    # Non-blocking probe: take a shared lock and drop it at once.
+                    fcntl.flock(fd, fcntl.LOCK_SH | fcntl.LOCK_NB)
+                    fcntl.flock(fd, fcntl.LOCK_UN)
+                except BlockingIOError:
+                    pass  # held EX by a live p1 elsewhere: flock works here
+                finally:
+                    # The probe fd is never kept; request fds are opened per acquisition.
+                    os.close(fd)
+        except OSError as e:
+            raise RuntimeError(
+                f"cross-process coordination unavailable at {self._dir}: {e}"
+            ) from e
+    @staticmethod
+    def _open(path: Path) -> int:
+        return os.open(path, os.O_RDWR | os.O_CREAT, 0o644)
+    def enter(self, priority: int) -> None:
+        """Blocking acquisition for one request. Releases partial holds and
+        re-raises on failure, leaving no lock behind.
+        A CONTENTION_RETRY (p1) request takes an exclusive lock on the gate file
+        and then on the pool file, and keeps both fds until exit(). Any other
+        priority takes a shared lock on the gate, then a shared lock on the
+        pool, closes the gate fd and keeps only the pool fd.
+        """
+        if priority == Priority.CONTENTION_RETRY:
+            gate_fd = self._open(self._gate_path)
+            try:
+                # Gate first, then pool: same order as the normal path below.
+                fcntl.flock(gate_fd, fcntl.LOCK_EX)
+                pool_fd = self._open(self._pool_path)
+                try:
+                    fcntl.flock(pool_fd, fcntl.LOCK_EX)
+                except BaseException:
+                    os.close(pool_fd)
+                    raise
+            except BaseException:
+                # Covers failures of the gate flock, the pool open and the pool flock.
+                os.close(gate_fd)
+                raise
+            with self._mu:
+                # Both fds stay open; exit(CONTENTION_RETRY) closes them.
+                self._p1_fds = (gate_fd, pool_fd)
+        else:
+            gate_fd = self._open(self._gate_path)
+            try:
+                fcntl.flock(gate_fd, fcntl.LOCK_SH)
+                pool_fd = self._open(self._pool_path)
+                try:
+                    fcntl.flock(pool_fd, fcntl.LOCK_SH)
+                except BaseException:
+                    os.close(pool_fd)
+                    raise
+            finally:
+                # The gate is only the doorway: release it whether or not the
+                # pool acquisition succeeded.
+                os.close(gate_fd)
+            with self._mu:
+                # Reached only on success; exit() pops and closes one of these fds.
+                self._pool_fds.append(pool_fd)
+    async def aenter(self, priority: int) -> None:
+        """Async acquisition: LOCK_NB poll loop (POLL_INTERVAL) instead of a
+        blocking flock in an executor thread, so task cancellation can never
+        strand a lock in a thread nobody joins. On any failure, including
+        CancelledError, partial holds are released before re-raising.
+        The lock order matches enter(): gate first, then pool, with the same
+        mode on both (exclusive for CONTENTION_RETRY, shared otherwise).
+        """
+        op = fcntl.LOCK_EX if priority == Priority.CONTENTION_RETRY else fcntl.LOCK_SH
+        gate_fd = self._open(self._gate_path)
+        try:
+            await self._apoll(gate_fd, op)
+            pool_fd = self._open(self._pool_path)
+            try:
+                await self._apoll(pool_fd, op)
+            except BaseException:
+                os.close(pool_fd)
+                raise
+        except BaseException:
+            # Covers failures of the gate poll, the pool open and the pool poll.
+            os.close(gate_fd)
+            raise
+        if priority == Priority.CONTENTION_RETRY:
+            with self._mu:
+                # Both fds stay open; exit(CONTENTION_RETRY) closes them.
+                self._p1_fds = (gate_fd, pool_fd)
+        else:
+            # Normal request: the gate was only the doorway, keep just the pool fd.
+            os.close(gate_fd)
+            with self._mu:
+                self._pool_fds.append(pool_fd)
+    @staticmethod
+    async def _apoll(fd: int, op: int) -> None:
+        while True:
+            try:
+                fcntl.flock(fd, op | fcntl.LOCK_NB)
+                return
+            except BlockingIOError:
+                await asyncio.sleep(POLL_INTERVAL)
+    def exit(self, priority: int) -> None:
+        """Release one acquisition. Never raises: it sits in finally paths,
+        and the locks are released by the fd close regardless."""
+        try:
+            if priority == Priority.CONTENTION_RETRY:
+                with self._mu:
+                    fds, self._p1_fds = self._p1_fds, None
+                if fds is not None:
+                    gate_fd, pool_fd = fds
+                    os.close(pool_fd)
+                    os.close(gate_fd)
+            else:
+                with self._mu:
+                    pool_fd = self._pool_fds.pop() if self._pool_fds else None
+                if pool_fd is not None:
+                    os.close(pool_fd)
+        except OSError as e:
+            log.warning("gate exit failed (locks still released on close): %s", e)