PyPI - alpha-engine-lib - Versions diffs - 0.40.0__tar.gz → 0.41.0__tar.gz - Mend

alpha-engine-lib 0.40.0__tar.gz → 0.41.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

{alpha_engine_lib-0.40.0 → alpha_engine_lib-0.41.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alpha-engine-lib
-Version: 0.40.0
+Version: 0.41.0
 Summary: Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README.
 Author: Brian McMahon
 License: Proprietary

{alpha_engine_lib-0.40.0 → alpha_engine_lib-0.41.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "alpha-engine-lib"
-version = "0.40.0"
+version = "0.41.0"
 description = "Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README."
 readme = "README.md"
 # EC2 still runs Python 3.9 on the always-on micro instance (boto3 drops

{alpha_engine_lib-0.40.0 → alpha_engine_lib-0.41.0}/src/alpha_engine_lib/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """alpha-engine-lib — shared utilities for Alpha Engine modules."""
-__version__ = "0.40.0"
+__version__ = "0.41.0"

{alpha_engine_lib-0.40.0 → alpha_engine_lib-0.41.0}/src/alpha_engine_lib/anthropic_payload.py RENAMED Viewed

@@ -215,3 +215,95 @@ def build_messages_payload(
     validate_payload(payload)
     return payload
+def build_batches_request_params(
+    *,
+    custom_id: str,
+    model: str,
+    max_tokens: int,
+    user_content: str,
+    tools: list[dict] | None = None,
+    tool_choice: dict[str, Any] | None = None,
+    system_prompt: str | None = None,
+    cache_system: bool = False,
+    extra: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Construct one entry of the ``messages.batches.create`` ``requests`` array.
+
+    The Anthropic Batches API takes a list of ``{"custom_id", "params"}``
+    dicts, where each ``params`` value is a kwargs dict for an underlying
+    ``messages.create()`` call. This helper builds one such entry,
+    validating the embedded payload via :func:`validate_payload`.
+
+    Differs from :func:`build_messages_payload` along three axes the
+    judge-batch path requires:
+
+    1. **Optional system prompt.** Synchronous callers nearly always have
+       a static system prompt (the lib default caches it); judge batches
+       inject the entire rubric into the user message and have no system
+       block. Pass ``system_prompt=None`` (the default) to emit no
+       system block at all.
+    2. **No cache_control by default.** The Batches API discounts every
+       call 50% before prompt caching applies; the marginal value of
+       caching is small enough that the existing judge path opts out.
+       ``cache_system=False`` is the default for this reason; pass
+       ``cache_system=True`` explicitly if the system prompt is large
+       enough to benefit.
+    3. **Explicit tool_choice.** Forced tool calls (
+       ``{"type": "tool", "name": ...}``) are the dominant Batches use
+       case (structured-output via a known schema). Pass ``tool_choice``
+       directly rather than smuggling through ``extra``.
+
+    All :func:`validate_payload` invariants run against the embedded
+    ``params`` — including the server-tool ⊥ assistant-prefill check —
+    so a future Batches caller that mixes ``web_search`` with a
+    prefill won't reach Anthropic's HTTP 400.
+
+    Args:
+        custom_id: Per-request identifier returned in the batch result.
+            Caller-owned; must be unique within a batch. It is passed
+            through unchanged and is not checked here.
+        model: Anthropic model identifier (e.g. ``"claude-haiku-4-5"``).
+        max_tokens: ``max_tokens`` for the embedded call.
+        user_content: The user-message content (typically the full
+            rendered rubric / prompt body, since batch calls usually
+            omit the system block). Emitted as the single ``user``
+            message in ``messages``.
+        tools: Optional list of tool specs. ``None`` and an empty list
+            both result in no ``tools`` key; otherwise a shallow copy of
+            the list is stored.
+        tool_choice: Optional tool-choice spec (e.g.
+            ``{"type": "tool", "name": "RubricEvalLLMOutput"}`` to force
+            structured output via a specific tool). Stored as given
+            (not copied).
+        system_prompt: Optional system-prompt text. When ``None`` (the
+            default), no ``system`` block is emitted; otherwise it is
+            emitted as a one-element list of text blocks.
+        cache_system: When ``True``, attach ``cache_control: ephemeral``
+            to the system block. Default ``False`` because Batches
+            already discounts 50% and the marginal cache value is small.
+            Ignored when ``system_prompt is None``.
+        extra: Optional dict merged into ``params`` after construction
+            (e.g. ``metadata``, ``stop_sequences``). Keys in ``extra``
+            overwrite same-named keys built from the arguments above,
+            ``messages`` included. Validation runs AFTER the merge.
+
+    Returns:
+        ``{"custom_id": custom_id, "params": <validated kwargs dict>}``,
+        ready to splat into ``messages.batches.create(requests=[...])``.
+
+    Raises:
+        PayloadInvariantError: On a known-incompatible shape.
+    """
+    # Required core of every embedded ``messages.create()`` call.
+    params: dict[str, Any] = {
+        "model": model,
+        "max_tokens": max_tokens,
+        "messages": [{"role": "user", "content": user_content}],
+    }
+    # ``is not None`` (not truthiness): an empty-string prompt still emits a
+    # system block; only ``None`` suppresses it.
+    if system_prompt is not None:
+        system_block: dict[str, Any] = {"type": "text", "text": system_prompt}
+        if cache_system:
+            system_block["cache_control"] = {"type": "ephemeral"}
+        params["system"] = [system_block]
+    # Truthiness check: an empty list is omitted rather than sent as ``[]``.
+    # ``list(...)`` copies the outer list only; the tool dicts are shared.
+    if tools:
+        params["tools"] = list(tools)
+    if tool_choice is not None:
+        params["tool_choice"] = tool_choice
+    # Merge last so callers can add (or deliberately override) any key; the
+    # validation below therefore sees the final shape that would be sent.
+    if extra:
+        params.update(extra)
+    validate_payload(params)
+    return {"custom_id": custom_id, "params": params}

{alpha_engine_lib-0.40.0 → alpha_engine_lib-0.41.0}/src/alpha_engine_lib/cost.py RENAMED Viewed

@@ -58,6 +58,7 @@ Workstream design: ``alpha-engine-config/private-docs/ROADMAP.md`` line ~1708
 from __future__ import annotations
+import re
 from datetime import date, datetime, timezone
 from importlib import resources
 from pathlib import Path
@@ -68,6 +69,17 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator
 from alpha_engine_lib.decision_capture import ModelMetadata
+# Anthropic SDK model IDs come in two forms: the family alias
+# (e.g. ``claude-haiku-4-5``) and the dated snapshot form
+# (e.g. ``claude-haiku-4-5-20251001``). ``Message.model`` returns the dated
+# form even when the caller requested the alias, but our pricing YAML is
+# keyed on the alias so a new snapshot date doesn't require a card refresh.
+_DATED_SNAPSHOT_SUFFIX_RE = re.compile(r"-\d{8}$")
+def _strip_dated_snapshot_suffix(model_name: str) -> str:
+    """Return ``model_name`` with a trailing ``-YYYYMMDD`` suffix removed.
+
+    Only a dash followed by eight digits at the very end of the name is
+    stripped (e.g. ``claude-haiku-4-5-20251001`` → ``claude-haiku-4-5``);
+    any other name is returned unchanged.
+    """
+    return _DATED_SNAPSHOT_SUFFIX_RE.sub("", model_name)
 if TYPE_CHECKING:
     # Structural Protocol below describes the only attributes we touch on
     # an Anthropic SDK ``Message`` — kept here so that ``anthropic`` does
@@ -172,14 +184,27 @@ class PriceTable(BaseModel):
         component is used for lookup) or a ``date``. The returned card is
         the one whose ``effective_from`` is the latest among cards ≤ ``at``.
-        Raises :exc:`PriceCardLookupError` if the model has no cards or
-        every card's ``effective_from`` is later than ``at``.
+        Lookup tries the model name as-given first; on miss, retries with
+        any trailing ``-YYYYMMDD`` snapshot suffix stripped. This lets the
+        YAML stay keyed on family aliases (``claude-haiku-4-5``) while
+        accepting the dated form (``claude-haiku-4-5-20251001``) that the
+        Anthropic SDK returns in ``Message.model``.
+        Raises :exc:`PriceCardLookupError` if neither form matches.
         """
         query_date = at.date() if isinstance(at, datetime) else at
-        candidates = [
-            c for c in self.cards
-            if c.model_name == model_name and c.effective_from <= query_date
-        ]
+        def _candidates_for(name: str) -> list[PriceCard]:
+            return [
+                c for c in self.cards
+                if c.model_name == name and c.effective_from <= query_date
+            ]
+        candidates = _candidates_for(model_name)
+        if not candidates:
+            alias = _strip_dated_snapshot_suffix(model_name)
+            if alias != model_name:
+                candidates = _candidates_for(alias)
         if not candidates:
             raise PriceCardLookupError(
                 f"No price card for model {model_name!r} active on {query_date}"

{alpha_engine_lib-0.40.0 → alpha_engine_lib-0.41.0}/src/alpha_engine_lib.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alpha-engine-lib
-Version: 0.40.0
+Version: 0.41.0
 Summary: Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README.
 Author: Brian McMahon
 License: Proprietary

{alpha_engine_lib-0.40.0 → alpha_engine_lib-0.41.0}/tests/test_anthropic_payload.py RENAMED Viewed

@@ -30,6 +30,7 @@ from alpha_engine_lib.anthropic_payload import (
     DEFAULT_WEB_SEARCH_MAX_USES,
     SERVER_TOOL_PREFIXES,
     PayloadInvariantError,
+    build_batches_request_params,
     build_messages_payload,
     build_web_search_tool,
     validate_payload,
@@ -271,3 +272,119 @@ def test_build_messages_payload_morning_signal_replication():
     assert payload["tools"][0]["max_uses"] == 20
     assert len(payload["messages"]) == 1  # no assistant prefill
     assert opener in payload["messages"][0]["content"]
+# ── build_batches_request_params ─────────────────────────────────────────────
+_FORCE_TOOL_CHOICE = {"type": "tool", "name": "RubricEvalLLMOutput"}
+def _custom_tool_spec():
+    """Return a non-server-side tool spec, as the judge batch uses for
+    structured output.
+
+    A new dict is built on every call, so tests can compare against a
+    fresh copy without sharing mutable state. The ``name`` matches the
+    one used in ``_FORCE_TOOL_CHOICE``.
+    """
+    return {
+        "name": "RubricEvalLLMOutput",
+        "description": "Emit the rubric eval payload as structured JSON.",
+        "input_schema": {
+            "type": "object",
+            "properties": {"score": {"type": "integer"}},
+            "required": ["score"],
+        },
+    }
+def test_build_batches_request_params_judge_shape():
+    """Pin the judge-batch request envelope.
+
+    Replicates the alpha-engine-research judge call shape: no system
+    prompt, custom tool, forced tool_choice, no caching. Locks the
+    minimal viable Batches request envelope the judge actually ships.
+    """
+    req = build_batches_request_params(
+        custom_id="judge-abc-123",
+        model="claude-haiku-4-5",
+        max_tokens=2048,
+        user_content="Rubric prompt body here…",
+        tools=[_custom_tool_spec()],
+        tool_choice=_FORCE_TOOL_CHOICE,
+    )
+    # Envelope: the caller's id is passed through untouched.
+    assert req["custom_id"] == "judge-abc-123"
+    params = req["params"]
+    assert params["model"] == "claude-haiku-4-5"
+    assert params["max_tokens"] == 2048
+    # The user content becomes the one and only message.
+    assert params["messages"] == [{"role": "user", "content": "Rubric prompt body here…"}]
+    assert params["tools"] == [_custom_tool_spec()]
+    assert params["tool_choice"] == _FORCE_TOOL_CHOICE
+    # No system prompt by default — judge inlines rubric into user content.
+    assert "system" not in params
+def test_build_batches_request_params_with_system_prompt_no_cache_default():
+    """A provided system prompt lands as a one-element system array.
+
+    Caching is OFF by default for batches, so the block carries only
+    ``type`` and ``text``.
+    """
+    req = build_batches_request_params(
+        custom_id="x",
+        model="claude-sonnet-4-6",
+        max_tokens=256,
+        user_content="u",
+        system_prompt="You are a helpful assistant.",
+    )
+    sys_blocks = req["params"]["system"]
+    assert sys_blocks == [{"type": "text", "text": "You are a helpful assistant."}]
+    # Implied by the equality above; kept to state the no-cache default
+    # explicitly.
+    assert "cache_control" not in sys_blocks[0]
+def test_build_batches_request_params_with_system_prompt_cache_opt_in():
+    """``cache_system=True`` attaches ephemeral ``cache_control``.
+
+    This is the opt-in path for batches with large repeated system
+    prompts; the marker is expected on the single system block.
+    """
+    req = build_batches_request_params(
+        custom_id="x",
+        model="claude-sonnet-4-6",
+        max_tokens=256,
+        user_content="u",
+        system_prompt="Large repeated system prompt.",
+        cache_system=True,
+    )
+    assert req["params"]["system"][0]["cache_control"] == {"type": "ephemeral"}
+def test_build_batches_request_params_validates_server_tool_prefill_invariant():
+    """The Batches builder honors the same server-tool ⊥ assistant-prefill
+    invariant as the sync builder — caught via ``extra`` smuggling.
+
+    The builder itself only ever emits a single user message, so the
+    assistant prefill is injected by overriding ``messages`` through
+    ``extra``; this also shows that validation runs after the merge.
+    """
+    with pytest.raises(PayloadInvariantError):
+        build_batches_request_params(
+            custom_id="x",
+            model="claude-sonnet-4-6",
+            max_tokens=256,
+            user_content="u",
+            tools=[build_web_search_tool()],
+            # Replaces the generated ``messages`` with a conversation that
+            # ends on an assistant turn, i.e. a prefill.
+            extra={
+                "messages": [
+                    {"role": "user", "content": "hi"},
+                    {"role": "assistant", "content": "Y"},
+                ]
+            },
+        )
+def test_build_batches_request_params_no_system_no_tools_minimal():
+    """Minimal shape: only model + max_tokens + messages.
+
+    Pins that optional fields left at their defaults don't leak ``None``
+    keys (``system``, ``tools``, ``tool_choice``) into the payload.
+    """
+    req = build_batches_request_params(
+        custom_id="x",
+        model="claude-haiku-4-5",
+        max_tokens=64,
+        user_content="ping",
+    )
+    params = req["params"]
+    # Exact key-set comparison: any additional key fails the test.
+    assert set(params.keys()) == {"model", "max_tokens", "messages"}
+def test_build_batches_request_params_extra_merges_into_params():
+    """``extra`` keys merge into ``params``.
+
+    Example use: ``metadata`` for batch-side observability. The call
+    also passes through validation, which must not reject the merged
+    payload.
+    """
+    req = build_batches_request_params(
+        custom_id="x",
+        model="claude-haiku-4-5",
+        max_tokens=64,
+        user_content="u",
+        extra={"metadata": {"user_id": "judge-v3"}},
+    )
+    assert req["params"]["metadata"] == {"user_id": "judge-v3"}

{alpha_engine_lib-0.40.0 → alpha_engine_lib-0.41.0}/tests/test_cost.py RENAMED Viewed

@@ -155,6 +155,47 @@ class TestPriceTableLookup:
             self.table.get("haiku", date(2025, 12, 31))
+class TestPriceTableLookupDatedSnapshotSuffix:
+    """Anthropic SDK returns ``Message.model`` in the dated snapshot form
+    (e.g. ``claude-haiku-4-5-20251001``) even when the caller requested
+    the alias; the YAML is keyed on the alias. Lookup must accept both.
+    """
+    def setup_method(self):
+        # Table keyed on family aliases only — no dated-snapshot cards.
+        # NOTE(review): ``_card`` is defined outside this hunk; the three
+        # ints are presumably the card's effective-from year/month/day and
+        # ``in_p`` its input price — confirm against the helper.
+        self.table = PriceTable(cards=[
+            _card("claude-haiku-4-5", 2026, 1, 1, in_p=1.0),
+            _card("claude-sonnet-4-6", 2026, 1, 1, in_p=3.0),
+        ])
+    def test_dated_suffix_falls_back_to_alias(self):
+        # No exact card for the dated name, so the alias card is returned.
+        c = self.table.get("claude-haiku-4-5-20251001", date(2026, 5, 28))
+        assert c.input_per_1m == 1.0
+    def test_alias_lookup_unchanged(self):
+        # Regression guard: a plain alias lookup still resolves directly.
+        c = self.table.get("claude-haiku-4-5", date(2026, 5, 28))
+        assert c.input_per_1m == 1.0
+    def test_exact_dated_match_wins_over_alias_fallback(self):
+        # If someone adds a dated card explicitly, it takes precedence.
+        table = PriceTable(cards=[
+            _card("claude-haiku-4-5", 2026, 1, 1, in_p=1.0),
+            _card("claude-haiku-4-5-20251001", 2026, 1, 1, in_p=9.99),
+        ])
+        c = table.get("claude-haiku-4-5-20251001", date(2026, 5, 28))
+        assert c.input_per_1m == 9.99
+    def test_unknown_alias_with_dated_suffix_still_hard_fails(self):
+        # Stripping the suffix must not mask a genuinely unknown model; the
+        # error message is expected to contain the name exactly as queried.
+        with pytest.raises(
+            PriceCardLookupError, match="claude-foo-9-9-20251001"
+        ):
+            self.table.get("claude-foo-9-9-20251001", date(2026, 5, 28))
+    def test_non_dated_suffix_is_not_stripped(self):
+        # The eight digits follow a dot, not a dash, so this is not a
+        # ``-YYYYMMDD`` snapshot suffix → no normalization, lookup fails.
+        with pytest.raises(PriceCardLookupError):
+            self.table.get("claude-haiku-4-5.20251001", date(2026, 5, 28))
 # ── compute_cost ──────────────────────────────────────────────────────────