npm - @miller-tech/uap - Versions diffs - 1.22.0 → 1.26.0 - Mend

@miller-tech/uap 1.22.0 → 1.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91)

package/README.md +65 -21
package/dist/.tsbuildinfo +1 -1
package/dist/benchmarks/token-throughput.d.ts +53 -53
package/dist/bin/cli.js +88 -5
package/dist/bin/cli.js.map +1 -1
package/dist/bin/llama-server-optimize.js +0 -0
package/dist/bin/policy.js +0 -0
package/dist/cli/agent.js +1 -1
package/dist/cli/agent.js.map +1 -1
package/dist/cli/droids.d.ts +21 -1
package/dist/cli/droids.d.ts.map +1 -1
package/dist/cli/droids.js +142 -0
package/dist/cli/droids.js.map +1 -1
package/dist/cli/expert-route.d.ts +11 -0
package/dist/cli/expert-route.d.ts.map +1 -0
package/dist/cli/expert-route.js +67 -0
package/dist/cli/expert-route.js.map +1 -0
package/dist/cli/harness.d.ts +24 -0
package/dist/cli/harness.d.ts.map +1 -0
package/dist/cli/harness.js +84 -0
package/dist/cli/harness.js.map +1 -0
package/dist/cli/hooks.d.ts +13 -2
package/dist/cli/hooks.d.ts.map +1 -1
package/dist/cli/hooks.js +333 -3
package/dist/cli/hooks.js.map +1 -1
package/dist/cli/ideate.d.ts +18 -0
package/dist/cli/ideate.d.ts.map +1 -0
package/dist/cli/ideate.js +148 -0
package/dist/cli/ideate.js.map +1 -0
package/dist/cli/patterns.js +55 -0
package/dist/cli/patterns.js.map +1 -1
package/dist/cli/setup.d.ts.map +1 -1
package/dist/cli/setup.js +14 -1
package/dist/cli/setup.js.map +1 -1
package/dist/coordination/capability-router.d.ts +1 -1
package/dist/coordination/capability-router.d.ts.map +1 -1
package/dist/coordination/capability-router.js +132 -0
package/dist/coordination/capability-router.js.map +1 -1
package/dist/coordination/expert-orchestrator.d.ts +66 -0
package/dist/coordination/expert-orchestrator.d.ts.map +1 -0
package/dist/coordination/expert-orchestrator.js +150 -0
package/dist/coordination/expert-orchestrator.js.map +1 -0
package/dist/coordination/service.d.ts +8 -1
package/dist/coordination/service.d.ts.map +1 -1
package/dist/coordination/service.js +18 -4
package/dist/coordination/service.js.map +1 -1
package/dist/mcp-router/experts/registry.d.ts +54 -0
package/dist/mcp-router/experts/registry.d.ts.map +1 -0
package/dist/mcp-router/experts/registry.js +143 -0
package/dist/mcp-router/experts/registry.js.map +1 -0
package/dist/mcp-router/index.d.ts +2 -0
package/dist/mcp-router/index.d.ts.map +1 -1
package/dist/mcp-router/index.js +1 -0
package/dist/mcp-router/index.js.map +1 -1
package/dist/mcp-router/server.d.ts.map +1 -1
package/dist/mcp-router/server.js +16 -0
package/dist/mcp-router/server.js.map +1 -1
package/dist/mcp-router/tools/execute.d.ts.map +1 -1
package/dist/mcp-router/tools/execute.js +40 -0
package/dist/mcp-router/tools/execute.js.map +1 -1
package/dist/models/planner.d.ts +7 -1
package/dist/models/planner.d.ts.map +1 -1
package/dist/models/planner.js +61 -0
package/dist/models/planner.js.map +1 -1
package/dist/models/types.d.ts +14 -12
package/dist/models/types.d.ts.map +1 -1
package/dist/models/types.js.map +1 -1
package/dist/observability/halo-exporter.d.ts +86 -0
package/dist/observability/halo-exporter.d.ts.map +1 -0
package/dist/observability/halo-exporter.js +139 -0
package/dist/observability/halo-exporter.js.map +1 -0
package/dist/telemetry/session-telemetry.d.ts.map +1 -1
package/dist/telemetry/session-telemetry.js +7 -0
package/dist/telemetry/session-telemetry.js.map +1 -1
package/dist/types/config.d.ts +170 -170
package/docs/architecture/EXPERT_STACK.md +137 -0
package/docs/architecture/PLATFORM_GATING.md +68 -0
package/docs/reference/EXPERT_DROIDS.md +219 -0
package/package.json +1 -1
package/templates/hooks/pre-tool-use-edit-write.sh +29 -8
package/templates/hooks/uap-policy-gate-hermes.sh +42 -0
package/tools/agents/scripts/anthropic_proxy.py +166 -30
package/tools/agents/tests/test_attractor_detection.py +213 -0
package/dist/utils/baseline-metrics.d.ts +0 -21
package/dist/utils/baseline-metrics.d.ts.map +0 -1
package/dist/utils/baseline-metrics.js +0 -111
package/dist/utils/baseline-metrics.js.map +0 -1
package/tools/agents/__pycache__/claude_local_agent.cpython-313.pyc +0 -0
package/tools/agents/__pycache__/opencode_uap_agent.cpython-313.pyc +0 -0
package/tools/agents/scripts/__pycache__/anthropic_proxy.cpython-313.pyc +0 -0
package/tools/agents/tests/__pycache__/test_anthropic_proxy_streaming.cpython-313-pytest-9.0.2.pyc +0 -0

package/tools/agents/scripts/anthropic_proxy.py CHANGED Viewed

@@ -390,6 +390,20 @@ PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD = int(
 PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD = int(
     os.environ.get("PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD", "2")
 )
+# Attractor-aware contamination escape. When the same fault excerpt repeats
+# across consecutive contamination resets the model is in a stable output
+# attractor that the standard kept_last reset cannot escape (the preserved
+# tail re-primes the same fixed-point response). Detect via excerpt hash and
+# respond with a harder reset + corrective injection + temperature bump.
+PROXY_ATTRACTOR_DETECT = os.environ.get(
+    "PROXY_ATTRACTOR_DETECT", "on"
+).lower() not in {"0", "false", "off", "no"}
+PROXY_ATTRACTOR_TEMP_OVERRIDE = float(
+    os.environ.get("PROXY_ATTRACTOR_TEMP_OVERRIDE", "1.20")
+)
+PROXY_ATTRACTOR_FINALIZE_THRESHOLD = max(1, int(
+    os.environ.get("PROXY_ATTRACTOR_FINALIZE_THRESHOLD", "2")
+))
 PROXY_AGENTIC_SUPPLEMENT_MODE = (
     os.environ.get("PROXY_AGENTIC_SUPPLEMENT_MODE", "clean").strip().lower()
 )
@@ -750,6 +764,8 @@ class SessionMonitor:
     invalid_tool_call_streak: int = 0  # consecutive invalid tool arg payloads
     required_tool_miss_streak: int = 0  # required tool turns with no tool call
     contamination_resets: int = 0  # how many contamination resets were applied
+    last_fault_excerpt_hash: str = ""  # hash of last TOOL RESPONSE ISSUE excerpt (attractor detection)
+    attractor_correction_active: bool = False  # next turn uses high-temp escape sampling
     forced_auto_cooldown_turns: int = 0  # temporary auto override turns remaining
     forced_dampener_triggers: int = 0  # number of dampener activations
     arg_preflight_rejections: int = 0  # rejected tool calls from arg preflight
@@ -3586,23 +3602,37 @@ def build_openai_request(
         openai_body["stop"] = anthropic_body["stop_sequences"]
     # Force controlled temperature for tool-call turns to reduce garbled output
-    # Cycle 15 Option 2: use lower temperature after contamination resets
+    # Cycle 15 Option 2: use lower temperature after contamination resets.
+    # Attractor escape: when an attractor correction is active, OVERRIDE the
+    # low-temp default with a HIGH-temp sample so the deterministic output
+    # trajectory has a chance to break. Single-turn override (cleared on
+    # successful tool_use further down in the response handler).
     if has_tools:
         client_temp = openai_body.get("temperature")
         target_temp = PROXY_TOOL_TURN_TEMPERATURE
-        if monitor.contamination_resets > 0:
-            target_temp = min(target_temp, 0.1)
-        if client_temp is None or client_temp > target_temp:
+        attractor_active = getattr(monitor, "attractor_correction_active", False)
+        if attractor_active:
+            target_temp = max(target_temp, PROXY_ATTRACTOR_TEMP_OVERRIDE)
             openai_body["temperature"] = target_temp
-            extra = ""
-            if monitor.contamination_resets > 0:
-                extra = f" (post-contamination reset, resets={monitor.contamination_resets})"
             logger.info(
-                "TOOL TURN TEMP: forcing temperature=%.2f (was %s) for tool-enabled request%s",
+                "TOOL TURN TEMP: ATTRACTOR ESCAPE temperature=%.2f (was %s)",
                 target_temp,
                 client_temp,
-                extra,
             )
+        else:
+            if monitor.contamination_resets > 0:
+                target_temp = min(target_temp, 0.1)
+            if client_temp is None or client_temp > target_temp:
+                openai_body["temperature"] = target_temp
+                extra = ""
+                if monitor.contamination_resets > 0:
+                    extra = f" (post-contamination reset, resets={monitor.contamination_resets})"
+                logger.info(
+                    "TOOL TURN TEMP: forcing temperature=%.2f (was %s) for tool-enabled request%s",
+                    target_temp,
+                    client_temp,
+                    extra,
+                )
     # Convert Anthropic tools to OpenAI function-calling tools
     full_openai_tools: list[dict] = []
@@ -4144,6 +4174,17 @@ def _openai_message_text(openai_resp: dict) -> str:
     return content if isinstance(content, str) else str(content)
+def _hash_fault_excerpt(excerpt: str) -> str:
+    """Stable hash of a fault excerpt for attractor-repeat detection. Lowercased
+    + whitespace-collapsed so trivial rendering differences don't break the match."""
+    if not excerpt:
+        return ""
+    normalized = " ".join(excerpt.lower().split())[:200]
+    if not normalized:
+        return ""
+    return hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16]
 def _extract_openai_tool_calls(openai_resp: dict) -> list[dict]:
     _, message = _extract_openai_choice(openai_resp)
     tool_calls = message.get("tool_calls") or []
@@ -6348,6 +6389,13 @@ async def _apply_malformed_tool_guardrail(
             monitor.invalid_tool_call_streak = 0
             monitor.required_tool_miss_streak = 0
             monitor.last_response_garbled = False
+            if monitor.attractor_correction_active:
+                logger.info(
+                    "ATTRACTOR ESCAPE succeeded: session=%s — tool_use emitted, clearing attractor flag",
+                    session_id,
+                )
+                monitor.attractor_correction_active = False
+                monitor.last_fault_excerpt_hash = ""
         if repair_count > 0:
             monitor.arg_preflight_repairs += repair_count
             logger.info(
@@ -6385,6 +6433,11 @@ async def _apply_malformed_tool_guardrail(
             if raw_args and _is_garbled_tool_arguments(raw_args):
                 arg_excerpt = raw_args[:200].replace("\n", " ")
                 break
+    # Attractor detection — hash the normalized fault excerpt so the
+    # contamination breaker can recognize the same fixed-point response
+    # reappearing across consecutive resets. Whitespace-normalized so trivial
+    # rendering differences don't break the match.
+    monitor.last_fault_excerpt_hash = _hash_fault_excerpt(excerpt)
     logger.warning(
         "TOOL RESPONSE ISSUE: session=%s kind=%s reason=%s malformed=%d invalid=%d required_miss=%d excerpt=%.220s args=%.200s",
         session_id,
@@ -6627,7 +6680,16 @@ def _maybe_apply_session_contamination_breaker(
     # Cycle 15 Option 3: if contamination has already reset N+ times in this
     # session, the model is fundamentally unable to produce valid tool calls.
     # Force finalize so the Droid framework can intervene.
-    max_contamination_resets = 3
+    #
+    # Lower the threshold when an attractor correction has already been
+    # applied — if the corrective injection + temp bump didn't break the
+    # attractor on the next turn, more resets won't help. Cuts wasted retry
+    # budget from 3 resets (~60 min observed) to 2 (~25 min).
+    max_contamination_resets = (
+        PROXY_ATTRACTOR_FINALIZE_THRESHOLD
+        if monitor.attractor_correction_active
+        else 3
+    )
     if monitor.contamination_resets >= max_contamination_resets:
         logger.error(
             "SESSION CONTAMINATION LOOP: session=%s contamination_resets=%d >= %d, forcing finalize",
@@ -6654,26 +6716,89 @@ def _maybe_apply_session_contamination_breaker(
         return updated
     messages = anthropic_body.get("messages", [])
+    # Attractor detection: if the fault excerpt that triggered this reset
+    # hashes to the same value as the *previous* reset's fault excerpt, the
+    # model is in a stable output attractor — keep_last reset preserves the
+    # priming tail that pulls it back in. Apply a harder reset (system +
+    # initial user turn only) plus a corrective injection. Temperature gets
+    # bumped UP on the next turn (see build_openai_request) instead of
+    # the standard post-contamination drop, to break the deterministic
+    # output trajectory.
+    attractor_detected = bool(
+        PROXY_ATTRACTOR_DETECT
+        and monitor.contamination_resets >= 1
+        and monitor.last_fault_excerpt_hash
+        and monitor.last_fault_excerpt_hash
+        == getattr(monitor, "_prev_reset_fault_hash", "")
+    )
+    monitor._prev_reset_fault_hash = monitor.last_fault_excerpt_hash
     keep_last = max(2, PROXY_SESSION_CONTAMINATION_KEEP_LAST)
-    if len(messages) <= keep_last + 1:
+    if not attractor_detected and len(messages) <= keep_last + 1:
         monitor.malformed_tool_streak = 0
         monitor.invalid_tool_call_streak = 0
         monitor.required_tool_miss_streak = 0
         monitor.reset_tool_turn_state(reason="contamination_guardrail_soft_reset")
         return anthropic_body
-    head = messages[:1]
-    tail = messages[-keep_last:]
-    reset_marker = {
-        "role": "user",
-        "content": (
-            "[SESSION RESET: tool-call quality degraded in earlier turns. "
-            "Continue from the recent context and emit valid tool calls with strict JSON arguments only.]"
-        ),
-    }
+    if attractor_detected:
+        # Hard reset: drop the entire trailing context. Keep only the system
+        # turn (if present) and the first user turn so the model has the
+        # original goal but none of the attractor-priming tail.
+        first_user_idx = next(
+            (i for i, m in enumerate(messages) if m.get("role") == "user"),
+            None,
+        )
+        if first_user_idx is None:
+            head = messages[:1]
+        else:
+            head = messages[: first_user_idx + 1]
+        # Phase 2 (PR #192): stronger, more structured intervention wording.
+        # The Phase 1 single-paragraph message + temp 0.95 escaped one
+        # production attractor (2026-05-25 02:39:59 fp:1f7e2c95...) but failed
+        # to escape another (2026-05-24 19:11 fp:d19b7a44...). Increase the
+        # signal-to-noise on the corrective by: (1) splitting MUST/MUST NOT
+        # into bullet points the model attends to better, (2) using ALL CAPS
+        # on the critical negative ("DO NOT narrate"), (3) explicitly naming
+        # the attractor failure mode so the model can recognize and avoid it.
+        reset_marker = {
+            "role": "user",
+            "content": (
+                "[ATTRACTOR INTERVENTION — CRITICAL]\n\n"
+                "Your previous responses REPEATEDLY emitted prose summaries "
+                "instead of tool calls. This is the exact failure mode this "
+                "intervention is designed to break. The trailing conversation "
+                "has been REMOVED.\n\n"
+                "YOUR NEXT RESPONSE MUST:\n"
+                "  1. Begin with a tool_use block (no preamble, no thinking)\n"
+                "  2. Invoke one of the available tools\n"
+                "  3. Take a CONCRETE action toward the original task\n\n"
+                "DO NOT:\n"
+                "  • Summarize what you have done or plan to do\n"
+                "  • Narrate, explain, or describe\n"
+                "  • Emit any prose before the tool_use block\n\n"
+                "Just call the tool."
+            ),
+        }
+        new_messages = head + [reset_marker]
+        monitor.attractor_correction_active = True
+        log_reason = "attractor"
+    else:
+        head = messages[:1]
+        tail = messages[-keep_last:]
+        reset_marker = {
+            "role": "user",
+            "content": (
+                "[SESSION RESET: tool-call quality degraded in earlier turns. "
+                "Continue from the recent context and emit valid tool calls with strict JSON arguments only.]"
+            ),
+        }
+        new_messages = head + [reset_marker] + tail
+        log_reason = "standard"
     updated_body = dict(anthropic_body)
-    updated_body["messages"] = head + [reset_marker] + tail
+    updated_body["messages"] = new_messages
     forced_before = monitor.consecutive_forced_count
     required_miss_before = monitor.required_tool_miss_streak
@@ -6684,15 +6809,26 @@ def _maybe_apply_session_contamination_breaker(
     monitor.no_progress_streak = 0
     monitor.consecutive_forced_count = 0
     monitor.forced_auto_cooldown_turns = 0
-    monitor.reset_tool_turn_state(reason="contamination_guardrail_reset")
-    logger.warning(
-        "SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
-        session_id,
-        len(updated_body["messages"]),
-        bad_streak,
-        forced_before,
-        required_miss_before,
-    )
+    monitor.reset_tool_turn_state(reason=f"contamination_guardrail_reset_{log_reason}")
+    if attractor_detected:
+        logger.warning(
+            "CONTAMINATION ATTRACTOR DETECTED: session=%s hash=%s — hard reset "
+            "applied, kept=%d messages (initial intent only), temp override "
+            "and finalize threshold lowered to %d",
+            session_id,
+            monitor.last_fault_excerpt_hash,
+            len(updated_body["messages"]),
+            PROXY_ATTRACTOR_FINALIZE_THRESHOLD,
+        )
+    else:
+        logger.warning(
+            "SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
+            session_id,
+            len(updated_body["messages"]),
+            bad_streak,
+            forced_before,
+            required_miss_before,
+        )
     return updated_body

package/tools/agents/tests/test_attractor_detection.py ADDED Viewed

@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+"""Unit tests for the attractor-aware contamination-breaker path.
+Validates that a repeated fault-excerpt hash across consecutive contamination
+resets triggers the hard-reset + corrective-injection path, and that the
+standard kept-last path remains unchanged when no repeat is observed.
+"""
+import importlib.util
+import unittest
+from pathlib import Path
+def _load_proxy_module():
+    proxy_path = Path(__file__).resolve().parents[1] / "scripts" / "anthropic_proxy.py"
+    spec = importlib.util.spec_from_file_location("anthropic_proxy", proxy_path)
+    assert spec is not None and spec.loader is not None
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+proxy = _load_proxy_module()
+def _make_monitor(**overrides):
+    m = proxy.SessionMonitor()
+    for k, v in overrides.items():
+        setattr(m, k, v)
+    return m
+def _make_body(n_msgs: int):
+    """Build an anthropic_body with n_msgs messages: one initial user turn, then alternating assistant/user turns (no system message)."""
+    messages = [{"role": "user", "content": "Run a recon on /repos/pay2u."}]
+    for i in range(n_msgs - 1):
+        role = "assistant" if i % 2 == 0 else "user"
+        messages.append({"role": role, "content": f"turn-{i}"})
+    return {
+        "model": "qwen36-35b-a3b-iq4xs",
+        "messages": messages,
+        "tools": [{"name": "Bash", "input_schema": {"type": "object"}}],
+        "tool_choice": {"type": "any"},
+    }
+class TestHashFaultExcerpt(unittest.TestCase):
+    def test_empty_returns_empty(self):
+        self.assertEqual(proxy._hash_fault_excerpt(""), "")
+        self.assertEqual(proxy._hash_fault_excerpt("   "), "")
+    def test_whitespace_normalized(self):
+        a = proxy._hash_fault_excerpt("The   security  architecture is layered.")
+        b = proxy._hash_fault_excerpt("The security architecture is layered.")
+        c = proxy._hash_fault_excerpt("The\nsecurity\narchitecture\nis\nlayered.")
+        self.assertEqual(a, b)
+        self.assertEqual(a, c)
+    def test_case_normalized(self):
+        a = proxy._hash_fault_excerpt("FAIL CLOSED security")
+        b = proxy._hash_fault_excerpt("fail closed security")
+        self.assertEqual(a, b)
+    def test_distinct_excerpts_distinct_hashes(self):
+        a = proxy._hash_fault_excerpt("Pay2U API analysis")
+        b = proxy._hash_fault_excerpt("Different attractor text")
+        self.assertNotEqual(a, b)
+        self.assertEqual(len(a), 16)
+        self.assertEqual(len(b), 16)
+class TestAttractorDetectionPath(unittest.TestCase):
+    """First reset → standard. Second reset with same excerpt → attractor."""
+    def _trip_breaker(self, monitor):
+        # Make the breaker think it should reset.
+        monitor.required_tool_miss_streak = (
+            proxy.PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
+        )
+    def test_first_reset_is_standard(self):
+        monitor = _make_monitor()
+        monitor.last_fault_excerpt_hash = "deadbeefcafebabe"
+        self._trip_breaker(monitor)
+        body = _make_body(n_msgs=20)
+        updated = proxy._maybe_apply_session_contamination_breaker(
+            body, monitor, "test-session"
+        )
+        self.assertEqual(monitor.contamination_resets, 1)
+        self.assertFalse(monitor.attractor_correction_active)
+        # Standard keeps head + reset_marker + last keep_last messages
+        kept_last = max(2, proxy.PROXY_SESSION_CONTAMINATION_KEEP_LAST)
+        self.assertEqual(len(updated["messages"]), 1 + 1 + kept_last)
+        # Reset marker carries the standard wording, not the attractor wording.
+        self.assertIn("SESSION RESET", updated["messages"][1]["content"])
+    def test_second_reset_same_hash_triggers_attractor(self):
+        monitor = _make_monitor()
+        monitor.last_fault_excerpt_hash = "deadbeefcafebabe"
+        # Pretend we've already done one reset with the same fault excerpt.
+        monitor._prev_reset_fault_hash = "deadbeefcafebabe"
+        monitor.contamination_resets = 1
+        self._trip_breaker(monitor)
+        body = _make_body(n_msgs=20)
+        updated = proxy._maybe_apply_session_contamination_breaker(
+            body, monitor, "test-session"
+        )
+        self.assertTrue(monitor.attractor_correction_active)
+        # Hard reset keeps only system + first user (+ corrective marker)
+        # → 2 messages total for this body (first user + marker).
+        self.assertLessEqual(len(updated["messages"]), 3)
+        self.assertIn("ATTRACTOR INTERVENTION", updated["messages"][-1]["content"])
+    def test_second_reset_different_hash_stays_standard(self):
+        monitor = _make_monitor()
+        monitor.last_fault_excerpt_hash = "newhashvalue1234"
+        monitor._prev_reset_fault_hash = "deadbeefcafebabe"
+        monitor.contamination_resets = 1
+        self._trip_breaker(monitor)
+        body = _make_body(n_msgs=20)
+        updated = proxy._maybe_apply_session_contamination_breaker(
+            body, monitor, "test-session"
+        )
+        self.assertFalse(monitor.attractor_correction_active)
+        self.assertIn("SESSION RESET", updated["messages"][1]["content"])
+class TestAttractorFinalizeThreshold(unittest.TestCase):
+    """Once attractor correction is active, finalize fires at the lower
+    threshold instead of waiting for 3 resets."""
+    def test_attractor_lowers_finalize_threshold(self):
+        monitor = _make_monitor()
+        monitor.attractor_correction_active = True
+        # Just at the lowered threshold.
+        monitor.contamination_resets = proxy.PROXY_ATTRACTOR_FINALIZE_THRESHOLD
+        monitor.required_tool_miss_streak = (
+            proxy.PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
+        )
+        body = _make_body(n_msgs=20)
+        updated = proxy._maybe_apply_session_contamination_breaker(
+            body, monitor, "test-session"
+        )
+        # Finalize path strips tools and appends the "respond with plain text" prompt.
+        self.assertNotIn("tools", updated)
+        self.assertNotIn("tool_choice", updated)
+        self.assertIn("plain text only", updated["messages"][-1]["content"])
+    def test_standard_path_keeps_3_reset_budget(self):
+        monitor = _make_monitor()
+        monitor.attractor_correction_active = False
+        # 2 resets done — under the standard 3-reset budget.
+        monitor.contamination_resets = 2
+        monitor.required_tool_miss_streak = (
+            proxy.PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
+        )
+        body = _make_body(n_msgs=20)
+        updated = proxy._maybe_apply_session_contamination_breaker(
+            body, monitor, "test-session"
+        )
+        # Standard reset, not finalize.
+        self.assertIn("tools", updated)
+class TestAttractorPhase2Defaults(unittest.TestCase):
+    """Phase 2 (PR #192) raises the default temp override and strengthens the
+    intervention wording. Verify the defaults the operator gets out of the box."""
+    def test_temp_override_default_is_1_20(self):
+        # Phase 1 default was 0.95; Phase 2 raises to 1.20 after one
+        # production attractor (fp:d19b7a44...) failed to escape at 0.95.
+        self.assertGreaterEqual(proxy.PROXY_ATTRACTOR_TEMP_OVERRIDE, 1.20 - 0.001)
+    def test_intervention_message_has_structured_directives(self):
+        """The Phase 2 wording uses MUST / DO NOT bullets and explicitly
+        names the failure mode. Trigger the attractor path and inspect the
+        injected marker."""
+        monitor = _make_monitor()
+        monitor.last_fault_excerpt_hash = "deadbeefcafebabe"
+        monitor._prev_reset_fault_hash = "deadbeefcafebabe"
+        monitor.contamination_resets = 1
+        monitor.required_tool_miss_streak = (
+            proxy.PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
+        )
+        body = _make_body(n_msgs=20)
+        updated = proxy._maybe_apply_session_contamination_breaker(
+            body, monitor, "test-session"
+        )
+        content = updated["messages"][-1]["content"]
+        # Phase 2 wording signals
+        self.assertIn("CRITICAL", content)
+        self.assertIn("MUST", content)
+        self.assertIn("DO NOT", content)
+        # Specifically forbids the attractor's preferred behaviors
+        self.assertIn("prose", content.lower())
+        # Still has the marker substring callers may grep on
+        self.assertIn("ATTRACTOR INTERVENTION", content)
+if __name__ == "__main__":
+    unittest.main()

package/dist/utils/baseline-metrics.d.ts DELETED Viewed

@@ -1,21 +0,0 @@
-export interface MetricsBaselineResult {
-    timestamp: string;
-    version: string;
-    memory: {
-        heapUsedMB: any;
-        heapTotalMB: any;
-        externalMB: any;
-        rssMB: any;
-    };
-}
-export declare function runBaselineBenchmark(): Promise<void>;
-export declare function generateReport({ improvement, regression, stable: string }: {
-    improvement: any;
-    regression: any;
-    stable: any;
-}, { let, report, ");: report }: {
-    let: any;
-    report?: string | undefined;
-    ");": any;
-}, : any, : any, Date: any): any;
-//# sourceMappingURL=baseline-metrics.d.ts.map

package/dist/utils/baseline-metrics.d.ts.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"baseline-metrics.d.ts","sourceRoot":"","sources":["../../src/utils/baseline-metrics.ts"],"names":[],"mappings":"AAQA,MAAM,WAAW,qBAAqB;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE;QACN,UAAU,MAAC;QACX,WAAW,MAAC;QACZ,UAAU,MAAC;QACX,KAAK,MAAC;KACP,CAAC;CAAA;AA6DJ,wBAAsB,oBAAoB,IAAE,OAAO,CAAC,IAAI,CAAC,CASxD;AAED,wBAAgB,cAAc,CAAC,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAA;;;;CAAA,EAAC,EACvE,GAAG,EAAC,MAA8C,EAAC,GAAG,EACtD,MAAM,EAAG;;;;CAAA,EAAC,AAAD,KAAA,EAAe,KAAA,EAAI,IAAI,KAAA,OAAA"}

package/dist/utils/baseline-metrics.js DELETED Viewed

@@ -1,111 +0,0 @@
-import { writeFileSync, existsSync, mkdirSync } from 'fs';
-import { join } from 'path';
-import { fileURLToPath } from 'url';
-import { dirname } from 'path';
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = dirname(filename);
-queryLatency;
-{
-    p50;
-    p95;
-    p99;
-    avg;
-}
-;
-cachePerformance;
-{
-    hitRate;
-    avgLookups;
-}
-;
-dbPerformance;
-{
-    p50;
-    p95;
-    p99;
-    avg;
-    ;
-    patternMatching;
-    {
-        p50;
-        p95;
-        p99;
-        ;
-        summary;
-    }
-    export function SnapshotBaseline() {
-        const memory = process.memoryUsage();
-        return {
-            timestamp: new Date().toISOString(),
-            version: '1.17.2', ");: memory
-        };
-        {
-            heapUsedMB: Math.round(memory.heapUsed / 1024 / 1024),
-                heapTotalMB;
-            Math.round(memory.heapTotal / 1024 / 1024),
-                externalMB;
-            Math.round(memory.external / 1024 / 1024),
-                rssMB;
-            Math.round(memory.rss / 1024 / 1024),
-            ;
-        }
-        queryLatency: {
-            p50: 0,
-                p95;
-            0,
-                p99;
-            0,
-                avg;
-            0,
-            ;
-        }
-        cachePerformance: {
-            hitRate: 0.85,
-                avgLookups;
-            100,
-            ;
-        }
-        dbPerformance: {
-            p50: 0.5,
-                p95;
-            1.5,
-                p99;
-            3.5,
-                avg;
-            0.5,
-            ;
-        }
-        patternMatching: {
-            p50: 0.2,
-                p95;
-            0.1,
-                p99;
-            0.2,
-            ;
-        }
-        summary: 'Baseline metrics snapshot ready for comparison',
-        ;
-    }
-    ;
-}
-export async function runBaselineBenchmark() {
-    const snapshot = SnapshotBaseline();
-    const docsDir = join(__dirname, '../../docs/performance');
-    if (!existsSync(docs)) {
-        mkdirSync(docs, { recursive: true });
-    }
-    const filePath = join(docs, 'baseline-2026-03-27.json');
-    writeFileSync(filePath, JSON.stringify(snapshot, null, 2));
-    console.log('Baseline saved to:', filePath);
-}
-export function generateReport({ improvement, regression, stable: string }, { let, report = 'Performance Optimization Report\n\n', ");: report }, , , Date) { }
-().toISOString();
-n;
-n `;
-  report += 'Latency\n';");
-  report += 'Improvement: ${improvement.length}\n`;
-report += 'Regression: ${regression.length}\n`;;
-report += 'Stable: ${stable.length}\n`;;
-return report;
-//# sourceMappingURL=baseline-metrics.js.map

package/dist/utils/baseline-metrics.js.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"baseline-metrics.js","sourceRoot":"","sources":["../../src/utils/baseline-metrics.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAA,MAAM,KAAK,CAAC;AAClC,OAAO,EAAE,OAAO,EAAA,MAAM,MAAM,CAAC;AAE7B,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;AAWlC,YAAY,CAAA;AAAC,CAAC;IACZ,GAAG,CAAC;IACJ,GAAG,CAAC;IACJ,GAAG,CAAC;IACJ,GAAG,CAAC;AACN,CAAC;AAAA,CAAC;AACF,gBAAgB,CAAA;AAAC,CAAC;IAChB,OAAO,CAAC;IACR,UAAU,CAAC;AACb,CAAC;AAAA,CAAC;AACF,aAAa,CAAA;AAAC,CAAC;IACb,GAAG,CAAC;IACJ,GAAG,CAAC;IACJ,GAAG,CAAC;IACJ,GAAG,CAAC;IACP,CAAC;IACA,eAAe,CAAA;IAAC,CAAC;QACf,GAAG,CAAC;QACJ,GAAG,CAAC;QACJ,GAAG,CAAC;QACP,CAAC;QACA,OAAO,CAAC;IACV,CAAC;IAED,MAAM,UAAU,gBAAgB;QAC9B,MAAM,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QACrC,OAAO;YACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,OAAO,EAAE,QAAQ,EAAC,GAAG,EACrB,MAAM;SAAC,CAAA;QAAC,CAAC;YACP,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC;gBACrD,WAAW,CAAA;YAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;gBACvD,UAAU,CAAA;YAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC;gBACrD,KAAK,CAAA;YAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC;gBAC7C,AAD8C,JAAA,CAAA;QAC9C,CAAC;QACD,YAAY,EAAE,CAAC;YACb,GAAG,EAAE,CAAC;gBACN,GAAG,CAAA;YAAE,CAAC;gBACN,GAAG,CAAA;YAAE,CAAC;gBACN,GAAG,CAAA;YAAE,CAAC;gBACR,AADS,JAAA,CAAA;QACT,CAAC;QACD,gBAAgB,EAAE,CAAC;YACjB,OAAO,EAAE,IAAI;gBACb,UAAU,CAAA;YAAE,GAAG;gBACjB,AADkB,JAAA,CAAA;QAClB,CAAC;QACD,aAAa,EAAE,CAAC;YACd,GAAG,EAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACV,AADW,JAAA,CAAA;QACX,CAAC;QACD,eAAe,EAAE,CAAC;YAChB,GAAG,EAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACV,AADW,JAAA,CAAA;QACX,CAAC;QACD,OAAO,EAAE,gDAAgD;YAC3D,AAD4D,JAAA,CAAA;IAC5D,CAAC;IAAA,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB;IACxC,MAAM,QAA
Q,GAAG,gBAAgB,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,EAAE,wBAAwB,CAAC,CAAC;IAC1D,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,SAAS,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACvC,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,0BAA0B,CAAC,CAAC;IACxD,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC3D,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,QAAQ,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAA,EAAC,EACvE,GAAG,EAAC,MAAM,GAAG,qCAAqC,EAAC,GAAG,EACtD,MAAM,EAAG,EAAC,AAAD,EAAe,EAAI,IAAI,IAAA,CAAC,AAAD;AAAA,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;AAAE,CAAC,CAAA;AAAC,CAAC,CAAA;;;4BAG3B,WAAW,CAAC,MAAM,IAAI,CAAC;AACjD,MAAM,IAAI,qCAAqC,CAAA;AAC/C,MAAM,IAAI,6BAA6B,CAAA;AACvC,OAAO,MAAM,CAAC"}

package/tools/agents/__pycache__/claude_local_agent.cpython-313.pyc DELETED Viewed

Binary file

package/tools/agents/__pycache__/opencode_uap_agent.cpython-313.pyc DELETED Viewed

Binary file

package/tools/agents/scripts/__pycache__/anthropic_proxy.cpython-313.pyc DELETED Viewed

Binary file

package/tools/agents/tests/__pycache__/test_anthropic_proxy_streaming.cpython-313-pytest-9.0.2.pyc DELETED Viewed

Binary file