@miller-tech/uap 1.22.0 → 1.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +65 -21
  2. package/dist/.tsbuildinfo +1 -1
  3. package/dist/benchmarks/token-throughput.d.ts +53 -53
  4. package/dist/bin/cli.js +88 -5
  5. package/dist/bin/cli.js.map +1 -1
  6. package/dist/bin/llama-server-optimize.js +0 -0
  7. package/dist/bin/policy.js +0 -0
  8. package/dist/cli/agent.js +1 -1
  9. package/dist/cli/agent.js.map +1 -1
  10. package/dist/cli/droids.d.ts +21 -1
  11. package/dist/cli/droids.d.ts.map +1 -1
  12. package/dist/cli/droids.js +142 -0
  13. package/dist/cli/droids.js.map +1 -1
  14. package/dist/cli/expert-route.d.ts +11 -0
  15. package/dist/cli/expert-route.d.ts.map +1 -0
  16. package/dist/cli/expert-route.js +67 -0
  17. package/dist/cli/expert-route.js.map +1 -0
  18. package/dist/cli/harness.d.ts +24 -0
  19. package/dist/cli/harness.d.ts.map +1 -0
  20. package/dist/cli/harness.js +84 -0
  21. package/dist/cli/harness.js.map +1 -0
  22. package/dist/cli/hooks.d.ts +13 -2
  23. package/dist/cli/hooks.d.ts.map +1 -1
  24. package/dist/cli/hooks.js +333 -3
  25. package/dist/cli/hooks.js.map +1 -1
  26. package/dist/cli/ideate.d.ts +18 -0
  27. package/dist/cli/ideate.d.ts.map +1 -0
  28. package/dist/cli/ideate.js +148 -0
  29. package/dist/cli/ideate.js.map +1 -0
  30. package/dist/cli/patterns.js +55 -0
  31. package/dist/cli/patterns.js.map +1 -1
  32. package/dist/cli/setup.d.ts.map +1 -1
  33. package/dist/cli/setup.js +14 -1
  34. package/dist/cli/setup.js.map +1 -1
  35. package/dist/coordination/capability-router.d.ts +1 -1
  36. package/dist/coordination/capability-router.d.ts.map +1 -1
  37. package/dist/coordination/capability-router.js +132 -0
  38. package/dist/coordination/capability-router.js.map +1 -1
  39. package/dist/coordination/expert-orchestrator.d.ts +66 -0
  40. package/dist/coordination/expert-orchestrator.d.ts.map +1 -0
  41. package/dist/coordination/expert-orchestrator.js +150 -0
  42. package/dist/coordination/expert-orchestrator.js.map +1 -0
  43. package/dist/coordination/service.d.ts +8 -1
  44. package/dist/coordination/service.d.ts.map +1 -1
  45. package/dist/coordination/service.js +18 -4
  46. package/dist/coordination/service.js.map +1 -1
  47. package/dist/mcp-router/experts/registry.d.ts +54 -0
  48. package/dist/mcp-router/experts/registry.d.ts.map +1 -0
  49. package/dist/mcp-router/experts/registry.js +143 -0
  50. package/dist/mcp-router/experts/registry.js.map +1 -0
  51. package/dist/mcp-router/index.d.ts +2 -0
  52. package/dist/mcp-router/index.d.ts.map +1 -1
  53. package/dist/mcp-router/index.js +1 -0
  54. package/dist/mcp-router/index.js.map +1 -1
  55. package/dist/mcp-router/server.d.ts.map +1 -1
  56. package/dist/mcp-router/server.js +16 -0
  57. package/dist/mcp-router/server.js.map +1 -1
  58. package/dist/mcp-router/tools/execute.d.ts.map +1 -1
  59. package/dist/mcp-router/tools/execute.js +40 -0
  60. package/dist/mcp-router/tools/execute.js.map +1 -1
  61. package/dist/models/planner.d.ts +7 -1
  62. package/dist/models/planner.d.ts.map +1 -1
  63. package/dist/models/planner.js +61 -0
  64. package/dist/models/planner.js.map +1 -1
  65. package/dist/models/types.d.ts +14 -12
  66. package/dist/models/types.d.ts.map +1 -1
  67. package/dist/models/types.js.map +1 -1
  68. package/dist/observability/halo-exporter.d.ts +86 -0
  69. package/dist/observability/halo-exporter.d.ts.map +1 -0
  70. package/dist/observability/halo-exporter.js +139 -0
  71. package/dist/observability/halo-exporter.js.map +1 -0
  72. package/dist/telemetry/session-telemetry.d.ts.map +1 -1
  73. package/dist/telemetry/session-telemetry.js +7 -0
  74. package/dist/telemetry/session-telemetry.js.map +1 -1
  75. package/dist/types/config.d.ts +170 -170
  76. package/docs/architecture/EXPERT_STACK.md +137 -0
  77. package/docs/architecture/PLATFORM_GATING.md +68 -0
  78. package/docs/reference/EXPERT_DROIDS.md +219 -0
  79. package/package.json +1 -1
  80. package/templates/hooks/pre-tool-use-edit-write.sh +29 -8
  81. package/templates/hooks/uap-policy-gate-hermes.sh +42 -0
  82. package/tools/agents/scripts/anthropic_proxy.py +166 -30
  83. package/tools/agents/tests/test_attractor_detection.py +213 -0
  84. package/dist/utils/baseline-metrics.d.ts +0 -21
  85. package/dist/utils/baseline-metrics.d.ts.map +0 -1
  86. package/dist/utils/baseline-metrics.js +0 -111
  87. package/dist/utils/baseline-metrics.js.map +0 -1
  88. package/tools/agents/__pycache__/claude_local_agent.cpython-313.pyc +0 -0
  89. package/tools/agents/__pycache__/opencode_uap_agent.cpython-313.pyc +0 -0
  90. package/tools/agents/scripts/__pycache__/anthropic_proxy.cpython-313.pyc +0 -0
  91. package/tools/agents/tests/__pycache__/test_anthropic_proxy_streaming.cpython-313-pytest-9.0.2.pyc +0 -0
@@ -390,6 +390,20 @@ PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD = int(
390
390
  PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD = int(
391
391
  os.environ.get("PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD", "2")
392
392
  )
393
+ # Attractor-aware contamination escape. When the same fault excerpt repeats
394
+ # across consecutive contamination resets the model is in a stable output
395
+ # attractor that the standard kept_last reset cannot escape (the preserved
396
+ # tail re-primes the same fixed-point response). Detect via excerpt hash and
397
+ # respond with a harder reset + corrective injection + temperature bump.
398
+ PROXY_ATTRACTOR_DETECT = os.environ.get(
399
+ "PROXY_ATTRACTOR_DETECT", "on"
400
+ ).lower() not in {"0", "false", "off", "no"}
401
+ PROXY_ATTRACTOR_TEMP_OVERRIDE = float(
402
+ os.environ.get("PROXY_ATTRACTOR_TEMP_OVERRIDE", "1.20")
403
+ )
404
+ PROXY_ATTRACTOR_FINALIZE_THRESHOLD = max(1, int(
405
+ os.environ.get("PROXY_ATTRACTOR_FINALIZE_THRESHOLD", "2")
406
+ ))
393
407
  PROXY_AGENTIC_SUPPLEMENT_MODE = (
394
408
  os.environ.get("PROXY_AGENTIC_SUPPLEMENT_MODE", "clean").strip().lower()
395
409
  )
@@ -750,6 +764,8 @@ class SessionMonitor:
750
764
  invalid_tool_call_streak: int = 0 # consecutive invalid tool arg payloads
751
765
  required_tool_miss_streak: int = 0 # required tool turns with no tool call
752
766
  contamination_resets: int = 0 # how many contamination resets were applied
767
+ last_fault_excerpt_hash: str = "" # hash of last TOOL RESPONSE ISSUE excerpt (attractor detection)
768
+ attractor_correction_active: bool = False # next turn uses high-temp escape sampling
753
769
  forced_auto_cooldown_turns: int = 0 # temporary auto override turns remaining
754
770
  forced_dampener_triggers: int = 0 # number of dampener activations
755
771
  arg_preflight_rejections: int = 0 # rejected tool calls from arg preflight
@@ -3586,23 +3602,37 @@ def build_openai_request(
3586
3602
  openai_body["stop"] = anthropic_body["stop_sequences"]
3587
3603
 
3588
3604
  # Force controlled temperature for tool-call turns to reduce garbled output
3589
- # Cycle 15 Option 2: use lower temperature after contamination resets
3605
+ # Cycle 15 Option 2: use lower temperature after contamination resets.
3606
+ # Attractor escape: when an attractor correction is active, OVERRIDE the
3607
+ # low-temp default with a HIGH-temp sample so the deterministic output
3608
+ # trajectory has a chance to break. Single-turn override (cleared on
3609
+ # successful tool_use further down in the response handler).
3590
3610
  if has_tools:
3591
3611
  client_temp = openai_body.get("temperature")
3592
3612
  target_temp = PROXY_TOOL_TURN_TEMPERATURE
3593
- if monitor.contamination_resets > 0:
3594
- target_temp = min(target_temp, 0.1)
3595
- if client_temp is None or client_temp > target_temp:
3613
+ attractor_active = getattr(monitor, "attractor_correction_active", False)
3614
+ if attractor_active:
3615
+ target_temp = max(target_temp, PROXY_ATTRACTOR_TEMP_OVERRIDE)
3596
3616
  openai_body["temperature"] = target_temp
3597
- extra = ""
3598
- if monitor.contamination_resets > 0:
3599
- extra = f" (post-contamination reset, resets={monitor.contamination_resets})"
3600
3617
  logger.info(
3601
- "TOOL TURN TEMP: forcing temperature=%.2f (was %s) for tool-enabled request%s",
3618
+ "TOOL TURN TEMP: ATTRACTOR ESCAPE temperature=%.2f (was %s)",
3602
3619
  target_temp,
3603
3620
  client_temp,
3604
- extra,
3605
3621
  )
3622
+ else:
3623
+ if monitor.contamination_resets > 0:
3624
+ target_temp = min(target_temp, 0.1)
3625
+ if client_temp is None or client_temp > target_temp:
3626
+ openai_body["temperature"] = target_temp
3627
+ extra = ""
3628
+ if monitor.contamination_resets > 0:
3629
+ extra = f" (post-contamination reset, resets={monitor.contamination_resets})"
3630
+ logger.info(
3631
+ "TOOL TURN TEMP: forcing temperature=%.2f (was %s) for tool-enabled request%s",
3632
+ target_temp,
3633
+ client_temp,
3634
+ extra,
3635
+ )
3606
3636
 
3607
3637
  # Convert Anthropic tools to OpenAI function-calling tools
3608
3638
  full_openai_tools: list[dict] = []
@@ -4144,6 +4174,17 @@ def _openai_message_text(openai_resp: dict) -> str:
4144
4174
  return content if isinstance(content, str) else str(content)
4145
4175
 
4146
4176
 
4177
+ def _hash_fault_excerpt(excerpt: str) -> str:
4178
+ """Stable hash of a fault excerpt for attractor-repeat detection. Lowercased,
4179
+ whitespace-collapsed and cut to 200 chars first, so trivial rendering differences don't break the match."""
4180
+ if not excerpt:
4181
+ return ""
4182
+ normalized = " ".join(excerpt.lower().split())[:200]
4183
+ if not normalized:
4184
+ return ""
4185
+ return hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16]
4186
+
4187
+
4147
4188
  def _extract_openai_tool_calls(openai_resp: dict) -> list[dict]:
4148
4189
  _, message = _extract_openai_choice(openai_resp)
4149
4190
  tool_calls = message.get("tool_calls") or []
@@ -6348,6 +6389,13 @@ async def _apply_malformed_tool_guardrail(
6348
6389
  monitor.invalid_tool_call_streak = 0
6349
6390
  monitor.required_tool_miss_streak = 0
6350
6391
  monitor.last_response_garbled = False
6392
+ if monitor.attractor_correction_active:
6393
+ logger.info(
6394
+ "ATTRACTOR ESCAPE succeeded: session=%s — tool_use emitted, clearing attractor flag",
6395
+ session_id,
6396
+ )
6397
+ monitor.attractor_correction_active = False
6398
+ monitor.last_fault_excerpt_hash = ""
6351
6399
  if repair_count > 0:
6352
6400
  monitor.arg_preflight_repairs += repair_count
6353
6401
  logger.info(
@@ -6385,6 +6433,11 @@ async def _apply_malformed_tool_guardrail(
6385
6433
  if raw_args and _is_garbled_tool_arguments(raw_args):
6386
6434
  arg_excerpt = raw_args[:200].replace("\n", " ")
6387
6435
  break
6436
+ # Attractor detection — hash the normalized fault excerpt so the
6437
+ # contamination breaker can recognize the same fixed-point response
6438
+ # reappearing across consecutive resets. Whitespace-normalized so trivial
6439
+ # rendering differences don't break the match.
6440
+ monitor.last_fault_excerpt_hash = _hash_fault_excerpt(excerpt)
6388
6441
  logger.warning(
6389
6442
  "TOOL RESPONSE ISSUE: session=%s kind=%s reason=%s malformed=%d invalid=%d required_miss=%d excerpt=%.220s args=%.200s",
6390
6443
  session_id,
@@ -6627,7 +6680,16 @@ def _maybe_apply_session_contamination_breaker(
6627
6680
  # Cycle 15 Option 3: if contamination has already reset N+ times in this
6628
6681
  # session, the model is fundamentally unable to produce valid tool calls.
6629
6682
  # Force finalize so the Droid framework can intervene.
6630
- max_contamination_resets = 3
6683
+ #
6684
+ # Lower the threshold when an attractor correction has already been
6685
+ # applied — if the corrective injection + temp bump didn't break the
6686
+ # attractor on the next turn, more resets won't help. Cuts wasted retry
6687
+ # budget from 3 resets (~60 min observed) to 2 (~25 min).
6688
+ max_contamination_resets = (
6689
+ PROXY_ATTRACTOR_FINALIZE_THRESHOLD
6690
+ if monitor.attractor_correction_active
6691
+ else 3
6692
+ )
6631
6693
  if monitor.contamination_resets >= max_contamination_resets:
6632
6694
  logger.error(
6633
6695
  "SESSION CONTAMINATION LOOP: session=%s contamination_resets=%d >= %d, forcing finalize",
@@ -6654,26 +6716,89 @@ def _maybe_apply_session_contamination_breaker(
6654
6716
  return updated
6655
6717
 
6656
6718
  messages = anthropic_body.get("messages", [])
6719
+
6720
+ # Attractor detection: if the fault excerpt that triggered this reset
6721
+ # hashes to the same value as the *previous* reset's fault excerpt, the
6722
+ # model is in a stable output attractor — keep_last reset preserves the
6723
+ # priming tail that pulls it back in. Apply a harder reset (system +
6724
+ # initial user turn only) plus a corrective injection. Temperature gets
6725
+ # bumped UP on the next turn (see the tool-turn temperature logic in build_openai_request) instead of
6726
+ # the standard post-contamination drop, to break the deterministic
6727
+ # output trajectory.
6728
+ attractor_detected = bool(
6729
+ PROXY_ATTRACTOR_DETECT
6730
+ and monitor.contamination_resets >= 1
6731
+ and monitor.last_fault_excerpt_hash
6732
+ and monitor.last_fault_excerpt_hash
6733
+ == getattr(monitor, "_prev_reset_fault_hash", "")
6734
+ )
6735
+ monitor._prev_reset_fault_hash = monitor.last_fault_excerpt_hash
6736
+
6657
6737
  keep_last = max(2, PROXY_SESSION_CONTAMINATION_KEEP_LAST)
6658
- if len(messages) <= keep_last + 1:
6738
+ if not attractor_detected and len(messages) <= keep_last + 1:
6659
6739
  monitor.malformed_tool_streak = 0
6660
6740
  monitor.invalid_tool_call_streak = 0
6661
6741
  monitor.required_tool_miss_streak = 0
6662
6742
  monitor.reset_tool_turn_state(reason="contamination_guardrail_soft_reset")
6663
6743
  return anthropic_body
6664
6744
 
6665
- head = messages[:1]
6666
- tail = messages[-keep_last:]
6667
- reset_marker = {
6668
- "role": "user",
6669
- "content": (
6670
- "[SESSION RESET: tool-call quality degraded in earlier turns. "
6671
- "Continue from the recent context and emit valid tool calls with strict JSON arguments only.]"
6672
- ),
6673
- }
6745
+ if attractor_detected:
6746
+ # Hard reset: drop the entire trailing context. Keep only the system
6747
+ # turn (if present) and the first user turn so the model has the
6748
+ # original goal but none of the attractor-priming tail.
6749
+ first_user_idx = next(
6750
+ (i for i, m in enumerate(messages) if m.get("role") == "user"),
6751
+ None,
6752
+ )
6753
+ if first_user_idx is None:
6754
+ head = messages[:1]
6755
+ else:
6756
+ head = messages[: first_user_idx + 1]
6757
+ # Phase 2 (PR #192): stronger, more structured intervention wording.
6758
+ # The Phase 1 single-paragraph message + temp 0.95 escaped one
6759
+ # production attractor (2026-05-25 02:39:59 fp:1f7e2c95...) but failed
6760
+ # to escape another (2026-05-24 19:11 fp:d19b7a44...). Increase the
6761
+ # signal-to-noise on the corrective by: (1) splitting MUST/MUST NOT
6762
+ # into bullet points the model attends to better, (2) using ALL CAPS
6763
+ # on the critical negative ("DO NOT narrate"), (3) explicitly naming
6764
+ # the attractor failure mode so the model can recognize and avoid it.
6765
+ reset_marker = {
6766
+ "role": "user",
6767
+ "content": (
6768
+ "[ATTRACTOR INTERVENTION — CRITICAL]\n\n"
6769
+ "Your previous responses REPEATEDLY emitted prose summaries "
6770
+ "instead of tool calls. This is the exact failure mode this "
6771
+ "intervention is designed to break. The trailing conversation "
6772
+ "has been REMOVED.\n\n"
6773
+ "YOUR NEXT RESPONSE MUST:\n"
6774
+ " 1. Begin with a tool_use block (no preamble, no thinking)\n"
6775
+ " 2. Invoke one of the available tools\n"
6776
+ " 3. Take a CONCRETE action toward the original task\n\n"
6777
+ "DO NOT:\n"
6778
+ " • Summarize what you have done or plan to do\n"
6779
+ " • Narrate, explain, or describe\n"
6780
+ " • Emit any prose before the tool_use block\n\n"
6781
+ "Just call the tool."
6782
+ ),
6783
+ }
6784
+ new_messages = head + [reset_marker]
6785
+ monitor.attractor_correction_active = True
6786
+ log_reason = "attractor"
6787
+ else:
6788
+ head = messages[:1]
6789
+ tail = messages[-keep_last:]
6790
+ reset_marker = {
6791
+ "role": "user",
6792
+ "content": (
6793
+ "[SESSION RESET: tool-call quality degraded in earlier turns. "
6794
+ "Continue from the recent context and emit valid tool calls with strict JSON arguments only.]"
6795
+ ),
6796
+ }
6797
+ new_messages = head + [reset_marker] + tail
6798
+ log_reason = "standard"
6674
6799
 
6675
6800
  updated_body = dict(anthropic_body)
6676
- updated_body["messages"] = head + [reset_marker] + tail
6801
+ updated_body["messages"] = new_messages
6677
6802
 
6678
6803
  forced_before = monitor.consecutive_forced_count
6679
6804
  required_miss_before = monitor.required_tool_miss_streak
@@ -6684,15 +6809,26 @@ def _maybe_apply_session_contamination_breaker(
6684
6809
  monitor.no_progress_streak = 0
6685
6810
  monitor.consecutive_forced_count = 0
6686
6811
  monitor.forced_auto_cooldown_turns = 0
6687
- monitor.reset_tool_turn_state(reason="contamination_guardrail_reset")
6688
- logger.warning(
6689
- "SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
6690
- session_id,
6691
- len(updated_body["messages"]),
6692
- bad_streak,
6693
- forced_before,
6694
- required_miss_before,
6695
- )
6812
+ monitor.reset_tool_turn_state(reason=f"contamination_guardrail_reset_{log_reason}")
6813
+ if attractor_detected:
6814
+ logger.warning(
6815
+ "CONTAMINATION ATTRACTOR DETECTED: session=%s hash=%s — hard reset "
6816
+ "applied, kept=%d messages (initial intent only), temp override "
6817
+ "and finalize threshold lowered to %d",
6818
+ session_id,
6819
+ monitor.last_fault_excerpt_hash,
6820
+ len(updated_body["messages"]),
6821
+ PROXY_ATTRACTOR_FINALIZE_THRESHOLD,
6822
+ )
6823
+ else:
6824
+ logger.warning(
6825
+ "SESSION CONTAMINATION BREAKER: session=%s reset applied, kept=%d messages (bad_streak=%d forced=%d required_miss=%d)",
6826
+ session_id,
6827
+ len(updated_body["messages"]),
6828
+ bad_streak,
6829
+ forced_before,
6830
+ required_miss_before,
6831
+ )
6696
6832
 
6697
6833
  return updated_body
6698
6834
 
@@ -0,0 +1,213 @@
1
+ #!/usr/bin/env python3
2
+ """Unit tests for the attractor-aware contamination-breaker path.
3
+
4
+ Validates that a repeated fault-excerpt hash across consecutive contamination
5
+ resets triggers the hard-reset + corrective-injection path, and that the
6
+ standard kept-last path remains unchanged when no repeat is observed.
7
+ """
8
+
9
+ import importlib.util
10
+ import unittest
11
+ from pathlib import Path
12
+
13
+
14
+ def _load_proxy_module():
15
+ proxy_path = Path(__file__).resolve().parents[1] / "scripts" / "anthropic_proxy.py"
16
+ spec = importlib.util.spec_from_file_location("anthropic_proxy", proxy_path)
17
+ assert spec is not None and spec.loader is not None
18
+ module = importlib.util.module_from_spec(spec)
19
+ spec.loader.exec_module(module)
20
+ return module
21
+
22
+
23
+ proxy = _load_proxy_module()
24
+
25
+
26
+ def _make_monitor(**overrides):
27
+ m = proxy.SessionMonitor()
28
+ for k, v in overrides.items():
29
+ setattr(m, k, v)
30
+ return m
31
+
32
+
33
+ def _make_body(n_msgs: int):
34
+ """Build an anthropic_body with an initial user turn followed by n_msgs - 1 alternating assistant/user turns (no system message)."""
35
+ messages = [{"role": "user", "content": "Run a recon on /repos/pay2u."}]
36
+ for i in range(n_msgs - 1):
37
+ role = "assistant" if i % 2 == 0 else "user"
38
+ messages.append({"role": role, "content": f"turn-{i}"})
39
+ return {
40
+ "model": "qwen36-35b-a3b-iq4xs",
41
+ "messages": messages,
42
+ "tools": [{"name": "Bash", "input_schema": {"type": "object"}}],
43
+ "tool_choice": {"type": "any"},
44
+ }
45
+
46
+
47
+ class TestHashFaultExcerpt(unittest.TestCase):
48
+ def test_empty_returns_empty(self):
49
+ self.assertEqual(proxy._hash_fault_excerpt(""), "")
50
+ self.assertEqual(proxy._hash_fault_excerpt(" "), "")
51
+
52
+ def test_whitespace_normalized(self):
53
+ a = proxy._hash_fault_excerpt("The security architecture is layered.")
54
+ b = proxy._hash_fault_excerpt("The security architecture is layered.")
55
+ c = proxy._hash_fault_excerpt("The\nsecurity\narchitecture\nis\nlayered.")
56
+ self.assertEqual(a, b)
57
+ self.assertEqual(a, c)
58
+
59
+ def test_case_normalized(self):
60
+ a = proxy._hash_fault_excerpt("FAIL CLOSED security")
61
+ b = proxy._hash_fault_excerpt("fail closed security")
62
+ self.assertEqual(a, b)
63
+
64
+ def test_distinct_excerpts_distinct_hashes(self):
65
+ a = proxy._hash_fault_excerpt("Pay2U API analysis")
66
+ b = proxy._hash_fault_excerpt("Different attractor text")
67
+ self.assertNotEqual(a, b)
68
+ self.assertEqual(len(a), 16)
69
+ self.assertEqual(len(b), 16)
70
+
71
+
72
+ class TestAttractorDetectionPath(unittest.TestCase):
73
+ """First reset → standard. Second reset with same excerpt → attractor."""
74
+
75
+ def _trip_breaker(self, monitor):
76
+ # Make the breaker think it should reset.
77
+ monitor.required_tool_miss_streak = (
78
+ proxy.PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
79
+ )
80
+
81
+ def test_first_reset_is_standard(self):
82
+ monitor = _make_monitor()
83
+ monitor.last_fault_excerpt_hash = "deadbeefcafebabe"
84
+ self._trip_breaker(monitor)
85
+
86
+ body = _make_body(n_msgs=20)
87
+ updated = proxy._maybe_apply_session_contamination_breaker(
88
+ body, monitor, "test-session"
89
+ )
90
+
91
+ self.assertEqual(monitor.contamination_resets, 1)
92
+ self.assertFalse(monitor.attractor_correction_active)
93
+ # Standard keeps head + reset_marker + last keep_last messages
94
+ kept_last = max(2, proxy.PROXY_SESSION_CONTAMINATION_KEEP_LAST)
95
+ self.assertEqual(len(updated["messages"]), 1 + 1 + kept_last)
96
+ # Reset marker carries the standard wording, not the attractor wording.
97
+ self.assertIn("SESSION RESET", updated["messages"][1]["content"])
98
+
99
+ def test_second_reset_same_hash_triggers_attractor(self):
100
+ monitor = _make_monitor()
101
+ monitor.last_fault_excerpt_hash = "deadbeefcafebabe"
102
+ # Pretend we've already done one reset with the same fault excerpt.
103
+ monitor._prev_reset_fault_hash = "deadbeefcafebabe"
104
+ monitor.contamination_resets = 1
105
+ self._trip_breaker(monitor)
106
+
107
+ body = _make_body(n_msgs=20)
108
+ updated = proxy._maybe_apply_session_contamination_breaker(
109
+ body, monitor, "test-session"
110
+ )
111
+
112
+ self.assertTrue(monitor.attractor_correction_active)
113
+ # Hard reset keeps only system + first user (+ corrective marker)
114
+ # → 2 messages total for this body (first user + marker).
115
+ self.assertLessEqual(len(updated["messages"]), 3)
116
+ self.assertIn("ATTRACTOR INTERVENTION", updated["messages"][-1]["content"])
117
+
118
+ def test_second_reset_different_hash_stays_standard(self):
119
+ monitor = _make_monitor()
120
+ monitor.last_fault_excerpt_hash = "newhashvalue1234"
121
+ monitor._prev_reset_fault_hash = "deadbeefcafebabe"
122
+ monitor.contamination_resets = 1
123
+ self._trip_breaker(monitor)
124
+
125
+ body = _make_body(n_msgs=20)
126
+ updated = proxy._maybe_apply_session_contamination_breaker(
127
+ body, monitor, "test-session"
128
+ )
129
+
130
+ self.assertFalse(monitor.attractor_correction_active)
131
+ self.assertIn("SESSION RESET", updated["messages"][1]["content"])
132
+
133
+
134
+ class TestAttractorFinalizeThreshold(unittest.TestCase):
135
+ """Once attractor correction is active, finalize fires at the lower
136
+ threshold instead of waiting for 3 resets."""
137
+
138
+ def test_attractor_lowers_finalize_threshold(self):
139
+ monitor = _make_monitor()
140
+ monitor.attractor_correction_active = True
141
+ # Just at the lowered threshold.
142
+ monitor.contamination_resets = proxy.PROXY_ATTRACTOR_FINALIZE_THRESHOLD
143
+ monitor.required_tool_miss_streak = (
144
+ proxy.PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
145
+ )
146
+
147
+ body = _make_body(n_msgs=20)
148
+ updated = proxy._maybe_apply_session_contamination_breaker(
149
+ body, monitor, "test-session"
150
+ )
151
+
152
+ # Finalize path strips tools and appends the "respond with plain text" prompt.
153
+ self.assertNotIn("tools", updated)
154
+ self.assertNotIn("tool_choice", updated)
155
+ self.assertIn("plain text only", updated["messages"][-1]["content"])
156
+
157
+ def test_standard_path_keeps_3_reset_budget(self):
158
+ monitor = _make_monitor()
159
+ monitor.attractor_correction_active = False
160
+ # 2 resets done — under the standard 3-reset budget.
161
+ monitor.contamination_resets = 2
162
+ monitor.required_tool_miss_streak = (
163
+ proxy.PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
164
+ )
165
+
166
+ body = _make_body(n_msgs=20)
167
+ updated = proxy._maybe_apply_session_contamination_breaker(
168
+ body, monitor, "test-session"
169
+ )
170
+
171
+ # Standard reset, not finalize.
172
+ self.assertIn("tools", updated)
173
+
174
+
175
+ class TestAttractorPhase2Defaults(unittest.TestCase):
176
+ """Phase 2 (PR #192) raises the default temp override and strengthens the
177
+ intervention wording. Verify the defaults the operator gets out of the box."""
178
+
179
+ def test_temp_override_default_is_1_20(self):
180
+ # Phase 1 default was 0.95; Phase 2 raises to 1.20 after one
181
+ # production attractor (fp:d19b7a44...) failed to escape at 0.95.
182
+ self.assertGreaterEqual(proxy.PROXY_ATTRACTOR_TEMP_OVERRIDE, 1.20 - 0.001)
183
+
184
+ def test_intervention_message_has_structured_directives(self):
185
+ """The Phase 2 wording uses MUST / DO NOT bullets and explicitly
186
+ names the failure mode. Trigger the attractor path and inspect the
187
+ injected marker."""
188
+ monitor = _make_monitor()
189
+ monitor.last_fault_excerpt_hash = "deadbeefcafebabe"
190
+ monitor._prev_reset_fault_hash = "deadbeefcafebabe"
191
+ monitor.contamination_resets = 1
192
+ monitor.required_tool_miss_streak = (
193
+ proxy.PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD
194
+ )
195
+
196
+ body = _make_body(n_msgs=20)
197
+ updated = proxy._maybe_apply_session_contamination_breaker(
198
+ body, monitor, "test-session"
199
+ )
200
+
201
+ content = updated["messages"][-1]["content"]
202
+ # Phase 2 wording signals
203
+ self.assertIn("CRITICAL", content)
204
+ self.assertIn("MUST", content)
205
+ self.assertIn("DO NOT", content)
206
+ # Specifically forbids the attractor's preferred behaviors
207
+ self.assertIn("prose", content.lower())
208
+ # Still has the marker substring callers may grep on
209
+ self.assertIn("ATTRACTOR INTERVENTION", content)
210
+
211
+
212
+ if __name__ == "__main__":
213
+ unittest.main()
@@ -1,21 +0,0 @@
1
- export interface MetricsBaselineResult {
2
- timestamp: string;
3
- version: string;
4
- memory: {
5
- heapUsedMB: any;
6
- heapTotalMB: any;
7
- externalMB: any;
8
- rssMB: any;
9
- };
10
- }
11
- export declare function runBaselineBenchmark(): Promise<void>;
12
- export declare function generateReport({ improvement, regression, stable: string }: {
13
- improvement: any;
14
- regression: any;
15
- stable: any;
16
- }, { let, report, ");: report }: {
17
- let: any;
18
- report?: string | undefined;
19
- ");": any;
20
- }, : any, : any, Date: any): any;
21
- //# sourceMappingURL=baseline-metrics.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"baseline-metrics.d.ts","sourceRoot":"","sources":["../../src/utils/baseline-metrics.ts"],"names":[],"mappings":"AAQA,MAAM,WAAW,qBAAqB;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE;QACN,UAAU,MAAC;QACX,WAAW,MAAC;QACZ,UAAU,MAAC;QACX,KAAK,MAAC;KACP,CAAC;CAAA;AA6DJ,wBAAsB,oBAAoB,IAAE,OAAO,CAAC,IAAI,CAAC,CASxD;AAED,wBAAgB,cAAc,CAAC,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAA;;;;CAAA,EAAC,EACvE,GAAG,EAAC,MAA8C,EAAC,GAAG,EACtD,MAAM,EAAG;;;;CAAA,EAAC,AAAD,KAAA,EAAe,KAAA,EAAI,IAAI,KAAA,OAAA"}
@@ -1,111 +0,0 @@
1
- import { writeFileSync, existsSync, mkdirSync } from 'fs';
2
- import { join } from 'path';
3
- import { fileURLToPath } from 'url';
4
- import { dirname } from 'path';
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = dirname(filename);
7
- queryLatency;
8
- {
9
- p50;
10
- p95;
11
- p99;
12
- avg;
13
- }
14
- ;
15
- cachePerformance;
16
- {
17
- hitRate;
18
- avgLookups;
19
- }
20
- ;
21
- dbPerformance;
22
- {
23
- p50;
24
- p95;
25
- p99;
26
- avg;
27
- ;
28
- patternMatching;
29
- {
30
- p50;
31
- p95;
32
- p99;
33
- ;
34
- summary;
35
- }
36
- export function SnapshotBaseline() {
37
- const memory = process.memoryUsage();
38
- return {
39
- timestamp: new Date().toISOString(),
40
- version: '1.17.2', ");: memory
41
- };
42
- {
43
- heapUsedMB: Math.round(memory.heapUsed / 1024 / 1024),
44
- heapTotalMB;
45
- Math.round(memory.heapTotal / 1024 / 1024),
46
- externalMB;
47
- Math.round(memory.external / 1024 / 1024),
48
- rssMB;
49
- Math.round(memory.rss / 1024 / 1024),
50
- ;
51
- }
52
- queryLatency: {
53
- p50: 0,
54
- p95;
55
- 0,
56
- p99;
57
- 0,
58
- avg;
59
- 0,
60
- ;
61
- }
62
- cachePerformance: {
63
- hitRate: 0.85,
64
- avgLookups;
65
- 100,
66
- ;
67
- }
68
- dbPerformance: {
69
- p50: 0.5,
70
- p95;
71
- 1.5,
72
- p99;
73
- 3.5,
74
- avg;
75
- 0.5,
76
- ;
77
- }
78
- patternMatching: {
79
- p50: 0.2,
80
- p95;
81
- 0.1,
82
- p99;
83
- 0.2,
84
- ;
85
- }
86
- summary: 'Baseline metrics snapshot ready for comparison',
87
- ;
88
- }
89
- ;
90
- }
91
- export async function runBaselineBenchmark() {
92
- const snapshot = SnapshotBaseline();
93
- const docsDir = join(__dirname, '../../docs/performance');
94
- if (!existsSync(docs)) {
95
- mkdirSync(docs, { recursive: true });
96
- }
97
- const filePath = join(docs, 'baseline-2026-03-27.json');
98
- writeFileSync(filePath, JSON.stringify(snapshot, null, 2));
99
- console.log('Baseline saved to:', filePath);
100
- }
101
- export function generateReport({ improvement, regression, stable: string }, { let, report = 'Performance Optimization Report\n\n', ");: report }, , , Date) { }
102
- ().toISOString();
103
- n;
104
- n `;
105
-
106
- report += 'Latency\n';");
107
- report += 'Improvement: ${improvement.length}\n`;
108
- report += 'Regression: ${regression.length}\n`;;
109
- report += 'Stable: ${stable.length}\n`;;
110
- return report;
111
- //# sourceMappingURL=baseline-metrics.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"baseline-metrics.js","sourceRoot":"","sources":["../../src/utils/baseline-metrics.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAA,MAAM,KAAK,CAAC;AAClC,OAAO,EAAE,OAAO,EAAA,MAAM,MAAM,CAAC;AAE7B,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;AAWlC,YAAY,CAAA;AAAC,CAAC;IACZ,GAAG,CAAC;IACJ,GAAG,CAAC;IACJ,GAAG,CAAC;IACJ,GAAG,CAAC;AACN,CAAC;AAAA,CAAC;AACF,gBAAgB,CAAA;AAAC,CAAC;IAChB,OAAO,CAAC;IACR,UAAU,CAAC;AACb,CAAC;AAAA,CAAC;AACF,aAAa,CAAA;AAAC,CAAC;IACb,GAAG,CAAC;IACJ,GAAG,CAAC;IACJ,GAAG,CAAC;IACJ,GAAG,CAAC;IACP,CAAC;IACA,eAAe,CAAA;IAAC,CAAC;QACf,GAAG,CAAC;QACJ,GAAG,CAAC;QACJ,GAAG,CAAC;QACP,CAAC;QACA,OAAO,CAAC;IACV,CAAC;IAED,MAAM,UAAU,gBAAgB;QAC9B,MAAM,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QACrC,OAAO;YACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,OAAO,EAAE,QAAQ,EAAC,GAAG,EACrB,MAAM;SAAC,CAAA;QAAC,CAAC;YACP,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC;gBACrD,WAAW,CAAA;YAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;gBACvD,UAAU,CAAA;YAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC;gBACrD,KAAK,CAAA;YAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC;gBAC7C,AAD8C,JAAA,CAAA;QAC9C,CAAC;QACD,YAAY,EAAE,CAAC;YACb,GAAG,EAAE,CAAC;gBACN,GAAG,CAAA;YAAE,CAAC;gBACN,GAAG,CAAA;YAAE,CAAC;gBACN,GAAG,CAAA;YAAE,CAAC;gBACR,AADS,JAAA,CAAA;QACT,CAAC;QACD,gBAAgB,EAAE,CAAC;YACjB,OAAO,EAAE,IAAI;gBACb,UAAU,CAAA;YAAE,GAAG;gBACjB,AADkB,JAAA,CAAA;QAClB,CAAC;QACD,aAAa,EAAE,CAAC;YACd,GAAG,EAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACV,AADW,JAAA,CAAA;QACX,CAAC;QACD,eAAe,EAAE,CAAC;YAChB,GAAG,EAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACR,GAAG,CAAA;YAAE,GAAG;gBACV,AADW,JAAA,CAAA;QACX,CAAC;QACD,OAAO,EAAE,gDAAgD;YAC3D,AAD4D,JAAA,CAAA;IAC5D,CAAC;IAAA,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB;IACxC,MAAM,QAA
Q,GAAG,gBAAgB,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,EAAE,wBAAwB,CAAC,CAAC;IAC1D,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,SAAS,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACvC,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,0BAA0B,CAAC,CAAC;IACxD,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC3D,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,QAAQ,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAA,EAAC,EACvE,GAAG,EAAC,MAAM,GAAG,qCAAqC,EAAC,GAAG,EACtD,MAAM,EAAG,EAAC,AAAD,EAAe,EAAI,IAAI,IAAA,CAAC,AAAD;AAAA,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;AAAE,CAAC,CAAA;AAAC,CAAC,CAAA;;;4BAG3B,WAAW,CAAC,MAAM,IAAI,CAAC;AACjD,MAAM,IAAI,qCAAqC,CAAA;AAC/C,MAAM,IAAI,6BAA6B,CAAA;AACvC,OAAO,MAAM,CAAC"}