cortex-loop 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortex/__init__.py +7 -0
- cortex/adapters.py +339 -0
- cortex/blocklist.py +51 -0
- cortex/challenges.py +210 -0
- cortex/cli.py +7 -0
- cortex/core.py +601 -0
- cortex/core_helpers.py +190 -0
- cortex/data/identity_preamble.md +5 -0
- cortex/data/layer1_part_a.md +65 -0
- cortex/data/layer1_part_b.md +17 -0
- cortex/executive.py +295 -0
- cortex/foundation.py +185 -0
- cortex/genome.py +348 -0
- cortex/graveyard.py +226 -0
- cortex/hooks/__init__.py +27 -0
- cortex/hooks/_shared.py +167 -0
- cortex/hooks/post_tool_use.py +13 -0
- cortex/hooks/pre_tool_use.py +13 -0
- cortex/hooks/session_start.py +13 -0
- cortex/hooks/stop.py +13 -0
- cortex/invariants.py +258 -0
- cortex/packs.py +118 -0
- cortex/repomap.py +6 -0
- cortex/requirements.py +497 -0
- cortex/retry.py +312 -0
- cortex/stop_contract.py +217 -0
- cortex/stop_payload.py +122 -0
- cortex/stop_policy.py +100 -0
- cortex/stop_runtime.py +400 -0
- cortex/stop_signals.py +75 -0
- cortex/store.py +793 -0
- cortex/templates/__init__.py +10 -0
- cortex/utils.py +58 -0
- cortex_loop-0.1.0a1.dist-info/METADATA +121 -0
- cortex_loop-0.1.0a1.dist-info/RECORD +52 -0
- cortex_loop-0.1.0a1.dist-info/WHEEL +5 -0
- cortex_loop-0.1.0a1.dist-info/entry_points.txt +3 -0
- cortex_loop-0.1.0a1.dist-info/licenses/LICENSE +21 -0
- cortex_loop-0.1.0a1.dist-info/top_level.txt +3 -0
- cortex_ops_cli/__init__.py +3 -0
- cortex_ops_cli/_adapter_validation.py +119 -0
- cortex_ops_cli/_check_report.py +454 -0
- cortex_ops_cli/_check_report_output.py +270 -0
- cortex_ops_cli/_openai_bridge_probe.py +241 -0
- cortex_ops_cli/_openai_bridge_protocol.py +469 -0
- cortex_ops_cli/_runtime_profile_templates.py +341 -0
- cortex_ops_cli/_runtime_profiles.py +445 -0
- cortex_ops_cli/gemini_hooks.py +301 -0
- cortex_ops_cli/main.py +911 -0
- cortex_ops_cli/openai_app_server_bridge.py +375 -0
- cortex_repomap/__init__.py +1 -0
- cortex_repomap/engine.py +1201 -0
cortex/retry.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from collections.abc import Mapping
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
# Payload ``status`` values (lower-cased, stripped) that classify_failure treats as a failure.
RETRYABLE_STATUSES: frozenset[str] = frozenset({"error", "failed", "fail"})
# Canonical reasons that classify_failure always reports as a hard stop.
NON_RETRYABLE_REASONS: frozenset[str] = frozenset({"invariant_violation", "stop_gate_failure", "policy_violation", "challenge_failure", "permission_denied", "authentication_failure"})
# The two values compute_retry_verdict stores in RetryVerdict.failure_class.
CORRECTIVE_RETRYABLE: str = "corrective_retryable"
TERMINAL_HARD_GATE: str = "terminal_hard_gate"
# Canonical reasons that classify_failure reports as retryable.
_CORRECTIVE_CANONICAL: frozenset[str] = frozenset({"tool_error", "timeout", "shape_mismatch", "range_error", "format_error"})
# Per-reason retry caps; reason_retry_limit also clamps them to the caller's default limit.
_REASON_RETRY_LIMITS: Mapping[str, int] = {
    "timeout": 3,
    "tool_error": 3,
    "shape_mismatch": 2,
    "format_error": 2,
    "range_error": 1,
}
# Payload keys whose file lists are hashed by objective_delta_hash.
_DELTA_OBJECTIVE_KEYS: tuple[str, ...] = ("changed_files", "updated_files")
# Reasons for which compute_retry_verdict reads and records the objective delta hash.
_DELTA_SENSITIVE_REASONS: frozenset[str] = frozenset({"shape_mismatch", "range_error", "format_error"})
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(slots=True)
class FailureVerdict:
    """Result of classifying a failed payload (see ``classify_failure``)."""

    # True when the failure reason is one that may be retried.
    retryable: bool
    # Canonical reason token for the failure.
    reason: str
    # True when the failure must end the loop instead of being retried.
    hard_stop: bool = False
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(slots=True)
class RetryVerdict:
    """Retry decision returned by ``compute_retry_verdict``."""

    # True only when a retry attempt was successfully consumed from the budget.
    should_retry: bool
    # True when the failure was classified as a hard stop.
    hard_stop: bool
    # Either TERMINAL_HARD_GATE or CORRECTIVE_RETRYABLE.
    failure_class: str
    # Canonical failure reason.
    reason: str
    # Smaller of the remaining session budget and the remaining per-reason budget.
    budget_remaining: int
    # True when no further retry is available under the budgets.
    budget_exhausted: bool
    # One of: "hard_stop", "reason_budget_exhausted", "session_budget_exhausted",
    # "no_delta", "retry_allowed", "retry_contention".
    decision_code: str
    # SHA-256 hex digest produced by failure_signature for this payload.
    failure_signature: str
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def compute_retry_verdict(
    *,
    store: Any,
    session_id: str,
    payload: Mapping[str, Any],
    max_retries: int,
) -> RetryVerdict | None:
    """Decide whether the failure described by ``payload`` may be retried.

    Args:
        store: Object providing ``get_retry_attempts``, ``get_retry_delta_hash``,
            ``upsert_retry_delta_hash`` and ``try_increment_retry_attempts``.
        session_id: Session whose retry counters are consulted and updated.
        payload: Tool result payload to classify.
        max_retries: Session-wide retry cap; also the upper bound for the
            per-reason cap.

    Returns:
        ``None`` when ``classify_failure`` does not consider the payload a
        failure, otherwise a ``RetryVerdict``.
    """
    failure = classify_failure(payload)
    if failure is None:
        return None

    reason = failure.reason
    reason_limit = reason_retry_limit(reason, default_limit=max_retries)
    snapshot = store.get_retry_attempts(session_id, reason=reason)
    budget = _retry_budget_state(
        attempts=int(snapshot["attempts"]),
        reason_attempts=int(snapshot["reason_attempts"]),
        max_retries=max_retries,
        reason_limit=reason_limit,
    )
    signature = failure_signature(payload, reason=reason)
    delta_hash = objective_delta_hash(payload)
    delta_sensitive = reason in _DELTA_SENSITIVE_REASONS

    # The stored hash is only looked up for delta-sensitive reasons.
    previous_delta_hash = None
    if delta_sensitive:
        previous_delta_hash = store.get_retry_delta_hash(session_id, reason, signature)

    session_left = int(budget["remaining"])
    reason_left = int(budget["reason_remaining"])
    remaining = int(budget["budget_remaining"])

    # Hard stops win; otherwise the per-reason budget is checked before the session budget.
    if failure.hard_stop:
        terminal_code = "hard_stop"
    elif reason_left <= 0:
        terminal_code = "reason_budget_exhausted"
    elif session_left <= 0:
        terminal_code = "session_budget_exhausted"
    else:
        terminal_code = ""

    if terminal_code:
        is_hard_stop = terminal_code == "hard_stop"
        return RetryVerdict(
            should_retry=False,
            hard_stop=is_hard_stop,
            failure_class=TERMINAL_HARD_GATE if is_hard_stop else CORRECTIVE_RETRYABLE,
            reason=reason,
            budget_remaining=remaining,
            budget_exhausted=remaining == 0 or not is_hard_stop,
            decision_code=terminal_code,
            failure_signature=signature,
        )

    def _record_delta_hash() -> None:
        # Persist the current delta hash for this (reason, signature) pair.
        store.upsert_retry_delta_hash(
            session_id,
            reason=reason,
            failure_signature=signature,
            delta_hash=delta_hash,
        )

    # A record exists for this signature, and both it and the current payload
    # carry an empty delta hash: nothing changed, so no retry is granted.
    stalled = (
        delta_sensitive
        and previous_delta_hash is not None
        and not (previous_delta_hash or delta_hash)
    )
    if stalled:
        _record_delta_hash()
        return RetryVerdict(
            should_retry=False,
            hard_stop=False,
            failure_class=CORRECTIVE_RETRYABLE,
            reason=reason,
            budget_remaining=remaining,
            budget_exhausted=remaining == 0,
            decision_code="no_delta",
            failure_signature=signature,
        )

    outcome = _consume_retry_with_budget(
        store=store,
        session_id=session_id,
        reason=reason,
        max_retries=max_retries,
        reason_limit=reason_limit,
    )
    if delta_sensitive:
        _record_delta_hash()

    return RetryVerdict(
        should_retry=bool(outcome["consumed"]),
        hard_stop=False,
        failure_class=CORRECTIVE_RETRYABLE,
        reason=reason,
        budget_remaining=int(outcome["budget_remaining"]),
        budget_exhausted=bool(outcome["budget_exhausted"]),
        decision_code=str(outcome["decision_code"]),
        failure_signature=signature,
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def reason_retry_limit(reason: str, *, default_limit: int) -> int:
    """Return the retry cap for ``reason``.

    The configured per-reason cap (or ``default_limit`` when none is
    configured) is clamped so it never exceeds ``default_limit`` and never
    drops below zero.
    """
    configured = int(_REASON_RETRY_LIMITS.get(reason, default_limit))
    capped = configured if configured < default_limit else default_limit
    return capped if capped > 0 else 0
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _retry_budget_state(
|
|
145
|
+
*,
|
|
146
|
+
attempts: int,
|
|
147
|
+
reason_attempts: int,
|
|
148
|
+
max_retries: int,
|
|
149
|
+
reason_limit: int,
|
|
150
|
+
) -> dict[str, int | str]:
|
|
151
|
+
session_limit = max(0, int(max_retries))
|
|
152
|
+
reason_cap = max(0, int(reason_limit))
|
|
153
|
+
session_attempts = max(0, int(attempts))
|
|
154
|
+
reason_attempts = max(0, int(reason_attempts))
|
|
155
|
+
remaining = max(0, session_limit - session_attempts)
|
|
156
|
+
reason_remaining = max(0, reason_cap - reason_attempts)
|
|
157
|
+
decision_code = "retry_allowed"
|
|
158
|
+
if remaining <= 0:
|
|
159
|
+
decision_code = "session_budget_exhausted"
|
|
160
|
+
elif reason_remaining <= 0:
|
|
161
|
+
decision_code = "reason_budget_exhausted"
|
|
162
|
+
return {
|
|
163
|
+
"attempts": session_attempts,
|
|
164
|
+
"reason_attempts": reason_attempts,
|
|
165
|
+
"remaining": remaining,
|
|
166
|
+
"reason_remaining": reason_remaining,
|
|
167
|
+
"budget_remaining": min(remaining, reason_remaining),
|
|
168
|
+
"decision_code": decision_code,
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _consume_retry_with_budget(
    *,
    store: Any,
    session_id: str,
    reason: str,
    max_retries: int,
    reason_limit: int,
) -> dict[str, int | bool | str]:
    """Try to consume one retry attempt via a compare-and-increment on ``store``.

    Up to 16 rounds are made. Each round re-reads the counters, returns early
    if the budget no longer allows a retry, and otherwise asks the store to
    increment the counters provided they still match what was read. If no
    round succeeds, the counters are read once more and the result carries
    ``"retry_contention"`` when the budget itself would still allow a retry.
    """

    def _budget_from(counts: Any) -> dict[str, int | str]:
        # ``counts`` is any mapping exposing "attempts" and "reason_attempts".
        return _retry_budget_state(
            attempts=int(counts["attempts"]),
            reason_attempts=int(counts["reason_attempts"]),
            max_retries=max_retries,
            reason_limit=reason_limit,
        )

    rounds_left = 16
    while rounds_left > 0:
        rounds_left -= 1
        state = _budget_from(store.get_retry_attempts(session_id, reason=reason))
        if state["decision_code"] != "retry_allowed":
            return {
                "consumed": False,
                "attempts": int(state["attempts"]),
                "reason_attempts": int(state["reason_attempts"]),
                "budget_remaining": int(state["budget_remaining"]),
                "decision_code": str(state["decision_code"]),
                "budget_exhausted": True,
            }

        write_outcome = store.try_increment_retry_attempts(
            session_id,
            reason=reason,
            expected_attempts=int(state["attempts"]),
            expected_reason_attempts=int(state["reason_attempts"]),
        )
        if not bool(write_outcome["consumed"]):
            # The increment was not applied; re-read and try again.
            continue

        post = _budget_from(write_outcome)
        return {
            "consumed": True,
            "attempts": int(post["attempts"]),
            "reason_attempts": int(post["reason_attempts"]),
            "budget_remaining": int(post["budget_remaining"]),
            "decision_code": "retry_allowed",
            "budget_exhausted": int(post["budget_remaining"]) == 0,
        }

    final_state = _budget_from(store.get_retry_attempts(session_id, reason=reason))
    still_allowed = final_state["decision_code"] == "retry_allowed"
    return {
        "consumed": False,
        "attempts": int(final_state["attempts"]),
        "reason_attempts": int(final_state["reason_attempts"]),
        "budget_remaining": int(final_state["budget_remaining"]),
        "decision_code": "retry_contention" if still_allowed else str(final_state["decision_code"]),
        "budget_exhausted": not still_allowed,
    }
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def objective_delta_hash(payload: Mapping[str, Any]) -> str:
    """Return a SHA-256 hex digest of the payload's changed-file list.

    The list comes from ``_collect_objective_delta_files``; an empty list
    yields the empty string instead of a digest.
    """
    changed = _collect_objective_delta_files(payload)
    if changed:
        encoded = json.dumps(changed, separators=(",", ":"), ensure_ascii=True).encode("utf-8")
        digest = hashlib.sha256()
        digest.update(encoded)
        return digest.hexdigest()
    return ""
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def failure_signature(payload: Mapping[str, Any], *, reason: str) -> str:
    """Return a SHA-256 hex digest identifying this failure.

    The digest covers the reason, the normalised tool name and status, the
    first 240 characters of the normalised error text (``error``, then
    ``message``, then ``stderr``), and the normalised ``target_files`` and
    ``planned_files`` lists.
    """
    tool_name = _normalize_token(payload.get("tool_name"))
    status = _normalize_token(payload.get("status"))
    raw_message = payload.get("error") or payload.get("message") or payload.get("stderr")
    message = _normalize_token(raw_message)[:240]
    target_files = _normalize_files(payload.get("target_files"))
    planned_files = _normalize_files(payload.get("planned_files"))

    canonical = {
        "reason": reason,
        "tool_name": tool_name,
        "status": status,
        "message": message,
        "target_files": target_files,
        "planned_files": planned_files,
    }
    # Keys are sorted so the digest does not depend on dict ordering.
    serialized = json.dumps(canonical, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _canonical_reason(raw_reason: Any) -> str:
    """Reduce a free-form reason to a canonical token.

    The text is lower-cased, whitespace and hyphen runs become underscores,
    every other run outside ``[a-z0-9_]`` becomes an underscore, and edge
    underscores are trimmed. An empty or already-canonical token is returned
    unchanged. Otherwise the token's underscore-separated words are matched
    against the known reasons in a fixed order; an unmatched token is
    returned as-is.
    """
    lowered = str(raw_reason or "").lower().strip()
    underscored = re.sub(r"[\s\-]+", "_", lowered)
    token = re.sub(r"[^a-z0-9_]+", "_", underscored).strip("_")
    if not token or token in _CORRECTIVE_CANONICAL:
        return token

    words = set(token.split("_"))
    # Checked in order: the first reason whose words are all present wins.
    word_rules = (
        ("shape_mismatch", {"shape", "mismatch"}),
        ("range_error", {"range", "error"}),
        ("format_error", {"format", "error"}),
        ("timeout", {"timeout"}),
    )
    for canonical, required in word_rules:
        if required <= words:
            return canonical
    return token
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def classify_failure(payload: Mapping[str, Any]) -> FailureVerdict | None:
    """Classify a payload as a retryable failure, a hard stop, or no failure.

    Returns ``None`` when the payload status is not in ``RETRYABLE_STATUSES``.
    With an explicit reason (``failure_reason``, then ``reason``) the verdict
    is retryable only if the canonical reason is corrective; anything else is
    a hard stop. With no reason, one is inferred from the error text and
    defaults to ``"tool_error"``, which is always retryable.
    """
    status = str(payload.get("status", "")).lower().strip()
    if status not in RETRYABLE_STATUSES:
        return None

    reason = _canonical_reason(payload.get("failure_reason") or payload.get("reason"))
    if reason:
        retryable = reason not in NON_RETRYABLE_REASONS and reason in _CORRECTIVE_CANONICAL
        if retryable:
            return FailureVerdict(retryable=True, reason=reason)
        return FailureVerdict(retryable=False, reason=reason, hard_stop=True)

    # No explicit reason: try to infer one from the error text.
    inferred = _canonical_reason(payload.get("error") or payload.get("message") or payload.get("stderr"))
    if inferred not in _CORRECTIVE_CANONICAL:
        inferred = "tool_error"
    return FailureVerdict(retryable=True, reason=inferred)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _normalize_token(value: Any) -> str:
|
|
295
|
+
return re.sub(r"\s+", " ", str(value or "").strip().lower())
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _normalize_files(value: Any) -> list[str]:
|
|
299
|
+
if not isinstance(value, list):
|
|
300
|
+
return []
|
|
301
|
+
tokens = [str(item).strip() for item in value]
|
|
302
|
+
return sorted({token for token in tokens if token})
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _collect_objective_delta_files(payload: Mapping[str, Any]) -> list[str]:
    """Merge the file lists found under ``_DELTA_OBJECTIVE_KEYS``.

    Only ``list`` values are considered. Items are stringified and stripped,
    blanks are dropped, and the result is sorted and de-duplicated.
    """
    unique: set[str] = set()
    for key in _DELTA_OBJECTIVE_KEYS:
        listed = payload.get(key)
        if isinstance(listed, list):
            unique.update(str(item).strip() for item in listed)
    # Blank entries collapse to "" and are removed here.
    unique.discard("")
    return sorted(unique)
|
cortex/stop_contract.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from .stop_payload import extract_stop_fields, resolve_stop_value
|
|
8
|
+
from .utils import _as_string_list
|
|
9
|
+
|
|
10
|
+
# Stop fields that resolve_stop_contract resolves; a non-None value for any of
# these directly on the payload counts as a structured stop source.
STOP_PAYLOAD_KEYS = (
    "challenge_coverage",
    "requirement_audit",
    "truth_claims",
    "required_requirement_ids",
    "failed_approach",
    "stuck_declaration",
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(slots=True)
class StopContract:
    """Resolved view of a Stop payload, built by ``resolve_stop_contract``."""

    # Warnings collected while extracting and resolving the stop fields.
    warnings: list[str]
    # "message_fallback", "payload.stop_fields" or "native" (see _stop_source_label).
    stop_source: str
    # Source reported by extract_stop_fields, or "none" when it reported none.
    stop_fields_source: str
    # True when stop fields were recovered from the last assistant message.
    stop_fields_fallback_used: bool
    # Count of stop-field keys that had surrounding whitespace stripped.
    stop_key_normalization_count: int
    # The next three values are passed through as resolved, without validation here.
    challenge_coverage: Any
    requirement_audit: Any
    truth_claims: Any
    # Requirement ids from the stop payload, coerced with _as_string_list.
    required_requirement_ids: list[str]
    # {"summary", "reason", "files"} or None when summary or reason is missing.
    failed_approach: dict[str, Any] | None
    # {"check", "approaches_tried", "obstacle"} or None when absent or incomplete.
    stuck_declaration: dict[str, Any] | None
    # True when a structured stop payload was required but not provided.
    structured_stop_violation: bool
    # Diagnostic describing the violation; None when there is no violation.
    contract_diagnostic: dict[str, Any] | None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def resolve_stop_contract(
    payload: Mapping[str, Any],
    *,
    allow_message_fallback: bool,
    require_structured_stop_payload: bool,
) -> StopContract:
    """Resolve every stop field of ``payload`` into a ``StopContract``.

    Args:
        payload: Raw Stop payload.
        allow_message_fallback: Forwarded to ``extract_stop_fields``.
        require_structured_stop_payload: When true, a payload without a
            structured stop source is flagged as a violation and gets a
            contract diagnostic.

    Returns:
        The resolved contract, including all warnings gathered on the way.
    """
    stop_fields, stop_fields_source, warnings, normalization_count = extract_stop_fields(
        payload, allow_message_fallback=allow_message_fallback
    )

    values: dict[str, Any] = {}
    for key in STOP_PAYLOAD_KEYS:
        values[key] = resolve_stop_value(
            key=key,
            payload=payload,
            stop_fields=stop_fields,
            stop_fields_source=stop_fields_source,
            warnings=warnings,
            value_label=key,
        )

    stop_source = _stop_source_label(stop_fields_source)
    used_message_stop_fallback = bool(stop_source == "message_fallback" and stop_fields is not None)
    if used_message_stop_fallback:
        warnings.append(
            "Recovered stop fields from last_assistant_message STOP_FIELDS_JSON fallback; emit structured stop fields directly."
        )
    if normalization_count > 0:
        warnings.append(
            f"Canonicalized {normalization_count} stop payload key(s) with surrounding whitespace."
        )

    structured_source_present = _has_structured_stop_source(payload, stop_fields_source)
    structured_stop_violation = bool(require_structured_stop_payload and not structured_source_present)

    contract_diagnostic = None
    if structured_stop_violation:
        if used_message_stop_fallback:
            message = "Structured stop payload is required; trailer-only STOP_FIELDS_JSON fallback is rejected."
            kind = "trailer_fallback_rejected"
        else:
            message = "Structured stop payload is required; include stop fields directly or via payload.stop_fields."
            kind = "missing_structured_stop_fields"
        warnings.append(message)
        contract_diagnostic = structured_stop_contract_diagnostic(kind)

    # Resolved in the order the contract lists them; the stuck-declaration
    # resolver may append further warnings to the shared list.
    required_ids = _as_string_list(values["required_requirement_ids"])
    failed_approach = _resolve_failed_approach(payload, values["failed_approach"], stop_fields=stop_fields)
    stuck_declaration = _resolve_stuck_declaration(values["stuck_declaration"], warnings=warnings)

    return StopContract(
        warnings=warnings,
        stop_source=stop_source,
        stop_fields_source=stop_fields_source or "none",
        stop_fields_fallback_used=used_message_stop_fallback,
        stop_key_normalization_count=normalization_count,
        challenge_coverage=values["challenge_coverage"],
        requirement_audit=values["requirement_audit"],
        truth_claims=values["truth_claims"],
        required_requirement_ids=required_ids,
        failed_approach=failed_approach,
        stuck_declaration=stuck_declaration,
        structured_stop_violation=structured_stop_violation,
        contract_diagnostic=contract_diagnostic,
    )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _resolve_failed_approach(
    payload: Mapping[str, Any],
    failed_approach: Any,
    *,
    stop_fields: Mapping[str, Any] | None = None,
) -> dict[str, Any] | None:
    """Build a ``{"summary", "reason", "files"}`` record for a failed approach.

    Values are taken from ``failed_approach`` first (a mapping, or a plain
    string used as the summary). Anything still missing is looked up under
    alternative keys on ``payload`` and then on ``stop_fields``. Returns
    ``None`` unless both a summary and a reason are found.
    """
    fallback_fields = stop_fields or {}

    def _first_text(keys: tuple[str, ...]) -> str:
        # First non-blank text under any of ``keys``; payload beats stop fields.
        for key in keys:
            text = str(payload.get(key) or fallback_fields.get(key) or "").strip()
            if text:
                return text
        return ""

    summary = ""
    reason = ""
    files: list[str] = []

    if isinstance(failed_approach, Mapping):
        raw_summary = (
            failed_approach.get("summary")
            or failed_approach.get("what_was_tried")
            or failed_approach.get("approach")
            or ""
        )
        summary = str(raw_summary).strip()
        raw_reason = failed_approach.get("reason") or failed_approach.get("why_failed") or ""
        reason = str(raw_reason).strip()
        files = _as_string_list(failed_approach.get("files"))
    elif isinstance(failed_approach, str):
        summary = failed_approach.strip()

    if not summary:
        summary = _first_text(("what_was_tried", "failed_summary", "approach", "failed_approach_summary"))
    if not reason:
        reason = _first_text(("why_failed", "failure_reason", "reason"))
    if not files:
        # All payload keys are tried before any stop-field key.
        for source in (payload, fallback_fields):
            for key in ("failed_files", "files", "target_files"):
                files = _as_string_list(source.get(key))
                if files:
                    break
            if files:
                break

    if summary and reason:
        return {"summary": summary, "reason": reason, "files": files}
    return None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _resolve_stuck_declaration(
|
|
152
|
+
value: Any,
|
|
153
|
+
*,
|
|
154
|
+
warnings: list[str],
|
|
155
|
+
) -> dict[str, Any] | None:
|
|
156
|
+
if value is None:
|
|
157
|
+
return None
|
|
158
|
+
if not isinstance(value, Mapping):
|
|
159
|
+
warnings.append("Ignoring invalid stuck_declaration; expected an object.")
|
|
160
|
+
return None
|
|
161
|
+
|
|
162
|
+
check = str(value.get("check") or "").strip()
|
|
163
|
+
approaches_tried = _as_string_list(value.get("approaches_tried"))
|
|
164
|
+
obstacle = str(value.get("obstacle") or "").strip()
|
|
165
|
+
if not (check and approaches_tried and obstacle):
|
|
166
|
+
warnings.append(
|
|
167
|
+
"Ignoring incomplete stuck_declaration; expected check, approaches_tried, and obstacle."
|
|
168
|
+
)
|
|
169
|
+
return None
|
|
170
|
+
|
|
171
|
+
return {
|
|
172
|
+
"check": check,
|
|
173
|
+
"approaches_tried": approaches_tried,
|
|
174
|
+
"obstacle": obstacle,
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def reconcile_required_requirement_ids(
|
|
179
|
+
session_required_ids: list[str], stop_required_ids: list[str]
|
|
180
|
+
) -> tuple[list[str], str, str | None]:
|
|
181
|
+
if session_required_ids:
|
|
182
|
+
warning = (
|
|
183
|
+
"Ignoring required_requirement_ids from Stop payload; using SessionStart contract."
|
|
184
|
+
if stop_required_ids and set(stop_required_ids) != set(session_required_ids)
|
|
185
|
+
else None
|
|
186
|
+
)
|
|
187
|
+
return list(session_required_ids), "session", warning
|
|
188
|
+
if stop_required_ids:
|
|
189
|
+
return (
|
|
190
|
+
list(stop_required_ids),
|
|
191
|
+
"stop_payload",
|
|
192
|
+
"No SessionStart requirement contract found; using Stop-provided required_requirement_ids.",
|
|
193
|
+
)
|
|
194
|
+
return [], "none", None
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _stop_source_label(stop_fields_source: str | None) -> str:
|
|
198
|
+
if stop_fields_source == "last_assistant_message":
|
|
199
|
+
return "message_fallback"
|
|
200
|
+
if stop_fields_source == "payload.stop_fields":
|
|
201
|
+
return "payload.stop_fields"
|
|
202
|
+
return "native"
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _has_structured_stop_source(payload: Mapping[str, Any], stop_fields_source: str | None) -> bool:
    """Tell whether the stop evidence arrived in structured form.

    True when any key of ``STOP_PAYLOAD_KEYS`` has a non-``None`` value
    directly on ``payload``, or when the stop fields came from
    ``payload.stop_fields``.
    """
    for key in STOP_PAYLOAD_KEYS:
        if payload.get(key) is not None:
            return True
    return bool(stop_fields_source == "payload.stop_fields")
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def structured_stop_contract_diagnostic(kind: str) -> dict[str, Any]:
    """Return the diagnostic record for a structured-stop contract violation.

    ``kind`` selects the wording: ``"trailer_fallback_rejected"`` and
    ``"strict_message_fallback_rejected"`` have their own text, and every
    other value gets the "no machine-readable stop fields" diagnostic. A
    fresh dict is built on each call.
    """
    if kind == "trailer_fallback_rejected":
        found = "stop fields only via last_assistant_message STOP_FIELDS_JSON trailer"
        expected = "native structured stop fields or payload.stop_fields"
        description = "Structured stop evidence was only provided through trailer fallback."
        characterization, distance = "execution_gap", "close"
    elif kind == "strict_message_fallback_rejected":
        found = "stop_source=message_fallback"
        expected = "stop_source=native or payload.stop_fields"
        description = "Strict mode rejected message-fallback stop evidence."
        characterization, distance = "execution_gap", "close"
    else:
        found = "no machine-readable stop fields"
        expected = "native structured stop fields or payload.stop_fields"
        description = "Completion was claimed without machine-readable stop evidence."
        characterization, distance = "comprehension_gap", "far"

    return {
        "evidence_found": [found],
        "evidence_expected": [expected],
        "gap_description": description,
        "gap_characterization": characterization,
        "distance_signal": distance,
    }
|
cortex/stop_payload.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
# Stop fields that may appear directly on the payload; when any of them is
# non-None, extract_stop_fields skips the last-assistant-message fallback.
STOP_FIELD_KEYS = (
    "challenge_coverage",
    "requirement_audit",
    "truth_claims",
    "required_requirement_ids",
    "failed_approach",
    "stuck_declaration",
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_stop_fields(
|
|
19
|
+
payload: Mapping[str, Any], *, allow_message_fallback: bool = True
|
|
20
|
+
) -> tuple[dict[str, Any] | None, str | None, list[str], int]:
|
|
21
|
+
warnings: list[str] = []
|
|
22
|
+
|
|
23
|
+
raw = payload.get("stop_fields")
|
|
24
|
+
if isinstance(raw, Mapping):
|
|
25
|
+
normalized, normalization_count = _canonicalize_map_keys(raw)
|
|
26
|
+
return normalized, "payload.stop_fields", warnings, normalization_count
|
|
27
|
+
if raw is not None:
|
|
28
|
+
warnings.append("Ignoring invalid stop_fields field; expected an object.")
|
|
29
|
+
|
|
30
|
+
if not allow_message_fallback:
|
|
31
|
+
return None, None, warnings, 0
|
|
32
|
+
if any(payload.get(key) is not None for key in STOP_FIELD_KEYS):
|
|
33
|
+
return None, None, warnings, 0
|
|
34
|
+
|
|
35
|
+
last_message = payload.get("last_assistant_message")
|
|
36
|
+
if isinstance(last_message, str):
|
|
37
|
+
parsed, marker_found, error = parse_stop_fields_json(last_message)
|
|
38
|
+
if parsed is not None:
|
|
39
|
+
normalized, normalization_count = _canonicalize_map_keys(parsed)
|
|
40
|
+
return normalized, "last_assistant_message", warnings, normalization_count
|
|
41
|
+
if marker_found and error:
|
|
42
|
+
warnings.append(f"Ignoring invalid STOP_FIELDS_JSON trailer: {error}")
|
|
43
|
+
|
|
44
|
+
return None, None, warnings, 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def resolve_stop_value(
|
|
48
|
+
*,
|
|
49
|
+
key: str,
|
|
50
|
+
payload: Mapping[str, Any],
|
|
51
|
+
stop_fields: dict[str, Any] | None,
|
|
52
|
+
stop_fields_source: str | None,
|
|
53
|
+
warnings: list[str],
|
|
54
|
+
value_label: str,
|
|
55
|
+
) -> Any:
|
|
56
|
+
value = payload.get(key)
|
|
57
|
+
if value is not None or not (stop_fields and key in stop_fields):
|
|
58
|
+
return value
|
|
59
|
+
value = stop_fields[key]
|
|
60
|
+
if stop_fields_source == "last_assistant_message":
|
|
61
|
+
warnings.append(f"Using {value_label} parsed from last assistant message (STOP_FIELDS_JSON).")
|
|
62
|
+
elif stop_fields_source == "payload.stop_fields":
|
|
63
|
+
warnings.append(f"Using {value_label} from payload.stop_fields.")
|
|
64
|
+
return value
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def parse_stop_fields_json(text: str) -> tuple[dict[str, Any] | None, bool, str | None]:
|
|
68
|
+
for pattern in (
|
|
69
|
+
r"```(?:stop-fields|stop_fields)\s*(\{.*?\})\s*```",
|
|
70
|
+
r"```json\s*(\{.*?\"challenge_coverage\".*?\})\s*```",
|
|
71
|
+
):
|
|
72
|
+
match = re.search(pattern, text, flags=re.DOTALL)
|
|
73
|
+
if not match:
|
|
74
|
+
continue
|
|
75
|
+
try:
|
|
76
|
+
parsed = json.loads(match.group(1))
|
|
77
|
+
except json.JSONDecodeError as exc:
|
|
78
|
+
return None, True, str(exc)
|
|
79
|
+
if not isinstance(parsed, dict):
|
|
80
|
+
return None, True, "expected a JSON object"
|
|
81
|
+
return parsed, True, None
|
|
82
|
+
|
|
83
|
+
marker = "STOP_FIELDS_JSON:"
|
|
84
|
+
idx = text.rfind(marker)
|
|
85
|
+
if idx == -1:
|
|
86
|
+
return None, False, None
|
|
87
|
+
decoder = json.JSONDecoder()
|
|
88
|
+
try:
|
|
89
|
+
parsed, _ = decoder.raw_decode(text[idx + len(marker) :].lstrip())
|
|
90
|
+
except json.JSONDecodeError as exc:
|
|
91
|
+
return None, True, str(exc)
|
|
92
|
+
if not isinstance(parsed, dict):
|
|
93
|
+
return None, True, "expected a JSON object"
|
|
94
|
+
return parsed, True, None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _canonicalize_map_keys(value: Mapping[str, Any]) -> tuple[dict[str, Any], int]:
|
|
98
|
+
normalized: dict[str, Any] = {}
|
|
99
|
+
normalization_count = 0
|
|
100
|
+
for raw_key, raw_value in value.items():
|
|
101
|
+
key = str(raw_key)
|
|
102
|
+
canonical_key = key.strip()
|
|
103
|
+
if canonical_key != key:
|
|
104
|
+
normalization_count += 1
|
|
105
|
+
canonical_value, nested_count = _canonicalize_value(raw_value)
|
|
106
|
+
normalization_count += nested_count
|
|
107
|
+
normalized[canonical_key] = canonical_value
|
|
108
|
+
return normalized, normalization_count
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _canonicalize_value(value: Any) -> tuple[Any, int]:
|
|
112
|
+
if isinstance(value, Mapping):
|
|
113
|
+
return _canonicalize_map_keys(value)
|
|
114
|
+
if isinstance(value, list):
|
|
115
|
+
items: list[Any] = []
|
|
116
|
+
normalization_count = 0
|
|
117
|
+
for item in value:
|
|
118
|
+
canonical_item, nested_count = _canonicalize_value(item)
|
|
119
|
+
items.append(canonical_item)
|
|
120
|
+
normalization_count += nested_count
|
|
121
|
+
return items, normalization_count
|
|
122
|
+
return value, 0
|