prizmkit 1.1.70 → 1.1.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/VERSION.json +3 -3
- package/bundled/dev-pipeline/lib/common.sh +427 -0
- package/bundled/dev-pipeline/lib/heartbeat.sh +36 -0
- package/bundled/dev-pipeline/run-feature.sh +109 -29
- package/bundled/dev-pipeline/scripts/parse-stream-progress.py +160 -3
- package/bundled/dev-pipeline/scripts/update-feature-status.py +27 -3
- package/bundled/dev-pipeline/templates/sections/phase-commit-full.md +11 -0
- package/bundled/dev-pipeline/templates/sections/phase-commit.md +11 -0
- package/bundled/dev-pipeline-windows/lib/common.ps1 +61 -1
- package/bundled/dev-pipeline-windows/lib/pipeline.ps1 +299 -14
- package/bundled/dev-pipeline-windows/scripts/parse-stream-progress.py +160 -3
- package/bundled/dev-pipeline-windows/scripts/update-feature-status.py +27 -3
- package/bundled/dev-pipeline-windows/templates/sections/phase-commit-full.md +11 -0
- package/bundled/dev-pipeline-windows/templates/sections/phase-commit.md +11 -0
- package/bundled/skills/_metadata.json +1 -1
- package/package.json +1 -1
|
@@ -17,6 +17,7 @@ The script runs until:
|
|
|
17
17
|
import argparse
|
|
18
18
|
import json
|
|
19
19
|
import os
|
|
20
|
+
import re
|
|
20
21
|
import signal
|
|
21
22
|
import sys
|
|
22
23
|
import tempfile
|
|
@@ -59,6 +60,58 @@ PHASE_KEYWORDS = {
|
|
|
59
60
|
},
|
|
60
61
|
}
|
|
61
62
|
|
|
63
|
+
# Case-insensitive regexes for messages reporting that the prompt no longer
# fits the model's context window. A single match is enough for
# detect_api_error_code() to report "context_too_large".
CONTEXT_ERROR_PATTERNS = [
    re.compile(source, re.I)
    for source in (
        # Machine-readable error codes.
        r"context_too_large",
        r"model_context_window_exceeded",
        # Human-readable messages.
        r"Your input exceeds the context window",
        r"input exceeds the context window",
        r"context window of this model",
        r"context window exceeded",
        # "invalid_request_error" and "context window" on the same line, in
        # either order ("." does not cross newlines without re.DOTALL).
        r"invalid_request_error.*context window",
        r"context window.*invalid_request_error",
    )
]
|
|
76
|
+
|
|
77
|
+
# Case-insensitive markers that make free text look like a runtime/provider
# error report rather than ordinary prose. _has_error_context() returns True
# as soon as any one of them matches.
ERROR_CONTEXT_PATTERNS = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\bapi error\b",
        r"invalid_request_error",
        # HTTP 400/413 in any common spelling: "status 400", "status: 413",
        # "status=400", JSON-quoted '"status": 400', and "status code 413" /
        # "status_code=400". The separator class also accepts quotes so that
        # quoted keys are handled the same way as the is_error flags below.
        r"\bstatus(?:[\s_-]*code)?[\s\"':=]*(400|413)\b",
        r"\bapi_error_status\b",
        r"\bapi_error_code\b",
        # Truthy error flags. Whitespace, quotes, ':' and '=' may appear in
        # any order between the key and the value, so both '"is_error":true'
        # and '"is_error" : true' are recognised.
        r"\blast_result_is_error\b[\s\"':=]*true\b",
        r"\bis_error\b[\s\"':=]*true\b",
    )
]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _has_error_context(text):
|
|
92
|
+
"""Return true when free text looks like a runtime/provider error."""
|
|
93
|
+
if not text:
|
|
94
|
+
return False
|
|
95
|
+
return any(pattern.search(text) for pattern in ERROR_CONTEXT_PATTERNS)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def detect_api_error_code(text, require_error_context=False):
    """Map terminal text to a normalized fatal error code.

    Returns "context_too_large" when *text* matches one of
    CONTEXT_ERROR_PATTERNS, and "" otherwise (including for empty text).

    Text taken from structured result/error events or raw stderr can be
    matched as-is. Assistant prose is less reliable, because it may quote a
    context-window message while describing a test or a recovery rule. For
    such text, pass ``require_error_context=True`` so that a match only
    counts when the text also contains an error marker recognised by
    _has_error_context().
    """
    if not text:
        return ""

    # Gate noisy sources first: without an error marker the phrase alone is
    # not treated as a failure.
    trusted = _has_error_context(text) if require_error_context else True
    if trusted and any(rx.search(text) for rx in CONTEXT_ERROR_PATTERNS):
        return "context_too_large"
    return ""
|
|
114
|
+
|
|
62
115
|
|
|
63
116
|
class ProgressTracker:
|
|
64
117
|
"""Tracks progress state from stream-json events."""
|
|
@@ -73,6 +126,12 @@ class ProgressTracker:
|
|
|
73
126
|
self.tool_call_counts = Counter()
|
|
74
127
|
self.total_tool_calls = 0
|
|
75
128
|
self.last_text_snippet = ""
|
|
129
|
+
self.last_result_is_error = False
|
|
130
|
+
self.api_error_status = None
|
|
131
|
+
self.api_error_code = ""
|
|
132
|
+
self.terminal_result_text = ""
|
|
133
|
+
self.terminal_success_at = ""
|
|
134
|
+
self.fatal_error_code = ""
|
|
76
135
|
self.is_active = True
|
|
77
136
|
self.errors = []
|
|
78
137
|
self.event_format = ""
|
|
@@ -164,11 +223,13 @@ class ProgressTracker:
|
|
|
164
223
|
elif event_type == "turn.failed":
|
|
165
224
|
error = event.get("error") or event.get("message") or "Codex turn failed"
|
|
166
225
|
self.errors.append(str(error))
|
|
226
|
+
self._detect_terminal_error(str(error))
|
|
167
227
|
self.current_tool = None
|
|
168
228
|
|
|
169
229
|
elif event_type == "error":
|
|
170
230
|
error = event.get("error") or event.get("message") or "Unknown error"
|
|
171
231
|
self.errors.append(str(error))
|
|
232
|
+
self._detect_terminal_error(str(error))
|
|
172
233
|
|
|
173
234
|
return
|
|
174
235
|
|
|
@@ -196,6 +257,7 @@ class ProgressTracker:
|
|
|
196
257
|
if text.strip():
|
|
197
258
|
self.last_text_snippet = text.strip()[:120]
|
|
198
259
|
self._detect_phase(text)
|
|
260
|
+
self._detect_terminal_error(text, require_error_context=True)
|
|
199
261
|
|
|
200
262
|
elif event_type == "tool_result" or event_type == "user":
|
|
201
263
|
# tool_result contains output from tool execution
|
|
@@ -274,6 +336,28 @@ class ProgressTracker:
|
|
|
274
336
|
state.setdefault("subagent_type", "")
|
|
275
337
|
self._update_claude_subagent_status_counts()
|
|
276
338
|
|
|
339
|
+
elif event_type == "result":
|
|
340
|
+
self.event_format = self.event_format or "stream-json"
|
|
341
|
+
self.is_active = False
|
|
342
|
+
result_text = event.get("result") or event.get("message") or ""
|
|
343
|
+
error_obj = event.get("error")
|
|
344
|
+
if isinstance(error_obj, dict):
|
|
345
|
+
error_text = " ".join(
|
|
346
|
+
str(error_obj.get(key) or "")
|
|
347
|
+
for key in ("type", "code", "message")
|
|
348
|
+
if error_obj.get(key)
|
|
349
|
+
)
|
|
350
|
+
result_text = " ".join(part for part in (str(result_text), error_text) if part)
|
|
351
|
+
api_error_code = event.get("api_error_code") or event.get("error_code") or ""
|
|
352
|
+
if isinstance(error_obj, dict) and not api_error_code:
|
|
353
|
+
api_error_code = error_obj.get("code") or error_obj.get("type") or ""
|
|
354
|
+
self._record_terminal_result(
|
|
355
|
+
text=str(result_text or ""),
|
|
356
|
+
is_error=bool(event.get("is_error")),
|
|
357
|
+
api_error_status=event.get("api_error_status"),
|
|
358
|
+
api_error_code=str(api_error_code or ""),
|
|
359
|
+
)
|
|
360
|
+
|
|
277
361
|
# ── Claude API raw stream format ────────────────────────────
|
|
278
362
|
elif event_type == "message_start":
|
|
279
363
|
self.event_format = self.event_format or "stream-json"
|
|
@@ -316,6 +400,7 @@ class ProgressTracker:
|
|
|
316
400
|
self.last_text_snippet = stripped[:120]
|
|
317
401
|
# Try to detect phase from text
|
|
318
402
|
self._detect_phase(text)
|
|
403
|
+
self._detect_terminal_error(text, require_error_context=True)
|
|
319
404
|
|
|
320
405
|
elif delta_type == "input_json_delta":
|
|
321
406
|
partial = delta.get("partial_json", "")
|
|
@@ -331,21 +416,73 @@ class ProgressTracker:
|
|
|
331
416
|
self._extract_tool_summary(full_input)
|
|
332
417
|
self._detect_phase(full_input)
|
|
333
418
|
else:
|
|
334
|
-
# Text block finished - detect phase from accumulated text
|
|
419
|
+
# Text block finished - detect phase and terminal errors from accumulated text
|
|
335
420
|
if self._text_buffer:
|
|
336
421
|
self._detect_phase(self._text_buffer)
|
|
422
|
+
self._detect_terminal_error(
|
|
423
|
+
self._text_buffer,
|
|
424
|
+
require_error_context=True,
|
|
425
|
+
)
|
|
337
426
|
self._in_tool_use = False
|
|
338
427
|
self._current_tool_input_parts = []
|
|
339
428
|
|
|
340
429
|
elif event_type == "error":
|
|
341
430
|
error_msg = event.get("error", {}).get("message", "Unknown error")
|
|
342
431
|
self.errors.append(error_msg)
|
|
432
|
+
self._detect_terminal_error(str(error_msg))
|
|
343
433
|
|
|
344
434
|
# Check for subagent indicator
|
|
345
435
|
if event.get("parent_tool_use_id"):
|
|
346
436
|
# This is a sub-agent event; tool name is still tracked normally
|
|
347
437
|
pass
|
|
348
438
|
|
|
439
|
+
def _record_terminal_result(self, text="", is_error=False, api_error_status=None, api_error_code=""):
    """Store the outcome carried by a terminal ``result`` event.

    Args:
        text: Result/error text from the event; stored truncated to 1000
            characters, and to 120 characters as the last text snippet.
        is_error: Error flag from the event; always overwrites
            ``last_result_is_error``.
        api_error_status: Status reported with the event. Stored as an int
            when it converts cleanly, otherwise stored unchanged. ``None``
            and ``""`` leave the previous value in place.
        api_error_code: Error code reported with the event, if any.
    """
    body = str(text or "")
    has_status = api_error_status not in (None, "")

    self.last_result_is_error = bool(is_error)
    if has_status:
        try:
            self.api_error_status = int(api_error_status)
        except (TypeError, ValueError):
            # Not numeric: keep the value exactly as it was reported.
            self.api_error_status = api_error_status

    # Any structured error marker makes a bare context-window phrase
    # trustworthy. Without one, the text itself has to look like an error.
    looks_like_error = (
        self.last_result_is_error
        or has_status
        or bool(api_error_code)
        or _has_error_context(body)
    )
    fatal = detect_api_error_code(
        "{} {}".format(str(api_error_code or ""), body),
        require_error_context=not looks_like_error,
    )
    if fatal:
        self.api_error_code = self.fatal_error_code = fatal
    elif api_error_code:
        # Unrecognised code: record it, but do not mark the run as fatal.
        self.api_error_code = str(api_error_code)

    self.terminal_result_text = body[:1000]
    snippet = body.strip()
    if snippet:
        self.last_text_snippet = snippet[:120]

    # fatal_error_code may also have been set by an earlier event, so test
    # the attribute rather than the local value.
    if self.fatal_error_code:
        self.errors.append(self.fatal_error_code)
    elif not self.last_result_is_error:
        self.terminal_success_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
470
|
+
|
|
471
|
+
def _detect_terminal_error(self, text, require_error_context=False):
    """Flag a fatal context-window error found in unstructured text.

    Does nothing when detect_api_error_code() finds no code. Otherwise it
    marks the last result as an error, stores the code as both the API
    error code and the fatal error code, and keeps the text (truncated to
    1000 characters, and to 120 for the snippet) for reporting.

    Args:
        text: Text to inspect; it is converted with ``str`` first.
        require_error_context: Forwarded to detect_api_error_code(); set it
            for noisy sources such as assistant prose.
    """
    raw = str(text or "")
    fatal = detect_api_error_code(raw, require_error_context=require_error_context)
    if fatal:
        self.last_result_is_error = True
        self.api_error_code = fatal
        self.fatal_error_code = fatal
        self.terminal_result_text = raw[:1000]
        if text:
            self.last_text_snippet = str(text).strip()[:120]
|
|
485
|
+
|
|
349
486
|
def _detect_phase(self, text):
|
|
350
487
|
"""Detect pipeline phase from text content.
|
|
351
488
|
|
|
@@ -692,6 +829,12 @@ class ProgressTracker:
|
|
|
692
829
|
"child_activity_signature": self.child_activity_signature,
|
|
693
830
|
"last_child_activity_at": self.last_child_activity_at,
|
|
694
831
|
"last_text_snippet": self.last_text_snippet,
|
|
832
|
+
"last_result_is_error": self.last_result_is_error,
|
|
833
|
+
"api_error_status": self.api_error_status,
|
|
834
|
+
"api_error_code": self.api_error_code,
|
|
835
|
+
"terminal_result_text": self.terminal_result_text,
|
|
836
|
+
"terminal_success_at": self.terminal_success_at,
|
|
837
|
+
"fatal_error_code": self.fatal_error_code,
|
|
695
838
|
"is_active": self.is_active,
|
|
696
839
|
"errors": self.errors[-10:], # Keep last 10 errors
|
|
697
840
|
}
|
|
@@ -728,6 +871,12 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
|
|
|
728
871
|
state["current_phase"],
|
|
729
872
|
state["total_tool_calls"],
|
|
730
873
|
state.get("child_activity_signature", ""),
|
|
874
|
+
state.get("last_result_is_error"),
|
|
875
|
+
state.get("api_error_status"),
|
|
876
|
+
state.get("api_error_code", ""),
|
|
877
|
+
state.get("fatal_error_code", ""),
|
|
878
|
+
state.get("terminal_result_text", ""),
|
|
879
|
+
tuple(state.get("errors", [])),
|
|
731
880
|
)
|
|
732
881
|
|
|
733
882
|
# Wait for log file to appear
|
|
@@ -752,11 +901,19 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
|
|
|
752
901
|
event = json.loads(line)
|
|
753
902
|
tracker.process_event(event)
|
|
754
903
|
except json.JSONDecodeError:
|
|
755
|
-
# Not a JSON line (could be stderr mixed in)
|
|
756
|
-
#
|
|
904
|
+
# Not a JSON line (could be stderr mixed in). Use it as a
|
|
905
|
+
# text snippet and only treat it as terminal when it has a
|
|
906
|
+
# strong API/runtime error marker; ordinary assistant prose
|
|
907
|
+
# can discuss context limits without being fatal.
|
|
757
908
|
stripped = line.strip()
|
|
758
909
|
if stripped and len(stripped) > 5:
|
|
759
910
|
tracker.last_text_snippet = stripped[:120]
|
|
911
|
+
tracker._detect_terminal_error(stripped, require_error_context=True)
|
|
912
|
+
current_state = tracker.to_dict()
|
|
913
|
+
current_state_key = state_key(current_state)
|
|
914
|
+
if current_state_key != last_write_state:
|
|
915
|
+
atomic_write_json(current_state, progress_file)
|
|
916
|
+
last_write_state = current_state_key
|
|
760
917
|
continue
|
|
761
918
|
|
|
762
919
|
# Write progress if state changed
|
|
@@ -49,6 +49,7 @@ SESSION_STATUS_VALUES = [
|
|
|
49
49
|
"commit_missing",
|
|
50
50
|
"docs_missing",
|
|
51
51
|
"merge_conflict",
|
|
52
|
+
"finalization_needed",
|
|
52
53
|
]
|
|
53
54
|
|
|
54
55
|
TERMINAL_STATUSES = {"completed", "failed", "skipped", "auto_skipped", "split"}
|
|
@@ -644,7 +645,25 @@ def action_update(args, feature_list_path, state_dir):
|
|
|
644
645
|
fs["degraded_reason"] = session_status
|
|
645
646
|
fs["resume_from_phase"] = None
|
|
646
647
|
fs["sessions"] = []
|
|
647
|
-
|
|
648
|
+
if session_id:
|
|
649
|
+
fs["last_session_id"] = session_id
|
|
650
|
+
fs["last_failed_session_id"] = session_id
|
|
651
|
+
|
|
652
|
+
err = update_feature_in_list(feature_list_path, feature_id, new_status)
|
|
653
|
+
if err:
|
|
654
|
+
error_out("Failed to update .prizmkit/plans/feature-list.json: {}".format(err))
|
|
655
|
+
return
|
|
656
|
+
elif session_status == "finalization_needed":
|
|
657
|
+
# Runtime preserved dirty post-completion changes but could not safely
|
|
658
|
+
# clean them for automatic merge. Preserve the dev branch and stop for
|
|
659
|
+
# manual finalization instead of spending code retry budget.
|
|
660
|
+
new_status = "failed"
|
|
661
|
+
fs["degraded_reason"] = session_status
|
|
662
|
+
fs["resume_from_phase"] = None
|
|
663
|
+
fs["finalization_needed_count"] = fs.get("finalization_needed_count", 0) + 1
|
|
664
|
+
if session_id:
|
|
665
|
+
fs["last_session_id"] = session_id
|
|
666
|
+
fs["last_failed_session_id"] = session_id
|
|
648
667
|
|
|
649
668
|
err = update_feature_in_list(feature_list_path, feature_id, new_status)
|
|
650
669
|
if err:
|
|
@@ -657,6 +676,8 @@ def action_update(args, feature_list_path, state_dir):
|
|
|
657
676
|
new_status = "pending"
|
|
658
677
|
fs["infra_error_count"] = fs.get("infra_error_count", 0) + 1
|
|
659
678
|
fs["last_infra_error_session_id"] = session_id
|
|
679
|
+
if session_id:
|
|
680
|
+
fs["last_session_id"] = session_id
|
|
660
681
|
fs["resume_from_phase"] = None
|
|
661
682
|
|
|
662
683
|
err = update_feature_in_list(feature_list_path, feature_id, new_status)
|
|
@@ -673,6 +694,9 @@ def action_update(args, feature_list_path, state_dir):
|
|
|
673
694
|
new_status = "pending"
|
|
674
695
|
|
|
675
696
|
fs["resume_from_phase"] = None
|
|
697
|
+
if session_id:
|
|
698
|
+
fs["last_session_id"] = session_id
|
|
699
|
+
fs["last_failed_session_id"] = session_id
|
|
676
700
|
# Keep sessions list and last_session_id for debugging
|
|
677
701
|
|
|
678
702
|
err = update_feature_in_list(feature_list_path, feature_id, new_status)
|
|
@@ -712,9 +736,9 @@ def action_update(args, feature_list_path, state_dir):
|
|
|
712
736
|
}
|
|
713
737
|
if auto_skipped_features:
|
|
714
738
|
summary["auto_skipped"] = [info["feature_id"] for info in auto_skipped_features]
|
|
715
|
-
if session_status in ("commit_missing", "docs_missing", "merge_conflict"):
|
|
739
|
+
if session_status in ("commit_missing", "docs_missing", "merge_conflict", "finalization_needed"):
|
|
716
740
|
summary["degraded_reason"] = session_status
|
|
717
|
-
summary["restart_policy"] = "finalization_retry"
|
|
741
|
+
summary["restart_policy"] = "manual_finalization" if session_status == "finalization_needed" else "finalization_retry"
|
|
718
742
|
elif session_status == "infra_error":
|
|
719
743
|
summary["restart_policy"] = "infra_retry"
|
|
720
744
|
summary["infra_error_count"] = fs.get("infra_error_count", 0)
|
|
@@ -69,3 +69,14 @@ Invoke-PrizmPython {{PIPELINE_DIR}}\scripts\update-checkpoint.py `
|
|
|
69
69
|
--step prizmkit-committer `
|
|
70
70
|
--status completed
|
|
71
71
|
```
|
|
72
|
+
|
|
73
|
+
### TERMINAL STOP — S09 Completed
|
|
74
|
+
|
|
75
|
+
After all of the following are true, the session is **TERMINAL**:
|
|
76
|
+
|
|
77
|
+
- `/prizmkit-committer` has completed
|
|
78
|
+
- `git status --short` is clean (except ignored/local-only pipeline artifacts)
|
|
79
|
+
- `.prizmkit/specs/{{FEATURE_SLUG}}/completion-summary.json` is written
|
|
80
|
+
- the S09 / `prizmkit-committer` checkpoint is marked `completed`
|
|
81
|
+
|
|
82
|
+
At that point, stop immediately. Do **not** process delayed teammate/reviewer findings, do **not** edit files, do **not** amend the commit, and do **not** start any new tool calls. Any findings that arrive after S09 is completed are follow-up work for a later session, not part of this feature commit.
|
|
@@ -62,3 +62,14 @@ Invoke-PrizmPython {{PIPELINE_DIR}}\scripts\update-checkpoint.py `
|
|
|
62
62
|
--step prizmkit-committer `
|
|
63
63
|
--status completed
|
|
64
64
|
```
|
|
65
|
+
|
|
66
|
+
### TERMINAL STOP — S09 Completed
|
|
67
|
+
|
|
68
|
+
After all of the following are true, the session is **TERMINAL**:
|
|
69
|
+
|
|
70
|
+
- `/prizmkit-committer` has completed
|
|
71
|
+
- `git status --short` is clean (except ignored/local-only pipeline artifacts)
|
|
72
|
+
- `.prizmkit/specs/{{FEATURE_SLUG}}/completion-summary.json` is written
|
|
73
|
+
- the S09 / `prizmkit-committer` checkpoint is marked `completed`
|
|
74
|
+
|
|
75
|
+
At that point, stop immediately. Do **not** process delayed teammate/reviewer findings, do **not** edit files, do **not** amend the commit, and do **not** start any new tool calls. Any findings that arrive after S09 is completed are follow-up work for a later session, not part of this feature commit.
|