prizmkit 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/VERSION.json +5 -0
- package/bundled/adapters/claude/agent-adapter.js +108 -0
- package/bundled/adapters/claude/command-adapter.js +104 -0
- package/bundled/adapters/claude/paths.js +35 -0
- package/bundled/adapters/claude/rules-adapter.js +77 -0
- package/bundled/adapters/claude/settings-adapter.js +73 -0
- package/bundled/adapters/claude/team-adapter.js +183 -0
- package/bundled/adapters/codebuddy/agent-adapter.js +43 -0
- package/bundled/adapters/codebuddy/paths.js +29 -0
- package/bundled/adapters/codebuddy/settings-adapter.js +47 -0
- package/bundled/adapters/codebuddy/skill-adapter.js +68 -0
- package/bundled/adapters/codebuddy/team-adapter.js +46 -0
- package/bundled/adapters/shared/frontmatter.js +77 -0
- package/bundled/agents/prizm-dev-team-coordinator.md +142 -0
- package/bundled/agents/prizm-dev-team-dev.md +99 -0
- package/bundled/agents/prizm-dev-team-pm.md +114 -0
- package/bundled/agents/prizm-dev-team-reviewer.md +119 -0
- package/bundled/dev-pipeline/README.md +482 -0
- package/bundled/dev-pipeline/assets/feature-list-example.json +147 -0
- package/bundled/dev-pipeline/assets/prizm-dev-team-integration.md +138 -0
- package/bundled/dev-pipeline/launch-bugfix-daemon.sh +425 -0
- package/bundled/dev-pipeline/launch-daemon.sh +549 -0
- package/bundled/dev-pipeline/reset-feature.sh +209 -0
- package/bundled/dev-pipeline/retry-bug.sh +344 -0
- package/bundled/dev-pipeline/retry-feature.sh +338 -0
- package/bundled/dev-pipeline/run-bugfix.sh +638 -0
- package/bundled/dev-pipeline/run.sh +845 -0
- package/bundled/dev-pipeline/scripts/check-session-status.py +158 -0
- package/bundled/dev-pipeline/scripts/detect-stuck.py +385 -0
- package/bundled/dev-pipeline/scripts/generate-bootstrap-prompt.py +598 -0
- package/bundled/dev-pipeline/scripts/generate-bugfix-prompt.py +402 -0
- package/bundled/dev-pipeline/scripts/init-bugfix-pipeline.py +294 -0
- package/bundled/dev-pipeline/scripts/init-dev-team.py +134 -0
- package/bundled/dev-pipeline/scripts/init-pipeline.py +335 -0
- package/bundled/dev-pipeline/scripts/update-bug-status.py +748 -0
- package/bundled/dev-pipeline/scripts/update-feature-status.py +1076 -0
- package/bundled/dev-pipeline/templates/bootstrap-prompt.md +262 -0
- package/bundled/dev-pipeline/templates/bug-fix-list-schema.json +159 -0
- package/bundled/dev-pipeline/templates/bugfix-bootstrap-prompt.md +291 -0
- package/bundled/dev-pipeline/templates/feature-list-schema.json +112 -0
- package/bundled/dev-pipeline/templates/session-status-schema.json +77 -0
- package/bundled/skills/_metadata.json +267 -0
- package/bundled/skills/app-planner/SKILL.md +580 -0
- package/bundled/skills/app-planner/assets/planning-guide.md +313 -0
- package/bundled/skills/app-planner/scripts/validate-and-generate.py +758 -0
- package/bundled/skills/bug-planner/SKILL.md +235 -0
- package/bundled/skills/bugfix-pipeline-launcher/SKILL.md +252 -0
- package/bundled/skills/dev-pipeline-launcher/SKILL.md +223 -0
- package/bundled/skills/prizm-kit/SKILL.md +151 -0
- package/bundled/skills/prizm-kit/assets/claude-md-template.md +38 -0
- package/bundled/skills/prizm-kit/assets/codebuddy-md-template.md +35 -0
- package/bundled/skills/prizm-kit/assets/hooks/prizm-commit-hook.json +15 -0
- package/bundled/skills/prizmkit-adr-manager/SKILL.md +68 -0
- package/bundled/skills/prizmkit-adr-manager/assets/adr-template.md +26 -0
- package/bundled/skills/prizmkit-analyze/SKILL.md +194 -0
- package/bundled/skills/prizmkit-api-doc-generator/SKILL.md +56 -0
- package/bundled/skills/prizmkit-bug-fix-workflow/SKILL.md +351 -0
- package/bundled/skills/prizmkit-bug-reproducer/SKILL.md +62 -0
- package/bundled/skills/prizmkit-ci-cd-generator/SKILL.md +54 -0
- package/bundled/skills/prizmkit-clarify/SKILL.md +52 -0
- package/bundled/skills/prizmkit-code-review/SKILL.md +70 -0
- package/bundled/skills/prizmkit-committer/SKILL.md +117 -0
- package/bundled/skills/prizmkit-db-migration/SKILL.md +65 -0
- package/bundled/skills/prizmkit-dependency-health/SKILL.md +123 -0
- package/bundled/skills/prizmkit-deployment-strategy/SKILL.md +58 -0
- package/bundled/skills/prizmkit-error-triage/SKILL.md +55 -0
- package/bundled/skills/prizmkit-implement/SKILL.md +47 -0
- package/bundled/skills/prizmkit-init/SKILL.md +156 -0
- package/bundled/skills/prizmkit-log-analyzer/SKILL.md +55 -0
- package/bundled/skills/prizmkit-monitoring-setup/SKILL.md +75 -0
- package/bundled/skills/prizmkit-onboarding-generator/SKILL.md +70 -0
- package/bundled/skills/prizmkit-perf-profiler/SKILL.md +55 -0
- package/bundled/skills/prizmkit-plan/SKILL.md +54 -0
- package/bundled/skills/prizmkit-plan/assets/plan-template.md +37 -0
- package/bundled/skills/prizmkit-prizm-docs/SKILL.md +140 -0
- package/bundled/skills/prizmkit-prizm-docs/assets/PRIZM-SPEC.md +943 -0
- package/bundled/skills/prizmkit-retrospective/SKILL.md +79 -0
- package/bundled/skills/prizmkit-security-audit/SKILL.md +130 -0
- package/bundled/skills/prizmkit-specify/SKILL.md +52 -0
- package/bundled/skills/prizmkit-specify/assets/spec-template.md +37 -0
- package/bundled/skills/prizmkit-summarize/SKILL.md +51 -0
- package/bundled/skills/prizmkit-summarize/assets/registry-template.md +18 -0
- package/bundled/skills/prizmkit-tasks/SKILL.md +50 -0
- package/bundled/skills/prizmkit-tasks/assets/tasks-template.md +21 -0
- package/bundled/skills/prizmkit-tech-debt-tracker/SKILL.md +139 -0
- package/bundled/team/prizm-dev-team.json +47 -0
- package/bundled/templates/claude-md-template.md +38 -0
- package/bundled/templates/codebuddy-md-template.md +35 -0
- package/package.json +2 -1
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Parse a session-status.json file and output a simple status string for the shell runner.
|
|
3
|
+
|
|
4
|
+
Reads the session status written by an agent at session end, validates required
|
|
5
|
+
fields, and prints a single-line result to stdout. Detailed JSON is written to
|
|
6
|
+
stderr for logging.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python3 check-session-status.py --status-file <path>
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Fields every session-status.json must carry as non-empty strings.
REQUIRED_FIELDS = ["session_id", "feature_id", "status", "timestamp"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def parse_args():
    """Build and evaluate the command-line interface for the status checker."""
    arg_parser = argparse.ArgumentParser(
        description="Parse session-status.json and output a status string for the shell runner."
    )
    arg_parser.add_argument(
        "--status-file",
        required=True,
        help="Path to the session-status.json file",
    )
    return arg_parser.parse_args()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_status_file(path):
    """Read *path* and parse it as JSON.

    Returns a (data, error_message) pair; error_message is None on success.
    """
    try:
        with open(path, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except (IOError, OSError) as exc:
        return None, "Cannot read status file: {}".format(str(exc))
    except (json.JSONDecodeError, ValueError) as exc:
        return None, "Malformed JSON in status file: {}".format(str(exc))
    return parsed, None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def validate_required_fields(data):
    """Return the names of required fields that are absent or not non-empty strings.

    A field counts as invalid when it is missing, not a string, or only
    whitespace. An empty list means the payload passed validation.
    """
    return [
        name
        for name in REQUIRED_FIELDS
        if name not in data
        or not isinstance(data[name], str)
        or not data[name].strip()
    ]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def determine_status(data):
    """Determine the single-line status string from the parsed data.

    Returns one of: success, partial_resumable, partial_not_resumable,
    failed, or crashed (for unknown/unexpected status values).
    """
    status = data.get("status", "")

    if status == "success":
        return "success"
    if status == "partial":
        # Resumability decides which partial variant the shell runner sees.
        if data.get("can_resume", False):
            return "partial_resumable"
        return "partial_not_resumable"
    if status == "failed":
        return "failed"
    # Unknown status value — treat as crashed
    return "crashed"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def build_detail_report(data, resolved_status):
    """Assemble the detailed JSON report that is written to stderr for logging."""
    raw_errors = data.get("errors", [])
    # A non-list "errors" value counts as zero rather than crashing on len().
    error_total = len(raw_errors) if isinstance(raw_errors, list) else 0

    report = {
        "status": resolved_status,
        "feature_id": data.get("feature_id"),
        "completed_phases": data.get("completed_phases", []),
        "checkpoint_reached": data.get("checkpoint_reached"),
        "tasks_completed": data.get("tasks_completed", 0),
        "tasks_total": data.get("tasks_total", 0),
        "error_count": error_total,
        "can_resume": data.get("can_resume", False),
        "resume_from_phase": data.get("resume_from_phase"),
    }
    return report
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _emit_crashed(data, extra):
    """Write a 'crashed' detail report to stderr, print 'crashed', and exit 0.

    *data* supplies whatever partial fields were readable; *extra* carries the
    error-specific entry (load_error or validation_error) appended last so the
    emitted JSON matches the historical key order.
    """
    detail = {
        "status": "crashed",
        "feature_id": data.get("feature_id"),
        "completed_phases": data.get("completed_phases", []),
        "checkpoint_reached": data.get("checkpoint_reached"),
        "tasks_completed": data.get("tasks_completed", 0),
        "tasks_total": data.get("tasks_total", 0),
        "error_count": 1,
        "can_resume": False,
        "resume_from_phase": None,
    }
    detail.update(extra)
    sys.stderr.write(json.dumps(detail, indent=2, ensure_ascii=False) + "\n")
    print("crashed")
    sys.exit(0)


def main():
    """Entry point: resolve the session status and print it for the shell runner."""
    args = parse_args()

    # Load the status file
    data, load_error = load_status_file(args.status_file)
    if load_error is not None:
        # File missing or malformed JSON
        _emit_crashed({}, {"load_error": load_error})

    # Validate required fields
    missing = validate_required_fields(data)
    if missing:
        _emit_crashed(
            data,
            {
                "validation_error": "Missing or invalid required fields: {}".format(
                    ", ".join(missing)
                )
            },
        )

    # Determine status
    resolved_status = determine_status(data)

    # Build and emit detail report to stderr; stdout carries only the status word.
    detail = build_detail_report(data, resolved_status)
    sys.stderr.write(json.dumps(detail, indent=2, ensure_ascii=False) + "\n")

    # Emit single-line status to stdout
    print(resolved_status)
    sys.exit(0)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Detect stuck features in the dev-pipeline.
|
|
3
|
+
|
|
4
|
+
Checks each feature for conditions that indicate it is stuck:
|
|
5
|
+
1. Max retries exceeded
|
|
6
|
+
2. Same checkpoint for consecutive sessions
|
|
7
|
+
3. Stale or missing heartbeat (for in_progress features)
|
|
8
|
+
4. Dependency deadlock (depends on a failed feature)
|
|
9
|
+
|
|
10
|
+
Outputs a JSON report to stdout and exits with code 1 if any stuck
|
|
11
|
+
features are found, 0 otherwise.
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
python3 detect-stuck.py --state-dir <path> [--feature-id <id>]
|
|
15
|
+
[--max-retries <n>] [--stale-threshold <seconds>]
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_args():
    """Build and evaluate the command-line interface for stuck detection."""
    cli = argparse.ArgumentParser(
        description="Detect stuck features in the dev-pipeline."
    )
    cli.add_argument(
        "--state-dir",
        required=True,
        help="Path to the state/ directory",
    )
    cli.add_argument(
        "--feature-id",
        default=None,
        help="Check a specific feature ID, or check all if omitted",
    )
    cli.add_argument(
        "--max-retries",
        type=int,
        default=3,
        help="Maximum allowed retries before a feature is considered stuck (default: 3)",
    )
    cli.add_argument(
        "--stale-threshold",
        type=int,
        default=600,
        help="Heartbeat staleness threshold in seconds (default: 600)",
    )
    return cli.parse_args()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def load_json(path):
    """Parse JSON from *path*; any read or parse failure yields None."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except (IOError, OSError, json.JSONDecodeError, ValueError):
        # Best-effort loader: callers treat None as "not available".
        return None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def discover_feature_ids(state_dir):
    """Return a sorted list of feature IDs found in state/features/.

    Each subdirectory of state/features/ is one feature; plain files are
    ignored. Returns an empty list when the directory does not exist.
    """
    features_dir = os.path.join(state_dir, "features")
    if not os.path.isdir(features_dir):
        return []
    return sorted(
        entry
        for entry in os.listdir(features_dir)
        if os.path.isdir(os.path.join(features_dir, entry))
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def get_session_statuses(feature_dir):
    """Collect session-status.json payloads for every session of a feature.

    Returns a list of (session_id, data) tuples sorted by session ID;
    sessions without a readable status file are skipped.
    """
    sessions_dir = os.path.join(feature_dir, "sessions")
    if not os.path.isdir(sessions_dir):
        return []
    collected = []
    for entry in sorted(os.listdir(sessions_dir)):
        entry_path = os.path.join(sessions_dir, entry)
        if not os.path.isdir(entry_path):
            continue
        payload = load_json(os.path.join(entry_path, "session-status.json"))
        if payload is not None:
            collected.append((entry, payload))
    return collected
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def parse_iso_timestamp(ts_str):
    """Parse an ISO 8601 timestamp string to a timezone-aware datetime.

    Accepts a trailing "Z" designator or an explicit UTC offset; naive
    timestamps are assumed to be UTC. Returns None when *ts_str* is not a
    string or cannot be parsed.
    """
    if not isinstance(ts_str, str):
        return None
    # Known UTC layouts, with and without fractional seconds.
    formats = [
        "%Y-%m-%dT%H:%M:%SZ",
        "%Y-%m-%dT%H:%M:%S+00:00",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S.%f+00:00",
    ]
    for fmt in formats:
        try:
            return datetime.strptime(ts_str, fmt).replace(tzinfo=timezone.utc)
        except ValueError:
            continue
    # Fallback: normalize only a TRAILING "Z" (a blanket replace would rewrite
    # every "Z" in a malformed string), then defer to fromisoformat.
    clean = ts_str[:-1] + "+00:00" if ts_str.endswith("Z") else ts_str
    try:
        dt = datetime.fromisoformat(clean)
    except (ValueError, AttributeError):
        # Unparseable string, or fromisoformat missing (Python < 3.7).
        return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def check_max_retries(feature_status, max_retries):
    """Check 1: has the feature reached or exceeded the maximum retry count?

    Returns a stuck-report dict, or None when the feature is within limits
    or the retry count is not an integer.
    """
    retries = feature_status.get("retry_count", 0)
    if not isinstance(retries, int) or retries < max_retries:
        return None
    return {
        "reason": "max_retries_exceeded",
        "details": "Retry count {} has reached or exceeded max retries {}".format(
            retries, max_retries
        ),
        "suggestion": "Investigate recurring failures and consider resetting the feature or adjusting the approach",
    }
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def check_stuck_checkpoint(feature_dir):
    """Check 2: has the feature sat at one checkpoint for 3 consecutive sessions?

    Returns a stuck-report dict or None.
    """
    statuses = get_session_statuses(feature_dir)
    if len(statuses) < 3:
        return None

    # Checkpoints reported by the three most recent sessions.
    recent = [payload.get("checkpoint_reached") for _sid, payload in statuses[-3:]]

    # Only flag when all three are present and identical.
    first = recent[0]
    if first is None or any(cp != first for cp in recent):
        return None
    return {
        "reason": "stuck_at_checkpoint",
        "details": "Stuck at {} for 3 consecutive sessions".format(first),
        "suggestion": "Review plan.md generation - checkpoint {} validation is repeatedly failing".format(
            first
        ),
    }
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def check_stale_heartbeat(feature_id, feature_status, state_dir, stale_threshold):
    """Check 3: is the heartbeat stale or missing for an in_progress feature?

    Only applies when the feature's status is 'in_progress' and the
    pipeline's current session belongs to this feature.

    Returns a stuck-report dict or None.
    """
    if feature_status.get("status") != "in_progress":
        return None

    # The active session is recorded in current-session.json at the state root.
    current_session = load_json(os.path.join(state_dir, "current-session.json"))
    if current_session is None:
        return None

    # Bail out unless the current session is for this feature.
    if current_session.get("feature_id") != feature_id:
        return None
    session_id = current_session.get("session_id")
    if not session_id:
        return None

    # Load the heartbeat file for the active session.
    heartbeat = load_json(
        os.path.join(
            state_dir, "features", feature_id, "sessions", session_id, "heartbeat.json"
        )
    )
    if heartbeat is None:
        return {
            "reason": "no_heartbeat",
            "details": "Feature is in_progress but no heartbeat.json found for session {}".format(
                session_id
            ),
            "suggestion": "The agent session may have crashed without writing a heartbeat - check session logs",
        }

    # Parse the heartbeat timestamp and compare against the staleness threshold.
    ts_str = heartbeat.get("timestamp")
    heartbeat_time = parse_iso_timestamp(ts_str)
    if heartbeat_time is None:
        return {
            "reason": "stale_heartbeat",
            "details": "Heartbeat has invalid or unparseable timestamp: {}".format(ts_str),
            "suggestion": "Check the agent session - heartbeat timestamp is malformed",
        }

    age_seconds = (datetime.now(timezone.utc) - heartbeat_time).total_seconds()
    if age_seconds > stale_threshold:
        return {
            "reason": "stale_heartbeat",
            "details": "Heartbeat is {:.0f}s old (threshold: {}s) for session {}".format(
                age_seconds, stale_threshold, session_id
            ),
            "suggestion": "The agent may be hung or crashed - consider terminating and retrying the session",
        }

    return None
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def check_dependency_deadlock(feature_id, feature_list_data, state_dir):
    """Check 4: does this feature depend on a feature whose status is 'failed'?

    Returns a stuck-report dict or None.
    """
    if feature_list_data is None:
        return None

    features = feature_list_data.get("features", [])
    if not isinstance(features, list):
        return None

    # Locate this feature's dependency list in the feature list.
    deps = next(
        (
            entry.get("dependencies", [])
            for entry in features
            if isinstance(entry, dict) and entry.get("id") == feature_id
        ),
        None,
    )
    if not deps or not isinstance(deps, list):
        return None

    # Inspect each dependency's recorded status in state.
    for dep_id in deps:
        dep_status = load_json(
            os.path.join(state_dir, "features", dep_id, "status.json")
        )
        if dep_status is None:
            continue
        if dep_status.get("status") == "failed":
            return {
                "reason": "dependency_failed",
                "details": "Depends on {} which has status 'failed'".format(dep_id),
                "suggestion": "Fix or skip {} to unblock {}".format(dep_id, feature_id),
            }

    return None
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def find_feature_list(state_dir):
    """Locate and load feature-list.json via the pipeline.json reference.

    Returns the parsed feature list, or None when pipeline.json is missing,
    carries no usable feature_list_path, or the referenced file is absent.
    """
    pipeline = load_json(os.path.join(state_dir, "pipeline.json"))
    if pipeline is None:
        return None

    fl_path = pipeline.get("feature_list_path")
    if not fl_path or not os.path.isfile(fl_path):
        return None
    return load_json(fl_path)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def check_feature(feature_id, state_dir, feature_list_data, max_retries, stale_threshold):
    """Run every stuck-detection check against a single feature.

    Returns a list of stuck-report dicts; empty when the feature is healthy
    or its status file cannot be read.
    """
    feature_dir = os.path.join(state_dir, "features", feature_id)
    feature_status = load_json(os.path.join(feature_dir, "status.json"))
    if feature_status is None:
        # Cannot read status — skip silently
        return []

    # Run checks 1-4 in order; each returns a report dict or None.
    candidates = [
        check_max_retries(feature_status, max_retries),
        check_stuck_checkpoint(feature_dir),
        check_stale_heartbeat(feature_id, feature_status, state_dir, stale_threshold),
        check_dependency_deadlock(feature_id, feature_list_data, state_dir),
    ]
    return [report for report in candidates if report is not None]
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def main():
    """Entry point: report stuck features as JSON; exit 1 when any are found."""
    args = parse_args()
    state_dir = os.path.abspath(args.state_dir)

    if not os.path.isdir(state_dir):
        sys.stderr.write("Error: state directory not found: {}\n".format(state_dir))
        sys.exit(2)

    # A single --feature-id narrows the scan; otherwise check every feature found.
    if args.feature_id:
        feature_ids = [args.feature_id]
    else:
        feature_ids = discover_feature_ids(state_dir)

    # The feature list is only needed for dependency-deadlock checks.
    feature_list_data = find_feature_list(state_dir)

    stuck_features = []
    for fid in feature_ids:
        per_feature = check_feature(
            fid, state_dir, feature_list_data, args.max_retries, args.stale_threshold
        )
        stuck_features.extend(
            {
                "feature_id": fid,
                "reason": report["reason"],
                "details": report["details"],
                "suggestion": report["suggestion"],
            }
            for report in per_feature
        )

    output = {
        "stuck_features": stuck_features,
        "total_checked": len(feature_ids),
        "stuck_count": len(stuck_features),
    }
    print(json.dumps(output, indent=2, ensure_ascii=False))

    # Non-zero exit signals the shell runner that intervention is needed.
    sys.exit(1 if stuck_features else 0)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|