theslopmachine 1.0.13 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/agents/developer.md +6 -7
- package/assets/agents/slopmachine-claude.md +66 -9
- package/assets/agents/slopmachine.md +68 -9
- package/assets/claude/agents/developer.md +5 -1
- package/assets/skills/clarification-gate/SKILL.md +56 -20
- package/assets/skills/claude-worker-management/SKILL.md +14 -4
- package/assets/skills/deep-retrospective/SKILL.md +179 -0
- package/assets/skills/deep-retrospective/run.py +446 -0
- package/assets/skills/deep-retrospective/workflow-reference.md +240 -0
- package/assets/skills/developer-session-lifecycle/SKILL.md +18 -4
- package/assets/skills/development-guidance/SKILL.md +52 -31
- package/assets/skills/evaluation-triage/SKILL.md +21 -7
- package/assets/skills/final-evaluation-orchestration/SKILL.md +92 -28
- package/assets/skills/integrated-verification/SKILL.md +38 -42
- package/assets/skills/p8-readiness-reconciliation/SKILL.md +31 -10
- package/assets/skills/planning-gate/SKILL.md +10 -7
- package/assets/skills/planning-guidance/SKILL.md +60 -52
- package/assets/skills/retrospective-analysis/SKILL.md +172 -58
- package/assets/skills/scaffold-guidance/SKILL.md +18 -6
- package/assets/skills/submission-packaging/SKILL.md +11 -3
- package/assets/slopmachine/clarifier-agent-prompt.md +7 -6
- package/assets/slopmachine/exact-readme-template.md +8 -12
- package/assets/slopmachine/owner-verification-checklist.md +1 -1
- package/assets/slopmachine/phase-1-design-prompt.md +5 -10
- package/assets/slopmachine/phase-1-design-template.md +15 -11
- package/assets/slopmachine/phase-2-execution-planning-prompt.md +5 -2
- package/assets/slopmachine/phase-2-plan-template.md +14 -4
- package/assets/slopmachine/scaffold-playbooks/shared-contract.md +2 -1
- package/assets/slopmachine/templates/AGENTS.md +3 -1
- package/assets/slopmachine/templates/CLAUDE.md +3 -1
- package/assets/slopmachine/test-coverage-prompt.md +8 -1
- package/assets/slopmachine/utils/README.md +1 -5
- package/assets/slopmachine/utils/claude_live_common.mjs +2 -5
- package/assets/slopmachine/utils/prepare_evaluation_send_packet.mjs +3 -3
- package/package.json +1 -1
- package/src/constants.js +0 -9
- package/src/init.js +17 -24
- package/src/install.js +30 -28
- package/assets/slopmachine/utils/prepare_evaluation_prompt.mjs +0 -81
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: deep-retrospective
|
|
3
|
+
description: Deep retrospective analysis for slopmachine workflow runs. Triggered exclusively by "deep-retrospective <path>". Extracts raw session data then performs multi-phase analysis comparing actual workflow traces against expected behavior.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Deep Retrospective
|
|
7
|
+
|
|
8
|
+
Two-phase skill for analyzing completed slopmachine workflow runs.
|
|
9
|
+
|
|
10
|
+
## Phase 1: Extraction
|
|
11
|
+
|
|
12
|
+
When the user triggers `deep-retrospective <path>`, first run the extraction script:
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
python3 ~/.agents/skills/deep-retrospective/run.py <path>
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
This writes raw data to `retrospectives/<project>_<timestamp>/extraction/`. Proceed to Phase 2 immediately — no pause, no user confirmation.
|
|
19
|
+
|
|
20
|
+
## Phase 2: Deep Analysis
|
|
21
|
+
|
|
22
|
+
Load `~/.agents/skills/deep-retrospective/workflow-reference.md` before starting. This defines expected behavior for every phase, developer lane rules, and known anti-patterns.
|
|
23
|
+
|
|
24
|
+
**Output directory:** `retrospectives/<project>_<timestamp>/analysis/`
|
|
25
|
+
|
|
26
|
+
**Critical rule:** The analysis must be exhaustive. Read every text response from every session. Do not skim. Quote specific excerpts from developer, evaluator, and owner traces as evidence for every finding. The per-phase files especially must be deeply detailed — think of them as investigation reports where each session interaction is evidence.
|
|
27
|
+
|
|
28
|
+
### Step 1: Build the Phase Timeline
|
|
29
|
+
|
|
30
|
+
Read `extraction/sessions.json`, `extraction/beads.json`, and `extraction/ai-metadata.json`. Build a timeline with per-phase: start/end times, duration, bead IDs, active session IDs, total tokens consumed. Write to `analysis/phase-timeline.md`.
|
|
31
|
+
|
|
32
|
+
### Step 2: Read Every Session Trace — Do Not Skim
|
|
33
|
+
|
|
34
|
+
**This is the most important step.** Open every file in `extraction/sessions/`. For each session, read ALL text_parts. Do not stop at the first 10-15 entries. Read everything. Take notes.
|
|
35
|
+
|
|
36
|
+
For each session, record:
|
|
37
|
+
- Session ID, type, part count, token totals
|
|
38
|
+
- Every text response with timestamp (quote the full text, not truncated snippets)
|
|
39
|
+
- Every tool call name and count
|
|
40
|
+
- Every task launch (subagent spawn) with description and prompt excerpt
|
|
41
|
+
|
|
42
|
+
### Step 3: Map Sessions to Phases
|
|
43
|
+
|
|
44
|
+
Start from `extraction/phase-session-map.json`, which the extraction script pre-computes by overlapping bead time windows with session times, then verify and refine it: using the bead timestamps and the session time_created/time_updated fields from `extraction/sessions.json`, assign each session to the phase(s) it was active during. The owner session spans all phases — split its text responses by phase. Developer sessions for specific phases (design, primary dev, bugfix, coverage) are assigned to their respective phases.
|
|
45
|
+
|
|
46
|
+
### Step 4: Per-Phase Deep Analysis — The Core Deliverable
|
|
47
|
+
|
|
48
|
+
For each phase (1 through 8 as available), produce `analysis/phases/phase-<N>-<name>.md`. Each file must be thorough — these are the primary analysis artifacts.
|
|
49
|
+
|
|
50
|
+
**Every phase file must include these sections, with no section skipped:**
|
|
51
|
+
|
|
52
|
+
#### 4a. Session Inventory
|
|
53
|
+
List every session active during this phase with: ID, slug, title, type, part count, input tokens, output tokens, time created, time updated.
|
|
54
|
+
|
|
55
|
+
#### 4b. Owner Actions — Step by Step
|
|
56
|
+
Reconstruct what the owner did from the owner session's text_parts for this phase. Read every owner text response and list each action chronologically. For each action, include:
|
|
57
|
+
- Timestamp
|
|
58
|
+
- What the owner did (launched subagent, sent prompt, reviewed output, recorded bead)
|
|
59
|
+
- The exact text the owner wrote or the prompt that was sent
|
|
60
|
+
- Which subagent session was launched (if any), with session ID
|
|
61
|
+
|
|
62
|
+
#### 4c. Developer/Worker Interactions — Complete Trace
|
|
63
|
+
Read ALL text responses from every developer/general/evaluator session active during this phase. Present them chronologically. For each interaction:
|
|
64
|
+
- Timestamp
|
|
65
|
+
- What was said (quote the actual text — do not summarize or paraphrase)
|
|
66
|
+
- What actions the developer took (tool calls, file edits)
|
|
67
|
+
- What was produced (test counts, files created, outcomes)
|
|
68
|
+
|
|
69
|
+
For the primary developer session spanning multiple phases (design into development): split the responses by module/topic. For Phase 3 specifically, group by module (scaffold, auth, courses, resources, announcements, search, stores, admin).
|
|
70
|
+
|
|
71
|
+
For the bugfix developer: present each fix round as a subsection with the issues in that round, the fixes applied, and the test impact.
|
|
72
|
+
|
|
73
|
+
#### 4d. Bead Commentary
|
|
74
|
+
Every bead comment for this phase — quoted in full (not truncated). Include the bead ID, timestamp, author, and full text of the comment.
|
|
75
|
+
|
|
76
|
+
#### 4e. Rule Compliance — Expected vs Actual
|
|
77
|
+
Using `workflow-reference.md` as the standard, create a table comparing each relevant rule against what actually happened:
|
|
78
|
+
|
|
79
|
+
| Rule from workflow-reference | Expected | Actual | Verdict | Evidence |
|
|
80
|
+
|---|---|---|---|---|
|
|
81
|
+
| [specific rule] | [what should happen] | [what happened based on traces] | PASS/FAIL/MISSED/UNCLEAR | [session ID + excerpt] |
|
|
82
|
+
|
|
83
|
+
#### 4f. Interactions Between Sessions
|
|
84
|
+
Trace the flow of work between sessions in this phase:
|
|
85
|
+
- Owner → developer: what was the prompt? Was it natural language? Did it expose workflow mechanics?
|
|
86
|
+
- Developer → owner: what was the response? Did the developer report completion? How?
|
|
87
|
+
- Owner → evaluator: what prompt was sent? Was it verbatim?
|
|
88
|
+
- Evaluator → owner: what verdict? What issues were found?
|
|
89
|
+
- Owner → bugfix developer: how were issues communicated?
|
|
90
|
+
|
|
91
|
+
Quote the actual messages exchanged. Do not summarize.
|
|
92
|
+
|
|
93
|
+
#### 4g. What Worked Well / What Went Wrong
|
|
94
|
+
Specific findings with exact evidence:
|
|
95
|
+
- What patterns were correct (e.g. "natural language prompting maintained throughout — see excerpt X")
|
|
96
|
+
- What patterns were incorrect (e.g. "permissive assertion at timestamp X — see excerpt Y")
|
|
97
|
+
- Every deviation from the workflow-reference must be listed with the rule it violates
|
|
98
|
+
|
|
99
|
+
#### 4h. What Should Have Been Caught
|
|
100
|
+
Issues that originated in this phase but only surfaced later. For each:
|
|
101
|
+
- What the issue was
|
|
102
|
+
- Which later phase discovered it (with bead comment citation)
|
|
103
|
+
- Why it was missed in this phase
|
|
104
|
+
- What specific check would have caught it
|
|
105
|
+
|
|
106
|
+
### Step 5: Cross-Phase Root Cause Chain
|
|
107
|
+
|
|
108
|
+
Write to `analysis/root-cause-chain.md`. Build the end-to-end trace:
|
|
109
|
+
```
|
|
110
|
+
P1-P2: [what was accepted without proper verification]
|
|
111
|
+
→ Specific gap: [exact plan/design deficiency]
|
|
112
|
+
↓
|
|
113
|
+
P3: [specific development failures with excerpts]
|
|
114
|
+
→ Module by module: [what went wrong in each]
|
|
115
|
+
↓
|
|
116
|
+
P4: [what verification missed]
|
|
117
|
+
→ [specific evaluator pass deficiency]
|
|
118
|
+
↓
|
|
119
|
+
P5: [what evaluation found but couldn't prevent]
|
|
120
|
+
→ [coverage chase details]
|
|
121
|
+
↓
|
|
122
|
+
P6-P7: [what was caught at the last moment]
|
|
123
|
+
→ [exact bugs discovered, when, how]
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Then for every late finding: a table with finding, phase surfaced, phase that should have caught it, why it was missed, specific workflow fix.
|
|
127
|
+
|
|
128
|
+
Also identify the **single highest-leverage fix** — the one change that would have prevented the most downstream churn.
|
|
129
|
+
|
|
130
|
+
### Step 6: Developer Pattern Analysis
|
|
131
|
+
|
|
132
|
+
Write to `analysis/dev-patterns.md`. Read ALL developer text responses across all developer sessions. For each anti-pattern from workflow-reference.md, count occurrences and quote the exact excerpts. Include:
|
|
133
|
+
- Pattern name
|
|
134
|
+
- Count
|
|
135
|
+
- Every instance with session ID, timestamp, full text excerpt
|
|
136
|
+
- Which rule from workflow-reference.md or developer.md is violated
|
|
137
|
+
|
|
138
|
+
### Step 7: Evaluator Session Analysis
|
|
139
|
+
|
|
140
|
+
Write to `analysis/evaluator-sessions.md`. For every evaluator session (all sessions whose `session_type` in `extraction/sessions.json` is `"evaluator"`):
|
|
141
|
+
|
|
142
|
+
- Session ID, parts, tokens
|
|
143
|
+
- What prompt was sent (was it the full evaluation prompt? regeneration prompt? follow-up?)
|
|
144
|
+
- What verdict was returned (Fail, Partial Pass, Pass)
|
|
145
|
+
- What issues were found (quote the key findings)
|
|
146
|
+
- Whether the evaluator had file-write capability
|
|
147
|
+
- Whether this was part of a regeneration cycle or coverage chase
|
|
148
|
+
|
|
149
|
+
Group evaluator sessions by purpose: internal evaluator (Phase 4), audit cycle 1, audit cycle 2, coverage/README audit, coverage chase sessions.
|
|
150
|
+
|
|
151
|
+
### Step 8: Owner Decision Quality
|
|
152
|
+
|
|
153
|
+
Write to `analysis/owner-decisions.md`. Rate every significant owner decision with:
|
|
154
|
+
- What was decided (specific action or inaction)
|
|
155
|
+
- Quality rating (GOOD / WEAK / POOR)
|
|
156
|
+
- What resulted from this decision
|
|
157
|
+
- Evidence: session ID, timestamp, exact excerpt
|
|
158
|
+
|
|
159
|
+
Cover at minimum: design/plan acceptance, module completion acceptance, evaluator pass count, browser verification approach, coverage scoring response, package validation, prompt paste discipline, session lifecycle management.
|
|
160
|
+
|
|
161
|
+
### Step 9: Playbook Cross-Reference
|
|
162
|
+
|
|
163
|
+
Write to `analysis/playbook-matches.md`. For all 10 playbook lessons, assess applicability with specific evidence from this run's traces.
|
|
164
|
+
|
|
165
|
+
### Step 10: Scoring
|
|
166
|
+
|
|
167
|
+
Write to `analysis/scoring.md`. Score each dimension (1-10) with narrative evidence explaining the score — not just numbers.
|
|
168
|
+
|
|
169
|
+
### Step 11: Improvements
|
|
170
|
+
|
|
171
|
+
Write to `analysis/improvements.md`. Immediate and medium-term tiers. Each improvement must reference specific evidence from the analysis.
|
|
172
|
+
|
|
173
|
+
### Step 12: Complete Summary
|
|
174
|
+
|
|
175
|
+
Write `analysis/deep-retrospective.md` — the complete analysis combining all of the above into one document. This is the master report.
|
|
176
|
+
|
|
177
|
+
After writing all files, count the total lines across all markdown files under `analysis/` (including `analysis/phases/`). If the total is less than 2000 lines for a project with 30+ sessions, you have not been thorough enough. Go back and add more detail.
|
|
178
|
+
|
|
179
|
+
Present the user with: grade, key findings, top 3 improvements, and total analysis size.
|
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Extract all raw data from a slopmachine project for deep retrospective analysis.
|
|
3
|
+
|
|
4
|
+
Usage: python3 run.py <project_path>
|
|
5
|
+
|
|
6
|
+
Extracts: OpenCode sessions + parts, beads history, .ai metadata, task docs,
|
|
7
|
+
audit reports, token counts, and conversation traces. Writes everything to
|
|
8
|
+
retrospectives/<project>_<timestamp>/extraction/ — no analysis performed.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import sqlite3
|
|
14
|
+
import sys
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def ts_to_iso(ts_ms):
    """Convert a Unix timestamp in milliseconds to a UTC ISO-8601 string."""
    seconds = ts_ms / 1000
    moment = datetime.fromtimestamp(seconds, tz=timezone.utc)
    return moment.isoformat()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ─── Database extraction ───────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
_SESSION_COLUMNS = (
    "id, slug, title, directory, time_created, time_updated, "
    "agent, model, parent_id"
)


def _escape_like(value):
    """Escape LIKE wildcards in *value* so it matches literally.

    Intended for patterns used together with a backslash ESCAPE clause.
    """
    return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def extract_sessions(db_path, project_path):
    """Extract all sessions matching project_path from opencode.db.

    Selection happens in three steps:

    1. Sessions whose ``directory`` equals ``project_path`` or lies below it.
    2. If none are found, fall back to any session whose directory contains
       the project's base name.
    3. Child sessions (``parent_id``) of the owner session are appended even
       when their directory does not match.

    The owner is the session with agent ``"SlopMachine"`` that has the most
    parts. Each returned session dict is enriched with ``part_count``,
    ``total_input_tokens``, ``total_output_tokens`` and ``total_tokens``.

    Args:
        db_path: Path to the SQLite database file.
        project_path: Project directory to match sessions against.

    Returns:
        A ``(sessions, owner_id)`` tuple; ``owner_id`` is ``None`` when no
        SlopMachine session was found.
    """
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        part_counts = {}

        def count_parts(session_id):
            # Cached so the owner lookup and the enrichment pass share results.
            if session_id not in part_counts:
                part_counts[session_id] = conn.execute(
                    "SELECT COUNT(*) FROM part WHERE session_id = ?",
                    (session_id,),
                ).fetchone()[0]
            return part_counts[session_id]

        # Wildcards in the path are escaped: an underscore in a directory
        # name must not match arbitrary characters.
        rows = conn.execute(
            f"""
            SELECT {_SESSION_COLUMNS}
            FROM session
            WHERE directory = ? OR directory LIKE ? ESCAPE '\\'
            ORDER BY time_created
            """,
            (project_path, _escape_like(project_path) + "/%"),
        ).fetchall()
        sessions = [dict(r) for r in rows]

        if not sessions:
            # Fallback: looser match on the project's base name only.
            base_name = _escape_like(os.path.basename(project_path))
            rows = conn.execute(
                f"""
                SELECT {_SESSION_COLUMNS}
                FROM session
                WHERE directory LIKE ? ESCAPE '\\'
                ORDER BY time_created
                """,
                (f"%{base_name}%",),
            ).fetchall()
            sessions = [dict(r) for r in rows]

        # Find main owner session (SlopMachine agent with most parts).
        owner_id = None
        slop = [s for s in sessions if s.get("agent") == "SlopMachine"]
        if slop:
            owner_id = max(slop, key=lambda s: count_parts(s["id"]))["id"]

        # Get child sessions (subagents) of the owner.
        if owner_id:
            child_rows = conn.execute(
                f"""
                SELECT {_SESSION_COLUMNS}
                FROM session WHERE parent_id = ? ORDER BY time_created
                """,
                (owner_id,),
            ).fetchall()
            sessions.extend(dict(r) for r in child_rows)

        # Deduplicate by id, keeping the first occurrence.
        seen = set()
        unique = []
        for s in sessions:
            if s["id"] not in seen:
                seen.add(s["id"])
                unique.append(s)
        sessions = unique

        # Add part counts and token totals.
        for s in sessions:
            s["part_count"] = count_parts(s["id"])
            row = conn.execute(
                """
                SELECT SUM(json_extract(m.data, '$.tokens.input')),
                       SUM(json_extract(m.data, '$.tokens.output'))
                FROM message m WHERE m.session_id = ?
                """,
                (s["id"],),
            ).fetchone()
            s["total_input_tokens"] = int(row[0] or 0)
            s["total_output_tokens"] = int(row[1] or 0)
            s["total_tokens"] = s["total_input_tokens"] + s["total_output_tokens"]

        return sessions, owner_id
    finally:
        # Close the connection even when a query fails.
        conn.close()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def extract_parts(db_path, session_id):
    """Extract all parts from a session.

    Each row's ``data`` column is parsed as JSON. The resulting dict gets
    two extra keys: ``_time`` (the row's ``time_created``) and ``_id`` (the
    row's ``id``). Rows whose payload is NULL, not valid JSON, or not a JSON
    object are skipped; the number skipped is reported on stderr.

    Args:
        db_path: Path to the SQLite database file.
        session_id: Session whose parts should be loaded.

    Returns:
        A list of part dicts ordered by part id.
    """
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        rows = conn.execute(
            """
            SELECT id, time_created, data FROM part
            WHERE session_id = ? ORDER BY id
            """,
            (session_id,),
        ).fetchall()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    parts = []
    skipped = 0
    for r in rows:
        try:
            d = json.loads(r["data"])
        except (json.JSONDecodeError, TypeError):
            # TypeError covers a NULL data column.
            skipped += 1
            continue
        if not isinstance(d, dict):
            # Only JSON objects can carry the _time/_id annotations.
            skipped += 1
            continue
        d["_time"] = r["time_created"]
        d["_id"] = r["id"]
        parts.append(d)

    if skipped:
        print(
            f"  WARNING: skipped {skipped} unreadable part(s) in session {session_id}",
            file=sys.stderr,
        )
    return parts
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _clip(value, limit):
    """Return *value* as text truncated to *limit* characters ('' if falsy)."""
    if not value:
        return ""
    if not isinstance(value, str):
        # Non-string payloads cannot be sliced reliably; stringify first.
        value = str(value)
    return value[:limit]


def classify_parts(parts):
    """Classify session parts into structured categories.

    Args:
        parts: Iterable of part dicts. Parts of type ``"text"`` and task
            tool calls must carry a ``_time`` key (millisecond timestamp).

    Returns:
        A dict with:

        - ``text_parts``: ``{"time", "text"}`` for every non-blank text part.
        - ``task_launches``: one entry per ``task`` tool call, with the
          description, prompt and result truncated to 300, 1000 and 500
          characters respectively.
        - ``tool_calls``: number of calls per tool name.
        - ``type_counts``: number of parts per type. Step parts are counted
          under both their raw type (``step-start``) and an underscore alias
          (``step_start``).
        - ``snapshots``: only present when a ``step-finish`` part carries a
          non-empty ``snapshot``.
    """
    result = {
        "text_parts": [],
        "task_launches": [],
        "tool_calls": defaultdict(int),
        "type_counts": defaultdict(int),
    }
    for p in parts:
        ptype = p.get("type", "unknown")
        result["type_counts"][ptype] += 1

        if ptype == "text":
            # A null or non-string "text" is treated as empty rather than crashing.
            txt = p.get("text") or ""
            if isinstance(txt, str) and txt.strip():
                result["text_parts"].append({
                    "time": ts_to_iso(p["_time"]),
                    "text": txt,
                })
        elif ptype == "tool":
            tn = p.get("tool") or "?"
            result["tool_calls"][tn] += 1
            if tn == "task":
                # "state" and "input" may be present but null.
                state = p.get("state") or {}
                inp = state.get("input") or {}
                result["task_launches"].append({
                    "time": ts_to_iso(p["_time"]),
                    "subagent_type": inp.get("subagent_type", "?"),
                    "description": _clip(inp.get("description"), 300),
                    "prompt": _clip(inp.get("prompt"), 1000),
                    "result": _clip(state.get("output"), 500),
                })
        elif ptype == "step-start":
            result["type_counts"]["step_start"] += 1
        elif ptype == "step-finish":
            result["type_counts"]["step_finish"] += 1
            snap = p.get("snapshot", "")
            if snap:
                result.setdefault("snapshots", []).append(snap)
    return result
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def classify_session(title, agent):
    """Map a session to a coarse type based on its agent and title.

    A session run by the ``slopmachine`` agent (case-insensitive) is always
    ``"owner"``. Otherwise the lower-cased title is checked against keyword
    groups in priority order; the first group with a matching substring
    decides the type. Titles matching no keyword yield ``"other"``.
    """
    if (agent or "").lower() == "slopmachine":
        return "owner"

    lowered_title = (title or "").lower()

    # Order matters: earlier groups take precedence over later ones.
    keyword_rules = (
        ("evaluator", ("evaluator",)),
        ("developer", ("bugfix", "coverage reconciliation", "scaffold",
                       "developer", "prompt orientation", "create product design")),
        ("general_subagent", ("clarif", "faithful", "design", "plan")),
        ("evaluator", ("audit", "strict audit", "strict coverage",
                       "coverage audit", "coverage readme", "internal evaluator")),
    )
    for session_type, keywords in keyword_rules:
        for keyword in keywords:
            if keyword in lowered_title:
                return session_type
    return "other"
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# ─── File readers ───────────────────────────────────────────────────
|
|
182
|
+
|
|
183
|
+
def read_beads(project_root):
    """Read beads history from issues.jsonl.

    Looks for ``.beads/issues.jsonl`` in ``project_root`` first and then in
    its parent directory; the first file found is used. Blank lines are
    ignored. Lines that are not valid JSON are skipped and reported on stderr
    so that one corrupt record does not abort the whole extraction.

    Args:
        project_root: Directory to start the lookup from.

    Returns:
        A list with one parsed JSON value per valid line, or ``[]`` when no
        issues file exists.
    """
    for base in (project_root, os.path.dirname(project_root)):
        issues_path = os.path.join(base, ".beads", "issues.jsonl")
        if not os.path.isfile(issues_path):
            continue

        beads = []
        skipped = 0
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(issues_path, encoding="utf-8", errors="replace") as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    beads.append(json.loads(line))
                except json.JSONDecodeError:
                    skipped += 1
        if skipped:
            print(
                f"  WARNING: skipped {skipped} malformed line(s) in {issues_path}",
                file=sys.stderr,
            )
        return beads
    return []
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _read_markdown_files(directory, key_prefix=""):
    """Return ``{key_prefix + name: text}`` for each ``*.md`` file in *directory*."""
    contents = {}
    for fname in sorted(os.listdir(directory)):
        fpath = os.path.join(directory, fname)
        if fname.endswith(".md") and os.path.isfile(fpath):
            # Explicit UTF-8 so decoding does not depend on the platform locale.
            with open(fpath, encoding="utf-8", errors="replace") as f:
                contents[f"{key_prefix}{fname}"] = f.read()
    return contents


def read_ai_metadata(project_root):
    """Read .ai metadata, plan, requirements, etc.

    Looks for a ``.ai`` directory in ``project_root`` first and then in its
    parent directory; the first one found is used.

    Args:
        project_root: Directory to start the lookup from.

    Returns:
        A dict containing, in this order:

        - ``"metadata"``: parsed ``metadata.json`` (omitted when the file is
          missing or not valid JSON; the latter is reported on stderr).
        - ``"<name>.md"``: text of each markdown file directly in ``.ai``.
        - ``"archive/<name>.md"``: text of each markdown file in
          ``.ai/archive``.

        ``{}`` is returned when no ``.ai`` directory exists.
    """
    for base in (project_root, os.path.dirname(project_root)):
        ai_dir = os.path.join(base, ".ai")
        if not os.path.isdir(ai_dir):
            continue

        result = {}
        meta_path = os.path.join(ai_dir, "metadata.json")
        if os.path.isfile(meta_path):
            try:
                with open(meta_path, encoding="utf-8") as f:
                    result["metadata"] = json.load(f)
            except json.JSONDecodeError as exc:
                # Keep extracting the remaining artifacts.
                print(f"  WARNING: could not parse {meta_path}: {exc}", file=sys.stderr)

        result.update(_read_markdown_files(ai_dir))

        # Also read archive.
        archive_dir = os.path.join(ai_dir, "archive")
        if os.path.isdir(archive_dir):
            result.update(_read_markdown_files(archive_dir, "archive/"))
        return result
    return {}
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def read_task_docs(task_dir):
    """Read task-level docs and metadata.

    Collects every ``*.md`` file from ``<task_dir>/docs`` (keyed as
    ``"docs/<name>"``) and from ``task_dir`` itself (keyed as ``"<name>"``),
    plus the parsed ``metadata.json`` under the key ``"metadata.json"``.
    Directories whose name happens to end in ``.md`` are ignored.

    Args:
        task_dir: Task directory; it does not need to exist.

    Returns:
        A dict mapping the keys described above to file text, or to the
        parsed JSON value for metadata. A metadata file that is not valid
        JSON is reported on stderr and omitted.
    """
    result = {}
    for sub in ("docs", ""):
        directory = os.path.join(task_dir, sub) if sub else task_dir
        if not os.path.isdir(directory):
            continue
        # Sorted for a deterministic key order across platforms.
        for fname in sorted(os.listdir(directory)):
            fpath = os.path.join(directory, fname)
            if not fname.endswith(".md") or not os.path.isfile(fpath):
                continue
            # Explicit UTF-8 so decoding does not depend on the platform locale.
            with open(fpath, encoding="utf-8", errors="replace") as f:
                result[f"{sub}/{fname}" if sub else fname] = f.read()

    meta = os.path.join(task_dir, "metadata.json")
    if os.path.isfile(meta):
        try:
            with open(meta, encoding="utf-8") as f:
                result["metadata.json"] = json.load(f)
        except json.JSONDecodeError as exc:
            print(f"  WARNING: could not parse {meta}: {exc}", file=sys.stderr)
    return result
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def read_audit_reports(task_dir):
    """Read .tmp audit reports.

    Args:
        task_dir: Task directory that may contain a ``.tmp`` sub-directory.

    Returns:
        A dict mapping each ``*.md`` file name in ``<task_dir>/.tmp`` to its
        text, in sorted file-name order. Directories are ignored. ``{}`` is
        returned when ``.tmp`` does not exist.
    """
    result = {}
    tmp = os.path.join(task_dir, ".tmp")
    if not os.path.isdir(tmp):
        return result
    for fname in sorted(os.listdir(tmp)):
        fpath = os.path.join(tmp, fname)
        # Skip non-markdown entries and directories named like "*.md".
        if not fname.endswith(".md") or not os.path.isfile(fpath):
            continue
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(fpath, encoding="utf-8", errors="replace") as f:
            result[fname] = f.read()
    return result
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# ─── Main ───────────────────────────────────────────────────────────
|
|
251
|
+
|
|
252
|
+
_SESSION_TYPES = ("owner", "developer", "evaluator", "general_subagent", "other")


def _write_json(path, payload):
    """Write *payload* to *path* as indented JSON (unknown types via ``str``)."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, default=str)


def _build_phase_map(beads, meta):
    """Derive ``{phase: {"start", "end", "bead_ids"}}`` from metadata bead refs.

    ``meta["beads"]`` maps a phase key to bead references. Each reference is
    expected to begin with the bead id, optionally followed by a space and
    free text; references are normally a comma-separated string, but a list
    is accepted too. Phases without any known bead id are omitted.
    """
    beads_map = {b["id"]: b for b in beads if isinstance(b, dict) and "id" in b}
    bead_section = meta.get("beads") if isinstance(meta, dict) else None
    if not isinstance(bead_section, dict):
        return {}

    phase_map = {}
    for phase_key, bead_refs in bead_section.items():
        if isinstance(bead_refs, str):
            refs = bead_refs.split(",")
        elif isinstance(bead_refs, (list, tuple)):
            refs = [str(ref) for ref in bead_refs]
        else:
            continue

        bead_ids = []
        for ref in refs:
            bid = ref.strip().split(" ")[0]
            if bid in beads_map:
                bead_ids.append(bid)
        if not bead_ids:
            continue

        # Use earliest comment time as start, earliest close time as end.
        # NOTE(review): for a phase with several beads the *latest* close
        # time may be the intended end — confirm before changing.
        comment_times = []
        close_times = []
        for bid in bead_ids:
            bead = beads_map[bid]
            for comment in bead.get("comments") or []:
                created = comment.get("created_at", "")
                if created:
                    comment_times.append(created)
            closed = bead.get("closed_at") or bead.get("updated_at") or ""
            if closed:
                close_times.append(closed)

        phase_map[phase_key] = {
            # Truncated to second precision ("YYYY-MM-DDTHH:MM:SS").
            "start": min(comment_times)[:19] if comment_times else "",
            "end": min(close_times)[:19] if close_times else "",
            "bead_ids": bead_ids,
        }
    return phase_map


def _assign_sessions_to_phases(session_list, phase_map):
    """List the phases whose time window overlaps each non-"other" session.

    Timestamps are compared as second-precision strings.
    NOTE(review): this assumes bead timestamps are ISO-8601 strings in the
    same timezone as the UTC session timestamps — TODO confirm.
    """
    assignments = []
    for s in session_list:
        stype = s.get("session_type", "other")
        if stype == "other":
            continue
        created = s.get("time_created", "")[:19]
        updated = s.get("time_updated", "")[:19]
        if not (created and updated):
            continue
        assigned = [
            phase_key
            for phase_key, window in phase_map.items()
            if window["start"] and window["end"]
            and created <= window["end"] and updated >= window["start"]
        ]
        if assigned:
            assignments.append({
                "session_id": s["id"],
                "session_type": stype,
                "title": s["title"],
                "time_created": created,
                "time_updated": updated,
                "phases": assigned,
            })
    return assignments


def _truncate_large_text(data, limit=50000):
    """Copy *data*, cutting string values longer than *limit* characters."""
    trimmed = {}
    for key, value in data.items():
        if isinstance(value, str) and len(value) > limit:
            trimmed[key] = value[:limit] + f"\n\n[... truncated, full {len(value)} chars]"
        else:
            trimmed[key] = value
    return trimmed


def main():
    """Extract raw retrospective data for the project given on the command line.

    Reads sessions from ``~/.local/share/opencode/opencode.db`` and project
    artifacts from disk, then writes JSON files to
    ``<root>/retrospectives/<project>_<timestamp>/extraction/``, where
    ``<root>`` is three directory levels above this script's directory.
    Exits with status 1 when no project path is given or the database file
    is missing.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 run.py <project_path>", file=sys.stderr)
        sys.exit(1)

    project_path = os.path.abspath(sys.argv[1])
    # The parent directory's name labels the output; the path's own name is
    # the fallback when the parent has no name.
    project_name = (
        os.path.basename(os.path.dirname(project_path))
        or os.path.basename(project_path)
    )

    db_path = os.path.expanduser("~/.local/share/opencode/opencode.db")
    if not os.path.isfile(db_path):
        print("ERROR: OpenCode database not found", file=sys.stderr)
        sys.exit(1)

    print(f"Extracting: {project_path}")

    # 1. Sessions
    sessions, owner_id = extract_sessions(db_path, project_path)
    print(f"  Sessions: {len(sessions)} (owner: {owner_id})")

    # 2. Read artifacts
    beads = read_beads(project_path)
    ai_data = read_ai_metadata(project_path)
    # The task directory is the project itself when it has a docs/ folder,
    # otherwise its task/ sub-directory.
    if os.path.isdir(os.path.join(project_path, "docs")):
        task_dir = project_path
    else:
        task_dir = os.path.join(project_path, "task")
    task_docs = read_task_docs(task_dir)
    audit_reports = read_audit_reports(task_dir)
    print(f"  Beads: {len(beads)}, Audit reports: {len(audit_reports)}")

    # 3. Output directory
    script_dir = os.path.dirname(os.path.abspath(__file__))
    sm_root = os.path.dirname(os.path.dirname(os.path.dirname(script_dir)))
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe = project_name.replace("/", "_").replace(" ", "_")
    out_dir = os.path.join(sm_root, "retrospectives", f"{safe}_{ts}", "extraction")
    os.makedirs(out_dir, exist_ok=True)

    # 4. Write session list
    session_list = [
        {
            "id": s["id"], "slug": s["slug"], "title": s["title"],
            "agent": s["agent"], "parent_id": s["parent_id"],
            "time_created": ts_to_iso(s["time_created"]),
            "time_updated": ts_to_iso(s["time_updated"]),
            "part_count": s["part_count"],
            "total_input_tokens": s["total_input_tokens"],
            "total_output_tokens": s["total_output_tokens"],
            "total_tokens": s["total_tokens"],
            "session_type": classify_session(s["title"], s["agent"]),
        }
        for s in sessions
    ]
    _write_json(os.path.join(out_dir, "sessions.json"), session_list)
    print(f"  sessions.json ({len(session_list)} sessions)")

    # 5. Write parts for each significant session
    parts_dir = os.path.join(out_dir, "sessions")
    os.makedirs(parts_dir, exist_ok=True)
    for s, entry in zip(sessions, session_list):
        stype = entry["session_type"]
        # Unclassified and empty sessions get no per-session extract.
        if stype == "other" or s.get("part_count", 0) == 0:
            continue
        classified = classify_parts(extract_parts(db_path, s["id"]))
        label = f"{stype}-{s['slug']}-{s['id'][:20]}"
        safe_label = label.replace("/", "_").replace(" ", "_").replace(":", "_")
        _write_json(os.path.join(parts_dir, f"{safe_label}.json"), {
            "session_id": s["id"],
            "session_type": stype,
            "title": s["title"],
            "part_count": s["part_count"],
            "total_input_tokens": s["total_input_tokens"],
            "total_output_tokens": s["total_output_tokens"],
            "type_counts": dict(classified["type_counts"]),
            "tool_calls": dict(classified["tool_calls"]),
            "text_count": len(classified["text_parts"]),
            "text_parts": classified["text_parts"],
            "task_launch_count": len(classified["task_launches"]),
            "task_launches": classified["task_launches"],
        })
    print(f"  sessions/ ({len(os.listdir(parts_dir))} session extracts)")

    # 6. Write beads
    _write_json(os.path.join(out_dir, "beads.json"), beads)

    # 7. Write AI metadata (truncate large content for inspectability)
    _write_json(os.path.join(out_dir, "ai-metadata.json"), _truncate_large_text(ai_data))

    # 8. Write task docs (written in full, not truncated)
    _write_json(os.path.join(out_dir, "task-docs.json"), task_docs)

    # 9. Write audit reports
    _write_json(os.path.join(out_dir, "audit-reports.json"), audit_reports)

    # 10. Phase-session mapping
    meta = ai_data.get("metadata", {}) if isinstance(ai_data, dict) else {}
    phase_map = _build_phase_map(beads, meta)
    phase_sessions = _assign_sessions_to_phases(session_list, phase_map)
    _write_json(os.path.join(out_dir, "phase-session-map.json"), {
        "phases": {
            k: {"start": v["start"], "end": v["end"], "bead_ids": v["bead_ids"]}
            for k, v in phase_map.items()
        },
        "session_assignments": phase_sessions,
    })
    print(f"  phase-session-map.json ({len(phase_map)} phases, {len(phase_sessions)} sessions assigned)")

    # 11. Write summary
    type_counts = defaultdict(int)
    for entry in session_list:
        type_counts[entry["session_type"]] += 1
    summary = {
        "project_path": project_path,
        "extracted_at": datetime.now(timezone.utc).isoformat(),
        "session_count": len(sessions),
        "bead_count": len(beads),
        "audit_report_count": len(audit_reports),
        "owner_session_id": owner_id,
        # Every session type is reported so the counts add up to session_count.
        "session_types": {t: type_counts.get(t, 0) for t in _SESSION_TYPES},
        "total_tokens": sum(s.get("total_tokens", 0) for s in sessions),
    }
    _write_json(os.path.join(out_dir, "extraction-summary.json"), summary)

    print(f"\nExtraction complete: {out_dir}/")
    print(f"  Summary: {summary['session_count']} sessions, {summary['bead_count']} beads, "
          f"{summary['total_tokens']:,} tokens")
    print("\nReady for analysis. Run the deep-retrospective skill analysis phase.")


if __name__ == "__main__":
    main()
|