@ericrisco/rsc 0.1.31 → 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/manifest.json +24 -5
- package/package.json +1 -1
- package/scripts/lib/domains.js +1 -1
- package/skills/analyze/SKILL.md +1 -0
- package/skills/author-skill/SKILL.md +20 -0
- package/skills/author-skill/references/description-recipe.md +2 -0
- package/skills/debug/SKILL.md +1 -1
- package/skills/implement/SKILL.md +72 -2
- package/skills/implement/references/per-task-review.md +46 -0
- package/skills/implement/scripts/review-package +59 -0
- package/skills/implement/scripts/sdd-workspace +47 -0
- package/skills/implement/scripts/task-brief +77 -0
- package/skills/parallel/SKILL.md +29 -0
- package/skills/plan/references/plan-template.md +18 -0
- package/skills/roast-me/SKILL.md +124 -0
- package/skills/roast-me/evals/README.md +76 -0
- package/skills/roast-me/evals/cases.yaml +75 -0
- package/skills/roast-me/prompts/analyze.md +90 -0
- package/skills/roast-me/prompts/compute.md +100 -0
- package/skills/roast-me/prompts/roast.md +181 -0
- package/skills/roast-me/tools/adapters/__init__.py +1 -0
- package/skills/roast-me/tools/adapters/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/base.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/claude.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/codex.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/gemini.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/registry.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/base.py +53 -0
- package/skills/roast-me/tools/adapters/claude.py +140 -0
- package/skills/roast-me/tools/adapters/codex.py +113 -0
- package/skills/roast-me/tools/adapters/gemini.py +121 -0
- package/skills/roast-me/tools/adapters/registry.py +68 -0
- package/skills/roast-me/tools/extract_prompts.py +520 -0
- package/skills/sdd/SKILL.md +23 -0
- package/skills/ship/SKILL.md +9 -1
- package/skills/specify/SKILL.md +26 -1
- package/skills/suggest/SKILL.md +1 -1
- package/skills/tasks/SKILL.md +25 -0
- package/skills/worktrees/SKILL.md +25 -0
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Extract user prompts from AI assistant session files for prompt-quality analysis.
|
|
3
|
+
|
|
4
|
+
Scans session files for one or more runtimes (Claude, Codex, Gemini) and
|
|
5
|
+
extracts user prompts with contextual signals: whether an error followed,
|
|
6
|
+
whether the agent auto-recovered, whether the user issued a correction.
|
|
7
|
+
|
|
8
|
+
Writes a normalised JSON file to a temp path and prints:
|
|
9
|
+
1. The output path
|
|
10
|
+
2. A metadata summary
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
python3 extract_prompts.py [--days N] [--runtime auto|claude|codex|gemini]
|
|
14
|
+
|
|
15
|
+
--days N Look back N days (default 7). A bare positional number (e.g. `extract_prompts.py 14`) is accepted as shorthand and takes precedence over --days.
|
|
16
|
+
--runtime ID Which runtime to scan (default: auto = all installed runtimes).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import re
|
|
25
|
+
import sys
|
|
26
|
+
import tempfile
|
|
27
|
+
import time
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# Add the adapters package to sys.path so imports work regardless of cwd.
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
_TOOLS_DIR = Path(__file__).parent
|
|
35
|
+
sys.path.insert(0, str(_TOOLS_DIR))
|
|
36
|
+
from adapters.registry import get_adapters, list_runtime_ids
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Constants
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
# Upper bound on the number of prompt records kept in the final output
# (main() slices the combined list to this length).
MAX_PROMPTS = 300
# Character limits passed to truncate() for the corresponding record fields.
PROMPT_TEXT_LIMIT = 1500
CORRECTION_TEXT_LIMIT = 500
CONTEXT_BEFORE_LIMIT = 500

# Phrases that mark a follow-up user message as a correction of the previous
# turn. Searched case-insensitively anywhere in the message text.
CORRECTION_PATTERNS = re.compile(
    r"\b(no[,.]?\s|wrong|instead|actually|don'?t|shouldn'?t|stop|not that|"
    r"I said|I meant|I asked|that'?s not|please don'?t|why did you|"
    r"you should have|that was wrong|incorrect|try again|redo|"
    r"that broke|you broke|revert|undo)\b",
    re.IGNORECASE,
)

# Model tier classification (provider-neutral labels).
# Maps substrings found in model IDs to tier names.
# Order matters: classify_tier() returns the tier of the first pattern that
# matches, so earlier entries win over later ones.
MODEL_TIER_MAP: list[tuple[re.Pattern, str]] = [
    (re.compile(r"fable|mythos", re.I), "heavy"),
    (re.compile(r"opus|gpt-4(?!.*mini)", re.I), "heavy"),
    (re.compile(r"sonnet|gpt-4.*mini|gemini-1\.5-pro|gemini-2", re.I), "balanced"),
    (re.compile(r"haiku|gpt-3\.5|gemini-1\.5-flash|gemini-flash", re.I), "light"),
]

# Numeric rank per tier, used to compare the tier that was used against the
# recommended one. "unknown" shares the rank of "balanced".
MODEL_TIER_RANK = {"light": 0, "balanced": 1, "heavy": 2, "unknown": 1}

# Prompts classified as "simple". classify_complexity() applies these with
# .match(), i.e. anchored at the start of the prompt.
SIMPLE_PATTERNS = [
    re.compile(p, re.IGNORECASE) for p in [
        r"^\s*(yes|ok|go ahead|looks good|lgtm|sure|do it|yep|correct|perfect)\s*[.!]?\s*$",
        r"^\s*(commit|push|merge|ship it|deploy)\s*$",
        r"^\s*(read|show|list|ls|find|check)\b.{0,80}$",
        r"^\s*(format|lint|fix.*style)\b",
        r"^\s*(what does|explain|what is|how does)\b.{0,120}$",
    ]
]

# Prompts classified as "complex". classify_complexity() applies these with
# .search(), i.e. anywhere in the prompt, and only after no simple pattern hit.
COMPLEX_PATTERNS = [
    re.compile(p, re.IGNORECASE) for p in [
        r"\b(design|architect|plan|strategy|migration|roadmap)\b",
        r"\b(debug|race\s*condition|memory\s*leak|performance)\b",
        r"\b(implement|build|create)\b.{30,}",
        r"\b(refactor|rewrite|overhaul)\b.*\b(entire|all|whole)\b",
    ]
]
|
|
84
|
+
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
# Helpers
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
|
|
89
|
+
def truncate(s: str, limit: int) -> str:
    """Clip *s* to at most *limit* characters, marking the cut with "...".

    A falsy *s* (``None`` or the empty string) yields ``""``. Text that
    already fits within *limit* is returned unchanged.
    """
    if not s:
        return ""
    if len(s) > limit:
        return s[:limit] + "..."
    return s
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def classify_tier(model_id: str) -> str:
    """Return the tier label for *model_id*, or "unknown" when nothing matches.

    Patterns in MODEL_TIER_MAP are tried in list order and the first hit
    decides the tier. An empty or missing identifier is "unknown".
    """
    if not model_id:
        return "unknown"
    hits = (label for matcher, label in MODEL_TIER_MAP if matcher.search(model_id))
    return next(hits, "unknown")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def classify_complexity(text: str) -> str:
    """Label a prompt as "simple", "complex" or "moderate".

    Simple patterns are checked first and are anchored at the start of the
    text; complex patterns may match anywhere. Anything else is "moderate".
    """
    if any(rx.match(text) for rx in SIMPLE_PATTERNS):
        return "simple"
    if any(rx.search(text) for rx in COMPLEX_PATTERNS):
        return "complex"
    return "moderate"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Recommended model tier for each label returned by classify_complexity().
COMPLEXITY_TO_TIER = {"simple": "light", "moderate": "balanced", "complex": "heavy"}
|
|
116
|
+
|
|
117
|
+
# ---------------------------------------------------------------------------
|
|
118
|
+
# Normalisation
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
def normalise_record(raw: dict[str, Any], position: int, total: int) -> dict[str, Any]:
    """Convert a raw adapter record into a normalised PromptRecord.

    Args:
        raw: Record produced by an adapter; every field is read with ``.get``
            so missing keys fall back to defaults.
        position: 1-based index of the prompt within its session.
        total: Number of prompts in that session.

    Returns:
        A dict holding the (truncated) prompt text, surface features of the
        prompt, and the compute-tier heuristics. The error/correction fields
        are left at their empty defaults because no cross-turn context is
        available here.
    """
    # An adapter may emit an explicit None; coerce it the same way "model" is
    # coerced below so len() and the regex searches always see a string.
    text = raw.get("prompt_text") or ""
    length = len(text)

    has_xml_tags = bool(re.search(r"<\w[\w-]*>", text))
    has_file_paths = bool(re.search(r"(/[\w./\-]+|~\/[\w./\-]+|\.\./)", text))
    has_code_blocks = "```" in text

    model_id = raw.get("model", "") or ""
    tier = classify_tier(model_id)
    complexity = classify_complexity(text)
    recommended_tier = COMPLEXITY_TO_TIER[complexity]
    # "Overkill" means the model used ranks strictly above the recommended one.
    was_overkill = MODEL_TIER_RANK.get(tier, 1) > MODEL_TIER_RANK.get(recommended_tier, 1)

    return {
        "runtime": raw.get("runtime", "unknown"),
        "session_file": raw.get("session_file", ""),
        "timestamp": raw.get("timestamp"),
        "prompt_text": truncate(text, PROMPT_TEXT_LIMIT),
        "prompt_length": length,
        "prompt_position": position,
        "total_prompts_in_session": total,
        "has_xml_tags": has_xml_tags,
        "has_file_paths": has_file_paths,
        "has_code_blocks": has_code_blocks,
        # These will be populated by post-processing when session context is available.
        "followed_by_error": False,
        "error_was_recovered": False,
        "followed_by_correction": False,
        "correction_text": "",
        "error_tool": "",
        "error_text": "",
        "context_before": truncate(raw.get("context_before", ""), CONTEXT_BEFORE_LIMIT),
        # Compute fields (best-effort from adapter)
        "model": model_id,
        "model_tier": tier,
        "task_complexity": complexity,
        "recommended_tier": recommended_tier,
        "compute_was_overkill": was_overkill,
    }
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# ---------------------------------------------------------------------------
|
|
165
|
+
# Session-aware processing (Claude-specific: reads full JSONL for context)
|
|
166
|
+
# ---------------------------------------------------------------------------
|
|
167
|
+
|
|
168
|
+
def process_claude_sessions(session_files: list[Path], cutoff: float) -> list[dict[str, Any]]:
    """Full session-aware extraction for Claude Code JSONL files.

    This matches the original extractor behaviour: reads each JSONL file
    sequentially to detect errors, auto-recovery, and corrections in the turns
    immediately following each user prompt.

    Args:
        session_files: JSONL session files to read.
        cutoff: Epoch seconds; files with an older mtime are skipped.

    Returns:
        One record per real user prompt (non-meta, non-empty, not made up
        solely of tool results), in file order. Files that cannot be stat'ed
        or read are skipped.
    """
    from adapters.claude import _extract_text, _is_only_tool_results, _parse_timestamp

    prompts: list[dict[str, Any]] = []

    for sf in session_files:
        # stat() can fail just like read_text() (file removed or unreadable);
        # either way the session is skipped instead of aborting the whole run.
        try:
            if sf.stat().st_mtime < cutoff:
                continue
            raw_lines = sf.read_text(encoding="utf-8", errors="replace").splitlines()
        except OSError:
            continue

        # (line index, message type, parsed object) for user/assistant lines only.
        ordered: list[tuple[int, str, dict]] = []
        for i, line in enumerate(raw_lines):
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A valid JSON line that is not an object has no "type" to inspect.
            if not isinstance(obj, dict):
                continue
            msg_type = obj.get("type", "")
            if msg_type in ("user", "assistant"):
                ordered.append((i, msg_type, obj))

        # Indices into `ordered` of the user messages that count as prompts.
        session_prompts: list[int] = [
            idx for idx, (_, t, obj) in enumerate(ordered)
            if t == "user"
            and not obj.get("message", {}).get("isMeta")
            and _extract_text(obj.get("message", {}).get("content", [])).strip()
            and not _is_only_tool_results(obj.get("message", {}).get("content", []))
        ]
        total_in_session = len(session_prompts)

        for position, idx in enumerate(session_prompts, 1):
            _, _, obj = ordered[idx]
            msg = obj.get("message", {})
            content = msg.get("content", [])
            prompt_text = _extract_text(content)

            # context_before: last assistant text message before this prompt
            context_before = ""
            for k in range(idx - 1, -1, -1):
                _, kt, ko = ordered[k]
                if kt == "assistant":
                    a_content = ko.get("message", {}).get("content", [])
                    if isinstance(a_content, list):
                        for block in a_content:
                            if isinstance(block, dict) and block.get("type") == "text":
                                context_before = block.get("text", "")
                                break
                    if context_before:
                        break

            followed_by_error = False
            error_was_recovered = False
            followed_by_correction = False
            correction_text = ""
            error_tool = ""
            error_text = ""
            error_count = 0
            success_after_error = 0

            # NOTE(review): the nesting of this scan was reconstructed from a
            # flattened diff rendering. In particular, the capture of the tool
            # name/error text only for the FIRST error and the `break` on the
            # next real user prompt are the most plausible reading -- confirm
            # against the upstream file.
            for j in range(idx + 1, len(ordered)):
                _, j_type, j_obj = ordered[j]

                if j_type == "user":
                    u_content = j_obj.get("message", {}).get("content", [])
                    if isinstance(u_content, list):
                        for block in u_content:
                            if not isinstance(block, dict):
                                continue
                            if block.get("type") == "tool_result":
                                if block.get("is_error"):
                                    error_count += 1
                                    if not followed_by_error:
                                        followed_by_error = True
                                        # Walk back to the assistant tool_use
                                        # that produced this result to name the tool.
                                        tid = block.get("tool_use_id")
                                        for k in range(j - 1, idx, -1):
                                            _, kt, ko = ordered[k]
                                            if kt == "assistant":
                                                ac = ko.get("message", {}).get("content", [])
                                                if isinstance(ac, list):
                                                    for ab in ac:
                                                        if (isinstance(ab, dict)
                                                                and ab.get("type") == "tool_use"
                                                                and ab.get("id") == tid):
                                                            error_tool = ab.get("name", "")
                                                            break
                                        rc = block.get("content", [])
                                        if isinstance(rc, list):
                                            for rb in rc:
                                                if isinstance(rb, dict) and rb.get("type") == "text":
                                                    error_text = rb.get("text", "")
                                                    break
                                        elif isinstance(rc, str):
                                            error_text = rc
                                else:
                                    # A successful tool result after at least
                                    # one error counts towards auto-recovery.
                                    if error_count > 0:
                                        success_after_error += 1

                    if not j_obj.get("message", {}).get("isMeta"):
                        next_text = _extract_text(u_content)
                        if next_text.strip() and not _is_only_tool_results(u_content):
                            if CORRECTION_PATTERNS.search(next_text):
                                followed_by_correction = True
                                correction_text = next_text
                            # The next real user prompt ends this prompt's window.
                            break

            error_was_recovered = (
                followed_by_error
                and success_after_error > 0
                and not followed_by_correction
            )

            ts = _parse_timestamp(obj)
            # The model is not extracted on this path, so the tier stays
            # "unknown" and the overkill flag below is always False.
            model_id = ""
            model_tier = "unknown"
            complexity = classify_complexity(prompt_text)

            rec = {
                "runtime": "claude",
                "session_file": str(sf),
                "timestamp": ts,
                "prompt_text": truncate(prompt_text, PROMPT_TEXT_LIMIT),
                "prompt_length": len(prompt_text),
                "prompt_position": position,
                "total_prompts_in_session": total_in_session,
                "has_xml_tags": bool(re.search(r"<\w[\w-]*>", prompt_text)),
                "has_file_paths": bool(re.search(r"(/[\w./\-]+|~\/[\w./\-]+|\.\./)", prompt_text)),
                "has_code_blocks": "```" in prompt_text,
                "followed_by_error": followed_by_error,
                "error_was_recovered": error_was_recovered,
                "followed_by_correction": followed_by_correction,
                "correction_text": truncate(correction_text, CORRECTION_TEXT_LIMIT),
                "error_tool": error_tool,
                "error_text": truncate(error_text, 500),
                "context_before": truncate(context_before, CONTEXT_BEFORE_LIMIT),
                "model": model_id,
                "model_tier": model_tier,
                "task_complexity": complexity,
                "recommended_tier": COMPLEXITY_TO_TIER[complexity],
                "compute_was_overkill": False,
            }
            prompts.append(rec)

    return prompts
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# ---------------------------------------------------------------------------
|
|
324
|
+
# Main
|
|
325
|
+
# ---------------------------------------------------------------------------
|
|
326
|
+
|
|
327
|
+
def main() -> None:
    """Command-line entry point: scan sessions, write the JSON report, print a summary.

    Parses the look-back window and runtime, collects prompt records from
    every selected adapter, keeps at most MAX_PROMPTS of them (prompts that
    were followed by an error or a correction first), writes the result to a
    temp file and prints summary lines ending with ``Output: <path>``.

    An unknown runtime or an empty scan writes an empty report and returns
    normally; only a failure to write the full report exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Extract user prompts from AI assistant session files."
    )
    parser.add_argument(
        "days_positional",
        nargs="?",
        type=int,
        default=None,
        metavar="DAYS",
        help="Number of days to look back (positional shorthand).",
    )
    parser.add_argument(
        "--days",
        type=int,
        default=None,
        help="Number of days to look back (default 7).",
    )
    parser.add_argument(
        "--runtime",
        type=str,
        default="auto",
        help=f"Runtime to scan: auto, {', '.join(list_runtime_ids())} (default: auto).",
    )
    args = parser.parse_args()

    # Resolve days: positional takes precedence over --days, both default to 7.
    # Compare against None rather than truthiness so an explicit 0 is honoured
    # instead of silently falling through to the next source.
    if args.days_positional is not None:
        days = args.days_positional
    elif args.days is not None:
        days = args.days
    else:
        days = 7

    # Resolve adapters — unknown runtime exits 0 cleanly.
    try:
        adapters = get_adapters(args.runtime)
    except ValueError as exc:
        print(f"No data: {exc}", file=sys.stderr)
        _write_empty(days, args.runtime)
        return

    cutoff = time.time() - (days * 86400)

    all_prompts: list[dict[str, Any]] = []
    sessions_scanned = 0
    projects_seen: set[str] = set()

    for adapter in adapters:
        session_files = adapter.discover()

        # Filter by age
        recent_files = []
        for sf in session_files:
            try:
                if sf.stat().st_mtime >= cutoff:
                    recent_files.append(sf)
            except OSError:
                continue

        if not recent_files:
            # This adapter found no data — degrade cleanly.
            continue

        sessions_scanned += len(recent_files)

        if adapter.RUNTIME_ID == "claude":
            # Use full session-aware extraction for Claude to detect error context.
            prompts = process_claude_sessions(recent_files, cutoff)
        else:
            # For other runtimes: simple parse without cross-turn context detection.
            raw_by_session: dict[str, list[dict]] = {}
            for sf in recent_files:
                raws = adapter.parse(sf)
                raw_by_session[str(sf)] = raws

            prompts = []
            for raws in raw_by_session.values():
                total = len(raws)
                for pos, raw in enumerate(raws, 1):
                    rec = normalise_record(raw, pos, total)
                    prompts.append(rec)

        # Collect project identifiers.
        for sf in recent_files:
            projects_seen.add(str(sf.parent))

        all_prompts.extend(prompts)

    if not all_prompts:
        runtime_label = args.runtime
        print(
            f"No transcript data found for runtime '{runtime_label}' "
            f"in the last {days} days.",
            file=sys.stderr,
        )
        print(
            "If you are using a supported runtime, check that its session "
            "directory exists and contains recent files."
        )
        _write_empty(days, args.runtime)
        return

    # Prioritise error/correction prompts, then cap.
    error_prompts = [p for p in all_prompts if p.get("followed_by_error") or p.get("followed_by_correction")]
    clean_prompts = [p for p in all_prompts if not p.get("followed_by_error") and not p.get("followed_by_correction")]
    all_prompts = (error_prompts + clean_prompts)[:MAX_PROMPTS]

    # All statistics below describe the capped list, not everything scanned.
    total = len(all_prompts)
    errors = sum(1 for p in all_prompts if p.get("followed_by_error"))
    recovered = sum(1 for p in all_prompts if p.get("error_was_recovered"))
    unrecovered = errors - recovered
    corrections = sum(1 for p in all_prompts if p.get("followed_by_correction"))
    avg_length = sum(p["prompt_length"] for p in all_prompts) / total if total else 0
    xml_count = sum(1 for p in all_prompts if p.get("has_xml_tags"))
    fp_count = sum(1 for p in all_prompts if p.get("has_file_paths"))
    overkill_count = sum(1 for p in all_prompts if p.get("compute_was_overkill"))

    # Model tier distribution
    tier_dist: dict[str, int] = {}
    for p in all_prompts:
        t = p.get("model_tier", "unknown")
        tier_dist[t] = tier_dist.get(t, 0) + 1

    compute_stats = {
        "tier_distribution": {t: round(c / total, 3) for t, c in tier_dist.items()},
        "heuristic_overuse_count": overkill_count,
        "heuristic_overuse_rate": round(overkill_count / total, 3) if total else 0,
    }

    result = {
        "prompts": all_prompts,
        "metadata": {
            "runtime": args.runtime,
            "days": days,
            "sessions_scanned": sessions_scanned,
            "projects_scanned": len(projects_seen),
            "total_prompts": total,
            "error_rate": round(errors / total, 3) if total else 0,
            "recovered_error_rate": round(recovered / total, 3) if total else 0,
            "effective_error_rate": round(unrecovered / total, 3) if total else 0,
            "correction_rate": round(corrections / total, 3) if total else 0,
            "avg_length": round(avg_length, 1),
            "xml_usage_rate": round(xml_count / total, 3) if total else 0,
            "file_path_rate": round(fp_count / total, 3) if total else 0,
        },
        "compute_stats": compute_stats,
    }

    # Write output to a temp file.
    fd, out_path = tempfile.mkstemp(prefix="roast-me-", suffix=".json")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            json.dump(result, fh, indent=2, default=str)
    except OSError as exc:
        print(f"Failed to write output: {exc}", file=sys.stderr)
        # Best-effort cleanup so a truncated report is not left behind.
        try:
            os.unlink(out_path)
        except OSError:
            pass
        sys.exit(1)

    print(f"Scanned {sessions_scanned} sessions across {len(projects_seen)} projects")
    print(
        f"Extracted {total} prompts "
        f"({errors} with errors, {recovered} auto-recovered, {unrecovered} impactful)"
    )
    print(f"Corrections: {corrections} | Avg length: {avg_length:.0f} chars | XML: {xml_count}/{total}")
    print(f"Compute: {overkill_count} overkill | tiers: {tier_dist}")
    print(f"Output: {out_path}")
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _write_empty(days: int, runtime: str) -> None:
    """Write an empty result JSON and print its path.

    The report has the same top-level keys as a full one ("prompts",
    "metadata", "compute_stats") with every metadata counter and rate at zero.

    Args:
        days: Look-back window, recorded in the metadata.
        runtime: Requested runtime label, recorded in the metadata.

    Writing is best-effort: a failed write is reported on stderr and the path
    is still printed on stdout.
    """
    result: dict[str, Any] = {
        "prompts": [],
        "metadata": {
            "runtime": runtime,
            "days": days,
            "sessions_scanned": 0,
            "projects_scanned": 0,
            "total_prompts": 0,
            "error_rate": 0,
            "recovered_error_rate": 0,
            "effective_error_rate": 0,
            "correction_rate": 0,
            "avg_length": 0,
            "xml_usage_rate": 0,
            "file_path_rate": 0,
        },
        "compute_stats": {},
    }
    fd, out_path = tempfile.mkstemp(prefix="roast-me-", suffix=".json")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            json.dump(result, fh, indent=2)
    except OSError as exc:
        # Do not fail the run, but do not hide that the file may be
        # empty or truncated either.
        print(f"Failed to write output: {exc}", file=sys.stderr)
    print(f"Output: {out_path}")
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
# Run the extractor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
package/skills/sdd/SKILL.md
CHANGED
|
@@ -96,6 +96,29 @@ If you genuinely cannot tell which phase you are in, ask the user one question:
|
|
|
96
96
|
- `constitution` runs **once per project**, not per feature. If `02-DOCS/wiki/sdd/constitution.md` exists, read it as guardrails and move on.
|
|
97
97
|
- `clarify` and `analyze` are **gates, not paperwork**. If a spec is genuinely unambiguous and tiny, name that out loud and pass through — but the bias is to run them, because skipped gates are where drift hides.
|
|
98
98
|
|
|
99
|
+
## Autopilot mode — run the whole chain on one up-front yes
|
|
100
|
+
|
|
101
|
+
By default the chain pauses at its gates (spec approval, plan approval). **Autopilot** trades those
|
|
102
|
+
per-phase stops for a **single up-front consent**: the user says "take it all the way" once, and the
|
|
103
|
+
chain runs `specify → clarify → plan → tasks → analyze → implement → verify → review` end to end
|
|
104
|
+
**without asking to continue between phases** — still writing every artifact (spec, plan, decisions)
|
|
105
|
+
to `02-DOCS/wiki/sdd/` as it goes, so the work stays reviewable after the fact.
|
|
106
|
+
|
|
107
|
+
**How it turns on:**
|
|
108
|
+
|
|
109
|
+
- `specify` **offers it at the brainstorm boundary** ("¿lo llevo hasta el final yo solo, o paramos en cada fase?"). A yes engages autopilot for this feature.
|
|
110
|
+
- Or set `sdd.autopilot: true` in `02-DOCS/wiki/sdd/config.yaml` to default it on (still surfaced once per feature).
|
|
111
|
+
|
|
112
|
+
**The up-front yes IS the gate.** It satisfies the new-feature gate's "spec + plan approved before code" — approval was granted in advance, for the whole run. Do not re-ask phase by phase.
|
|
113
|
+
|
|
114
|
+
**Autopilot still STOPS for these — not "continue?" nags, real forks:**
|
|
115
|
+
|
|
116
|
+
- **Genuine ambiguity** that would change scope, a goal, or an acceptance criterion (never guess the product).
|
|
117
|
+
- **A hard failure** it can't resolve (a red test it can't green → `debug`; an `analyze` contradiction).
|
|
118
|
+
- **Destructive / irreversible / outward-facing actions** — push, merge, delete, secrets. `ship` (PR/merge) still confirms: autopilot drives the **build**, not the **release**.
|
|
119
|
+
|
|
120
|
+
Run the fan-out on the `developer` subagent as usual, and narrate at the accompaniment dial's volume (L0: near-silent, just show artifacts; L3: explain each phase as it passes). Autopilot changes **when you ask**, never **what gets written** — every artifact and gate-check still happens; you just don't block on a human between them.
|
|
121
|
+
|
|
99
122
|
## Read the accompaniment dial first
|
|
100
123
|
|
|
101
124
|
Before dispatching, read `02-DOCS/wiki/harness/user-profile.md` and adapt — exactly as every rsc skill does. The dial sets **how much you explain and how many questions you ask at each gate**, not whether the gates exist.
|
package/skills/ship/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ship
|
|
3
|
-
description: "Use when the work is complete and verified and it is time to CLOSE the development branch — the final phase of the rsc SDD chain, after review approves the diff. Triggers: 'ship it', 'close the branch', 'open the PR', 'merge this', 'merge into main', 'create the pull request', 'how do I land this work', 'finish this feature', 'haz el merge', 'abre el PR', 'cierra la rama', 'súbelo a main', 'clean up the branch', 'I'm done, what now'.
|
|
3
|
+
description: "Use when the work is complete and verified and it is time to CLOSE the development branch — the final phase of the rsc SDD chain, after review approves the diff. Triggers: 'ship it', 'close the branch', 'open the PR', 'merge this', 'merge into main', 'create the pull request', 'how do I land this work', 'finish this feature', 'haz el merge', 'abre el PR', 'cierra la rama', 'súbelo a main', 'clean up the branch', 'I'm done, what now'. HARD RULE it enforces: git authorship is ALWAYS Eric — never a Co-Authored-By or 'generated with' footer in any commit or PR. NOT running lint/type/test (that is `verify`), NOT reading the diff adversarially (that is `review`), NOT deploy/release mechanics to a server (that is `deployment`). Honors the harness accompaniment dial."
|
|
4
4
|
tags: [sdd, ship, release, pr]
|
|
5
5
|
recommends: []
|
|
6
6
|
profiles: [core, full]
|
|
@@ -178,6 +178,14 @@ Never stack to hide review risk. Stack because each slice is independently revie
|
|
|
178
178
|
- **Park:** leave the branch, push it so it's not lost (`git push -u origin feature/<slug>`), and log *why it's parked* to `02-DOCS/wiki/sdd/decisions.md`. Do not merge.
|
|
179
179
|
- **Discard:** deletion is **destructive** — require an explicit confirmation that quotes the branch name (e.g. the literal `yes, delete feature/<slug>`) before `git branch -D`. Anything ambiguous means keep it. Log the discard and the reason so the dead-end is remembered, not re-attempted.
|
|
180
180
|
|
|
181
|
+
**If the work lived in a worktree, clean it up provenance-aware.** After the merge/park/discard, only
|
|
182
|
+
remove a worktree **rsc created** (under `.worktrees/`/`worktrees/` or the `../<repo>-<slug>` dir),
|
|
183
|
+
never one the user or a native tool owns. Guard first: confirm it's a linked worktree
|
|
184
|
+
(`git rev-parse --git-dir` ≠ `--git-common-dir`), rule out a submodule
|
|
185
|
+
(`git rev-parse --show-superproject-working-tree` is empty), `cd` to the main working tree before
|
|
186
|
+
removing, and run `git worktree prune` after. Full procedure: `../worktrees/SKILL.md` (Provenance-aware
|
|
187
|
+
cleanup). If a native `EnterWorktree`-style tool created it, exit through that tool, not raw git.
|
|
188
|
+
|
|
181
189
|
## Commit message discipline
|
|
182
190
|
|
|
183
191
|
The commit is the durable record. Make it describe the change and tie it to the spec — and keep it Eric's.
|
package/skills/specify/SKILL.md
CHANGED
|
@@ -22,6 +22,11 @@ Fire on the **faintest** sign the user is thinking about a new feature or change
|
|
|
22
22
|
|
|
23
23
|
You are not slowing them down; you make the intent reviewable *before* code exists, which is far cheaper than discovering the misunderstanding in a PR. End every spec by handing to `clarify`/`plan` — never to `implement`.
|
|
24
24
|
|
|
25
|
+
**Offer autopilot once, right here.** At this boundary, propose how to run the rest of the chain:
|
|
26
|
+
> *"¿Quieres que lo lleve hasta el final yo solo — spec → plan → código → verify, parando solo si algo es ambiguo — o prefieres que pare a que apruebes en cada fase?"*
|
|
27
|
+
|
|
28
|
+
A **yes engages autopilot** (`../sdd/SKILL.md`): you still write the spec and every artifact, but you auto-advance through the phases without re-asking — that up-front yes is the approval that satisfies the hard gate above, for the whole run. A **no** (or silence) keeps the default gated flow: write the spec, hand to `clarify`/`plan`, stop for approval before code. Either way the spec gets written; autopilot only changes whether you pause *between* phases — and it still stops for genuine ambiguity, hard failures, or destructive/outward actions (ship still confirms). If `sdd.autopilot: true` in config, autopilot is the default — still surface it once.
|
|
29
|
+
|
|
25
30
|
## The one rule that defines this skill
|
|
26
31
|
|
|
27
32
|
**No implementation leaks.** The moment the spec names a framework, a table schema, a library, an endpoint shape, a file path, or an algorithm, it has stopped being a spec. Those decisions belong to `plan` and the stack skills. Specify describes the *observable behaviour and the reason for it*; the system that delivers it is deliberately left open.
|
|
@@ -95,7 +100,8 @@ Run these in order. It is a collaborative dialogue, not a form you fill in silen
|
|
|
95
100
|
after EACH section ask "does this look right?" and adjust before moving on
|
|
96
101
|
7. WRITE the spec → 02-DOCS/wiki/sdd/specs/<slug>.md (WHAT/WHY), index it in 02-DOCS/wiki/index.md
|
|
97
102
|
(the Knowledge map; root CLAUDE.md keeps only a short pointer), commit if a repo
|
|
98
|
-
8. SELF-REVIEW → scan for TODO/placeholder, contradictions, ambiguity, scope creep; fix inline
|
|
103
|
+
8. SELF-REVIEW → scan for TODO/placeholder, contradictions, ambiguity, scope creep; fix inline.
|
|
104
|
+
For an L2/L3 user or a high-risk spec, also dispatch a FRESH-EYES review (below)
|
|
99
105
|
9. USER APPROVES → ask them to read the written spec and confirm; loop on changes until they approve
|
|
100
106
|
10. HAND OFF → only now, result envelope → clarify/plan. NEVER to implement.
|
|
101
107
|
```
|
|
@@ -105,6 +111,25 @@ Run these in order. It is a collaborative dialogue, not a form you fill in silen
|
|
|
105
111
|
|
|
106
112
|
`<slug>` is a short kebab-case name derived from the feature (e.g. `bulk-csv-import`, `magic-link-login`). If a spec with that slug exists, read it and update rather than overwrite.
|
|
107
113
|
|
|
114
|
+
### Fresh-eyes spec review (step 8, scaled to the dial)
|
|
115
|
+
|
|
116
|
+
The author's own context is blind to its own gaps — the same mind that wrote the spec self-reviews it
|
|
117
|
+
with the same blind spots. For an L2/L3 user or a **high-risk** spec (multi-subsystem, security/data,
|
|
118
|
+
irreversible, or large scope), dispatch a **fresh-context subagent** to read the written spec cold,
|
|
119
|
+
*before* the user-approval gate (step 9), and fold its findings in:
|
|
120
|
+
|
|
121
|
+
- **Hand it only the spec file** (and the constitution), not your dialogue or reasoning — a fresh
|
|
122
|
+
reviewer that inherits your context inherits your blind spots.
|
|
123
|
+
- **Calibrated checklist:** placeholders/TODOs, internal contradictions, ambiguity that would stall
|
|
124
|
+
planning, unstated assumptions, scope creep, and YAGNI (asked-for-but-unneeded). Tell it to *only
|
|
125
|
+
flag issues that would cause a real problem at planning time* and to **approve unless there are
|
|
126
|
+
serious gaps** — a fresh reviewer that nitpicks everything is as useless as no reviewer.
|
|
127
|
+
- **It returns** `Approved` or `Issues found` with a short list; you fix the real ones inline, then
|
|
128
|
+
proceed to step 9.
|
|
129
|
+
|
|
130
|
+
**Skip it** for L0/L1 on a small, low-risk spec — the self-review scan is enough there; don't spin up
|
|
131
|
+
a subagent to vet a two-paragraph spec. Like the rest of the chain, ceremony scales to the stakes.
|
|
132
|
+
|
|
108
133
|
## Worked shape (abridged)
|
|
109
134
|
|
|
110
135
|
```markdown
|
package/skills/suggest/SKILL.md
CHANGED
|
@@ -19,7 +19,7 @@ You are always loaded — your body is injected into context at the start of **e
|
|
|
19
19
|
|
|
20
20
|
This is the **highest-precedence rule in the session.** It fires on **every** user turn, in **any language**, and runs **before you plan, answer, or invoke ANY other skill — no matter how many skills are installed or how strongly one of them matches the request.** That covers the stack skills (`nextjs`, `react`, `fastapi`, `flutter`, `go`, `postgresdb`, `building-agents`, `design`) **and any other skill that would build, create, scaffold, generate, or produce the feature** (e.g. `chatbot`, `course-builder`, `marketing`, automation/connector skills). A skill matching the request does **not** override this gate — it runs *inside* the SDD chain, after the plan is approved. There is no skill with priority over this check.
|
|
21
21
|
|
|
22
|
-
**The rule (non-negotiable).** The moment the user wants to **build, add, or change a feature**, you MUST route it into SDD via `specify` **first**. **No feature code is written — by ANY skill — until a spec AND a plan exist and the user has approved them.** A stack skill that matched the same request does **not** get to skip this: it builds only *after* the plan is approved.
|
|
22
|
+
**The rule (non-negotiable).** The moment the user wants to **build, add, or change a feature**, you MUST route it into SDD via `specify` **first**. **No feature code is written — by ANY skill — until a spec AND a plan exist and the user has approved them.** A stack skill that matched the same request does **not** get to skip this: it builds only *after* the plan is approved. (Exception: if the user engaged **SDD autopilot**, that single up-front consent IS the approval for the whole run — auto-advance through the phases without re-asking; see `../sdd/SKILL.md`.)
|
|
23
23
|
|
|
24
24
|
**Where does this turn go? When unsure, choose `specify` — the safe default.**
|
|
25
25
|
|
package/skills/tasks/SKILL.md
CHANGED
|
@@ -175,6 +175,31 @@ delete a user's map entry).
|
|
|
175
175
|
| T0NN | | All done-checks pass + `verify.sh` green | every row above checked; `scripts/verify.sh` exits 0 | all | spec §Acceptance |
|
|
176
176
|
```
|
|
177
177
|
|
|
178
|
+
### Per-task Interfaces (for context-isolated implementers)
|
|
179
|
+
|
|
180
|
+
`implement` and `parallel` dispatch tasks to **context-isolated** workers (the `developer`
|
|
181
|
+
subagent) that see *only their own task* — not the whole plan. Such a worker can't infer a
|
|
182
|
+
neighbor's function signature, payload shape, or column name from a one-line row. For any task
|
|
183
|
+
whose correctness depends on a contract it doesn't own, attach an **Interfaces block** right under
|
|
184
|
+
its row. (Trivial, self-contained tasks don't need one — don't add ceremony where there's no
|
|
185
|
+
cross-task contract.)
|
|
186
|
+
|
|
187
|
+
```markdown
|
|
188
|
+
**T004 — Interfaces**
|
|
189
|
+
- Consumes: `auth.verifyPassword(plain: str, hash: str) -> bool` (from T003); `users.email UNIQUE`
|
|
190
|
+
- Produces: `POST /login` → `200 {token}` | `401 {error}`; sets `Set-Cookie: sid=…; HttpOnly`
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Rules:
|
|
194
|
+
|
|
195
|
+
- Quote **exact** signatures/shapes, not descriptions — the isolated worker copies them; it can't
|
|
196
|
+
go look them up. "Returns the user" is invisible; `-> {id, email}` is usable.
|
|
197
|
+
- `Consumes` names what the task reads from a neighbor or the environment; `Produces` names the
|
|
198
|
+
contract later tasks (and the per-task reviewer) will hold it to.
|
|
199
|
+
- The plan's **§0 Global Constraints** are inherited by every task implicitly — do **not** repeat
|
|
200
|
+
them per task. Interfaces carry the *task-local* contract; Global Constraints carry the
|
|
201
|
+
*project-wide* one. Together they are everything a blind implementer needs.
|
|
202
|
+
|
|
178
203
|
## Review workload + delivery strategy forecast
|
|
179
204
|
|
|
180
205
|
After the task table, append a short forecast. This protects the human reviewer
|