@ericrisco/rsc 0.1.32 → 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/manifest.json +24 -5
- package/package.json +1 -1
- package/scripts/lib/domains.js +1 -1
- package/skills/analyze/SKILL.md +1 -0
- package/skills/author-skill/SKILL.md +20 -0
- package/skills/author-skill/references/description-recipe.md +2 -0
- package/skills/debug/SKILL.md +1 -1
- package/skills/implement/SKILL.md +72 -2
- package/skills/implement/references/per-task-review.md +46 -0
- package/skills/implement/scripts/review-package +59 -0
- package/skills/implement/scripts/sdd-workspace +47 -0
- package/skills/implement/scripts/task-brief +77 -0
- package/skills/parallel/SKILL.md +29 -0
- package/skills/plan/references/plan-template.md +18 -0
- package/skills/roast-me/SKILL.md +124 -0
- package/skills/roast-me/evals/README.md +76 -0
- package/skills/roast-me/evals/cases.yaml +75 -0
- package/skills/roast-me/prompts/analyze.md +90 -0
- package/skills/roast-me/prompts/compute.md +100 -0
- package/skills/roast-me/prompts/roast.md +181 -0
- package/skills/roast-me/tools/adapters/__init__.py +1 -0
- package/skills/roast-me/tools/adapters/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/base.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/claude.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/codex.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/gemini.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/__pycache__/registry.cpython-312.pyc +0 -0
- package/skills/roast-me/tools/adapters/base.py +53 -0
- package/skills/roast-me/tools/adapters/claude.py +140 -0
- package/skills/roast-me/tools/adapters/codex.py +113 -0
- package/skills/roast-me/tools/adapters/gemini.py +121 -0
- package/skills/roast-me/tools/adapters/registry.py +68 -0
- package/skills/roast-me/tools/extract_prompts.py +520 -0
- package/skills/ship/SKILL.md +9 -1
- package/skills/specify/SKILL.md +21 -1
- package/skills/tasks/SKILL.md +25 -0
- package/skills/worktrees/SKILL.md +25 -0
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Extract user prompts from AI assistant session files for prompt-quality analysis.
|
|
3
|
+
|
|
4
|
+
Scans session files for one or more runtimes (Claude, Codex, Gemini) and
|
|
5
|
+
extracts user prompts with contextual signals: whether an error followed,
|
|
6
|
+
whether the agent auto-recovered, whether the user issued a correction.
|
|
7
|
+
|
|
8
|
+
Writes a normalised JSON file to a temp path and prints:
|
|
9
|
+
1. The output path
|
|
10
|
+
2. A metadata summary
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
python3 extract_prompts.py [DAYS] [--days N] [--runtime auto|claude|codex|gemini]
|
|
14
|
+
|
|
15
|
+
--days N Look back N days (default 7). A bare positional number (DAYS) is also accepted and takes precedence over --days.
|
|
16
|
+
--runtime ID Which runtime to scan (default: auto = all installed runtimes).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import re
|
|
25
|
+
import sys
|
|
26
|
+
import tempfile
|
|
27
|
+
import time
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# Add the adapters package to sys.path so imports work regardless of cwd.
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
_TOOLS_DIR = Path(__file__).parent
|
|
35
|
+
sys.path.insert(0, str(_TOOLS_DIR))
|
|
36
|
+
from adapters.registry import get_adapters, list_runtime_ids
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Constants
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
# Output caps: how many prompts are kept, and how many characters of each
# stored text field survive truncation.
MAX_PROMPTS = 300
PROMPT_TEXT_LIMIT = 1500
CORRECTION_TEXT_LIMIT = 500
CONTEXT_BEFORE_LIMIT = 500

# Phrases that mark a user message as a correction of the previous turn.
# They are joined into one word-bounded, case-insensitive alternation.
_CORRECTION_PHRASES = (
    r"no[,.]?\s",
    r"wrong",
    r"instead",
    r"actually",
    r"don'?t",
    r"shouldn'?t",
    r"stop",
    r"not that",
    r"I said",
    r"I meant",
    r"I asked",
    r"that'?s not",
    r"please don'?t",
    r"why did you",
    r"you should have",
    r"that was wrong",
    r"incorrect",
    r"try again",
    r"redo",
    r"that broke",
    r"you broke",
    r"revert",
    r"undo",
)
CORRECTION_PATTERNS = re.compile(
    r"\b(" + "|".join(_CORRECTION_PHRASES) + r")\b",
    re.IGNORECASE,
)

# Model tier classification (provider-neutral labels).
# Each entry pairs a regex over the model ID with a tier name; order matters
# because the first matching entry wins.
MODEL_TIER_MAP: list[tuple[re.Pattern, str]] = [
    (re.compile(expr, re.I), label)
    for expr, label in (
        (r"fable|mythos", "heavy"),
        (r"opus|gpt-4(?!.*mini)", "heavy"),
        (r"sonnet|gpt-4.*mini|gemini-1\.5-pro|gemini-2", "balanced"),
        (r"haiku|gpt-3\.5|gemini-1\.5-flash|gemini-flash", "light"),
    )
]

# Numeric ordering of tiers; an unrecognised model ranks the same as "balanced".
MODEL_TIER_RANK = dict(light=0, balanced=1, heavy=2, unknown=1)

# Prompts matching any of these (anchored at the start) are classed "simple".
_SIMPLE_SOURCES = (
    r"^\s*(yes|ok|go ahead|looks good|lgtm|sure|do it|yep|correct|perfect)\s*[.!]?\s*$",
    r"^\s*(commit|push|merge|ship it|deploy)\s*$",
    r"^\s*(read|show|list|ls|find|check)\b.{0,80}$",
    r"^\s*(format|lint|fix.*style)\b",
    r"^\s*(what does|explain|what is|how does)\b.{0,120}$",
)
SIMPLE_PATTERNS = [re.compile(source, re.IGNORECASE) for source in _SIMPLE_SOURCES]

# Prompts containing any of these (searched anywhere) are classed "complex".
_COMPLEX_SOURCES = (
    r"\b(design|architect|plan|strategy|migration|roadmap)\b",
    r"\b(debug|race\s*condition|memory\s*leak|performance)\b",
    r"\b(implement|build|create)\b.{30,}",
    r"\b(refactor|rewrite|overhaul)\b.*\b(entire|all|whole)\b",
)
COMPLEX_PATTERNS = [re.compile(source, re.IGNORECASE) for source in _COMPLEX_SOURCES]
|
|
84
|
+
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
# Helpers
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
|
|
89
|
+
def truncate(s: str, limit: int) -> str:
    """Return *s* clipped to *limit* characters, adding "..." when it was clipped.

    A falsy *s* (``None`` or the empty string) yields ``""``; a string that
    already fits is returned unchanged.
    """
    if not s:
        return ""
    if len(s) > limit:
        return s[:limit] + "..."
    return s
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def classify_tier(model_id: str) -> str:
    """Return the tier label of the first MODEL_TIER_MAP pattern found in *model_id*.

    An empty model ID, or one that no pattern matches, is reported as
    ``"unknown"``.
    """
    if model_id:
        # MODEL_TIER_MAP is ordered, so the first hit decides the tier.
        hits = (label for rx, label in MODEL_TIER_MAP if rx.search(model_id))
        return next(hits, "unknown")
    return "unknown"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def classify_complexity(text: str) -> str:
    """Label a prompt as "simple", "complex" or "moderate".

    SIMPLE_PATTERNS are tried first and must match from the start of *text*;
    COMPLEX_PATTERNS may match anywhere. Text that matches neither set is
    "moderate".
    """
    if any(rx.match(text) for rx in SIMPLE_PATTERNS):
        return "simple"
    if any(rx.search(text) for rx in COMPLEX_PATTERNS):
        return "complex"
    return "moderate"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
COMPLEXITY_TO_TIER = {"simple": "light", "moderate": "balanced", "complex": "heavy"}
|
|
116
|
+
|
|
117
|
+
# ---------------------------------------------------------------------------
|
|
118
|
+
# Normalisation
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
def normalise_record(raw: dict[str, Any], position: int, total: int) -> dict[str, Any]:
    """Convert a raw adapter record into a normalised PromptRecord.

    Args:
        raw: Adapter output. The keys read here are ``prompt_text``, ``model``,
            ``runtime``, ``session_file``, ``timestamp`` and ``context_before``;
            all are optional.
        position: 1-based index of this prompt within its session.
        total: Number of prompts in that session.

    Returns:
        A dict with the prompt text (truncated to PROMPT_TEXT_LIMIT), its
        original length, structural flags, the model/tier/complexity fields,
        and the error/correction fields initialised to their "nothing
        detected" values.
    """
    # A key that is present but null would otherwise crash len() and re.search().
    text = raw.get("prompt_text") or ""
    length = len(text)

    has_xml_tags = bool(re.search(r"<\w[\w-]*>", text))
    has_file_paths = bool(re.search(r"(/[\w./\-]+|~\/[\w./\-]+|\.\./)", text))
    has_code_blocks = "```" in text

    model_id = raw.get("model", "") or ""
    tier = classify_tier(model_id)
    complexity = classify_complexity(text)
    recommended_tier = COMPLEXITY_TO_TIER[complexity]
    # NOTE(review): an unrecognised model ranks as "balanced"
    # (MODEL_TIER_RANK["unknown"] == 1), so it is flagged as overkill for
    # "simple" prompts -- confirm this is intended.
    was_overkill = MODEL_TIER_RANK.get(tier, 1) > MODEL_TIER_RANK.get(recommended_tier, 1)

    return {
        "runtime": raw.get("runtime", "unknown"),
        "session_file": raw.get("session_file", ""),
        "timestamp": raw.get("timestamp"),
        "prompt_text": truncate(text, PROMPT_TEXT_LIMIT),
        "prompt_length": length,
        "prompt_position": position,
        "total_prompts_in_session": total,
        "has_xml_tags": has_xml_tags,
        "has_file_paths": has_file_paths,
        "has_code_blocks": has_code_blocks,
        # These will be populated by post-processing when session context is available.
        "followed_by_error": False,
        "error_was_recovered": False,
        "followed_by_correction": False,
        "correction_text": "",
        "error_tool": "",
        "error_text": "",
        "context_before": truncate(raw.get("context_before", ""), CONTEXT_BEFORE_LIMIT),
        # Compute fields (best-effort from adapter)
        "model": model_id,
        "model_tier": tier,
        "task_complexity": complexity,
        "recommended_tier": recommended_tier,
        "compute_was_overkill": was_overkill,
    }
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# ---------------------------------------------------------------------------
|
|
165
|
+
# Session-aware processing (Claude-specific: reads full JSONL for context)
|
|
166
|
+
# ---------------------------------------------------------------------------
|
|
167
|
+
|
|
168
|
+
def process_claude_sessions(session_files: list[Path], cutoff: float) -> list[dict[str, Any]]:
    """Extract prompts and their follow-up signals from Claude Code JSONL files.

    Each file is read line by line and only ``user`` / ``assistant`` entries
    are kept, in file order. A user entry counts as a prompt when it is not
    marked ``isMeta``, carries non-blank text, and is not made up solely of
    tool results. For every prompt, the entries up to the next real user
    prompt are scanned to record:

    * ``followed_by_error`` / ``error_tool`` / ``error_text``: the first
      ``tool_result`` block flagged ``is_error``, the name of the ``tool_use``
      block with the matching id, and the result's text;
    * ``error_was_recovered``: an error was followed by at least one
      non-error tool result and the next prompt was not a correction;
    * ``followed_by_correction`` / ``correction_text``: the next real user
      prompt matches CORRECTION_PATTERNS.

    Args:
        session_files: Candidate JSONL session files.
        cutoff: Epoch seconds; files with an older mtime are skipped.

    Returns:
        One record per prompt, in file order. The model fields are left empty
        (``model`` is ``""``, ``model_tier`` is ``"unknown"``,
        ``compute_was_overkill`` is ``False``). Files that cannot be stat'ed
        or read are skipped, as are lines that are not JSON objects.
    """
    from adapters.claude import _extract_text, _is_only_tool_results, _parse_timestamp

    def message_of(entry: dict) -> dict:
        """Return the entry's ``message`` mapping, or ``{}`` if absent or not a dict."""
        message = entry.get("message")
        return message if isinstance(message, dict) else {}

    def is_real_prompt(entry: dict) -> bool:
        """True for a user entry that is typed text rather than meta/tool output."""
        message = message_of(entry)
        if message.get("isMeta"):
            return False
        content = message.get("content", [])
        return bool(_extract_text(content).strip()) and not _is_only_tool_results(content)

    prompts: list[dict[str, Any]] = []

    for sf in session_files:
        try:
            # stat() is inside the try so a file that vanished between
            # discovery and processing is skipped instead of aborting the run.
            if sf.stat().st_mtime < cutoff:
                continue
            raw_lines = sf.read_text(encoding="utf-8", errors="replace").splitlines()
        except OSError:
            continue

        # Conversation turns in file order: (entry type, parsed JSON object).
        ordered: list[tuple[str, dict]] = []
        for line in raw_lines:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A valid JSON line need not be an object; ignore scalars and arrays.
            if isinstance(obj, dict) and obj.get("type", "") in ("user", "assistant"):
                ordered.append((obj["type"], obj))

        session_prompts: list[int] = [
            idx for idx, (kind, entry) in enumerate(ordered)
            if kind == "user" and is_real_prompt(entry)
        ]
        total_in_session = len(session_prompts)

        for position, idx in enumerate(session_prompts, 1):
            obj = ordered[idx][1]
            prompt_text = _extract_text(message_of(obj).get("content", []))

            # context_before: first text block of the nearest preceding
            # assistant entry that has a non-empty one.
            context_before = ""
            for k in range(idx - 1, -1, -1):
                k_type, k_obj = ordered[k]
                if k_type != "assistant":
                    continue
                a_content = message_of(k_obj).get("content", [])
                if isinstance(a_content, list):
                    for block in a_content:
                        if isinstance(block, dict) and block.get("type") == "text":
                            context_before = block.get("text", "")
                            break
                if context_before:
                    break

            followed_by_error = False
            followed_by_correction = False
            correction_text = ""
            error_tool = ""
            error_text = ""
            succeeded_after_error = False

            for j in range(idx + 1, len(ordered)):
                j_type, j_obj = ordered[j]
                if j_type != "user":
                    continue

                u_content = message_of(j_obj).get("content", [])
                if isinstance(u_content, list):
                    for block in u_content:
                        if not isinstance(block, dict) or block.get("type") != "tool_result":
                            continue
                        if not block.get("is_error"):
                            # A clean tool result after an error counts as recovery.
                            if followed_by_error:
                                succeeded_after_error = True
                            continue
                        if followed_by_error:
                            # Only the first error's tool and text are recorded.
                            continue
                        followed_by_error = True

                        # Walk back to the assistant tool_use that produced
                        # this result and stop at the first id match.
                        tid = block.get("tool_use_id")
                        for k in range(j - 1, idx, -1):
                            k_type, k_obj = ordered[k]
                            if k_type != "assistant":
                                continue
                            ac = message_of(k_obj).get("content", [])
                            if not isinstance(ac, list):
                                continue
                            tool_name = next(
                                (
                                    ab.get("name", "")
                                    for ab in ac
                                    if isinstance(ab, dict)
                                    and ab.get("type") == "tool_use"
                                    and ab.get("id") == tid
                                ),
                                None,
                            )
                            if tool_name is not None:
                                error_tool = tool_name
                                break

                        rc = block.get("content", [])
                        if isinstance(rc, list):
                            for rb in rc:
                                if isinstance(rb, dict) and rb.get("type") == "text":
                                    error_text = rb.get("text", "")
                                    break
                        elif isinstance(rc, str):
                            error_text = rc

                # The next real user prompt ends the window for this prompt.
                if is_real_prompt(j_obj):
                    next_text = _extract_text(u_content)
                    if CORRECTION_PATTERNS.search(next_text):
                        followed_by_correction = True
                        correction_text = next_text
                    break

            error_was_recovered = (
                followed_by_error
                and succeeded_after_error
                and not followed_by_correction
            )

            # Classify once and reuse for both derived fields.
            complexity = classify_complexity(prompt_text)

            prompts.append({
                "runtime": "claude",
                "session_file": str(sf),
                "timestamp": _parse_timestamp(obj),
                "prompt_text": truncate(prompt_text, PROMPT_TEXT_LIMIT),
                "prompt_length": len(prompt_text),
                "prompt_position": position,
                "total_prompts_in_session": total_in_session,
                "has_xml_tags": bool(re.search(r"<\w[\w-]*>", prompt_text)),
                "has_file_paths": bool(re.search(r"(/[\w./\-]+|~\/[\w./\-]+|\.\./)", prompt_text)),
                "has_code_blocks": "```" in prompt_text,
                "followed_by_error": followed_by_error,
                "error_was_recovered": error_was_recovered,
                "followed_by_correction": followed_by_correction,
                "correction_text": truncate(correction_text, CORRECTION_TEXT_LIMIT),
                "error_tool": error_tool,
                "error_text": truncate(error_text, 500),
                "context_before": truncate(context_before, CONTEXT_BEFORE_LIMIT),
                # No model is read from the session entries here.
                "model": "",
                "model_tier": "unknown",
                "task_complexity": complexity,
                "recommended_tier": COMPLEXITY_TO_TIER[complexity],
                "compute_was_overkill": False,
            })

    return prompts
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# ---------------------------------------------------------------------------
|
|
324
|
+
# Main
|
|
325
|
+
# ---------------------------------------------------------------------------
|
|
326
|
+
|
|
327
|
+
def main() -> None:
    """Command-line entry point: collect prompts, write a JSON report, print a summary.

    Parses ``[DAYS] [--days N] [--runtime ID]``, asks each selected adapter
    for its session files, keeps those modified within the look-back window,
    and extracts prompts from them. Claude sessions get the session-aware
    extraction; other runtimes are normalised record by record. Prompts that
    were followed by an error or a correction are listed first, then the
    list is capped at MAX_PROMPTS.

    The report is written to a fresh temp file. On success the summary lines
    and the ``Output: <path>`` line go to stdout. When the runtime is unknown
    or no prompts are found, a diagnostic goes to stderr and an empty report
    is written instead. A failed write exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Extract user prompts from AI assistant session files."
    )
    parser.add_argument(
        "days_positional",
        nargs="?",
        type=int,
        default=None,
        metavar="DAYS",
        help="Number of days to look back (positional shorthand).",
    )
    parser.add_argument(
        "--days",
        type=int,
        default=None,
        help="Number of days to look back (default 7).",
    )
    parser.add_argument(
        "--runtime",
        type=str,
        default="auto",
        help=f"Runtime to scan: auto, {', '.join(list_runtime_ids())} (default: auto).",
    )
    args = parser.parse_args()

    # Resolve days: positional takes precedence over --days; a missing (or
    # zero) value falls back to 7.
    days = args.days_positional or args.days or 7

    # Resolve adapters — unknown runtime exits 0 cleanly.
    try:
        adapters = get_adapters(args.runtime)
    except ValueError as exc:
        print(f"No data: {exc}", file=sys.stderr)
        _write_empty(days, args.runtime)
        return

    cutoff = time.time() - (days * 86400)

    all_prompts: list[dict[str, Any]] = []
    sessions_scanned = 0
    projects_seen: set[str] = set()

    for adapter in adapters:
        session_files = adapter.discover()

        # Filter by age; files that cannot be stat'ed are ignored.
        recent_files = []
        for sf in session_files:
            try:
                if sf.stat().st_mtime >= cutoff:
                    recent_files.append(sf)
            except OSError:
                continue

        if not recent_files:
            # This adapter found no data — degrade cleanly.
            continue

        sessions_scanned += len(recent_files)

        if adapter.RUNTIME_ID == "claude":
            # Use full session-aware extraction for Claude to detect error context.
            prompts = process_claude_sessions(recent_files, cutoff)
        else:
            # For other runtimes: simple parse without cross-turn context detection.
            prompts = []
            for sf in recent_files:
                raws = adapter.parse(sf)
                session_total = len(raws)
                prompts.extend(
                    normalise_record(raw, pos, session_total)
                    for pos, raw in enumerate(raws, 1)
                )

        # Collect project identifiers (the directory holding each session file).
        projects_seen.update(str(sf.parent) for sf in recent_files)

        all_prompts.extend(prompts)

    if not all_prompts:
        runtime_label = args.runtime
        print(
            f"No transcript data found for runtime '{runtime_label}' "
            f"in the last {days} days.",
            file=sys.stderr,
        )
        # Same diagnostic, so it goes to stderr too; stdout keeps only the
        # output path.
        print(
            "If you are using a supported runtime, check that its session "
            "directory exists and contains recent files.",
            file=sys.stderr,
        )
        _write_empty(days, args.runtime)
        return

    # Prioritise error/correction prompts, then cap.
    error_prompts = [p for p in all_prompts if p.get("followed_by_error") or p.get("followed_by_correction")]
    clean_prompts = [p for p in all_prompts if not p.get("followed_by_error") and not p.get("followed_by_correction")]
    all_prompts = (error_prompts + clean_prompts)[:MAX_PROMPTS]

    # total is at least 1 here because the empty case returned above; the
    # "if total" guards below are kept as a safety net.
    total = len(all_prompts)
    errors = sum(1 for p in all_prompts if p.get("followed_by_error"))
    recovered = sum(1 for p in all_prompts if p.get("error_was_recovered"))
    unrecovered = errors - recovered
    corrections = sum(1 for p in all_prompts if p.get("followed_by_correction"))
    avg_length = sum(p["prompt_length"] for p in all_prompts) / total if total else 0
    xml_count = sum(1 for p in all_prompts if p.get("has_xml_tags"))
    fp_count = sum(1 for p in all_prompts if p.get("has_file_paths"))
    overkill_count = sum(1 for p in all_prompts if p.get("compute_was_overkill"))

    # Model tier distribution (a plain dict, because it is printed verbatim below).
    tier_dist: dict[str, int] = {}
    for p in all_prompts:
        t = p.get("model_tier", "unknown")
        tier_dist[t] = tier_dist.get(t, 0) + 1

    compute_stats = {
        "tier_distribution": {t: round(c / total, 3) for t, c in tier_dist.items()},
        "heuristic_overuse_count": overkill_count,
        "heuristic_overuse_rate": round(overkill_count / total, 3) if total else 0,
    }

    result = {
        "prompts": all_prompts,
        "metadata": {
            "runtime": args.runtime,
            "days": days,
            "sessions_scanned": sessions_scanned,
            "projects_scanned": len(projects_seen),
            "total_prompts": total,
            "error_rate": round(errors / total, 3) if total else 0,
            "recovered_error_rate": round(recovered / total, 3) if total else 0,
            "effective_error_rate": round(unrecovered / total, 3) if total else 0,
            "correction_rate": round(corrections / total, 3) if total else 0,
            "avg_length": round(avg_length, 1),
            "xml_usage_rate": round(xml_count / total, 3) if total else 0,
            "file_path_rate": round(fp_count / total, 3) if total else 0,
        },
        "compute_stats": compute_stats,
    }

    # Write output to a temp file.
    fd, out_path = tempfile.mkstemp(prefix="roast-me-", suffix=".json")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            json.dump(result, fh, indent=2, default=str)
    except OSError as exc:
        print(f"Failed to write output: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"Scanned {sessions_scanned} sessions across {len(projects_seen)} projects")
    print(
        f"Extracted {total} prompts "
        f"({errors} with errors, {recovered} auto-recovered, {unrecovered} impactful)"
    )
    print(f"Corrections: {corrections} | Avg length: {avg_length:.0f} chars | XML: {xml_count}/{total}")
    print(f"Compute: {overkill_count} overkill | tiers: {tier_dist}")
    print(f"Output: {out_path}")
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _write_empty(days: int, runtime: str) -> None:
    """Write an empty result JSON to a temp file and print its path.

    The report has no prompts and every rate or count set to zero. The
    ``compute_stats`` section carries ``tier_distribution``,
    ``heuristic_overuse_count`` and ``heuristic_overuse_rate``, which are the
    keys ``main`` writes for a populated report.

    Args:
        days: Look-back window, echoed into the metadata.
        runtime: Requested runtime ID, echoed into the metadata.

    If the file cannot be written, the error is reported on stderr and the
    process exits with status 1, so no path to an empty or partial file is
    printed.
    """
    result: dict[str, Any] = {
        "prompts": [],
        "metadata": {
            "runtime": runtime,
            "days": days,
            "sessions_scanned": 0,
            "projects_scanned": 0,
            "total_prompts": 0,
            "error_rate": 0,
            "recovered_error_rate": 0,
            "effective_error_rate": 0,
            "correction_rate": 0,
            "avg_length": 0,
            "xml_usage_rate": 0,
            "file_path_rate": 0,
        },
        "compute_stats": {
            "tier_distribution": {},
            "heuristic_overuse_count": 0,
            "heuristic_overuse_rate": 0,
        },
    }
    fd, out_path = tempfile.mkstemp(prefix="roast-me-", suffix=".json")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            json.dump(result, fh, indent=2)
    except OSError as exc:
        print(f"Failed to write output: {exc}", file=sys.stderr)
        sys.exit(1)
    print(f"Output: {out_path}")
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
if __name__ == "__main__":
|
|
520
|
+
main()
|
package/skills/ship/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ship
|
|
3
|
-
description: "Use when the work is complete and verified and it is time to CLOSE the development branch — the final phase of the rsc SDD chain, after review approves the diff. Triggers: 'ship it', 'close the branch', 'open the PR', 'merge this', 'merge into main', 'create the pull request', 'how do I land this work', 'finish this feature', 'haz el merge', 'abre el PR', 'cierra la rama', 'súbelo a main', 'clean up the branch', 'I'm done, what now'.
|
|
3
|
+
description: "Use when the work is complete and verified and it is time to CLOSE the development branch — the final phase of the rsc SDD chain, after review approves the diff. Triggers: 'ship it', 'close the branch', 'open the PR', 'merge this', 'merge into main', 'create the pull request', 'how do I land this work', 'finish this feature', 'haz el merge', 'abre el PR', 'cierra la rama', 'súbelo a main', 'clean up the branch', 'I'm done, what now'. HARD RULE it enforces: git authorship is ALWAYS Eric — never a Co-Authored-By or 'generated with' footer in any commit or PR. NOT running lint/type/test (that is `verify`), NOT reading the diff adversarially (that is `review`), NOT deploy/release mechanics to a server (that is `deployment`). Honors the harness accompaniment dial."
|
|
4
4
|
tags: [sdd, ship, release, pr]
|
|
5
5
|
recommends: []
|
|
6
6
|
profiles: [core, full]
|
|
@@ -178,6 +178,14 @@ Never stack to hide review risk. Stack because each slice is independently revie
|
|
|
178
178
|
- **Park:** leave the branch, push it so it's not lost (`git push -u origin feature/<slug>`), and log *why it's parked* to `02-DOCS/wiki/sdd/decisions.md`. Do not merge.
|
|
179
179
|
- **Discard:** deletion is **destructive** — require an explicit confirmation that quotes the branch name (e.g. the literal `yes, delete feature/<slug>`) before `git branch -D`. Anything ambiguous means keep it. Log the discard and the reason so the dead-end is remembered, not re-attempted.
|
|
180
180
|
|
|
181
|
+
**If the work lived in a worktree, clean it up provenance-aware.** After the merge/park/discard, only
|
|
182
|
+
remove a worktree **rsc created** (under `.worktrees/`/`worktrees/` or the `../<repo>-<slug>` dir),
|
|
183
|
+
never one the user or a native tool owns. Guard first: confirm it's a linked worktree
|
|
184
|
+
(`git rev-parse --git-dir` ≠ `--git-common-dir`), rule out a submodule
|
|
185
|
+
(`git rev-parse --show-superproject-working-tree` is empty), `cd` to the main working tree before
|
|
186
|
+
removing, and run `git worktree prune` after. Full procedure: `../worktrees/SKILL.md` (Provenance-aware
|
|
187
|
+
cleanup). If a native `EnterWorktree`-style tool created it, exit through that tool, not raw git.
|
|
188
|
+
|
|
181
189
|
## Commit message discipline
|
|
182
190
|
|
|
183
191
|
The commit is the durable record. Make it describe the change and tie it to the spec — and keep it Eric's.
|
package/skills/specify/SKILL.md
CHANGED
|
@@ -100,7 +100,8 @@ Run these in order. It is a collaborative dialogue, not a form you fill in silen
|
|
|
100
100
|
after EACH section ask "does this look right?" and adjust before moving on
|
|
101
101
|
7. WRITE the spec → 02-DOCS/wiki/sdd/specs/<slug>.md (WHAT/WHY), index it in 02-DOCS/wiki/index.md
|
|
102
102
|
(the Knowledge map; root CLAUDE.md keeps only a short pointer), commit if a repo
|
|
103
|
-
8. SELF-REVIEW → scan for TODO/placeholder, contradictions, ambiguity, scope creep; fix inline
|
|
103
|
+
8. SELF-REVIEW → scan for TODO/placeholder, contradictions, ambiguity, scope creep; fix inline.
|
|
104
|
+
On L2/L3 or a high-risk spec, also dispatch a FRESH-EYES review (below)
|
|
104
105
|
9. USER APPROVES → ask them to read the written spec and confirm; loop on changes until they approve
|
|
105
106
|
10. HAND OFF → only now, result envelope → clarify/plan. NEVER to implement.
|
|
106
107
|
```
|
|
@@ -110,6 +111,25 @@ Run these in order. It is a collaborative dialogue, not a form you fill in silen
|
|
|
110
111
|
|
|
111
112
|
`<slug>` is a short kebab-case name derived from the feature (e.g. `bulk-csv-import`, `magic-link-login`). If a spec with that slug exists, read it and update rather than overwrite.
|
|
112
113
|
|
|
114
|
+
### Fresh-eyes spec review (step 8, scaled to the dial)
|
|
115
|
+
|
|
116
|
+
The author's own context is blind to its own gaps — the same mind that wrote the spec self-reviews it
|
|
117
|
+
with the same blind spots. For an L2/L3 user or a **high-risk** spec (multi-subsystem, security/data,
|
|
118
|
+
irreversible, or large scope), dispatch a **fresh-context subagent** to read the written spec cold,
|
|
119
|
+
*before* the user-approval gate (step 9), and fold its findings in:
|
|
120
|
+
|
|
121
|
+
- **Hand it only the spec file** (and the constitution), not your dialogue or reasoning — a fresh
|
|
122
|
+
reviewer that inherits your context inherits your blind spots.
|
|
123
|
+
- **Calibrated checklist:** placeholders/TODOs, internal contradictions, ambiguity that would stall
|
|
124
|
+
planning, unstated assumptions, scope creep, and YAGNI (asked-for-but-unneeded). Tell it to *only
|
|
125
|
+
flag issues that would cause a real problem at planning time* and to **approve unless there are
|
|
126
|
+
serious gaps** — a fresh reviewer that nitpicks everything is as useless as no reviewer.
|
|
127
|
+
- **It returns** `Approved` or `Issues found` with a short list; you fix the real ones inline, then
|
|
128
|
+
proceed to step 9.
|
|
129
|
+
|
|
130
|
+
**Skip it** for L0/L1 on a small, low-risk spec — the self-review scan is enough there; don't spin up
|
|
131
|
+
a subagent to vet a two-paragraph spec. Like the rest of the chain, ceremony scales to the stakes.
|
|
132
|
+
|
|
113
133
|
## Worked shape (abridged)
|
|
114
134
|
|
|
115
135
|
```markdown
|
package/skills/tasks/SKILL.md
CHANGED
|
@@ -175,6 +175,31 @@ delete a user's map entry).
|
|
|
175
175
|
| T0NN | | All done-checks pass + `verify.sh` green | every row above checked; `scripts/verify.sh` exits 0 | all | spec §Acceptance |
|
|
176
176
|
```
|
|
177
177
|
|
|
178
|
+
### Per-task Interfaces (for context-isolated implementers)
|
|
179
|
+
|
|
180
|
+
`implement` and `parallel` dispatch tasks to **context-isolated** workers (the `developer`
|
|
181
|
+
subagent) that see *only their own task* — not the whole plan. Such a worker can't infer a
|
|
182
|
+
neighbor's function signature, payload shape, or column name from a one-line row. For any task
|
|
183
|
+
whose correctness depends on a contract it doesn't own, attach an **Interfaces block** right under
|
|
184
|
+
its row. (Trivial, self-contained tasks don't need one — don't add ceremony where there's no
|
|
185
|
+
cross-task contract.)
|
|
186
|
+
|
|
187
|
+
```markdown
|
|
188
|
+
**T004 — Interfaces**
|
|
189
|
+
- Consumes: `auth.verifyPassword(plain: str, hash: str) -> bool` (from T003); `users.email UNIQUE`
|
|
190
|
+
- Produces: `POST /login` → `200 {token}` | `401 {error}`; sets `Set-Cookie: sid=…; HttpOnly`
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Rules:
|
|
194
|
+
|
|
195
|
+
- Quote **exact** signatures/shapes, not descriptions — the isolated worker copies them, it can't
|
|
196
|
+
go look them up. "Returns the user" is invisible; `-> {id, email}` is usable.
|
|
197
|
+
- `Consumes` names what the task reads from a neighbor or the environment; `Produces` names the
|
|
198
|
+
contract later tasks (and the per-task reviewer) will hold it to.
|
|
199
|
+
- The plan's **§0 Global Constraints** are inherited by every task implicitly — do **not** repeat
|
|
200
|
+
them per task. Interfaces carry the *task-local* contract; Global Constraints carry the
|
|
201
|
+
*project-wide* one. Together they are everything a blind implementer needs.
|
|
202
|
+
|
|
178
203
|
## Review workload + delivery strategy forecast
|
|
179
204
|
|
|
180
205
|
After the task table, append a short forecast. This protects the human reviewer
|
|
@@ -115,6 +115,31 @@ Either way, the contract is identical: **after this step, the cwd is an isolated
|
|
|
115
115
|
base, and the default-branch checkout is exactly as it was.** Confirm that out loud (at the dial's
|
|
116
116
|
level) before handing to `implement`.
|
|
117
117
|
|
|
118
|
+
### Provenance-aware cleanup (don't remove a workspace you didn't create)
|
|
119
|
+
|
|
120
|
+
Removing the wrong worktree leaves phantom state and can destroy someone else's in-progress work.
|
|
121
|
+
Before any `git worktree remove`, clear these guards:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# 1) Are we even inside a linked worktree? (GIT_DIR != GIT_COMMON_DIR ⇒ yes)
|
|
125
|
+
[ "$(git rev-parse --git-dir)" != "$(git rev-parse --git-common-dir)" ] && echo "linked worktree"
|
|
126
|
+
# 2) Submodule false-positive guard — never treat a submodule as a worktree to remove
|
|
127
|
+
git rev-parse --show-superproject-working-tree # non-empty ⇒ this is a submodule; STOP
|
|
128
|
+
# 3) cd to the MAIN working tree before removing (you cannot remove the worktree you stand in)
|
|
129
|
+
cd "$(git rev-parse --git-common-dir)/.."
|
|
130
|
+
git worktree remove <path>
|
|
131
|
+
git worktree prune # self-heal stale administrative entries
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
- **Only remove worktrees rsc/you created** — those under `.worktrees/` or `worktrees/`, or the
|
|
135
|
+
`../<repo>-<slug>` this skill made. A worktree the user or a native tool owns is **not yours to
|
|
136
|
+
delete**; leave it and say so.
|
|
137
|
+
- **Native tool owns its own lifecycle.** If you isolated via a native `EnterWorktree`-style tool,
|
|
138
|
+
exit through that tool's `remove`/`keep` — do **not** hand-run `git worktree remove` on a workspace
|
|
139
|
+
the native tool created; let it clean up so its tracking stays consistent.
|
|
140
|
+
- **`git worktree prune`** after a remove clears stale metadata when a directory vanished out from
|
|
141
|
+
under git — cheap self-healing, safe to run.
|
|
142
|
+
|
|
118
143
|
## Native vs. git fallback — which you're using
|
|
119
144
|
|
|
120
145
|
| You have… | Create | Leave intact | Discard |
|