theslopmachine 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/agents/developer.md +17 -3
- package/assets/agents/slopmachine-claude.md +17 -3
- package/assets/agents/slopmachine.md +11 -1
- package/assets/skills/claude-worker-management/SKILL.md +18 -3
- package/assets/skills/developer-session-lifecycle/SKILL.md +9 -0
- package/assets/skills/development-guidance/SKILL.md +4 -0
- package/assets/skills/evaluation-triage/SKILL.md +4 -2
- package/assets/skills/final-evaluation-orchestration/SKILL.md +5 -1
- package/assets/skills/hardening-gate/SKILL.md +2 -0
- package/assets/skills/integrated-verification/SKILL.md +8 -0
- package/assets/skills/planning-gate/SKILL.md +1 -0
- package/assets/skills/planning-guidance/SKILL.md +6 -0
- package/assets/skills/scaffold-guidance/SKILL.md +16 -0
- package/assets/skills/submission-packaging/SKILL.md +22 -8
- package/assets/skills/verification-gates/SKILL.md +9 -0
- package/assets/slopmachine/templates/AGENTS.md +2 -0
- package/assets/slopmachine/utils/claude_create_session.mjs +15 -1
- package/assets/slopmachine/utils/claude_resume_session.mjs +15 -1
- package/assets/slopmachine/utils/claude_worker_common.mjs +126 -35
- package/package.json +1 -1
- package/src/install.js +6 -1
- package/src/send-data.js +95 -8
|
@@ -54,11 +54,18 @@ Do not introduce convenience-based simplifications, `v1` reductions, future-phas
|
|
|
54
54
|
|
|
55
55
|
If a simplification would make implementation easier but is not explicitly authorized, keep the full prompt scope and plan the real complexity instead.
|
|
56
56
|
|
|
57
|
+
When accepted planning artifacts already exist, treat them as the primary execution contract.
|
|
58
|
+
|
|
59
|
+
- read the relevant accepted plan section before implementing the next slice
|
|
60
|
+
- do not wait for the owner to restate what is already in the plan
|
|
61
|
+
- treat owner follow-up prompts mainly as narrow deltas, guardrails, or correction signals
|
|
62
|
+
|
|
57
63
|
## Execution Model
|
|
58
64
|
|
|
59
65
|
- implement real behavior, not placeholders
|
|
60
66
|
- keep user-facing and admin-facing flows complete through their real surfaces
|
|
61
67
|
- verify the changed area locally and realistically before reporting completion
|
|
68
|
+
- when closing a slice, think briefly about what adjacent flows, runtime paths, or doc/spec claims this slice could have affected before claiming readiness
|
|
62
69
|
- keep `README.md` as the only documentation file inside the repo unless the user explicitly asks for something else
|
|
63
70
|
- keep the repo self-sufficient and statically reviewable through code plus `README.md`; do not rely on runtime success alone to make the project understandable
|
|
64
71
|
- keep the repo self-sufficient; do not make it depend on parent-directory docs or sibling artifacts for startup, build/preview, configuration, verification, or basic understanding
|
|
@@ -153,12 +160,19 @@ If the owner asks you to help shape test-coverage evidence, make it acceptance-g
|
|
|
153
160
|
- if you ran no verification command for part of the work, say that explicitly instead of implying broader proof than you have
|
|
154
161
|
- if a problem needs a real fix, fix it instead of explaining around it
|
|
155
162
|
|
|
156
|
-
|
|
163
|
+
Default reply shape for ordinary slice completion, hardening, and fix responses:
|
|
164
|
+
|
|
165
|
+
1. short summary
|
|
166
|
+
2. exact changed files
|
|
167
|
+
3. exact verification commands and results
|
|
168
|
+
4. real unresolved issues only
|
|
169
|
+
|
|
170
|
+
Keep the reply compact. Point to the exact changed files and the narrow supporting files the owner should read next.
|
|
171
|
+
|
|
172
|
+
Use the larger reply shape only when the owner explicitly asks for a deeper mapping or when you are delivering a first-pass planning/scaffold artifact that genuinely needs it:
|
|
157
173
|
|
|
158
174
|
1. `Changed files` — exact files changed
|
|
159
175
|
2. `What changed` — the concrete behavior/contract updates in those files
|
|
160
176
|
3. `Why this should pass review` — prompt-fit, no unauthorized narrowing, and consistency check in 2-5 bullets
|
|
161
177
|
4. `Verification` — exact commands run and exact results
|
|
162
178
|
5. `Remaining risks` — only the real unresolved weaknesses, if any
|
|
163
|
-
|
|
164
|
-
Keep the reply compact. Point to the exact changed files and the narrow supporting files the owner should read next.
|
|
@@ -50,6 +50,12 @@ The only allowed human-stop moments are:
|
|
|
50
50
|
|
|
51
51
|
If you are not at one of those two gates, continue working.
|
|
52
52
|
|
|
53
|
+
Claude-capacity exception:
|
|
54
|
+
|
|
55
|
+
- if the active Claude developer session becomes rate-limited or capacity-blocked, do not take over implementation work yourself
|
|
56
|
+
- preserve the current developer session record, mark it blocked by rate limit, and pause gracefully for the user to resume later
|
|
57
|
+
- this is the only non-gate pause allowed in `slopmachine-claude`, and it exists only to wait for developer-session capacity recovery
|
|
58
|
+
|
|
53
59
|
## Core Role
|
|
54
60
|
|
|
55
61
|
- own lifecycle state, review pressure, and final readiness decisions
|
|
@@ -79,7 +85,7 @@ Agent-integrity rule:
|
|
|
79
85
|
- the only in-process agents you may ever use are `General` and `Explore`
|
|
80
86
|
- do not use the OpenCode `developer` subagent for implementation work in this backend
|
|
81
87
|
- use the Claude CLI `developer` worker session for codebase implementation work
|
|
82
|
-
- if the
|
|
88
|
+
- if the Claude developer worker is unavailable because of rate limits or capacity exhaustion, do not replace it by coding yourself; pause and wait for resume
|
|
83
89
|
|
|
84
90
|
## Optimization Goal
|
|
85
91
|
|
|
@@ -164,6 +170,12 @@ Outside those two moments, do not stop just to report status, summarize progress
|
|
|
164
170
|
If the work is outside those two gates, continue execution and make the best prompt-faithful decision from the available evidence.
|
|
165
171
|
If work is still in flight outside those two gates, your default is to continue autonomously until the phase objective or the next required gate is actually reached.
|
|
166
172
|
|
|
173
|
+
Claude-capacity exception:
|
|
174
|
+
|
|
175
|
+
- if the active Claude developer session becomes rate-limited or otherwise capacity-blocked, pause gracefully and wait for the user to resume the run later
|
|
176
|
+
- before pausing, update metadata and Beads comments to record that the active developer session is blocked by rate limit
|
|
177
|
+
- do not reinterpret a rate-limited developer session as permission for owner-side implementation takeover
|
|
178
|
+
|
|
167
179
|
## Lifecycle Model
|
|
168
180
|
|
|
169
181
|
Use these exact root phases:
|
|
@@ -198,6 +210,7 @@ Maintain exactly one active developer session at a time.
|
|
|
198
210
|
- if multiple sessions are needed before `P7`, keep them in the `develop-N` lane
|
|
199
211
|
- if multiple sessions are needed during `P7` remediation, keep them in the `bugfix-N` lane
|
|
200
212
|
- track the active evaluator session separately in metadata during `P7`
|
|
213
|
+
- if the active Claude developer session becomes rate-limited, keep that session as the active tracked developer session and pause for resume instead of replacing it with owner implementation
|
|
201
214
|
|
|
202
215
|
Do not launch the developer before clarification is complete and the workflow is ready to enter `P2`.
|
|
203
216
|
|
|
@@ -369,8 +382,8 @@ Operation map:
|
|
|
369
382
|
- `node ~/slopmachine/utils/claude_resume_session.mjs`
|
|
370
383
|
- export worker session for packaging:
|
|
371
384
|
- `node ~/slopmachine/utils/export_ai_session.mjs --backend claude`
|
|
372
|
-
-
|
|
373
|
-
- `
|
|
385
|
+
- convert exported worker session directly for trajectory packaging:
|
|
386
|
+
- `node ~/slopmachine/utils/convert_exported_ai_session.mjs --converter-script ~/slopmachine/utils/convert_ai_session.py`
|
|
374
387
|
|
|
375
388
|
Timeout rule:
|
|
376
389
|
|
|
@@ -381,6 +394,7 @@ Use wrapper outputs as the owner-facing contract:
|
|
|
381
394
|
|
|
382
395
|
- success: compact parsed fields such as `sid` and `res`
|
|
383
396
|
- failure: compact parsed fields such as `code` and `msg`
|
|
397
|
+
- for long-running or flaky calls, inspect the wrapper `state-file` and `result-file` rather than treating Bash process lifetime alone as the source of truth
|
|
384
398
|
|
|
385
399
|
Do not paste raw Claude JSON payloads into owner prompts, Beads comments, or metadata fields.
|
|
386
400
|
|
|
@@ -274,7 +274,10 @@ Between those moments, rely on:
|
|
|
274
274
|
- targeted unit tests
|
|
275
275
|
- targeted integration tests
|
|
276
276
|
- targeted module or route-family reruns
|
|
277
|
-
-
|
|
277
|
+
- targeted local non-E2E UI-adjacent checks when UI is material; keep browser E2E and Playwright for the owner-run broad gate moments unless a concrete blocker justifies earlier escalation
|
|
278
|
+
|
|
279
|
+
The `P7` evaluator-cycle model is separate from the ordinary owner-run broad-verification budget above.
|
|
280
|
+
Do not count the required evaluator sessions or counted cycles inside `P7` as ordinary broad owner-run verification moments.
|
|
278
281
|
|
|
279
282
|
If you run a Docker-based verification command sequence, end it with `docker compose down` unless the task explicitly requires containers to remain up.
|
|
280
283
|
|
|
@@ -313,10 +316,15 @@ When talking to the developer:
|
|
|
313
316
|
- use direct coworker-like language
|
|
314
317
|
- lead with the engineering point, not process framing
|
|
315
318
|
- keep prompts natural, sharp, and compact unless the moment really needs more context
|
|
319
|
+
- after planning is accepted, treat the accepted plan as the primary persistent implementation contract
|
|
320
|
+
- after planning is accepted, do not restate large sections of the plan back to the developer unless the plan is wrong or incomplete
|
|
321
|
+
- for normal slice work after planning, prefer one short paragraph plus a small checklist of the slice-specific guardrails or reminder items that are not already obvious from the accepted plan
|
|
322
|
+
- when the next slice is already described in the accepted plan, tell the developer to use the relevant accepted plan section and only add the narrow delta, guardrail, or review concern for that slice
|
|
316
323
|
- translate workflow intent into normal software-project language
|
|
317
324
|
- do not mention session names, slot labels, phase labels, or workflow state to the developer
|
|
318
325
|
- do not describe the interaction as a workflow handoff, session restart, or phase transition
|
|
319
326
|
- express boundaries as plain engineering instructions such as `plan this but do not start implementation yet` rather than workflow labels like `planning only` or `stop before scaffold`
|
|
327
|
+
- for slice-close or hardening-close requests, require compact replies by default: short summary, exact changed files, exact verification commands plus results, and only real unresolved issues
|
|
320
328
|
- for each development slice or follow-up fix request, require the reply to state the exact verification commands that were run and the concrete results they produced
|
|
321
329
|
- require the developer to point to the exact changed files and the narrow supporting files worth review
|
|
322
330
|
- require the developer to self-check prompt-fit, consistency, and likely review defects before claiming readiness
|
|
@@ -340,6 +348,7 @@ Do not speak as a relay for a third party.
|
|
|
340
348
|
- prefer one strong correction request over many tiny nudges
|
|
341
349
|
- keep work moving without low-information continuation chatter
|
|
342
350
|
- read only what is needed to answer the current decision
|
|
351
|
+
- after planning is accepted, prefer plan-section references plus narrow checklists over repeated prompt dumps
|
|
343
352
|
- keep comments and metadata auditable and specific
|
|
344
353
|
- keep external docs owner-maintained under parent-root `../docs/` as reference copies, and keep `README.md` as the only normal documentation file inside the repo
|
|
345
354
|
- default review scope to the changed files and the specific supporting files named by the developer
|
|
@@ -373,6 +382,7 @@ After each substantive developer reply, do one of four things:
|
|
|
373
382
|
Treat packaging as a first-class delivery contract from the start, not as late cleanup.
|
|
374
383
|
|
|
375
384
|
- the evaluation prompt files under `~/slopmachine/` are used only during evaluation runs
|
|
385
|
+
- the packaged source copies of those prompts live under `assets/slopmachine/`, and the installed runtime copies live under `~/slopmachine/`; ordinary evaluation runs should use the installed runtime copies
|
|
376
386
|
- load `submission-packaging` before any packaging action
|
|
377
387
|
- follow its exact artifact, export, cleanup, and output contract
|
|
378
388
|
- do not invent extra artifact structures during ordinary packaging
|
|
@@ -40,7 +40,7 @@ For a new bounded developer session slot:
|
|
|
40
40
|
Preferred creation pattern:
|
|
41
41
|
|
|
42
42
|
```bash
|
|
43
|
-
node ~/slopmachine/utils/claude_create_session.mjs --cwd "$PWD" --prompt-file <file> --raw-output <file> --raw-error <file>
|
|
43
|
+
node ~/slopmachine/utils/claude_create_session.mjs --cwd "$PWD" --prompt-file <file> --raw-output <file> --raw-error <file> --state-file <file> --result-file <file>
|
|
44
44
|
```
|
|
45
45
|
|
|
46
46
|
When the owner invokes this through the OpenCode Bash tool, use a long-running timeout suitable for real developer work.
|
|
@@ -58,7 +58,7 @@ The default pattern is to let Claude create the session and then persist the ret
|
|
|
58
58
|
For all later turns in the same bounded developer slot:
|
|
59
59
|
|
|
60
60
|
```bash
|
|
61
|
-
node ~/slopmachine/utils/claude_resume_session.mjs --cwd "$PWD" --session-id <session_id> --prompt-file <file> --raw-output <file> --raw-error <file>
|
|
61
|
+
node ~/slopmachine/utils/claude_resume_session.mjs --cwd "$PWD" --session-id <session_id> --prompt-file <file> --raw-output <file> --raw-error <file> --state-file <file> --result-file <file>
|
|
62
62
|
```
|
|
63
63
|
|
|
64
64
|
- use `--resume` inside the wrapper implementation, not `-r`
|
|
@@ -68,16 +68,22 @@ node ~/slopmachine/utils/claude_resume_session.mjs --cwd "$PWD" --session-id <se
|
|
|
68
68
|
|
|
69
69
|
## Result capture rule
|
|
70
70
|
|
|
71
|
-
The wrapper scripts should reduce the raw Claude result to a tiny machine-parseable object.
|
|
71
|
+
The wrapper scripts should reduce the raw Claude result to a tiny machine-parseable object and also persist state/result files for monitoring.
|
|
72
72
|
|
|
73
73
|
Use these fields only:
|
|
74
74
|
|
|
75
75
|
- `sid`
|
|
76
76
|
- `res`
|
|
77
77
|
|
|
78
|
+
Monitoring files should include at least:
|
|
79
|
+
|
|
80
|
+
- a live `state-file` showing running/completed/failed state, pid, byte counts, timestamps, and exit code
|
|
81
|
+
- a final `result-file` containing the normalized success or failure object
|
|
82
|
+
|
|
78
83
|
Treat `res` as the worker's answer.
|
|
79
84
|
Do not feed raw Claude JSON into the owner session.
|
|
80
85
|
Do not rely on transcript scraping for normal turn-to-turn orchestration.
|
|
86
|
+
Do not rely on Bash stdout alone when the wrapper state or result files provide a clearer source of truth.
|
|
81
87
|
|
|
82
88
|
## Developer-slot continuity
|
|
83
89
|
|
|
@@ -161,6 +167,15 @@ Recommended additional fields when useful:
|
|
|
161
167
|
- if a replacement session is required, record the handoff clearly in metadata and tracker comments
|
|
162
168
|
- write raw stdout and stderr to trace files for debugging, but do not surface those raw files back into normal owner prompts unless debugging is explicitly needed
|
|
163
169
|
|
|
170
|
+
## Rate-limit handling
|
|
171
|
+
|
|
172
|
+
- if Claude returns a usage-limit or capacity-exhaustion result for the active developer session, do not take over implementation work in the owner session
|
|
173
|
+
- mark the active developer session status as `rate_limited`
|
|
174
|
+
- preserve the same Claude session id as the active tracked developer session
|
|
175
|
+
- update `../.ai/metadata.json` and Beads `SESSION:` or `HANDOFF:` comments to record the rate-limit pause clearly
|
|
176
|
+
- set the workflow state to await user resume instead of creating owner-side fallback implementation work
|
|
177
|
+
- when the user later resumes the run, continue from the same Claude developer session if it is resumable
|
|
178
|
+
|
|
164
179
|
## Worker prompt discipline
|
|
165
180
|
|
|
166
181
|
- rely on the installed Claude `developer` agent definition for the worker persona
|
|
@@ -178,6 +178,7 @@ Keep `../metadata.json` focused on project facts and exported project metadata,
|
|
|
178
178
|
- if the current phase already has an active developer session, recover it instead of silently creating a replacement
|
|
179
179
|
- if an evaluator session is marked active, recover it before continuing the current `P7` cycle
|
|
180
180
|
- treat resume as deterministic recovery, not guesswork
|
|
181
|
+
- if the active Claude developer session is marked `rate_limited`, do not replace it with owner-side coding; preserve it, record the pause, and wait for the user to resume later
|
|
181
182
|
|
|
182
183
|
On recovery, inspect at least:
|
|
183
184
|
|
|
@@ -196,6 +197,14 @@ On recovery, inspect at least:
|
|
|
196
197
|
- if these records disagree, repair them before continuing
|
|
197
198
|
- do not silently create a replacement developer session if the intended existing one can still be resumed
|
|
198
199
|
|
|
200
|
+
## Boundary-summary rule
|
|
201
|
+
|
|
202
|
+
- at meaningful accepted boundaries inside a long developer lane, refresh `last_result_summary` with a compact current-state snapshot instead of relying on the full prior conversation history
|
|
203
|
+
- the boundary summary should capture only the current accepted contract, the current major guardrails, the most relevant changed areas, and the real unresolved issues that still matter
|
|
204
|
+
- prefer writing a boundary summary at least at each of these points: planning acceptance, scaffold acceptance, development completion, integrated-verification completion, hardening completion, and bugfix-lane entry
|
|
205
|
+
- when resuming a long-lived developer lane, use the boundary summary plus the relevant accepted plan section before replaying or re-describing broader history
|
|
206
|
+
- keep these summaries short and decision-oriented so they reduce future context drag instead of becoming another source of prompt bloat
|
|
207
|
+
|
|
199
208
|
## Initial structure rule
|
|
200
209
|
|
|
201
210
|
- parent-root `../docs/` is the owner-maintained external documentation directory
|
|
@@ -12,6 +12,7 @@ Use this skill during `P4 Development` before prompting the developer.
|
|
|
12
12
|
- work in bounded vertical slices
|
|
13
13
|
- complete the real user-facing and admin-facing surface for the slice
|
|
14
14
|
- keep slice-local planning, implementation, verification, and doc sync together
|
|
15
|
+
- after planning is accepted, use the relevant accepted plan section as the slice baseline instead of expecting the owner to restate the full slice contract
|
|
15
16
|
|
|
16
17
|
## Module implementation guidance
|
|
17
18
|
|
|
@@ -19,6 +20,7 @@ Use this skill during `P4 Development` before prompting the developer.
|
|
|
19
20
|
- define the module purpose, constraints, and edge cases before coding
|
|
20
21
|
- define module responsibilities, required flows, inputs and outputs, important failure behavior, permissions or boundaries when relevant, and the tests expected at completion before deeper implementation begins
|
|
21
22
|
- keep the original requirement and clarified interpretation visible while implementing so the module does not silently drift
|
|
23
|
+
- when working inside a slice, explicitly consider what adjacent flows, runtime paths, and documentation/spec claims this slice could affect before reporting readiness
|
|
22
24
|
- implement real behavior, not partial scattered logic
|
|
23
25
|
- handle failure paths and boundary conditions
|
|
24
26
|
- add or update tests as part of the module work
|
|
@@ -28,6 +30,7 @@ Use this skill during `P4 Development` before prompting the developer.
|
|
|
28
30
|
- keep auth, authorization, ownership, validation, and logging concerns in view when relevant
|
|
29
31
|
- keep frontend and backend contracts synchronized when the module spans both sides
|
|
30
32
|
- verify the module integrates cleanly with existing modules, routes, permissions, shared state, and cross-cutting helpers rather than only proving the new feature path in isolation
|
|
33
|
+
- before closing the slice, do a narrow adjacent-flow sweep: what existing flows, commands, or docs should still be true after this slice lands?
|
|
31
34
|
- check cross-cutting consistency where relevant, especially permissions, error handling, audit/logging/redaction behavior, and state or context transition behavior
|
|
32
35
|
- verify tenant or ownership isolation where relevant so access is scoped to the authorized context rather than merely functionally working for one actor
|
|
33
36
|
- verify route-level, object-level, and function-level authorization where those boundaries exist instead of treating “logged in” as sufficient proof
|
|
@@ -70,6 +73,7 @@ Use this skill during `P4 Development` before prompting the developer.
|
|
|
70
73
|
- use the shared validation and normalized error-handling path rather than per-component or per-route improvisation where a common contract exists
|
|
71
74
|
- keep the test surface moving toward at least 90 percent meaningful coverage of the relevant behavior area as slices are completed
|
|
72
75
|
- in each slice reply, report the exact verification commands that were run and the concrete results they produced so the owner can review the evidence without blindly rerunning the same commands
|
|
76
|
+
- keep ordinary slice-complete replies short by default: short summary, exact changed files, exact verification commands plus results, and only real unresolved issues unless the owner explicitly asks for a deeper mapping
|
|
73
77
|
|
|
74
78
|
## Quality rules
|
|
75
79
|
|
|
@@ -33,15 +33,17 @@ Use this skill during `P7 Evaluation and Fix Verification` after an initial audi
|
|
|
33
33
|
|
|
34
34
|
- treat the audit as the start of a counted cycle
|
|
35
35
|
- use its exact issue list as the scope of the cycle
|
|
36
|
-
- send that exact issue list to the developer in explicit detail
|
|
36
|
+
- send that exact issue list to the developer in explicit but compact detail
|
|
37
37
|
|
|
38
38
|
## Issue handoff standard
|
|
39
39
|
|
|
40
|
-
- send the developer the exact issues from the current cycle's initial audit in explicit detail
|
|
40
|
+
- send the developer the exact issues from the current cycle's initial audit in explicit but trimmed detail
|
|
41
41
|
- do not tell the developer to read the audit report directly
|
|
42
42
|
- require the developer to address the full scoped issue list or its explicitly unresolved subset on later loop passes
|
|
43
43
|
- require the developer to report the exact verification commands that were run and the concrete results they produced
|
|
44
44
|
- if the developer claims an issue is invalid or already fixed, require a concrete justification against the audit report instead of silently omitting it
|
|
45
|
+
- keep the handoff complete, but avoid replaying large narrative chunks from the audit when a tighter issue bundle is enough
|
|
46
|
+
- prefer a compact issue-bundle format such as: issue id or short label, exact finding, narrow evidence reference, required fix, and exact verification target
|
|
45
47
|
|
|
46
48
|
## Scoped fix-check standard
|
|
47
49
|
|
|
@@ -18,10 +18,14 @@ Use this skill only during `P7 Evaluation and Fix Verification`.
|
|
|
18
18
|
|
|
19
19
|
The canonical evaluation prompt files are:
|
|
20
20
|
|
|
21
|
+
- packaged source copies:
|
|
22
|
+
- `assets/slopmachine/backend-evaluation-prompt.md`
|
|
23
|
+
- `assets/slopmachine/frontend-evaluation-prompt.md`
|
|
24
|
+
- installed runtime copies used during ordinary evaluation runs:
|
|
21
25
|
- `~/slopmachine/backend-evaluation-prompt.md`
|
|
22
26
|
- `~/slopmachine/frontend-evaluation-prompt.md`
|
|
23
27
|
|
|
24
|
-
|
|
28
|
+
The installed runtime copies under `~/slopmachine/` are the ordinary evaluation prompt sources at runtime.
|
|
25
29
|
|
|
26
30
|
## Evaluation selection rule
|
|
27
31
|
|
|
@@ -58,6 +58,8 @@ Hardening should treat these as the main review buckets before final evaluation
|
|
|
58
58
|
- re-check prompt-critical operational obligations such as scheduled jobs, retention, backups, worker behavior, privacy/accountability logging, and admin controls
|
|
59
59
|
- enter release-candidate mode: stop feature work and focus only on fixes, verification, docs, and packaging preparation
|
|
60
60
|
- make sure the system is genuinely reviewable and reproducible
|
|
61
|
+
- keep hardening narrow: do not turn this phase into a hidden extra development slice or a broad rediscovery pass
|
|
62
|
+
- prefer final honesty, consistency, static-review, and release-readiness cleanup over new implementation work
|
|
61
63
|
|
|
62
64
|
## Required hardening output
|
|
63
65
|
|
|
@@ -11,6 +11,11 @@ Use this skill only during `P5 Integrated Verification`.
|
|
|
11
11
|
|
|
12
12
|
Treat the first broad integrated run as a discovery pass.
|
|
13
13
|
|
|
14
|
+
Integrated verification is expected to find some cross-slice issues.
|
|
15
|
+
|
|
16
|
+
The optimization goal is not to pretend those issues should never exist.
|
|
17
|
+
The optimization goal is to reduce avoidable hard failures and reduce how much debt survives into this phase.
|
|
18
|
+
|
|
14
19
|
Once a failure class is known:
|
|
15
20
|
|
|
16
21
|
- classify it
|
|
@@ -29,6 +34,7 @@ Once a failure class is known:
|
|
|
29
34
|
- verify requirement closure, not just feature existence
|
|
30
35
|
- verify behavior against the current plan, the actual requirements, and any settled project decisions that affect the change
|
|
31
36
|
- verify end-to-end flow behavior where the change affects real workflows
|
|
37
|
+
- verify that tests are real and effective checks of actual code logic rather than bypass-style or fake-confidence test paths
|
|
32
38
|
- for web fullstack work, run Playwright coverage for major flows and review screenshots for real UI behavior and regressions
|
|
33
39
|
- for mobile and desktop work, run the selected stack's platform-appropriate UI/E2E coverage for major flows and review screenshots or equivalent artifacts for real UI behavior and regressions
|
|
34
40
|
- for Electron or other Linux-targetable desktop work, use the Dockerized desktop build/test path plus headless UI/runtime verification through Xvfb or an equivalent Linux-capable harness
|
|
@@ -43,6 +49,8 @@ Once a failure class is known:
|
|
|
43
49
|
- verify secrets are not committed, hardcoded, or leaking through logs/config/docs
|
|
44
50
|
- verify error surfaces and auth-related failures are sanitized for users and operators appropriately
|
|
45
51
|
- trace the changed tests and verification back to the prompt-critical risks, not just the easiest happy paths
|
|
52
|
+
- when integrated verification repeatedly finds the same avoidable failure class, treat that as evidence that earlier slice execution or slice-close acceptance must become more system-aware in future runs
|
|
53
|
+
- before closing the phase, verify the delivered startup path is genuinely runnable, the documented tests really execute, frontend behavior is usable when applicable, UI quality is acceptable, core running logic is complete, and Docker startup works when Docker is the runtime contract
|
|
46
54
|
- tighten parent-root `../docs/test-coverage.md` during or immediately after integrated verification so major requirement and risk points, mapped tests, coverage status, and remaining gaps match the actual verification evidence
|
|
47
55
|
- when security-bearing behavior changes, tighten parent-root `../docs/design.md` and `../docs/api-spec.md` as needed so enforcement points and mapped tests stay accurate
|
|
48
56
|
- when frontend-bearing behavior changes, tighten `README.md` plus parent-root `../docs/design.md` as needed so key pages, interactions, and required UI states stay accurate
|
|
@@ -36,6 +36,7 @@ If the owner notices a concrete role, contract, or scope mismatch, planning does
|
|
|
36
36
|
## Core planning gate
|
|
37
37
|
|
|
38
38
|
- the developer should produce the first in-depth technical plan
|
|
39
|
+
- once accepted, the plan should be detailed and section-addressable enough that later owner prompts can stay short and point the developer back to the relevant accepted section instead of re-dumping the implementation contract
|
|
39
40
|
- do not create deep execution sub-items before the technical plan is accepted
|
|
40
41
|
- do not accept planning that reduces, weakens, narrows, or silently reinterprets the original prompt
|
|
41
42
|
- do not accept convenience-based narrowing, including unauthorized `v1` simplifications, deferred workflows, reduced actor/role models, weaker enforcement, or omitted operator/admin surfaces
|
|
@@ -60,6 +60,7 @@ Selected-stack defaults:
|
|
|
60
60
|
- require implementation-grade planning, not brainstorming
|
|
61
61
|
- start from the actual project prompt and build the plan from there
|
|
62
62
|
- carry the settled project requirements forward consistently as you plan
|
|
63
|
+
- make the accepted plan durable enough to serve as the primary execution contract for later scaffold and development prompts instead of forcing the owner to restate the same implementation context repeatedly
|
|
63
64
|
- identify the hard non-negotiable requirements early and do not quietly trade them away for implementation convenience
|
|
64
65
|
- explicitly check that the plan still fits the business goal, main flows, and implicit constraints from the prompt
|
|
65
66
|
- when planning technical items that depend on a library, framework, API, or tool, check Context7 documentation first for authoritative usage details
|
|
@@ -71,6 +72,7 @@ Selected-stack defaults:
|
|
|
71
72
|
- keep the spec focused on required behavior rather than turning it into a progress or completion narrative
|
|
72
73
|
- make the plan include system overview, architecture choice and reasoning, major modules or chunks, domain model, data model where relevant, interface contracts, failure paths, state transitions, logging strategy, testing strategy, README implications, and Docker execution assumptions when those dimensions apply
|
|
73
74
|
- keep the primary planning package concentrated in parent-root `../docs/design.md`
|
|
75
|
+
- organize the accepted plan so later slices can reference concrete sections cleanly instead of requiring the owner to rewrite the plan in follow-up prompts
|
|
74
76
|
- put the risk-to-test matrix in parent-root `../docs/test-coverage.md`
|
|
75
77
|
- when prompt-critical API/interface details need a dedicated document, keep them in parent-root `../docs/api-spec.md`
|
|
76
78
|
- when additional prompt-critical boundaries must be statically reviewed, prefer adding narrow sections to `../docs/design.md` instead of creating extra in-repo docs
|
|
@@ -110,6 +112,7 @@ Selected-stack defaults:
|
|
|
110
112
|
- if the project has database dependencies, plan for runtime and test entrypoints to call `./init_db.sh` whenever database preparation is required
|
|
111
113
|
- do not hardcode database connection values or database bootstrap values anywhere in the repo; require the database setup flow to be driven by `./init_db.sh`
|
|
112
114
|
- start `./init_db.sh` during scaffold with the real database setup already known, then keep expanding it as migrations, schema setup, bootstrap data, and other database dependencies become real through implementation
|
|
115
|
+
- when the project has database dependencies, plan to inject database setup through initialization scripts rather than packaging local database dependency artifacts or environment-specific database state
|
|
113
116
|
- define the project-standard runtime contract and the universal broad test entrypoint `./run_tests.sh` early, and keep both compatible with the selected stack
|
|
114
117
|
- for web projects, default to a Docker-first runtime contract unless the prompt or existing repository clearly dictates another model
|
|
115
118
|
- for web projects, default the primary runtime command to `docker compose up --build`
|
|
@@ -150,6 +153,9 @@ Selected-stack defaults:
|
|
|
150
153
|
- define end-to-end coverage for major user flows before coding
|
|
151
154
|
- define enough test coverage up front to catch major issues later, especially core happy path, important failure paths, security-critical paths, and obvious high-risk boundaries
|
|
152
155
|
- enforce a plan to reach at least 90 percent meaningful coverage of the relevant behavior surface, not decorative line coverage
|
|
156
|
+
- require API tests to exercise real API endpoints and real call flows rather than bypassing the endpoint layer with internal helper-only checks
|
|
157
|
+
- when API tests are material, plan for them to print simple useful response evidence such as status codes and message/body summaries so verification output is easy to inspect
|
|
158
|
+
- plan endpoint coverage so prompt-required functions and dependent multi-step API flows are actually exercised, not just isolated happy-path fragments
|
|
153
159
|
- plan `../docs/test-coverage.md` in evaluator-facing shape rather than loose prose: requirement or risk point, mapped test file(s), key assertion(s) or fixtures, coverage status, major gap, and minimum test addition
|
|
154
160
|
- do not satisfy `../docs/test-coverage.md` with generic test categories alone; make the matrix concrete enough that the owner can review prompt-critical risks without reconstructing the test story manually
|
|
155
161
|
- when multiple prompt-critical domains exist, group the matrix by domain or risk cluster so each section names the requirement, planned test location, key assertions, current status, and remaining gap explicitly
|
|
@@ -15,6 +15,7 @@ Use this skill during `P3 Scaffold` before prompting the developer.
|
|
|
15
15
|
- make prompt-critical baseline behavior real where required
|
|
16
16
|
- keep repo-local `README.md` honest from the start
|
|
17
17
|
- make the selected-stack primary runtime command and the universal `./run_tests.sh` broad test command real from the scaffold stage
|
|
18
|
+
- make the first scaffold pass strong enough that owner scaffold acceptance can rely on a narrow checklist instead of a broad reread of the whole scaffold
|
|
18
19
|
|
|
19
20
|
For web projects using the default runtime model, scaffold must make these commands real and working before scaffold can pass:
|
|
20
21
|
|
|
@@ -60,6 +61,7 @@ For web projects using the default runtime model, scaffold must make these comma
|
|
|
60
61
|
- put baseline config, logging, validation, and error-normalization structure in place
|
|
61
62
|
- install and configure the local test tooling needed for ordinary iteration during scaffold rather than deferring local testing setup to later phases
|
|
62
63
|
- create baseline test structure intentionally during scaffold so the project can grow toward at least 90 percent meaningful coverage instead of retrofitting tests late
|
|
64
|
+
- when API tests are material, scaffold them so they hit real endpoints and print simple useful response evidence such as status codes and message/body summaries instead of hiding the real API behavior behind helper-only checks
|
|
63
65
|
- for frontend-bearing web projects, install the local browser E2E tooling plus the component/page-or-route frontend test layer during scaffold when the project will need them
|
|
64
66
|
- for mobile projects, install the local mobile testing layer during scaffold, defaulting to Jest plus React Native Testing Library for Expo/React Native work
|
|
65
67
|
- for desktop projects, install the local desktop testing layer during scaffold, defaulting to the selected project test runner and Playwright Electron support or an equivalent desktop UI/E2E tool when UI verification is required
|
|
@@ -71,6 +73,7 @@ For web projects using the default runtime model, scaffold must make these comma
|
|
|
71
73
|
- if the project has database dependencies, wire the runtime and test entrypoints to call `./init_db.sh` whenever database preparation is required
|
|
72
74
|
- if the project has database dependencies, treat `./init_db.sh` as a living project artifact that must be expanded as migrations, schema setup, bootstrap data, and other database dependencies become real through implementation
|
|
73
75
|
- do not hardcode database connection values or database bootstrap values in the repo; drive database setup through `./init_db.sh`
|
|
76
|
+
- when the project has database dependencies, do not package local database dependency files or local database state as part of delivery; the delivery should rely on the initialization-script path instead
|
|
74
77
|
- treat prompt-critical security controls as real baseline runtime behavior, not placeholder checks or visual wiring
|
|
75
78
|
- if a requirement implies enforcement, persistence, statefulness, or rejection behavior, make that behavior real in the scaffold unless the prompt clearly scopes it down
|
|
76
79
|
- do not accept shape-only security implementations such as header presence checks, passive constants, or partially wired middleware when the requirement implies real protection
|
|
@@ -91,6 +94,7 @@ For web projects using the default runtime model, scaffold must make these comma
|
|
|
91
94
|
- establish README structure early instead of leaving it until the end
|
|
92
95
|
- ensure `README.md` clearly documents the primary runtime command and the broad `./run_tests.sh` contract for the selected stack
|
|
93
96
|
- ensure `README.md` focuses on what the project does, how to run it, how to test it, the main repo contents, and any important new-developer information rather than trying to replace the full API catalog
|
|
97
|
+
- ensure `README.md` also explains the delivered architecture and major implementation structure clearly enough for code review and handoff
|
|
94
98
|
- ensure `README.md` stands on its own and does not tell users or reviewers to rely on parent-root docs for core repo understanding
|
|
95
99
|
- for Dockerized web projects, ensure `README.md` explains that local runtime values are bootstrapped automatically by the development startup path and that this is local-development behavior rather than production secret management
|
|
96
100
|
- maintain the seeded parent-root `../docs/design.md` as the owner-maintained planning/design contract from the start
|
|
@@ -103,6 +107,7 @@ For web projects using the default runtime model, scaffold must make these comma
|
|
|
103
107
|
- establish a shared validation path during scaffold so forms, requests, boundary checks, and normalized error behavior do not get invented ad hoc later
|
|
104
108
|
- prove the scaffold in a clean state before deeper feature work
|
|
105
109
|
- verify clean startup and teardown behavior under the selected stack's runtime contract
|
|
110
|
+
- make the scaffold handoff compact and checklist-driven: the developer should be able to state runtime proof, test proof, docs honesty, and required repo-surface proof without a long narrative dump
|
|
106
111
|
- for Dockerized web projects, verify clean startup and teardown behavior under the chosen project namespace
|
|
107
112
|
- when the architecture materially depends on infrastructure capabilities such as rate limiting, encryption, offline support, or browser-storage policy, put the baseline framework and policy in place during scaffold rather than deferring it to late implementation
|
|
108
113
|
- for backend integration paths, prefer production-equivalent test infrastructure when practical rather than silently substituting a weaker database or runtime model that can hide real defects
|
|
@@ -125,6 +130,17 @@ For web projects using the default runtime model, scaffold must make these comma
|
|
|
125
130
|
|
|
126
131
|
Scaffold should make later slices easier, not force them to retrofit missing fundamentals.
|
|
127
132
|
|
|
133
|
+
Before scaffold is handed back for owner acceptance, the developer should already have a compact answer for these scaffold checklist items:
|
|
134
|
+
|
|
135
|
+
- runtime bootstrap works
|
|
136
|
+
- database/bootstrap path works when relevant
|
|
137
|
+
- `./run_tests.sh` works at the broad-scaffold level
|
|
138
|
+
- frontend/backend wiring shape is real
|
|
139
|
+
- config/env/bootstrap path is honest
|
|
140
|
+
- `README.md` and scaffold docs are honest about what is and is not implemented
|
|
141
|
+
- required scaffold files and directories exist
|
|
142
|
+
- prohibited shortcuts or residue are not present
|
|
143
|
+
|
|
128
144
|
## Verification cadence
|
|
129
145
|
|
|
130
146
|
- use local and narrow checks while correcting scaffold work
|
|
@@ -16,6 +16,8 @@ Use this skill only during `P9 Submission Packaging`.
|
|
|
16
16
|
- do not create `submission/` or other packaging-only directories for ordinary final delivery
|
|
17
17
|
- packaging is incomplete until every required final artifact path has been verified to exist
|
|
18
18
|
- do not stop packaging for approval, status confirmation, or handoff once this phase has begun; continue until the package is complete
|
|
19
|
+
- when a task or platform question id exists, such as `TASK-123`, use that exact id as the final deliverable/archive name without adding an extra `ID-` prefix
|
|
20
|
+
- normalize project-type metadata and packaging labels to the expected engineering categories such as `full_stack` or `fullstack`, `pure_backend`, `pure_frontend`, `cross_platform_app`, or `mobile_app`
|
|
19
21
|
|
|
20
22
|
## Required final structure
|
|
21
23
|
|
|
@@ -54,38 +56,42 @@ No screenshots are required as packaging artifacts.
|
|
|
54
56
|
- verify parent-root `../docs/test-coverage.md` exists and reflects the final delivered verification coverage
|
|
55
57
|
- verify parent-root `../docs/questions.md` exists from the accepted clarification/question record
|
|
56
58
|
- ensure `README.md` matches the delivered codebase, functionality, runtime steps, test steps, main repo contents, and important new-developer information, and stays friendly to a junior developer
|
|
59
|
+
- ensure `README.md` also describes the delivered architecture at an implementation-review level rather than only listing commands
|
|
57
60
|
- ensure `README.md` remains the primary in-repo documentation surface
|
|
58
61
|
- verify no repo-local file depends on parent-root docs or sibling workflow artifacts for startup, build/preview, configuration, static review, or basic project understanding
|
|
59
62
|
- if the project uses mock, stub, fake, interception, or local-data behavior, ensure `README.md` discloses that scope accurately and does not imply undisclosed real integration
|
|
60
63
|
- if mock or interception behavior is enabled by default, ensure `README.md` says so clearly
|
|
61
64
|
- include `./run_tests.sh` and any supporting runner logic it needs to execute the project's broad test path from a clean environment
|
|
62
65
|
- when the project has database dependencies, include `./init_db.sh` and ensure it reflects the final delivered database setup rather than an earlier scaffold placeholder
|
|
66
|
+
- when the project has database dependencies, package the initialization-script path rather than raw environment-specific database dependency artifacts or local database state
|
|
63
67
|
- verify parent-root `../self_test_reports/` exists and contains the required counted cycle directories
|
|
64
68
|
- export all tracked developer sessions before closing packaging
|
|
65
69
|
- when packaging succeeds, update workflow metadata to mark `packaging_completed` as true
|
|
66
70
|
|
|
67
71
|
## Session export sequence
|
|
68
72
|
|
|
69
|
-
Export every tracked developer session from metadata
|
|
73
|
+
Export every tracked developer session from metadata, keep a numbered cleaned or direct export of each session in the parent root as `../session-<N>.json`, and convert each session into its lane-aware trajectory file under `../sessions/`.
|
|
70
74
|
|
|
71
|
-
Use the tracked
|
|
75
|
+
Use the tracked lane labels for converted developer sessions, for example:
|
|
72
76
|
|
|
73
77
|
- `develop-1`
|
|
74
78
|
- `bugfix-1`
|
|
75
79
|
|
|
76
80
|
For each tracked developer session:
|
|
77
81
|
|
|
78
|
-
1. `node ~/slopmachine/utils/export_ai_session.mjs --backend
|
|
79
|
-
2. `
|
|
80
|
-
3. `
|
|
82
|
+
1. if `<backend>` is `claude`, run `node ~/slopmachine/utils/export_ai_session.mjs --backend claude --cwd "$PWD" --session-id <session-id> --output ../session-<N>.json`
|
|
83
|
+
2. if `<backend>` is not `claude`, run `opencode export <session-id> > ../session-export-<label>.raw`
|
|
84
|
+
3. if `<backend>` is not `claude`, run `python3 ~/slopmachine/utils/strip_session_parent.py ../session-export-<label>.raw --output ../session-<N>.json`
|
|
85
|
+
4. `node ~/slopmachine/utils/convert_exported_ai_session.mjs --converter-script ~/slopmachine/utils/convert_ai_session.py --input ../session-<N>.json --output ../sessions/<label>.json`
|
|
81
86
|
|
|
82
87
|
Where `<backend>` comes from the tracked developer session record in metadata.
|
|
83
88
|
Use `opencode` when no explicit backend field exists.
|
|
89
|
+
Use the tracked developer-session order to assign `<N>`.
|
|
84
90
|
|
|
85
91
|
After those steps:
|
|
86
92
|
|
|
87
93
|
- verify every tracked developer session has been exported and converted into `../sessions/` before continuing
|
|
88
|
-
- keep `../session-<
|
|
94
|
+
- keep `../session-<N>.json` in the parent root as the cleaned or direct exported session artifact
|
|
89
95
|
- treat only the raw `../session-export-<label>.raw` files as temporary packaging intermediates
|
|
90
96
|
- remove the raw `../session-export-<label>.raw` files before closing packaging
|
|
91
97
|
- if the required utilities, metadata session ids, or output files are missing, packaging is not ready to continue
|
|
@@ -95,23 +101,30 @@ After those steps:
|
|
|
95
101
|
- run `python3 ~/slopmachine/utils/cleanup_delivery_artifacts.py .` once near the end of packaging to remove known recursive cleanup targets from the delivered repo tree
|
|
96
102
|
- remove runtime, editor, cache, tooling noise, generated artifacts, and environment junk recursively anywhere in the delivered repo tree
|
|
97
103
|
- do not remove required delivery artifacts just because they look noisy
|
|
98
|
-
- remove `.opencode/`, `.codex/`, `.vscode/`, env-file variants, caches, `node_modules/`, build outputs not part of delivery, raw test artifact directories, `__pycache__/`, `.pytest_cache/`, repo-local `AGENTS.md`, and accidental in-repo docs directories or extra documentation files beyond `README.md`
|
|
104
|
+
- remove `.opencode/`, `.codex/`, `.vscode/`, env-file variants, caches, `node_modules/`, `.venv/`, `.net/`, build outputs not part of delivery, raw test artifact directories, `__pycache__/`, `.pytest_cache/`, repo-local `AGENTS.md`, and accidental in-repo docs directories or extra documentation files beyond `README.md`
|
|
105
|
+
- remove environment-dependent content, local dependency trees, editor state, package-manager caches, and runtime caches anywhere in the delivery tree
|
|
106
|
+
- do not package database dependency files or local database state when the delivered database setup is supposed to be injected through initialization scripts
|
|
107
|
+
- do not package AI session conversion scripts or similar workflow utility scripts inside the delivered product attachment
|
|
99
108
|
- remove repo-local `.tmp/` or parent-root `../.tmp/` if they exist; they are not part of the final delivery contract
|
|
100
109
|
- the cleanup is recursive; do not leave forbidden directories or generated junk buried deeper in the repo hierarchy after cleanup
|
|
101
110
|
|
|
102
111
|
## Validation checklist
|
|
103
112
|
|
|
104
113
|
- confirm the final package contains only the required delivery structure and necessary repo contents
|
|
114
|
+
- confirm the final archive/deliverable naming uses the task/question id directly when one exists and does not invent an extra `ID-` prefix
|
|
105
115
|
- confirm docs describe delivered behavior, not planned or aspirational behavior
|
|
106
116
|
- confirm the delivered repo is statically reviewable enough that startup, test commands, core entry points, and any mock/local-data boundaries can be traced from repo artifacts alone
|
|
107
117
|
- confirm `README.md` covers build/preview/runtime guidance, test commands, main repo contents, feature flags, debug/demo surfaces, mock defaults, and important new-developer information when those dimensions are material
|
|
118
|
+
- confirm `README.md` also explains the delivered architecture and major implementation structure clearly enough for review
|
|
108
119
|
- when the project has database dependencies, confirm `./init_db.sh` exists in the delivered repo and matches the final schema/bootstrap requirements
|
|
120
|
+
- when the project has database dependencies, confirm database setup is injected through initialization scripts rather than packaged local database dependency artifacts
|
|
109
121
|
- confirm the cleanup helper has been run and that no known recursive cleanup targets remain in the delivered repo tree
|
|
122
|
+
- confirm no environment-dependent dependency directories, editor-state folders, runtime caches, or workflow utility scripts are packaged into the delivered product
|
|
110
123
|
- confirm parent-root `../self_test_reports/` exists and contains the required counted cycle directories
|
|
111
124
|
- confirm each counted cycle directory contains the initial audit report plus any fix-check reports generated for that cycle
|
|
112
125
|
- confirm parent-root `../docs/test-coverage.md` explains the tested flows, mapped tests, and coverage boundaries
|
|
113
126
|
- confirm exported developer sessions exist under parent-root `../sessions/` using the tracked `<label>.json` names
|
|
114
|
-
- confirm cleaned session exports exist in the parent root as `../session-<
|
|
127
|
+
- confirm cleaned session exports exist in the parent root as numbered `../session-<N>.json` files
|
|
115
128
|
- confirm parent-root `../docs/` remains consistent as an external reference set when workflow policy still requires it, but the delivered repo does not depend on it
|
|
116
129
|
- confirm parent-root metadata fields are populated correctly
|
|
117
130
|
- confirm workflow metadata marks `packaging_completed` as true
|
|
@@ -121,6 +134,7 @@ After those steps:
|
|
|
121
134
|
|
|
122
135
|
- do one final package review before declaring packaging complete
|
|
123
136
|
- confirm the package is coherent as a delivered project, not just a working repo snapshot
|
|
137
|
+
- confirm the delivered project is actually runnable in the promised startup model, the documented tests are runnable, frontend behavior is usable when applicable, UI quality is acceptable, core logic is complete, and Docker startup works when Docker is the runtime contract
|
|
124
138
|
- confirm the final git checkpoint can be created cleanly for the packaged state when a checkpoint is needed
|
|
125
139
|
- if packaging reveals a real defect or missing artifact, fix it before closing the phase
|
|
126
140
|
- do not close packaging until all required docs, session exports, self-test files, cleanup conditions, and final structure checks are satisfied
|
|
@@ -100,14 +100,20 @@ Use this skill after development begins whenever you are reviewing work, decidin
|
|
|
100
100
|
|
|
101
101
|
- inspect the result and evidence, not just the developer claim
|
|
102
102
|
- review technical quality, prompt alignment, architecture impact, and verification depth of the current work
|
|
103
|
+
- after planning is accepted, treat the accepted plan and its relevant section as the default slice baseline instead of restating the full slice contract in every owner prompt
|
|
104
|
+
- for ordinary slice work after planning, keep the owner prompt to one short paragraph plus a small checklist of slice-specific guardrails, review concerns, or deltas that are not already clear from the accepted plan
|
|
103
105
|
- during normal implementation iteration, always prefer fast local language-native or framework-native verification for the changed area instead of the selected stack's broad gate path
|
|
104
106
|
- require the developer to set up and use the project-appropriate local test environment in the current working directory when normal local verification is needed
|
|
105
107
|
- require the developer to report the exact verification commands that were run and the concrete results they produced
|
|
108
|
+
- when API tests are used as evidence, require them to hit real endpoints and expose simple useful response evidence such as status codes and message/body summaries
|
|
106
109
|
- require local runtime proof when relevant by starting the app or service through the selected stack's local run path and exercising the changed behavior directly rather than jumping to the broad gate path
|
|
107
110
|
- if the local toolchain is missing, require the developer to install or enable it first; do not jump to Docker, `./run_tests.sh`, Playwright, or another broad gate path during ordinary iteration just because local setup is inconvenient
|
|
108
111
|
- do not accept hand-wavy claims that local verification is unavailable without a real setup attempt and clear explanation
|
|
109
112
|
- do not ask the developer to run browser E2E, Playwright, full test suites, `./run_tests.sh`, or Docker runtime commands during ordinary implementation slices
|
|
110
113
|
- if the developer already ran the relevant targeted local test command and reported it clearly, do not rerun the same command on the owner side unless the evidence is weak, contradictory, flaky, high-risk, or needed to answer a new question
|
|
114
|
+
- for ordinary slice acceptance, default review scope to the changed files and the narrow supporting files named by the developer; expand only when a concrete inconsistency, missing dependency, or suspicious claim forces wider review
|
|
115
|
+
- for ordinary slice acceptance, prefer a narrow acceptance checklist over broad exploratory rereads
|
|
116
|
+
- require compact ordinary slice-close replies by default: short summary, exact changed files, exact verification commands plus results, and only real unresolved issues unless a deeper mapping is explicitly needed
|
|
111
117
|
- if verification is weak, missing, or failing, require fixes and reruns before acceptance
|
|
112
118
|
- if documentation or repo hygiene drifts, secrets leak, contracts drift, or frontend integrity is compromised, require cleanup before acceptance
|
|
113
119
|
- keep looping until the current work is genuinely acceptable
|
|
@@ -117,6 +123,7 @@ Use this skill after development begins whenever you are reviewing work, decidin
|
|
|
117
123
|
- a broad gate is an owner-run integrated verification boundary, not every ordinary phase change
|
|
118
124
|
- a phase change alone does not automatically require a broad gate unless that phase exit explicitly calls for one
|
|
119
125
|
- a broad gate normally means some combination of full clean runtime proof, `./run_tests.sh`, and platform-appropriate UI/E2E evidence when UI-bearing flows exist
|
|
126
|
+
- the evaluator-session cycles required inside `P7` are not part of the ordinary owner-run broad-gate budget; they are the formal final evaluation model for that phase
|
|
120
127
|
- for Electron or other Linux-targetable desktop projects, the broad gate should use the Dockerized desktop build/test path plus headless UI/runtime verification rather than pretending web-style Docker runtime semantics apply
|
|
121
128
|
- for Android projects, the broad gate should use the Dockerized Android build/test path without depending on an emulator
|
|
122
129
|
- for iOS-targeted projects on Linux, the broad gate should rely on `./run_tests.sh` plus static/code review evidence and should not claim native iOS runtime proof unless a real macOS/Xcode checkpoint exists
|
|
@@ -152,9 +159,11 @@ Use evidence such as internal metadata files, structured Beads comments, verific
|
|
|
152
159
|
- for web projects using the default Docker-first runtime model, scaffold acceptance is not complete until the owner has actually run `docker compose up --build` and `./run_tests.sh` once successfully after scaffold completion
|
|
153
160
|
- module implementation requires targeted local verification only; browser E2E and other broad gate evidence belong to owner-run major checkpoints rather than ordinary slice acceptance
|
|
154
161
|
- module implementation acceptance should challenge tenant isolation, path confinement, sanitized error behavior, prototype residue, integration seams, and cross-cutting consistency when those concerns are in scope
|
|
162
|
+
- module implementation acceptance should use a narrow slice-close checklist: required behavior present, adjacent high-risk seams checked, docs or contract honesty preserved, exact verification evidence supplied, and no known release-facing regression left behind
|
|
155
163
|
- integrated verification entry requires one of the limited owner-run broad gate moments once development is complete; this is the normal next place where `docker compose up --build` and `./run_tests.sh` are expected after scaffold acceptance
|
|
156
164
|
- module implementation acceptance should also challenge whether the slice is advancing toward the planned module contract and the planned 90 percent meaningful coverage target instead of accumulating test debt
|
|
157
165
|
- integrated verification completion requires explicit full-system evidence before the phase can close
|
|
166
|
+
- integrated verification completion also requires explicit evidence that the delivered startup path is runnable, the documented tests are real and runnable, frontend behavior is usable when applicable, UI quality is acceptable, core logic is complete, and Docker startup works when Docker is the runtime contract
|
|
158
167
|
- web fullstack integrated verification must include owner-run Playwright coverage for every major flow, plus screenshots used to evaluate frontend behavior and UI quality along the flow using `frontend-design`
|
|
159
168
|
- mobile and desktop integrated verification must include the selected stack's platform-appropriate UI/E2E coverage for every major user flow when UI-bearing flows are material
|
|
160
169
|
- for Electron or other Linux-targetable desktop projects, integrated verification should use the Dockerized desktop build/test path plus headless UI/runtime verification artifacts
|
|
@@ -25,6 +25,7 @@ This file is the repo-local engineering rulebook for `slopmachine` projects.
|
|
|
25
25
|
- Do not rerun broad runtime/test commands on every small change.
|
|
26
26
|
- During ordinary development slices, do not run Docker runtime commands, browser E2E, Playwright, full test suites, or `./run_tests.sh`.
|
|
27
27
|
- Use targeted local tests during ordinary development slices and leave browser E2E plus broad-gate commands for later comprehensive verification.
|
|
28
|
+
- When API tests are material, make them hit real endpoints and print simple useful response evidence such as status codes and message/body summaries.
|
|
28
29
|
- For web projects, default the runtime contract to `docker compose up --build` unless the prompt or existing repository clearly dictates another model.
|
|
29
30
|
- When `docker compose up --build` is not the runtime contract, provide `./run_app.sh` as the single primary runtime wrapper.
|
|
30
31
|
- If the project has database dependencies, keep `./init_db.sh` as the only project-standard database initialization path.
|
|
@@ -33,6 +34,7 @@ This file is the repo-local engineering rulebook for `slopmachine` projects.
|
|
|
33
34
|
|
|
34
35
|
- Keep `README.md` accurate.
|
|
35
36
|
- The README must explain what the project is, what it does, how to run it, how to test it, the main repo contents, and any important information a new developer needs immediately.
|
|
37
|
+
- The README must also explain the delivered architecture and major implementation structure clearly enough for review and handoff.
|
|
36
38
|
- The README must clearly document whether the primary runtime command is `docker compose up --build` or `./run_app.sh`.
|
|
37
39
|
- The README must clearly document `./run_tests.sh` as the broad test command.
|
|
38
40
|
- The README must stand on its own for basic codebase use.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
-
import { parseArgs, readPrompt, buildCreateArgs, emitFailure, emitSuccess, compactClaudeResult, runClaudeWithRetry } from './claude_worker_common.mjs'
|
|
3
|
+
import { parseArgs, readPrompt, buildCreateArgs, emitFailure, emitSuccess, compactClaudeResult, runClaudeWithRetry, writeJsonIfNeeded } from './claude_worker_common.mjs'
|
|
4
4
|
|
|
5
5
|
const argv = parseArgs(process.argv.slice(2))
|
|
6
6
|
|
|
@@ -11,18 +11,32 @@ try {
|
|
|
11
11
|
cwd: argv.cwd,
|
|
12
12
|
rawOutputPath: argv['raw-output'],
|
|
13
13
|
rawErrorPath: argv['raw-error'],
|
|
14
|
+
statePath: argv['state-file'],
|
|
14
15
|
args: buildCreateArgs(argv.agent || 'developer', prompt),
|
|
15
16
|
retryOnLimit: argv['retry-on-limit'] !== '0',
|
|
17
|
+
maxAttempts: Number.parseInt(argv['max-attempts'] || '2', 10),
|
|
16
18
|
})
|
|
17
19
|
|
|
18
20
|
if (failure || !parsed || parsed.is_error === true) {
|
|
21
|
+
await writeJsonIfNeeded(argv['result-file'], {
|
|
22
|
+
ok: false,
|
|
23
|
+
code: failure?.code || 'claude_create_failed',
|
|
24
|
+
msg: failure?.msg || 'claude_create_failed',
|
|
25
|
+
sid: failure?.sid || null,
|
|
26
|
+
})
|
|
19
27
|
emitFailure(failure?.code || 'claude_create_failed', failure?.msg || 'claude_create_failed', failure?.sid ? { sid: failure.sid } : {})
|
|
20
28
|
process.exit(1)
|
|
21
29
|
}
|
|
22
30
|
|
|
23
31
|
const compact = compactClaudeResult(parsed)
|
|
32
|
+
await writeJsonIfNeeded(argv['result-file'], { ok: true, sid: compact.sid, res: compact.res })
|
|
24
33
|
emitSuccess(compact.sid, compact.res)
|
|
25
34
|
} catch (error) {
|
|
35
|
+
await writeJsonIfNeeded(argv['result-file'], {
|
|
36
|
+
ok: false,
|
|
37
|
+
code: 'claude_create_exception',
|
|
38
|
+
msg: error instanceof Error ? error.message : String(error),
|
|
39
|
+
})
|
|
26
40
|
emitFailure('claude_create_exception', error instanceof Error ? error.message : String(error))
|
|
27
41
|
process.exit(1)
|
|
28
42
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
-
import { parseArgs, readPrompt, buildResumeArgs, emitFailure, emitSuccess, compactClaudeResult, runClaudeWithRetry } from './claude_worker_common.mjs'
|
|
3
|
+
import { parseArgs, readPrompt, buildResumeArgs, emitFailure, emitSuccess, compactClaudeResult, runClaudeWithRetry, writeJsonIfNeeded } from './claude_worker_common.mjs'
|
|
4
4
|
|
|
5
5
|
const argv = parseArgs(process.argv.slice(2))
|
|
6
6
|
|
|
@@ -11,18 +11,32 @@ try {
|
|
|
11
11
|
cwd: argv.cwd,
|
|
12
12
|
rawOutputPath: argv['raw-output'],
|
|
13
13
|
rawErrorPath: argv['raw-error'],
|
|
14
|
+
statePath: argv['state-file'],
|
|
14
15
|
args: buildResumeArgs(argv.agent || 'developer', argv['session-id'], prompt),
|
|
15
16
|
retryOnLimit: argv['retry-on-limit'] !== '0',
|
|
17
|
+
maxAttempts: Number.parseInt(argv['max-attempts'] || '2', 10),
|
|
16
18
|
})
|
|
17
19
|
|
|
18
20
|
if (failure || !parsed || parsed.is_error === true) {
|
|
21
|
+
await writeJsonIfNeeded(argv['result-file'], {
|
|
22
|
+
ok: false,
|
|
23
|
+
code: failure?.code || 'claude_resume_failed',
|
|
24
|
+
msg: failure?.msg || 'claude_resume_failed',
|
|
25
|
+
sid: failure?.sid || null,
|
|
26
|
+
})
|
|
19
27
|
emitFailure(failure?.code || 'claude_resume_failed', failure?.msg || 'claude_resume_failed', failure?.sid ? { sid: failure.sid } : {})
|
|
20
28
|
process.exit(1)
|
|
21
29
|
}
|
|
22
30
|
|
|
23
31
|
const compact = compactClaudeResult(parsed)
|
|
32
|
+
await writeJsonIfNeeded(argv['result-file'], { ok: true, sid: compact.sid, res: compact.res })
|
|
24
33
|
emitSuccess(compact.sid, compact.res)
|
|
25
34
|
} catch (error) {
|
|
35
|
+
await writeJsonIfNeeded(argv['result-file'], {
|
|
36
|
+
ok: false,
|
|
37
|
+
code: 'claude_resume_exception',
|
|
38
|
+
msg: error instanceof Error ? error.message : String(error),
|
|
39
|
+
})
|
|
26
40
|
emitFailure('claude_resume_exception', error instanceof Error ? error.message : String(error))
|
|
27
41
|
process.exit(1)
|
|
28
42
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
import fs from 'node:fs/promises'
|
|
4
|
+
import { createWriteStream } from 'node:fs'
|
|
4
5
|
import os from 'node:os'
|
|
5
6
|
import path from 'node:path'
|
|
6
7
|
import { spawn } from 'node:child_process'
|
|
@@ -33,6 +34,11 @@ export async function writeFileIfNeeded(filePath, content) {
|
|
|
33
34
|
await fs.writeFile(filePath, content, 'utf8')
|
|
34
35
|
}
|
|
35
36
|
|
|
37
|
+
/**
 * Persist `value` as pretty-printed JSON.
 *
 * The value is serialized with a 2-space indent, terminated with a newline,
 * and handed to `writeFileIfNeeded`. When `filePath` is falsy the call is a
 * no-op.
 *
 * @param {string} [filePath] Destination file.
 * @param {unknown} value Value to serialize.
 * @returns {Promise<void>}
 */
export async function writeJsonIfNeeded(filePath, value) {
  if (filePath) {
    const serialized = JSON.stringify(value, null, 2)
    await writeFileIfNeeded(filePath, `${serialized}\n`)
  }
}
|
|
41
|
+
|
|
36
42
|
export async function readPrompt(promptFile) {
|
|
37
43
|
const content = await fs.readFile(promptFile, 'utf8')
|
|
38
44
|
return content.trim()
|
|
@@ -72,10 +78,63 @@ function buildBaseArgs(agentName) {
|
|
|
72
78
|
]
|
|
73
79
|
}
|
|
74
80
|
|
|
75
|
-
|
|
76
|
-
|
|
81
|
+
/**
 * Create a serialized, best-effort writer for a JSON state file.
 *
 * The writer keeps an in-memory state object (seeded from `baseState`) and
 * persists a snapshot through `writeJsonIfNeeded` each time it is patched.
 * Writes are chained so they run one at a time, in call order.
 *
 * A failed write never rejects the promises returned by `update()` or
 * `flush()`; the most recent failure is exposed as `lastError` instead.
 * Updates are issued as `void writer.update(...)` from stream event handlers,
 * where a rejected promise would surface as an unhandled rejection.
 *
 * @param {string} [statePath] Destination file; when falsy nothing is written.
 * @param {object} [baseState] Initial fields of the state object.
 * @returns {{ update(patch: object): Promise<void>, flush(): Promise<void>, readonly lastError: unknown }}
 */
function buildStateWriter(statePath, baseState = {}) {
  let state = { ...baseState }
  let chain = Promise.resolve()
  let lastError = null

  function queueWrite() {
    if (!statePath) {
      return chain
    }

    // Snapshot now so later patches cannot leak into an already-queued write.
    const snapshot = { ...state, updated_at: new Date().toISOString() }
    chain = chain
      .then(() => writeJsonIfNeeded(statePath, snapshot))
      // Record the failure instead of rejecting: the chain stays usable and
      // fire-and-forget callers never produce an unhandled rejection.
      .catch((error) => {
        lastError = error
      })
    return chain
  }

  return {
    update(patch) {
      state = { ...state, ...patch }
      return queueWrite()
    },
    async flush() {
      await queueWrite()
      // Also wait for any write that was queued while the await above was pending.
      await chain
    },
    get lastError() {
      return lastError
    },
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Build the seed state object for a single Claude attempt.
 *
 * The process and stream fields start empty (`null` / `0`) and `started_at`
 * is stamped with the current time in ISO-8601 form.
 *
 * @param {{ args: string[], cwd: string, attempt: number }} options
 * @returns {object} State with `status: 'starting'`.
 */
function buildInitialState({ args, cwd, attempt }) {
  const startedAt = new Date().toISOString()
  const processFields = { pid: null, exit_code: null }
  const streamFields = {
    stdout_bytes: 0,
    stderr_bytes: 0,
    last_stdout_at: null,
    last_stderr_at: null,
  }

  // Spread order keeps the serialized key order stable.
  return {
    status: 'starting',
    started_at: startedAt,
    cwd,
    args,
    attempt,
    ...processFields,
    ...streamFields,
  }
}
|
|
124
|
+
|
|
125
|
+
/**
 * Report whether a failure message matches one of the transport-level
 * keywords in the pattern below (case-insensitive).
 *
 * @param {unknown} message Failure text; falsy values are treated as ''.
 * @returns {boolean}
 */
function isRetryableTransportFailure(message) {
  const text = String(message || '')
  const transportPattern = /network|econnreset|timed? out|timeout|temporar|socket|transport|unavailable|rate limit/i
  return transportPattern.test(text)
}
|
|
128
|
+
|
|
129
|
+
export async function runClaude({ claudeCommand, args, cwd, rawOutputPath, rawErrorPath, statePath, attempt = 1 }) {
|
|
130
|
+
const stateWriter = buildStateWriter(statePath, buildInitialState({ args, cwd, attempt }))
|
|
131
|
+
await stateWriter.flush()
|
|
132
|
+
|
|
133
|
+
const stdoutWriter = rawOutputPath ? createWriteStream(rawOutputPath, { flags: 'w' }) : null
|
|
134
|
+
const stderrWriter = rawErrorPath ? createWriteStream(rawErrorPath, { flags: 'w' }) : null
|
|
135
|
+
|
|
77
136
|
const result = await new Promise((resolve, reject) => {
|
|
78
|
-
const child = spawn(claudeCommand,
|
|
137
|
+
const child = spawn(claudeCommand, [...args], {
|
|
79
138
|
cwd,
|
|
80
139
|
env: process.env,
|
|
81
140
|
stdio: ['ignore', 'pipe', 'pipe'],
|
|
@@ -84,20 +143,42 @@ export async function runClaude({ claudeCommand, args, cwd, rawOutputPath, rawEr
|
|
|
84
143
|
let stdout = ''
|
|
85
144
|
let stderr = ''
|
|
86
145
|
|
|
146
|
+
void stateWriter.update({ status: 'running', pid: child.pid ?? null })
|
|
147
|
+
|
|
87
148
|
child.stdout.on('data', (chunk) => {
|
|
88
|
-
|
|
149
|
+
const text = chunk.toString()
|
|
150
|
+
stdout += text
|
|
151
|
+
stdoutWriter?.write(text)
|
|
152
|
+
void stateWriter.update({
|
|
153
|
+
stdout_bytes: Buffer.byteLength(stdout, 'utf8'),
|
|
154
|
+
last_stdout_at: new Date().toISOString(),
|
|
155
|
+
})
|
|
89
156
|
})
|
|
90
157
|
|
|
91
158
|
child.stderr.on('data', (chunk) => {
|
|
92
|
-
|
|
159
|
+
const text = chunk.toString()
|
|
160
|
+
stderr += text
|
|
161
|
+
stderrWriter?.write(text)
|
|
162
|
+
void stateWriter.update({
|
|
163
|
+
stderr_bytes: Buffer.byteLength(stderr, 'utf8'),
|
|
164
|
+
last_stderr_at: new Date().toISOString(),
|
|
165
|
+
})
|
|
93
166
|
})
|
|
94
167
|
|
|
95
168
|
child.on('error', reject)
|
|
96
|
-
child.on('close', (code) =>
|
|
169
|
+
child.on('close', async (code) => {
|
|
170
|
+
stdoutWriter?.end()
|
|
171
|
+
stderrWriter?.end()
|
|
172
|
+
await stateWriter.update({
|
|
173
|
+
status: (code ?? 1) === 0 ? 'completed' : 'failed',
|
|
174
|
+
finished_at: new Date().toISOString(),
|
|
175
|
+
exit_code: code ?? 1,
|
|
176
|
+
})
|
|
177
|
+
resolve({ code: code ?? 1, stdout, stderr })
|
|
178
|
+
})
|
|
97
179
|
})
|
|
98
180
|
|
|
99
|
-
await
|
|
100
|
-
await writeFileIfNeeded(rawErrorPath, result.stderr)
|
|
181
|
+
await stateWriter.flush()
|
|
101
182
|
return result
|
|
102
183
|
}
|
|
103
184
|
|
|
@@ -148,6 +229,15 @@ export function classifyClaudeFailure(parsed, fallbackMessage = '') {
|
|
|
148
229
|
}
|
|
149
230
|
}
|
|
150
231
|
|
|
232
|
+
if (isRetryableTransportFailure(rawMessage)) {
|
|
233
|
+
return {
|
|
234
|
+
code: 'claude_transport_failed',
|
|
235
|
+
msg: rawMessage || 'claude_transport_failed',
|
|
236
|
+
retryable: true,
|
|
237
|
+
sid: sessionId,
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
151
241
|
return {
|
|
152
242
|
code: 'claude_call_failed',
|
|
153
243
|
msg: rawMessage || 'claude_call_failed',
|
|
@@ -189,37 +279,38 @@ export function msUntilNextQuotaReset(now = new Date(), { hour = DEFAULT_LIMIT_H
|
|
|
189
279
|
return waitMs + 60 * 1000
|
|
190
280
|
}
|
|
191
281
|
|
|
192
|
-
export async function runClaudeWithRetry({ claudeCommand, args, cwd, rawOutputPath, rawErrorPath, retryOnLimit = true }) {
|
|
193
|
-
|
|
194
|
-
let parsed = null
|
|
195
|
-
try {
|
|
196
|
-
parsed = parseClaudeJson(result.stdout)
|
|
197
|
-
} catch {}
|
|
282
|
+
/**
 * Run the Claude CLI through `runClaude`, retrying failed attempts.
 *
 * An attempt succeeds when the process exits with code 0 and its stdout
 * parses (via `parseClaudeJson`) to a payload whose `is_error` is not `true`.
 * Any other attempt is classified with `classifyClaudeFailure` and retried
 * while attempts remain and either the classification is `retryable` or the
 * process failed without parseable output.
 *
 * A `claude_usage_limit` failure is only retried when `retryOnLimit` is true,
 * and then only after sleeping for `msUntilNextQuotaReset()`.
 *
 * @param {object} options
 * @param {string} options.claudeCommand Executable to spawn.
 * @param {string[]} options.args CLI arguments.
 * @param {string} [options.cwd] Working directory for the child process.
 * @param {string} [options.rawOutputPath] Forwarded to `runClaude`.
 * @param {string} [options.rawErrorPath] Forwarded to `runClaude`.
 * @param {string} [options.statePath] Forwarded to `runClaude`.
 * @param {boolean} [options.retryOnLimit=true] Wait and retry on usage limits.
 * @param {number} [options.maxAttempts=2] Upper bound on attempts. Values
 *   below 1 are raised to 1; non-finite values (e.g. `NaN` from parsing a
 *   malformed CLI flag) fall back to 2.
 * @returns {Promise<{ result: object, parsed: object|null, failure?: object, attempts: number }>}
 *   `failure` is present only when the final attempt did not succeed.
 */
export async function runClaudeWithRetry({ claudeCommand, args, cwd, rawOutputPath, rawErrorPath, statePath, retryOnLimit = true, maxAttempts = 2 }) {
  // Guarantee at least one real attempt; an unchecked NaN/0 would skip the
  // loop entirely and report exhaustion without ever invoking Claude.
  const attemptLimit = Number.isFinite(maxAttempts) ? Math.max(1, Math.floor(maxAttempts)) : 2

  // The loop always exits through a return: the final attempt can never retry.
  for (let attempt = 1; ; attempt += 1) {
    const result = await runClaude({ claudeCommand, args, cwd, rawOutputPath, rawErrorPath, statePath, attempt })

    let parsed = null
    try {
      parsed = parseClaudeJson(result.stdout)
    } catch {
      // Unparseable output is handled by the failure classification below.
    }

    if (parsed && parsed.is_error !== true && result.code === 0) {
      return { result, parsed, attempts: attempt }
    }

    const fallbackMessage = String(result.stderr || result.stdout || '').trim()
    const failure = classifyClaudeFailure(parsed, fallbackMessage)
    const isUsageLimit = failure.code === 'claude_usage_limit'
    const looksRetryable = failure.retryable || (!parsed && result.code !== 0)
    // Retrying a usage limit without waiting cannot succeed, so honour the opt-out.
    const blockedByLimit = isUsageLimit && !retryOnLimit
    const canRetry = attempt < attemptLimit && looksRetryable && !blockedByLimit

    if (!canRetry) {
      return { result, parsed, failure, attempts: attempt }
    }

    if (isUsageLimit) {
      await sleep(msUntilNextQuotaReset())
    }
  }
}
|
package/package.json
CHANGED
package/src/install.js
CHANGED
|
@@ -13,7 +13,12 @@ import {
|
|
|
13
13
|
REQUIRED_SKILL_DIRS,
|
|
14
14
|
REQUIRED_SLOPMACHINE_FILES,
|
|
15
15
|
} from './constants.js'
|
|
16
|
-
import {
|
|
16
|
+
import {
|
|
17
|
+
ensureUploadEndpoint,
|
|
18
|
+
getSlopmachineConfigPath,
|
|
19
|
+
hasStoredUploadToken,
|
|
20
|
+
promptAndStoreUploadToken,
|
|
21
|
+
} from './config.js'
|
|
17
22
|
import {
|
|
18
23
|
backupFile,
|
|
19
24
|
commandExists,
|
package/src/send-data.js
CHANGED
|
@@ -140,8 +140,10 @@ function getTrackedDeveloperSessions(metadata) {
|
|
|
140
140
|
return sessions
|
|
141
141
|
.filter((entry) => entry && typeof entry.session_id === 'string' && entry.session_id.trim())
|
|
142
142
|
.map((entry, index) => ({
|
|
143
|
+
order: index,
|
|
143
144
|
sequence: Number.isFinite(Number(entry.sequence)) ? Number(entry.sequence) : index + 1,
|
|
144
145
|
label: entry.label || `${entry.lane || 'develop'}-${index + 1}`,
|
|
146
|
+
backend: typeof entry.backend === 'string' && entry.backend.trim() ? entry.backend.trim() : 'opencode',
|
|
145
147
|
sessionId: entry.session_id.trim(),
|
|
146
148
|
}))
|
|
147
149
|
}
|
|
@@ -316,8 +318,29 @@ async function generateBeadsExport(workspaceRoot, projectId, runId, stagingDir)
|
|
|
316
318
|
return exportPath
|
|
317
319
|
}
|
|
318
320
|
|
|
321
|
+
/**
 * Collect the relative paths of every regular file under `rootDir`.
 *
 * Directories are descended into depth-first, in the order `fs.readdir`
 * returns them. Entries that are neither directories nor regular files are
 * ignored.
 *
 * @param {string} rootDir Directory to scan.
 * @param {string} [relativePrefix] Path prepended to every returned entry.
 * @returns {Promise<string[]>} Paths relative to the original root.
 */
async function listFilesRecursive(rootDir, relativePrefix = '') {
  const collected = []
  const dirents = await fs.readdir(rootDir, { withFileTypes: true })

  for (const dirent of dirents) {
    const relativeName = path.join(relativePrefix, dirent.name)

    if (dirent.isDirectory()) {
      const nested = await listFilesRecursive(path.join(rootDir, dirent.name), relativeName)
      collected.push(...nested)
    } else if (dirent.isFile()) {
      collected.push(relativeName)
    }
  }

  return collected
}
|
|
341
|
+
|
|
319
342
|
async function buildManifest(stagingDir, projectId, runId, label, workspaceRoot) {
|
|
320
|
-
const fileNames = (await
|
|
343
|
+
const fileNames = (await listFilesRecursive(stagingDir))
|
|
321
344
|
.filter((name) => name !== 'manifest.json')
|
|
322
345
|
.sort((left, right) => left.localeCompare(right))
|
|
323
346
|
|
|
@@ -362,6 +385,75 @@ async function buildManifest(stagingDir, projectId, runId, label, workspaceRoot)
|
|
|
362
385
|
return { manifest, manifestPath }
|
|
363
386
|
}
|
|
364
387
|
|
|
388
|
+
/**
 * Export one tracked developer session into the staging directory.
 *
 * Two files are produced:
 *   - `<stagingDir>/session-<order + 1>.json` — the exported session
 *   - `<stagingDir>/sessions/<label>.json`    — the converted session
 *
 * Claude-backed sessions are exported with `export_ai_session.mjs`. Every
 * other backend is exported through `opencode` into a temporary raw file that
 * `strip_session_parent.py` then cleans. The temporary raw file is removed on
 * both success and failure so it is not left behind in the staging directory.
 *
 * @param {{ order: number, label: string, backend: string, sessionId: string }} session
 * @param {string} workspaceRoot Working directory used for the export.
 * @param {string} stagingDir Directory that receives the artifacts.
 * @returns {Promise<void>}
 * @throws {Error} When `opencode` cannot be resolved or any step exits non-zero.
 */
async function exportDeveloperSessionArtifacts(session, workspaceRoot, stagingDir) {
  const utilsDir = path.join(buildPaths().slopmachineDir, 'utils')
  const exportScript = path.join(utilsDir, 'export_ai_session.mjs')
  const stripScript = path.join(utilsDir, 'strip_session_parent.py')
  const convertScript = path.join(utilsDir, 'convert_exported_ai_session.mjs')
  const converterPythonScript = path.join(utilsDir, 'convert_ai_session.py')

  const exportNumber = session.order + 1
  const rootSessionFile = path.join(stagingDir, `session-${exportNumber}.json`)
  const convertedSessionFile = path.join(stagingDir, 'sessions', `${session.label}.json`)
  const rawSessionFile = path.join(stagingDir, `.session-export-${session.label}.raw`)

  // Turn a non-zero exit into an error carrying the command's own output.
  const failIfNonZero = (result, what) => {
    if (result.code !== 0) {
      throw new Error(`${what} ${session.label}: ${(result.stderr || result.stdout).trim()}`)
    }
  }

  await ensureDir(path.dirname(convertedSessionFile))

  try {
    if (session.backend === 'claude') {
      const exportResult = await runCommand(process.execPath, [
        exportScript,
        '--backend',
        'claude',
        '--cwd',
        workspaceRoot,
        '--session-id',
        session.sessionId,
        '--output',
        rootSessionFile,
      ])
      failIfNonZero(exportResult, 'Failed to export session')
    } else {
      const opencodeCommand = await resolveCommand('opencode')
      if (!opencodeCommand) {
        throw new Error('Unable to find `opencode` for developer session export')
      }

      await exportOpenCodeSession(opencodeCommand, session.sessionId, rawSessionFile, workspaceRoot)

      const stripResult = await runCommand('python3', [
        stripScript,
        rawSessionFile,
        '--output',
        rootSessionFile,
      ])
      failIfNonZero(stripResult, 'Failed to clean exported session')
    }

    const convertResult = await runCommand(process.execPath, [
      convertScript,
      '--converter-script',
      converterPythonScript,
      '--input',
      rootSessionFile,
      '--output',
      convertedSessionFile,
    ])
    failIfNonZero(convertResult, 'Failed to convert exported session')
  } catch (error) {
    // Best-effort cleanup: the original failure is the error worth reporting.
    await fs.rm(rawSessionFile, { force: true }).catch(() => {})
    throw error
  }

  // `force: true` already tolerates a missing file (e.g. the claude backend).
  await fs.rm(rawSessionFile, { force: true })
}
|
|
456
|
+
|
|
365
457
|
async function createZipArchive(stagingDir, outputPath) {
|
|
366
458
|
await ensureDir(path.dirname(outputPath))
|
|
367
459
|
|
|
@@ -458,13 +550,8 @@ async function stageSendDataBundle({ workspaceRoot, repoPath, ownerSessionId, de
|
|
|
458
550
|
|
|
459
551
|
await exportOpenCodeSession(opencodeCommand, ownerSessionId, path.join(stagingDir, 'owner-session.json'), workspaceRoot)
|
|
460
552
|
|
|
461
|
-
for (const session of developerSessions.sort((left, right) => left.
|
|
462
|
-
await
|
|
463
|
-
opencodeCommand,
|
|
464
|
-
session.sessionId,
|
|
465
|
-
path.join(stagingDir, `${session.label}.json`),
|
|
466
|
-
workspaceRoot,
|
|
467
|
-
)
|
|
553
|
+
for (const session of developerSessions.sort((left, right) => left.order - right.order)) {
|
|
554
|
+
await exportDeveloperSessionArtifacts(session, workspaceRoot, stagingDir)
|
|
468
555
|
}
|
|
469
556
|
|
|
470
557
|
await copyOptionalPath(
|