theslopmachine 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -164,16 +164,15 @@ The v2 workflow also expects:
164
164
 
165
165
  Every bootstrapped project should expose:
166
166
 
167
- - one primary documented launch/run command for its selected stack
168
- - one primary documented full-test command for its selected stack
167
+ - one primary documented runtime command
168
+ - one primary documented broad test command: `./run_tests.sh`
169
169
 
170
170
  Follow the original prompt and the existing repository first. Use the examples below only when they do not already specify the platform or stack.
171
171
 
172
172
  Examples:
173
173
 
174
174
  - web backend/fullstack: `docker compose up --build` and `./run_tests.sh`
175
- - Expo mobile: `npx expo start` and the project's single full-test command
176
- - Electron desktop: `npm run dev` and the project's single full-test command
175
+ - mobile or desktop when Docker runtime is not the direct run path: `./run_app.sh` and `./run_tests.sh`
177
176
 
178
177
  ## Files And Locations
179
178
 
@@ -109,6 +109,18 @@ State split:
109
109
 
110
110
  Do not create another competing workflow-state system.
111
111
 
112
+ ## Git Traceability
113
+
114
+ Use git to preserve meaningful workflow checkpoints.
115
+
116
+ - after each meaningful accepted work unit, run `git add .` and `git commit -m "<message>"`
117
+ - meaningful work includes accepted scaffold completion, accepted major development slices, accepted remediation passes, and other clearly reviewable milestones
118
+ - keep the git flow simple and checkpoint-oriented
119
+ - commit only after the relevant work and verification for that checkpoint are complete enough to preserve useful history
120
+ - keep commit messages descriptive and easy to reason about later
121
+ - do not push unless explicitly requested
122
+ - do not commit secrets, local-only junk, or accidental noise
123
+
112
124
  ## Mandatory Operating Order
113
125
 
114
126
  Operate in this order:
@@ -149,6 +161,7 @@ Use these exact root phases:
149
161
  - `P8 Final Human Decision`
150
162
  - `P9 Remediation`
151
163
  - `P10 Submission Packaging`
164
+ - `P11 Retrospective`
152
165
 
153
166
  Phase rules:
154
167
 
@@ -157,6 +170,7 @@ Phase rules:
157
170
  - do not close multiple root phases in one transition block
158
171
  - `P9 Remediation` stays its own root phase once evaluation has accepted follow-up work
159
172
  - `P6 Hardening` may reopen `P5` if hardening exposes unresolved integrated instability
173
+ - `P11 Retrospective` runs automatically after successful packaging and is non-blocking unless it finds a real delivery defect
160
174
 
161
175
  ## Developer Session Model
162
176
 
@@ -199,8 +213,13 @@ Selected-stack rule:
199
213
 
200
214
  Every project must end up with:
201
215
 
202
- - one primary documented launch/run command for the selected stack
203
- - one primary documented full-test command for the selected stack
216
+ - one primary documented runtime command
217
+ - one primary documented full-test command: `./run_tests.sh`
218
+
219
+ Runtime command rule:
220
+
221
+ - for Dockerized web backend/fullstack projects, `docker compose up --build` may be the primary runtime command directly
222
+ - when `docker compose up --build` is not the runtime contract, the project must provide `./run_app.sh` as the single primary runtime wrapper
204
223
 
205
224
  Default moments:
206
225
 
@@ -208,6 +227,12 @@ Default moments:
208
227
  2. development complete -> integrated verification entry
209
228
  3. final qualified state before packaging
210
229
 
230
+ For Dockerized web backend/fullstack projects, enforce this cadence:
231
+
232
+ - after scaffold completion, the owner runs `docker compose up --build` and `./run_tests.sh` once to confirm the scaffold baseline really works
233
+ - after that, do not run Docker again during ordinary development work
234
+ - the next Docker-based run is at development completion or integrated-verification entry unless a real blocker forces earlier escalation
235
+
211
236
  Between those moments, rely on:
212
237
 
213
238
  - local runtime checks
@@ -245,6 +270,7 @@ Core map:
245
270
  - `P7` -> `final-evaluation-orchestration`, `evaluation-triage`, `report-output-discipline`
246
271
  - `P9` -> `remediation-guidance`
247
272
  - `P10` -> `submission-packaging`, `report-output-discipline`
273
+ - `P11` -> `retrospective-analysis`, `owner-evidence-discipline`, `report-output-discipline`
248
274
  - state mutations -> `beads-operations`
249
275
  - evidence-heavy review -> `owner-evidence-discipline`
250
276
  - planned developer-session switch -> `session-rollover`
@@ -327,6 +353,16 @@ When `P10 Submission Packaging` begins:
327
353
  - follow its exact artifact, export, cleanup, and output contract
328
354
  - do not close packaging until every required final artifact path has been verified
329
355
 
356
+ ## Retrospective
357
+
358
+ After `P10 Submission Packaging` closes successfully:
359
+
360
+ - automatically enter `P11 Retrospective`
361
+ - load `retrospective-analysis`
362
+ - write dated retrospective output under `~/slopmachine/retrospectives/`
363
+ - keep it owner-only and non-blocking by default
364
+ - reopen packaging only if the retrospective finds a real packaged-result defect
365
+
330
366
  ## Completion Standard
331
367
 
332
368
  The workflow is not done until:
@@ -335,6 +371,7 @@ The workflow is not done until:
335
371
  - the current root phase closed cleanly
336
372
  - the workflow ledger closed cleanly
337
373
  - the final package is assembled and verified in its final structure
374
+ - the retrospective phase has either documented improvements or reopened and resolved any real packaging defect it found
338
375
 
339
376
  Success means:
340
377
 
@@ -45,6 +45,8 @@ Use this skill only during `P1 Clarification`.
45
45
  - never use defaults that drift from the original prompt
46
46
  - do not use quick, loose, or simplifying assumptions that shrink what the prompt asked for
47
47
  - do not guess through material ambiguity
48
+ - do not expand the clarification artifact just to exhaust every minor edge case when the scope is already clear enough to plan correctly
49
+ - once the core scope is understood, prefer a compact clarification record plus explicit safe defaults over a giant exhaustive rewrite
48
50
 
49
51
  ## Required outputs
50
52
 
@@ -52,16 +54,63 @@ Use this skill only during `P1 Clarification`.
52
54
  - developer-facing clarification prompt in `../.ai/clarification-prompt.md`
53
55
  - explicit list of safe defaults and resolved ambiguities
54
56
 
57
+ ## `questions.md` contract
58
+
59
+ `../docs/questions.md` is not a general project summary.
60
+
61
+ It exists only for prompt items that needed interpretation because they were unclear, incomplete, or materially ambiguous.
62
+
63
+ Each entry should answer this structure:
64
+
65
+ 1. what was unclear from the original prompt
66
+ 2. how you interpreted it
67
+ 3. what decision or solution you chose for it
68
+ 4. why that choice is prompt-faithful and reasonable
69
+
70
+ Keep the file narrow and explicit.
71
+
72
+ Do not use `questions.md` for:
73
+
74
+ - a full restatement of the entire prompt
75
+ - broad planning notes
76
+ - general project requirements that were already clear
77
+ - implementation details that belong in planning or design docs
78
+
79
+ Preferred entry shape:
80
+
81
+ ```md
82
+ ## Item N: <short ambiguity title>
83
+
84
+ ### What was unclear
85
+ <the exact ambiguity or missing detail>
86
+
87
+ ### Interpretation
88
+ <how it was interpreted>
89
+
90
+ ### Decision
91
+ <the chosen resolution or safe default>
92
+
93
+ ### Why this is reasonable
94
+ <brief justification tied to prompt faithfulness>
95
+ ```
96
+
97
+ If nothing material was unclear, keep `questions.md` minimal rather than inventing content.
98
+
55
99
  ## Clarification-prompt validation loop
56
100
 
57
- - compare the original prompt and the prepared clarification prompt using a fresh ephemeral `General` session, never the developer session
58
- - build one self-contained validation prompt block for that `General` session every time
59
- - include the full original prompt text, the full current questions or clarification record, and the full current `../.ai/clarification-prompt.md` in that block
101
+ - compare the original prompt and the prepared clarification prompt using one dedicated `General` validation session, never the developer session
102
+ - do not create a new validation session for every retry unless the session became unusable or a fundamental misunderstanding requires a clean restart
103
+ - on the first validation pass, build one self-contained validation prompt block for that `General` session
104
+ - on that first pass, include the full original prompt text, the full current questions or clarification record, and the full current `../.ai/clarification-prompt.md`
60
105
  - do not use placeholders such as `same as previous`, `from context`, `see above`, or `latest artifact`
61
106
  - ask that `General` session whether the clarification prompt deviates from, weakens, narrows, or violates the original prompt in any way
62
107
  - require it to judge whether the clarification prompt is a genuine improvement in execution quality while remaining faithful to the original intent
63
- - if mismatches or prompt drift are found, revise the questions record and clarification prompt, then build a newly composed full validation block and run the check again
108
+ - if the validator suggests real fixes, patch the existing questions record and clarification prompt directly; do not restart the clarification phase from scratch unless the validator found a fundamental scope misunderstanding
109
+ - treat validator output as a correction list, not as a reason to regenerate giant clarification blocks repeatedly
110
+ - when rerunning validation in the same validator session, send only the improved clarification payload and the concrete fixes you made; do not resend the original prompt block if the session already has that context
111
+ - rerun validation only after applying the concrete fixes that matter
64
112
  - keep the validation loop bounded and intentional; prefer one strong pass plus a small number of revision cycles over repeated loose churn
113
+ - once prompt-faithfulness is satisfied and the remaining notes are minor or cosmetic, stop iterating and proceed
65
114
  - only treat the clarification prompt as approved for developer use after this validation loop passes and your own review agrees
66
115
  - requesting human approval before this validation loop passes is illegal
67
116
 
@@ -102,6 +102,12 @@ Track at least:
102
102
  - `awaiting_human`
103
103
  - `clarification_approved`
104
104
  - `remediation_round`
105
+ - `clarification_validator_session_id`
106
+ - `evaluation_pass`
107
+ - `backend_evaluation_session_id`
108
+ - `frontend_evaluation_session_id`
109
+ - `last_evaluation_session_id`
110
+ - `passed_evaluation_tracks`
105
111
  - `developer_sessions`
106
112
  - `active_developer_session_index`
107
113
 
@@ -15,15 +15,54 @@ Use this skill during `P7 Evaluation and Triage` after evaluation reports exist.
15
15
  - do not enter remediation just because a report found something; enter it only when the accepted findings justify it
16
16
  - if no remediation is needed, move directly to the final human decision
17
17
 
18
+ ## Non-negotiable evaluation buckets
19
+
20
+ These areas are hard gates and should not be passed with known meaningful failures:
21
+
22
+ 1. prompt compliance
23
+ 2. requirement fulfillment / delivery completeness
24
+ 3. security-critical flaws
25
+
26
+ If evaluation finds a real issue in one of those buckets, the default outcome is remediation, not leniency.
27
+
28
+ Do not wave through:
29
+
30
+ - prompt drift or meaningful requirement mismatch
31
+ - missing core flows or partial delivery of prompt-critical functionality
32
+ - real security defects involving auth, authorization, ownership, isolation, exposure, or secret handling
33
+
34
+ ## Leniency buckets
35
+
36
+ These areas may pass with minor residual issues when the product is still clearly acceptable overall:
37
+
38
+ 1. testing cases / test sufficiency
39
+ 2. engineering architecture / engineering quality
40
+ 3. aesthetics
41
+
42
+ Leniency is allowed only when the issue is:
43
+
44
+ - minor in impact
45
+ - not hiding a likely blocker in another bucket
46
+ - not undermining overall confidence in the delivered product
47
+
48
+ High-severity findings in these leniency buckets may still be passed when they are not materially relevant to actual acceptance readiness, but that should be a deliberate exception backed by direct evidence.
49
+
50
+ If the hard gates pass cleanly, the leniency buckets should usually not force remediation unless the issue is a true `Blocker` or a materially relevant `High` finding.
51
+
18
52
  ## Triage rules
19
53
 
20
54
  - read both reports and merge the findings into one explicit triage set before deciding what happens next
21
55
  - use the evaluator priority ordering directly when triaging findings unless stronger direct evidence says otherwise
22
- - any finding marked `Blocker` or `High` should normally be returned for remediation
56
+ - any finding in the non-negotiable buckets should normally be returned for remediation if it is real
57
+ - findings marked `Blocker` should normally be returned for remediation
58
+ - findings marked `High` should normally be returned for remediation unless they fall in a leniency bucket and your direct evidence shows they are not materially relevant to acceptance
23
59
  - findings marked `Medium` may be passed in limited cases, but should usually be fixed when they materially improve confidence, correctness, or acceptance readiness
24
60
  - findings marked `Low` may be passed without remediation
25
61
  - do not treat complaints about test coverage depth, unverifiable tests, or evaluator inability to confirm a test path as automatic blockers by themselves
26
62
  - if your own direct evidence shows the tests run and the coverage is acceptable for qualification, defend the project and pass those findings instead of automatically remediating
63
+ - minor engineering-architecture quality issues may pass if the system is still structurally credible and maintainable overall
64
+ - minor aesthetics issues may pass if the UI is still clearly usable and credible for the actual use case
65
+ - if prompt compliance, requirement fulfillment, and security all pass, testing/engineering/aesthetics findings should generally be treated more leniently unless they are blocking or materially high-risk
27
66
  - if a report says it could not verify some behavior because of environment limits or avoidable verification setup issues, first decide whether you can remove that constraint and rerun the evaluation in a cleaner state
28
67
  - if the evaluator could not verify something but your own verified evidence already shows the behavior is acceptable, do not treat that as an automatic remediation trigger
29
68
  - challenge weak, random, or overreaching findings using your stronger project context and direct codebase knowledge
@@ -46,6 +46,19 @@ These two files are the only evaluation prompt sources for evaluation runs.
46
46
  - keep reports file-backed and bring only short summaries into chat
47
47
  - rerun only the evaluation track that still needs re-evaluation after remediation
48
48
 
49
+ ## Evaluation pass strategy
50
+
51
+ - use a maximum of 3 full evaluation passes
52
+ - after each evaluation pass, extract a detailed concrete issue list from the failing report(s)
53
+ - send that list back to the active developer session with a direct instruction like: `fix these issues found in evaluation, verify affected flows don't regress after your fixes`
54
+ - if one evaluation track passes, mark it as passed and do not rerun that track in later passes unless a later fix clearly reopens it
55
+ - do not rerun both backend and frontend evaluation tracks when only one still needs re-evaluation
56
+ - after pass 1 and pass 2, use the detailed issue list from the latest failing report(s) to drive the next remediation pass
57
+ - after pass 3, do not create a new evaluation session for the still-failing track
58
+ - after pass 3, send the final fix list back to the developer, then return to the last evaluation session used for that still-failing track and ask whether the last reported issues are now fixed
59
+ - if they are fixed, have that same evaluation session update the report to reflect the current state cleanly, without mentioning recheck, retest, previous issues, or iterative review history
60
+ - the final report should read like a normal current-state evaluation report, not like a patch log
61
+
49
62
  ## Remediation loop
50
63
 
51
64
  - route accepted blocking issues back into the active remediation developer-session slot rather than inventing an untracked side path
@@ -55,7 +68,8 @@ These two files are the only evaluation prompt sources for evaluation runs.
55
68
  - the selected stack's platform-appropriate UI/E2E verification where applicable, with fresh screenshots or equivalent artifacts
56
69
  - if remediation materially reopens an owner-run broad milestone boundary, route the project back to that boundary before re-evaluation instead of treating every remediation pass as an automatic broad rerun moment
57
70
  - keep the remediation loop bounded and explicit so you never lose track of the active evaluation round or the accepted issue set
58
- - remember the external process allows a maximum of 3 repair rounds
71
+ - store backend, frontend, and last-used evaluation session ids in metadata so later passes and packaging can safely reuse the correct session when needed
72
+ - remember the evaluation flow allows a maximum of 3 full evaluation passes before the final issue-verification update path must be used
59
73
 
60
74
  ## Boundaries
61
75
 
@@ -82,9 +82,15 @@ Selected-stack defaults:
82
82
  - define auth edge-case expectations when relevant, such as token refresh, session expiry, or clock-skew tolerance
83
83
  - call out operational obligations early when they are prompt-critical, such as scheduling, retention, backups, workers, auditability, or offline behavior
84
84
  - define infrastructure requirements early when they are material to correctness, such as rate limiting, encryption boundaries, production-equivalent test infrastructure, and browser-storage rules for sensitive data
85
- - define a project-standard launch command and a project-standard full-test command early, and keep both compatible with the selected stack
86
- - for web backend/fullstack projects, default those to `docker compose up --build` and `./run_tests.sh` only when the prompt or existing repo does not already dictate another stack-compatible contract
87
- - for mobile, desktop, CLI, library, or other non-web projects, define the selected stack's appropriate single documented launch command and single documented full-test command instead of forcing Docker conventions
85
+ - define the project-standard runtime contract and the universal broad test entrypoint `./run_tests.sh` early, and keep both compatible with the selected stack
86
+ - for Dockerized web backend/fullstack projects, the runtime contract may be `docker compose up --build` directly when the prompt or existing repo does not already dictate another stack-compatible contract
87
+ - when `docker compose up --build` is not the runtime contract, require `./run_app.sh` as the single primary runtime wrapper for the project
88
+ - for mobile, desktop, CLI, library, or other non-web projects, `./run_app.sh` should own the selected stack's runtime flow instead of assuming host tooling conventions
89
+ - `./run_tests.sh` must exist for every project as the platform-independent broad test wrapper
90
+ - `./run_tests.sh` must prepare or install anything required before running the tests when that setup is needed for a clean environment
91
+ - for Dockerized web backend/fullstack projects, `./run_tests.sh` must run the full test path through Docker rather than a purely local test invocation
92
+ - for non-web or non-Docker projects, `./run_tests.sh` must call the selected stack's equivalent full test path while keeping the same single-command interface
93
+ - local tests should still exist for ordinary developer iteration, but `./run_tests.sh` is the broad final test path for the project
88
94
  - define frontend validation and accessibility expectations when the product surface materially depends on them, including keyboard, focus, feedback, and other user-interaction quality requirements where relevant
89
95
  - if backup or recovery behavior is prompt-critical, plan the designated media, operator drill flow, visibility, and verification expectations explicitly
90
96
  - if the prompt names literal storage, indexing, partitioning, retention, or performance dimensions, represent them literally in the planning artifacts rather than abstracting them away
@@ -0,0 +1,91 @@
1
+ ---
2
+ name: retrospective-analysis
3
+ description: Owner-only final retrospective rules for slopmachine.
4
+ ---
5
+
6
+ # Retrospective Analysis
7
+
8
+ Use this skill only after `P10 Submission Packaging` has materially and formally succeeded.
9
+
10
+ ## Purpose
11
+
12
+ - inspect what happened across the whole workflow run
13
+ - identify what caused churn, waste, late defects, or preventable corrections
14
+ - capture lessons that should improve future runs
15
+ - write package-specific retrospective files under `~/slopmachine/retrospectives/`
16
+
17
+ ## Phase role
18
+
19
+ - this is an automatic owner-only phase
20
+ - it is quiet and non-blocking by default
21
+ - it does not create a new human stop
22
+ - it does not rerun broad verification by default
23
+ - it should not reopen development unless it finds a real defect in the already-packaged result
24
+
25
+ ## Output location
26
+
27
+ Write dated retrospective files under:
28
+
29
+ - `~/slopmachine/retrospectives/`
30
+
31
+ Preferred filenames:
32
+
33
+ - `retrospective-YYYY-MM-DD.md`
34
+ - `improvement-actions-YYYY-MM-DD.md`
35
+
36
+ If only one file is needed, the retrospective file is sufficient.
37
+
38
+ ## Evidence sources
39
+
40
+ Prefer existing workflow artifacts first:
41
+
42
+ - root metadata
43
+ - questions/clarification record
44
+ - clarification prompt
45
+ - planning artifacts
46
+ - Beads comments and transitions
47
+ - developer-session handoffs
48
+ - review and rejection history
49
+ - verification gate notes
50
+ - evaluation reports
51
+ - remediation records
52
+ - packaging outputs
53
+
54
+ Do not reread the entire codebase unless a real inconsistency requires it.
55
+ Do not rerun broad Docker or full-suite verification just for retrospective analysis.
56
+
57
+ ## Required retrospective sections
58
+
59
+ 1. outcome summary
60
+ 2. what worked well
61
+ 3. what caused waste or looping
62
+ 4. what was caught too late
63
+ 5. findings by phase
64
+ 6. findings by instruction plane:
65
+ - owner shell
66
+ - developer prompt
67
+ - skills
68
+ - `AGENTS.md`
69
+ 7. actionable improvements
70
+
71
+ ## Audit buckets
72
+
73
+ Evaluate at least these buckets in hindsight:
74
+
75
+ 1. prompt-fit
76
+ 2. security-critical flaws
77
+ 3. test sufficiency
78
+ 4. major engineering quality
79
+ 5. token/time waste
80
+
81
+ For each meaningful finding, prefer:
82
+
83
+ - what happened
84
+ - why it happened
85
+ - where the fix belongs
86
+ - how it should change future runs
87
+
88
+ ## Rule for reopening work
89
+
90
+ - if retrospective finds a real packaging or delivery defect, reopen `P10` and fix it
91
+ - if it finds only improvements, document them and close the retrospective phase
@@ -14,14 +14,24 @@ Use this skill during `P3 Scaffold` before prompting the developer.
14
14
  - establish the local verification path and the standardized gate path
15
15
  - make prompt-critical baseline behavior real where required
16
16
  - keep repo-local `README.md` honest from the start
17
- - make the selected-stack primary launch command and primary full-test command real from the scaffold stage
17
+ - make the selected-stack primary runtime command and the universal `./run_tests.sh` broad test command real from the scaffold stage
18
+
19
+ For Dockerized web backend/fullstack projects, scaffold must make these commands real and working before scaffold can pass:
20
+
21
+ - `docker compose up --build`
22
+ - `./run_tests.sh`
18
23
 
19
24
  ## Scaffold and foundation guidance
20
25
 
21
26
  - create the initial project structure intentionally
22
27
  - follow the original prompt and existing repository first; only use the package defaults below when they do not already specify the platform or stack
23
- - create the selected-stack primary full-test command during scaffold; for web backend/fullstack projects this is usually `./run_tests.sh`, while non-web projects should expose their own single documented full-test command
24
- - create the selected-stack primary launch command during scaffold; for web backend/fullstack projects this is usually `docker compose up --build`, while non-web projects should expose their own single documented launch command
28
+ - create `./run_tests.sh` during scaffold for every project as the single broad test entrypoint
29
+ - for Dockerized web backend/fullstack projects, make `docker compose up --build` real as the primary runtime command during scaffold
30
+ - when `docker compose up --build` is not the runtime contract, create `./run_app.sh` during scaffold as the single primary runtime wrapper
31
+ - make `./run_tests.sh` self-sufficient from a clean environment by preparing or installing anything it needs before executing the tests
32
+ - for Dockerized web backend/fullstack projects, `./run_tests.sh` must execute the broad test path through Docker and should own that Dockerized test flow directly instead of requiring separate manual pre-setup
33
+ - for non-web or non-Docker projects, `./run_tests.sh` must execute the selected stack's platform-equivalent broad test flow while preserving the same single-command interface
34
+ - local non-Docker test commands should still be installed and working for normal development iteration
25
35
  - create required testing directories and baseline docs structure
26
36
  - put baseline config and logging structure in place
27
37
  - install and configure the local test tooling needed for ordinary iteration during scaffold rather than deferring local testing setup to later phases
@@ -42,6 +52,7 @@ Use this skill during `P3 Scaffold` before prompting the developer.
42
52
  - require reproducible build and tooling foundations: prefer lockfile-driven installs where the stack supports them, keep source and build outputs clearly separated, and do not allow generated runtime artifacts to drift back into source directories
43
53
  - for typed build pipelines, keep source-of-truth boundaries clean so compiled output does not create TS/JS or similar dual-source drift in the working tree
44
54
  - establish README structure early instead of leaving it until the end
55
+ - ensure `README.md` clearly documents the primary runtime command and the broad `./run_tests.sh` contract for the selected stack
45
56
  - prove the scaffold in a clean state before deeper feature work
46
57
  - verify clean startup and teardown behavior under the selected stack's runtime contract
47
58
  - for Dockerized web projects, verify clean startup and teardown behavior under the chosen project namespace
@@ -66,3 +77,5 @@ Scaffold should make later slices easier, not force them to retrofit missing fun
66
77
  - use local and narrow checks while correcting scaffold work
67
78
  - reserve one broad owner-run scaffold gate for actual scaffold acceptance
68
79
  - do not spend extra broad reruns once the acceptance question is already answered
80
+ - for Dockerized web backend/fullstack projects, the owner must run `docker compose up --build` and `./run_tests.sh` once after scaffold completion to confirm the baseline actually works
81
+ - after that scaffold confirmation, do not run Docker again during ordinary development work; the next Docker-based run should be at development completion when integrated behavior is checked
@@ -79,11 +79,9 @@ The final submission layout in the parent project root must be:
79
79
  - relocated screenshots and proof materials needed for submission review
80
80
  - current working directory delivered as parent-root `repo/`
81
81
  - `../sessions/`
82
+ - `develop-N.json`
83
+ - `bugfix-N.json`
82
84
  - `../metadata.json`
83
- - `../session.json`
84
- - `../session-N.json` when multiple exported sessions exist
85
- - `../trajectory.json`
86
- - `../trajectory-N.json` when multiple trajectories exist
87
85
  - parent-root `../.tmp/` directory moved out of current `.tmp/` when it exists
88
86
 
89
87
  ## Required packaging actions
@@ -101,32 +99,49 @@ The final submission layout in the parent project root must be:
101
99
  - `~/slopmachine/implementation-comparison.md`
102
100
  - `~/slopmachine/quality-document.md`
103
101
  - ensure `README.md` matches the delivered codebase, functionality, runtime steps, and test steps, stays friendly to a junior developer, and does not reference the external docs set in `../docs/`
104
- - include the selected stack's primary full-test command and any supporting runner script or wrapper needed to execute it from a clean environment
102
+ - include `./run_tests.sh` and any supporting runner logic it needs to execute the project's broad test path from a clean environment
105
103
  - relocate evaluation artifacts into parent-root `../submission/`
106
104
  - relocate screenshots and proof materials relevant to runtime behavior and major flows into parent-root `../submission/`
107
- - include exported session artifacts at the parent project root using the naming rules:
108
- - `../session.json` for a single exported session
109
- - `../session-N.json` when multiple exported sessions exist
110
- - include trajectory artifacts at the parent project root using the naming rules:
111
- - `../trajectory.json` for a single trajectory
112
- - `../trajectory-N.json` when multiple trajectories exist
113
- - preserve parent-root `../sessions/` as the session artifact directory for any additional exported conversation traces the package needs to retain
105
+ - preserve parent-root `../sessions/` as the session artifact directory for converted workflow session exports
106
+ - export all tracked workflow sessions before generating the final submission documents
107
+ - after the session exports are complete, generate the final submission report content using the last evaluation session recorded in metadata, so the report answers come from cached evaluation context instead of rebuilding that context from scratch
114
108
 
115
109
  ## Session export sequence
116
110
 
117
- For the developer session, run these exact steps:
111
+ This export sequence must happen first in packaging, before final submission documents are generated.
118
112
 
119
- 1. `opencode export <developer-session-id> > ../session-export.json`
120
- 2. `python3 ~/utils/strip_session_parent.py ../session-export.json --output ../session.json`
121
- 3. `python3 ~/utils/convert_ai_session.py -i ../session.json -o ../trajectory.json`
113
+ Export every tracked workflow session recorded in metadata.
114
+
115
+ For each tracked session:
116
+
117
+ 1. `opencode export <session-id> > ../session-export-<label>.json`
118
+ 2. `python3 ~/utils/strip_session_parent.py ../session-export-<label>.json --output ../session-clean-<label>.json`
119
+ 3. `python3 ~/utils/convert_ai_session.py -i ../session-clean-<label>.json -o ../sessions/<final-name>.json`
120
+
121
+ Naming rule for converted files under `../sessions/`:
122
+
123
+ - development-phase sessions become `develop-N.json`
124
+ - hardening or remediation sessions become `bugfix-N.json`
122
125
 
123
126
  After those steps:
124
127
 
125
- - keep the cleaned final exported session as parent-root `../session.json` unless multiple exports require `../session-N.json`
126
- - keep the generated final trajectory as parent-root `../trajectory.json` unless multiple trajectories require `../trajectory-N.json`
127
- - treat parent-root `../session-export.json` as a temporary packaging intermediate
128
- - immediately verify that all expected directories and required files exist before running later packaging steps
129
- - if the required utilities or output files are missing, packaging is not ready to continue
128
+ - verify every planned developer session has been exported and converted before continuing packaging
129
+ - keep the converted session outputs in `../sessions/` using the naming rules above
130
+ - treat the `../session-export-*.json` and `../session-clean-*.json` files as temporary packaging intermediates unless the package contract later says otherwise
131
+ - if the required utilities, metadata session ids, or output files are missing, packaging is not ready to continue
132
+ - only after these exports are complete may you generate the final submission documents
133
+
134
+ ## Final report generation order
135
+
136
+ After all session exports are complete:
137
+
138
+ 1. recover the last evaluation session id from metadata
139
+ 2. use that last evaluation session to answer the final submission-document questions from its cached context
140
+ 3. generate the required final submission documents from that evaluation context plus the canonical `~/slopmachine/` reference files
141
+
142
+ Do not start generating the final submission documents before the session exports are complete.
143
+ Do not create a new evaluation session for final report generation if the last evaluation session is still available.
144
+ If the last evaluation session id is missing or unusable, stop and repair metadata/session recovery before continuing packaging.
130
145
 
131
146
  ## Required file moves
132
147
 
@@ -155,7 +170,9 @@ After those steps:
155
170
  - confirm shared project docs live in parent-root `../docs/` and any accidental repo-local `docs/` copy has been removed from the delivered tree
156
171
  - confirm required screenshots have been relocated into parent-root `../submission/`
157
172
  - confirm parent-root metadata fields are populated correctly
158
- - confirm session export naming rules are followed
173
+ - confirm session export naming rules are followed under `../sessions/`:
174
+ - `develop-N.json` for development-phase sessions
175
+ - `bugfix-N.json` for hardening/remediation sessions
159
176
 
160
177
  ## Submission artifact and response contract
161
178
 
@@ -21,7 +21,12 @@ Use this skill after development begins whenever you are reviewing work, decidin
21
21
  - do not allow `.env` files or env-file variants anywhere in the repo tree
22
22
  - do not allow a project that requires a preexisting `.env` file in the repo or package to start from scratch
23
23
  - if env-file format is needed at runtime, it must be generated ephemerally from the selected runtime environment rather than stored in the repo or package
24
- - require the README to show one primary launch command and one primary full-test command for the selected stack
24
+ - require the README to show the correct primary runtime command and `./run_tests.sh` as the primary broad test command
25
+ - for Dockerized web backend/fullstack projects, that runtime command may be `docker compose up --build` directly
26
+ - when `docker compose up --build` is not the runtime contract, require `./run_app.sh` to be the documented primary runtime wrapper
27
+ - require `./run_tests.sh` to be self-sufficient enough to run from a clean environment, including any required install/setup steps when applicable
28
+ - for Dockerized web backend/fullstack projects, require `./run_tests.sh` to be the Dockerized broad test path used for final broad verification rather than a local-only test wrapper
29
+ - for non-web or non-Docker projects, require `./run_tests.sh` to be the platform-equivalent broad test path used for final broad verification
25
30
 
26
31
  ## Review standard
27
32
 
@@ -50,6 +55,8 @@ Use this skill after development begins whenever you are reviewing work, decidin
50
55
  - use targeted local verification as the default during scaffold corrections, development, hardening, and remediation
51
56
  - reserve the selected stack's broad verification path for the limited owner-run gate moments in the workflow budget
52
57
  - do not turn ordinary acceptance into repeated integrated-style gate runs
58
+ - for Dockerized web backend/fullstack projects, the owner must run `docker compose up --build` and `./run_tests.sh` once after scaffold completion to confirm the scaffold baseline
59
+ - after that scaffold confirmation, the next Docker-based run should be at development completion or integrated-verification entry unless a real blocker forces earlier escalation
53
60
 
54
61
  ## Verify-fix loop
55
62
 
@@ -69,10 +76,18 @@ Use this skill after development begins whenever you are reviewing work, decidin
69
76
 
70
77
  - a broad gate is an owner-run integrated verification boundary, not every ordinary phase change
71
78
  - a phase change alone does not automatically require a broad gate unless that phase exit explicitly calls for one
72
- - a broad gate normally means some combination of full clean runtime proof, the selected stack's primary full-test command, and platform-appropriate UI/E2E evidence when UI-bearing flows exist
79
+ - a broad gate normally means some combination of full clean runtime proof, `./run_tests.sh`, and platform-appropriate UI/E2E evidence when UI-bearing flows exist
73
80
  - in v2, the workflow target is at most 3 broad owner-run verification moments across the whole cycle
74
81
  - ordinary planning, ordinary slice acceptance, and routine in-phase verification are not broad gates by default and should rely on targeted local verification unless the risk profile says otherwise
75
82
 
83
+ For Dockerized web backend/fullstack projects, the default Docker cadence is:
84
+
85
+ 1. one owner-run `docker compose up --build` plus one owner-run `./run_tests.sh` after scaffold completion
86
+ 2. no more Docker-based runs during ordinary development work
87
+ 3. the next Docker-based run at development completion or integrated-verification entry
88
+
89
+ Any earlier extra Docker run needs a concrete blocker-based justification.
90
+
76
91
  ## Runtime gate interpretation
77
92
 
78
93
  Use evidence such as internal metadata files, structured Beads comments, verification command results, and file/project-state checks.
@@ -80,10 +95,13 @@ Use evidence such as internal metadata files, structured Beads comments, verific
80
95
  - clarification requires the `clarification-gate` conditions plus explicit approval record
81
96
  - planning requires the `developer-session-lifecycle` and planning-gate conditions plus a fresh planning-oriented start and the required documentation and repo hygiene state when relevant
82
97
  - scaffold requires evidence for the bounded scaffold gate, baseline logging/config, and when relevant the chosen frontend stack and UI approach being set intentionally
83
- - scaffold also requires safe env/config handling, no persisted local secrets, real migration/runtime foundations, a usable local test environment in the current working directory, and the primary launch/test commands documented and working for the selected stack when practical
98
+ - scaffold also requires safe env/config handling, no persisted local secrets, real migration/runtime foundations, a usable local test environment in the current working directory, and the correct primary runtime command plus `./run_tests.sh` documented and working when practical
99
+ - scaffold also requires `./run_tests.sh` to handle its own required setup from a clean environment when applicable
100
+ - local tests should still exist for ordinary development work even when the primary broad test command is Dockerized
84
101
  - when scaffold includes prompt-critical security controls, acceptance requires real runtime or endpoint verification of the protection rather than helper-only or shape-only proof
85
102
  - for security-bearing scaffolds, require applicable rejection evidence such as stale replay rejection, nonce reuse rejection, CSRF rejection on protected mutations, lockout triggering when lockout is in scope, or equivalent proof that the control is truly enforced
86
103
  - scaffold acceptance also requires clean startup and teardown behavior in the selected runtime model; for Dockerized web projects this includes self-contained Compose namespacing and no unnecessary fragile `container_name` usage
104
+ - for Dockerized web backend/fullstack projects, scaffold acceptance is not complete until the owner has actually run `docker compose up --build` and `./run_tests.sh` once successfully after scaffold completion
87
105
  - module implementation requires platform-appropriate local verification and selected-stack UI/E2E evidence when UI-bearing flows are material
88
106
  - module implementation acceptance should challenge tenant isolation, path confinement, sanitized error behavior, prototype residue, integration seams, and cross-cutting consistency when those concerns are in scope
89
107
  - integrated verification entry requires one of the limited owner-run broad gate moments once development is complete
@@ -26,16 +26,25 @@ This file is the repo-local engineering rulebook for `slopmachine` projects.
26
26
 
27
27
  Every project must expose:
28
28
 
29
- - one primary documented command to launch or run the application in its selected stack
30
- - one primary documented command to run the full supported test suite
31
- - follow the original prompt and existing repository first; use the defaults below only when they do not already specify the platform or stack
29
+ - one primary documented runtime command
30
+ - one primary documented broad test command: `./run_tests.sh`
31
+ - follow the original prompt and existing repository first for the runtime stack; `./run_tests.sh` should exist regardless of project type
32
+ - the primary full-test command should install or prepare what it needs first when that setup is required for a clean environment
32
33
 
33
34
  For web backend/fullstack projects, those are usually:
34
35
 
35
36
  - `docker compose up --build`
36
37
  - `./run_tests.sh`
37
38
 
38
- For mobile, desktop, CLI, library, or other non-web projects, use the selected stack's appropriate commands instead, but keep them to one clear documented launch command and one clear documented full-test command.
39
+ For Dockerized web backend/fullstack projects:
40
+
41
+ - `./run_tests.sh` must run the broad full-test path through Docker
42
+ - local non-Docker tests should still exist for normal development work
43
+ - final broad verification should use the Dockerized `./run_tests.sh` path, not only local test commands
44
+
45
+ When `docker compose up --build` is not the runtime contract, provide `./run_app.sh` as the single primary runtime wrapper.
46
+
47
+ For mobile, desktop, CLI, library, or other non-web projects, `./run_app.sh` should own the selected stack's runtime flow, while `./run_tests.sh` remains the single broad test wrapper calling the platform-equivalent full test path.
39
48
 
40
49
  ## Testing Rules
41
50
 
@@ -55,6 +64,8 @@ Selected-stack defaults:
55
64
 
56
65
  - Keep `README.md` and any codebase-local docs accurate.
57
66
  - The README must explain what the project is, what it does, how to run it, and how to test it.
67
+ - The README must clearly document whether the primary runtime command is `docker compose up --build` or `./run_app.sh`.
68
+ - The README must clearly document `./run_tests.sh` as the broad test command.
58
69
  - The README must stand on its own for basic codebase use.
59
70
 
60
71
  ## Secret And Runtime Rules
package/package.json CHANGED
@@ -1,25 +1,25 @@
1
1
  {
2
- "name": "theslopmachine",
3
- "version": "0.4.1",
4
- "description": "SlopMachine installer and project bootstrap CLI",
5
- "license": "MIT",
6
- "type": "module",
7
- "bin": {
8
- "slopmachine": "bin/slopmachine.js"
9
- },
10
- "scripts": {
11
- "start": "node ./bin/slopmachine.js",
12
- "check": "node ./bin/slopmachine.js --help"
13
- },
14
- "engines": {
15
- "node": ">=18"
16
- },
17
- "files": [
18
- "bin",
19
- "src",
20
- "assets",
21
- "README.md",
22
- "RELEASE.md",
23
- "MANUAL.md"
24
- ]
2
+ "name": "theslopmachine",
3
+ "version": "0.4.2",
4
+ "description": "SlopMachine installer and project bootstrap CLI",
5
+ "license": "MIT",
6
+ "type": "module",
7
+ "bin": {
8
+ "slopmachine": "bin/slopmachine.js"
9
+ },
10
+ "scripts": {
11
+ "start": "node ./bin/slopmachine.js",
12
+ "check": "node ./bin/slopmachine.js --help"
13
+ },
14
+ "engines": {
15
+ "node": ">=18"
16
+ },
17
+ "files": [
18
+ "bin",
19
+ "src",
20
+ "assets",
21
+ "README.md",
22
+ "RELEASE.md",
23
+ "MANUAL.md"
24
+ ]
25
25
  }
package/src/constants.js CHANGED
@@ -39,6 +39,7 @@ export const REQUIRED_SKILL_DIRS = [
39
39
  'evaluation-triage',
40
40
  'remediation-guidance',
41
41
  'submission-packaging',
42
+ 'retrospective-analysis',
42
43
  'owner-evidence-discipline',
43
44
  'report-output-discipline',
44
45
  'frontend-design',
package/src/init.js CHANGED
@@ -177,17 +177,31 @@ async function maybeOpenOpencode(targetPath, openAfterInit) {
177
177
  return
178
178
  }
179
179
 
180
- const opencodeCommand = await resolveCommand('opencode')
181
- if (!opencodeCommand) {
182
- warn('OpenCode is not available in PATH, so the project was initialized but could not be opened automatically. Launch OpenCode manually inside repo/.')
183
- return
180
+ log('Opening OpenCode in repo/')
181
+ const repoPath = path.join(targetPath, 'repo')
182
+ let result
183
+
184
+ if (process.platform === 'win32') {
185
+ result = await runCommand('cmd', ['/c', 'opencode'], {
186
+ stdio: 'inherit',
187
+ cwd: repoPath,
188
+ })
189
+ } else {
190
+ const shellPath = process.env.SHELL && await pathExists(process.env.SHELL)
191
+ ? process.env.SHELL
192
+ : await resolveCommand('zsh') || await resolveCommand('bash') || await resolveCommand('sh')
193
+
194
+ if (!shellPath) {
195
+ warn('No usable shell was found to launch OpenCode automatically. Launch OpenCode manually inside repo/.')
196
+ return
197
+ }
198
+
199
+ result = await runCommand(shellPath, ['-lc', 'opencode'], {
200
+ stdio: 'inherit',
201
+ cwd: repoPath,
202
+ })
184
203
  }
185
204
 
186
- log('Opening OpenCode in repo/')
187
- const result = await runCommand(opencodeCommand, [], {
188
- stdio: 'inherit',
189
- cwd: path.join(targetPath, 'repo'),
190
- })
191
205
  if (result.code !== 0) {
192
206
  warn(`Failed to launch OpenCode automatically (${result.stderr || `exit code ${result.code}`}). Launch it manually inside repo/.`)
193
207
  }
package/src/install.js CHANGED
@@ -670,6 +670,7 @@ async function installSkills(paths) {
670
670
  async function installSlopmachineAssets(paths) {
671
671
  const source = path.join(assetsRoot(), 'slopmachine')
672
672
  await ensureDir(paths.slopmachineDir)
673
+ await ensureDir(path.join(paths.slopmachineDir, 'retrospectives'))
673
674
  const summary = { installed: [], refreshed: [] }
674
675
 
675
676
  for (const relativePath of REQUIRED_SLOPMACHINE_FILES) {
@@ -736,7 +737,17 @@ async function mergeOpencodeConfig(paths, options) {
736
737
  log(`Updated ${paths.opencodeConfigPath}`)
737
738
  }
738
739
 
739
- async function maybeInstallPluginBinary() {
740
+ function hasConfiguredPlugin(existingConfig, pluginName) {
741
+ const plugins = Array.isArray(existingConfig?.plugin) ? existingConfig.plugin : []
742
+ return plugins.includes(pluginName)
743
+ }
744
+
745
+ async function maybeInstallPluginBinary(existingConfig) {
746
+ if (hasConfiguredPlugin(existingConfig, 'oc-chatgpt-multi-auth')) {
747
+ log('OpenCode plugin already configured: oc-chatgpt-multi-auth')
748
+ return
749
+ }
750
+
740
751
  if (process.env.SLOPMACHINE_PLUGIN_BOOTSTRAP === '0') {
741
752
  return
742
753
  }
@@ -812,8 +823,8 @@ export async function runInstall() {
812
823
  const assetSummary = await installSlopmachineAssets(paths)
813
824
 
814
825
  section('OpenCode Config')
815
- await maybeInstallPluginBinary()
816
826
  const existingConfig = (await readJsonIfExists(paths.opencodeConfigPath)) || null
827
+ await maybeInstallPluginBinary(existingConfig)
817
828
  const keys = await collectApiKeys(existingConfig)
818
829
  await mergeOpencodeConfig(paths, keys)
819
830