theslopmachine 0.3.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MANUAL.md +13 -9
- package/README.md +163 -3
- package/RELEASE.md +11 -3
- package/assets/agents/developer-v2.md +86 -0
- package/assets/agents/developer.md +21 -23
- package/assets/agents/slopmachine-v2.md +219 -0
- package/assets/agents/slopmachine.md +56 -38
- package/assets/skills/beads-operations/SKILL.md +32 -31
- package/assets/skills/beads-operations-v2/SKILL.md +82 -0
- package/assets/skills/clarification-gate/SKILL.md +8 -1
- package/assets/skills/clarification-gate-v2/SKILL.md +74 -0
- package/assets/skills/developer-session-lifecycle/SKILL.md +45 -14
- package/assets/skills/developer-session-lifecycle-v2/SKILL.md +148 -0
- package/assets/skills/development-guidance-v2/SKILL.md +60 -0
- package/assets/skills/evaluation-triage-v2/SKILL.md +38 -0
- package/assets/skills/final-evaluation-orchestration/SKILL.md +9 -11
- package/assets/skills/final-evaluation-orchestration-v2/SKILL.md +57 -0
- package/assets/skills/get-overlays/SKILL.md +77 -6
- package/assets/skills/hardening-gate-v2/SKILL.md +64 -0
- package/assets/skills/integrated-verification-v2/SKILL.md +47 -0
- package/assets/skills/owner-evidence-discipline-v2/SKILL.md +15 -0
- package/assets/skills/planning-gate/SKILL.md +6 -4
- package/assets/skills/planning-gate-v2/SKILL.md +91 -0
- package/assets/skills/planning-guidance-v2/SKILL.md +100 -0
- package/assets/skills/remediation-guidance-v2/SKILL.md +31 -0
- package/assets/skills/report-output-discipline-v2/SKILL.md +15 -0
- package/assets/skills/scaffold-guidance-v2/SKILL.md +57 -0
- package/assets/skills/session-rollover-v2/SKILL.md +41 -0
- package/assets/skills/submission-packaging/SKILL.md +147 -115
- package/assets/skills/submission-packaging-v2/SKILL.md +142 -0
- package/assets/skills/verification-gates/SKILL.md +44 -16
- package/assets/skills/verification-gates-v2/SKILL.md +102 -0
- package/assets/slopmachine/backend-evaluation-prompt.md +9 -2
- package/assets/slopmachine/frontend-evaluation-prompt.md +9 -2
- package/assets/slopmachine/templates/AGENTS-v2.md +55 -0
- package/assets/slopmachine/templates/AGENTS.md +20 -17
- package/assets/slopmachine/tracker-init.js +104 -0
- package/assets/slopmachine/workflow-init-v2.js +99 -0
- package/package.json +1 -1
- package/src/constants.js +22 -3
- package/src/init.js +33 -28
- package/src/install.js +186 -140
- package/src/utils.js +19 -0
- package/assets/slopmachine/beads-init.js +0 -439
|
@@ -13,6 +13,15 @@ Use this skill after development begins whenever you are reviewing developer wor
|
|
|
13
13
|
- Treat it as owner-side review and gate guidance, not developer-visible text.
|
|
14
14
|
- Use `get-overlays` as the source of truth for developer-facing execution guidance.
|
|
15
15
|
- Use this skill as the source of truth for owner-side verification, review pressure, and gate interpretation.
|
|
16
|
+
- outside the final evaluation decision, do not pause execution for human approval while using this skill; continue reviewing, rejecting, fixing, and rerunning until the work qualifies
|
|
17
|
+
|
|
18
|
+
## Documentation And Repo Hygiene
|
|
19
|
+
|
|
20
|
+
- maintain the owner-managed external docs in parent-root `../docs/` from accepted clarification, accepted planning, accepted major implementation changes, and hardening verification
|
|
21
|
+
- keep `README.md` codebase-specific, junior-friendly, and separate from the external docs set
|
|
22
|
+
- do not allow `.env` files or env-file variants anywhere in the repo tree
|
|
23
|
+
- do not allow a project whose from-scratch startup depends on a preexisting `.env` file in the repo or package
|
|
24
|
+
- if env-file format is needed at runtime, it must be generated ephemerally from Docker-provided runtime variables rather than stored in the repo or package
|
|
16
25
|
|
|
17
26
|
## Review standard
|
|
18
27
|
|
|
@@ -35,18 +44,20 @@ Use this skill after development begins whenever you are reviewing developer wor
|
|
|
35
44
|
- do not accept frontend-bearing slice completion without checking production build health when the change materially affects frontend code or tooling
|
|
36
45
|
- do not accept module completion that ignores integration seams or cross-cutting consistency with the existing system
|
|
37
46
|
- do not accept end-to-end evidence that bypasses a required user-facing or admin-facing surface with direct API shortcuts
|
|
47
|
+
- do not accept mocked APIs as integration evidence; integration verification must use real HTTP requests against the actual running service surface
|
|
38
48
|
|
|
39
49
|
## Verify-fix loop
|
|
40
50
|
|
|
41
51
|
- inspect the result and the evidence, not just the developer's confidence
|
|
42
52
|
- review technical quality, prompt alignment, architecture impact, and verification depth of the current work
|
|
43
|
-
- during normal implementation iteration, prefer fast local language-native or framework-native verification for the changed area instead of
|
|
53
|
+
- during normal implementation iteration, always prefer fast local language-native or framework-native verification for the changed area instead of `docker compose up --build` or `run_tests.sh`
|
|
44
54
|
- require the developer to set up and use the project-appropriate local test environment in the current working directory when normal local verification is needed
|
|
45
|
-
-
|
|
55
|
+
- require local runtime proof when relevant by starting the server locally and exercising the changed behavior directly rather than jumping to Docker-based proof
|
|
56
|
+
- if the local toolchain is missing, require the developer to install or enable it first; do not jump to `run_tests.sh` during ordinary iteration just because local setup is inconvenient
|
|
46
57
|
- do not accept hand-wavy claims that local verification is unavailable without a real setup attempt and clear explanation
|
|
47
58
|
- for applicable fullstack or UI-bearing work, require local Playwright on affected flows plus screenshot review and explicit UI validation
|
|
48
59
|
- if verification is weak, missing, or failing, require fixes and reruns before acceptance
|
|
49
|
-
- if
|
|
60
|
+
- if documentation or repo hygiene drifts, secrets leak, contracts drift, or frontend integrity is compromised, require cleanup before acceptance
|
|
50
61
|
- keep looping until the current work is genuinely acceptable
|
|
51
62
|
|
|
52
63
|
## Heavy-gate definition
|
|
@@ -54,30 +65,40 @@ Use this skill after development begins whenever you are reviewing developer wor
|
|
|
54
65
|
- a heavy gate is an owner-run integrated verification boundary, not every ordinary phase change
|
|
55
66
|
- a phase change alone does not automatically require a heavy gate unless that phase's exit criteria explicitly call for one
|
|
56
67
|
- a heavy gate normally means some combination of full clean runtime proof, full `run_tests.sh`, and Playwright plus screenshot evidence when UI or fullstack flows exist
|
|
57
|
-
- heavy gates are required at
|
|
58
|
-
-
|
|
68
|
+
- heavy gates are required only at these milestone boundaries:
|
|
69
|
+
- scaffold acceptance
|
|
70
|
+
- when development/coding is complete and the project enters integrated verification
|
|
71
|
+
- when integrated verification is complete and the project is ready to leave that phase
|
|
72
|
+
- when hardening is complete and the project is ready for final evaluation
|
|
73
|
+
- once more on the final qualified state before submission packaging
|
|
59
74
|
- planning acceptance, ordinary module acceptance, and routine in-phase verification are not heavy gates by default and should rely on targeted local verification unless the risk profile says otherwise
|
|
60
75
|
|
|
61
76
|
## Testing cadence interpretation
|
|
62
77
|
|
|
63
78
|
- the first required `run_tests.sh` pass happens in scaffold once the clean foundation exists
|
|
64
|
-
- after scaffold, do not force `docker compose up --build` or `run_tests.sh` on
|
|
79
|
+
- after scaffold, do not force `docker compose up --build` or `run_tests.sh` on normal development steps
|
|
65
80
|
- prefer local targeted or native test commands during module implementation and ordinary verify-fix iteration
|
|
81
|
+
- prefer local runtime startup and direct local behavior checks instead of Docker whenever runtime proof is needed during ordinary work
|
|
66
82
|
- local verification should run inside the current working directory using the project's own environment and tooling rather than hidden global assumptions
|
|
67
83
|
- during applicable fullstack or UI-bearing implementation work, require local Playwright on affected flows and review screenshots
|
|
68
|
-
-
|
|
84
|
+
- during integrated verification, do not rerun `docker compose up --build` or `run_tests.sh` on every small fix inside the phase; use local verification until the next milestone boundary is reached
|
|
85
|
+
- reserve `docker compose up --build` and `run_tests.sh` for these owner-run milestone checks only:
|
|
86
|
+
- scaffold acceptance
|
|
87
|
+
- development/coding complete -> integrated verification entry
|
|
88
|
+
- integrated verification complete -> hardening entry
|
|
89
|
+
- hardening complete -> final evaluation readiness
|
|
90
|
+
- final qualified state -> submission packaging readiness
|
|
69
91
|
- the workflow owner handles those expensive critical-gate runs; do not require the developer to duplicate them during normal phase progression
|
|
70
|
-
- run
|
|
71
|
-
-
|
|
72
|
-
- run
|
|
73
|
-
- after post-evaluation remediation affecting real flows or UI, rerun Playwright and inspect fresh screenshots before re-acceptance
|
|
92
|
+
- each integrated-verification milestone run must also include Playwright for major flows and screenshot review when UI or fullstack flows exist
|
|
93
|
+
- after post-evaluation remediation, prefer local reruns, affected local runtime checks, and affected Playwright checks
|
|
94
|
+
- after remediation, return the project to the appropriate milestone boundary and run the next owner-run gate there rather than turning every remediation fix into an immediate Docker and `run_tests.sh` rerun
|
|
74
95
|
|
|
75
96
|
## Runtime gate interpretation
|
|
76
97
|
|
|
77
|
-
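Use evidence such as Bead metadata, structured Bead comments, verification command results, and file/project-state checks.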
|
|
98
|
+
Use evidence such as internal metadata files, structured tracker comments, verification command results, and file/project-state checks.
|
|
78
99
|
|
|
79
100
|
- clarification requires the `clarification-gate` conditions plus explicit approval record
|
|
80
|
-
- development bootstrap requires the `developer-session-lifecycle` conditions plus a fresh planning-oriented start
|
|
101
|
+
- development bootstrap requires the `developer-session-lifecycle` conditions plus a fresh planning-oriented start and the required documentation and repo hygiene state when relevant
|
|
81
102
|
- scaffold requires evidence for `docker compose up --build`, `run_tests.sh`, baseline logging/config, and when relevant the chosen frontend stack and UI approach being set intentionally
|
|
82
103
|
- scaffold also requires safe env/config handling, no persisted local secrets, real migration/runtime foundations, and a usable local test environment in the current working directory when practical
|
|
83
104
|
- when scaffold includes prompt-critical security controls, acceptance requires real runtime or endpoint verification of the protection rather than helper-only or shape-only proof
|
|
@@ -86,21 +107,28 @@ Use evidence such as Bead metadata, structured Bead comments, verification comma
|
|
|
86
107
|
- module implementation requires module planning notes, module definition of done, relevant local verification for the changed area, and for applicable fullstack or UI work local Playwright evidence with screenshots, plus docs sync and review acceptance
|
|
87
108
|
- module implementation also requires integration-seam verification against adjacent modules and cross-cutting concerns where relevant, and known release-facing or build failures block acceptance unless explicitly scoped out
|
|
88
109
|
- module implementation acceptance should also challenge tenant isolation, path confinement, sanitized error behavior, and prototype residue when those concerns are in scope
|
|
89
|
-
- integrated verification requires owner-run `docker compose up --build`, owner-run `run_tests.sh`, end-to-end, Playwright, prompt-alignment, README/runtime, and cross-module evidence
|
|
110
|
+
- integrated verification entry requires owner-run `docker compose up --build`, owner-run `run_tests.sh`, end-to-end, Playwright, prompt-alignment, README/runtime, and cross-module evidence once development/coding is complete
|
|
111
|
+
- integrated verification completion requires one more owner-run `docker compose up --build`, one more owner-run `run_tests.sh`, and the corresponding end-to-end and screenshot evidence before the phase can close
|
|
90
112
|
- fullstack integrated verification must include Playwright coverage for every major flow, plus screenshots used to evaluate frontend behavior and UI quality along the flow using `frontend-design`
|
|
91
113
|
- if a required flow cannot be exercised through the intended UI surface, treat that as incomplete implementation rather than acceptable E2E coverage
|
|
92
114
|
- hardening requires security, maintainability, exploratory, and release-freeze evidence
|
|
115
|
+
- hardening completion requires one owner-run `docker compose up --build` and one owner-run `run_tests.sh` on the hardened state before final evaluation begins
|
|
93
116
|
- hardening must explicitly re-check secret handling, redaction, and frontend/backend observability hygiene
|
|
117
|
+
- hardening must explicitly satisfy the documentation and repo hygiene policy in this file before final evaluation can begin
|
|
94
118
|
- final evaluation readiness requires automated evaluation to be complete and triaged, with a clear go-to-packaging vs return-to-fixes decision
|
|
95
|
-
-
|
|
119
|
+
- submission packaging readiness requires one final owner-run `docker compose up --build` and one final owner-run `run_tests.sh` on the final qualified state immediately before packaging
|
|
120
|
+
- remediation requires accepted issue records plus rerun local verification and affected Playwright where applicable; if remediation materially reopens the integrated verification boundary, route it back through integrated verification before re-evaluation
|
|
96
121
|
|
|
97
122
|
## Hardening and pre-evaluation discipline
|
|
98
123
|
|
|
99
124
|
When all planned modules are complete:
|
|
100
125
|
|
|
101
|
-
- run integrated verification
|
|
126
|
+
- run the owner-run milestone gate for development/coding completion and enter integrated verification
|
|
102
127
|
- run hardening and exploratory testing
|
|
103
128
|
- for fullstack applications, rerun Playwright coverage for major flows and inspect screenshots for frontend regressions or weak UX
|
|
129
|
+
- run the documentation and repo-hygiene verification required by this file before final evaluation
|
|
130
|
+
- close integrated verification only after its completion milestone gate has passed
|
|
131
|
+
- close hardening only after its completion milestone gate has passed
|
|
104
132
|
- enforce release-candidate freeze
|
|
105
133
|
- allow only fixes, verification improvements, doc corrections, and packaging work
|
|
106
134
|
- prepare the package and evidence cleanly before the final evaluation decision gate
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: verification-gates-v2
|
|
3
|
+
description: Owner-side review, acceptance, rejection, and gate-interpretation rules for slopmachine-v2.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Verification Gates v2
|
|
7
|
+
|
|
8
|
+
Use this skill after development begins whenever you are reviewing work, deciding acceptance, or interpreting phase exits.
|
|
9
|
+
|
|
10
|
+
## Usage rules
|
|
11
|
+
|
|
12
|
+
- load this skill before review, acceptance, rejection, runtime gate interpretation, hardening readiness decisions, or broad-gate decisions
|
|
13
|
+
- treat it as owner-side review and gate guidance, not developer-visible text
|
|
14
|
+
- use this skill as the source of truth for owner-side verification, review pressure, and gate interpretation
|
|
15
|
+
- outside the final human decision, do not pause execution for human approval while using this skill; continue reviewing, rejecting, fixing, and rerunning until the work qualifies
|
|
16
|
+
|
|
17
|
+
## Documentation and repo hygiene
|
|
18
|
+
|
|
19
|
+
- maintain the owner-managed external docs in parent-root `../docs/` from accepted clarification, accepted planning, accepted major implementation changes, and hardening verification
|
|
20
|
+
- keep `README.md` codebase-specific, junior-friendly, and separate from the external docs set
|
|
21
|
+
- do not allow `.env` files or env-file variants anywhere in the repo tree
|
|
22
|
+
- do not allow a project whose from-scratch startup depends on a preexisting `.env` file in the repo or package
|
|
23
|
+
- if env-file format is needed at runtime, it must be generated ephemerally from Docker-provided runtime variables rather than stored in the repo or package
|
|
24
|
+
|
|
25
|
+
## Review standard
|
|
26
|
+
|
|
27
|
+
- do not accept fake tests, weak evidence, documentation drift, missing real surfaces, or unresolved release-facing failures
|
|
28
|
+
- do not accept mocked APIs as integration evidence
|
|
29
|
+
- do not accept placeholder or demo UI in product-facing flows
|
|
30
|
+
- do not accept `.env` files or similar env-file artifacts
|
|
31
|
+
- do not accept shallow Docker verification
|
|
32
|
+
- do not accept happy-path-only implementation when failure paths matter
|
|
33
|
+
- do not accept unsupported claims
|
|
34
|
+
- do not accept work that looks complete but is not resilient
|
|
35
|
+
- do not accept committed secrets, hardcoded sensitive values, or sloppy env handling
|
|
36
|
+
- do not accept frontend/backend drift in fullstack work
|
|
37
|
+
- do not accept missing end-to-end coverage for major fullstack flows
|
|
38
|
+
- do not accept UI claims without screenshot-backed Playwright evidence when the change affects real frontend behavior
|
|
39
|
+
- do not accept prototype residue such as seeded credentials, weak demo defaults, login hints, or unsanitized user-facing error behavior
|
|
40
|
+
- do not accept multi-tenant or cross-user security claims without negative isolation evidence when that boundary matters
|
|
41
|
+
- do not accept file-bearing flows without path confinement and traversal-style validation when that boundary matters
|
|
42
|
+
- do not accept partial foundation work for complex features when the prompt implies broader usable scope, infrastructure depth, or security depth than what was actually delivered
|
|
43
|
+
- do not accept frontend-bearing slice completion without checking production build health when the change materially affects frontend code or tooling
|
|
44
|
+
- do not accept module completion that ignores integration seams or cross-cutting consistency with the existing system
|
|
45
|
+
- do not accept end-to-end evidence that bypasses a required user-facing or admin-facing surface with direct API shortcuts
|
|
46
|
+
|
|
47
|
+
## Cadence rule
|
|
48
|
+
|
|
49
|
+
- use targeted local verification as the default during scaffold corrections, development, hardening, and remediation
|
|
50
|
+
- reserve the broad Docker/full-suite path for the limited owner-run gate moments in the workflow budget
|
|
51
|
+
- do not turn ordinary acceptance into repeated integrated-style gate runs
|
|
52
|
+
|
|
53
|
+
## Verify-fix loop
|
|
54
|
+
|
|
55
|
+
- inspect the result and evidence, not just the developer claim
|
|
56
|
+
- review technical quality, prompt alignment, architecture impact, and verification depth of the current work
|
|
57
|
+
- during normal implementation iteration, always prefer fast local language-native or framework-native verification for the changed area instead of broad Docker or full-suite proof
|
|
58
|
+
- require the developer to set up and use the project-appropriate local test environment in the current working directory when normal local verification is needed
|
|
59
|
+
- require local runtime proof when relevant by starting the server locally and exercising the changed behavior directly rather than jumping to Docker-based proof
|
|
60
|
+
- if the local toolchain is missing, require the developer to install or enable it first; do not jump to the broad gate path during ordinary iteration just because local setup is inconvenient
|
|
61
|
+
- do not accept hand-wavy claims that local verification is unavailable without a real setup attempt and clear explanation
|
|
62
|
+
- for applicable fullstack or UI-bearing work, require local Playwright on affected flows plus screenshot review and explicit UI validation
|
|
63
|
+
- if verification is weak, missing, or failing, require fixes and reruns before acceptance
|
|
64
|
+
- if documentation or repo hygiene drifts, secrets leak, contracts drift, or frontend integrity is compromised, require cleanup before acceptance
|
|
65
|
+
- keep looping until the current work is genuinely acceptable
|
|
66
|
+
|
|
67
|
+
## Broad-gate definition
|
|
68
|
+
|
|
69
|
+
- a broad gate is an owner-run integrated verification boundary, not every ordinary phase change
|
|
70
|
+
- a phase change alone does not automatically require a broad gate unless that phase exit explicitly calls for one
|
|
71
|
+
- a broad gate normally means some combination of full clean runtime proof, the broad `run_tests.sh` path, and Playwright plus screenshot evidence when UI or fullstack flows exist
|
|
72
|
+
- in v2, the workflow target is at most 3 broad owner-run verification moments across the whole cycle
|
|
73
|
+
- ordinary planning, ordinary slice acceptance, and routine in-phase verification are not broad gates by default and should rely on targeted local verification unless the risk profile says otherwise
|
|
74
|
+
|
|
75
|
+
## Runtime gate interpretation
|
|
76
|
+
|
|
77
|
+
Use evidence such as internal metadata files, structured Beads comments, verification command results, and file/project-state checks.
|
|
78
|
+
|
|
79
|
+
- clarification requires the `clarification-gate-v2` conditions plus explicit approval record
|
|
80
|
+
- planning requires the `developer-session-lifecycle-v2` and `planning-gate-v2` conditions plus a fresh planning-oriented start and the required documentation and repo hygiene state when relevant
|
|
81
|
+
- scaffold requires evidence for the bounded scaffold gate, baseline logging/config, and when relevant the chosen frontend stack and UI approach being set intentionally
|
|
82
|
+
- scaffold also requires safe env/config handling, no persisted local secrets, real migration/runtime foundations, and a usable local test environment in the current working directory when practical
|
|
83
|
+
- when scaffold includes prompt-critical security controls, acceptance requires real runtime or endpoint verification of the protection rather than helper-only or shape-only proof
|
|
84
|
+
- for security-bearing scaffolds, require applicable rejection evidence such as stale replay rejection, nonce reuse rejection, CSRF rejection on protected mutations, lockout triggering when lockout is in scope, or equivalent proof that the control is truly enforced
|
|
85
|
+
- scaffold acceptance also requires self-contained Compose namespacing, no unnecessary fragile `container_name` usage, and clean startup plus teardown behavior in the intended shared-environment model
|
|
86
|
+
- module implementation requires module planning notes, module definition of done, relevant local verification for the changed area, and for applicable fullstack or UI work local Playwright evidence with screenshots, plus docs sync and review acceptance
|
|
87
|
+
- module implementation acceptance should challenge tenant isolation, path confinement, sanitized error behavior, prototype residue, integration seams, and cross-cutting consistency when those concerns are in scope
|
|
88
|
+
- integrated verification entry requires one of the limited owner-run broad gate moments once development is complete
|
|
89
|
+
- integrated verification completion requires explicit full-system evidence before the phase can close
|
|
90
|
+
- fullstack integrated verification must include Playwright coverage for every major flow, plus screenshots used to evaluate frontend behavior and UI quality along the flow using `frontend-design`
|
|
91
|
+
- if a required flow cannot be exercised through the intended UI surface, treat that as incomplete implementation rather than acceptable E2E coverage
|
|
92
|
+
- hardening requires security, maintainability, exploratory, and release-freeze evidence
|
|
93
|
+
- hardening must explicitly re-check secret handling, redaction, and frontend/backend observability hygiene
|
|
94
|
+
- hardening must explicitly satisfy the documentation and repo hygiene policy in this file before final evaluation can begin
|
|
95
|
+
- final evaluation readiness requires automated evaluation to be complete and triaged, with a clear go-to-packaging vs return-to-fixes decision
|
|
96
|
+
- remediation requires accepted issue records plus rerun local verification and affected Playwright where applicable; if remediation materially reopens the integrated verification boundary, route it back through integrated verification before re-evaluation
|
|
97
|
+
|
|
98
|
+
## Acceptance rule
|
|
99
|
+
|
|
100
|
+
- inspect the result and evidence, not just the developer claim
|
|
101
|
+
- prefer one strong rejection with a concrete correction request over many small nudges
|
|
102
|
+
- keep looping until the current work is genuinely acceptable
|
|
@@ -271,5 +271,12 @@ Before finalizing, check all of the following:
|
|
|
271
271
|
5. Has security or test sufficiency been judged too loosely without evidence?
|
|
272
272
|
6. Has any Docker non-execution boundary been incorrectly described as a confirmed runtime failure?
|
|
273
273
|
|
|
274
|
-
|
|
275
|
-
|
|
274
|
+
Save the full final report to a markdown file.
|
|
275
|
+
|
|
276
|
+
In-chat, respond with a small summary of the results only:
|
|
277
|
+
|
|
278
|
+
- final verdict
|
|
279
|
+
- top 1-3 findings
|
|
280
|
+
- report file path
|
|
281
|
+
|
|
282
|
+
Please confirm whether the current project tests are genuine and effective rather than superficial or fake tests, whether the API tests actually invoke real HTTP endpoints, and whether they cover more than 90% of the overall API surface.
|
|
@@ -300,5 +300,12 @@ Before finalizing, check all of the following:
|
|
|
300
300
|
6. Has a Docker non-execution boundary been incorrectly described as a confirmed runtime failure?
|
|
301
301
|
7. Has any material conclusion directly or indirectly relied on files under ./.tmp/?
|
|
302
302
|
|
|
303
|
-
|
|
304
|
-
|
|
303
|
+
Save the full final report to a markdown file.
|
|
304
|
+
|
|
305
|
+
In-chat, respond with a small summary of the results only:
|
|
306
|
+
|
|
307
|
+
- final verdict
|
|
308
|
+
- top 1-3 findings
|
|
309
|
+
- report file path
|
|
310
|
+
|
|
311
|
+
Please confirm whether the current project tests are genuine and effective rather than superficial or fake tests, whether the API tests actually invoke real HTTP endpoints, and whether they cover more than 90% of the overall API surface.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Developer Rulebook v2
|
|
2
|
+
|
|
3
|
+
This file is the repo-local engineering rulebook for `slopmachine-v2` projects.
|
|
4
|
+
|
|
5
|
+
## Scope
|
|
6
|
+
|
|
7
|
+
- Treat the current working directory as the project.
|
|
8
|
+
- Ignore parent-directory workflow files unless the user explicitly asks you to use them.
|
|
9
|
+
- Do not treat workflow research, session exports, or sibling directories as hidden implementation instructions.
|
|
10
|
+
|
|
11
|
+
## Working Style
|
|
12
|
+
|
|
13
|
+
- Operate like a strong senior engineer.
|
|
14
|
+
- Read the code before making assumptions.
|
|
15
|
+
- Work in meaningful vertical slices.
|
|
16
|
+
- Do not call work complete while it is still shaky.
|
|
17
|
+
- Reuse and extend shared cross-cutting patterns instead of inventing incompatible local ones.
|
|
18
|
+
|
|
19
|
+
## Verification Rules
|
|
20
|
+
|
|
21
|
+
- During ordinary iteration, prefer the fastest meaningful local verification for the changed area.
|
|
22
|
+
- Prefer targeted unit, integration, module, route-family, or local Playwright checks over broad reruns.
|
|
23
|
+
- Do not rerun full Dockerized startup and the full test suite on every small change.
|
|
24
|
+
- The broad owner-run Docker/full-suite path should be used sparingly, with a target budget of at most 3 times across the whole workflow cycle.
|
|
25
|
+
- If you run a Docker-based verification command sequence, end it with `docker compose down` unless containers must remain up.
|
|
26
|
+
|
|
27
|
+
## Testing Rules
|
|
28
|
+
|
|
29
|
+
- Tests must be real and tied to actual behavior.
|
|
30
|
+
- Do not mock APIs for integration testing.
|
|
31
|
+
- Use real HTTP requests against the actual running service surface for integration evidence.
|
|
32
|
+
- For UI-bearing work, use local Playwright on affected flows and inspect screenshots when practical.
|
|
33
|
+
|
|
34
|
+
## Documentation Rules
|
|
35
|
+
|
|
36
|
+
- Keep `README.md` and any codebase-local docs accurate.
|
|
37
|
+
- The README must explain what the project is, what it does, how to run it, and how to test it.
|
|
38
|
+
- The README must stand on its own for basic codebase use.
|
|
39
|
+
|
|
40
|
+
## Secret And Runtime Rules
|
|
41
|
+
|
|
42
|
+
- Do not create or keep `.env` files anywhere in the repo.
|
|
43
|
+
- Do not rely on `.env`, `.env.local`, `.env.example`, or similar files for project startup.
|
|
44
|
+
- Do not hardcode secrets.
|
|
45
|
+
- If runtime env-file format is required, generate it ephemerally and do not commit or package it.
|
|
46
|
+
|
|
47
|
+
## Product Integrity Rules
|
|
48
|
+
|
|
49
|
+
- Do not leave placeholder, setup, debug, or demo content in product-facing UI.
|
|
50
|
+
- If a real user-facing or admin-facing surface is required, build that surface instead of bypassing it with API shortcuts.
|
|
51
|
+
- Treat missing real surfaces as incomplete implementation.
|
|
52
|
+
|
|
53
|
+
## Rulebook Files
|
|
54
|
+
|
|
55
|
+
- Do not edit `AGENTS.md` or other workflow/rulebook files unless explicitly asked.
|
|
@@ -6,7 +6,7 @@ This file is the developer-facing operating rulebook for project execution.
|
|
|
6
6
|
|
|
7
7
|
- Treat the current working directory as the project.
|
|
8
8
|
- Ignore files outside the current working directory unless the user explicitly asks you to use them.
|
|
9
|
-
- Do not use parent-directory files as hidden requirements.
|
|
9
|
+
- Do not use unrelated parent-directory files as hidden requirements.
|
|
10
10
|
|
|
11
11
|
## Working Style
|
|
12
12
|
|
|
@@ -26,19 +26,16 @@ This file is the developer-facing operating rulebook for project execution.
|
|
|
26
26
|
|
|
27
27
|
## Runtime And Verification Rules
|
|
28
28
|
|
|
29
|
-
- A heavy gate is an owner-run integrated verification boundary, not every ordinary phase change.
|
|
30
|
-
- Heavy gates normally include full clean runtime proof, full `run_tests.sh`, and Playwright plus screenshot evidence when UI or fullstack flows exist.
|
|
31
|
-
- Heavy gates are expected at scaffold acceptance, integrated/full verification, and post-evaluation remediation re-acceptance.
|
|
32
|
-
- Ordinary phase progression and module completion do not automatically mean rerunning every heavy-gate command.
|
|
33
29
|
- Treat Docker as the main runtime contract.
|
|
34
|
-
- `docker compose up --build`
|
|
30
|
+
- `docker compose up --build` must work when the project expects Dockerized execution, but it is not the default per-turn verification command during normal iteration.
|
|
35
31
|
- `run_tests.sh` is a required project test entrypoint and must exist and work.
|
|
36
32
|
- After scaffold is established, do not rerun full `docker compose up --build` and `run_tests.sh` on every small implementation step.
|
|
37
33
|
- During normal iteration, prefer the fastest meaningful local verification inside the current working directory using the project-appropriate test environment and tooling.
|
|
38
|
-
-
|
|
39
|
-
-
|
|
40
|
-
- The workflow owner
|
|
41
|
-
-
|
|
34
|
+
- When runtime proof is needed during ordinary work, prefer starting and exercising the app locally instead of using Docker gate commands.
|
|
35
|
+
- If the local test toolchain is missing, try to install or enable it; do not jump to `run_tests.sh` on ordinary turns just because local setup takes work.
|
|
36
|
+
- The workflow owner runs `run_tests.sh` only at milestone boundaries: after scaffold, after development/coding is complete, after integrated verification is complete, after hardening is complete, and once more before final submission.
|
|
37
|
+
- Do not rerun `docker compose up --build` or `run_tests.sh` on every small fix inside integrated verification; use local verification until the next milestone boundary is reached.
|
|
38
|
+
- After post-evaluation remediation, strengthen local verification and affected Playwright checks rather than rerunning full gate commands yourself unless explicitly required.
|
|
42
39
|
- Do not let unverified work accumulate.
|
|
43
40
|
|
|
44
41
|
## Testing Rules
|
|
@@ -46,6 +43,7 @@ This file is the developer-facing operating rulebook for project execution.
|
|
|
46
43
|
- Tests must be real, meaningful, and tied to actual behavior.
|
|
47
44
|
- Cover happy paths, failure paths, and realistic edge cases.
|
|
48
45
|
- For API-bearing projects, prefer real endpoint invocation where practical.
|
|
46
|
+
- Do not mock APIs for integration testing; integration evidence must use real HTTP requests against the actual running service surface.
|
|
49
47
|
- For backend integration tests, prefer production-equivalent infrastructure when practical instead of a weaker substitute that can hide real defects.
|
|
50
48
|
- For applicable frontend or fullstack work, run local Playwright against affected end-to-end flows during implementation and inspect screenshots to verify the UI actually matches.
|
|
51
49
|
- Do not pad the test suite with superficial or fake tests.
|
|
@@ -53,6 +51,8 @@ This file is the developer-facing operating rulebook for project execution.
|
|
|
53
51
|
|
|
54
52
|
## Frontend Product Integrity
|
|
55
53
|
|
|
54
|
+
- Unless the prompt, existing repository, or established stack clearly dictates otherwise, default frontend work to Tailwind CSS for styling and `shadcn/ui` for component primitives.
|
|
55
|
+
- If the existing project already uses a different UI system, preserve and extend that system instead of forcing Tailwind CSS or `shadcn/ui` into it.
|
|
56
56
|
- Do not place development, setup, scaffold, seed, or debug information in the product UI.
|
|
57
57
|
- Do not add demo banners, `database is working` messages, scaffold-password hints, setup reminders, or similar developer-facing content to frontend screens.
|
|
58
58
|
- If a screen exists, it should serve the real user or operator purpose it was created for.
|
|
@@ -61,12 +61,11 @@ This file is the developer-facing operating rulebook for project execution.
|
|
|
61
61
|
## Documentation Rules
|
|
62
62
|
|
|
63
63
|
- Keep docs aligned with the current implementation.
|
|
64
|
-
- During development, keep working technical docs under `docs/`.
|
|
65
|
-
- Maintain a test-coverage document under `docs/` that explains the major flow coverage, the relevant test entry points, and any important coverage boundaries.
|
|
66
64
|
- Do not add or keep tests that only assert that docs directories or docs files exist.
|
|
67
|
-
-
|
|
68
|
-
-
|
|
69
|
-
- The README must explain what the project is, how to run it, how to test it
|
|
65
|
+
- Documentation structure outside the repo is not application behavior and should not be tested as application logic.
|
|
66
|
+
- Keep `README.md` and any codebase-local docs accurate when behavior, runtime steps, or verification expectations change.
|
|
67
|
+
- The README must explain what the project is, what it does, how to run it, and how to test it in a way that is friendly to a junior developer.
|
|
68
|
+
- The README must stand on its own for basic codebase use and must not depend on separate external documentation for run/test basics.
|
|
70
69
|
- Do not leave misleading docs in place after changing behavior.
|
|
71
70
|
|
|
72
71
|
## Engineering Quality Rules
|
|
@@ -78,10 +77,14 @@ This file is the developer-facing operating rulebook for project execution.
|
|
|
78
77
|
|
|
79
78
|
## Secret Handling Rules
|
|
80
79
|
|
|
80
|
+
- Do not create, keep, or rely on `.env` files anywhere in the codebase.
|
|
81
|
+
- Treat `.env`, `.env.local`, `.env.example`, and similar env-file variants as forbidden artifacts.
|
|
81
82
|
- Do not persist local secrets anywhere in the repository.
|
|
82
83
|
- Do not hardcode credentials, API keys, tokens, signing material, database passwords, certificate private keys, or similar sensitive values in code.
|
|
83
|
-
-
|
|
84
|
-
-
|
|
84
|
+
- Do not use env files even for placeholders or setup examples.
|
|
85
|
+
- The delivered repo and package must not require any preexisting `.env` file to start from scratch.
|
|
86
|
+
- If environment variables are needed, rely on Docker-provided runtime variables or generate any required env-file format ephemerally at runtime from those variables.
|
|
87
|
+
- If runtime generation is used, the generated env file must not be committed, packaged, or treated as a persistent project artifact.
|
|
85
88
|
- Do not leak raw secrets into logs, docs, screenshots, telemetry, or operator-facing UI.
|
|
86
89
|
- Treat frontend and backend observability paths as secret-sensitive by default and redact accordingly.
|
|
87
90
|
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import fs from 'node:fs/promises'
|
|
4
|
+
import path from 'node:path'
|
|
5
|
+
import { spawn } from 'node:child_process'
|
|
6
|
+
|
|
7
|
+
const targetInput = process.argv[2] || '.'
|
|
8
|
+
const target = path.resolve(process.cwd(), targetInput)
|
|
9
|
+
const trackerCommand = process.env.BR_COMMAND || 'br'
|
|
10
|
+
|
|
11
|
+
/**
 * Print an informational line on stdout, tagged with the script prefix.
 *
 * @param {string} message - Text shown after the `[tracker-init]` tag.
 */
function log(message) {
  const line = `[tracker-init] ${message}`
  console.log(line)
}
|
|
14
|
+
|
|
15
|
+
/**
 * Report a fatal problem on stderr and terminate the process with exit status 1.
 *
 * @param {string} message - Description of the failure.
 */
function die(message) {
  const line = `[tracker-init] ERROR: ${message}`
  console.error(line)
  process.exit(1)
}
|
|
19
|
+
|
|
20
|
+
/**
 * Spawn a child process without a shell and collect everything it prints.
 *
 * @param {string} command - Executable to launch.
 * @param {string[]} args - Arguments handed to the executable verbatim.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Environment for the child; defaults to `process.env`.
 * @param {string|Array} [options.stdio] - stdio configuration; defaults to `'pipe'`.
 * @param {string|Buffer} [options.input] - Data written to the child's stdin before it is closed.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Settles once the
 *   child's stdio has closed. `code` falls back to 1 when the child reports no exit
 *   code. The promise rejects when the child emits an `error` event (for example,
 *   the executable could not be spawned).
 */
function run(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: options.cwd,
      env: options.env || process.env,
      stdio: options.stdio || 'pipe',
      shell: false,
    })

    let stdout = ''
    let stderr = ''

    // Let the stream decode UTF-8 so that a multi-byte character split across
    // two chunks is reassembled instead of being turned into replacement chars.
    if (child.stdout) {
      child.stdout.setEncoding('utf8')
      child.stdout.on('data', (chunk) => { stdout += chunk })
    }
    if (child.stderr) {
      child.stderr.setEncoding('utf8')
      child.stderr.on('data', (chunk) => { stderr += chunk })
    }

    // Always close a piped stdin, even when there is nothing to send; a child
    // that reads stdin would otherwise wait forever for end-of-input.
    if (child.stdin) {
      if (options.input !== undefined) {
        child.stdin.end(options.input)
      } else {
        child.stdin.end()
      }
    }

    child.on('error', reject)
    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }))
  })
}
|
|
44
|
+
|
|
45
|
+
/**
 * Check whether a command can be located by the platform's lookup tool
 * (`where` on Windows, `which` everywhere else).
 *
 * @param {string} command - Command name to look up.
 * @returns {Promise<boolean>} True when the lookup tool exits with status 0.
 */
async function commandExists(command) {
  let lookupTool = 'which'
  if (process.platform === 'win32') {
    lookupTool = 'where'
  }
  const { code } = await run(lookupTool, [command])
  return code === 0
}
|
|
50
|
+
|
|
51
|
+
/**
 * Tell whether a filesystem entry can be accessed.
 *
 * @param {string} targetPath - Path to probe.
 * @returns {Promise<boolean>} True when `fs.access` succeeds, false when it fails
 *   for any reason.
 */
async function pathExists(targetPath) {
  return fs.access(targetPath).then(
    () => true,
    () => false,
  )
}
|
|
59
|
+
|
|
60
|
+
/**
 * Run the tracker executable with the target directory as its working directory.
 *
 * @param {string[]} args - Arguments for the tracker command.
 * @param {object} [options] - Extra options for run(); they override the default `cwd`.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Result of run().
 */
async function runTracker(args, options = {}) {
  const spawnOptions = { cwd: target, ...options }
  return run(trackerCommand, args, spawnOptions)
}
|
|
63
|
+
|
|
64
|
+
/**
 * Initialize the tracker workspace in the target directory and export its JSONL.
 *
 * Steps:
 *   1. Verify that the tracker executable can be found.
 *   2. Verify that the target exists and is a directory.
 *   3. Run `init --quiet` unless a `.beads` directory is already present.
 *   4. Run `sync --flush-only`.
 *
 * Every failure is reported through die(), which exits the process with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // A BR_COMMAND override may be a file path or a bare command name that is
  // resolved through PATH, so accept either form.
  const trackerAvailable = trackerCommand === 'br'
    ? await commandExists('br')
    : (await pathExists(trackerCommand)) || (await commandExists(trackerCommand))
  if (!trackerAvailable) {
    die(`'${trackerCommand}' is not available. Install beads_rust first.`)
  }

  if (!(await pathExists(target))) {
    die(`Target directory '${targetInput}' does not exist or is not accessible.`)
  }
  // The target becomes the child's working directory, so a plain file is not usable.
  if (!(await fs.stat(target)).isDirectory()) {
    die(`Target '${targetInput}' is not a directory.`)
  }

  log(`Target: ${target}`)

  // Both tracker invocations run with CI=1 added to the inherited environment.
  const trackerEnv = { ...process.env, CI: '1' }

  const beadsDir = path.join(target, '.beads')
  if (!(await pathExists(beadsDir))) {
    log(`Running '${trackerCommand} init --quiet'`)
    const initResult = await runTracker(['init', '--quiet'], { env: trackerEnv })
    if (initResult.code !== 0) {
      console.error(`${initResult.stdout}${initResult.stderr}`.trim())
      die(`'${trackerCommand} init' failed. Review output above.`)
    }
  } else {
    log('Found existing .beads; skipping init to avoid destructive re-initialization')
  }

  log(`Running '${trackerCommand} sync --flush-only'`)
  const syncResult = await runTracker(['sync', '--flush-only'], { env: trackerEnv })
  if (syncResult.code !== 0) {
    console.error(`${syncResult.stdout}${syncResult.stderr}`.trim())
    die(`'${trackerCommand} sync --flush-only' failed. Review output above.`)
  }

  log('Success: tracker initialized and JSONL exported')
}
|
|
101
|
+
|
|
102
|
+
// Script entry point: a rejection from main() is reported through die().
main().catch((failure) => {
  const reason = failure instanceof Error ? failure.message : String(failure)
  die(reason)
})
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import fs from 'node:fs/promises'
|
|
4
|
+
import path from 'node:path'
|
|
5
|
+
import { spawn } from 'node:child_process'
|
|
6
|
+
|
|
7
|
+
const targetInput = process.argv[2] || '.'
|
|
8
|
+
const target = path.resolve(process.cwd(), targetInput)
|
|
9
|
+
const beadsCommand = process.env.BR_COMMAND || 'br'
|
|
10
|
+
|
|
11
|
+
/**
 * Print an informational line on stdout, tagged with the script prefix.
 *
 * @param {string} message - Text shown after the `[workflow-init-v2]` tag.
 */
function log(message) {
  const line = `[workflow-init-v2] ${message}`
  console.log(line)
}
|
|
14
|
+
|
|
15
|
+
/**
 * Report a fatal problem on stderr and terminate the process with exit status 1.
 *
 * @param {string} message - Description of the failure.
 */
function die(message) {
  const line = `[workflow-init-v2] ERROR: ${message}`
  console.error(line)
  process.exit(1)
}
|
|
19
|
+
|
|
20
|
+
/**
 * Spawn a child process without a shell and collect everything it prints.
 *
 * @param {string} command - Executable to launch.
 * @param {string[]} args - Arguments handed to the executable verbatim.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Environment for the child; defaults to `process.env`.
 * @param {string|Array} [options.stdio] - stdio configuration; defaults to `'pipe'`.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Settles once the
 *   child's stdio has closed. `code` falls back to 1 when the child reports no exit
 *   code. The promise rejects when the child emits an `error` event (for example,
 *   the executable could not be spawned).
 */
function run(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: options.cwd,
      env: options.env || process.env,
      stdio: options.stdio || 'pipe',
      shell: false,
    })

    let stdout = ''
    let stderr = ''

    // Let the stream decode UTF-8 so that a multi-byte character split across
    // two chunks is reassembled instead of being turned into replacement chars.
    if (child.stdout) {
      child.stdout.setEncoding('utf8')
      child.stdout.on('data', (chunk) => { stdout += chunk })
    }
    if (child.stderr) {
      child.stderr.setEncoding('utf8')
      child.stderr.on('data', (chunk) => { stderr += chunk })
    }

    // Nothing is ever written to the child, so close a piped stdin right away;
    // a child that reads stdin would otherwise wait forever for end-of-input.
    if (child.stdin) {
      child.stdin.end()
    }

    child.on('error', reject)
    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }))
  })
}
|
|
39
|
+
|
|
40
|
+
/**
 * Tell whether a filesystem entry can be accessed.
 *
 * @param {string} targetPath - Path to probe.
 * @returns {Promise<boolean>} True when `fs.access` succeeds, false when it fails
 *   for any reason.
 */
async function pathExists(targetPath) {
  return fs.access(targetPath).then(
    () => true,
    () => false,
  )
}
|
|
48
|
+
|
|
49
|
+
/**
 * Check whether a command can be located by the platform's lookup tool
 * (`where` on Windows, `which` everywhere else).
 *
 * @param {string} command - Command name to look up.
 * @returns {Promise<boolean>} True when the lookup tool exits with status 0.
 */
async function commandExists(command) {
  let lookupTool = 'which'
  if (process.platform === 'win32') {
    lookupTool = 'where'
  }
  const { code } = await run(lookupTool, [command])
  return code === 0
}
|
|
54
|
+
|
|
55
|
+
/**
 * Run the beads executable with the target directory as its working directory.
 *
 * @param {string[]} args - Arguments for the beads command.
 * @param {object} [options] - Extra options for run(); they override the default `cwd`.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Result of run().
 */
async function runBeads(args, options = {}) {
  const spawnOptions = { cwd: target, ...options }
  return run(beadsCommand, args, spawnOptions)
}
|
|
58
|
+
|
|
59
|
+
/**
 * Initialize the beads_rust workspace in the target directory and export its JSONL.
 *
 * Steps:
 *   1. Verify that the beads executable can be found.
 *   2. Verify that the target exists and is a directory.
 *   3. Run `init --quiet` unless a `.beads` directory is already present.
 *   4. Run `sync --flush-only`.
 *
 * Every failure is reported through die(), which exits the process with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // A BR_COMMAND override may be a file path or a bare command name that is
  // resolved through PATH, so accept either form.
  const beadsAvailable = beadsCommand === 'br'
    ? await commandExists('br')
    : (await pathExists(beadsCommand)) || (await commandExists(beadsCommand))
  if (!beadsAvailable) {
    die(`'${beadsCommand}' is not available. Install beads_rust first.`)
  }

  if (!(await pathExists(target))) {
    die(`Target directory '${targetInput}' does not exist or is not accessible.`)
  }
  // The target becomes the child's working directory, so a plain file is not usable.
  if (!(await fs.stat(target)).isDirectory()) {
    die(`Target '${targetInput}' is not a directory.`)
  }

  log(`Target: ${target}`)

  // Both beads invocations run with CI=1 added to the inherited environment.
  const beadsEnv = { ...process.env, CI: '1' }

  const beadsDir = path.join(target, '.beads')
  if (!(await pathExists(beadsDir))) {
    log(`Running '${beadsCommand} init --quiet'`)
    const initResult = await runBeads(['init', '--quiet'], { env: beadsEnv })
    if (initResult.code !== 0) {
      console.error(`${initResult.stdout}${initResult.stderr}`.trim())
      die(`'${beadsCommand} init' failed. Review output above.`)
    }
  } else {
    log('Found existing .beads; skipping init to avoid destructive re-initialization')
  }

  log(`Running '${beadsCommand} sync --flush-only'`)
  const syncResult = await runBeads(['sync', '--flush-only'], { env: beadsEnv })
  if (syncResult.code !== 0) {
    console.error(`${syncResult.stdout}${syncResult.stderr}`.trim())
    die(`'${beadsCommand} sync --flush-only' failed. Review output above.`)
  }

  log('Success: beads_rust workspace initialized and JSONL exported')
}
|
|
96
|
+
|
|
97
|
+
// Script entry point: a rejection from main() is reported through die().
main().catch((failure) => {
  const reason = failure instanceof Error ? failure.message : String(failure)
  die(reason)
})
|