theslopmachine 0.3.7 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/MANUAL.md +13 -9
  2. package/README.md +163 -3
  3. package/RELEASE.md +11 -3
  4. package/assets/agents/developer-v2.md +86 -0
  5. package/assets/agents/developer.md +21 -23
  6. package/assets/agents/slopmachine-v2.md +219 -0
  7. package/assets/agents/slopmachine.md +56 -38
  8. package/assets/skills/beads-operations/SKILL.md +32 -31
  9. package/assets/skills/beads-operations-v2/SKILL.md +82 -0
  10. package/assets/skills/clarification-gate/SKILL.md +8 -1
  11. package/assets/skills/clarification-gate-v2/SKILL.md +74 -0
  12. package/assets/skills/developer-session-lifecycle/SKILL.md +45 -14
  13. package/assets/skills/developer-session-lifecycle-v2/SKILL.md +148 -0
  14. package/assets/skills/development-guidance-v2/SKILL.md +60 -0
  15. package/assets/skills/evaluation-triage-v2/SKILL.md +38 -0
  16. package/assets/skills/final-evaluation-orchestration/SKILL.md +9 -11
  17. package/assets/skills/final-evaluation-orchestration-v2/SKILL.md +57 -0
  18. package/assets/skills/get-overlays/SKILL.md +77 -6
  19. package/assets/skills/hardening-gate-v2/SKILL.md +64 -0
  20. package/assets/skills/integrated-verification-v2/SKILL.md +47 -0
  21. package/assets/skills/owner-evidence-discipline-v2/SKILL.md +15 -0
  22. package/assets/skills/planning-gate/SKILL.md +6 -4
  23. package/assets/skills/planning-gate-v2/SKILL.md +91 -0
  24. package/assets/skills/planning-guidance-v2/SKILL.md +100 -0
  25. package/assets/skills/remediation-guidance-v2/SKILL.md +31 -0
  26. package/assets/skills/report-output-discipline-v2/SKILL.md +15 -0
  27. package/assets/skills/scaffold-guidance-v2/SKILL.md +57 -0
  28. package/assets/skills/session-rollover-v2/SKILL.md +41 -0
  29. package/assets/skills/submission-packaging/SKILL.md +147 -115
  30. package/assets/skills/submission-packaging-v2/SKILL.md +142 -0
  31. package/assets/skills/verification-gates/SKILL.md +44 -16
  32. package/assets/skills/verification-gates-v2/SKILL.md +102 -0
  33. package/assets/slopmachine/backend-evaluation-prompt.md +9 -2
  34. package/assets/slopmachine/frontend-evaluation-prompt.md +9 -2
  35. package/assets/slopmachine/templates/AGENTS-v2.md +55 -0
  36. package/assets/slopmachine/templates/AGENTS.md +20 -17
  37. package/assets/slopmachine/tracker-init.js +104 -0
  38. package/assets/slopmachine/workflow-init-v2.js +99 -0
  39. package/package.json +1 -1
  40. package/src/constants.js +22 -3
  41. package/src/init.js +33 -28
  42. package/src/install.js +186 -140
  43. package/src/utils.js +19 -0
  44. package/assets/slopmachine/beads-init.js +0 -439
@@ -13,6 +13,15 @@ Use this skill after development begins whenever you are reviewing developer wor
13
13
  - Treat it as owner-side review and gate guidance, not developer-visible text.
14
14
  - Use `get-overlays` as the source of truth for developer-facing execution guidance.
15
15
  - Use this skill as the source of truth for owner-side verification, review pressure, and gate interpretation.
16
+ - Outside the final evaluation decision, do not pause execution for human approval while using this skill; continue reviewing, rejecting, fixing, and rerunning until the work qualifies.
17
+
18
+ ## Documentation and repo hygiene
19
+
20
+ - maintain the owner-managed external docs in parent-root `../docs/` from accepted clarification, accepted planning, accepted major implementation changes, and hardening verification
21
+ - keep `README.md` codebase-specific, junior-friendly, and separate from the external docs set
22
+ - do not allow `.env` files or env-file variants anywhere in the repo tree
23
+ - do not allow a project to depend on a preexisting `.env` file in the repo or package in order to start from scratch
24
+ - if env-file format is needed at runtime, it must be generated ephemerally from Docker-provided runtime variables rather than stored in the repo or package
16
25
 
17
26
  ## Review standard
18
27
 
@@ -35,18 +44,20 @@ Use this skill after development begins whenever you are reviewing developer wor
35
44
  - do not accept frontend-bearing slice completion without checking production build health when the change materially affects frontend code or tooling
36
45
  - do not accept module completion that ignores integration seams or cross-cutting consistency with the existing system
37
46
  - do not accept end-to-end evidence that bypasses a required user-facing or admin-facing surface with direct API shortcuts
47
+ - do not accept mocked APIs as integration evidence; integration verification must use real HTTP requests against the actual running service surface
38
48
 
39
49
  ## Verify-fix loop
40
50
 
41
51
  - inspect the result and the evidence, not just the developer's confidence
42
52
  - review technical quality, prompt alignment, architecture impact, and verification depth of the current work
43
- - during normal implementation iteration, prefer fast local language-native or framework-native verification for the changed area instead of forcing `run_tests.sh` every turn
53
+ - during normal implementation iteration, always prefer fast local language-native or framework-native verification for the changed area instead of `docker compose up --build` or `run_tests.sh`
44
54
  - require the developer to set up and use the project-appropriate local test environment in the current working directory when normal local verification is needed
45
- - if the local toolchain is missing, require the developer to install or enable it first; allow fallback to `run_tests.sh` only when that is not practical
55
+ - require local runtime proof when relevant by starting the server locally and exercising the changed behavior directly rather than jumping to Docker-based proof
56
+ - if the local toolchain is missing, require the developer to install or enable it first; do not jump to `run_tests.sh` during ordinary iteration just because local setup is inconvenient
46
57
  - do not accept hand-wavy claims that local verification is unavailable without a real setup attempt and clear explanation
47
58
  - for applicable fullstack or UI-bearing work, require local Playwright on affected flows plus screenshot review and explicit UI validation
48
59
  - if verification is weak, missing, or failing, require fixes and reruns before acceptance
49
- - if docs drift, secrets leak, contracts drift, or frontend integrity is compromised, require cleanup before acceptance
60
+ - if documentation or repo hygiene drifts, secrets leak, contracts drift, or frontend integrity is compromised, require cleanup before acceptance
50
61
  - keep looping until the current work is genuinely acceptable
51
62
 
52
63
  ## Heavy-gate definition
@@ -54,30 +65,40 @@ Use this skill after development begins whenever you are reviewing developer wor
54
65
  - a heavy gate is an owner-run integrated verification boundary, not every ordinary phase change
55
66
  - a phase change alone does not automatically require a heavy gate unless that phase's exit criteria explicitly call for one
56
67
  - a heavy gate normally means some combination of full clean runtime proof, full `run_tests.sh`, and Playwright plus screenshot evidence when UI or fullstack flows exist
57
- - heavy gates are required at scaffold acceptance, integrated/full verification, and post-evaluation remediation re-acceptance
58
- - a mid-phase extra heavy gate is allowed only when the risk profile justifies it, such as major runtime, infra, migration, auth, security, build, or cross-module integration changes
68
+ - heavy gates are required only at these milestone boundaries:
69
+ - scaffold acceptance
70
+ - when development/coding is complete and the project enters integrated verification
71
+ - when integrated verification is complete and the project is ready to leave that phase
72
+ - when hardening is complete and the project is ready for final evaluation
73
+ - once more on the final qualified state before submission packaging
59
74
  - planning acceptance, ordinary module acceptance, and routine in-phase verification are not heavy gates by default and should rely on targeted local verification unless the risk profile says otherwise
60
75
 
61
76
  ## Testing cadence interpretation
62
77
 
63
78
  - the first required `run_tests.sh` pass happens in scaffold once the clean foundation exists
64
- - after scaffold, do not force `docker compose up --build` or `run_tests.sh` on every normal development step when faster local verification is sufficient
79
+ - after scaffold, do not force `docker compose up --build` or `run_tests.sh` on normal development steps
65
80
  - prefer local targeted or native test commands during module implementation and ordinary verify-fix iteration
81
+ - prefer local runtime startup and direct local behavior checks instead of Docker whenever runtime proof is needed during ordinary work
66
82
  - local verification should run inside the current working directory using the project's own environment and tooling rather than hidden global assumptions
67
83
  - during applicable fullstack or UI-bearing implementation work, require local Playwright on affected flows and review screenshots
68
- - treat `docker compose up --build` and `run_tests.sh` as critical-gate verification commands for integrated/full verification, hard gates, and final-evaluation readiness rather than normal iteration tools
84
+ - during integrated verification, do not rerun `docker compose up --build` or `run_tests.sh` on every small fix inside the phase; use local verification until the next milestone boundary is reached
85
+ - reserve `docker compose up --build` and `run_tests.sh` for these owner-run milestone checks only:
86
+ - scaffold acceptance
87
+ - development/coding complete -> integrated verification entry
88
+ - integrated verification complete -> hardening entry
89
+ - hardening complete -> final evaluation readiness
90
+ - final qualified state -> submission packaging readiness
69
91
  - the workflow owner handles those expensive critical-gate runs; do not require the developer to duplicate them during normal phase progression
70
- - run `run_tests.sh` again at integrated/full verification
71
- - integrated/full verification must also run Playwright for major flows and inspect screenshots
72
- - run `run_tests.sh` again after post-evaluation remediation before re-acceptance
73
- - after post-evaluation remediation affecting real flows or UI, rerun Playwright and inspect fresh screenshots before re-acceptance
92
+ - each integrated-verification milestone run must also include Playwright for major flows and screenshot review when UI or fullstack flows exist
93
+ - after post-evaluation remediation, prefer local reruns, affected local runtime checks, and affected Playwright checks
94
+ - after remediation, return the project to the appropriate milestone boundary and run the next owner-run gate there rather than turning every remediation fix into an immediate Docker and `run_tests.sh` rerun
74
95
 
75
96
  ## Runtime gate interpretation
76
97
 
77
- Use evidence such as Bead metadata, structured Bead comments, verification command results, and file/project-state checks.
98
+ Use evidence such as internal metadata files, structured tracker comments, verification command results, and file/project-state checks.
78
99
 
79
100
  - clarification requires the `clarification-gate` conditions plus explicit approval record
80
- - development bootstrap requires the `developer-session-lifecycle` conditions plus a fresh planning-oriented start in the current working directory with working planning docs under `docs/`
101
+ - development bootstrap requires the `developer-session-lifecycle` conditions plus a fresh planning-oriented start and the required documentation and repo hygiene state when relevant
81
102
  - scaffold requires evidence for `docker compose up --build`, `run_tests.sh`, baseline logging/config, and when relevant the chosen frontend stack and UI approach being set intentionally
82
103
  - scaffold also requires safe env/config handling, no persisted local secrets, real migration/runtime foundations, and a usable local test environment in the current working directory when practical
83
104
  - when scaffold includes prompt-critical security controls, acceptance requires real runtime or endpoint verification of the protection rather than helper-only or shape-only proof
@@ -86,21 +107,28 @@ Use evidence such as Bead metadata, structured Bead comments, verification comma
86
107
  - module implementation requires module planning notes, module definition of done, relevant local verification for the changed area, and for applicable fullstack or UI work local Playwright evidence with screenshots, plus docs sync and review acceptance
87
108
  - module implementation also requires integration-seam verification against adjacent modules and cross-cutting concerns where relevant, and known release-facing or build failures block acceptance unless explicitly scoped out
88
109
  - module implementation acceptance should also challenge tenant isolation, path confinement, sanitized error behavior, and prototype residue when those concerns are in scope
89
- - integrated verification requires owner-run `docker compose up --build`, owner-run `run_tests.sh`, end-to-end, Playwright, prompt-alignment, README/runtime, and cross-module evidence
110
+ - integrated verification entry requires owner-run `docker compose up --build`, owner-run `run_tests.sh`, end-to-end, Playwright, prompt-alignment, README/runtime, and cross-module evidence once development/coding is complete
111
+ - integrated verification completion requires one more owner-run `docker compose up --build`, one more owner-run `run_tests.sh`, and the corresponding end-to-end and screenshot evidence before the phase can close
90
112
  - fullstack integrated verification must include Playwright coverage for every major flow, plus screenshots used to evaluate frontend behavior and UI quality along the flow using `frontend-design`
91
113
  - if a required flow cannot be exercised through the intended UI surface, treat that as incomplete implementation rather than acceptable E2E coverage
92
114
  - hardening requires security, maintainability, exploratory, and release-freeze evidence
115
+ - hardening completion requires one owner-run `docker compose up --build` and one owner-run `run_tests.sh` on the hardened state before final evaluation begins
93
116
  - hardening must explicitly re-check secret handling, redaction, and frontend/backend observability hygiene
117
+ - hardening must explicitly satisfy the documentation and repo hygiene policy in this file before final evaluation can begin
94
118
  - final evaluation readiness requires automated evaluation to be complete and triaged, with a clear go-to-packaging vs return-to-fixes decision
95
- - remediation requires accepted issue records plus rerun verification, and after post-evaluation remediation it requires an owner-run fresh `run_tests.sh` pass and Playwright rerun where applicable before re-acceptance
119
+ - submission packaging readiness requires one final owner-run `docker compose up --build` and one final owner-run `run_tests.sh` on the final qualified state immediately before packaging
120
+ - remediation requires accepted issue records plus rerun local verification and affected Playwright where applicable; if remediation materially reopens the integrated verification boundary, route it back through integrated verification before re-evaluation
96
121
 
97
122
  ## Hardening and pre-evaluation discipline
98
123
 
99
124
  When all planned modules are complete:
100
125
 
101
- - run integrated verification
126
+ - run the owner-run milestone gate for development/coding completion and enter integrated verification
102
127
  - run hardening and exploratory testing
103
128
  - for fullstack applications, rerun Playwright coverage for major flows and inspect screenshots for frontend regressions or weak UX
129
+ - run the documentation and repo-hygiene verification required by this file before final evaluation
130
+ - close integrated verification only after its completion milestone gate has passed
131
+ - close hardening only after its completion milestone gate has passed
104
132
  - enforce release-candidate freeze
105
133
  - allow only fixes, verification improvements, doc corrections, and packaging work
106
134
  - prepare the package and evidence cleanly before the final evaluation decision gate
@@ -0,0 +1,102 @@
1
+ ---
2
+ name: verification-gates-v2
3
+ description: Owner-side review, acceptance, rejection, and gate-interpretation rules for slopmachine-v2.
4
+ ---
5
+
6
+ # Verification Gates v2
7
+
8
+ Use this skill after development begins whenever you are reviewing work, deciding acceptance, or interpreting phase exits.
9
+
10
+ ## Usage rules
11
+
12
+ - load this skill before review, acceptance, rejection, runtime gate interpretation, hardening readiness decisions, or broad-gate decisions
13
+ - treat it as owner-side review and gate guidance, not developer-visible text
14
+ - use this skill as the source of truth for owner-side verification, review pressure, and gate interpretation
15
+ - outside the final human decision, do not pause execution for human approval while using this skill; continue reviewing, rejecting, fixing, and rerunning until the work qualifies
16
+
17
+ ## Documentation and repo hygiene
18
+
19
+ - maintain the owner-managed external docs in parent-root `../docs/` from accepted clarification, accepted planning, accepted major implementation changes, and hardening verification
20
+ - keep `README.md` codebase-specific, junior-friendly, and separate from the external docs set
21
+ - do not allow `.env` files or env-file variants anywhere in the repo tree
22
+ - do not allow a project to depend on a preexisting `.env` file in the repo or package in order to start from scratch
23
+ - if env-file format is needed at runtime, it must be generated ephemerally from Docker-provided runtime variables rather than stored in the repo or package
24
+
25
+ ## Review standard
26
+
27
+ - do not accept fake tests, weak evidence, documentation drift, missing real surfaces, or unresolved release-facing failures
28
+ - do not accept mocked APIs as integration evidence
29
+ - do not accept placeholder or demo UI in product-facing flows
30
+ - do not accept `.env` files or similar env-file artifacts
31
+ - do not accept shallow Docker verification
32
+ - do not accept happy-path-only implementation when failure paths matter
33
+ - do not accept unsupported claims
34
+ - do not accept work that looks complete but is not resilient
35
+ - do not accept committed secrets, hardcoded sensitive values, or sloppy env handling
36
+ - do not accept frontend/backend drift in fullstack work
37
+ - do not accept missing end-to-end coverage for major fullstack flows
38
+ - do not accept UI claims without screenshot-backed Playwright evidence when the change affects real frontend behavior
39
+ - do not accept prototype residue such as seeded credentials, weak demo defaults, login hints, or unsanitized user-facing error behavior
40
+ - do not accept multi-tenant or cross-user security claims without negative isolation evidence when that boundary matters
41
+ - do not accept file-bearing flows without path confinement and traversal-style validation when that boundary matters
42
+ - do not accept partial foundation work for complex features when the prompt implies broader usable scope, infrastructure depth, or security depth than what was actually delivered
43
+ - do not accept frontend-bearing slice completion without checking production build health when the change materially affects frontend code or tooling
44
+ - do not accept module completion that ignores integration seams or cross-cutting consistency with the existing system
45
+ - do not accept end-to-end evidence that bypasses a required user-facing or admin-facing surface with direct API shortcuts
46
+
47
+ ## Cadence rule
48
+
49
+ - use targeted local verification as the default during scaffold corrections, development, hardening, and remediation
50
+ - reserve the broad Docker/full-suite path for the limited owner-run gate moments in the workflow budget
51
+ - do not turn ordinary acceptance into repeated integrated-style gate runs
52
+
53
+ ## Verify-fix loop
54
+
55
+ - inspect the result and evidence, not just the developer claim
56
+ - review technical quality, prompt alignment, architecture impact, and verification depth of the current work
57
+ - during normal implementation iteration, always prefer fast local language-native or framework-native verification for the changed area instead of broad Docker or full-suite proof
58
+ - require the developer to set up and use the project-appropriate local test environment in the current working directory when normal local verification is needed
59
+ - require local runtime proof when relevant by starting the server locally and exercising the changed behavior directly rather than jumping to Docker-based proof
60
+ - if the local toolchain is missing, require the developer to install or enable it first; do not jump to the broad gate path during ordinary iteration just because local setup is inconvenient
61
+ - do not accept hand-wavy claims that local verification is unavailable without a real setup attempt and clear explanation
62
+ - for applicable fullstack or UI-bearing work, require local Playwright on affected flows plus screenshot review and explicit UI validation
63
+ - if verification is weak, missing, or failing, require fixes and reruns before acceptance
64
+ - if documentation or repo hygiene drifts, secrets leak, contracts drift, or frontend integrity is compromised, require cleanup before acceptance
65
+ - keep looping until the current work is genuinely acceptable
66
+
67
+ ## Broad-gate definition
68
+
69
+ - a broad gate is an owner-run integrated verification boundary, not every ordinary phase change
70
+ - a phase change alone does not automatically require a broad gate unless that phase exit explicitly calls for one
71
+ - a broad gate normally means some combination of full clean runtime proof, the broad `run_tests.sh` path, and Playwright plus screenshot evidence when UI or fullstack flows exist
72
+ - in v2, the workflow target is at most 3 broad owner-run verification moments across the whole cycle
73
+ - ordinary planning, ordinary slice acceptance, and routine in-phase verification are not broad gates by default and should rely on targeted local verification unless the risk profile says otherwise
74
+
75
+ ## Runtime gate interpretation
76
+
77
+ Use evidence such as internal metadata files, structured Beads comments, verification command results, and file/project-state checks.
78
+
79
+ - clarification requires the `clarification-gate-v2` conditions plus explicit approval record
80
+ - planning requires the `developer-session-lifecycle-v2` and `planning-gate-v2` conditions plus a fresh planning-oriented start and the required documentation and repo hygiene state when relevant
81
+ - scaffold requires evidence for the bounded scaffold gate, baseline logging/config, and when relevant the chosen frontend stack and UI approach being set intentionally
82
+ - scaffold also requires safe env/config handling, no persisted local secrets, real migration/runtime foundations, and a usable local test environment in the current working directory when practical
83
+ - when scaffold includes prompt-critical security controls, acceptance requires real runtime or endpoint verification of the protection rather than helper-only or shape-only proof
84
+ - for security-bearing scaffolds, require applicable rejection evidence such as stale replay rejection, nonce reuse rejection, CSRF rejection on protected mutations, lockout triggering when lockout is in scope, or equivalent proof that the control is truly enforced
85
+ - scaffold acceptance also requires self-contained Compose namespacing, no unnecessary fragile `container_name` usage, and clean startup plus teardown behavior in the intended shared-environment model
86
+ - module implementation requires module planning notes, module definition of done, relevant local verification for the changed area, and for applicable fullstack or UI work local Playwright evidence with screenshots, plus docs sync and review acceptance
87
+ - module implementation acceptance should challenge tenant isolation, path confinement, sanitized error behavior, prototype residue, integration seams, and cross-cutting consistency when those concerns are in scope
88
+ - integrated verification entry requires one of the limited owner-run broad gate moments once development is complete
89
+ - integrated verification completion requires explicit full-system evidence before the phase can close
90
+ - fullstack integrated verification must include Playwright coverage for every major flow, plus screenshots used to evaluate frontend behavior and UI quality along the flow using `frontend-design`
91
+ - if a required flow cannot be exercised through the intended UI surface, treat that as incomplete implementation rather than acceptable E2E coverage
92
+ - hardening requires security, maintainability, exploratory, and release-freeze evidence
93
+ - hardening must explicitly re-check secret handling, redaction, and frontend/backend observability hygiene
94
+ - hardening must explicitly satisfy the documentation and repo hygiene policy in this file before final evaluation can begin
95
+ - final evaluation readiness requires automated evaluation to be complete and triaged, with a clear go-to-packaging vs return-to-fixes decision
96
+ - remediation requires accepted issue records plus rerun local verification and affected Playwright where applicable; if remediation materially reopens the integrated verification boundary, route it back through integrated verification before re-evaluation
97
+
98
+ ## Acceptance rule
99
+
100
+ - inspect the result and evidence, not just the developer claim
101
+ - prefer one strong rejection with a concrete correction request over many small nudges
102
+ - keep looping until the current work is genuinely acceptable
@@ -271,5 +271,12 @@ Before finalizing, check all of the following:
271
271
  5. Has security or test sufficiency been judged too loosely without evidence?
272
272
  6. Has any Docker non-execution boundary been incorrectly described as a confirmed runtime failure?
273
273
 
274
- If file writing is supported, save the final report to a markdown file.
275
- Otherwise, return the report in-chat.
274
+ Save the full final report to a markdown file.
275
+
276
+ In chat, respond with only a brief summary of the results:
277
+
278
+ - final verdict
279
+ - top 1-3 findings
280
+ - report file path
281
+
282
+ Please confirm whether the current project tests are genuine and effective rather than superficial or fake tests, whether the API tests actually invoke real HTTP endpoints, and whether they cover more than 90% of the overall API surface.
@@ -300,5 +300,12 @@ Before finalizing, check all of the following:
300
300
  6. Has a Docker non-execution boundary been incorrectly described as a confirmed runtime failure?
301
301
  7. Has any material conclusion directly or indirectly relied on files under ./.tmp/?
302
302
 
303
- If file writing is supported, save the final report as a markdown file.
304
- Otherwise, return the report directly in the conversation.
303
+ Save the full final report to a markdown file.
304
+
305
+ In chat, respond with only a brief summary of the results:
306
+
307
+ - final verdict
308
+ - top 1-3 findings
309
+ - report file path
310
+
311
+ Please confirm whether the current project tests are genuine and effective rather than superficial or fake tests, whether the API tests actually invoke real HTTP endpoints, and whether they cover more than 90% of the overall API surface.
@@ -0,0 +1,55 @@
1
+ # Developer Rulebook v2
2
+
3
+ This file is the repo-local engineering rulebook for `slopmachine-v2` projects.
4
+
5
+ ## Scope
6
+
7
+ - Treat the current working directory as the project.
8
+ - Ignore parent-directory workflow files unless the user explicitly asks you to use them.
9
+ - Do not treat workflow research, session exports, or sibling directories as hidden implementation instructions.
10
+
11
+ ## Working Style
12
+
13
+ - Operate like a strong senior engineer.
14
+ - Read the code before making assumptions.
15
+ - Work in meaningful vertical slices.
16
+ - Do not call work complete while it is still shaky.
17
+ - Reuse and extend shared cross-cutting patterns instead of inventing incompatible local ones.
18
+
19
+ ## Verification Rules
20
+
21
+ - During ordinary iteration, prefer the fastest meaningful local verification for the changed area.
22
+ - Prefer targeted unit, integration, module, route-family, or local Playwright checks over broad reruns.
23
+ - Do not rerun full Dockerized startup and the full test suite on every small change.
24
+ - The broad owner-run Docker/full-suite path should be used sparingly, with a target budget of at most 3 times across the whole workflow cycle.
25
+ - If you run a Docker-based verification command sequence, end it with `docker compose down` unless containers must remain up.
26
+
27
+ ## Testing Rules
28
+
29
+ - Tests must be real and tied to actual behavior.
30
+ - Do not mock APIs for integration testing.
31
+ - Use real HTTP requests against the actual running service surface for integration evidence.
32
+ - For UI-bearing work, use local Playwright on affected flows and inspect screenshots when practical.
33
+
34
+ ## Documentation Rules
35
+
36
+ - Keep `README.md` and any codebase-local docs accurate.
37
+ - The README must explain what the project is, what it does, how to run it, and how to test it.
38
+ - The README must stand on its own for basic codebase use.
39
+
40
+ ## Secret And Runtime Rules
41
+
42
+ - Do not create or keep `.env` files anywhere in the repo.
43
+ - Do not rely on `.env`, `.env.local`, `.env.example`, or similar files for project startup.
44
+ - Do not hardcode secrets.
45
+ - If runtime env-file format is required, generate it ephemerally and do not commit or package it.
46
+
47
+ ## Product Integrity Rules
48
+
49
+ - Do not leave placeholder, setup, debug, or demo content in product-facing UI.
50
+ - If a real user-facing or admin-facing surface is required, build that surface instead of bypassing it with API shortcuts.
51
+ - Treat missing real surfaces as incomplete implementation.
52
+
53
+ ## Rulebook Files
54
+
55
+ - Do not edit `AGENTS.md` or other workflow/rulebook files unless explicitly asked.
@@ -6,7 +6,7 @@ This file is the developer-facing operating rulebook for project execution.
6
6
 
7
7
  - Treat the current working directory as the project.
8
8
  - Ignore files outside the current working directory unless the user explicitly asks you to use them.
9
- - Do not use parent-directory files as hidden requirements.
9
+ - Do not use unrelated parent-directory files as hidden requirements.
10
10
 
11
11
  ## Working Style
12
12
 
@@ -26,19 +26,16 @@ This file is the developer-facing operating rulebook for project execution.
26
26
 
27
27
  ## Runtime And Verification Rules
28
28
 
29
- - A heavy gate is an owner-run integrated verification boundary, not every ordinary phase change.
30
- - Heavy gates normally include full clean runtime proof, full `run_tests.sh`, and Playwright plus screenshot evidence when UI or fullstack flows exist.
31
- - Heavy gates are expected at scaffold acceptance, integrated/full verification, and post-evaluation remediation re-acceptance.
32
- - Ordinary phase progression and module completion do not automatically mean rerunning every heavy-gate command.
33
29
  - Treat Docker as the main runtime contract.
34
- - `docker compose up --build` is the canonical startup path and must work when the project expects Dockerized execution.
30
+ - `docker compose up --build` must work when the project expects Dockerized execution, but it is not the default per-turn verification command during normal iteration.
35
31
  - `run_tests.sh` is a required project test entrypoint and must exist and work.
36
32
  - After scaffold is established, do not rerun full `docker compose up --build` and `run_tests.sh` on every small implementation step.
37
33
  - During normal iteration, prefer the fastest meaningful local verification inside the current working directory using the project-appropriate test environment and tooling.
38
- - If the local test toolchain is missing, try to install or enable it before falling back to `run_tests.sh`.
39
- - Treat `docker compose up --build` and `run_tests.sh` as critical-gate verification commands, not normal per-turn iteration commands.
40
- - The workflow owner handles those expensive critical-gate runs; focus on strong local verification during normal work so the gate passes succeed cleanly.
41
- - After post-evaluation remediation, strengthen local verification and affected Playwright checks rather than rerunning every full gate command yourself unless explicitly required.
34
+ - When runtime proof is needed during ordinary work, prefer starting and exercising the app locally instead of using Docker gate commands.
35
+ - If the local test toolchain is missing, try to install or enable it; do not jump to `run_tests.sh` on ordinary turns just because local setup takes work.
36
+ - The workflow owner runs `run_tests.sh` only at milestone boundaries: after scaffold, after development/coding is complete, after integrated verification is complete, after hardening is complete, and once more before final submission.
37
+ - Do not rerun `docker compose up --build` or `run_tests.sh` on every small fix inside integrated verification; use local verification until the next milestone boundary is reached.
38
+ - After post-evaluation remediation, strengthen local verification and affected Playwright checks rather than rerunning full gate commands yourself unless explicitly required.
42
39
  - Do not let unverified work accumulate.
43
40
 
44
41
  ## Testing Rules
@@ -46,6 +43,7 @@ This file is the developer-facing operating rulebook for project execution.
46
43
  - Tests must be real, meaningful, and tied to actual behavior.
47
44
  - Cover happy paths, failure paths, and realistic edge cases.
48
45
  - For API-bearing projects, prefer real endpoint invocation where practical.
46
+ - Do not mock APIs for integration testing; integration evidence must use real HTTP requests against the actual running service surface.
49
47
  - For backend integration tests, prefer production-equivalent infrastructure when practical instead of a weaker substitute that can hide real defects.
50
48
  - For applicable frontend or fullstack work, run local Playwright against affected end-to-end flows during implementation and inspect screenshots to verify the UI actually matches.
51
49
  - Do not pad the test suite with superficial or fake tests.
@@ -53,6 +51,8 @@ This file is the developer-facing operating rulebook for project execution.
53
51
 
54
52
  ## Frontend Product Integrity
55
53
 
54
+ - Unless the prompt, existing repository, or established stack clearly dictates otherwise, default frontend work to Tailwind CSS for styling and `shadcn/ui` for component primitives.
55
+ - If the existing project already uses a different UI system, preserve and extend that system instead of forcing Tailwind CSS or `shadcn/ui` into it.
56
56
  - Do not place development, setup, scaffold, seed, or debug information in the product UI.
57
57
  - Do not add demo banners, `database is working` messages, scaffold-password hints, setup reminders, or similar developer-facing content to frontend screens.
58
58
  - If a screen exists, it should serve the real user or operator purpose it was created for.
@@ -61,12 +61,11 @@ This file is the developer-facing operating rulebook for project execution.
61
61
  ## Documentation Rules
62
62
 
63
63
  - Keep docs aligned with the current implementation.
64
- - During development, keep working technical docs under `docs/`.
65
- - Maintain a test-coverage document under `docs/` that explains the major flow coverage, the relevant test entry points, and any important coverage boundaries.
66
64
  - Do not add or keep tests that only assert that docs directories or docs files exist.
67
- - Delivery packaging may relocate docs, but that is not product behavior and should not be tested as application logic.
68
- - Update technical docs when behavior, architecture, interfaces, runtime steps, or verification expectations change.
69
- - The README must explain what the project is, how to run it, how to test it, and how to verify it.
65
+ - Documentation structure outside the repo is not application behavior and should not be tested as application logic.
66
+ - Keep `README.md` and any codebase-local docs accurate when behavior, runtime steps, or verification expectations change.
67
+ - The README must explain what the project is, what it does, how to run it, and how to test it in a way that is friendly to a junior developer.
68
+ - The README must stand on its own for basic codebase use and must not depend on separate external documentation for run/test basics.
70
69
  - Do not leave misleading docs in place after changing behavior.
71
70
 
72
71
  ## Engineering Quality Rules
@@ -78,10 +77,14 @@ This file is the developer-facing operating rulebook for project execution.
78
77
 
79
78
  ## Secret Handling Rules
80
79
 
80
+ - Do not create, keep, or rely on `.env` files anywhere in the codebase.
81
+ - Treat `.env`, `.env.local`, `.env.example`, and similar env-file variants as forbidden artifacts.
81
82
  - Do not persist local secrets anywhere in the repository.
82
83
  - Do not hardcode credentials, API keys, tokens, signing material, database passwords, certificate private keys, or similar sensitive values in code.
83
- - Keep committed env/config examples limited to placeholders or clearly non-production defaults.
84
- - If a real secret is needed, inject it through Docker-managed runtime configuration and keep it out of committed source files.
84
+ - Do not use env files even for placeholders or setup examples.
85
+ - The delivered repo and package must not require any preexisting `.env` file to start from scratch.
86
+ - If environment variables are needed, rely on Docker-provided runtime variables or generate any required env-file format ephemerally at runtime from those variables.
87
+ - If runtime generation is used, the generated env file must not be committed, packaged, or treated as a persistent project artifact.
85
88
  - Do not leak raw secrets into logs, docs, screenshots, telemetry, or operator-facing UI.
86
89
  - Treat frontend and backend observability paths as secret-sensitive by default and redact accordingly.
87
90
 
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env node
2
+
3
+ import fs from 'node:fs/promises'
4
+ import path from 'node:path'
5
+ import { spawn } from 'node:child_process'
6
+
7
// Directory to operate on: first CLI argument, or the current directory when
// the argument is missing or empty.
const targetInput = process.argv[2] || '.'
// Absolute form of the target, resolved against the current working directory.
const target = path.resolve(process.cwd(), targetInput)
// Tracker executable to invoke; BR_COMMAND overrides the default `br`.
const trackerCommand = process.env.BR_COMMAND || 'br'
10
+
11
/**
 * Write an informational line to stdout, tagged with this script's name.
 *
 * @param {string} message - Text to print after the tag.
 */
function log(message) {
  const line = `[tracker-init] ${message}`
  console.log(line)
}
14
+
15
/**
 * Report a fatal error on stderr and terminate the process with status 1.
 *
 * @param {string} message - Description of the failure.
 */
function die(message) {
  const line = `[tracker-init] ERROR: ${message}`
  console.error(line)
  process.exit(1)
}
19
+
20
/**
 * Spawn `command` with `args` (without a shell) and collect what it prints.
 *
 * @param {string} command - Executable name or path.
 * @param {string[]} args - Arguments passed to the child verbatim.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Child environment; defaults to `process.env`.
 * @param {string|Array} [options.stdio] - stdio configuration; defaults to `'pipe'`.
 * @param {string|Buffer} [options.input] - Data to send to the child's stdin.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Resolves when
 *   the child closes. `code` falls back to 1 when the child reports no exit
 *   code. Rejects when the child emits an `error` event (for example when the
 *   executable cannot be spawned).
 */
function run(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: options.cwd,
      env: options.env || process.env,
      stdio: options.stdio || 'pipe',
      shell: false,
    })

    let stdout = ''
    let stderr = ''

    // Decode at the stream level so a multi-byte character that straddles two
    // chunks is not corrupted by per-chunk string conversion.
    if (child.stdout) {
      child.stdout.setEncoding('utf8')
      child.stdout.on('data', (chunk) => { stdout += chunk })
    }
    if (child.stderr) {
      child.stderr.setEncoding('utf8')
      child.stderr.on('data', (chunk) => { stderr += chunk })
    }

    if (child.stdin) {
      // A child that exits before reading its input makes the write fail with
      // EPIPE; the exit code already describes that failure, so the stream
      // error must not crash this process as an unhandled 'error' event.
      child.stdin.on('error', () => {})
      if (options.input !== undefined) {
        child.stdin.end(options.input)
      } else {
        // Always close stdin so a child that reads it sees EOF instead of
        // blocking forever on an open pipe.
        child.stdin.end()
      }
    }

    child.on('error', reject)
    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }))
  })
}
44
+
45
/**
 * Report whether `command` resolves to an executable file.
 *
 * The directories listed in PATH are scanned directly rather than delegating
 * to an external `which`/`where` helper, so the check still works on systems
 * where that helper is not installed.
 *
 * @param {string} command - Bare command name, or a path containing a separator.
 * @returns {Promise<boolean>} `true` when a matching executable file is found.
 */
async function commandExists(command) {
  const isWindows = process.platform === 'win32'

  // True when `candidate` is a regular file that may be executed.
  const isExecutableFile = async (candidate) => {
    try {
      const info = await fs.stat(candidate)
      // Windows has no execute permission bits; any regular file counts there.
      return info.isFile() && (isWindows || (info.mode & 0o111) !== 0)
    } catch {
      return false
    }
  }

  if (!command) return false

  // A value with a separator is a path, not a name to look up in PATH.
  if (command.includes('/') || command.includes(path.sep)) {
    return isExecutableFile(command)
  }

  const suffixes = isWindows
    ? ['', ...(process.env.PATHEXT || '.COM;.EXE;.BAT;.CMD').split(';').filter(Boolean)]
    : ['']
  const pathDirectories = (process.env.PATH || '').split(path.delimiter).filter(Boolean)
  // `where` also looks in the current directory on Windows.
  const directories = isWindows ? [process.cwd(), ...pathDirectories] : pathDirectories

  for (const directory of directories) {
    for (const suffix of suffixes) {
      if (await isExecutableFile(path.join(directory, `${command}${suffix}`))) {
        return true
      }
    }
  }
  return false
}
50
+
51
/**
 * Check whether `targetPath` can be accessed.
 *
 * @param {string} targetPath - Path to probe.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when it fails.
 */
async function pathExists(targetPath) {
  let reachable = true
  try {
    await fs.access(targetPath)
  } catch {
    reachable = false
  }
  return reachable
}
59
+
60
/**
 * Run the tracker executable with `args`, using the target as the working
 * directory unless `options` overrides it.
 *
 * @param {string[]} args - Arguments for the tracker command.
 * @param {object} [options] - Extra options forwarded to `run`.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>}
 */
async function runTracker(args, options = {}) {
  const spawnOptions = { cwd: target, ...options }
  return run(trackerCommand, args, spawnOptions)
}
63
+
64
/**
 * Entry point: check prerequisites, run `init --quiet` in the target when it
 * has no `.beads` directory yet, then run `sync --flush-only`.
 *
 * Every failed step is reported through `die`, which exits with status 1.
 */
async function main() {
  // BR_COMMAND may be a filesystem path or a bare command name. Only a value
  // containing a path separator can be verified with a direct file lookup;
  // a bare name has to be resolved the same way the default `br` is.
  const looksLikePath = trackerCommand.includes('/') || trackerCommand.includes(path.sep)
  const trackerAvailable = looksLikePath
    ? await pathExists(trackerCommand)
    : await commandExists(trackerCommand)
  if (!trackerAvailable) {
    die(`'${trackerCommand}' is not available. Install beads_rust first.`)
  }

  // The target becomes the child's working directory, so a regular file at
  // that path is as unusable as a missing one.
  const targetStat = await fs.stat(target).catch(() => null)
  if (!targetStat?.isDirectory()) {
    die(`Target directory '${targetInput}' does not exist or is not accessible.`)
  }

  log(`Target: ${target}`)

  // Both tracker invocations run with CI=1 added to the inherited environment.
  const trackerEnv = { ...process.env, CI: '1' }

  const beadsDir = path.join(target, '.beads')
  if (!(await pathExists(beadsDir))) {
    log("Running 'br init --quiet'")
    const initResult = await runTracker(['init', '--quiet'], { env: trackerEnv })
    if (initResult.code !== 0) {
      console.error(`${initResult.stdout}${initResult.stderr}`.trim())
      die("'br init' failed. Review output above.")
    }
  } else {
    log('Found existing .beads; skipping init to avoid destructive re-initialization')
  }

  log("Running 'br sync --flush-only'")
  const syncResult = await runTracker(['sync', '--flush-only'], { env: trackerEnv })
  if (syncResult.code !== 0) {
    console.error(`${syncResult.stdout}${syncResult.stderr}`.trim())
    die("'br sync --flush-only' failed. Review output above.")
  }

  log('Success: tracker initialized and JSONL exported')
}
101
+
102
// Start the script; any rejection that escapes main() is turned into a fatal
// error message and a non-zero exit through die().
main().catch((error) => {
  const reason = error instanceof Error ? error.message : String(error)
  die(reason)
})
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env node
2
+
3
+ import fs from 'node:fs/promises'
4
+ import path from 'node:path'
5
+ import { spawn } from 'node:child_process'
6
+
7
// Directory to operate on: first CLI argument, or the current directory when
// the argument is missing or empty.
const targetInput = process.argv[2] || '.'
// Absolute form of the target, resolved against the current working directory.
const target = path.resolve(process.cwd(), targetInput)
// Executable to invoke; BR_COMMAND overrides the default `br`.
const beadsCommand = process.env.BR_COMMAND || 'br'
10
+
11
/**
 * Write an informational line to stdout, tagged with this script's name.
 *
 * @param {string} message - Text to print after the tag.
 */
function log(message) {
  const line = `[workflow-init-v2] ${message}`
  console.log(line)
}
14
+
15
/**
 * Report a fatal error on stderr and terminate the process with status 1.
 *
 * @param {string} message - Description of the failure.
 */
function die(message) {
  const line = `[workflow-init-v2] ERROR: ${message}`
  console.error(line)
  process.exit(1)
}
19
+
20
/**
 * Spawn `command` with `args` (without a shell) and collect what it prints.
 *
 * @param {string} command - Executable name or path.
 * @param {string[]} args - Arguments passed to the child verbatim.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Child environment; defaults to `process.env`.
 * @param {string|Array} [options.stdio] - stdio configuration; defaults to `'pipe'`.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Resolves when
 *   the child closes. `code` falls back to 1 when the child reports no exit
 *   code. Rejects when the child emits an `error` event (for example when the
 *   executable cannot be spawned).
 */
function run(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: options.cwd,
      env: options.env || process.env,
      stdio: options.stdio || 'pipe',
      shell: false,
    })

    let stdout = ''
    let stderr = ''

    // Decode at the stream level so a multi-byte character that straddles two
    // chunks is not corrupted by per-chunk string conversion.
    if (child.stdout) {
      child.stdout.setEncoding('utf8')
      child.stdout.on('data', (chunk) => { stdout += chunk })
    }
    if (child.stderr) {
      child.stderr.setEncoding('utf8')
      child.stderr.on('data', (chunk) => { stderr += chunk })
    }

    if (child.stdin) {
      // Nothing is ever written to the child, so close its stdin right away:
      // a child that reads stdin then sees EOF instead of blocking forever.
      // The listener keeps a failed close from surfacing as an unhandled
      // 'error' event.
      child.stdin.on('error', () => {})
      child.stdin.end()
    }

    child.on('error', reject)
    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }))
  })
}
39
+
40
/**
 * Check whether `targetPath` can be accessed.
 *
 * @param {string} targetPath - Path to probe.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when it fails.
 */
async function pathExists(targetPath) {
  let reachable = true
  try {
    await fs.access(targetPath)
  } catch {
    reachable = false
  }
  return reachable
}
48
+
49
/**
 * Report whether `command` resolves to an executable file.
 *
 * The directories listed in PATH are scanned directly rather than delegating
 * to an external `which`/`where` helper, so the check still works on systems
 * where that helper is not installed.
 *
 * @param {string} command - Bare command name, or a path containing a separator.
 * @returns {Promise<boolean>} `true` when a matching executable file is found.
 */
async function commandExists(command) {
  const isWindows = process.platform === 'win32'

  // True when `candidate` is a regular file that may be executed.
  const isExecutableFile = async (candidate) => {
    try {
      const info = await fs.stat(candidate)
      // Windows has no execute permission bits; any regular file counts there.
      return info.isFile() && (isWindows || (info.mode & 0o111) !== 0)
    } catch {
      return false
    }
  }

  if (!command) return false

  // A value with a separator is a path, not a name to look up in PATH.
  if (command.includes('/') || command.includes(path.sep)) {
    return isExecutableFile(command)
  }

  const suffixes = isWindows
    ? ['', ...(process.env.PATHEXT || '.COM;.EXE;.BAT;.CMD').split(';').filter(Boolean)]
    : ['']
  const pathDirectories = (process.env.PATH || '').split(path.delimiter).filter(Boolean)
  // `where` also looks in the current directory on Windows.
  const directories = isWindows ? [process.cwd(), ...pathDirectories] : pathDirectories

  for (const directory of directories) {
    for (const suffix of suffixes) {
      if (await isExecutableFile(path.join(directory, `${command}${suffix}`))) {
        return true
      }
    }
  }
  return false
}
54
+
55
/**
 * Run the beads executable with `args`, using the target as the working
 * directory unless `options` overrides it.
 *
 * @param {string[]} args - Arguments for the beads command.
 * @param {object} [options] - Extra options forwarded to `run`.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>}
 */
async function runBeads(args, options = {}) {
  const spawnOptions = { cwd: target, ...options }
  return run(beadsCommand, args, spawnOptions)
}
58
+
59
/**
 * Entry point: check prerequisites, run `init --quiet` in the target when it
 * has no `.beads` directory yet, then run `sync --flush-only`.
 *
 * Every failed step is reported through `die`, which exits with status 1.
 */
async function main() {
  // BR_COMMAND may be a filesystem path or a bare command name. Only a value
  // containing a path separator can be verified with a direct file lookup;
  // a bare name has to be resolved the same way the default `br` is.
  const looksLikePath = beadsCommand.includes('/') || beadsCommand.includes(path.sep)
  const beadsAvailable = looksLikePath
    ? await pathExists(beadsCommand)
    : await commandExists(beadsCommand)
  if (!beadsAvailable) {
    die(`'${beadsCommand}' is not available. Install beads_rust first.`)
  }

  // The target becomes the child's working directory, so a regular file at
  // that path is as unusable as a missing one.
  const targetStat = await fs.stat(target).catch(() => null)
  if (!targetStat?.isDirectory()) {
    die(`Target directory '${targetInput}' does not exist or is not accessible.`)
  }

  log(`Target: ${target}`)

  // Both beads invocations run with CI=1 added to the inherited environment.
  const beadsEnv = { ...process.env, CI: '1' }

  const beadsDir = path.join(target, '.beads')
  if (!(await pathExists(beadsDir))) {
    log("Running 'br init --quiet'")
    const initResult = await runBeads(['init', '--quiet'], { env: beadsEnv })
    if (initResult.code !== 0) {
      console.error(`${initResult.stdout}${initResult.stderr}`.trim())
      die("'br init' failed. Review output above.")
    }
  } else {
    log('Found existing .beads; skipping init to avoid destructive re-initialization')
  }

  log("Running 'br sync --flush-only'")
  const syncResult = await runBeads(['sync', '--flush-only'], { env: beadsEnv })
  if (syncResult.code !== 0) {
    console.error(`${syncResult.stdout}${syncResult.stderr}`.trim())
    die("'br sync --flush-only' failed. Review output above.")
  }

  log('Success: beads_rust workspace initialized and JSONL exported')
}
96
+
97
// Start the script; any rejection that escapes main() is turned into a fatal
// error message and a non-zero exit through die().
main().catch((error) => {
  const reason = error instanceof Error ? error.message : String(error)
  die(reason)
})
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "theslopmachine",
3
- "version": "0.3.7",
3
+ "version": "0.4.0",
4
4
  "description": "SlopMachine installer and project bootstrap CLI",
5
5
  "license": "MIT",
6
6
  "type": "module",