cclaw-cli 0.5.16 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/artifact-linter.d.ts +13 -0
- package/dist/artifact-linter.js +182 -13
- package/dist/cli.d.ts +4 -2
- package/dist/cli.js +18 -4
- package/dist/config.d.ts +2 -2
- package/dist/config.js +19 -5
- package/dist/constants.d.ts +2 -2
- package/dist/constants.js +3 -2
- package/dist/content/agents.js +2 -2
- package/dist/content/examples.js +71 -62
- package/dist/content/hooks.d.ts +1 -0
- package/dist/content/hooks.js +145 -0
- package/dist/content/learnings.js +25 -5
- package/dist/content/meta-skill.js +12 -0
- package/dist/content/next-command.js +8 -0
- package/dist/content/observe.js +18 -0
- package/dist/content/session-hooks.js +1 -1
- package/dist/content/stage-schema.js +12 -2
- package/dist/content/status-command.d.ts +9 -0
- package/dist/content/status-command.js +132 -0
- package/dist/content/templates.js +18 -19
- package/dist/content/utility-skills.d.ts +6 -2
- package/dist/content/utility-skills.js +431 -3
- package/dist/delegation.d.ts +6 -0
- package/dist/delegation.js +12 -4
- package/dist/doctor.js +37 -1
- package/dist/flow-state.d.ts +16 -4
- package/dist/flow-state.js +50 -11
- package/dist/gate-evidence.d.ts +14 -0
- package/dist/gate-evidence.js +65 -3
- package/dist/harness-adapters.js +1 -0
- package/dist/install.d.ts +2 -1
- package/dist/install.js +107 -6
- package/dist/runs.d.ts +13 -1
- package/dist/runs.js +73 -7
- package/dist/types.d.ts +13 -0
- package/dist/types.js +13 -0
- package/package.json +1 -1
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Utility skills that complement the
|
|
2
|
+
* Utility skills that complement the 8 flow stages.
|
|
3
3
|
* These are contextual lenses, not flow stages.
|
|
4
4
|
* Each skill: ~120-180 lines, under the 500-line progressive disclosure guideline.
|
|
5
5
|
*/
|
|
@@ -504,12 +504,45 @@ Do not start implementation execution without an approved plan artifact and expl
|
|
|
504
504
|
- Machine-only checks are delegated to subagents when supported.
|
|
505
505
|
- User approvals are requested only at required gate boundaries.
|
|
506
506
|
|
|
507
|
+
## Fresh Context Protocol (between waves)
|
|
508
|
+
|
|
509
|
+
After a wave completes — especially after long agent turns — context drift is
|
|
510
|
+
the #1 cause of degraded execution quality. Before starting the **next wave**,
|
|
511
|
+
prefer a **fresh agent context** over continuing in a saturated session:
|
|
512
|
+
|
|
513
|
+
1. **Snapshot wave outcome** — append a short summary to the plan artifact
|
|
514
|
+
(\`### Wave <N> outcome\` with: tasks done, evidence files, blockers, next-wave inputs).
|
|
515
|
+
2. **Capture handoff facts** — the minimum information the next agent needs:
|
|
516
|
+
- Stage and run id (from \`.cclaw/state/flow-state.json\`)
|
|
517
|
+
- List of completed task IDs from the plan
|
|
518
|
+
- Open blockers / failing gates by name
|
|
519
|
+
- File paths the next wave will touch (no full diffs)
|
|
520
|
+
3. **Decide: continue or rotate**
|
|
521
|
+
- **Rotate** (start a new agent session) when: prior wave consumed > ~50% of the context budget, the prior wave required deep investigation that the next wave does not need, or you are about to cross a stage boundary.
|
|
522
|
+
- **Continue** when: next wave is a tiny follow-up (≤ 1 task) and the prior context is directly relevant.
|
|
523
|
+
4. **Resume** in the new session via \`/cc-next\` — the session-start hook will restore flow state, checkpoint, and digest automatically.
|
|
524
|
+
|
|
525
|
+
This is the same intuition as Compound Engineering's "fresh context per iteration": every wave starts with a clean, intentionally-loaded context, not a degraded carry-over.
|
|
526
|
+
|
|
527
|
+
### Handoff template (paste into next session)
|
|
528
|
+
|
|
529
|
+
\`\`\`markdown
|
|
530
|
+
## Wave <N> handoff
|
|
531
|
+
- Stage: <stage>
|
|
532
|
+
- Run: <runId>
|
|
533
|
+
- Completed task IDs: <list>
|
|
534
|
+
- Blockers: <list or none>
|
|
535
|
+
- Files next wave will touch: <list>
|
|
536
|
+
- Verification command(s) used: <list>
|
|
537
|
+
\`\`\`
|
|
538
|
+
|
|
507
539
|
## Anti-Patterns
|
|
508
540
|
|
|
509
541
|
- Executing all tasks in one pass without intermediate verification.
|
|
510
542
|
- Marking tasks done without command evidence.
|
|
511
543
|
- Reordering critical dependencies for speed.
|
|
512
544
|
- Continuing after a gate failure hoping later tasks fix it.
|
|
545
|
+
- Carrying a saturated context across wave boundaries because "it has all the history" — saturated context is a liability, not an asset.
|
|
513
546
|
`;
|
|
514
547
|
}
|
|
515
548
|
export function contextEngineeringSkill() {
|
|
@@ -649,6 +682,393 @@ Do not approve user-facing UI changes that break basic keyboard navigation or re
|
|
|
649
682
|
- Color-only status indicators with no text/aria support.
|
|
650
683
|
`;
|
|
651
684
|
}
|
|
685
|
+
export function landscapeCheckSkill() {
|
|
686
|
+
return `---
|
|
687
|
+
name: landscape-check
|
|
688
|
+
description: "Landscape survey before a design/scope decision. Use when deciding whether to build, reuse, or adopt — inside and outside the repo."
|
|
689
|
+
---
|
|
690
|
+
|
|
691
|
+
# Landscape Check
|
|
692
|
+
|
|
693
|
+
## Quick Start
|
|
694
|
+
|
|
695
|
+
> 1. Before committing to a build decision, survey the landscape: in-repo, in-ecosystem, and in-class.
|
|
696
|
+
> 2. Produce a one-page table of candidates (build / reuse in-repo / adopt external) with evidence.
|
|
697
|
+
> 3. Explicitly kill alternatives with a one-line reason. Do not leave implicit assumptions.
|
|
698
|
+
|
|
699
|
+
## HARD-GATE
|
|
700
|
+
|
|
701
|
+
Do not approve a scope or design that introduces a new system, library,
|
|
702
|
+
or abstraction without comparing at least **one in-repo candidate** and
|
|
703
|
+
**one external/ecosystem candidate** (or explicitly stating why no such
|
|
704
|
+
candidates exist).
|
|
705
|
+
|
|
706
|
+
## When to Use
|
|
707
|
+
|
|
708
|
+
- Scope stage, before picking a mode (expand/selective/hold/reduce)
|
|
709
|
+
- Design stage, before committing to a new architecture boundary
|
|
710
|
+
- Brainstorm stage, when the user frames the problem as "let's build X"
|
|
711
|
+
- Review stage, when a proposed change duplicates an existing capability
|
|
712
|
+
|
|
713
|
+
## Protocol
|
|
714
|
+
|
|
715
|
+
1. **Define the capability in one sentence.** "We need a way to <verb> <object> under <constraint>."
|
|
716
|
+
2. **In-repo search.** Grep for similar verbs/modules/components. Read the closest 1-3 candidates. Record their fit and why they are or are not a good adapter target.
|
|
717
|
+
3. **Ecosystem search.** Check ecosystem defaults (stdlib, framework primitives, common OSS packages in use). Do not invent new dependencies when an existing one covers 80%+ of the need.
|
|
718
|
+
4. **In-class search.** Look at how other well-known projects in the same class solve this. Cite at least one concrete example (even if you end up rejecting it).
|
|
719
|
+
5. **Produce the decision table.** Columns: Candidate, Kind (build / reuse / adopt), Fit (1-5), Effort (S/M/L/XL), Risk, Reason accepted or rejected.
|
|
720
|
+
6. **Commit.** Pick exactly one winner. All losers must have a one-line kill reason.
|
|
721
|
+
|
|
722
|
+
## Output Template
|
|
723
|
+
|
|
724
|
+
\`\`\`markdown
|
|
725
|
+
### Landscape Check — <capability>
|
|
726
|
+
|
|
727
|
+
| Candidate | Kind | Fit | Effort | Risk | Verdict |
|
|
728
|
+
|---|---|---|---|---|---|
|
|
729
|
+
| src/foo/Bar | reuse | 4/5 | S | Low | SELECTED — already covers 80% of the need |
|
|
730
|
+
| external/lib-x | adopt | 3/5 | M | Med | REJECTED — heavy dep, 20% unused surface |
|
|
731
|
+
| build new | build | 2/5 | L | High | REJECTED — premature abstraction |
|
|
732
|
+
|
|
733
|
+
**Decision:** Reuse \`src/foo/Bar\` with a thin adapter. Kill reasons recorded above.
|
|
734
|
+
\`\`\`
|
|
735
|
+
|
|
736
|
+
## Anti-Patterns
|
|
737
|
+
|
|
738
|
+
- "We looked and nothing fits" without citing what was looked at.
|
|
739
|
+
- Treating "nobody on the team knows library X" as a kill reason without evaluating the learning cost.
|
|
740
|
+
- Choosing "build" because reuse would require a small refactor of the existing component.
|
|
741
|
+
- Skipping the in-class search because "our case is special" — it usually is not.
|
|
742
|
+
|
|
743
|
+
## Red Flags
|
|
744
|
+
|
|
745
|
+
- Decision table has only the winner listed.
|
|
746
|
+
- Ecosystem search is empty when a well-known primitive obviously applies.
|
|
747
|
+
- "Fit" scores without evidence (no file:line, no cited OSS repo, no framework docs reference).
|
|
748
|
+
- The in-repo candidate was never read before being dismissed.
|
|
749
|
+
`;
|
|
750
|
+
}
|
|
751
|
+
export function knowledgeCurationSkill() {
|
|
752
|
+
return `---
|
|
753
|
+
name: knowledge-curation
|
|
754
|
+
description: "Read-only curation pass over .cclaw/knowledge.md. Surfaces stale, duplicate, or low-confidence entries and proposes a soft-archive plan; never deletes without explicit user approval."
|
|
755
|
+
---
|
|
756
|
+
|
|
757
|
+
# Knowledge Curation
|
|
758
|
+
|
|
759
|
+
## Quick Start
|
|
760
|
+
|
|
761
|
+
> 1. This is a **read-only audit** of \`.cclaw/knowledge.md\`. Never delete or rewrite entries here.
|
|
762
|
+
> 2. Surface candidates for soft-archive when the active file has > 50 entries OR contains stale/duplicate/superseded entries.
|
|
763
|
+
> 3. Propose a single archive plan and require explicit user approval before any move.
|
|
764
|
+
|
|
765
|
+
## HARD-GATE
|
|
766
|
+
|
|
767
|
+
- Do not modify \`.cclaw/knowledge.md\` from this skill except via an explicit
|
|
768
|
+
user-approved archive plan that **moves** entries to
|
|
769
|
+
\`.cclaw/knowledge.archive.md\` (never deletes them).
|
|
770
|
+
- Do not silently rewrite or summarize entries — preserve original wording.
|
|
771
|
+
|
|
772
|
+
## When to run
|
|
773
|
+
|
|
774
|
+
- Triggered automatically by **\`/cc-learn curate\`**.
|
|
775
|
+
- Recommended after \`cclaw archive\` of a feature run, when knowledge has grown.
|
|
776
|
+
- Recommended when active entry count exceeds **50**.
|
|
777
|
+
|
|
778
|
+
## Audit dimensions
|
|
779
|
+
|
|
780
|
+
For each entry in \`.cclaw/knowledge.md\` produce a row with:
|
|
781
|
+
|
|
782
|
+
| Field | Source |
|
|
783
|
+
|---|---|
|
|
784
|
+
| Title | \`### <ts> [type] <title>\` heading |
|
|
785
|
+
| Type | \`rule\` / \`pattern\` / \`lesson\` / \`compound\` |
|
|
786
|
+
| Stage | \`Stage:\` field (or \`unknown\`) |
|
|
787
|
+
| Age | days since timestamp |
|
|
788
|
+
| Confidence | \`Confidence:\` field if present, else \`unstated\` |
|
|
789
|
+
| Domain | \`Domain:\` field if present |
|
|
790
|
+
| Supersedes | \`Supersedes:\` field if present |
|
|
791
|
+
| Status hint | one of: keep / supersede-candidate / archive-candidate / duplicate |
|
|
792
|
+
|
|
793
|
+
### Status rules
|
|
794
|
+
|
|
795
|
+
- **supersede-candidate**: another entry has \`Supersedes: <this-title>\`.
|
|
796
|
+
- **duplicate**: title or insight ≈ another entry's (caller's judgment, not regex).
|
|
797
|
+
- **archive-candidate**:
|
|
798
|
+
- Type \`lesson\` AND age > 180 days AND no \`Supersedes\` chain points to it; OR
|
|
799
|
+
- Stage = \`brainstorm\` AND age > 90 days; OR
|
|
800
|
+
- Confidence = \`low\` AND age > 60 days; OR
|
|
801
|
+
- Total active entries > 50 and entry has lowest reuse signal.
|
|
802
|
+
- **keep**: everything else.
|
|
803
|
+
|
|
804
|
+
## Output format
|
|
805
|
+
|
|
806
|
+
Produce two artifacts as **chat output only** (do not write files):
|
|
807
|
+
|
|
808
|
+
### 1. Audit table
|
|
809
|
+
|
|
810
|
+
\`\`\`markdown
|
|
811
|
+
| # | Title | Type | Stage | Age | Confidence | Status hint |
|
|
812
|
+
|---|---|---|---|---|---|---|
|
|
813
|
+
| 1 | … | … | … | … | … | … |
|
|
814
|
+
\`\`\`
|
|
815
|
+
|
|
816
|
+
### 2. Soft-archive proposal
|
|
817
|
+
|
|
818
|
+
\`\`\`markdown
|
|
819
|
+
## Proposed archive (requires user approval)
|
|
820
|
+
|
|
821
|
+
Threshold reasoning: <why entries below were selected>
|
|
822
|
+
|
|
823
|
+
Entries to archive:
|
|
824
|
+
1. <title> — reason
|
|
825
|
+
2. <title> — reason
|
|
826
|
+
|
|
827
|
+
Action plan if approved:
|
|
828
|
+
1. Append a header to \`.cclaw/knowledge.archive.md\` with today's UTC date.
|
|
829
|
+
2. Move (cut/paste) selected entries verbatim from \`.cclaw/knowledge.md\` into the archive file.
|
|
830
|
+
3. Append a single supersession line to \`.cclaw/knowledge.md\`:
|
|
831
|
+
\\\`### <ts> [pattern] knowledge-curation-<date> — archived <N> entries, see knowledge.archive.md\\\`
|
|
832
|
+
|
|
833
|
+
After approval: ask the user to run the move themselves, or — if they explicitly grant write access — perform the move atomically and report the new active count.
|
|
834
|
+
\`\`\`
|
|
835
|
+
|
|
836
|
+
## Anti-patterns
|
|
837
|
+
|
|
838
|
+
- Deleting entries instead of archiving — knowledge must be append-only.
|
|
839
|
+
- Rewriting an entry to "clean it up" — preserve original wording verbatim.
|
|
840
|
+
- Auto-archiving without user approval, even when above threshold.
|
|
841
|
+
- Removing \`compound\` entries — these are the highest-leverage records.
|
|
842
|
+
- Treating high age as a proxy for low value — a 2-year-old security rule may be the most important entry in the file.
|
|
843
|
+
`;
|
|
844
|
+
}
|
|
845
|
+
export function securityAuditSkill() {
|
|
846
|
+
return `---
|
|
847
|
+
name: security-audit
|
|
848
|
+
description: "Proactive security audit — hunts for vulnerabilities across the codebase using pattern-based detection. Distinct from security review (checklist for a specific diff)."
|
|
849
|
+
---
|
|
850
|
+
|
|
851
|
+
# Security Audit
|
|
852
|
+
|
|
853
|
+
## Quick Start
|
|
854
|
+
|
|
855
|
+
> 1. Scan the codebase for high-signal vulnerability patterns (not just the diff).
|
|
856
|
+
> 2. Produce a finding register grouped by category with severity and file:line.
|
|
857
|
+
> 3. For each Critical: provide a concrete exploit path (not just a category label).
|
|
858
|
+
|
|
859
|
+
## HARD-GATE
|
|
860
|
+
|
|
861
|
+
Do not close a security audit pass while any Critical pattern match is
|
|
862
|
+
unresolved. Each Critical finding must be either fixed, suppressed with
|
|
863
|
+
a documented reason, or tracked as a named accepted risk with an owner.
|
|
864
|
+
|
|
865
|
+
## When to Use
|
|
866
|
+
|
|
867
|
+
- Initial project onboarding (baseline audit)
|
|
868
|
+
- Before a major release that expands attack surface
|
|
869
|
+
- When new dependencies are introduced
|
|
870
|
+
- After a security incident (to check for same-class issues)
|
|
871
|
+
- On a scheduled cadence (quarterly for stable projects, monthly for high-risk)
|
|
872
|
+
|
|
873
|
+
This is complementary to the \`security\` skill, which is a point-in-time
|
|
874
|
+
review checklist scoped to a single diff.
|
|
875
|
+
|
|
876
|
+
## Audit Pattern Catalog
|
|
877
|
+
|
|
878
|
+
Run each category as a focused pass. For every pattern, capture
|
|
879
|
+
file:line evidence — never assume the project is clean just because
|
|
880
|
+
there was "no obvious problem".
|
|
881
|
+
|
|
882
|
+
### 1. Secret Exposure
|
|
883
|
+
|
|
884
|
+
Patterns to grep for (language-agnostic):
|
|
885
|
+
|
|
886
|
+
- \`AKIA[0-9A-Z]{16}\` — AWS access key id
|
|
887
|
+
- \`-----BEGIN (RSA |EC |DSA )?PRIVATE KEY-----\`
|
|
888
|
+
- \`xox[bp]-[0-9a-zA-Z-]+\` — Slack tokens
|
|
889
|
+
- \`ghp_[A-Za-z0-9]{36}\` — GitHub PAT
|
|
890
|
+
- \`console\\.log.*(token|secret|password|api_key)\`
|
|
891
|
+
- Hard-coded JWTs (3 base64 segments separated by \`.\`)
|
|
892
|
+
|
|
893
|
+
Also inspect: .env.example for real values, logs for PII, git history for
|
|
894
|
+
leaked secrets via \`git log -p | grep -i secret\`.
|
|
895
|
+
|
|
896
|
+
### 2. Injection
|
|
897
|
+
|
|
898
|
+
- Raw SQL string concatenation with request data
|
|
899
|
+
- \`eval(\`, \`new Function(\`, \`exec(\`, \`execSync(\` with untrusted input
|
|
900
|
+
- \`dangerouslySetInnerHTML\`, \`innerHTML =\` with user-provided content
|
|
901
|
+
- Shell command construction from user input
|
|
902
|
+
- Template literal SQL (\`\\\`SELECT ... \${userInput}\\\`\`)
|
|
903
|
+
|
|
904
|
+
### 3. Auth and Session
|
|
905
|
+
|
|
906
|
+
- Missing auth middleware on routes that mutate state
|
|
907
|
+
- JWT verification that trusts the \`alg\` header (algorithm confusion)
|
|
908
|
+
- \`setCookie\` without \`HttpOnly\`, \`Secure\`, or \`SameSite\`
|
|
909
|
+
- Session fixation (no regenerate-on-login)
|
|
910
|
+
- Rate limit absent on login, signup, password reset
|
|
911
|
+
|
|
912
|
+
### 4. Trust Boundary and LLM Output
|
|
913
|
+
|
|
914
|
+
- LLM output passed directly to \`exec\` / SQL / filesystem calls
|
|
915
|
+
- Tool-call arguments from the model used without schema validation
|
|
916
|
+
- Untrusted markdown rendered without sanitization
|
|
917
|
+
- Confused deputy: service acts on behalf of user without passing auth context
|
|
918
|
+
|
|
919
|
+
### 5. Crypto Misuse
|
|
920
|
+
|
|
921
|
+
- MD5 / SHA1 for password hashing
|
|
922
|
+
- \`Math.random()\` used for security tokens
|
|
923
|
+
- Reused IV in AES-GCM (catastrophic)
|
|
924
|
+
- ECB mode cipher usage
|
|
925
|
+
- Missing constant-time comparison for secrets
|
|
926
|
+
|
|
927
|
+
### 6. Dependency and Supply Chain
|
|
928
|
+
|
|
929
|
+
- \`npm audit\` / \`pip-audit\` Critical or High advisories unresolved
|
|
930
|
+
- Dependencies pulled from non-locked tags instead of pinned versions
|
|
931
|
+
- Post-install scripts from new/unknown packages
|
|
932
|
+
- Un-reviewed direct-to-main dependency bumps
|
|
933
|
+
|
|
934
|
+
### 7. File System and Path Traversal
|
|
935
|
+
|
|
936
|
+
- \`path.join\` with user input without \`path.normalize\` + prefix check
|
|
937
|
+
- Unzip/untar without entry path validation (zip-slip)
|
|
938
|
+
- Writing to user-supplied paths without allowlist
|
|
939
|
+
- Following symlinks inside trusted directories
|
|
940
|
+
|
|
941
|
+
### 8. Logging and Observability
|
|
942
|
+
|
|
943
|
+
- Stack traces returned in API responses (production)
|
|
944
|
+
- Logs containing tokens, passwords, full request bodies
|
|
945
|
+
- Error messages that reveal DB schema or internal paths
|
|
946
|
+
|
|
947
|
+
## Output Format
|
|
948
|
+
|
|
949
|
+
Produce a single audit report with this structure:
|
|
950
|
+
|
|
951
|
+
\`\`\`markdown
|
|
952
|
+
# Security Audit — <scope>, <date>
|
|
953
|
+
|
|
954
|
+
## Summary
|
|
955
|
+
- Files scanned: <N>
|
|
956
|
+
- Categories checked: <list>
|
|
957
|
+
- Critical: <N>, Important: <N>, Suggestion: <N>
|
|
958
|
+
|
|
959
|
+
## Findings
|
|
960
|
+
|
|
961
|
+
### <Category> — <Pattern name>
|
|
962
|
+
- **Severity:** Critical | Important | Suggestion
|
|
963
|
+
- **File:line:** path/to/file.ts:42
|
|
964
|
+
- **Evidence:** short excerpt (≤ 3 lines)
|
|
965
|
+
- **Exploit path:** specific, concrete (not a category label)
|
|
966
|
+
- **Fix:** specific remediation with command/patch-level detail
|
|
967
|
+
- **Owner:** <name or role>
|
|
968
|
+
- **Target date:** <YYYY-MM-DD for Critical/Important>
|
|
969
|
+
|
|
970
|
+
## Accepted Risks
|
|
971
|
+
- <finding id>: <reason documented>, owner <name>, revisit <date>
|
|
972
|
+
|
|
973
|
+
## Suppressed (False Positives)
|
|
974
|
+
- <finding id>: <why this pattern is not exploitable here>
|
|
975
|
+
\`\`\`
|
|
976
|
+
|
|
977
|
+
## Anti-Patterns
|
|
978
|
+
|
|
979
|
+
- "No Critical findings" without stating what patterns were actually run.
|
|
980
|
+
- Accepting a Critical risk without named owner + revisit date.
|
|
981
|
+
- Treating a lint rule as equivalent to a runtime security check.
|
|
982
|
+
- Running audits only on the diff — the diff does not contain legacy risks.
|
|
983
|
+
- Deleting audit reports after fixing findings (keep them as regression evidence).
|
|
984
|
+
|
|
985
|
+
## Red Flags
|
|
986
|
+
|
|
987
|
+
- Audit claims coverage but cites zero file:line evidence.
|
|
988
|
+
- Every Critical pattern has zero matches (this is implausible for any non-trivial codebase — verify the grep commands were actually executed).
|
|
989
|
+
- Findings are Important-only (no Critical or Suggestion buckets) — usually means severity was compressed to avoid escalation.
|
|
990
|
+
`;
|
|
991
|
+
}
|
|
992
|
+
export function adversarialReviewSkill() {
|
|
993
|
+
return `---
|
|
994
|
+
name: adversarial-review
|
|
995
|
+
description: "Adversarial review lens. Use during review to deliberately attack the implementation — as a hostile user, a future maintainer, or a competitor."
|
|
996
|
+
---
|
|
997
|
+
|
|
998
|
+
# Adversarial Review
|
|
999
|
+
|
|
1000
|
+
## Quick Start
|
|
1001
|
+
|
|
1002
|
+
> 1. Stop assuming good-faith usage. Play three roles in sequence: hostile user, stressed operator, future maintainer.
|
|
1003
|
+
> 2. For each role, produce at least 2 concrete attack/friction scenarios with file:line evidence.
|
|
1004
|
+
> 3. Escalate any finding that a Critical severity review would miss.
|
|
1005
|
+
|
|
1006
|
+
## HARD-GATE
|
|
1007
|
+
|
|
1008
|
+
Do not complete review stage without an adversarial-review pass when
|
|
1009
|
+
**any** of the following apply: user-facing input surface changed,
|
|
1010
|
+
trust boundary moved, concurrency was introduced, or a new failure
|
|
1011
|
+
mode path was added.
|
|
1012
|
+
|
|
1013
|
+
## When to Use
|
|
1014
|
+
|
|
1015
|
+
- Review stage, after Layer 2 quality checks complete
|
|
1016
|
+
- Before shipping anything user-facing or revenue-sensitive
|
|
1017
|
+
- When fuzz/property-testing exists but was not exercised against this change
|
|
1018
|
+
- When the implementer has a strong "this is fine" prior
|
|
1019
|
+
|
|
1020
|
+
## Roles and Questions
|
|
1021
|
+
|
|
1022
|
+
### Role 1 — Hostile User
|
|
1023
|
+
|
|
1024
|
+
You are trying to break, trick, or exploit the system. Ask:
|
|
1025
|
+
|
|
1026
|
+
- What happens on empty / null / maximum / negative / unicode / newline inputs?
|
|
1027
|
+
- What if I call the endpoint 1000 times per second? What about 1 every 10 minutes for a week?
|
|
1028
|
+
- What if I send a payload that is almost valid (off-by-one schema, wrong content-type, duplicate keys)?
|
|
1029
|
+
- What if two honest actions collide (double-click, race, retry after timeout)?
|
|
1030
|
+
- Can I observe a secret through error messages, timing, or response size?
|
|
1031
|
+
|
|
1032
|
+
### Role 2 — Stressed Operator
|
|
1033
|
+
|
|
1034
|
+
You are on call at 3 AM. Ask:
|
|
1035
|
+
|
|
1036
|
+
- What does this look like in logs when it fails? Is the failure actionable?
|
|
1037
|
+
- If I restart the service mid-request, does state recover cleanly?
|
|
1038
|
+
- Is the rollback procedure real, tested, and under 15 minutes?
|
|
1039
|
+
- Can I tell from metrics alone whether this is healthy?
|
|
1040
|
+
|
|
1041
|
+
### Role 3 — Future Maintainer
|
|
1042
|
+
|
|
1043
|
+
You are reading this code in 6 months with no memory of the context. Ask:
|
|
1044
|
+
|
|
1045
|
+
- Can I safely change this without breaking callers I cannot see?
|
|
1046
|
+
- Are there hidden invariants not captured in tests?
|
|
1047
|
+
- Will renaming this field silently break serialized consumers?
|
|
1048
|
+
- Is the "obviously correct" path actually correct, or is it just plausible?
|
|
1049
|
+
|
|
1050
|
+
## Output Format
|
|
1051
|
+
|
|
1052
|
+
For each finding:
|
|
1053
|
+
|
|
1054
|
+
\`\`\`
|
|
1055
|
+
- **Role:** Hostile User | Stressed Operator | Future Maintainer
|
|
1056
|
+
- **Scenario:** concrete scenario (not a category)
|
|
1057
|
+
- **File:line:** path/to/file.ts:42
|
|
1058
|
+
- **Impact:** what breaks, for whom, under what frequency
|
|
1059
|
+
- **Recommendation:** specific fix or mitigation
|
|
1060
|
+
\`\`\`
|
|
1061
|
+
|
|
1062
|
+
Escalate to the main review-army under the matching severity (Critical / Important / Suggestion).
|
|
1063
|
+
|
|
1064
|
+
## Anti-Patterns
|
|
1065
|
+
|
|
1066
|
+
- Treating adversarial review as a category list without producing concrete scenarios.
|
|
1067
|
+
- Assuming "our users would never do that" — they will, or the next integration will.
|
|
1068
|
+
- Running adversarial review after the ship decision is already made.
|
|
1069
|
+
- Only playing the hostile-user role and skipping operator + maintainer.
|
|
1070
|
+
`;
|
|
1071
|
+
}
|
|
652
1072
|
export const UTILITY_SKILL_FOLDERS = [
|
|
653
1073
|
"security",
|
|
654
1074
|
"debugging",
|
|
@@ -658,7 +1078,11 @@ export const UTILITY_SKILL_FOLDERS = [
|
|
|
658
1078
|
"executing-plans",
|
|
659
1079
|
"context-engineering",
|
|
660
1080
|
"source-driven-development",
|
|
661
|
-
"frontend-accessibility"
|
|
1081
|
+
"frontend-accessibility",
|
|
1082
|
+
"landscape-check",
|
|
1083
|
+
"adversarial-review",
|
|
1084
|
+
"security-audit",
|
|
1085
|
+
"knowledge-curation"
|
|
662
1086
|
];
|
|
663
1087
|
export const UTILITY_SKILL_MAP = {
|
|
664
1088
|
security: securityReviewSkill,
|
|
@@ -669,5 +1093,9 @@ export const UTILITY_SKILL_MAP = {
|
|
|
669
1093
|
"executing-plans": executingPlansSkill,
|
|
670
1094
|
"context-engineering": contextEngineeringSkill,
|
|
671
1095
|
"source-driven-development": sourceDrivenDevelopmentSkill,
|
|
672
|
-
"frontend-accessibility": frontendAccessibilitySkill
|
|
1096
|
+
"frontend-accessibility": frontendAccessibilitySkill,
|
|
1097
|
+
"landscape-check": landscapeCheckSkill,
|
|
1098
|
+
"adversarial-review": adversarialReviewSkill,
|
|
1099
|
+
"security-audit": securityAuditSkill,
|
|
1100
|
+
"knowledge-curation": knowledgeCurationSkill
|
|
673
1101
|
};
|
package/dist/delegation.d.ts
CHANGED
|
@@ -7,6 +7,11 @@ export type DelegationEntry = {
|
|
|
7
7
|
taskId?: string;
|
|
8
8
|
waiverReason?: string;
|
|
9
9
|
ts: string;
|
|
10
|
+
/**
|
|
11
|
+
* Run id the entry belongs to. Older ledgers written before 0.5.17 may omit this;
|
|
12
|
+
* consumers treat missing runId as unscoped (conservatively excluded from current-run checks).
|
|
13
|
+
*/
|
|
14
|
+
runId?: string;
|
|
10
15
|
};
|
|
11
16
|
export type DelegationLedger = {
|
|
12
17
|
runId: string;
|
|
@@ -18,4 +23,5 @@ export declare function checkMandatoryDelegations(projectRoot: string, stage: Fl
|
|
|
18
23
|
satisfied: boolean;
|
|
19
24
|
missing: string[];
|
|
20
25
|
waived: string[];
|
|
26
|
+
staleIgnored: string[];
|
|
21
27
|
}>;
|
package/dist/delegation.js
CHANGED
|
@@ -25,7 +25,8 @@ function isDelegationEntry(value) {
|
|
|
25
25
|
statusOk &&
|
|
26
26
|
typeof o.ts === "string" &&
|
|
27
27
|
(o.taskId === undefined || typeof o.taskId === "string") &&
|
|
28
|
-
(o.waiverReason === undefined || typeof o.waiverReason === "string")
|
|
28
|
+
(o.waiverReason === undefined || typeof o.waiverReason === "string") &&
|
|
29
|
+
(o.runId === undefined || typeof o.runId === "string"));
|
|
29
30
|
}
|
|
30
31
|
function parseLedger(raw, runId) {
|
|
31
32
|
if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
|
|
@@ -63,21 +64,27 @@ export async function appendDelegation(projectRoot, entry) {
|
|
|
63
64
|
await withDirectoryLock(delegationLockPath(projectRoot), async () => {
|
|
64
65
|
const filePath = delegationLogPath(projectRoot);
|
|
65
66
|
const prior = await readDelegationLedger(projectRoot);
|
|
67
|
+
const stamped = { ...entry, runId: entry.runId ?? activeRunId };
|
|
66
68
|
const ledger = {
|
|
67
69
|
runId: activeRunId,
|
|
68
|
-
entries: [...prior.entries,
|
|
70
|
+
entries: [...prior.entries, stamped]
|
|
69
71
|
};
|
|
70
72
|
await writeFileSafe(filePath, `${JSON.stringify(ledger, null, 2)}\n`);
|
|
71
73
|
});
|
|
72
74
|
}
|
|
73
75
|
export async function checkMandatoryDelegations(projectRoot, stage) {
|
|
74
76
|
const mandatory = stageSchema(stage).mandatoryDelegations;
|
|
77
|
+
const { activeRunId } = await readFlowState(projectRoot);
|
|
75
78
|
const ledger = await readDelegationLedger(projectRoot);
|
|
76
79
|
const forStage = ledger.entries.filter((e) => e.stage === stage);
|
|
80
|
+
const forRun = forStage.filter((e) => e.runId === activeRunId);
|
|
81
|
+
const staleIgnored = forStage
|
|
82
|
+
.filter((e) => e.runId !== activeRunId)
|
|
83
|
+
.map((e) => `${e.agent}(runId=${e.runId ?? "unknown"})`);
|
|
77
84
|
const missing = [];
|
|
78
85
|
const waived = [];
|
|
79
86
|
for (const agent of mandatory) {
|
|
80
|
-
const rows =
|
|
87
|
+
const rows = forRun.filter((e) => e.agent === agent);
|
|
81
88
|
const ok = rows.some((e) => e.status === "completed" || e.status === "waived");
|
|
82
89
|
if (!ok) {
|
|
83
90
|
missing.push(agent);
|
|
@@ -89,6 +96,7 @@ export async function checkMandatoryDelegations(projectRoot, stage) {
|
|
|
89
96
|
return {
|
|
90
97
|
satisfied: missing.length === 0,
|
|
91
98
|
missing,
|
|
92
|
-
waived
|
|
99
|
+
waived,
|
|
100
|
+
staleIgnored
|
|
93
101
|
};
|
|
94
102
|
}
|
package/dist/doctor.js
CHANGED
|
@@ -13,7 +13,7 @@ import { policyChecks } from "./policy.js";
|
|
|
13
13
|
import { readFlowState } from "./runs.js";
|
|
14
14
|
import { checkMandatoryDelegations } from "./delegation.js";
|
|
15
15
|
import { buildTraceMatrix } from "./trace-matrix.js";
|
|
16
|
-
import { reconcileAndWriteCurrentStageGateCatalog, verifyCurrentStageGateEvidence } from "./gate-evidence.js";
|
|
16
|
+
import { reconcileAndWriteCurrentStageGateCatalog, verifyCompletedStagesGateClosure, verifyCurrentStageGateEvidence } from "./gate-evidence.js";
|
|
17
17
|
import { stageSkillFolder } from "./content/skills.js";
|
|
18
18
|
import { UTILITY_SKILL_FOLDERS } from "./content/utility-skills.js";
|
|
19
19
|
import { CONTEXT_MODES, DEFAULT_CONTEXT_MODE } from "./content/contexts.js";
|
|
@@ -768,11 +768,37 @@ export async function doctorChecks(projectRoot, options = {}) {
|
|
|
768
768
|
? `warning: waived mandatory delegations for stage "${flowState.currentStage}": ${delegation.waived.join(", ")}`
|
|
769
769
|
: "no waived mandatory delegations for current stage"
|
|
770
770
|
});
|
|
771
|
+
checks.push({
|
|
772
|
+
name: "warning:delegation:stale_runs",
|
|
773
|
+
ok: true,
|
|
774
|
+
details: delegation.staleIgnored.length > 0
|
|
775
|
+
? `warning: ${delegation.staleIgnored.length} delegation entries from other runs were ignored: ${delegation.staleIgnored.join(", ")}`
|
|
776
|
+
: "no stale delegation entries from prior runs"
|
|
777
|
+
});
|
|
771
778
|
const trace = await buildTraceMatrix(projectRoot);
|
|
779
|
+
const artifactsDir = path.join(projectRoot, RUNTIME_ROOT, "artifacts");
|
|
780
|
+
const specExists = await exists(path.join(artifactsDir, "04-spec.md"));
|
|
781
|
+
const planExists = await exists(path.join(artifactsDir, "05-plan.md"));
|
|
782
|
+
const tddExists = await exists(path.join(artifactsDir, "06-tdd.md"));
|
|
772
783
|
const traceHasSignal = trace.entries.length > 0 ||
|
|
773
784
|
trace.orphanedCriteria.length > 0 ||
|
|
774
785
|
trace.orphanedTasks.length > 0 ||
|
|
775
786
|
trace.orphanedTests.length > 0;
|
|
787
|
+
const artifactsPresent = specExists || planExists || tddExists;
|
|
788
|
+
const emptyMatrixWithArtifacts = !traceHasSignal && artifactsPresent;
|
|
789
|
+
checks.push({
|
|
790
|
+
name: "trace:matrix_populated",
|
|
791
|
+
ok: !emptyMatrixWithArtifacts,
|
|
792
|
+
details: emptyMatrixWithArtifacts
|
|
793
|
+
? `trace matrix is empty but artifacts exist (${[
|
|
794
|
+
specExists ? "04-spec.md" : null,
|
|
795
|
+
planExists ? "05-plan.md" : null,
|
|
796
|
+
tddExists ? "06-tdd.md" : null
|
|
797
|
+
].filter(Boolean).join(", ")}). The extractors found no criterion/task/slice IDs — check heading conventions and ID formats.`
|
|
798
|
+
: artifactsPresent
|
|
799
|
+
? `trace matrix parsed ${trace.entries.length} criterion(s) from present artifacts`
|
|
800
|
+
: "no downstream artifacts to trace yet"
|
|
801
|
+
});
|
|
776
802
|
checks.push({
|
|
777
803
|
name: "trace:criteria_coverage",
|
|
778
804
|
ok: !traceHasSignal || trace.orphanedCriteria.length === 0,
|
|
@@ -802,6 +828,16 @@ export async function doctorChecks(projectRoot, options = {}) {
|
|
|
802
828
|
? `stage "${gateEvidence.stage}" gate evidence is consistent (required=${gateEvidence.requiredCount}, passed=${gateEvidence.passedCount}, blocked=${gateEvidence.blockedCount})`
|
|
803
829
|
: gateEvidence.issues.join(" ")
|
|
804
830
|
});
|
|
831
|
+
const completedClosure = verifyCompletedStagesGateClosure(flowState);
|
|
832
|
+
checks.push({
|
|
833
|
+
name: "gates:closure:completed_stages",
|
|
834
|
+
ok: completedClosure.ok,
|
|
835
|
+
details: completedClosure.ok
|
|
836
|
+
? flowState.completedStages.length === 0
|
|
837
|
+
? "no completed stages yet"
|
|
838
|
+
: `all ${flowState.completedStages.length} completed stages have every required gate passed`
|
|
839
|
+
: completedClosure.issues.join(" ")
|
|
840
|
+
});
|
|
805
841
|
// Self-improvement block in stage skills
|
|
806
842
|
for (const stage of COMMAND_FILE_ORDER) {
|
|
807
843
|
const skillPath = path.join(projectRoot, RUNTIME_ROOT, "skills", stageSkillFolder(stage), "SKILL.md");
|
package/dist/flow-state.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { FlowStage, TransitionRule } from "./types.js";
|
|
1
|
+
import type { FlowStage, FlowTrack, TransitionRule } from "./types.js";
|
|
2
2
|
export declare const TRANSITION_RULES: TransitionRule[];
|
|
3
3
|
export interface StageGateState {
|
|
4
4
|
required: string[];
|
|
@@ -11,9 +11,21 @@ export interface FlowState {
|
|
|
11
11
|
completedStages: FlowStage[];
|
|
12
12
|
guardEvidence: Record<string, string>;
|
|
13
13
|
stageGateCatalog: Record<FlowStage, StageGateState>;
|
|
14
|
+
/** Active flow track (determines which stages are in the critical path for this run). */
|
|
15
|
+
track: FlowTrack;
|
|
16
|
+
/** Stages explicitly skipped for this track (empty for standard; populated for quick). */
|
|
17
|
+
skippedStages: FlowStage[];
|
|
14
18
|
}
|
|
15
|
-
export
|
|
19
|
+
export interface InitialFlowStateOptions {
|
|
20
|
+
activeRunId?: string;
|
|
21
|
+
track?: FlowTrack;
|
|
22
|
+
}
|
|
23
|
+
export declare function isFlowTrack(value: unknown): value is FlowTrack;
|
|
24
|
+
export declare function trackStages(track: FlowTrack): FlowStage[];
|
|
25
|
+
export declare function skippedStagesForTrack(track: FlowTrack): FlowStage[];
|
|
26
|
+
export declare function firstStageForTrack(track: FlowTrack): FlowStage;
|
|
27
|
+
export declare function createInitialFlowState(activeRunIdOrOptions?: string | InitialFlowStateOptions, maybeTrack?: FlowTrack): FlowState;
|
|
16
28
|
export declare function canTransition(from: FlowStage, to: FlowStage): boolean;
|
|
17
29
|
export declare function getTransitionGuards(from: FlowStage, to: FlowStage): string[];
|
|
18
|
-
export declare function nextStage(stage: FlowStage): FlowStage | null;
|
|
19
|
-
export declare function previousStage(stage: FlowStage): FlowStage | null;
|
|
30
|
+
export declare function nextStage(stage: FlowStage, track?: FlowTrack): FlowStage | null;
|
|
31
|
+
export declare function previousStage(stage: FlowStage, track?: FlowTrack): FlowStage | null;
|