cclaw-cli 0.5.17 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,132 @@
1
+ import { RUNTIME_ROOT } from "../constants.js";
2
+ const STATUS_SKILL_FOLDER = "flow-status";
3
+ const STATUS_SKILL_NAME = "flow-status";
4
/** Location of the flow-state JSON file beneath the runtime root. */
function flowStatePath() {
    const relative = "/state/flow-state.json";
    return RUNTIME_ROOT + relative;
}
7
/** Location of the delegation-log JSON file beneath the runtime root. */
function delegationLogPath() {
    const relative = "/state/delegation-log.json";
    return RUNTIME_ROOT + relative;
}
10
/** Location of the knowledge markdown file beneath the runtime root. */
function knowledgePath() {
    const relative = "/knowledge.md";
    return RUNTIME_ROOT + relative;
}
13
/**
 * Command contract for /cc-status — a read-only snapshot command.
 * The described command does not mutate state and is always safe to run.
 *
 * File paths shown in the text come from flowStatePath(), delegationLogPath()
 * and knowledgePath(); the primary-skill path is built from RUNTIME_ROOT and
 * STATUS_SKILL_FOLDER.
 *
 * @returns the contract as a single markdown string.
 */
export function statusCommandContract() {
    const flowPath = flowStatePath();
    const delegationPath = delegationLogPath();
    // knowledgePath is a function: it has to be called. Interpolating the bare
    // identifier would embed the function's source text in the markdown.
    const knowledgeFile = knowledgePath();
    return `# /cc-status

## Purpose

**Read-only snapshot of the cclaw run.** Shows track, current stage, completed stages,
gate coverage, mandatory delegations, and the top 3 knowledge highlights.

This command **never mutates state**. Use it at session start to orient, or at any
time to answer "where are we?" without advancing the flow.

## HARD-GATE

- **Do not** use \`/cc-status\` output to infer gate completion for decisions — cite
artifact evidence via \`/cc-next\` when advancing.
- **Do not** mutate \`${flowPath}\` or delegation log from this command.

## Algorithm

1. Read **\`${flowPath}\`** — capture \`track\`, \`currentStage\`, \`completedStages\`,
\`skippedStages\`, and per-stage gate catalog.
2. Read **\`${delegationPath}\`** — count delegated / completed / waived / pending entries
for the current stage's \`mandatoryDelegations\`.
3. Read the top of **\`${knowledgeFile}\`** — surface up to 3 most recent entries
(by trailing timestamp or source marker).
4. Emit the status block described below. Do **not** load any stage skill.

## Status Block Format

\`\`\`
cclaw status
track: <quick|standard>
current stage: <stage> (<N>/<total> in track)
completed stages: <list or "none">
skipped stages: <list or "none">

gates:
passed: <count> of <required>
blocked: <count>
unmet: <list of gate ids>

delegations (current stage):
required: <list>
completed: <list>
pending: <list>

knowledge highlights:
- <latest entry summary line>
- <second entry summary line>
- <third entry summary line>

next action:
/cc-next (advance or resume current stage)
\`\`\`

## Anti-patterns

- Inventing gate status without reading \`${flowPath}\`.
- Reporting delegations as satisfied when the log says \`pending\`.
- Advancing the stage from \`/cc-status\` — progression belongs to \`/cc-next\`.

## Primary skill

**${RUNTIME_ROOT}/skills/${STATUS_SKILL_FOLDER}/SKILL.md**
`;
}
85
/**
 * Skill body for /cc-status — read-only status snapshot.
 *
 * Produces the SKILL.md text: YAML front matter (name taken from
 * STATUS_SKILL_NAME, plus a description) followed by the markdown body.
 * All three data-file paths come from the shared path helpers so this text
 * and the command contract cannot drift apart.
 *
 * @returns the skill document as a single markdown string.
 */
export function statusCommandSkillMarkdown() {
    const flowPath = flowStatePath();
    const delegationPath = delegationLogPath();
    // Same value as `${RUNTIME_ROOT}/knowledge.md`, obtained via the helper
    // instead of repeating the literal path here.
    const knowledgeFile = knowledgePath();
    return `---
name: ${STATUS_SKILL_NAME}
description: "Read-only snapshot of the cclaw flow: track, stage, gate coverage, delegations, knowledge highlights. Never mutates state."
---

# /cc-status — Flow Status Snapshot

## Overview

\`/cc-status\` is the quickest way to answer "where are we in the flow?" without
advancing or mutating anything. Safe to run at any point.

## HARD-GATE

Do **not** mutate \`${flowPath}\` or \`${delegationPath}\` from this skill. This is
a read-only command.

## Algorithm

1. Read \`${flowPath}\`. If missing → report **BLOCKED: flow state absent** and suggest \`cclaw init\`.
2. Read \`${delegationPath}\`. Missing → treat all mandatory delegations as pending.
3. Read \`${knowledgeFile}\`. If missing or empty → knowledge highlights are \`(none recorded)\`.
4. For each gate in \`stageGateCatalog[currentStage].required\`:
- Satisfied if present in \`passed\` and absent from \`blocked\`.
5. Build and print the status block (see command contract for layout).
6. Suggest the next action:
- If current stage has unmet gates → \`/cc-next\` to resume.
- If current stage is complete → \`/cc-next\` to advance (or report "Flow complete" if terminal).

## Output Guidelines

- Keep output compact (≤ 25 lines) — status, not narrative.
- Report counts, not full artifact contents.
- If any data source is missing or corrupt, say so explicitly rather than guessing.

## Anti-patterns

- Rebuilding trace-matrix or running doctor from \`/cc-status\` — those belong to dedicated tools.
- Treating absence of delegation log as "all delegations complete".
- Mutating state to "clean up" during a status check.
`;
}
@@ -349,8 +349,16 @@ Execution rule: complete and verify each wave before starting the next wave.
349
349
  - Layer 2 complete:
350
350
  - Review army schema valid:
351
351
  - Open critical blockers:
352
+ - Adversarial review pass:
352
353
  - Ship recommendation:
353
354
 
355
+ ## Completeness Score
356
+ - AC coverage: <N>/<M> (<percent>%)
357
+ - Task coverage (tasks backed by ≥1 test slice): <N>/<M>
358
+ - Slice coverage (slices linked to ≥1 AC): <N>/<M>
359
+ - Adversarial review pass: true | false
360
+ - Overall score: <0-100>
361
+
354
362
  ## Severity Summary
355
363
  - Critical:
356
364
  - Important:
@@ -411,6 +419,12 @@ Execution rule: complete and verify each wave before starting the next wave.
411
419
  ## Completion Status
412
420
  - SHIPPED | SHIPPED_WITH_EXCEPTIONS | BLOCKED
413
421
  - Exceptions (if any):
422
+
423
+ ## Compound Step
424
+ _Optional retrospective. The goal is to make the **next** feature faster, not to evaluate this one._
425
+ _If you have nothing to add, write the explicit line: \`No compound insight this run.\`_
426
+ - Insight: <one short line about what should accelerate the next run>
427
+ - Action: append \`[compound]\` entry to \`.cclaw/knowledge.md\` capturing the insight
414
428
  `
415
429
  };
416
430
  export const RULEBOOK_MARKDOWN = `# Cclaw Rulebook
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Utility skills that complement the 9 flow stages.
2
+ * Utility skills that complement the 8 flow stages.
3
3
  * These are contextual lenses, not flow stages.
4
4
  * Each skill: ~120-180 lines, under the 500-line progressive disclosure guideline.
5
5
  */
@@ -12,5 +12,9 @@ export declare function executingPlansSkill(): string;
12
12
  export declare function contextEngineeringSkill(): string;
13
13
  export declare function sourceDrivenDevelopmentSkill(): string;
14
14
  export declare function frontendAccessibilitySkill(): string;
15
- export declare const UTILITY_SKILL_FOLDERS: readonly ["security", "debugging", "performance", "ci-cd", "docs", "executing-plans", "context-engineering", "source-driven-development", "frontend-accessibility"];
15
+ export declare function landscapeCheckSkill(): string;
16
+ export declare function knowledgeCurationSkill(): string;
17
+ export declare function securityAuditSkill(): string;
18
+ export declare function adversarialReviewSkill(): string;
19
+ export declare const UTILITY_SKILL_FOLDERS: readonly ["security", "debugging", "performance", "ci-cd", "docs", "executing-plans", "context-engineering", "source-driven-development", "frontend-accessibility", "landscape-check", "adversarial-review", "security-audit", "knowledge-curation"];
16
20
  export declare const UTILITY_SKILL_MAP: Record<string, () => string>;
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Utility skills that complement the 9 flow stages.
2
+ * Utility skills that complement the 8 flow stages.
3
3
  * These are contextual lenses, not flow stages.
4
4
  * Each skill: ~120-180 lines, under the 500-line progressive disclosure guideline.
5
5
  */
@@ -504,12 +504,45 @@ Do not start implementation execution without an approved plan artifact and expl
504
504
  - Machine-only checks are delegated to subagents when supported.
505
505
  - User approvals are requested only at required gate boundaries.
506
506
 
507
+ ## Fresh Context Protocol (between waves)
508
+
509
+ After a wave completes — especially after long agent turns — context drift is
510
+ the #1 cause of degraded execution quality. Before starting the **next wave**,
511
+ prefer a **fresh agent context** over continuing in a saturated session:
512
+
513
+ 1. **Snapshot wave outcome** — append a short summary to the plan artifact
514
+ (\`### Wave <N> outcome\` with: tasks done, evidence files, blockers, next-wave inputs).
515
+ 2. **Capture handoff facts** — the minimum information the next agent needs:
516
+ - Stage and run id (from \`.cclaw/state/flow-state.json\`)
517
+ - List of completed task IDs from the plan
518
+ - Open blockers / failing gates by name
519
+ - File paths the next wave will touch (no full diffs)
520
+ 3. **Decide: continue or rotate**
521
+ - **Rotate** (start a new agent session) when: prior wave consumed > ~50% of the context budget, the prior wave required deep investigation that the next wave does not need, or you are about to cross a stage boundary.
522
+ - **Continue** when: next wave is a tiny follow-up (≤ 1 task) and the prior context is directly relevant.
523
+ 4. **Resume** in the new session via \`/cc-next\` — the session-start hook will restore flow state, checkpoint, and digest automatically.
524
+
525
+ This is the same intuition as Compound Engineering's "fresh context per iteration": every wave starts with a clean, intentionally-loaded context, not a degraded carry-over.
526
+
527
+ ### Handoff template (paste into next session)
528
+
529
+ \`\`\`markdown
530
+ ## Wave <N> handoff
531
+ - Stage: <stage>
532
+ - Run: <runId>
533
+ - Completed task IDs: <list>
534
+ - Blockers: <list or none>
535
+ - Files next wave will touch: <list>
536
+ - Verification command(s) used: <list>
537
+ \`\`\`
538
+
507
539
  ## Anti-Patterns
508
540
 
509
541
  - Executing all tasks in one pass without intermediate verification.
510
542
  - Marking tasks done without command evidence.
511
543
  - Reordering critical dependencies for speed.
512
544
  - Continuing after a gate failure hoping later tasks fix it.
545
+ - Carrying a saturated context across wave boundaries because "it has all the history" — saturated context is a liability, not an asset.
513
546
  `;
514
547
  }
515
548
  export function contextEngineeringSkill() {
@@ -649,6 +682,393 @@ Do not approve user-facing UI changes that break basic keyboard navigation or re
649
682
  - Color-only status indicators with no text/aria support.
650
683
  `;
651
684
  }
685
+ /**
+ * Returns the SKILL.md text for the "landscape-check" utility skill: YAML front
+ * matter (name, description) followed by the markdown body, as one template
+ * literal. Takes no arguments and interpolates nothing, so the result is constant.
+ */
+ export function landscapeCheckSkill() {
686
+ return `---
687
+ name: landscape-check
688
+ description: "Landscape survey before a design/scope decision. Use when deciding whether to build, reuse, or adopt — inside and outside the repo."
689
+ ---
690
+
691
+ # Landscape Check
692
+
693
+ ## Quick Start
694
+
695
+ > 1. Before committing to a build decision, survey the landscape: in-repo, in-ecosystem, and in-class.
696
+ > 2. Produce a one-page table of candidates (build / reuse in-repo / adopt external) with evidence.
697
+ > 3. Explicitly kill alternatives with a one-line reason. Do not leave implicit assumptions.
698
+
699
+ ## HARD-GATE
700
+
701
+ Do not approve a scope or design that introduces a new system, library,
702
+ or abstraction without comparing at least **one in-repo candidate** and
703
+ **one external/ecosystem candidate** (or explicitly stating why no such
704
+ candidates exist).
705
+
706
+ ## When to Use
707
+
708
+ - Scope stage, before picking a mode (expand/selective/hold/reduce)
709
+ - Design stage, before committing to a new architecture boundary
710
+ - Brainstorm stage, when the user frames the problem as "let's build X"
711
+ - Review stage, when a proposed change duplicates an existing capability
712
+
713
+ ## Protocol
714
+
715
+ 1. **Define the capability in one sentence.** "We need a way to <verb> <object> under <constraint>."
716
+ 2. **In-repo search.** Grep for similar verbs/modules/components. Read the closest 1-3 candidates. Record their fit and why they are or are not a good adapter target.
717
+ 3. **Ecosystem search.** Check ecosystem defaults (stdlib, framework primitives, common OSS packages in use). Do not invent new dependencies when an existing one covers 80%+ of the need.
718
+ 4. **In-class search.** Look at how other well-known projects in the same class solve this. Cite at least one concrete example (even if you end up rejecting it).
719
+ 5. **Produce the decision table.** Columns: Candidate, Kind (build / reuse / adopt), Fit (1-5), Effort (S/M/L/XL), Risk, Reason accepted or rejected.
720
+ 6. **Commit.** Pick exactly one winner. All losers must have a one-line kill reason.
721
+
722
+ ## Output Template
723
+
724
+ \`\`\`markdown
725
+ ### Landscape Check — <capability>
726
+
727
+ | Candidate | Kind | Fit | Effort | Risk | Verdict |
728
+ |---|---|---|---|---|---|
729
+ | src/foo/Bar | reuse | 4/5 | S | Low | SELECTED — already covers 80% of the need |
730
+ | external/lib-x | adopt | 3/5 | M | Med | REJECTED — heavy dep, 20% unused surface |
731
+ | build new | build | 2/5 | L | High | REJECTED — premature abstraction |
732
+
733
+ **Decision:** Reuse \`src/foo/Bar\` with a thin adapter. Kill reasons recorded above.
734
+ \`\`\`
735
+
736
+ ## Anti-Patterns
737
+
738
+ - "We looked and nothing fits" without citing what was looked at.
739
+ - Treating "nobody on the team knows library X" as a kill reason without evaluating the learning cost.
740
+ - Choosing "build" because reuse would require a small refactor of the existing component.
741
+ - Skipping the in-class search because "our case is special" — it usually is not.
742
+
743
+ ## Red Flags
744
+
745
+ - Decision table has only the winner listed.
746
+ - Ecosystem search is empty when a well-known primitive obviously applies.
747
+ - "Fit" scores without evidence (no file:line, no cited OSS repo, no framework docs reference).
748
+ - The in-repo candidate was never read before being dismissed.
749
+ `;
750
+ }
751
/**
 * Returns the SKILL.md text for the "knowledge-curation" utility skill: YAML
 * front matter (name, description) followed by the markdown body.
 *
 * Takes no arguments and interpolates nothing, so the result is constant.
 *
 * @returns the skill document as a single markdown string.
 */
export function knowledgeCurationSkill() {
    // Inside the fenced "Soft-archive proposal" block the supersession line is
    // wrapped in plain backticks. Escaping them with a backslash in the emitted
    // text would put literal backslashes into the line the reader is told to append.
    return `---
name: knowledge-curation
description: "Read-only curation pass over .cclaw/knowledge.md. Surfaces stale, duplicate, or low-confidence entries and proposes a soft-archive plan; never deletes without explicit user approval."
---

# Knowledge Curation

## Quick Start

> 1. This is a **read-only audit** of \`.cclaw/knowledge.md\`. Never delete or rewrite entries here.
> 2. Surface candidates for soft-archive when the active file > 50 entries OR contains stale/duplicate/superseded entries.
> 3. Propose a single archive plan and require explicit user approval before any move.

## HARD-GATE

- Do not modify \`.cclaw/knowledge.md\` from this skill except via an explicit
user-approved archive plan that **moves** entries to
\`.cclaw/knowledge.archive.md\` (never deletes them).
- Do not silently rewrite or summarize entries — preserve original wording.

## When to run

- Triggered automatically by **\`/cc-learn curate\`**.
- Recommended after \`cclaw archive\` of a feature run, when knowledge has grown.
- Recommended when active entry count exceeds **50**.

## Audit dimensions

For each entry in \`.cclaw/knowledge.md\` produce a row with:

| Field | Source |
|---|---|
| Title | \`### <ts> [type] <title>\` heading |
| Type | \`rule\` / \`pattern\` / \`lesson\` / \`compound\` |
| Stage | \`Stage:\` field (or \`unknown\`) |
| Age | days since timestamp |
| Confidence | \`Confidence:\` field if present, else \`unstated\` |
| Domain | \`Domain:\` field if present |
| Supersedes | \`Supersedes:\` field if present |
| Status hint | one of: keep / supersede-candidate / archive-candidate / duplicate |

### Status rules

- **supersede-candidate**: another entry has \`Supersedes: <this-title>\`.
- **duplicate**: title or insight ≈ another entry's (caller's judgment, not regex).
- **archive-candidate**:
- Type \`lesson\` AND age > 180 days AND no \`Supersedes\` chain points to it; OR
- Stage = \`brainstorm\` AND age > 90 days; OR
- Confidence = \`low\` AND age > 60 days; OR
- Total active entries > 50 and entry has lowest reuse signal.
- **keep**: everything else.

## Output format

Produce two artifacts as **chat output only** (do not write files):

### 1. Audit table

\`\`\`markdown
| # | Title | Type | Stage | Age | Confidence | Status hint |
|---|---|---|---|---|---|---|
| 1 | … | … | … | … | … | … |
\`\`\`

### 2. Soft-archive proposal

\`\`\`markdown
## Proposed archive (requires user approval)

Threshold reasoning: <why entries below were selected>

Entries to archive:
1. <title> — reason
2. <title> — reason

Action plan if approved:
1. Append a header to \`.cclaw/knowledge.archive.md\` with today's UTC date.
2. Move (cut/paste) selected entries verbatim from \`.cclaw/knowledge.md\` into the archive file.
3. Append a single supersession line to \`.cclaw/knowledge.md\`:
\`### <ts> [pattern] knowledge-curation-<date> — archived <N> entries, see knowledge.archive.md\`

After approval: ask the user to run the move themselves, or — if they explicitly grant write access — perform the move atomically and report the new active count.
\`\`\`

## Anti-patterns

- Deleting entries instead of archiving — knowledge must be append-only.
- Rewriting an entry to "clean it up" — preserve original wording verbatim.
- Auto-archiving without user approval, even when above threshold.
- Removing \`compound\` entries — these are the highest-leverage records.
- Treating high age as a proxy for low value — a 2-year-old security rule may be the most important entry in the file.
`;
}
845
/**
 * Returns the SKILL.md text for the "security-audit" utility skill: YAML front
 * matter (name, description) followed by the markdown body with the audit
 * pattern catalog and report template.
 *
 * Takes no arguments and interpolates nothing, so the result is constant.
 *
 * @returns the skill document as a single markdown string.
 */
export function securityAuditSkill() {
    // Notes on three catalog entries:
    //  - "Template literal SQL" uses a double-backtick code span so the inner
    //    backticks render; backslash escapes are not honoured inside code spans.
    //  - The Python advisory tool is the standalone `pip-audit` command.
    //  - `path.join` already normalizes its result, so the traversal guard that
    //    matters is resolving to an absolute path and checking its prefix.
    return `---
name: security-audit
description: "Proactive security audit — hunts for vulnerabilities across the codebase using pattern-based detection. Distinct from security review (checklist for a specific diff)."
---

# Security Audit

## Quick Start

> 1. Scan the codebase for high-signal vulnerability patterns (not just the diff).
> 2. Produce a finding register grouped by category with severity and file:line.
> 3. For each Critical: provide a concrete exploit path (not just a category label).

## HARD-GATE

Do not close a security audit pass while any Critical pattern match is
unresolved. Each Critical finding must be either fixed, suppressed with
a documented reason, or tracked as a named accepted risk with an owner.

## When to Use

- Initial project onboarding (baseline audit)
- Before a major release that expands attack surface
- When new dependencies are introduced
- After a security incident (to check for same-class issues)
- On a scheduled cadence (quarterly for stable projects, monthly for high-risk)

This is complementary to the \`security\` skill, which is a point-in-time
review checklist scoped to a single diff.

## Audit Pattern Catalog

Run each category as a focused pass. For every pattern, capture
file:line evidence — never assume the project is clean just because
there was "no obvious problem".

### 1. Secret Exposure

Patterns to grep for (language-agnostic):

- \`AKIA[0-9A-Z]{16}\` — AWS access key id
- \`-----BEGIN (RSA |EC |DSA )?PRIVATE KEY-----\`
- \`xox[bp]-[0-9a-zA-Z-]+\` — Slack tokens
- \`ghp_[A-Za-z0-9]{36}\` — GitHub PAT
- \`console\\.log.*(token|secret|password|api_key)\`
- Hard-coded JWTs (3 base64 segments separated by \`.\`)

Also inspect: .env.example for real values, logs for PII, git history for
leaked secrets via \`git log -p | grep -i secret\`.

### 2. Injection

- Raw SQL string concatenation with request data
- \`eval(\`, \`new Function(\`, \`exec(\`, \`execSync(\` with untrusted input
- \`dangerouslySetInnerHTML\`, \`innerHTML =\` with user-provided content
- Shell command construction from user input
- Template literal SQL (\`\` \`SELECT ... \${userInput}\` \`\`)

### 3. Auth and Session

- Missing auth middleware on routes that mutate state
- JWT verification that trusts the \`alg\` header (algorithm confusion)
- \`setCookie\` without \`HttpOnly\`, \`Secure\`, or \`SameSite\`
- Session fixation (no regenerate-on-login)
- Rate limit absent on login, signup, password reset

### 4. Trust Boundary and LLM Output

- LLM output passed directly to \`exec\` / SQL / filesystem calls
- Tool-call arguments from the model used without schema validation
- Untrusted markdown rendered without sanitization
- Confused deputy: service acts on behalf of user without passing auth context

### 5. Crypto Misuse

- MD5 / SHA1 for password hashing
- \`Math.random()\` used for security tokens
- Reused IV in AES-GCM (catastrophic)
- ECB mode cipher usage
- Missing constant-time comparison for secrets

### 6. Dependency and Supply Chain

- \`npm audit\` / \`pip-audit\` Critical or High advisories unresolved
- Dependencies pulled from non-locked tags instead of pinned versions
- Post-install scripts from new/unknown packages
- Un-reviewed direct-to-main dependency bumps

### 7. File System and Path Traversal

- \`path.join\` with user input without \`path.resolve\` + prefix check
- Unzip/untar without entry path validation (zip-slip)
- Writing to user-supplied paths without allowlist
- Following symlinks inside trusted directories

### 8. Logging and Observability

- Stack traces returned in API responses (production)
- Logs containing tokens, passwords, full request bodies
- Error messages that reveal DB schema or internal paths

## Output Format

Produce a single audit report with this structure:

\`\`\`markdown
# Security Audit — <scope>, <date>

## Summary
- Files scanned: <N>
- Categories checked: <list>
- Critical: <N>, Important: <N>, Suggestion: <N>

## Findings

### <Category> — <Pattern name>
- **Severity:** Critical | Important | Suggestion
- **File:line:** path/to/file.ts:42
- **Evidence:** short excerpt (≤ 3 lines)
- **Exploit path:** specific, concrete (not a category label)
- **Fix:** specific remediation with command/patch-level detail
- **Owner:** <name or role>
- **Target date:** <YYYY-MM-DD for Critical/Important>

## Accepted Risks
- <finding id>: <reason documented>, owner <name>, revisit <date>

## Suppressed (False Positives)
- <finding id>: <why this pattern is not exploitable here>
\`\`\`

## Anti-Patterns

- "No Critical findings" without stating what patterns were actually run.
- Accepting a Critical risk without named owner + revisit date.
- Treating a lint rule as equivalent to a runtime security check.
- Running audits only on the diff — the diff does not contain legacy risks.
- Deleting audit reports after fixing findings (keep them as regression evidence).

## Red Flags

- Audit claims coverage but cites zero file:line evidence.
- Every Critical pattern has zero matches (this is implausible for any non-trivial codebase — verify the grep commands were actually executed).
- Findings are Important-only (no Critical or Suggestion buckets) — usually means severity was compressed to avoid escalation.
`;
}
992
+ /**
+ * Returns the SKILL.md text for the "adversarial-review" utility skill: YAML front
+ * matter (name, description) followed by the markdown body, as one template
+ * literal. Takes no arguments and interpolates nothing, so the result is constant.
+ */
+ export function adversarialReviewSkill() {
993
+ return `---
994
+ name: adversarial-review
995
+ description: "Adversarial review lens. Use during review to deliberately attack the implementation — as a hostile user, a future maintainer, or a competitor."
996
+ ---
997
+
998
+ # Adversarial Review
999
+
1000
+ ## Quick Start
1001
+
1002
+ > 1. Stop assuming good-faith usage. Play three roles in sequence: hostile user, stressed operator, future maintainer.
1003
+ > 2. For each role, produce at least 2 concrete attack/friction scenarios with file:line evidence.
1004
+ > 3. Escalate any finding that a Critical severity review would miss.
1005
+
1006
+ ## HARD-GATE
1007
+
1008
+ Do not complete review stage without an adversarial-review pass when
1009
+ **any** of the following apply: user-facing input surface changed,
1010
+ trust boundary moved, concurrency was introduced, or a new failure
1011
+ mode path was added.
1012
+
1013
+ ## When to Use
1014
+
1015
+ - Review stage, after Layer 2 quality checks complete
1016
+ - Before shipping anything user-facing or revenue-sensitive
1017
+ - When fuzz/property-testing exists but was not exercised against this change
1018
+ - When the implementer has a strong "this is fine" prior
1019
+
1020
+ ## Roles and Questions
1021
+
1022
+ ### Role 1 — Hostile User
1023
+
1024
+ You are trying to break, trick, or exploit the system. Ask:
1025
+
1026
+ - What happens on empty / null / maximum / negative / unicode / newline inputs?
1027
+ - What if I call the endpoint 1000 times per second? What about 1 every 10 minutes for a week?
1028
+ - What if I send a payload that is almost valid (off-by-one schema, wrong content-type, duplicate keys)?
1029
+ - What if two honest actions collide (double-click, race, retry after timeout)?
1030
+ - Can I observe a secret through error messages, timing, or response size?
1031
+
1032
+ ### Role 2 — Stressed Operator
1033
+
1034
+ You are on call at 3 AM. Ask:
1035
+
1036
+ - What does this look like in logs when it fails? Is the failure actionable?
1037
+ - If I restart the service mid-request, does state recover cleanly?
1038
+ - Is the rollback procedure real, tested, and under 15 minutes?
1039
+ - Can I tell from metrics alone whether this is healthy?
1040
+
1041
+ ### Role 3 — Future Maintainer
1042
+
1043
+ You are reading this code in 6 months with no memory of the context. Ask:
1044
+
1045
+ - Can I safely change this without breaking callers I cannot see?
1046
+ - Are there hidden invariants not captured in tests?
1047
+ - Will renaming this field silently break serialized consumers?
1048
+ - Is the "obviously correct" path actually correct, or is it just plausible?
1049
+
1050
+ ## Output Format
1051
+
1052
+ For each finding:
1053
+
1054
+ \`\`\`
1055
+ - **Role:** Hostile User | Stressed Operator | Future Maintainer
1056
+ - **Scenario:** concrete scenario (not a category)
1057
+ - **File:line:** path/to/file.ts:42
1058
+ - **Impact:** what breaks, for whom, under what frequency
1059
+ - **Recommendation:** specific fix or mitigation
1060
+ \`\`\`
1061
+
1062
+ Escalate to the main review-army under the matching severity (Critical / Important / Suggestion).
1063
+
1064
+ ## Anti-Patterns
1065
+
1066
+ - Treating adversarial review as a category list without producing concrete scenarios.
1067
+ - Assuming "our users would never do that" — they will, or the next integration will.
1068
+ - Running adversarial review after the ship decision is already made.
1069
+ - Only playing the hostile-user role and skipping operator + maintainer.
1070
+ `;
1071
+ }
652
1072
  export const UTILITY_SKILL_FOLDERS = [
653
1073
  "security",
654
1074
  "debugging",
@@ -658,7 +1078,11 @@ export const UTILITY_SKILL_FOLDERS = [
658
1078
  "executing-plans",
659
1079
  "context-engineering",
660
1080
  "source-driven-development",
661
- "frontend-accessibility"
1081
+ "frontend-accessibility",
1082
+ "landscape-check",
1083
+ "adversarial-review",
1084
+ "security-audit",
1085
+ "knowledge-curation"
662
1086
  ];
663
1087
  export const UTILITY_SKILL_MAP = {
664
1088
  security: securityReviewSkill,
@@ -669,5 +1093,9 @@ export const UTILITY_SKILL_MAP = {
669
1093
  "executing-plans": executingPlansSkill,
670
1094
  "context-engineering": contextEngineeringSkill,
671
1095
  "source-driven-development": sourceDrivenDevelopmentSkill,
672
- "frontend-accessibility": frontendAccessibilitySkill
1096
+ "frontend-accessibility": frontendAccessibilitySkill,
1097
+ "landscape-check": landscapeCheckSkill,
1098
+ "adversarial-review": adversarialReviewSkill,
1099
+ "security-audit": securityAuditSkill,
1100
+ "knowledge-curation": knowledgeCurationSkill
673
1101
  };
@@ -1,4 +1,4 @@
1
- import type { FlowStage, TransitionRule } from "./types.js";
1
+ import type { FlowStage, FlowTrack, TransitionRule } from "./types.js";
2
2
  export declare const TRANSITION_RULES: TransitionRule[];
3
3
  export interface StageGateState {
4
4
  required: string[];
@@ -11,9 +11,21 @@ export interface FlowState {
11
11
  completedStages: FlowStage[];
12
12
  guardEvidence: Record<string, string>;
13
13
  stageGateCatalog: Record<FlowStage, StageGateState>;
14
+ /** Active flow track (determines which stages are in the critical path for this run). */
15
+ track: FlowTrack;
16
+ /** Stages explicitly skipped for this track (empty for standard; populated for quick). */
17
+ skippedStages: FlowStage[];
14
18
  }
15
- export declare function createInitialFlowState(activeRunId?: string): FlowState;
19
+ /**
+ * Options bag for building an initial flow state. Both fields are optional.
+ * NOTE(review): defaults applied when a field is omitted are not visible in
+ * this declaration — confirm against the implementation.
+ */
+ export interface InitialFlowStateOptions {
20
+ /** Run id for the new state — presumably the active run; verify against the implementation. */
+ activeRunId?: string;
21
+ /** Flow track to create the state for. */
+ track?: FlowTrack;
22
+ }
23
+ export declare function isFlowTrack(value: unknown): value is FlowTrack;
24
+ export declare function trackStages(track: FlowTrack): FlowStage[];
25
+ export declare function skippedStagesForTrack(track: FlowTrack): FlowStage[];
26
+ export declare function firstStageForTrack(track: FlowTrack): FlowStage;
27
+ export declare function createInitialFlowState(activeRunIdOrOptions?: string | InitialFlowStateOptions, maybeTrack?: FlowTrack): FlowState;
16
28
  export declare function canTransition(from: FlowStage, to: FlowStage): boolean;
17
29
  export declare function getTransitionGuards(from: FlowStage, to: FlowStage): string[];
18
- export declare function nextStage(stage: FlowStage): FlowStage | null;
19
- export declare function previousStage(stage: FlowStage): FlowStage | null;
30
+ export declare function nextStage(stage: FlowStage, track?: FlowTrack): FlowStage | null;
31
+ export declare function previousStage(stage: FlowStage, track?: FlowTrack): FlowStage | null;