@kontourai/flow-agents 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/.github/workflows/runtime-compat.yml +5 -2
  2. package/CHANGELOG.md +26 -0
  3. package/README.md +26 -5
  4. package/build/src/cli/{flow-kit.js → kit.js} +122 -108
  5. package/build/src/cli/validate-source-tree.js +4 -4
  6. package/build/src/cli.js +3 -3
  7. package/build/src/flow-kit/validate.js +58 -62
  8. package/build/src/tools/build-universal-bundles.js +64 -17
  9. package/build/src/tools/generate-context-map.js +49 -7
  10. package/build/src/tools/validate-source-tree.js +32 -1
  11. package/docs/adr/0007-flow-skill-kit-tool-boundary.md +169 -0
  12. package/docs/adr/0007-skill-audit.md +112 -0
  13. package/docs/adr/0008-kit-operation-boundary.md +88 -0
  14. package/docs/context-map.md +18 -22
  15. package/docs/flow-kit-repository-contract.md +5 -5
  16. package/docs/getting-started.md +177 -0
  17. package/docs/index.md +19 -8
  18. package/docs/kit-authoring-guide.md +26 -7
  19. package/docs/knowledge-kit.md +2 -2
  20. package/docs/spec/runtime-hook-surface.md +1 -1
  21. package/docs/vision.md +1 -1
  22. package/docs/workflow-usage-guide.md +1 -1
  23. package/evals/fixtures/builder-kit-workflow-state/happy-path.json +2 -2
  24. package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +2 -2
  25. package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +1 -1
  26. package/evals/fixtures/pull-work-provider/github-issues.json +5 -5
  27. package/evals/integration/test_activate_npx_context.sh +2 -2
  28. package/evals/integration/test_bundle_install.sh +17 -12
  29. package/evals/integration/test_console_learning_projection.sh +1 -1
  30. package/evals/integration/test_flow_kit_install_git.sh +7 -7
  31. package/evals/integration/test_flow_kit_repository.sh +4 -4
  32. package/evals/integration/test_kit_conformance_levels.sh +1 -1
  33. package/evals/integration/test_local_flow_kit_install.sh +7 -7
  34. package/evals/integration/test_publish_change_helper.sh +1 -1
  35. package/evals/integration/test_pull_work_provider.sh +1 -1
  36. package/evals/integration/test_runtime_adapter_activation.sh +3 -3
  37. package/evals/lib/node.sh +2 -2
  38. package/evals/static/test_workflow_skills.sh +15 -15
  39. package/integrations/strands/flow_agents_strands/steering.py +1 -1
  40. package/integrations/strands-ts/src/hooks.ts +1 -1
  41. package/kits/builder/kit.json +17 -0
  42. package/{skills → kits/builder/skills}/builder-shape/SKILL.md +4 -4
  43. package/{skills → kits/builder/skills}/idea-to-backlog/SKILL.md +1 -1
  44. package/kits/knowledge/kit.json +16 -9
  45. package/package.json +8 -5
  46. package/packaging/packs.json +1 -21
  47. package/scripts/README.md +1 -1
  48. package/scripts/kit.js +2 -0
  49. package/skills/README.md +23 -0
  50. package/src/cli/{flow-kit.ts → kit.ts} +124 -109
  51. package/src/cli/validate-source-tree.ts +4 -4
  52. package/src/cli.ts +3 -3
  53. package/src/flow-kit/validate.ts +63 -57
  54. package/src/tools/build-universal-bundles.ts +60 -13
  55. package/src/tools/generate-context-map.ts +36 -6
  56. package/src/tools/validate-source-tree.ts +27 -1
  57. package/scripts/flow-kit.js +0 -2
  58. package/skills/context-budget/SKILL.md +0 -40
  59. package/skills/explore/SKILL.md +0 -137
  60. package/skills/feedback-loop/SKILL.md +0 -87
  61. package/skills/frontend-design/SKILL.md +0 -80
  62. /package/{skills → kits/builder/skills}/deliver/SKILL.md +0 -0
  63. /package/{skills → kits/builder/skills}/design-probe/SKILL.md +0 -0
  64. /package/{skills → kits/builder/skills}/evidence-gate/SKILL.md +0 -0
  65. /package/{skills → kits/builder/skills}/execute-plan/SKILL.md +0 -0
  66. /package/{skills → kits/builder/skills}/fix-bug/SKILL.md +0 -0
  67. /package/{skills → kits/builder/skills}/learning-review/SKILL.md +0 -0
  68. /package/{skills → kits/builder/skills}/pickup-probe/SKILL.md +0 -0
  69. /package/{skills → kits/builder/skills}/plan-work/SKILL.md +0 -0
  70. /package/{skills → kits/builder/skills}/pull-work/SKILL.md +0 -0
  71. /package/{skills → kits/builder/skills}/release-readiness/SKILL.md +0 -0
  72. /package/{skills → kits/builder/skills}/review-work/SKILL.md +0 -0
  73. /package/{skills → kits/builder/skills}/tdd-workflow/SKILL.md +0 -0
  74. /package/{skills → kits/builder/skills}/verify-work/SKILL.md +0 -0
  75. /package/{skills → kits/knowledge/skills}/knowledge-capture/SKILL.md +0 -0
@@ -12,6 +12,57 @@ const textExtensions = new Set([".css", ".html", ".js", ".json", ".md", ".sh", "
12
12
  const dropDiagnostics: string[] = [];
13
13
  const printDiagnostics = !["0", "false", "no"].includes(String(process.env.FLOW_AGENTS_EXPORT_DIAGNOSTICS ?? "1").toLowerCase());
14
14
 
15
/**
 * Collect every skill that should be exported, from the top-level `skills/`
 * directory and from kit manifests at `kits/<kit>/kit.json`.
 *
 * Top-level skills are scanned first; when a later entry resolves to a name
 * that is already registered, the earlier entry wins and the later one is
 * skipped. Kit-owned skills are read from each manifest's `skills` array:
 * every object entry with a non-empty string `path` is resolved relative to
 * the kit directory, and that path is expected to be the SKILL.md file itself.
 *
 * Entries are skipped (never thrown on) when the manifest cannot be loaded,
 * the entry is malformed, or the resolved path is not an existing regular file.
 *
 * @returns `{ name, src }` pairs sorted by `name` (via `localeCompare`), where
 *   `name` is the directory containing SKILL.md (used as the install name) and
 *   `src` is the path to that SKILL.md.
 */
function collectAllSkills(): Array<{ name: string; src: string }> {
  // Insertion-ordered name -> SKILL.md path; the first registration of a name wins.
  const byName = new Map<string, string>();

  // Register a skill unless the name is taken or the path is not a regular file.
  // existsSync alone also accepts directories, which would be registered under
  // the wrong name and then fail when the caller reads `src` as text.
  const register = (name: string, file: string): void => {
    if (byName.has(name)) return;
    if (!fs.existsSync(file) || !fs.statSync(file).isFile()) return;
    byName.set(name, file);
  };

  // 1. Top-level skills/ directory (tools pending reclassification).
  const skillsDir = path.join(root, "skills");
  if (fs.existsSync(skillsDir)) {
    for (const dirName of fs.readdirSync(skillsDir).sort()) {
      register(dirName, path.join(skillsDir, dirName, "SKILL.md"));
    }
  }

  // 2. Kit-owned skills declared in kits/<kit>/kit.json `skills` arrays.
  const kitsDir = path.join(root, "kits");
  if (fs.existsSync(kitsDir)) {
    for (const kitName of fs.readdirSync(kitsDir).sort()) {
      const kitDir = path.join(kitsDir, kitName);
      const manifestPath = path.join(kitDir, "kit.json");
      if (!fs.existsSync(manifestPath)) continue;

      let manifest: Record<string, unknown>;
      try {
        manifest = loadJson<Record<string, unknown>>(manifestPath);
      } catch {
        // Best effort: a kit.json that cannot be loaded contributes no skills.
        continue;
      }

      const declared: unknown[] = Array.isArray(manifest["skills"]) ? manifest["skills"] : [];
      for (const entry of declared) {
        if (typeof entry !== "object" || entry === null) continue;
        const relPath = (entry as Record<string, unknown>)["path"];
        if (typeof relPath !== "string" || relPath === "") continue;

        // The install name is the directory that directly contains SKILL.md.
        const skillFile = path.resolve(kitDir, relPath);
        register(path.basename(path.dirname(skillFile)), skillFile);
      }
    }
  }

  return Array.from(byName, ([name, src]) => ({ name, src }))
    .sort((a, b) => a.name.localeCompare(b.name));
}
+
15
66
  function resetDir(dir: string): void {
16
67
  fs.rmSync(dir, { recursive: true, force: true });
17
68
  fs.mkdirSync(dir, { recursive: true });
@@ -302,9 +353,8 @@ function buildClaudeCode(agents: Agent[]): void {
302
353
  copySharedContent(bundle, "claude-code", "<bundle-root>");
303
354
  writeText(path.join(bundle, manifest.claude_code.task_dir, ".gitkeep"), "");
304
355
  for (const spec of agents) writeText(path.join(bundle, ".claude/agents", `${spec.name}.md`), exportClaudeAgent(spec));
305
- for (const skill of fs.readdirSync(path.join(root, "skills"))) {
306
- const skillPath = path.join(root, "skills", skill, "SKILL.md");
307
- if (fs.existsSync(skillPath)) writeText(path.join(bundle, ".claude/skills", skill, "SKILL.md"), sanitizeText(readText(skillPath), "claude-code", "<bundle-root>"));
356
+ for (const { name, src } of collectAllSkills()) {
357
+ writeText(path.join(bundle, ".claude/skills", name, "SKILL.md"), sanitizeText(readText(src), "claude-code", "<bundle-root>"));
308
358
  }
309
359
  writeText(path.join(bundle, ".claude/settings.json"), exportClaudeSettings());
310
360
  writeText(path.join(bundle, "AGENTS.md"), exportRootAgentsMd("Claude Code", agents, manifest.claude_code.task_dir));
@@ -324,9 +374,8 @@ function buildCodex(agents: Agent[]): void {
324
374
  for (const [profileName, profile] of Object.entries(manifest.codex.profiles ?? {})) writeText(path.join(bundle, ".codex", `${profileName}.config.toml`), exportCodexProfileConfig(profile as Record<string, unknown>, settings));
325
375
  writeText(path.join(bundle, ".codex/hooks.json"), exportCodexHooks());
326
376
  for (const spec of targetAgents) writeText(path.join(bundle, ".codex/agents", `${spec.name}.toml`), exportCodexAgent(spec));
327
- for (const skill of fs.readdirSync(path.join(root, "skills"))) {
328
- const skillPath = path.join(root, "skills", skill, "SKILL.md");
329
- if (fs.existsSync(skillPath)) writeText(path.join(bundle, ".codex/skills", skill, "SKILL.md"), sanitizeText(readText(skillPath), "codex", "<bundle-root>"));
377
+ for (const { name, src } of collectAllSkills()) {
378
+ writeText(path.join(bundle, ".codex/skills", name, "SKILL.md"), sanitizeText(readText(src), "codex", "<bundle-root>"));
330
379
  }
331
380
  writeText(path.join(bundle, "AGENTS.md"), exportRootAgentsMd("Codex", targetAgents, manifest.codex.task_dir));
332
381
  writeText(path.join(bundle, "README.md"), exportTargetReadme("Codex", "bash install.sh /path/to/workspace"));
@@ -490,9 +539,8 @@ function buildOpencode(agents: Agent[]): void {
490
539
  for (const spec of agents) {
491
540
  writeText(path.join(bundle, ".opencode/agents", `${spec.name}.md`), exportOpencodeAgent(spec));
492
541
  }
493
- for (const skill of fs.readdirSync(path.join(root, "skills"))) {
494
- const skillPath = path.join(root, "skills", skill, "SKILL.md");
495
- if (fs.existsSync(skillPath)) writeText(path.join(bundle, ".opencode/skills", skill, "SKILL.md"), sanitizeText(readText(skillPath), "opencode", "<bundle-root>"));
542
+ for (const { name, src } of collectAllSkills()) {
543
+ writeText(path.join(bundle, ".opencode/skills", name, "SKILL.md"), sanitizeText(readText(src), "opencode", "<bundle-root>"));
496
544
  }
497
545
  writeText(path.join(bundle, ".opencode/plugins/flow-agents.js"), exportOpencodePlugin());
498
546
  writeText(path.join(bundle, "opencode.json"), exportOpencodeConfig());
@@ -602,9 +650,8 @@ function buildPi(agents: Agent[]): void {
602
650
  writeText(path.join(bundle, manifest.pi.task_dir, ".gitkeep"), "");
603
651
  // pi has no named-subagent registry; agents are left canonical/unexported.
604
652
  // Skills are exported to .pi/skills/ (direct .md files supported in that dir).
605
- for (const skill of fs.readdirSync(path.join(root, "skills"))) {
606
- const skillPath = path.join(root, "skills", skill, "SKILL.md");
607
- if (fs.existsSync(skillPath)) writeText(path.join(bundle, ".pi/skills", skill, "SKILL.md"), sanitizeText(readText(skillPath), "pi", "<bundle-root>"));
653
+ for (const { name, src } of collectAllSkills()) {
654
+ writeText(path.join(bundle, ".pi/skills", name, "SKILL.md"), sanitizeText(readText(src), "pi", "<bundle-root>"));
608
655
  }
609
656
  writeText(path.join(bundle, ".pi/extensions/flow-agents.ts"), exportPiExtension());
610
657
  writeText(path.join(bundle, "AGENTS.md"), exportRootAgentsMd("pi", agents, manifest.pi.task_dir));
@@ -617,7 +664,7 @@ function buildCatalog(agents: Agent[]): Record<string, unknown> {
617
664
  return {
618
665
  source_root: ".",
619
666
  agents: agents.slice().sort((a, b) => a.name.localeCompare(b.name)).map((spec) => spec.name),
620
- skills: fs.readdirSync(path.join(root, "skills")).filter((name) => fs.existsSync(path.join(root, "skills", name, "SKILL.md"))).sort(),
667
+ skills: collectAllSkills().map(({ name }) => name),
621
668
  powers: fs.readdirSync(path.join(root, "powers")).filter((name) => fs.existsSync(path.join(root, "powers", name, "mcp.json"))).sort(),
622
669
  packs: packs.packs ?? [],
623
670
  kits: fs.existsSync(kitsCatalog) ? loadJson<Record<string, unknown>>(kitsCatalog).kits ?? [] : [],
@@ -74,15 +74,45 @@ function repoShape(manifest: Record<string, unknown>): string[][] {
74
74
  return rows;
75
75
  }
76
76
 
77
/**
 * Collect all skill `{ name, absPath }` pairs from the top-level `skills/`
 * directory and from kit-owned skills declared in `kits/<kit>/kit.json`.
 *
 * Top-level skills are scanned first; a name that is already registered is
 * not replaced by a later entry. For kit manifests, every object entry in the
 * `skills` array with a non-empty string `path` is resolved relative to the
 * kit directory, and the skill name is the directory containing that file.
 * Manifests that fail to load, malformed entries, and paths that are not
 * existing regular files are skipped.
 *
 * @returns Pairs sorted by `name` (via `localeCompare`); `absPath` is the
 *   path to the skill's SKILL.md.
 */
function allSkillPaths(): Array<{ name: string; absPath: string }> {
  // Insertion-ordered name -> SKILL.md path; the first registration of a name wins.
  const byName = new Map<string, string>();

  // Register a skill unless the name is taken or the path is not a regular file.
  // A bare existence check also accepts directories, which would be registered
  // under the wrong name and then fail when the path is read as text.
  const register = (name: string, file: string): void => {
    if (byName.has(name)) return;
    if (!exists(file) || !fs.statSync(file).isFile()) return;
    byName.set(name, file);
  };

  // Top-level skills/<name>/SKILL.md.
  const skillsDir = path.join(root, "skills");
  if (exists(skillsDir)) {
    for (const dirName of fs.readdirSync(skillsDir).sort()) {
      register(dirName, path.join(skillsDir, dirName, "SKILL.md"));
    }
  }

  // Kit-owned skills declared in each kit manifest's `skills` array.
  const kitsDir = path.join(root, "kits");
  if (exists(kitsDir)) {
    for (const kitName of fs.readdirSync(kitsDir).sort()) {
      const kitDir = path.join(kitsDir, kitName);
      const manifestPath = path.join(kitDir, "kit.json");
      if (!exists(manifestPath)) continue;

      let manifest: Record<string, unknown>;
      try {
        manifest = loadJson<Record<string, unknown>>(manifestPath);
      } catch {
        // Best effort: a kit.json that cannot be loaded contributes no skills.
        continue;
      }

      const declared: unknown[] = Array.isArray(manifest["skills"]) ? manifest["skills"] : [];
      for (const entry of declared) {
        if (typeof entry !== "object" || entry === null) continue;
        const relPath = (entry as Record<string, unknown>)["path"];
        if (typeof relPath !== "string" || relPath === "") continue;

        // The skill name is the directory that directly contains SKILL.md.
        const skillFile = path.resolve(kitDir, relPath);
        register(path.basename(path.dirname(skillFile)), skillFile);
      }
    }
  }

  return Array.from(byName, ([name, absPath]) => ({ name, absPath }))
    .sort((a, b) => a.name.localeCompare(b.name));
}
109
+
77
110
  function listSkillRows(): [string[][], string[][]] {
78
111
  const workflowRows: string[][] = [];
79
112
  const supportRows: string[][] = [];
80
- const skillsDir = path.join(root, "skills");
81
- for (const name of fs.readdirSync(skillsDir).sort()) {
82
- const skillPath = path.join(skillsDir, name, "SKILL.md");
83
- if (!exists(skillPath)) continue;
84
- const meta = frontmatter(readText(skillPath));
85
- const row = [meta.name ?? name, rel(skillPath), oneLine(meta.description ?? "")];
113
+ for (const { name, absPath } of allSkillPaths()) {
114
+ const meta = frontmatter(readText(absPath));
115
+ const row = [meta.name ?? name, rel(absPath), oneLine(meta.description ?? "")];
86
116
  if (workflowSkills.has(row[0])) workflowRows.push(row);
87
117
  else supportRows.push(row);
88
118
  }
@@ -37,7 +37,7 @@ const publicScriptWrappers = new Map<string, { target: string; significantLines:
37
37
  ] }],
38
38
  ["scripts/filter-installed-packs.js", { target: "../build/src/tools/filter-installed-packs.js", significantLines: ['import("../build/src/tools/filter-installed-packs.js").then(({ main }) => process.exit(main(process.argv.slice(2))));'] }],
39
39
  ["scripts/generate-context-map.js", { target: "../build/src/tools/generate-context-map.js", significantLines: ['import("../build/src/tools/generate-context-map.js").then(({ main }) => process.exit(main(process.argv.slice(2))));'] }],
40
- ["scripts/flow-kit.js", { target: "../build/src/cli/flow-kit.js", significantLines: ['import("../build/src/cli/flow-kit.js").then(({ main }) => process.exit(main()));'] }],
40
+ ["scripts/kit.js", { target: "../build/src/cli/kit.js", significantLines: ['import("../build/src/cli/kit.js").then(({ main }) => main().then((code) => process.exit(code)));'] }],
41
41
  ["scripts/pull-work-provider.js", { target: "../build/src/cli/pull-work-provider.js", significantLines: ['import("../build/src/cli/pull-work-provider.js").then(({ main }) => process.exit(main()));'] }],
42
42
  ["scripts/effective-backlog-settings.js", { target: "../build/src/cli/effective-backlog-settings.js", significantLines: ['import("../build/src/cli/effective-backlog-settings.js").then(({ main }) => process.exit(main()));'] }],
43
43
  ["scripts/publish-change-helper.js", { target: "../build/src/cli/publish-change-helper.js", significantLines: ['import("../build/src/cli/publish-change-helper.js").then(({ main }) => process.exit(main()));'] }],
@@ -301,6 +301,28 @@ function validateAgentPaths(reporter: Reporter, manifest: any): void {
301
301
  }
302
302
  }
303
303
  function validateLegacyRefs(reporter: Reporter): void {
304
+ // Collect all kit-owned asset relative paths so legacy-ref scanning can skip matches
305
+ // that are subpaths of kit-owned assets. E.g. legacyRefRe matches "skills/plan-work/SKILL.md"
306
+ // within "kits/builder/skills/plan-work/SKILL.md"; the kit declares and validates these.
307
+ const kitOwnedSubPaths = new Set<string>();
308
+ const kitsDir = path.join(root, "kits");
309
+ if (fs.existsSync(kitsDir)) {
310
+ for (const kitName of fs.readdirSync(kitsDir)) {
311
+ const kitJson = path.join(kitsDir, kitName, "kit.json");
312
+ if (!fs.existsSync(kitJson)) continue;
313
+ try {
314
+ const kitManifest = loadJson<Record<string, unknown>>(kitJson);
315
+ for (const section of ["skills", "docs", "adapters", "evals", "assets"]) {
316
+ const entries = Array.isArray(kitManifest[section]) ? kitManifest[section] as unknown[] : [];
317
+ for (const entry of entries) {
318
+ if (typeof entry !== "object" || entry === null) continue;
319
+ const relPath = (entry as Record<string, unknown>)["path"];
320
+ if (typeof relPath === "string" && relPath) kitOwnedSubPaths.add(relPath);
321
+ }
322
+ }
323
+ } catch { /* skip invalid kit.json */ }
324
+ }
325
+ }
304
326
  for (const file of walkFiles(path.join(root, "evals")).sort()) {
305
327
  if (!textRefExtensions.has(path.extname(file))) continue;
306
328
  const parts = path.relative(path.join(root, "evals"), file).split(path.sep);
@@ -310,6 +332,10 @@ function validateLegacyRefs(reporter: Reporter): void {
310
332
  const ref = match[0].replace(/[.,)'"\]]+$/, "");
311
333
  if (/[{}$]/.test(ref)) continue;
312
334
  if (ref.split(/[\\/]/).includes("node_modules")) continue;
335
+ // Skip refs that are declared kit-owned asset paths or their parent directories
336
+ // (e.g. "skills/plan-work/SKILL.md" or "skills/plan-work" matched inside
337
+ // "kits/builder/skills/plan-work/SKILL.md" in eval files).
338
+ if (kitOwnedSubPaths.has(ref) || [...kitOwnedSubPaths].some((p) => p.startsWith(ref + "/"))) continue;
313
339
  const candidates = [path.join(root, ref), ...(ref.startsWith("evals/") ? [] : [path.join(root, "evals", ref)])];
314
340
  if (!candidates.some(fs.existsSync)) reporter.fail(`${rel(file)}: references missing source path: ${ref}`);
315
341
  }
@@ -1,2 +0,0 @@
1
- #!/usr/bin/env node
2
- import("../build/src/cli/flow-kit.js").then(({ main }) => process.exit(main()));
@@ -1,40 +0,0 @@
1
- ---
2
- name: context-budget
3
- description: >-
4
- Audit token overhead across Flow Agents bundles — agent specs, skills, context files,
5
- MCP servers. Produces budget report with per-component breakdown and optimization suggestions.
6
- ---
7
-
8
- # Context Budget Audit
9
-
10
- Scan installed Flow Agents bundles and estimate token overhead per component. Produces a structured budget report with optimization suggestions.
11
-
12
- ## Workflow
13
-
14
- ### Phase 1: Inventory
15
-
16
- Run `bash context/scripts/context-budget/budget-scan.sh` to discover all loaded components. The script walks `~/.flow-agents/` and outputs JSON with per-bundle breakdowns.
17
-
18
- ### Phase 2: Classify
19
-
20
- Bucket each component from the scan output:
21
- - **Always loaded**: context files matching package dependency patterns, skill frontmatter descriptions
22
- - **On-demand**: full SKILL.md body (loaded on skill activation), deferred context (`context/deferred/`)
23
- - **Per-agent**: agent-spec systemPrompt, agent-specific MCP servers
24
-
25
- ### Phase 3: Detect Issues
26
-
27
- Flag problems from the scan data:
28
- - Heavy agent specs: systemPrompt > 200 lines
29
- - Bloated skill descriptions: frontmatter description > 30 words
30
- - MCP over-subscription: agent with > 10 MCP servers or > 50 total tools
31
- - Context bloat: any single context file > 100 lines
32
- - Deferred candidates: context files > 2% of model context that aren't safety/routing
33
-
34
- ### Phase 4: Report
35
-
36
- Structured output:
37
- - Per-bundle breakdown (tokens by category)
38
- - Per-agent breakdown (what each agent loads at spawn)
39
- - Top-N optimization suggestions ranked by token savings
40
- - Use `--verbose` flag on budget-scan.sh for per-file token counts
@@ -1,137 +0,0 @@
1
- ---
2
- name: "explore"
3
- description: "Parallel codebase exploration — fans out subagents to map structure, entry points, dependencies, patterns, config, and tests in one pass."
4
- ---
5
-
6
- # Codebase Exploration
7
-
8
- Efficiently gather context about repositories by running parallel exploration tasks.
9
-
10
- ## Harness Limit
11
-
12
- Some harnesses cap a single delegation batch at 4 subagents.
13
- - Respect the current harness limit.
14
- - If the limit is unknown, assume 4.
15
- - Never submit more than 4 subagents in one batch.
16
- - Use multiple waves when needed rather than overfilling the first fan-out.
17
-
18
- ## Exploration Strategy
19
-
20
- Spawn MULTIPLE subagents IN PARALLEL to investigate different dimensions:
21
-
22
- ### Wave 1A (parallel, up to 4 subagents)
23
- 1. **Structure Scout** - Map directory structure, identify key folders (src, lib, tests, config)
24
- 2. **Entry Point Finder** - Locate main files, CLI entry points, API routes, exports
25
- 3. **Dependency Analyzer** - Parse package.json, requirements.txt, go.mod, Cargo.toml, pom.xml
26
- 4. **Pattern Detective** - Identify architectural patterns, frameworks, coding conventions
27
-
28
- ### Wave 1B (parallel, after Wave 1A if needed)
29
- 5. **Config Inspector** - Find and summarize configuration files, env vars, build configs
30
- 6. **Test Mapper** - Locate test files, understand testing strategy and coverage areas
31
- 7. **Documentation Auditor** - Cross-reference all documentation against actual file system state:
32
- - README agent tables vs actual `agents/*.agent-spec.json` files (ghost agents? missing agents?)
33
- - README skill lists vs actual `skills/*/SKILL.md` files
34
- - README dependency lists vs `Config` file declarations
35
- - AGENTS.md shared sections consistency across packages (paths, naming examples, model references)
36
- - All `.md` and `.json` files: grep for references to agents, skills, or paths that don't exist
37
- - Agent spec `resources` paths: verify referenced context files exist
38
- - Agent spec `model` fields: verify they follow conventions (orchestrators=opus, tools=haiku/sonnet)
39
- - Typos and spelling errors in documentation files
40
- - Empty directories or dead skill/SOP stubs
41
-
42
- ### Wave 2 (after Wave 1A/1B — needs dependency list)
43
- 7. **Tech Stack Researcher** - Research the identified tech stack using web search tools (`web_search`, `web_fetch`) and `tool-dependencies-updater` (audit-only — do NOT apply updates). Goals:
44
- - Identify outdated or deprecated dependencies and how significant an upgrade would be (patch vs minor vs major, breaking changes)
45
- - Discover new features in the current stack that the project could leverage
46
- - Assess whether any part of the stack is irrelevant, superseded, or approaching EOL
47
- - Surface project-specific context (migration guides, EOL announcements, known issues)
48
-
49
- ## Execution Model
50
-
51
- ```
52
- [User Request]
53
- |
54
- v
55
- [Wave 1A: Spawn first 4 dimensions in parallel]
56
- |
57
- v
58
- [Wave 1B: Spawn remaining dimensions in parallel if needed]
59
- |
60
- v
61
- [Aggregate Wave 1 findings]
62
- |
63
- v
64
- [Wave 2: Spawn Tech Stack Researcher with dependency list from Wave 1]
65
- - tool-dependencies-updater: audit-only scan for outdated packages, version gaps, security advisories
66
- - web search: research key frameworks/libraries for new features, deprecation, relevance
67
- |
68
- v
69
- [Final Synthesis]
70
- ```
71
-
72
- ## Subagent Prompts (use these as templates)
73
-
74
- Wave 1A:
75
- - "Explore the directory structure of this repo. List key folders and their purposes. Focus on: [specific area if provided]"
76
- - "Find all entry points in this codebase - main files, CLI commands, API routes, exported modules"
77
- - "Analyze dependencies - what frameworks, libraries, and tools does this project use?"
78
- - "Identify architectural patterns - is this MVC, microservices, monolith? What conventions are used?"
79
-
80
- Wave 1B:
81
- - "Find and summarize all configuration files - what can be configured and how?"
82
- - "Map the test structure - where are tests, what testing frameworks, what's the coverage strategy?"
83
- - "Audit all documentation for accuracy: (1) List every agent-spec.json file and cross-reference against README agent tables — flag any agents listed in docs but missing from disk or vice versa. (2) List every skills/*/SKILL.md and cross-reference against README skill lists. (3) Compare Config dependency declarations against README dependency sections. (4) Grep all .md and .json files for references to agent names and verify each referenced agent exists as an agent-spec.json. (5) Check AGENTS.md files across packages for inconsistent paths, naming examples, or model references. (6) Flag empty directories, typos, and dead stubs."
84
-
85
- Wave 2 (spawn these two in parallel):
86
- - tool-dependencies-updater: "Scan this project for all dependency manifests, check every dependency against the latest available version, run security advisory checks on outdated packages, and report findings grouped by risk level (critical/major/minor). Do NOT apply any updates — audit only."
87
- - web search: "Research the following tech stack: [list key frameworks/libraries from Wave 1]. For each, find: (1) latest stable version and what's new, (2) any deprecation or EOL announcements, (3) notable new features that could benefit this project, (4) whether any component has been superseded by a better alternative. Cite sources."
88
-
89
- ## Output Format
90
-
91
- After all subagents complete, synthesize into:
92
-
93
- ```
94
- ## Codebase Overview
95
- [1-2 sentence summary]
96
-
97
- ## Key Findings
98
- - **Tech Stack**: [languages, frameworks, tools]
99
- - **Architecture**: [pattern, structure]
100
- - **Entry Points**: [main files, commands]
101
- - **Configuration**: [key config files]
102
- - **Testing**: [strategy, frameworks]
103
-
104
- ## Tech Stack Health
105
- - **Outdated (Critical)**: [packages with security vulnerabilities]
106
- - **Outdated (Major)**: [packages with major version bumps available — note breaking change risk]
107
- - **Outdated (Minor)**: [packages with minor/patch updates]
108
- - **New Features Available**: [notable new capabilities in current stack]
109
- - **Deprecation/EOL Warnings**: [anything approaching end of life]
110
- - **Upgrade Effort Summary**: [overall assessment — low/medium/high effort to get current]
111
-
112
- ## Recommended Starting Points
113
- [Files to read first for understanding]
114
-
115
- ## Potential Concerns
116
- [Any issues, outdated deps, missing tests, etc.]
117
-
118
- ## Documentation Audit
119
- - **Ghost references**: [agents/skills/paths mentioned in docs but not on disk]
120
- - **Missing from docs**: [agents/skills that exist on disk but aren't documented]
121
- - **Stale content**: [outdated descriptions, wrong dependency lists, inconsistent AGENTS.md sections]
122
- - **Config mismatches**: [README deps vs Config file deps]
123
- - **Path inconsistencies**: [resource paths in agent specs that don't follow conventions]
124
- - **Empty/dead artifacts**: [empty directories, stub files with no content]
125
- - **Typos**: [spelling errors found in documentation]
126
- ```
127
-
128
- ## Key Principles
129
-
130
- - ALWAYS run explorations in PARALLEL within the current harness limit - this is the whole point
131
- - Never exceed 4 subagents in one batch unless the harness explicitly allows more
132
- - Wave 2 (Tech Stack Researcher) runs AFTER Wave 1A/1B completes because it needs the dependency list
133
- - tool-dependencies-updater is used in AUDIT-ONLY mode — never apply updates during explore
134
- - Be thorough but efficient - don't read entire files, scan for structure
135
- - Focus on what helps someone GET STARTED quickly
136
- - Flag anything unusual or concerning
137
- - If a specific area is requested, weight exploration toward that area
@@ -1,87 +0,0 @@
1
- ---
2
- name: "feedback-loop"
3
- description: "Verify implementation actually works. Visual changes → Playwright; integration changes → commands/tests. Run after completing builds."
4
- ---
5
-
6
- # Feedback Loop
7
-
8
- Verify that what you claim to have built actually works. Don't just say "done" — prove it.
9
-
10
- ## When to Use
11
-
12
- - After implementing changes, before declaring them complete
13
- - When the user asks you to verify or prove your work
14
- - As the final step of any implementation workflow
15
- - When you're uncertain whether your changes actually function correctly
16
-
17
- ## Workflow
18
-
19
- ### Step 1: IDENTIFY CHANGES
20
-
21
- Determine what was just built:
22
- - Check `git diff` for modified/added files
23
- - Review the active TODO list for context on what was implemented
24
- - Identify the nature of the change: what should be different now?
25
-
26
- ### Step 2: CLASSIFY
27
-
28
- Determine the verification method:
29
-
30
- | Change Type | Method | Examples |
31
- |---|---|---|
32
- | **Visual** | Playwright via `tool-playwright` | UI components, pages, styles, layouts, forms, visual regressions |
33
- | **Integration** | Commands, tests, execution | APIs, CLIs, libraries, configs, build scripts, data processing |
34
-
35
- If changes span both, run both verification paths.
36
-
37
- ### Step 3: VERIFY
38
-
39
- #### Visual Path (frontend/UI changes)
40
- Delegate to `tool-playwright`:
41
- 1. Load the relevant URL (local dev server, preview, etc.)
42
- 2. Take an accessibility snapshot to confirm elements exist and are structured correctly
43
- 3. Take a screenshot for visual confirmation
44
- 4. If interactive — click, type, navigate to exercise the changed behavior
45
- 5. Compare against expected state: are the right elements present? Does the layout match intent?
46
-
47
- If the dev server isn't running, start it (or tell the user to) before proceeding.
48
-
49
- #### Integration Path (non-visual changes)
50
- Use the most direct verification available, in priority order:
51
- 1. **Run existing tests** — if tests cover the changed code, run them
52
- 2. **Execute the code** — run the CLI command, call the API endpoint, import the module
53
- 3. **Check build** — compile/lint to confirm no syntax or type errors
54
- 4. **Inspect output** — verify the output matches expected behavior
55
-
56
- Always capture actual output as evidence.
57
-
58
- ### Step 4: REPORT
59
-
60
- State clearly:
61
- - **What was verified** — which changes, which method
62
- - **Evidence** — actual output, screenshots, test results, command output
63
- - **Verdict** — ✅ confirmed working, or ❌ found issues with specifics
64
-
65
- If verification fails, fix the issue and re-verify. Don't report failure without attempting a fix first.
66
-
67
- ## Persistence Rule
68
-
69
- **Keep trying until the user says stop.** This is the core behavior of the feedback loop.
70
-
71
- - If a verification method fails (Playwright won't connect, tests error out, server won't start), **debug and retry**. Don't downgrade to a weaker method or declare "good enough."
72
- - If visual verification is required and Playwright is having issues, fix the Playwright issue. Don't fall back to "well the build passes so it's probably fine."
73
- - If integration tests fail, diagnose why, fix, and re-run. Don't report partial success.
74
- - Cycle: **verify → fail → diagnose → fix → verify again**. Repeat until either:
75
- 1. ✅ All verification methods pass with evidence, OR
76
- 2. 🛑 The user explicitly says to stop or skip a method
77
-
78
- Never self-exit the loop. Never decide on the AI's behalf that a failure is acceptable. The user breaks the loop, not the agent.
79
-
80
- ## Key Principles
81
-
82
- - **Evidence over assertion.** Show output, not just "it works."
83
- - **Never settle.** If a verification method should work but isn't, that's a bug to fix — not a reason to skip it.
84
- - **Fix before reporting.** If verification reveals a bug you introduced, fix it and re-run.
85
- - **Match the medium.** UI changes need visual proof. Backend changes need execution proof.
86
- - **Be specific.** "Tests pass" is weak. "Ran `npm test` — 14 tests passed, 0 failed, output attached" is strong.
87
- - **Don't skip this.** The whole point is catching the gap between "I wrote the code" and "the code works."
@@ -1,80 +0,0 @@
1
- ---
2
- name: "frontend-design"
3
- description: "Create distinctive, production-grade frontend interfaces with high design quality. Use this skill when the user asks to build web components, pages, or applications. Generates creative, polished code that avoids generic AI aesthetics."
4
- ---
5
-
6
- # Frontend Design
7
-
8
- Delegate frontend implementation to `tool-worker` with the design guidelines below included in the prompt. The orchestrator's job is to understand the user's requirements, choose an aesthetic direction, and hand off to tool-worker with clear instructions plus the full aesthetics guidelines.
9
-
10
- ## Trigger Patterns
11
-
12
- This skill activates when the user:
13
-
14
- - Asks to build a UI, page, component, or web application
15
- - Wants a landing page, dashboard, form, or interactive interface
16
- - Mentions design quality, aesthetics, or visual polish
17
- - Asks for something "that looks good" or "production-grade"
18
-
19
- ## Workflow
20
-
21
- ### Step 1: UNDERSTAND REQUIREMENTS
22
- Gather from the user:
23
- - What to build (component, page, app)
24
- - Purpose and audience
25
- - Technical constraints (framework, existing codebase)
26
- - Any aesthetic preferences or references
27
-
28
- ### Step 2: DELEGATE TO tool-worker
29
- Spawn `tool-worker` with a prompt that includes:
30
- - Delegate to the exact `tool-worker` role; do not spawn an unnamed/default implementation agent.
31
- 1. The specific implementation task (what to build, where files go, framework)
32
- 2. The **full Design Guidelines section below** — copy it into the delegation prompt so tool-worker has it in context
33
-
34
- ### Step 3: VERIFY VISUALLY (mandatory)
35
- After tool-worker completes, you MUST delegate to tool-playwright to screenshot the result and confirm it renders correctly. Do NOT skip this step. Do NOT treat implementation as the final step. Visual verification is required before relaying results to the user.
36
-
37
- ## Design Guidelines
38
-
39
- Include everything below this line in the tool-worker delegation prompt.
40
-
41
- ---
42
-
43
- ### Design Thinking
44
-
45
- Before coding, understand the context and commit to a BOLD aesthetic direction:
46
-
47
- - **Purpose**: What problem does this interface solve? Who uses it?
48
- - **Tone**: Pick an extreme: brutally minimal, maximalist chaos, retro-futuristic, organic/natural, luxury/refined, playful/toy-like, editorial/magazine, brutalist/raw, art deco/geometric, soft/pastel, industrial/utilitarian, etc. Use these for inspiration, but commit to a single tone that is true to the chosen aesthetic direction.
49
- - **Constraints**: Technical requirements (framework, performance, accessibility).
50
- - **Differentiation**: What makes this UNFORGETTABLE? What's the one thing someone will remember?
51
-
52
- **CRITICAL**: Choose a clear conceptual direction and execute it with precision. Bold maximalism and refined minimalism both work — the key is intentionality, not intensity.
53
-
54
- Then implement working code (HTML/CSS/JS, React, Vue, etc.) that is:
55
-
56
- - Production-grade and functional
57
- - Visually striking and memorable
58
- - Cohesive with a clear aesthetic point-of-view
59
- - Meticulously refined in every detail
60
-
61
- ### Frontend Aesthetics
62
-
63
- - **Typography**: Choose fonts that are beautiful, unique, and interesting. Avoid generic fonts like Arial and Inter; opt for distinctive choices that elevate the aesthetic. Pair a distinctive display font with a refined body font.
64
- - **Color & Theme**: Commit to a cohesive aesthetic. Use CSS variables for consistency. Dominant colors with sharp accents outperform timid, evenly-distributed palettes.
65
- - **Motion**: Use animations for effects and micro-interactions. Prioritize CSS-only solutions for HTML. Use the Motion library for React when available. Focus on high-impact moments: one well-orchestrated page load with staggered reveals creates more delight than scattered micro-interactions.
66
- - **Spatial Composition**: Unexpected layouts. Asymmetry. Overlap. Diagonal flow. Grid-breaking elements. Generous negative space OR controlled density.
67
- - **Backgrounds & Visual Details**: Create atmosphere and depth rather than defaulting to solid colors. Apply creative forms like gradient meshes, noise textures, geometric patterns, layered transparencies, dramatic shadows, decorative borders, and grain overlays.
68
-
69
- ### Anti-Patterns (NEVER use)
70
-
71
- - Generic font families (Inter, Roboto, Arial, system fonts)
72
- - Clichéd color schemes (particularly purple gradients on white backgrounds)
73
- - Predictable layouts and cookie-cutter component patterns
74
- - Converging on the same "safe" choices across generations (e.g., Space Grotesk every time)
75
-
76
- Vary between light and dark themes, different fonts, different aesthetics. Every design should feel unique to its context.
77
-
78
- ### Calibration
79
-
80
- Match implementation complexity to the aesthetic vision. Maximalist designs need elaborate code with extensive animations and effects. Minimalist or refined designs need restraint, precision, and careful attention to spacing, typography, and subtle details.