npm - @joshski/dust - Versions diffs - 0.1.111 → 0.1.113 - Mend

@joshski/dust 0.1.111 → 0.1.113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/README.md +17 -0
package/dist/cli/shared/agent-shared.d.ts +9 -1
package/dist/cli/types.d.ts +5 -1
package/dist/core-principles.js +608 -608
package/dist/dust.js +877 -681
package/dist/execution-order.d.ts +17 -0
package/dist/execution-order.js +39 -0
package/dist/lint/validators/content-validator.d.ts +1 -0
package/dist/loop/iteration.d.ts +4 -0
package/dist/patch.js +33 -0
package/dist/validation.js +33 -0
package/package.json +5 -1

package/dist/dust.js CHANGED Viewed

@@ -7,7 +7,7 @@ var __require = /* @__PURE__ */ createRequire(import.meta.url);
 var require_package = __commonJS((exports, module) => {
   module.exports = {
     name: "@joshski/dust",
-    version: "0.1.111",
+    version: "0.1.113",
     description: "Flow state for AI coding agents",
     type: "module",
     bin: {
@@ -56,6 +56,10 @@ var require_package = __commonJS((exports, module) => {
       "./core-principles": {
         import: "./dist/core-principles.js",
         types: "./dist/core-principles.d.ts"
+      },
+      "./execution-order": {
+        import: "./dist/execution-order.js",
+        types: "./dist/execution-order.d.ts"
       }
     },
     files: [
@@ -394,16 +398,22 @@ import {
 // lib/git/file-sorter.ts
 function createGitDirectoryFileSorter(gitRunner) { // Factory: returns an async (dir, files) sorter driven by git commit times.
   return async (dir, files) => {
-    const timestamps = await Promise.all(files.map(async (file) => {
+    const results = await Promise.all(files.map(async (file) => { // one git log call per file, all started before any is awaited
       const result = await gitRunner.run(["log", "-1", "--format=%ct", "--", file], dir); // %ct prints the committer date of the newest commit touching the file, as a Unix timestamp
-      const ts = result.exitCode === 0 ? Number.parseInt(result.output.trim(), 10) : Number.NaN;
-      return {
-        file,
-        timestamp: Number.isNaN(ts) ? Number.POSITIVE_INFINITY : ts
-      };
+      const epochSeconds = result.exitCode === 0 ? Number.parseInt(result.output.trim(), 10) : Number.NaN; // NaN on a non-zero exit or unparseable output
+      const lastCommittedAt = Number.isNaN(epochSeconds) ? null : new Date(epochSeconds * 1000).toISOString(); // seconds -> milliseconds -> ISO string; null when no timestamp was obtained
+      return { file, lastCommittedAt };
     }));
-    timestamps.sort((a, b) => a.timestamp - b.timestamp);
-    return timestamps.map((t) => t.file);
+    results.sort((a, b) => { // ascending by commit time, null timestamps last
+      if (a.lastCommittedAt === null && b.lastCommittedAt === null)
+        return 0;
+      if (a.lastCommittedAt === null)
+        return 1;
+      if (b.lastCommittedAt === null)
+        return -1;
+      return new Date(a.lastCommittedAt).getTime() - new Date(b.lastCommittedAt).getTime();
+    });
+    return results; // the new version returns { file, lastCommittedAt } records instead of bare file names
   };
 }
@@ -721,7 +731,7 @@ async function loadSettings(cwd, fileSystem, runtime) {
 }
 // lib/version.ts
-var DUST_VERSION = "0.1.111";
+var DUST_VERSION = "0.1.113";
 // lib/cli/middleware.ts
 function applyMiddleware(middlewares, execute) {
@@ -773,16 +783,6 @@ function createDefaultTracingOptions() {
   };
 }
-// lib/cli/dedent.ts
-function dedent(strings, ...values) { // Takes (strings, ...values) like a template tag: joins the parts, strips the common leading indent, trims the result.
-  const result = strings.reduce((acc, part, index) => acc + part + (values[index] ?? ""), ""); // interleave parts with values; a nullish or missing value contributes an empty string
-  const lines = result.split(`
-`);
-  const indent = lines.filter((line) => line.trim()).reduce((min, line) => Math.min(min, line.match(/^\s*/)[0].length), Number.POSITIVE_INFINITY); // smallest leading-whitespace width among non-blank lines; stays Infinity when every line is blank
-  return lines.map((line) => line.slice(indent)).join(`
-`).trim(); // slice drops the first indent characters of every line, blank or not
-}
 // lib/cli/shared/agent-shared.ts
 import { join as join4 } from "node:path";
@@ -929,6 +929,16 @@ ${newHookContent}
   };
 }
+// lib/cli/dedent.ts
+function dedent(strings, ...values) { // Takes (strings, ...values) like a template tag: joins the parts, strips the common leading indent, trims the result.
+  const result = strings.reduce((acc, part, index) => acc + part + (values[index] ?? ""), ""); // interleave parts with values; a nullish or missing value contributes an empty string
+  const lines = result.split(`
+`);
+  const indent = lines.filter((line) => line.trim()).reduce((min, line) => Math.min(min, line.match(/^\s*/)[0].length), Number.POSITIVE_INFINITY); // smallest leading-whitespace width among non-blank lines; stays Infinity when every line is blank
+  return lines.map((line) => line.slice(indent)).join(`
+`).trim(); // slice drops the first indent characters of every line, blank or not
+}
 // lib/cli/shared/agent-shared.ts
 async function loadAgentInstructions(cwd, fileSystem, agentType) {
   const instructionsPath = join4(cwd, ".dust", "config", "agents", `${agentType}.md`);
@@ -969,25 +979,6 @@ async function templateVariablesWithInstructions(cwd, fileSystem, settings, hook
     agentInstructions
   };
 }
-async function manageGitHooks(dependencies) { // Installs the hook when isHookInstalled reports false, otherwise re-points its binary path at settings.dustCommand; resolves to false only when isGitRepo is false.
-  const { context, fileSystem, settings } = dependencies;
-  const hooks = createHooksManager(context.cwd, fileSystem, settings);
-  if (!hooks.isGitRepo()) {
-    return false; // nothing is installed or changed in this case
-  }
-  const isInstalled = await hooks.isHookInstalled();
-  if (!isInstalled) {
-    await hooks.installHook();
-    return true; // fresh install; the binary path check below is skipped
-  }
-  const hookBinaryPath = await hooks.getHookBinaryPath();
-  if (hookBinaryPath && hookBinaryPath !== settings.dustCommand) { // a falsy path is left untouched
-    await hooks.updateHookBinaryPath(settings.dustCommand);
-  }
-  return true;
-}
-// lib/cli/commands/agent.ts
 function agentGreeting(vars) {
   const instructions = vars.agentInstructions ? `
 ---
@@ -1025,6 +1016,25 @@ ${vars.agentInstructions}` : "";
     Do NOT proceed without running one of these commands.${instructions}
   `;
 }
+async function manageGitHooks(dependencies) { // Installs the hook when isHookInstalled reports false, otherwise re-points its binary path at settings.dustCommand; resolves to false only when isGitRepo is false.
+  const { context, fileSystem, settings } = dependencies;
+  const hooks = createHooksManager(context.cwd, fileSystem, settings);
+  if (!hooks.isGitRepo()) {
+    return false; // nothing is installed or changed in this case
+  }
+  const isInstalled = await hooks.isHookInstalled();
+  if (!isInstalled) {
+    await hooks.installHook();
+    return true; // fresh install; the binary path check below is skipped
+  }
+  const hookBinaryPath = await hooks.getHookBinaryPath();
+  if (hookBinaryPath && hookBinaryPath !== settings.dustCommand) { // a falsy path is left untouched
+    await hooks.updateHookBinaryPath(settings.dustCommand);
+  }
+  return true;
+}
+// lib/cli/commands/agent.ts
 async function agent(dependencies, env = process.env) {
   const { context, fileSystem, settings } = dependencies;
   if (env[DUST_SKIP_AGENT] === "1") {
@@ -6098,6 +6108,43 @@ function extractFirstSentence2(paragraph) {
   return match ? match[1] : null;
 }
+// lib/execution-order.ts
+function computeExecutionOrder(nodes) { // Returns [{ node, executionOrder }] with 1-based positions: oldest lastCommittedAt first, but after its known blockers (except in the fallback below).
+  if (nodes.length === 0)
+    return [];
+  const sorted = [...nodes].toSorted((a, b) => { // NOTE(review): toSorted already returns a new array, so the spread copy looks redundant
+    if (a.lastCommittedAt === null && b.lastCommittedAt === null)
+      return 0;
+    if (a.lastCommittedAt === null)
+      return 1;
+    if (b.lastCommittedAt === null)
+      return -1;
+    return new Date(a.lastCommittedAt).getTime() - new Date(b.lastCommittedAt).getTime();
+  }); // priority order: ascending lastCommittedAt, null timestamps last
+  const result = [];
+  const completed = new Set; // slugs that already have a position
+  const nodeMap = new Map(nodes.map((n) => [n.slug, n])); // slug lookup, used to tell known blockers from unknown ones
+  while (result.length < nodes.length) {
+    const next = sorted.find((node) => { // rescans from the front each pass: first unplaced node whose blockers are all satisfied
+      if (completed.has(node.slug))
+        return false;
+      return node.blockedBy.every((slug) => completed.has(slug) || !nodeMap.has(slug)); // a blocker that is not among nodes counts as satisfied
+    });
+    if (!next) { // every remaining node waits on another remaining node, e.g. a dependency cycle
+      for (const node of sorted) { // fall back to plain priority order so each remaining node still gets a position
+        if (!completed.has(node.slug)) {
+          result.push({ node, executionOrder: result.length + 1 });
+          completed.add(node.slug);
+        }
+      }
+      break;
+    }
+    result.push({ node: next, executionOrder: result.length + 1 });
+    completed.add(next.slug);
+  }
+  return result;
+}
 // lib/artifacts/workflow-tasks.ts
 var CAPTURE_IDEA_PREFIX = "Add Idea: ";
 var EXPEDITE_IDEA_PREFIX = "Expedite Idea: ";
@@ -6373,6 +6420,7 @@ async function parseCaptureIdeaTask(fileSystem, dustPath, taskSlug) {
 }
 // lib/lint/validators/content-validator.ts
+var FRONT_MATTER_DELIMITER = "---";
 var REQUIRED_TASK_HEADINGS = ["Task Type", "Blocked By", "Definition of Done"];
 var ALLOWED_TASK_TYPES = new Set(VALID_TASK_TYPES);
 var MAX_OPENING_SENTENCE_LENGTH = 150;
@@ -6390,6 +6438,18 @@ var NON_IMPERATIVE_STARTERS = new Set([
   "you",
   "i"
 ]);
+function validateNoFrontMatter(artifact) { // Returns a violation { file, line, message } when the first line of rawContent is the front matter delimiter, else null.
+  const firstLine = artifact.rawContent.split(`
+`)[0];
+  if (firstLine.trim() === FRONT_MATTER_DELIMITER) { // only line 1 is inspected; surrounding whitespace is ignored
+    return {
+      file: artifact.filePath,
+      line: 1,
+      message: "Artifact must not contain front matter. The title must be the first line."
+    };
+  }
+  return null;
+}
 function validateOpeningSentence(artifact) {
   if (!artifact.openingSentence) {
     return {
@@ -6475,20 +6535,22 @@ function validateTaskType(artifact) {
 function hasRequiredHeadings(content) { // True when content contains both level-2 headings, each with nothing but whitespace after it on its line.
   return /^## Blocked By\s*$/m.test(content) && /^## Definition of Done\s*$/m.test(content); // NOTE(review): REQUIRED_TASK_HEADINGS also lists Task Type, which is not checked here - confirm intent
 }
-function extractBlockedBy(content) {
+function extractBlockedBySlugs(content) { // Returns slugs (file name without directory or .md suffix) for the markdown links found in the Blocked By section.
   const blockedByMatch = content.match(/^## Blocked By\s*\n([\s\S]*?)(?=\n## |\n*$)/m); // NOTE(review): with the m flag the $ alternative can match at the end of the first section line, so the lazy capture appears to stop after one line - verify with a multi-line blocker list
   const section = blockedByMatch[1].trim(); // no null check: throws a TypeError when the heading is absent - presumably callers run hasRequiredHeadings first, confirm
   if (section === "(none)") {
     return [];
   }
   const linkPattern = /\[.*?\]\(([^)]+\.md)\)/g; // markdown link whose target ends in .md; group 1 is the target path
-  const blockers = [];
+  const slugs = [];
   let match = linkPattern.exec(section);
   while (match !== null) {
-    blockers.push(match[1]);
+    const slugMatch = match[1].match(/([^/]+)\.md$/); // keep only the last path segment, minus the .md suffix
+    if (slugMatch)
+      slugs.push(slugMatch[1]);
     match = linkPattern.exec(section); // the g flag makes exec resume after the previous match
   }
-  return blockers;
+  return slugs;
 }
 async function findUnblockedTasks(cwd, fileSystem, directoryFileSorter) {
   const dustPath = `${cwd}/.dust`;
@@ -6500,19 +6562,20 @@ async function findUnblockedTasks(cwd, fileSystem, directoryFileSorter) {
     return { tasks: [], invalidTasks: [] };
   }
   const files = await fileSystem.readdir(tasksPath);
-  let mdFiles = files.filter((f) => f.endsWith(".md"));
-  if (directoryFileSorter) {
-    mdFiles = await directoryFileSorter(tasksPath, mdFiles);
-  } else {
-    mdFiles.sort((a, b) => {
-      const aTime = fileSystem.getFileCreationTime(`${tasksPath}/${a}`);
-      const bTime = fileSystem.getFileCreationTime(`${tasksPath}/${b}`);
-      return aTime - bTime;
-    });
-  }
+  const mdFiles = files.filter((f) => f.endsWith(".md"));
   if (mdFiles.length === 0) {
     return { tasks: [], invalidTasks: [] };
   }
+  let timestamps;
+  if (directoryFileSorter) {
+    const results = await directoryFileSorter(tasksPath, mdFiles);
+    timestamps = new Map(results.map((r) => [r.file, r.lastCommittedAt]));
+  } else {
+    timestamps = new Map(mdFiles.map((f) => {
+      const ms = fileSystem.getFileCreationTime(`${tasksPath}/${f}`);
+      return [f, ms > 0 ? new Date(ms).toISOString() : null];
+    }));
+  }
   const taskFiles = [];
   for (const file of mdFiles) {
     const filePath = `${tasksPath}/${file}`;
@@ -6533,16 +6596,22 @@ async function findUnblockedTasks(cwd, fileSystem, directoryFileSorter) {
       });
     }
   }
-  const existingTasks = new Set(validTaskFiles.map((t) => t.file));
+  const taskNodes = validTaskFiles.map(({ file, content }) => ({
+    slug: file.replace(/\.md$/, ""),
+    file,
+    content,
+    blockedBy: extractBlockedBySlugs(content),
+    lastCommittedAt: timestamps.get(file) ?? null
+  }));
+  const ordered = computeExecutionOrder(taskNodes);
+  const existingSlugs = new Set(taskNodes.map((t) => t.slug));
   const tasks = [];
-  for (const { file, content } of validTaskFiles) {
-    const blockers = extractBlockedBy(content);
-    const hasIncompleteBlocker = blockers.some((blocker) => existingTasks.has(blocker));
+  for (const { node } of ordered) {
+    const hasIncompleteBlocker = node.blockedBy.some((slug) => existingSlugs.has(slug));
     if (!hasIncompleteBlocker) {
-      const title = extractTitle(content);
-      const openingSentence = extractOpeningSentence(content);
-      const relativePath = `.dust/tasks/${file}`;
-      tasks.push({ path: relativePath, title, openingSentence });
+      const title = extractTitle(node.content);
+      const openingSentence = extractOpeningSentence(node.content);
+      tasks.push({ path: `.dust/tasks/${node.file}`, title, openingSentence });
     }
   }
   return { tasks, invalidTasks };
@@ -6841,6 +6910,40 @@ async function executeTask(task, runParameters, onAgentEvent, context, agentName
     return "claude_error";
   }
 }
+function selectShellRunner(spawnFn, options, loopDeps) { // Picks the shell runner: container-backed only when both options.docker and options.containerRuntime are set.
+  if (options.docker && options.containerRuntime) {
+    return buildContainerShellRunner(spawnFn, options.containerRuntime, options.docker);
+  }
+  return loopDeps.shellRunner ?? defaultShellRunner; // otherwise the injected runner, falling back to the default one
+}
+function buildContainerShellRunner(spawnFn, containerRuntime, docker) { // Returns { run } that executes each command as sh -c <command> through containerRuntime.runCommand.
+  const runConfig = {
+    imageTag: docker.imageTag,
+    repoPath: docker.repoPath,
+    homeDir: docker.homeDir,
+    gitProxyUrl: docker.gitProxyUrl
+  };
+  const baseArgs = containerRuntime.buildRunArgs(runConfig); // computed once and reused for every run call
+  return {
+    run: (command, _cwd) => new Promise((resolve) => { // NOTE(review): _cwd is accepted but never used; the promise only ever resolves
+      const proc = spawnFn(containerRuntime.runCommand, [
+        ...baseArgs,
+        "sh",
+        "-c",
+        command
+      ]);
+      const chunks = []; // stdout and stderr share one buffer, in arrival order
+      proc.stdout?.on("data", (data) => chunks.push(data.toString()));
+      proc.stderr?.on("data", (data) => chunks.push(data.toString()));
+      proc.on("close", (code) => {
+        resolve({ exitCode: code ?? 1, output: chunks.join("") }); // a null close code is reported as exit code 1
+      });
+      proc.on("error", (error) => {
+        resolve({ exitCode: 1, output: error.message }); // an error event becomes a failed result instead of a rejection
+      });
+    })
+  };
+}
 async function runOneIteration(dependencies, loopDependencies, onLoopEvent, onAgentEvent, options = {}) {
   const { context, fileSystem, settings } = dependencies;
   const { spawn: spawn2, run: run2 } = loopDependencies;
@@ -6890,7 +6993,7 @@ async function runOneIteration(dependencies, loopDependencies, onLoopEvent, onAg
   const taskTitle = task.title ?? task.path;
   log2(`found ${tasks.length} task(s), picking: ${taskTitle}`);
   onLoopEvent({ type: "loop.tasks_found" });
-  const shellRunner = loopDependencies.shellRunner ?? defaultShellRunner;
+  const shellRunner = selectShellRunner(spawn2, options, loopDependencies);
   const preflightResult = await runPreflightChecks(context.cwd, settings.dustCommand, settings.installCommand, shellRunner, onLoopEvent, onAgentEvent, taskTitle);
   if (preflightResult.failed) {
     return handleCheckFailure(preflightResult.output, settings.dustCommand, { run: run2, prompt: "", spawnOptions, onRawEvent }, onAgentEvent, context, agentName, agentType, logger);
@@ -11384,6 +11487,8 @@ function validateIdeaOpenQuestions(artifact) {
   const topLevelStructureMessage = "Open Questions must use `### Question?` headings and `#### Option` headings at the top level. Put supporting markdown (including lists and code blocks) under an option heading. Run `dust new idea` to see the expected format.";
   let inOpenQuestions = false;
   let currentQuestionLine = null;
+  let currentQuestionText = null;
+  let currentQuestionOptionNames = new Set;
   let inOption = false;
   let inCodeBlock = false;
   for (let i = 0;i < lines.length; i++) {
@@ -11407,6 +11512,8 @@ function validateIdeaOpenQuestions(artifact) {
       violations.push(...validateH2Heading(filePath, line, i + 1, inOpenQuestions, currentQuestionLine));
       inOpenQuestions = line === "## Open Questions";
       currentQuestionLine = null;
+      currentQuestionText = null;
+      currentQuestionOptionNames = new Set;
       inOption = false;
       inCodeBlock = false;
       continue;
@@ -11422,6 +11529,7 @@ function validateIdeaOpenQuestions(artifact) {
           line: currentQuestionLine
         });
       }
+      currentQuestionOptionNames = new Set;
       if (!trimmedLine.endsWith("?")) {
         violations.push({
           file: filePath,
@@ -11429,12 +11537,24 @@ function validateIdeaOpenQuestions(artifact) {
           line: i + 1
         });
         currentQuestionLine = null;
+        currentQuestionText = null;
       } else {
         currentQuestionLine = i + 1;
+        currentQuestionText = trimmedLine.slice(4);
       }
       continue;
     }
     if (line.startsWith("#### ")) {
+      const optionName = trimmedLine.slice(5);
+      if (currentQuestionOptionNames.has(optionName)) {
+        violations.push({
+          file: filePath,
+          message: `Duplicate option "${optionName}" under question "${currentQuestionText}" — each option must have a unique name`,
+          line: i + 1
+        });
+      } else {
+        currentQuestionOptionNames.add(optionName);
+      }
       currentQuestionLine = null;
       inOption = true;
       continue;
@@ -11810,6 +11930,9 @@ function validateArtifacts(context) {
   }
   for (const artifacts of Object.values(byType)) {
     for (const artifact of artifacts) {
+      const frontMatterViolation = validateNoFrontMatter(artifact);
+      if (frontMatterViolation)
+        violations.push(frontMatterViolation);
       const openingSentenceViolation = validateOpeningSentence(artifact);
       if (openingSentenceViolation)
         violations.push(openingSentenceViolation);
@@ -12158,82 +12281,153 @@ async function check(dependencies, shellRunner, clock, _setInterval, _clearInter
   return { exitCode };
 }
+// lib/cli/commands/codex-hook.ts
+var KNOWN_HOOK_EVENTS = [ // hook_event_name values that codexHook accepts; any other value is rejected with exit code 1
+  "PreToolUse",
+  "PermissionRequest",
+  "PostToolUse",
+  "SessionStart",
+  "UserPromptSubmit",
+  "Stop"
+];
+async function readStdinUtf8() { // Reads process.stdin until it ends and returns everything decoded as UTF-8.
+  const chunks = [];
+  for await (const chunk of process.stdin) {
+    chunks.push(chunk);
+  }
+  return Buffer.concat(chunks).toString("utf8"); // assumes chunks are Buffers, i.e. no encoding was set on stdin - TODO confirm
+}
+var defaultCodexHookDependencies = { // default value of the second codexHook parameter, so callers can substitute the stdin reader
+  readStdin: readStdinUtf8
+};
+function isKnownEvent(value) { // True only for a string that is listed in KNOWN_HOOK_EVENTS.
+  return typeof value === "string" && KNOWN_HOOK_EVENTS.includes(value);
+}
+async function handleSessionStart(dependencies) { // Builds the SessionStart response and returns it as a JSON string, not an object.
+  const { context, fileSystem, settings } = dependencies;
+  const agentInstructions = await loadAgentInstructions(context.cwd, fileSystem, "codex");
+  const additionalContext = agentGreeting({ // greeting text is generated with fixed flags for this agent
+    bin: settings.dustCommand,
+    agentName: "Codex",
+    hooksInstalled: false,
+    isClaudeCodeWeb: false,
+    hasIdeaFile: true,
+    agentInstructions
+  });
+  return JSON.stringify({
+    continue: true,
+    hookSpecificOutput: {
+      hookEventName: "SessionStart",
+      additionalContext // the greeting travels in this field
+    },
+    systemMessage: "dust agent loaded"
+  });
+}
+function handleNoOp() { // Response for known events that need no handling: a JSON string containing only continue: true.
+  return JSON.stringify({ continue: true });
+}
+async function codexHook(dependencies, hookDependencies = defaultCodexHookDependencies) { // Reads a JSON hook payload via readStdin, writes the JSON response to context.stdout, and resolves to { exitCode }.
+  const { context } = dependencies;
+  const raw = await hookDependencies.readStdin();
+  let payload;
+  try {
+    payload = JSON.parse(raw);
+  } catch { // invalid JSON: report on stderr and exit 1
+    context.stderr("dust codex hook: failed to parse stdin as JSON");
+    return { exitCode: 1 };
+  }
+  if (!payload || typeof payload !== "object") { // rejects null and primitives; arrays pass here and then fail the event name check below
+    context.stderr("dust codex hook: stdin payload must be a JSON object");
+    return { exitCode: 1 };
+  }
+  const eventName = payload.hook_event_name;
+  if (!isKnownEvent(eventName)) { // a missing or non-string name is reported the same way as an unknown one
+    context.stderr(`dust codex hook: unknown hook_event_name: ${JSON.stringify(eventName)}`);
+    return { exitCode: 1 };
+  }
+  const response = eventName === "SessionStart" ? await handleSessionStart(dependencies) : handleNoOp(); // every other known event gets the bare continue response
+  context.stdout(response);
+  return { exitCode: 0 };
+}
 // lib/bundled-core-principles.ts
 var BUNDLED_PRINCIPLES = [
   {
-    slug: "batteries-included",
-    content: `# Batteries Included
-Dust should provide everything that is required (within reason) for an agent to be productive in an arbitrary codebase.
+    slug: "design-for-testability",
+    content: `# Design for Testability
-An agent working autonomously should not be blocked because a tool or configuration is missing. For example, dust should ship custom lint rules for different linters, even though those linters are not dependencies of dust itself. If an agent needs a capability to do its job well in a typical codebase, dust should provide it out of the box.
+Design code to be testable first; good structure follows naturally.
-This means accepting some breadth of scope — bundling configs, rules, and utilities that target external tools — in exchange for agents that can start producing useful work immediately without manual setup.
+Testability should be a primary design driver, not a quality to be retrofitted. When code is designed to be testable from the start, it naturally becomes decoupled, explicit in its dependencies, and clear in its interfaces.
-## Applicability
+The discipline of testability forces good design: functions become pure, dependencies become explicit, side effects become isolated. Rather than viewing testability as a tax on production code, recognize it as a compass that points toward better architecture.
-Internal
+This is particularly important in agent-driven development. Agents cannot manually verify their changes—they rely entirely on tests. Code that resists testing resists autonomous modification.
 ## Parent Principle
-- [Agent Autonomy](agent-autonomy.md)
+- [Decoupled Code](decoupled-code.md)
 ## Sub-Principles
+- (none)
 `
   },
   {
-    slug: "some-big-design-up-front",
-    content: `# Some Big Design Up Front
-AI agents lower the cost of architectural exploration, making heavier upfront investment rational during the idea phase.
+    slug: "fast-feedback-loops",
+    content: `# Fast Feedback Loops
-Agile's rejection of "big design up front" (BDUF) was largely economic: detailed architecture was expensive to produce and often wrong. AI agents change that equation — they can explore multiple variants, prototype them, and measure trade-offs cheaply. When evaluating alternatives costs less, the expected value of avoiding large structural mistakes increases.
+The primary feedback loop — write code, run checks, see results — should be as fast as possible.
-This doesn't mean returning to traditional BDUF. Uncertainty about future requirements still limits what prediction can achieve. The insight is that the optimal amount of upfront work has shifted, not that prediction became reliable.
+Fast feedback is the foundation of productive development, for both humans and agents. When tests, linters, and type checks run in seconds rather than minutes, developers iterate more frequently and catch problems earlier. Agents especially benefit because they operate in tight loops of change-and-verify; slow feedback wastes tokens and context window space on waiting rather than working.
-The model is hybrid: thorough AI-assisted exploration during ideas, followed by straightforward execution during tasks. "Lightweight" refers to task-level planning, not idea-level exploration. Invest heavily in understanding alternatives during the idea phase, then decompose into atomic tasks once the direction is clear.
+Dust should help projects measure the speed of their feedback loops, identify bottlenecks, and keep them fast as the codebase grows. This includes promoting practices like unit tests over integration tests for speed, incremental compilation, and check parallelisation.
-## Convergence Criteria
+## Parent Principle
-Exploration should continue until clear trade-offs are identified and the chosen approach can be articulated against alternatives. This is convergence-based, not time-boxed — simple ideas converge quickly, complex architectural decisions require more exploration.
+- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
-When exploration feels "done":
+## Sub-Principles
-- Multiple approaches have been considered
-- Trade-offs between approaches are understood
-- The chosen direction has clear justification
-- Remaining uncertainty is about requirements, not design
+- (none)
+`
+  },
+  {
+    slug: "test-isolation",
+    content: `# Test Isolation
-If a task requires significant design decisions during execution, it wasn't ready to be a task.
+Tests should not interfere with one another. Each test must be independently runnable and produce the same result regardless of execution order or which other tests run alongside it.
-## Documenting Alternatives
+This means:
+- No shared mutable state between tests
+- No reliance on test execution order
+- No file system or environment pollution
+- Each test sets up its own dependencies
-Ideas should document the alternatives considered and why they were ruled out. This creates a decision log that helps future agents and humans understand context. Include alternatives in the idea body or Open Questions sections.
+Test isolation enables parallel execution, makes failures easier to diagnose, and prevents cascading false failures when one test breaks.
 ## Parent Principle
-- [Lightweight Planning](lightweight-planning.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
-- (none)
+- [Environment-Independent Tests](environment-independent-tests.md)
 `
   },
   {
-    slug: "design-for-testability",
-    content: `# Design for Testability
-Design code to be testable first; good structure follows naturally.
+    slug: "boy-scout-rule",
+    content: `# Boy Scout Rule
-Testability should be a primary design driver, not a quality to be retrofitted. When code is designed to be testable from the start, it naturally becomes decoupled, explicit in its dependencies, and clear in its interfaces.
+Always leave the code better than you found it.
-The discipline of testability forces good design: functions become pure, dependencies become explicit, side effects become isolated. Rather than viewing testability as a tax on production code, recognize it as a compass that points toward better architecture.
+When working in any area of the codebase, take the opportunity to make small improvements — clearer names, removed dead code, better structure — even if they're not directly related to the task at hand. These incremental improvements compound over time, preventing gradual decay and keeping the codebase healthy without requiring dedicated cleanup efforts.
-This is particularly important in agent-driven development. Agents cannot manually verify their changes—they rely entirely on tests. Code that resists testing resists autonomous modification.
+The Boy Scout Rule is not a license for large-scale refactoring during unrelated work. Improvements should be small, obvious, and low-risk. If a cleanup is too large to include alongside the current task, capture it as a separate task instead.
 ## Parent Principle
-- [Decoupled Code](decoupled-code.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 ## Sub-Principles
@@ -12241,51 +12435,35 @@ This is particularly important in agent-driven development. Agents cannot manual
 `
   },
   {
-    slug: "readable-test-data",
-    content: `# Readable Test Data
+    slug: "atomic-commits",
+    content: `# Atomic Commits
-Test data setup should use natural structures that mirror what they represent.
+Each commit should tell a complete story, bundling implementation changes with their corresponding documentation updates.
-## Why it matters
+When a task is completed, the commit deletes the task file, updates relevant facts to reflect the new reality, and removes any ideas that have been realized. This discipline ensures that any point in the commit history represents a coherent, self-documenting state of the project.
-When test data is easy to read, tests become self-documenting. A file system hierarchy expressed as a nested object immediately conveys structure, while a flat Map with path strings requires mental parsing to understand the relationships.
+Clean commit history is essential because archaeology depends on it. Future humans and AI agents will traverse history to understand why decisions were made and how the system evolved.
-## In practice
+## Parent Principle
-Prefer literal structures that visually match the domain:
+- [Repository Hygiene](repository-hygiene.md)
-\`\`\`javascript
-// Avoid: flat paths that obscure hierarchy
-const fs = createFileSystemEmulator({
-  files: new Map([['/project/.dust/principles/my-goal.md', '# My Goal']]),
-  existingPaths: new Set(['/project/.dust/ideas']),
-})
+## Sub-Principles
-// Prefer: nested object that mirrors file system structure
-const fs = createFileSystemEmulator({
-  project: {
-    '.dust': {
-      principles: {
-        'my-goal.md': '# My Goal'
-      },
-      ideas: {}
-    }
-  }
-})
-\`\`\`
+- [Traceable Decisions](traceable-decisions.md)
+`
+  },
+  {
+    slug: "co-located-tests",
+    content: `# Co-located Tests
-The nested form:
-- Shows parent-child relationships through indentation
-- Makes empty directories explicit with empty objects
-- Requires no mental path concatenation to understand structure
-## How to evaluate
+Test files should live next to the code they test.
-Work supports this principle when test setup data uses structures that visually resemble what they represent, reducing cognitive load for readers.
+When tests are co-located with their source files, developers can immediately see what's tested and what isn't. Finding the test for a module becomes trivial—it's right there in the same directory. This proximity encourages writing tests as part of the development flow rather than as an afterthought, and makes it natural to update tests when modifying code.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Intuitive Directory Structure](intuitive-directory-structure.md)
 ## Sub-Principles
@@ -12293,26 +12471,20 @@ Work supports this principle when test setup data uses structures that visually
 `
   },
   {
-    slug: "agent-specific-enhancement",
-    content: `# Agent-Specific Enhancement
-Dust should detect and enhance the experience for specific agents while remaining agnostic at its core.
-While Dust has [Agent-Agnostic Design](agent-agnostic-design.md) and works with any capable agent, it can still optimize the "agent DX" (developer experience) when it detects a specific agent is being used. This means:
+    slug: "broken-windows",
+    content: `# Broken Windows
-- **Detection** - Dust may detect which agent is running (e.g., Claude Code, Aider, Cursor) through environment variables, configuration, or other signals
-- **Enhancement** - Once detected, Dust can tailor its output format, prompts, or context to leverage that agent's specific strengths
-- **Graceful fallback** - When no specific agent is detected, Dust provides a generic experience that works with any agent
+Don't leave broken windows unrepaired.
-This principle complements Agent-Agnostic Design: the core functionality never requires a specific agent, but the experience improves when one is recognized.
+A broken window — a bad name, a hack, a TODO that lingers, a test that's been skipped — signals that nobody cares. That signal invites more neglect. One shortcut becomes two, then ten, and the codebase quietly rots from the inside.
-## Applicability
+When you spot a broken window, fix it immediately if the fix is small. If it's too large, capture it as a task so it doesn't get forgotten. The key is to never normalise the damage. Even a comment acknowledging the problem ("this needs fixing because...") is better than silent acceptance.
-Internal
+This principle complements the [Boy Scout Rule](boy-scout-rule.md): the Boy Scout Rule encourages proactive improvement, while Broken Windows warns against tolerating known problems. Together they keep entropy at bay.
 ## Parent Principle
-- [Agent Autonomy](agent-autonomy.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 ## Sub-Principles
@@ -12320,76 +12492,45 @@ Internal
 `
   },
   {
-    slug: "context-optimised-code",
-    content: `# Context-Optimised Code
+    slug: "trunk-based-development",
+    content: `# Trunk-Based Development
-Code should be structured so that agents can understand and modify it within their context window constraints.
+Dust is designed to support a non-branching workflow where developers commit directly to a single main branch.
-Large files, deeply nested abstractions, and sprawling dependency chains all work against agents. A 3,000-line file cannot be fully loaded into context. A function that requires understanding six levels of indirection demands more context than one that is self-contained. Context-optimised code favours small files, shallow abstractions, explicit dependencies, and co-located related logic.
+In trunk-based development, teams collaborate on code in one primary branch rather than maintaining multiple long-lived feature branches. This eliminates merge conflicts, enables continuous integration, and keeps the codebase continuously releasable.
-Dust should help projects identify files that are too large, modules that are too tangled, and patterns that make agent comprehension harder than it needs to be. This is not just about file size — it is about ensuring that the unit of code an agent needs to understand fits comfortably within the window available.
+The \`dust loop claude\` command embodies this philosophy: agents pull from main, implement a task, and push directly back to main. There are no feature branches, no pull requests, no merge queues. Each commit is atomic and complete.
+This approach scales through discipline rather than isolation. Feature flags and incremental changes replace long-running branches. The repository history becomes a linear sequence of working states.
+See: https://trunkbaseddevelopment.com/
 ## Parent Principle
-- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
+- [Repository Hygiene](repository-hygiene.md)
 ## Sub-Principles
-- (none)
+(none)
 `
   },
   {
-    slug: "self-diagnosing-tests",
-    content: `# Self-Diagnosing Tests
-When a big test fails, it should be self-evident how to diagnose and fix the failure.
-The more moving parts a test has — end-to-end, system, integration — the more critical this becomes. A test that fails with \`expected true, received false\` forces the developer (or agent) to re-run, add logging, and guess. A test that fails with a rich diff showing the actual state versus the expected state turns diagnosis into reading.
-## Anti-patterns
-**Boolean flattening** — collapsing a rich value into true/false before asserting:
-\`\`\`javascript
-// Bad: "expected true, received false" — what events arrived?
-expect(events.some(e => e.type === 'check-passed')).toBe(true)
-// Good: shows the actual event types on failure
-expect(events.map(e => e.type)).toContain('check-passed')
-\`\`\`
-**Length-only assertions** — checking count without showing contents:
-\`\`\`javascript
-// Bad: "expected 2, received 0" — what requests were captured?
-expect(requests.length).toBe(2)
-// Good: shows the actual requests on failure
-expect(requests).toHaveLength(2)  // vitest shows the array
-\`\`\`
-**Silent guards** — using \`if\` where an assertion belongs:
-\`\`\`javascript
-// Bad: silently passes when settings is undefined
-if (settings) {
-  expect(JSON.parse(settings).key).toBeDefined()
-}
-// Good: fails explicitly if settings is missing
-expect(settings).toBeDefined()
-const parsed = JSON.parse(settings!)
-expect(parsed.key).toBeDefined()
-\`\`\`
-## The test
+    slug: "environment-independent-tests",
+    content: `# Environment-Independent Tests
-If a test fails, can a developer who has never seen the code identify the problem from the failure output alone — without re-running, adding console.logs, or reading the test source? The closer to "yes", the better.
+Tests must produce the same result regardless of where they run. A test that passes locally but fails in CI (or vice versa) is a broken test.
-## How to evaluate
+Concretely, tests should never depend on:
+- Ambient environment variables (e.g. \`CLAUDECODE\`, \`CI\`, \`HOME\`)
+- The current working directory or filesystem layout of the host machine
+- Network availability or external services
+- The identity of the user or agent running the tests
-Work supports this principle when every assertion in a system or integration test would, on failure, reveal the actual state richly enough to guide a fix. Bare boolean checks, length-only assertions, and silent conditional guards are violations.
+When a function's behavior depends on environment variables, the test must explicitly control those variables (via \`stubEnv\`, dependency injection, or passing an \`env\` parameter) rather than relying on whatever happens to be set in the current shell.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Test Isolation](test-isolation.md)
 ## Sub-Principles
@@ -12397,67 +12538,53 @@ Work supports this principle when every assertion in a system or integration tes
 `
   },
   {
-    slug: "ideal-agent-developer-experience",
-    content: `# Ideal Agent Developer Experience
-The agent is the developer. The human is the CEO. Dust is the PM.
-With today's AI coding assistants, the human is stuck in a tight loop with agents — constantly directing, reviewing, and course-correcting. Dust is designed to relieve humans from this tight loop. Like an assistant to a CEO, dust predominantly brings fully-researched questions and well-prepared work to the human, rather than expecting the human to drive every decision. The human checks in less frequently, and when they do, they make high-leverage strategic calls rather than micromanaging implementation.
-For this to work, the agent's development environment must be excellent. The agent reads the code, writes changes, runs the checks, and iterates until the task is done. Everything about the codebase and its tooling either helps or hinders that process. Comprehensive tests are the agent's only way to verify correctness. Fast feedback loops are the agent's iteration speed. Structured logs are the agent's eyes into runtime behaviour. Small, well-organised files are what fit in the agent's context window. Exploratory and debugging tools are how the agent navigates and diagnoses without trial and error.
-Each sub-principle represents a different aspect of the ideal agent developer setup. The better these are, the less the human needs to be in the loop.
-## Parent Principle
-- [Human-AI Collaboration](human-ai-collaboration.md)
+    slug: "comprehensive-assertions",
+    content: `# Comprehensive Assertions
-## Sub-Principles
+Assert the whole, not the parts.
-- [Comprehensive Test Coverage](comprehensive-test-coverage.md)
-- [Fast Feedback Loops](fast-feedback-loops.md)
-- [Slow Feedback Coping](slow-feedback-coping.md)
-- [Development Traceability](development-traceability.md)
-- [Context-Optimised Code](context-optimised-code.md)
-- [Exploratory Tooling](exploratory-tooling.md)
-- [Debugging Tooling](debugging-tooling.md)
-- [Self-Contained Repository](self-contained-repository.md)
-`
-  },
-  {
-    slug: "broken-windows",
-    content: `# Broken Windows
+When you break a complex object into many small assertions, a failure tells you *one thing that's wrong*. When you assert against the whole expected value, the diff tells you *what actually happened versus what you expected* — the full picture, in one glance.
-Don't leave broken windows unrepaired.
+Small assertions are like yes/no questions to a witness. A whole-object assertion is like asking "tell me what you saw."
-A broken window — a bad name, a hack, a TODO that lingers, a test that's been skipped — signals that nobody cares. That signal invites more neglect. One shortcut becomes two, then ten, and the codebase quietly rots from the inside.
+## In practice
-When you spot a broken window, fix it immediately if the fix is small. If it's too large, capture it as a task so it doesn't get forgotten. The key is to never normalise the damage. Even a comment acknowledging the problem ("this needs fixing because...") is better than silent acceptance.
+Collapse multiple partial assertions into one comprehensive assertion:
-This principle complements the [Boy Scout Rule](boy-scout-rule.md): the Boy Scout Rule encourages proactive improvement, while Broken Windows warns against tolerating known problems. Together they keep entropy at bay.
+\`\`\`javascript
+// Fragmented — each failure is a narrow keyhole
+expect(result.name).toBe("Alice");
+expect(result.age).toBe(30);
+expect(result.role).toBe("admin");
-## Parent Principle
+// Whole — a failure diff tells the full story
+expect(result).toEqual({
+  name: "Alice",
+  age: 30,
+  role: "admin",
+});
+\`\`\`
-- [Maintainable Codebase](maintainable-codebase.md)
+If \`role\` is \`"user"\` and \`age\` is \`29\`, the fragmented version stops at the first failure. The whole-object assertion shows both discrepancies at once, in context.
-## Sub-Principles
+The same applies to arrays:
-- (none)
-`
-  },
-  {
-    slug: "progressive-disclosure",
-    content: `# Progressive Disclosure
+\`\`\`javascript
+// Avoid: partial assertions that hide the actual state
+expect(array).toContain('apples')
+expect(array).toContain('oranges')
-Dust should reveal details progressively as a way of achieving context window efficiency.
+// Prefer: one assertion that reveals the full picture on failure
+expect(array).toEqual(['apples', 'oranges'])
+\`\`\`
-Not all information is needed at once. A task list showing just titles is sufficient for choosing what to work on. Full task details are only needed when actively implementing. Linked principles and facts can be followed when deeper context is required.
+## How to evaluate
-This layered approach keeps initial reads lightweight while preserving access to complete information when needed.
+Work supports this principle when test failures tell a rich story — showing the complete actual value alongside the complete expected value, so the reader can understand what happened without re-running anything.
 ## Parent Principle
-- [Context Window Efficiency](context-window-efficiency.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
@@ -12465,76 +12592,84 @@ This layered approach keeps initial reads lightweight while preserving access to
 `
   },
   {
-    slug: "lightweight-planning",
-    content: `# Lightweight Planning
-Dust aims to be a minimal, low-overhead planning system that stays relevant over time.
+    slug: "maintainable-codebase",
+    content: `# Maintainable Codebase
-Planning artifacts are simple markdown files that live alongside code. Ideas are intentionally vague until implementation is imminent. Tasks are small and completable in single commits. Facts document current reality rather than aspirational states.
+The dust codebase should be easy to understand, modify, and extend.
-The system avoids the staleness problem by deferring detail until the last responsible moment and deleting completed work rather than archiving it.
+This principle governs how we develop and maintain dust itself, separate from the principles that describe what dust offers its users. A well-maintained codebase enables rapid iteration, reduces bugs, and makes contributions easier.
 ## Parent Principle
-- [Human-AI Collaboration](human-ai-collaboration.md)
+- [Agentic Flow State](agentic-flow-state.md)
 ## Sub-Principles
-- [Task-First Workflow](task-first-workflow.md)
-- [Some Big Design Up Front](some-big-design-up-front.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Minimal Dependencies](minimal-dependencies.md)
+- [Intuitive Directory Structure](intuitive-directory-structure.md)
+- [Repository Hygiene](repository-hygiene.md)
+- [Naming Matters](naming-matters.md)
+- [Reasonably DRY](reasonably-dry.md)
+- [Make the Change Easy](make-the-change-easy.md)
+- [Boy Scout Rule](boy-scout-rule.md)
+- [Broken Windows](broken-windows.md)
 `
   },
   {
-    slug: "comprehensive-test-coverage",
-    content: `# Comprehensive Test Coverage
+    slug: "context-window-efficiency",
+    content: `# Context Window Efficiency
-A project's test suite is its primary safety net, and agents depend on it even more than humans do.
+Dust should be designed with short attention spans in mind.
-Agents cannot manually verify that their changes work. They rely entirely on automated tests to confirm correctness. Gaps in test coverage become gaps in agent capability — areas where changes are risky and feedback is absent. Comprehensive coverage means every meaningful behaviour is tested, so agents can make changes anywhere in the codebase with confidence.
+AI agents operate within limited context windows. Every token consumed by planning artifacts is a token unavailable for reasoning about code. Dust keeps artifacts concise and scannable so agents can quickly understand what needs to be done without wading through verbose documentation.
-Dust should help projects measure and improve their test coverage, flag untested areas, and encourage a culture where new code comes with new tests.
+This means favoring brevity over completeness, using consistent structures that are fast to parse, and avoiding redundant information across files.
 ## Parent Principle
-- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
+- [Agent Autonomy](agent-autonomy.md)
 ## Sub-Principles
-- (none)
+- [Progressive Disclosure](progressive-disclosure.md)
 `
   },
   {
-    slug: "intuitive-directory-structure",
-    content: `# Intuitive Directory Structure
+    slug: "human-ai-collaboration",
+    content: `# Human-AI Collaboration
-Code should be organized around related concerns in clearly named directories.
+Dust exists to enable effective collaboration between humans and AI agents on complex projects.
-When files that serve similar purposes are grouped together, the codebase becomes easier to navigate and understand. A developer looking for "commands" should find them in a \`commands\` directory. Utilities should live with utilities. This organization reduces cognitive load and makes the project structure self-documenting.
+The human is the CEO — they set direction, make strategic decisions, and check in when it matters. Dust is the PM — it manages the work, prepares context, and brings fully-researched questions to the human rather than expecting them to drive every detail. Agents are the developers — they read code, write changes, and iterate autonomously.
+Today's AI coding tools keep humans in a tight loop with agents. Dust is designed to loosen that loop, so humans spend less time directing and more time deciding.
 ## Parent Principle
-- [Maintainable Codebase](maintainable-codebase.md)
+- [Agentic Flow State](agentic-flow-state.md)
 ## Sub-Principles
-- [Co-located Tests](co-located-tests.md)
+- [Agent Autonomy](agent-autonomy.md)
+- [Easy Adoption](easy-adoption.md)
+- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
+- [Lightweight Planning](lightweight-planning.md)
 `
   },
   {
-    slug: "small-units",
-    content: `# Small Units
-Ideas, principles, facts, and tasks should each be as discrete and fine-grained as possible.
+    slug: "functional-core-imperative-shell",
+    content: `# Functional Core, Imperative Shell
-Small, focused documents enable precise relationships between them. A task can link to exactly the principles it serves. A fact can describe one specific aspect of the system. This granularity reduces ambiguity.
+Separate code into a pure "functional core" and a thin "imperative shell." The core takes values in and returns values out, with no side effects. The shell handles I/O and wires things together.
-Tasks especially benefit from being small. A narrowly scoped task gives agents or humans the best chance of delivering exactly what was intended, in a single atomic commit.
+Purely functional code makes some things easier to understand: because values don't change, you can call functions and know that only their return value matters—they don't change anything outside themselves.
-Note: This principle directly supports [Lightweight Planning](lightweight-planning.md), which explicitly mentions that "Tasks are small and completable in single commits."
+The functional core contains business logic as pure functions that take values and return values. The imperative shell sits at the boundary, reading input, calling into the core, and performing side effects with the results. This keeps the majority of code easy to test (no mocks or stubs needed for pure functions) and makes the I/O surface area small and explicit.
 ## Parent Principle
-- [Agent Autonomy](agent-autonomy.md)
+- [Decoupled Code](decoupled-code.md)
 ## Sub-Principles
@@ -12542,35 +12677,28 @@ Note: This principle directly supports [Lightweight Planning](lightweight-planni
 `
   },
   {
-    slug: "fast-feedback",
-    content: `# Fast Feedback
-Dust should provide fast feedback loops for developers.
-Scripts and tooling should execute quickly so developers can iterate rapidly. Slow feedback discourages frequent validation and leads to larger, riskier changes. Fast feedback enables small, confident steps.
-## Parent Principle
+    slug: "keep-unit-tests-pure",
+    content: `# Keep Unit Tests Pure
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+Unit tests (those run very frequently as part of a tight feedback loop) should be pure and side-effect free. A test is **not** a unit test if it:
-## Sub-Principles
+- Accesses a database
+- Communicates over a network
+- Touches the file system
+- Cannot run concurrently with other tests
+- Requires special environment setup
-- (none)
-`
-  },
-  {
-    slug: "dependency-injection",
-    content: `# Dependency Injection
+"Unit tests" here means tests run frequently during development — not system tests, which intentionally exercise the full stack including I/O. Pure unit tests exercise only business logic, not infrastructure.
-Avoid global mocks. Dependency injection is almost always preferable to testing code that depends directly on globals.
+The value of pure unit tests is that they are fast, deterministic, and isolate business logic from infrastructure concerns. When unit tests pass but integration or system tests fail, developers can immediately narrow the problem to the boundary layer — a diagnostic "binary chop" that accelerates debugging.
-When code depends on global state or singletons, testing requires mocking those globals—which introduces hidden coupling, complicates test setup, and risks interference between tests. Dependency injection makes dependencies explicit: they're passed in as arguments, making the code's requirements visible and enabling tests to supply controlled implementations.
+## Migration Guidance
-This approach improves testability (each test controls its own dependencies), readability (dependencies are declared upfront), and flexibility (swapping implementations doesn't require changing the consuming code). It also makes refactoring safer since dependencies are explicit rather than implicit.
+Where existing tests are impure (e.g. they spawn processes, write temporary files, or make network calls), prefer converting them to use in-memory alternatives — stubs, fakes, or dependency-injected doubles — rather than leaving them as-is. Opportunistic migration is fine; a big-bang rewrite is not required.
 ## Parent Principle
-- [Decoupled Code](decoupled-code.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
@@ -12578,16 +12706,16 @@ This approach improves testability (each test controls its own dependencies), re
 `
   },
   {
-    slug: "reproducible-checks",
-    content: `# Reproducible Checks
+    slug: "runtime-agnostic-tests",
+    content: `# Runtime Agnostic Tests
-Every check must produce the same result regardless of who runs it, when, or on what machine. If a check passes for one developer but fails for another, the check is broken.
+Dust's test suite should work across JavaScript runtimes.
-Concretely, checks should pin their tool versions via the project's dependency manager (e.g. \`devDependencies\`) rather than relying on \`npx\`/\`bunx\` to fetch the latest version at runtime. Unpinned versions introduce non-determinism — a check that passed yesterday may fail today due to a tool upgrade that nobody chose to adopt.
+Tests should use standard JavaScript testing patterns that work across Node.js, Bun, and other runtimes. Avoiding runtime-specific test APIs ensures the project can leverage different runtimes' advantages while maintaining broad compatibility.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Minimal Dependencies](minimal-dependencies.md)
 ## Sub-Principles
@@ -12595,18 +12723,18 @@ Concretely, checks should pin their tool versions via the project's dependency m
 `
   },
   {
-    slug: "slow-feedback-coping",
-    content: `# Slow Feedback Coping
+    slug: "unsurprising-ux",
+    content: `# Unsurprising UX
-Some feedback is unavoidably slow — dust should offer coping strategies rather than pretending it can be eliminated.
+The user interface should be as "guessable" as possible.
-Integration tests, end-to-end tests, deployment pipelines, and external API calls all take time. Pretending they can be made instant is unrealistic. Instead, dust should help developers and agents cope with slow feedback effectively: by structuring work so that fast checks catch most problems early, by batching slow checks intelligently, by providing clear progress indicators, and by ensuring that when slow feedback does arrive, it is actionable and specific.
+Following the [Principle of Least Astonishment](https://en.wikipedia.org/wiki/Principle_of_least_astonishment), users form expectations about how a tool will behave based on conventions, prior experience, and intuition. Dust's interface (including the CLI) should match those expectations wherever possible. If users are observed trying to use the interface in ways we didn't anticipate, the interface should be adjusted to meet their expectations — even if that means supporting many ways of achieving the same result.
-Strategies include separating fast and slow test suites, running slow checks asynchronously or in CI, caching expensive operations, and designing workflows that minimise how often slow feedback is needed.
+Surprising behavior erodes trust and slows people down. Unsurprising behavior lets users stay in flow.
 ## Parent Principle
-- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
+- [Easy Adoption](easy-adoption.md)
 ## Sub-Principles
@@ -12614,127 +12742,113 @@ Strategies include separating fast and slow test suites, running slow checks asy
 `
   },
   {
-    slug: "make-changes-with-confidence",
-    content: `# Make Changes with Confidence
+    slug: "unit-test-coverage",
+    content: `# Unit Test Coverage
-Developers should be able to modify code without fear of breaking existing behavior.
+Complete unit test coverage ensures low-level tests give users direct feedback as they change the code.
-Tests, type checking, and other automated verification enable safe refactoring and evolution of the codebase. When changes break something, fast feedback identifies the problem before it spreads. This confidence encourages continuous improvement rather than fragile, stagnant code.
+Excluding system tests from coverage reporting focuses attention on unit tests - the tests that provide the fastest, most specific feedback. When coverage tools only measure unit tests, developers can quickly identify which parts of the codebase lack fine-grained test protection.
 ## Parent Principle
-- [Maintainable Codebase](maintainable-codebase.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
-- [Comprehensive Assertions](comprehensive-assertions.md)
-- [Decoupled Code](decoupled-code.md)
-- [Fast Feedback](fast-feedback.md)
-- [Lint Everything](lint-everything.md)
-- [Readable Test Data](readable-test-data.md)
-- [Reproducible Checks](reproducible-checks.md)
-- [Stop the Line](stop-the-line.md)
-- [Keep Unit Tests Pure](keep-unit-tests-pure.md)
-- [Test Isolation](test-isolation.md)
-- [Self-Diagnosing Tests](self-diagnosing-tests.md)
-- [Unit Test Coverage](unit-test-coverage.md)
+- (none)
 `
   },
   {
-    slug: "test-isolation",
-    content: `# Test Isolation
+    slug: "cross-platform-compatibility",
+    content: `# Cross-Platform Compatibility
-Tests should not interfere with one another. Each test must be independently runnable and produce the same result regardless of execution order or which other tests run alongside it.
+Dust should work consistently across operating systems: Linux, macOS, and Windows.
 This means:
-- No shared mutable state between tests
-- No reliance on test execution order
-- No file system or environment pollution
-- Each test sets up its own dependencies
+- Avoiding platform-specific shell commands or syntax
+- Using cross-platform path handling
+- Testing on multiple platforms when possible
+- Documenting any platform-specific limitations
-Test isolation enables parallel execution, makes failures easier to diagnose, and prevents cascading false failures when one test breaks.
+Cross-platform support broadens adoption and ensures teams with mixed environments can collaborate effectively.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Easy Adoption](easy-adoption.md)
 ## Sub-Principles
-- [Environment-Independent Tests](environment-independent-tests.md)
+- (none)
 `
   },
   {
-    slug: "repository-hygiene",
-    content: `# Repository Hygiene
+    slug: "vcs-independence",
+    content: `# VCS Independence
-Dust repositories should maintain a clean, organized state with minimal noise.
+Dust should work independently of any specific version control system.
-This includes proper gitignore configuration to exclude build artifacts, dependencies, editor files, and other generated content from version control. A well-maintained repository makes it easier for both humans and AI to navigate and understand the codebase.
+While git is common, dust's core functionality should not require git. This enables use in repositories using other VCS (Mercurial, SVN, Perforce) or in non-VCS workflows.
 ## Parent Principle
-- [Maintainable Codebase](maintainable-codebase.md)
+- [Easy Adoption](easy-adoption.md)
 ## Sub-Principles
-- [Atomic Commits](atomic-commits.md)
-- [Trunk-Based Development](trunk-based-development.md)
+- (none)
 `
   },
   {
-    slug: "agentic-flow-state",
-    content: `# Agentic Flow State
-Flow is the mental state where work becomes effortless - where you're fully immersed, losing track of time, operating at peak performance. Psychologist Mihaly Csikszentmihalyi identified three conditions that create flow: clear goals, immediate feedback, and challenge-skill balance.
+    slug: "self-contained-repository",
+    content: `# Self-Contained Repository
-For AI agents, achieving flow state means staying engaged and productive without interruption. Agents enter flow when they have optimal context, comprehensive guard rails, and minimal friction. Context window optimization ensures agents have exactly what they need without cognitive overload. In-session guard rails prevent agents from straying off course or making mistakes that break their momentum.
+Where possible, developers and agents should have everything they need to be productive, within the repository.
-Dust's design targets these conditions directly:
+No third-party tools should be required beyond those that can be installed with a single command defined in the repository. Setup instructions, scripts, configuration, and dependencies should all live in version control so that cloning the repo and running a single install command is sufficient to start working. This eliminates onboarding friction, reduces "works on my machine" issues, and is especially important for agents — who cannot browse the web to find missing tools or ask colleagues how to set things up.
-- **Clear goals**: Task files and lightweight planning give you a concrete target. You know exactly what you're building next.
-- **Immediate feedback**: Fast feedback loops let you see results quickly. Each change confirms you're on track or shows you what to adjust.
-- **Challenge-skill balance**: Small units of work and agent autonomy keep you in the zone - challenged enough to stay engaged, supported enough to succeed.
-- **Context window efficiency**: Progressive disclosure and artifact summarization ensure agents have the right context without overflow.
-- **Comprehensive guard rails**: Lint rules, type checks, and automated validation catch mistakes before they compound.
+## Applicability
-Everything dust does serves flow. When agents stay in flow, they produce better work, sustain their momentum, and complete tasks autonomously.
+Internal
 ## Parent Principle
-- (none)
+- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
 ## Sub-Principles
-- [Human-AI Collaboration](human-ai-collaboration.md)
-- [Maintainable Codebase](maintainable-codebase.md)
+- (none)
 `
   },
   {
-    slug: "stop-the-line",
-    content: `# Stop the Line
+    slug: "minimal-dependencies",
+    content: `# Minimal Dependencies
-Any worker — human or agent — should halt and fix a problem the moment they detect it, rather than letting defects propagate downstream.
+Dust should avoid coupling to specific tools so we can switch to better alternatives as they emerge.
-Originating from the Toyota production system, "Stop the Line" empowers every participant to pause work immediately upon identifying a defect, failing check, or safety hazard. Problems are cheaper to fix at their source than after they've compounded through later stages. In the context of dust, this means agents and humans alike should treat broken checks, test failures, and lint errors as blockers that demand immediate attention — not warnings to be deferred.
+By keeping dependencies minimal and using standard APIs where possible, we maintain the freedom to adopt new tools without major rewrites. This applies to runtimes, test frameworks, build tools, and other infrastructure choices.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 ## Sub-Principles
-- (none)
+- [Runtime Agnostic Tests](runtime-agnostic-tests.md)
 `
   },
   {
-    slug: "agent-context-inference",
-    content: `# Agent Context Inference
+    slug: "agent-specific-enhancement",
+    content: `# Agent-Specific Enhancement
-Terse human prompts should trigger the correct agent action.
+Dust should detect and enhance the experience for specific agents while remaining agnostic at its core.
-When a human gives a brief instruction like "the button should be green", the agent should be able to infer what to do. The agent shouldn't require the human to specify file paths, component names, or implementation details that can be discovered from the repository.
+While Dust has [Agent-Agnostic Design](agent-agnostic-design.md) and works with any capable agent, it can still optimize the "agent DX" (developer experience) when it detects a specific agent is being used. This means:
-This reduces friction for humans and makes agent interactions feel more natural. The burden of context discovery shifts to the agent, which can use dust's CLI and repository structure to find what it needs.
+- **Detection** - Dust may detect which agent is running (e.g., Claude Code, Aider, Cursor) through environment variables, configuration, or other signals
+- **Enhancement** - Once detected, Dust can tailor its output format, prompts, or context to leverage that agent's specific strengths
+- **Graceful fallback** - When no specific agent is detected, Dust provides a generic experience that works with any agent
+This principle complements Agent-Agnostic Design: the core functionality never requires a specific agent, but the experience improves when one is recognized.
 ## Applicability
@@ -12750,40 +12864,57 @@ Internal
 `
   },
   {
-    slug: "naming-matters",
-    content: `# Naming Matters
+    slug: "self-diagnosing-tests",
+    content: `# Self-Diagnosing Tests
-Good naming reduces waste by eliminating confusion and making code self-documenting.
+When a big test fails, it should be self-evident how to diagnose and fix the failure.
-Poor names cause rework, bugs, and communication overhead. When names don't clearly convey meaning, developers waste time deciphering code, misunderstand intentions, and introduce defects. Well-chosen names serve as documentation that never goes stale, reducing the need for explanatory comments and enabling both humans and AI agents to navigate the codebase efficiently.
+The more moving parts a test has — end-to-end, system, integration — the more critical this becomes. A test that fails with \`expected true, received false\` forces the developer (or agent) to re-run, add logging, and guess. A test that fails with a rich diff showing the actual state versus the expected state turns diagnosis into reading.
-## Parent Principle
+## Anti-patterns
-- [Maintainable Codebase](maintainable-codebase.md)
+**Boolean flattening** — collapsing a rich value into true/false before asserting:
+\`\`\`javascript
+// Bad: "expected true, received false" — what events arrived?
+expect(events.some(e => e.type === 'check-passed')).toBe(true)
-## Sub-Principles
+// Good: shows the actual event types on failure
+expect(events.map(e => e.type)).toContain('check-passed')
+\`\`\`
-- [Consistent Naming](consistent-naming.md)
-- [Clarity Over Brevity](clarity-over-brevity.md)
-`
-  },
-  {
-    slug: "stubs-over-mocks",
-    content: `# Stubs Over Mocks
+**Length-only assertions** — checking count without showing contents:
+\`\`\`javascript
+// Bad: "expected 2, received 0" — what requests were captured?
+expect(requests.length).toBe(2)
-Prefer hand-rolled stubs over mocks, in unit tests. Stubs keep tests focused on observable behavior instead of implementation details.
+// Good: shows the actual requests on failure
+expect(requests).toHaveLength(2)  // vitest shows the array
+\`\`\`
-Mocks tend to encode a script of “expected calls” (what was invoked, in what order, with what arguments). That makes tests brittle: harmless refactors (changing internal decomposition, adding caching, batching calls, reordering operations) can break tests even when the externally visible behavior is unchanged. You end up maintaining tests that police how the code works rather than what it does.
+**Silent guards** — using \`if\` where an assertion belongs:
+\`\`\`javascript
+// Bad: silently passes when settings is undefined
+if (settings) {
+  expect(JSON.parse(settings).key).toBeDefined()
+}
-Stubs (and especially in-memory emulators) push tests toward the contract: provide inputs, run the code, assert outputs and side effects. When a test fails, it’s usually because a behavior changed, not because the internal call choreography shifted. That improves signal-to-noise, reduces rewrites during refactors, and makes it easier to evolve the implementation.
+// Good: fails explicitly if settings is missing
+expect(settings).toBeDefined()
+const parsed = JSON.parse(settings)
+expect(parsed.key).toBeDefined()
+\`\`\`
-For external dependencies (databases, queues, object stores, HTTP services), the default choice should be an in-memory emulator: a drop-in replacement that is faithful enough to the real interface/semantics but runs entirely in-process. It gives most of the benefits of integration testing—realistic state transitions, error modes, concurrency behavior where relevant—without the cost, flakiness, and setup burden of booting real infrastructure. It also keeps the test environment hermetic (no network, no shared state), which improves determinism and makes tests fast.
+## The test
-Still use mocks selectively—mainly to assert something is called (e.g., telemetry emission, "at most once" notifications, payment capture guarded by a feature flag) or when a dependency is impossible to emulate. But for most cases, stubs and in-memory emulators produce tests that are clearer, more resilient to refactoring, and better aligned with the system's actual contracts.
+If a test fails, can a developer who has never seen the code identify the problem from the failure output alone — without re-running, adding console.logs, or reading the test source? The closer to "yes", the better.
+## How to evaluate
+Work supports this principle when every assertion in a system or integration test would, on failure, reveal the actual state richly enough to guide a fix. Bare boolean checks, length-only assertions, and silent conditional guards are violations.
 ## Parent Principle
-- [Decoupled Code](decoupled-code.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
@@ -12791,18 +12922,18 @@ Still use mocks selectively—mainly to assert something is called (e.g., teleme
 `
   },
   {
-    slug: "functional-core-imperative-shell",
-    content: `# Functional Core, Imperative Shell
+    slug: "slow-feedback-coping",
+    content: `# Slow Feedback Coping
-Separate code into a pure "functional core" and a thin "imperative shell." The core takes values in and returns values out, with no side effects. The shell handles I/O and wires things together.
+Some feedback is unavoidably slow — dust should offer coping strategies rather than pretending it can be eliminated.
-Purely functional code makes some things easier to understand: because values don't change, you can call functions and know that only their return value matters—they don't change anything outside themselves.
+Integration tests, end-to-end tests, deployment pipelines, and external API calls all take time. Pretending they can be made instant is unrealistic. Instead, dust should help developers and agents cope with slow feedback effectively: by structuring work so that fast checks catch most problems early, by batching slow checks intelligently, by providing clear progress indicators, and by ensuring that when slow feedback does arrive, it is actionable and specific.
-The functional core contains business logic as pure functions that take values and return values. The imperative shell sits at the boundary, reading input, calling into the core, and performing side effects with the results. This keeps the majority of code easy to test (no mocks or stubs needed for pure functions) and makes the I/O surface area small and explicit.
+Strategies include separating fast and slow test suites, running slow checks asynchronously or in CI, caching expensive operations, and designing workflows that minimise how often slow feedback is needed.
 ## Parent Principle
-- [Decoupled Code](decoupled-code.md)
+- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
 ## Sub-Principles
@@ -12810,51 +12941,61 @@ The functional core contains business logic as pure functions that take values a
 `
   },
   {
-    slug: "development-traceability",
-    content: `# Development Traceability
+    slug: "agentic-flow-state",
+    content: `# Agentic Flow State
-Structured logging and tracing help agents understand system behaviour without resorting to ad-hoc testing cycles.
+Flow is the mental state where work becomes effortless — where you're fully immersed, losing track of time, operating at peak performance. Psychologist Mihaly Csikszentmihalyi identified three conditions that create flow: clear goals, immediate feedback, and challenge-skill balance.
-When something goes wrong, agents often resort to adding temporary log statements, running the code, reading the output, and repeating — a slow and wasteful debugging loop. Good traceability means the system already records what happened and why, through structured logs, trace IDs, and observable state. This lets agents diagnose issues by reading existing output rather than generating new experiments.
+For AI agents, achieving flow state means staying engaged and productive without interruption. Agents enter flow when they have optimal context, comprehensive guard rails, and minimal friction. Context window optimization ensures agents have exactly what they need without cognitive overload. In-session guard rails prevent agents from straying off course or making mistakes that break their momentum.
-Dust should encourage projects to adopt structured logging, promote traceability as a first-class concern, and provide tools that surface relevant trace information when agents need it.
+Dust's design targets these conditions directly:
-## Applicability
+- **Clear goals**: Task files and lightweight planning give you a concrete target. You know exactly what you're building next.
+- **Immediate feedback**: Fast feedback loops let you see results quickly. Each change confirms you're on track or shows you what to adjust.
+- **Challenge-skill balance**: Small units of work and agent autonomy keep you in the zone — challenged enough to stay engaged, supported enough to succeed.
+- **Context window efficiency**: Progressive disclosure and artifact summarization ensure agents have the right context without overflow.
+- **Comprehensive guard rails**: Lint rules, type checks, and automated validation catch mistakes before they compound.
-Internal
+Everything dust does serves flow. When agents stay in flow, they produce better work, sustain their momentum, and complete tasks autonomously.
 ## Parent Principle
-- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
+- (none)
 ## Sub-Principles
-- (none)
+- [Human-AI Collaboration](human-ai-collaboration.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 `
   },
   {
-    slug: "keep-unit-tests-pure",
-    content: `# Keep Unit Tests Pure
+    slug: "reproducible-checks",
+    content: `# Reproducible Checks
-Unit tests (those run very frequently as part of a tight feedback loop) should be pure and side-effect free. A test is **not** a unit test if it:
+Every check must produce the same result regardless of who runs it, when, or on what machine. If a check passes for one developer but fails for another, the check is broken.
-- Accesses a database
-- Communicates over a network
-- Touches the file system
-- Cannot run concurrently with other tests
-- Requires special environment setup
+Concretely, checks should pin their tool versions via the project's dependency manager (e.g. \`devDependencies\`) rather than relying on \`npx\`/\`bunx\` to fetch the latest version at runtime. Unpinned versions introduce non-determinism — a check that passed yesterday may fail today due to a tool upgrade that nobody chose to adopt.
-"Unit tests" here means tests run frequently during development — not system tests, which intentionally exercise the full stack including I/O. Pure unit tests exercise only business logic, not infrastructure.
+## Parent Principle
-The value of pure unit tests is that they are fast, deterministic, and isolate business logic from infrastructure concerns. When unit tests pass but integration or system tests fail, developers can immediately narrow the problem to the boundary layer — a diagnostic "binary chop" that accelerates debugging.
+- [Make Changes with Confidence](make-changes-with-confidence.md)
-## Migration Guidance
+## Sub-Principles
-Where existing tests are impure (e.g. they spawn processes, write temporary files, or make network calls), prefer converting them to use in-memory alternatives — stubs, fakes, or dependency-injected doubles — rather than leaving them as-is. Opportunistic migration is fine; a big-bang rewrite is not required.
+- (none)
+`
+  },
+  {
+    slug: "task-first-workflow",
+    content: `# Task-First Workflow
+Work should be captured as a task before implementation begins, creating traceability between intent and outcome.
+This discipline ensures that every change has a documented purpose. The commit history shows pairs of "Add task" followed by implementation, making it easy to understand why each change was made. It also prevents scope creep by defining boundaries before work starts.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Lightweight Planning](lightweight-planning.md)
 ## Sub-Principles
@@ -12862,78 +13003,96 @@ Where existing tests are impure (e.g. they spawn processes, write temporary file
 `
   },
   {
-    slug: "co-located-tests",
-    content: `# Co-located Tests
+    slug: "ideal-agent-developer-experience",
+    content: `# Ideal Agent Developer Experience
-Test files should live next to the code they test.
+The agent is the developer. The human is the CEO. Dust is the PM.
-When tests are co-located with their source files, developers can immediately see what's tested and what isn't. Finding the test for a module becomes trivial—it's right there in the same directory. This proximity encourages writing tests as part of the development flow rather than as an afterthought, and makes it natural to update tests when modifying code.
+With today's AI coding assistants, the human is stuck in a tight loop with agents — constantly directing, reviewing, and course-correcting. Dust is designed to relieve humans from this tight loop. Like an assistant to a CEO, dust predominantly brings fully-researched questions and well-prepared work to the human, rather than expecting the human to drive every decision. The human checks in less frequently, and when they do, they make high-leverage strategic calls rather than micromanaging implementation.
+For this to work, the agent's development environment must be excellent. The agent reads the code, writes changes, runs the checks, and iterates until the task is done. Everything about the codebase and its tooling either helps or hinders that process. Comprehensive tests are the agent's only way to verify correctness. Fast feedback loops are the agent's iteration speed. Structured logs are the agent's eyes into runtime behaviour. Small, well-organised files are what fit in the agent's context window. Exploratory and debugging tools are how the agent navigates and diagnoses without trial and error.
+Each sub-principle represents a different aspect of the ideal agent developer setup. The better these are, the less the human needs to be in the loop.
 ## Parent Principle
-- [Intuitive Directory Structure](intuitive-directory-structure.md)
+- [Human-AI Collaboration](human-ai-collaboration.md)
 ## Sub-Principles
-- (none)
+- [Comprehensive Test Coverage](comprehensive-test-coverage.md)
+- [Fast Feedback Loops](fast-feedback-loops.md)
+- [Slow Feedback Coping](slow-feedback-coping.md)
+- [Development Traceability](development-traceability.md)
+- [Context-Optimised Code](context-optimised-code.md)
+- [Exploratory Tooling](exploratory-tooling.md)
+- [Debugging Tooling](debugging-tooling.md)
+- [Self-Contained Repository](self-contained-repository.md)
 `
   },
   {
-    slug: "human-ai-collaboration",
-    content: `# Human-AI Collaboration
+    slug: "agent-context-inference",
+    content: `# Agent Context Inference
-Dust exists to enable effective collaboration between humans and AI agents on complex projects.
+Terse human prompts should trigger the correct agent action.
-The human is the CEO — they set direction, make strategic decisions, and check in when it matters. Dust is the PM — it manages the work, prepares context, and brings fully-researched questions to the human rather than expecting them to drive every detail. Agents are the developers — they read code, write changes, and iterate autonomously.
+When a human gives a brief instruction like "the button should be green", the agent should be able to infer what to do. The agent shouldn't require the human to specify file paths, component names, or implementation details that can be discovered from the repository.
-Today's AI coding tools keep humans in a tight loop with agents. Dust is designed to loosen that loop, so humans spend less time directing and more time deciding.
+This reduces friction for humans and makes agent interactions feel more natural. The burden of context discovery shifts to the agent, which can use dust's CLI and repository structure to find what it needs.
+## Applicability
+Internal
 ## Parent Principle
-- [Agentic Flow State](agentic-flow-state.md)
+- [Agent Autonomy](agent-autonomy.md)
 ## Sub-Principles
-- [Agent Autonomy](agent-autonomy.md)
-- [Easy Adoption](easy-adoption.md)
-- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
-- [Lightweight Planning](lightweight-planning.md)
+- (none)
 `
   },
   {
-    slug: "vcs-independence",
-    content: `# VCS Independence
+    slug: "agent-autonomy",
+    content: `# Agent Autonomy
-Dust should work independently of any specific version control system.
+Dust exists to enable AI agents to produce work autonomously.
-While git is common, dust's core functionality should not require git. This enables use in repositories using other VCS (Mercurial, SVN, Perforce) or in non-VCS workflows.
+With sufficient planning and small enough units of work, autonomous agents work much better in practice.
 ## Parent Principle
-- [Easy Adoption](easy-adoption.md)
+- [Human-AI Collaboration](human-ai-collaboration.md)
 ## Sub-Principles
-- (none)
+- [Actionable Errors](actionable-errors.md)
+- [Batteries Included](batteries-included.md)
+- [Agent-Agnostic Design](agent-agnostic-design.md)
+- [Agent Context Inference](agent-context-inference.md)
+- [Agent-Specific Enhancement](agent-specific-enhancement.md)
+- [Context Window Efficiency](context-window-efficiency.md)
+- [Small Units](small-units.md)
 `
   },
   {
-    slug: "environment-independent-tests",
-    content: `# Environment-Independent Tests
+    slug: "stubs-over-mocks",
+    content: `# Stubs Over Mocks
-Tests must produce the same result regardless of where they run. A test that passes locally but fails in CI (or vice versa) is a broken test.
+In unit tests, prefer hand-rolled stubs over mocks. Stubs keep tests focused on observable behavior instead of implementation details.
-Concretely, tests should never depend on:
-- Ambient environment variables (e.g. \`CLAUDECODE\`, \`CI\`, \`HOME\`)
-- The current working directory or filesystem layout of the host machine
-- Network availability or external services
-- The identity of the user or agent running the tests
+Mocks tend to encode a script of “expected calls” (what was invoked, in what order, with what arguments). That makes tests brittle: harmless refactors (changing internal decomposition, adding caching, batching calls, reordering operations) can break tests even when the externally visible behavior is unchanged. You end up maintaining tests that police how the code works rather than what it does.
-When a function's behavior depends on environment variables, the test must explicitly control those variables (via \`stubEnv\`, dependency injection, or passing an \`env\` parameter) rather than relying on whatever happens to be set in the current shell.
+Stubs (and especially in-memory emulators) push tests toward the contract: provide inputs, run the code, assert outputs and side effects. When a test fails, it’s usually because a behavior changed, not because the internal call choreography shifted. That improves signal-to-noise, reduces rewrites during refactors, and makes it easier to evolve the implementation.
+For external dependencies (databases, queues, object stores, HTTP services), the default choice should be an in-memory emulator: a drop-in replacement that is faithful enough to the real interface/semantics but runs entirely in-process. It gives most of the benefits of integration testing—realistic state transitions, error modes, concurrency behavior where relevant—without the cost, flakiness, and setup burden of booting real infrastructure. It also keeps the test environment hermetic (no network, no shared state), which improves determinism and makes tests fast.
+Still use mocks selectively—mainly to assert something is called (e.g., telemetry emission, "at most once" notifications, payment capture guarded by a feature flag) or when a dependency is impossible to emulate. But for most cases, stubs and in-memory emulators produce tests that are clearer, more resilient to refactoring, and better aligned with the system's actual contracts.
 ## Parent Principle
-- [Test Isolation](test-isolation.md)
+- [Decoupled Code](decoupled-code.md)
 ## Sub-Principles
@@ -12964,91 +13123,93 @@ Internal
 `
   },
   {
-    slug: "atomic-commits",
-    content: `# Atomic Commits
-Each commit should tell a complete story, bundling implementation changes with their corresponding documentation updates.
+    slug: "consistent-naming",
+    content: `# Consistent Naming
-When a task is completed, the commit deletes the task file, updates relevant facts to reflect the new reality, and removes any ideas that have been realized. This discipline ensures that any point in the commit history represents a coherent, self-documenting state of the project.
+Names should follow established conventions within each category to reduce cognitive load.
-Clean commit history is essential because archaeology depends on it. Future humans and AI agents will traverse history to understand why decisions were made and how the system evolved.
+Principles use Title Case. File names use kebab-case. Commands use lowercase with hyphens. When naming conventions exist, follow them. When they don't, establish one and apply it consistently. Inconsistent naming creates friction for both humans and AI agents trying to predict or recall identifiers.
 ## Parent Principle
-- [Repository Hygiene](repository-hygiene.md)
+- [Naming Matters](naming-matters.md)
 ## Sub-Principles
-- [Traceable Decisions](traceable-decisions.md)
+- (none)
 `
   },
   {
-    slug: "trunk-based-development",
-    content: `# Trunk-Based Development
-Dust is designed to support a non-branching workflow where developers commit directly to a single main branch.
-In trunk-based development, teams collaborate on code in one primary branch rather than maintaining multiple long-lived feature branches. This eliminates merge conflicts, enables continuous integration, and keeps the codebase continuously releasable.
+    slug: "lightweight-planning",
+    content: `# Lightweight Planning
-The \`dust loop claude\` command embodies this philosophy: agents pull from main, implement a task, and push directly back to main. There are no feature branches, no pull requests, no merge queues. Each commit is atomic and complete.
+Dust aims to be a minimal, low-overhead planning system that stays relevant over time.
-This approach scales through discipline rather than isolation. Feature flags and incremental changes replace long-running branches. The repository history becomes a linear sequence of working states.
+Planning artifacts are simple markdown files that live alongside code. Ideas are intentionally vague until implementation is imminent. Tasks are small and completable in single commits. Facts document current reality rather than aspirational states.
-See: https://trunkbaseddevelopment.com/
+The system avoids the staleness problem by deferring detail until the last responsible moment and deleting completed work rather than archiving it.
 ## Parent Principle
-- [Repository Hygiene](repository-hygiene.md)
+- [Human-AI Collaboration](human-ai-collaboration.md)
 ## Sub-Principles
-(none)
+- [Task-First Workflow](task-first-workflow.md)
+- [Some Big Design Up Front](some-big-design-up-front.md)
 `
   },
   {
-    slug: "comprehensive-assertions",
-    content: `# Comprehensive Assertions
+    slug: "easy-adoption",
+    content: `# Easy Adoption
-Assert the whole, not the parts.
+Dust should be trivially easy to adopt in any repository.
-When you break a complex object into many small assertions, a failure tells you *one thing that's wrong*. When you assert against the whole expected value, the diff tells you *what actually happened versus what you expected* — the full picture, in one glance.
+Getting started with Dust should require minimal friction. A developer should be able to bootstrap Dust in their repository with a single command, without needing to install dependencies, configure build tools, or understand the internals.
-Small assertions are like yes/no questions to a witness. A whole-object assertion is like asking "tell me what you saw."
+This lowers the barrier to entry and encourages experimentation.
-## In practice
+## Parent Principle
-Collapse multiple partial assertions into one comprehensive assertion:
+- [Human-AI Collaboration](human-ai-collaboration.md)
-\`\`\`javascript
-// Fragmented — each failure is a narrow keyhole
-expect(result.name).toBe("Alice");
-expect(result.age).toBe(30);
-expect(result.role).toBe("admin");
+## Sub-Principles
-// Whole — a failure diff tells the full story
-expect(result).toEqual({
-  name: "Alice",
-  age: 30,
-  role: "admin",
-});
-\`\`\`
+- [Cross-Platform Compatibility](cross-platform-compatibility.md)
+- [Unsurprising UX](unsurprising-ux.md)
+- [VCS Independence](vcs-independence.md)
+`
+  },
+  {
+    slug: "intuitive-directory-structure",
+    content: `# Intuitive Directory Structure
-If \`role\` is \`"user"\` and \`age\` is \`29\`, the fragmented version stops at the first failure. The whole-object assertion shows both discrepancies at once, in context.
+Code should be organized around related concerns in clearly named directories.
-The same applies to arrays:
+When files that serve similar purposes are grouped together, the codebase becomes easier to navigate and understand. A developer looking for "commands" should find them in a \`commands\` directory. Utilities should live with utilities. This organization reduces cognitive load and makes the project structure self-documenting.
-\`\`\`javascript
-// Avoid: partial assertions that hide the actual state
-expect(array).toContain('apples')
-expect(array).toContain('oranges')
+## Parent Principle
-// Prefer: one assertion that reveals the full picture on failure
-expect(array).toEqual(['apples', 'oranges'])
-\`\`\`
+- [Maintainable Codebase](maintainable-codebase.md)
-## How to evaluate
+## Sub-Principles
-Work supports this principle when test failures tell a rich story — showing the complete actual value alongside the complete expected value, so the reader can understand what happened without re-running anything.
+- [Co-located Tests](co-located-tests.md)
+`
+  },
+  {
+    slug: "lint-everything",
+    content: `# Lint Everything
+Prefer static analysis over runtime checks. Every error caught by a linter is an error that never reaches tests, and every error caught by tests is an error that never reaches production.
+Lint markdown, lint types, lint formatting. If it can be checked statically, check it. Linters are fast, deterministic, and catch entire categories of bugs before code even runs.
+This project lints:
+- TypeScript (type checking and style)
+- Markdown (broken links, required sections)
+- Task files (structure validation)
+- Principle hierarchy (parent/child consistency)
 ## Parent Principle
@@ -13056,26 +13217,22 @@ Work supports this principle when test failures tell a rich story — showing th
 ## Sub-Principles
-- (none)
+(none)
 `
   },
   {
-    slug: "cross-platform-compatibility",
-    content: `# Cross-Platform Compatibility
+    slug: "progressive-disclosure",
+    content: `# Progressive Disclosure
-Dust should work consistently across operating systems: Linux, macOS, and Windows.
+Dust should reveal details progressively as a way of achieving context window efficiency.
-This means:
-- Avoiding platform-specific shell commands or syntax
-- Using cross-platform path handling
-- Testing on multiple platforms when possible
-- Documenting any platform-specific limitations
+Not all information is needed at once. A task list showing just titles is sufficient for choosing what to work on. Full task details are only needed when actively implementing. Linked principles and facts can be followed when deeper context is required.
-Cross-platform support broadens adoption and ensures teams with mixed environments can collaborate effectively.
+This layered approach keeps initial reads lightweight while preserving access to complete information when needed.
 ## Parent Principle
-- [Easy Adoption](easy-adoption.md)
+- [Context Window Efficiency](context-window-efficiency.md)
 ## Sub-Principles
@@ -13083,18 +13240,14 @@ Cross-platform support broadens adoption and ensures teams with mixed environmen
 `
   },
   {
-    slug: "exploratory-tooling",
-    content: `# Exploratory Tooling
-Agents need tools to efficiently explore and understand unfamiliar codebases.
-When an agent encounters a new codebase — or an unfamiliar corner of a familiar one — it needs to quickly build a mental model: what exists, how it fits together, and where to make changes. Without good exploratory tools, agents waste context on trial-and-error searches, reading irrelevant files, and forming incorrect assumptions.
+    slug: "context-optimised-code",
+    content: `# Context-Optimised Code
-Dust should promote and integrate tools that help agents explore: dependency graphs, module overviews, search utilities tuned for code navigation, and summaries of project structure. The goal is to make the "orientation" phase of any task as short and reliable as possible.
+Code should be structured so that agents can understand and modify it within their context window constraints.
-## Applicability
+Large files, deeply nested abstractions, and sprawling dependency chains all work against agents. A 3,000-line file cannot be fully loaded into context. A function that requires understanding six levels of indirection demands more context than one that is self-contained. Context-optimised code favours small files, shallow abstractions, explicit dependencies, and co-located related logic.
-Internal
+Dust should help projects identify files that are too large, modules that are too tangled, and patterns that make agent comprehension harder than it needs to be. This is not just about file size — it is about ensuring that the unit of code an agent needs to understand fits comfortably within the window available.
 ## Parent Principle
@@ -13106,16 +13259,37 @@ Internal
 `
   },
   {
-    slug: "reasonably-dry",
-    content: `# Reasonably DRY
+    slug: "some-big-design-up-front",
+    content: `# Some Big Design Up Front
-Don't repeat yourself is a good principle, but don't overdo it.
+AI agents lower the cost of architectural exploration, making heavier upfront investment rational during the idea phase.
-Extracting shared code too eagerly can create tight coupling, obscure intent, and make changes harder. When two pieces of code look similar but serve different purposes or are likely to evolve independently, duplication is the better choice. The cost of a wrong abstraction is higher than the cost of a little repetition. Extract shared code when the duplication is truly about the same concept and has proven stable, not just because two things happen to look alike right now.
+Agile's rejection of "big design up front" (BDUF) was largely economic: detailed architecture was expensive to produce and often wrong. AI agents change that equation — they can explore multiple variants, prototype them, and measure trade-offs cheaply. When evaluating alternatives costs less, the expected value of avoiding large structural mistakes increases.
+This doesn't mean returning to traditional BDUF. Uncertainty about future requirements still limits what prediction can achieve. The insight is that the optimal amount of upfront work has shifted, not that prediction became reliable.
+The model is hybrid: thorough AI-assisted exploration during ideas, followed by straightforward execution during tasks. "Lightweight" refers to task-level planning, not idea-level exploration. Invest heavily in understanding alternatives during the idea phase, then decompose into atomic tasks once the direction is clear.
+## Convergence Criteria
+Exploration should continue until clear trade-offs are identified and the chosen approach can be articulated against alternatives. This is convergence-based, not time-boxed — simple ideas converge quickly, complex architectural decisions require more exploration.
+When exploration feels "done":
+- Multiple approaches have been considered
+- Trade-offs between approaches are understood
+- The chosen direction has clear justification
+- Remaining uncertainty is about requirements, not design
+If a task requires significant design decisions during execution, it wasn't ready to be a task.
+## Documenting Alternatives
+Ideas should document the alternatives considered and why they were ruled out. This creates a decision log that helps future agents and humans understand context. Include alternatives in the idea body or Open Questions sections.
 ## Parent Principle
-- [Maintainable Codebase](maintainable-codebase.md)
+- [Lightweight Planning](lightweight-planning.md)
 ## Sub-Principles
@@ -13123,16 +13297,16 @@ Extracting shared code too eagerly can create tight coupling, obscure intent, an
 `
   },
   {
-    slug: "runtime-agnostic-tests",
-    content: `# Runtime Agnostic Tests
+    slug: "traceable-decisions",
+    content: `# Traceable Decisions
-Dust's test suite should work across JavaScript runtimes.
+The commit history should explain why changes were made, not just what changed.
-Tests should use standard JavaScript testing patterns that work across Node.js, Bun, and other runtimes. Avoiding runtime-specific test APIs ensures the project can leverage different runtimes' advantages while maintaining broad compatibility.
+Commit messages should capture intent and context that would otherwise be lost. Future maintainers (human or AI) will traverse history to understand the reasoning behind decisions. A commit that says "Fix bug" is less valuable than one that explains what was broken and why the fix is correct.
 ## Parent Principle
-- [Minimal Dependencies](minimal-dependencies.md)
+- [Atomic Commits](atomic-commits.md)
 ## Sub-Principles
@@ -13140,16 +13314,16 @@ Tests should use standard JavaScript testing patterns that work across Node.js,
 `
   },
   {
-    slug: "task-first-workflow",
-    content: `# Task-First Workflow
+    slug: "fast-feedback",
+    content: `# Fast Feedback
-Work should be captured as a task before implementation begins, creating traceability between intent and outcome.
+Dust should provide fast feedback loops for developers.
-This discipline ensures that every change has a documented purpose. The commit history shows pairs of "Add task" followed by implementation, making it easy to understand why each change was made. It also prevents scope creep by defining boundaries before work starts.
+Scripts and tooling should execute quickly so developers can iterate rapidly. Slow feedback discourages frequent validation and leads to larger, riskier changes. Fast feedback enables small, confident steps.
 ## Parent Principle
-- [Lightweight Planning](lightweight-planning.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
@@ -13157,58 +13331,63 @@ This discipline ensures that every change has a documented purpose. The commit h
 `
   },
   {
-    slug: "agent-autonomy",
-    content: `# Agent Autonomy
+    slug: "decoupled-code",
+    content: `# Decoupled Code
-Dust exists to enable AI agents to produce work autonomously.
+Code should be organized into independent units with explicit dependencies.
-With sufficient planning and small enough units, this works much better in practice.
+Decoupled code is easier to test, understand, and modify. Dependencies are passed in rather than hard-coded, enabling units to be tested in isolation and composed flexibly. This reduces the blast radius of changes and makes the system more maintainable.
 ## Parent Principle
-- [Human-AI Collaboration](human-ai-collaboration.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
-- [Actionable Errors](actionable-errors.md)
-- [Batteries Included](batteries-included.md)
-- [Agent-Agnostic Design](agent-agnostic-design.md)
-- [Agent Context Inference](agent-context-inference.md)
-- [Agent-Specific Enhancement](agent-specific-enhancement.md)
-- [Context Window Efficiency](context-window-efficiency.md)
-- [Small Units](small-units.md)
+- [Dependency Injection](dependency-injection.md)
+- [Stubs Over Mocks](stubs-over-mocks.md)
+- [Functional Core, Imperative Shell](functional-core-imperative-shell.md)
+- [Design for Testability](design-for-testability.md)
 `
   },
   {
-    slug: "clarity-over-brevity",
-    content: `# Clarity Over Brevity
+    slug: "make-changes-with-confidence",
+    content: `# Make Changes with Confidence
-Names should be descriptive and self-documenting, even if longer.
+Developers should be able to modify code without fear of breaking existing behavior.
-Abbreviated names like \`ctx\`, \`deps\`, \`fs\`, or \`args\` save a few keystrokes but obscure meaning. Full names like \`context\`, \`dependencies\`, \`fileSystem\`, and \`arguments\` make code immediately understandable without requiring readers to decode conventions. This is especially valuable when AI agents or new contributors read the codebase for the first time.
+Tests, type checking, and other automated verification enable safe refactoring and evolution of the codebase. When changes break something, fast feedback identifies the problem before it spreads. This confidence encourages continuous improvement rather than fragile, stagnant code.
 ## Parent Principle
-- [Naming Matters](naming-matters.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 ## Sub-Principles
-- (none)
+- [Comprehensive Assertions](comprehensive-assertions.md)
+- [Decoupled Code](decoupled-code.md)
+- [Fast Feedback](fast-feedback.md)
+- [Lint Everything](lint-everything.md)
+- [Readable Test Data](readable-test-data.md)
+- [Reproducible Checks](reproducible-checks.md)
+- [Stop the Line](stop-the-line.md)
+- [Keep Unit Tests Pure](keep-unit-tests-pure.md)
+- [Test Isolation](test-isolation.md)
+- [Self-Diagnosing Tests](self-diagnosing-tests.md)
+- [Unit Test Coverage](unit-test-coverage.md)
 `
   },
   {
-    slug: "fast-feedback-loops",
-    content: `# Fast Feedback Loops
-The primary feedback loop — write code, run checks, see results — should be as fast as possible.
+    slug: "clarity-over-brevity",
+    content: `# Clarity Over Brevity
-Fast feedback is the foundation of productive development, for both humans and agents. When tests, linters, and type checks run in seconds rather than minutes, developers iterate more frequently and catch problems earlier. Agents especially benefit because they operate in tight loops of change-and-verify; slow feedback wastes tokens and context window space on waiting rather than working.
+Names should be descriptive and self-documenting, even if longer.
-Dust should help projects measure the speed of their feedback loops, identify bottlenecks, and keep them fast as the codebase grows. This includes promoting practices like unit tests over integration tests for speed, incremental compilation, and check parallelisation.
+Abbreviated names like \`ctx\`, \`deps\`, \`fs\`, or \`args\` save a few keystrokes but obscure meaning. Full names like \`context\`, \`dependencies\`, \`fileSystem\`, and \`arguments\` make code immediately understandable without requiring readers to decode conventions. This is especially valuable when AI agents or new contributors read the codebase for the first time.
 ## Parent Principle
-- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
+- [Naming Matters](naming-matters.md)
 ## Sub-Principles
@@ -13216,18 +13395,24 @@ Dust should help projects measure the speed of their feedback loops, identify bo
 `
   },
   {
-    slug: "make-the-change-easy",
-    content: `# Make the Change Easy
+    slug: "agent-agnostic-design",
+    content: `# Agent-Agnostic Design
+Dust should work with multiple agents without favoring one.
+Rather than implementing agents, Dust generates prompts and context that can be passed to any capable agent. This keeps Dust lightweight and allows teams to use whatever agent tooling they prefer.
+Dust may have built-in support for invoking popular agents (Claude, Aider, Codex, etc.), but the choice of agent should always be made by the user at runtime - never hard-coded into repository configuration.
-For each desired change, make the change easy, then make the easy change.
+Note: Supporting multiple agents directly contributes to [Easy Adoption](easy-adoption.md), since teams can use their preferred agent tools without being locked into a specific platform.
-This principle, articulated by Kent Beck, recognizes that the hardest part of a change is often not the change itself but the state of the code receiving it. When code resists a change, the right response is to first refactor until the change becomes straightforward, and only then make it. The warning - "this may be hard" - acknowledges that preparing the ground takes real effort, but the result is a change that fits naturally rather than one forced in against the grain.
+## Applicability
-Work that supports this principle includes refactoring before feature work, improving abstractions that make a category of changes simpler, and resisting the urge to bolt changes onto code that isn't ready for them.
+Internal
 ## Parent Principle
-- [Maintainable Codebase](maintainable-codebase.md)
+- [Agent Autonomy](agent-autonomy.md)
 ## Sub-Principles
@@ -13235,20 +13420,51 @@ Work that supports this principle includes refactoring before feature work, impr
 `
   },
   {
-    slug: "self-contained-repository",
-    content: `# Self-Contained Repository
+    slug: "readable-test-data",
+    content: `# Readable Test Data
-Where possible, developers and agents should have everything they need to be productive, within the repository.
+Test data setup should use natural structures that mirror what they represent.
-No third-party tools should be required beyond those that can be installed with a single command defined in the repository. Setup instructions, scripts, configuration, and dependencies should all live in version control so that cloning the repo and running a single install command is sufficient to start working. This eliminates onboarding friction, reduces "works on my machine" issues, and is especially important for agents — who cannot browse the web to find missing tools or ask colleagues how to set things up.
+## Why it matters
-## Applicability
+When test data is easy to read, tests become self-documenting. A file system hierarchy expressed as a nested object immediately conveys structure, while a flat Map with path strings requires mental parsing to understand the relationships.
-Internal
+## In practice
+Prefer literal structures that visually match the domain:
+\`\`\`javascript
+// Avoid: flat paths that obscure hierarchy
+const fs = createFileSystemEmulator({
+  files: new Map([['/project/.dust/principles/my-goal.md', '# My Goal']]),
+  existingPaths: new Set(['/project/.dust/ideas']),
+})
+// Prefer: nested object that mirrors file system structure
+const fs = createFileSystemEmulator({
+  project: {
+    '.dust': {
+      principles: {
+        'my-goal.md': '# My Goal'
+      },
+      ideas: {}
+    }
+  }
+})
+\`\`\`
+The nested form:
+- Shows parent-child relationships through indentation
+- Makes empty directories explicit with empty objects
+- Requires no mental path concatenation to understand structure
+## How to evaluate
+Work supports this principle when test setup data uses structures that visually resemble what they represent, reducing cognitive load for readers.
 ## Parent Principle
-- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
@@ -13256,16 +13472,16 @@ Internal
 `
   },
   {
-    slug: "traceable-decisions",
-    content: `# Traceable Decisions
+    slug: "reasonably-dry",
+    content: `# Reasonably DRY
-The commit history should explain why changes were made, not just what changed.
+Don't repeat yourself is a good principle, but don't overdo it.
-Commit messages should capture intent and context that would otherwise be lost. Future maintainers (human or AI) will traverse history to understand the reasoning behind decisions. A commit that says "Fix bug" is less valuable than one that explains what was broken and why the fix is correct.
+Extracting shared code too eagerly can create tight coupling, obscure intent, and make changes harder. When two pieces of code look similar but serve different purposes or are likely to evolve independently, duplication is the better choice. The cost of a wrong abstraction is higher than the cost of a little repetition. Extract shared code when the duplication is truly about the same concept and has proven stable, not just because two things happen to look alike right now.
 ## Parent Principle
-- [Atomic Commits](atomic-commits.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 ## Sub-Principles
@@ -13273,16 +13489,21 @@ Commit messages should capture intent and context that would otherwise be lost.
 `
   },
   {
-    slug: "unit-test-coverage",
-    content: `# Unit Test Coverage
+    slug: "actionable-errors",
+    content: `# Actionable Errors
-Complete unit test coverage ensures low-level tests give users direct feedback as they change the code.
+Error messages should tell you what to do next, not just what went wrong.
-Excluding system tests from coverage reporting focuses attention on unit tests - the tests that provide the fastest, most specific feedback. When coverage tools only measure unit tests, developers can quickly identify which parts of the codebase lack fine-grained test protection.
+When something fails, the message should provide:
+- A clear description of the problem
+- Specific guidance on how to fix it
+- Context needed to take the next step
+This is especially important for AI agents, who need concrete instructions to recover autonomously. A good error message turns a dead end into a signpost.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Agent Autonomy](agent-autonomy.md)
 ## Sub-Principles
@@ -13290,84 +13511,70 @@ Excluding system tests from coverage reporting focuses attention on unit tests -
 `
   },
   {
-    slug: "decoupled-code",
-    content: `# Decoupled Code
+    slug: "make-the-change-easy",
+    content: `# Make the Change Easy
-Code should be organized into independent units with explicit dependencies.
+For each desired change, make the change easy, then make the easy change.
-Decoupled code is easier to test, understand, and modify. Dependencies are passed in rather than hard-coded, enabling units to be tested in isolation and composed flexibly. This reduces the blast radius of changes and makes the system more maintainable.
+This principle, articulated by Kent Beck, recognizes that the hardest part of a change is often not the change itself but the state of the code receiving it. When code resists a change, the right response is to first refactor until the change becomes straightforward, and only then make it. The warning - "this may be hard" - acknowledges that preparing the ground takes real effort, but the result is a change that fits naturally rather than one forced in against the grain.
+Work that supports this principle includes refactoring before feature work, improving abstractions that make a category of changes simpler, and resisting the urge to bolt changes onto code that isn't ready for them.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 ## Sub-Principles
-- [Dependency Injection](dependency-injection.md)
-- [Stubs Over Mocks](stubs-over-mocks.md)
-- [Functional Core, Imperative Shell](functional-core-imperative-shell.md)
-- [Design for Testability](design-for-testability.md)
+- (none)
 `
   },
   {
-    slug: "lint-everything",
-    content: `# Lint Everything
+    slug: "dependency-injection",
+    content: `# Dependency Injection
-Prefer static analysis over runtime checks. Every error caught by a linter is an error that never reaches tests, and every error caught by tests is an error that never reaches production.
+Avoid global mocks. Dependency injection is almost always preferable to testing code that depends directly on globals.
-Lint markdown, lint types, lint formatting. If it can be checked statically, check it. Linters are fast, deterministic, and catch entire categories of bugs before code even runs.
+When code depends on global state or singletons, testing requires mocking those globals—which introduces hidden coupling, complicates test setup, and risks interference between tests. Dependency injection makes dependencies explicit: they're passed in as arguments, making the code's requirements visible and enabling tests to supply controlled implementations.
-This project lints:
-- TypeScript (type checking and style)
-- Markdown (broken links, required sections)
-- Task files (structure validation)
-- Principle hierarchy (parent/child consistency)
+This approach improves testability (each test controls its own dependencies), readability (dependencies are declared upfront), and flexibility (swapping implementations doesn't require changing the consuming code). It also makes refactoring safer since dependencies are explicit rather than implicit.
 ## Parent Principle
-- [Make Changes with Confidence](make-changes-with-confidence.md)
+- [Decoupled Code](decoupled-code.md)
 ## Sub-Principles
-(none)
+- (none)
 `
   },
   {
-    slug: "maintainable-codebase",
-    content: `# Maintainable Codebase
+    slug: "repository-hygiene",
+    content: `# Repository Hygiene
-The dust codebase should be easy to understand, modify, and extend.
+Dust repositories should maintain a clean, organized state with minimal noise.
-This principle governs how we develop and maintain dust itself, separate from the principles that describe what dust offers its users. A well-maintained codebase enables rapid iteration, reduces bugs, and makes contributions easier.
+This includes proper gitignore configuration to exclude build artifacts, dependencies, editor files, and other generated content from version control. A well-maintained repository makes it easier for both humans and AI to navigate and understand the codebase.
 ## Parent Principle
-- [Agentic Flow State](agentic-flow-state.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 ## Sub-Principles
-- [Make Changes with Confidence](make-changes-with-confidence.md)
-- [Minimal Dependencies](minimal-dependencies.md)
-- [Intuitive Directory Structure](intuitive-directory-structure.md)
-- [Repository Hygiene](repository-hygiene.md)
-- [Naming Matters](naming-matters.md)
-- [Reasonably DRY](reasonably-dry.md)
-- [Make the Change Easy](make-the-change-easy.md)
-- [Boy Scout Rule](boy-scout-rule.md)
-- [Broken Windows](broken-windows.md)
+- [Atomic Commits](atomic-commits.md)
+- [Trunk-Based Development](trunk-based-development.md)
 `
   },
   {
-    slug: "agent-agnostic-design",
-    content: `# Agent-Agnostic Design
-Dust should work with multiple agents without favoring one.
+    slug: "batteries-included",
+    content: `# Batteries Included
-Rather than implementing agents, Dust generates prompts and context that can be passed to any capable agent. This keeps Dust lightweight and allows teams to use whatever agent tooling they prefer.
+Dust should provide everything that is required (within reason) for an agent to be productive in an arbitrary codebase.
-Dust may have built-in support for invoking popular agents (Claude, Aider, Codex, etc.), but the choice of agent should always be made by the user at runtime - never hard-coded into repository configuration.
+An agent working autonomously should not be blocked because a tool or configuration is missing. For example, dust should ship custom lint rules for different linters, even though those linters are not dependencies of dust itself. If an agent needs a capability to do its job well in a typical codebase, dust should provide it out of the box.
-Note: Supporting multiple agents directly contributes to [Easy Adoption](easy-adoption.md), since teams can use their preferred agent tools without being locked into a specific platform.
+This means accepting some breadth of scope — bundling configs, rules, and utilities that target external tools — in exchange for agents that can start producing useful work immediately without manual setup.
 ## Applicability
@@ -13378,47 +13585,25 @@ Internal
 - [Agent Autonomy](agent-autonomy.md)
 ## Sub-Principles
-- (none)
 `
   },
   {
-    slug: "easy-adoption",
-    content: `# Easy Adoption
-Dust should be trivially easy to adopt in any repository.
-Getting started with Dust should require minimal friction. A developer should be able to bootstrap Dust in their repository with a single command, without needing to install dependencies, configure build tools, or understand the internals.
-This lowers the barrier to entry and encourages experimentation.
-## Parent Principle
-- [Human-AI Collaboration](human-ai-collaboration.md)
+    slug: "development-traceability",
+    content: `# Development Traceability
-## Sub-Principles
+Structured logging and tracing help agents understand system behaviour without resorting to ad-hoc testing cycles.
-- [Cross-Platform Compatibility](cross-platform-compatibility.md)
-- [Unsurprising UX](unsurprising-ux.md)
-- [VCS Independence](vcs-independence.md)
-`
-  },
-  {
-    slug: "actionable-errors",
-    content: `# Actionable Errors
+When something goes wrong, agents often resort to adding temporary log statements, running the code, reading the output, and repeating — a slow and wasteful debugging loop. Good traceability means the system already records what happened and why, through structured logs, trace IDs, and observable state. This lets agents diagnose issues by reading existing output rather than generating new experiments.
-Error messages should tell you what to do next, not just what went wrong.
+Dust should encourage projects to adopt structured logging, promote traceability as a first-class concern, and provide tools that surface relevant trace information when agents need it.
-When something fails, the message should provide:
-- A clear description of the problem
-- Specific guidance on how to fix it
-- Context needed to take the next step
+## Applicability
-This is especially important for AI agents, who need concrete instructions to recover autonomously. A good error message turns a dead end into a signpost.
+Internal
 ## Parent Principle
-- [Agent Autonomy](agent-autonomy.md)
+- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
 ## Sub-Principles
@@ -13426,16 +13611,22 @@ This is especially important for AI agents, who need concrete instructions to re
 `
   },
   {
-    slug: "consistent-naming",
-    content: `# Consistent Naming
+    slug: "exploratory-tooling",
+    content: `# Exploratory Tooling
-Names should follow established conventions within each category to reduce cognitive load.
+Agents need tools to efficiently explore and understand unfamiliar codebases.
-Principles use Title Case. File names use kebab-case. Commands use lowercase with hyphens. When naming conventions exist, follow them. When they don't, establish one and apply it consistently. Inconsistent naming creates friction for both humans and AI agents trying to predict or recall identifiers.
+When an agent encounters a new codebase — or an unfamiliar corner of a familiar one — it needs to quickly build a mental model: what exists, how it fits together, and where to make changes. Without good exploratory tools, agents waste context on trial-and-error searches, reading irrelevant files, and forming incorrect assumptions.
+Dust should promote and integrate tools that help agents explore: dependency graphs, module overviews, search utilities tuned for code navigation, and summaries of project structure. The goal is to make the "orientation" phase of any task as short and reliable as possible.
+## Applicability
+Internal
 ## Parent Principle
-- [Naming Matters](naming-matters.md)
+- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
 ## Sub-Principles
@@ -13443,54 +13634,57 @@ Principles use Title Case. File names use kebab-case. Commands use lowercase wit
 `
   },
   {
-    slug: "minimal-dependencies",
-    content: `# Minimal Dependencies
+    slug: "small-units",
+    content: `# Small Units
-Dust should avoid coupling to specific tools so we can switch to better alternatives as they emerge.
+Ideas, principles, facts, and tasks should each be as discrete and fine-grained as possible.
-By keeping dependencies minimal and using standard APIs where possible, we maintain the freedom to adopt new tools without major rewrites. This applies to runtimes, test frameworks, build tools, and other infrastructure choices.
+Small, focused documents enable precise relationships between them. A task can link to exactly the principles it serves. A fact can describe one specific aspect of the system. This granularity reduces ambiguity.
+Tasks especially benefit from being small. A narrowly scoped task gives agents or humans the best chance of delivering exactly what was intended, in a single atomic commit.
+Note: This principle directly supports [Lightweight Planning](lightweight-planning.md), which explicitly mentions that "Tasks are small and completable in single commits."
 ## Parent Principle
-- [Maintainable Codebase](maintainable-codebase.md)
+- [Agent Autonomy](agent-autonomy.md)
 ## Sub-Principles
-- [Runtime Agnostic Tests](runtime-agnostic-tests.md)
+- (none)
 `
   },
   {
-    slug: "context-window-efficiency",
-    content: `# Context Window Efficiency
-Dust should be designed with short attention spans in mind.
+    slug: "naming-matters",
+    content: `# Naming Matters
-AI agents operate within limited context windows. Every token consumed by planning artifacts is a token unavailable for reasoning about code. Dust keeps artifacts concise and scannable so agents can quickly understand what needs to be done without wading through verbose documentation.
+Good naming reduces waste by eliminating confusion and making code self-documenting.
-This means favoring brevity over completeness, using consistent structures that are fast to parse, and avoiding redundant information across files.
+Poor names cause rework, bugs, and communication overhead. When names don't clearly convey meaning, developers waste time deciphering code, misunderstand intentions, and introduce defects. Well-chosen names serve as documentation that never goes stale, reducing the need for explanatory comments and enabling both humans and AI agents to navigate the codebase efficiently.
 ## Parent Principle
-- [Agent Autonomy](agent-autonomy.md)
+- [Maintainable Codebase](maintainable-codebase.md)
 ## Sub-Principles
-- [Progressive Disclosure](progressive-disclosure.md)
+- [Consistent Naming](consistent-naming.md)
+- [Clarity Over Brevity](clarity-over-brevity.md)
 `
   },
   {
-    slug: "boy-scout-rule",
-    content: `# Boy Scout Rule
+    slug: "comprehensive-test-coverage",
+    content: `# Comprehensive Test Coverage
-Always leave the code better than you found it.
+A project's test suite is its primary safety net, and agents depend on it even more than humans do.
-When working in any area of the codebase, take the opportunity to make small improvements — clearer names, removed dead code, better structure — even if they're not directly related to the task at hand. These incremental improvements compound over time, preventing gradual decay and keeping the codebase healthy without requiring dedicated cleanup efforts.
+Agents cannot manually verify that their changes work. They rely entirely on automated tests to confirm correctness. Gaps in test coverage become gaps in agent capability — areas where changes are risky and feedback is absent. Comprehensive coverage means every meaningful behaviour is tested, so agents can make changes anywhere in the codebase with confidence.
-The Boy Scout Rule is not a license for large-scale refactoring during unrelated work. Improvements should be small, obvious, and low-risk. If a cleanup is too large to include alongside the current task, capture it as a separate task instead.
+Dust should help projects measure and improve their test coverage, flag untested areas, and encourage a culture where new code comes with new tests.
 ## Parent Principle
-- [Maintainable Codebase](maintainable-codebase.md)
+- [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
 ## Sub-Principles
@@ -13498,18 +13692,16 @@ The Boy Scout Rule is not a license for large-scale refactoring during unrelated
 `
   },
   {
-    slug: "unsurprising-ux",
-    content: `# Unsurprising UX
-The user interface should be as "guessable" as possible.
+    slug: "stop-the-line",
+    content: `# Stop the Line
-Following the [Principle of Least Astonishment](https://en.wikipedia.org/wiki/Principle_of_least_astonishment), users form expectations about how a tool will behave based on conventions, prior experience, and intuition. Dust's interface (including the CLI) should match those expectations wherever possible. If users are observed trying to use the interface in ways we didn't anticipate, the interface should be adjusted to meet their expectations — even if that means supporting many ways of achieving the same result.
+Any worker — human or agent — should halt and fix a problem the moment they detect it, rather than letting defects propagate downstream.
-Surprising behavior erodes trust and slows people down. Unsurprising behavior lets users stay in flow.
+Originating from the Toyota production system, "Stop the Line" empowers every participant to pause work immediately upon identifying a defect, failing check, or safety hazard. Problems are cheaper to fix at their source than after they've compounded through later stages. In the context of dust, this means agents and humans alike should treat broken checks, test failures, and lint errors as blockers that demand immediate attention — not warnings to be deferred.
 ## Parent Principle
-- [Easy Adoption](easy-adoption.md)
+- [Make Changes with Confidence](make-changes-with-confidence.md)
 ## Sub-Principles
@@ -13801,16 +13993,15 @@ async function init(dependencies) {
       throw error;
     }
   }
-  const runner = dustCommand.split(" ")[0];
   context.stdout("");
   context.stdout(`${colors.bold}\uD83D\uDE80 Next steps:${colors.reset} Commit the changes if you are happy, then get planning!`);
   context.stdout("");
   context.stdout(`${colors.dim}If this is a new repository, you can start adding ideas or tasks right away:${colors.reset}`);
-  context.stdout(`   ${colors.cyan}>${colors.reset} ${runner} claude "Idea: friendly UI for non-technical users"`);
-  context.stdout(`   ${colors.cyan}>${colors.reset} ${runner} codex "Task: set up code coverage"`);
+  context.stdout(`   ${colors.cyan}>${colors.reset} claude "Idea: friendly UI for non-technical users"`);
+  context.stdout(`   ${colors.cyan}>${colors.reset} codex "Task: set up code coverage"`);
   context.stdout("");
   context.stdout(`${colors.dim}If this is an existing codebase, you might want to backfill principles and facts:${colors.reset}`);
-  context.stdout(`   ${colors.cyan}>${colors.reset} ${runner} claude "Add principles and facts based on the code in this repository"`);
+  context.stdout(`   ${colors.cyan}>${colors.reset} claude "Add principles and facts based on the code in this repository"`);
   return { exitCode: 0 };
 }
@@ -14321,7 +14512,8 @@ async function runLoop(dependencies, loopDependencies) {
   let completedIterations = 0;
   const iterationOptions = {
     hooksInstalled,
-    docker: dockerConfig
+    docker: dockerConfig,
+    containerRuntime
   };
   if (eventsUrl) {
     iterationOptions.onRawEvent = createHeartbeatThrottler(onAgentEvent, loopDependencies.agentType ?? "claude");
@@ -14805,6 +14997,9 @@ function runLoopClaude(commandDependencies) {
 function runLoopCodex(commandDependencies) { // Thin wrapper around loopCodex for the Codex agent.
   return loopCodex(commandDependencies, createCodexDependencies()); // createCodexDependencies() is invoked on every call; the result of loopCodex is returned as-is.
 }
+function runCodexHook(commandDependencies) { // Handler registered under the "codex hook" key of commandRegistry.
+  return codexHook(commandDependencies, defaultCodexHookDependencies); // Passes the existing defaultCodexHookDependencies value (not built per call); the result of codexHook is returned as-is.
+}
 var commandRegistry = {
   init,
   lint: lintMarkdown,
@@ -14819,6 +15014,7 @@ var commandRegistry = {
   audit,
   "bucket worker": bucketWorker,
   "bucket tool": bucketTool,
+  "codex hook": runCodexHook,
   "core principle": corePrinciple,
   focus,
   "new task": newTask,