@joshski/dust 0.1.111 → 0.1.113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/dust.js CHANGED
@@ -7,7 +7,7 @@ var __require = /* @__PURE__ */ createRequire(import.meta.url);
7
7
  var require_package = __commonJS((exports, module) => {
8
8
  module.exports = {
9
9
  name: "@joshski/dust",
10
- version: "0.1.111",
10
+ version: "0.1.113",
11
11
  description: "Flow state for AI coding agents",
12
12
  type: "module",
13
13
  bin: {
@@ -56,6 +56,10 @@ var require_package = __commonJS((exports, module) => {
56
56
  "./core-principles": {
57
57
  import: "./dist/core-principles.js",
58
58
  types: "./dist/core-principles.d.ts"
59
+ },
60
+ "./execution-order": {
61
+ import: "./dist/execution-order.js",
62
+ types: "./dist/execution-order.d.ts"
59
63
  }
60
64
  },
61
65
  files: [
@@ -394,16 +398,22 @@ import {
394
398
  // lib/git/file-sorter.ts
395
399
  function createGitDirectoryFileSorter(gitRunner) {
396
400
  return async (dir, files) => {
397
- const timestamps = await Promise.all(files.map(async (file) => {
401
+ const results = await Promise.all(files.map(async (file) => {
398
402
  const result = await gitRunner.run(["log", "-1", "--format=%ct", "--", file], dir);
399
- const ts = result.exitCode === 0 ? Number.parseInt(result.output.trim(), 10) : Number.NaN;
400
- return {
401
- file,
402
- timestamp: Number.isNaN(ts) ? Number.POSITIVE_INFINITY : ts
403
- };
403
+ const epochSeconds = result.exitCode === 0 ? Number.parseInt(result.output.trim(), 10) : Number.NaN;
404
+ const lastCommittedAt = Number.isNaN(epochSeconds) ? null : new Date(epochSeconds * 1000).toISOString();
405
+ return { file, lastCommittedAt };
404
406
  }));
405
- timestamps.sort((a, b) => a.timestamp - b.timestamp);
406
- return timestamps.map((t) => t.file);
407
+ results.sort((a, b) => {
408
+ if (a.lastCommittedAt === null && b.lastCommittedAt === null)
409
+ return 0;
410
+ if (a.lastCommittedAt === null)
411
+ return 1;
412
+ if (b.lastCommittedAt === null)
413
+ return -1;
414
+ return new Date(a.lastCommittedAt).getTime() - new Date(b.lastCommittedAt).getTime();
415
+ });
416
+ return results;
407
417
  };
408
418
  }
409
419
 
@@ -721,7 +731,7 @@ async function loadSettings(cwd, fileSystem, runtime) {
721
731
  }
722
732
 
723
733
  // lib/version.ts
724
- var DUST_VERSION = "0.1.111";
734
+ var DUST_VERSION = "0.1.113";
725
735
 
726
736
  // lib/cli/middleware.ts
727
737
  function applyMiddleware(middlewares, execute) {
@@ -773,16 +783,6 @@ function createDefaultTracingOptions() {
773
783
  };
774
784
  }
775
785
 
776
- // lib/cli/dedent.ts
777
- function dedent(strings, ...values) {
778
- const result = strings.reduce((acc, part, index) => acc + part + (values[index] ?? ""), "");
779
- const lines = result.split(`
780
- `);
781
- const indent = lines.filter((line) => line.trim()).reduce((min, line) => Math.min(min, line.match(/^\s*/)[0].length), Number.POSITIVE_INFINITY);
782
- return lines.map((line) => line.slice(indent)).join(`
783
- `).trim();
784
- }
785
-
786
786
  // lib/cli/shared/agent-shared.ts
787
787
  import { join as join4 } from "node:path";
788
788
 
@@ -929,6 +929,16 @@ ${newHookContent}
929
929
  };
930
930
  }
931
931
 
932
+ // lib/cli/dedent.ts
933
+ function dedent(strings, ...values) {
934
+ const result = strings.reduce((acc, part, index) => acc + part + (values[index] ?? ""), "");
935
+ const lines = result.split(`
936
+ `);
937
+ const indent = lines.filter((line) => line.trim()).reduce((min, line) => Math.min(min, line.match(/^\s*/)[0].length), Number.POSITIVE_INFINITY);
938
+ return lines.map((line) => line.slice(indent)).join(`
939
+ `).trim();
940
+ }
941
+
932
942
  // lib/cli/shared/agent-shared.ts
933
943
  async function loadAgentInstructions(cwd, fileSystem, agentType) {
934
944
  const instructionsPath = join4(cwd, ".dust", "config", "agents", `${agentType}.md`);
@@ -969,25 +979,6 @@ async function templateVariablesWithInstructions(cwd, fileSystem, settings, hook
969
979
  agentInstructions
970
980
  };
971
981
  }
972
- async function manageGitHooks(dependencies) {
973
- const { context, fileSystem, settings } = dependencies;
974
- const hooks = createHooksManager(context.cwd, fileSystem, settings);
975
- if (!hooks.isGitRepo()) {
976
- return false;
977
- }
978
- const isInstalled = await hooks.isHookInstalled();
979
- if (!isInstalled) {
980
- await hooks.installHook();
981
- return true;
982
- }
983
- const hookBinaryPath = await hooks.getHookBinaryPath();
984
- if (hookBinaryPath && hookBinaryPath !== settings.dustCommand) {
985
- await hooks.updateHookBinaryPath(settings.dustCommand);
986
- }
987
- return true;
988
- }
989
-
990
- // lib/cli/commands/agent.ts
991
982
  function agentGreeting(vars) {
992
983
  const instructions = vars.agentInstructions ? `
993
984
  ---
@@ -1025,6 +1016,25 @@ ${vars.agentInstructions}` : "";
1025
1016
  Do NOT proceed without running one of these commands.${instructions}
1026
1017
  `;
1027
1018
  }
1019
+ async function manageGitHooks(dependencies) {
1020
+ const { context, fileSystem, settings } = dependencies;
1021
+ const hooks = createHooksManager(context.cwd, fileSystem, settings);
1022
+ if (!hooks.isGitRepo()) {
1023
+ return false;
1024
+ }
1025
+ const isInstalled = await hooks.isHookInstalled();
1026
+ if (!isInstalled) {
1027
+ await hooks.installHook();
1028
+ return true;
1029
+ }
1030
+ const hookBinaryPath = await hooks.getHookBinaryPath();
1031
+ if (hookBinaryPath && hookBinaryPath !== settings.dustCommand) {
1032
+ await hooks.updateHookBinaryPath(settings.dustCommand);
1033
+ }
1034
+ return true;
1035
+ }
1036
+
1037
+ // lib/cli/commands/agent.ts
1028
1038
  async function agent(dependencies, env = process.env) {
1029
1039
  const { context, fileSystem, settings } = dependencies;
1030
1040
  if (env[DUST_SKIP_AGENT] === "1") {
@@ -6098,6 +6108,43 @@ function extractFirstSentence2(paragraph) {
6098
6108
  return match ? match[1] : null;
6099
6109
  }
6100
6110
 
6111
+ // lib/execution-order.ts
6112
+ function computeExecutionOrder(nodes) {
6113
+ if (nodes.length === 0)
6114
+ return [];
6115
+ const sorted = [...nodes].toSorted((a, b) => {
6116
+ if (a.lastCommittedAt === null && b.lastCommittedAt === null)
6117
+ return 0;
6118
+ if (a.lastCommittedAt === null)
6119
+ return 1;
6120
+ if (b.lastCommittedAt === null)
6121
+ return -1;
6122
+ return new Date(a.lastCommittedAt).getTime() - new Date(b.lastCommittedAt).getTime();
6123
+ });
6124
+ const result = [];
6125
+ const completed = new Set;
6126
+ const nodeMap = new Map(nodes.map((n) => [n.slug, n]));
6127
+ while (result.length < nodes.length) {
6128
+ const next = sorted.find((node) => {
6129
+ if (completed.has(node.slug))
6130
+ return false;
6131
+ return node.blockedBy.every((slug) => completed.has(slug) || !nodeMap.has(slug));
6132
+ });
6133
+ if (!next) {
6134
+ for (const node of sorted) {
6135
+ if (!completed.has(node.slug)) {
6136
+ result.push({ node, executionOrder: result.length + 1 });
6137
+ completed.add(node.slug);
6138
+ }
6139
+ }
6140
+ break;
6141
+ }
6142
+ result.push({ node: next, executionOrder: result.length + 1 });
6143
+ completed.add(next.slug);
6144
+ }
6145
+ return result;
6146
+ }
6147
+
6101
6148
  // lib/artifacts/workflow-tasks.ts
6102
6149
  var CAPTURE_IDEA_PREFIX = "Add Idea: ";
6103
6150
  var EXPEDITE_IDEA_PREFIX = "Expedite Idea: ";
@@ -6373,6 +6420,7 @@ async function parseCaptureIdeaTask(fileSystem, dustPath, taskSlug) {
6373
6420
  }
6374
6421
 
6375
6422
  // lib/lint/validators/content-validator.ts
6423
+ var FRONT_MATTER_DELIMITER = "---";
6376
6424
  var REQUIRED_TASK_HEADINGS = ["Task Type", "Blocked By", "Definition of Done"];
6377
6425
  var ALLOWED_TASK_TYPES = new Set(VALID_TASK_TYPES);
6378
6426
  var MAX_OPENING_SENTENCE_LENGTH = 150;
@@ -6390,6 +6438,18 @@ var NON_IMPERATIVE_STARTERS = new Set([
6390
6438
  "you",
6391
6439
  "i"
6392
6440
  ]);
6441
+ function validateNoFrontMatter(artifact) {
6442
+ const firstLine = artifact.rawContent.split(`
6443
+ `)[0];
6444
+ if (firstLine.trim() === FRONT_MATTER_DELIMITER) {
6445
+ return {
6446
+ file: artifact.filePath,
6447
+ line: 1,
6448
+ message: "Artifact must not contain front matter. The title must be the first line."
6449
+ };
6450
+ }
6451
+ return null;
6452
+ }
6393
6453
  function validateOpeningSentence(artifact) {
6394
6454
  if (!artifact.openingSentence) {
6395
6455
  return {
@@ -6475,20 +6535,22 @@ function validateTaskType(artifact) {
6475
6535
  function hasRequiredHeadings(content) {
6476
6536
  return /^## Blocked By\s*$/m.test(content) && /^## Definition of Done\s*$/m.test(content);
6477
6537
  }
6478
- function extractBlockedBy(content) {
6538
+ function extractBlockedBySlugs(content) {
6479
6539
  const blockedByMatch = content.match(/^## Blocked By\s*\n([\s\S]*?)(?=\n## |\n*$)/m);
6480
6540
  const section = blockedByMatch[1].trim();
6481
6541
  if (section === "(none)") {
6482
6542
  return [];
6483
6543
  }
6484
6544
  const linkPattern = /\[.*?\]\(([^)]+\.md)\)/g;
6485
- const blockers = [];
6545
+ const slugs = [];
6486
6546
  let match = linkPattern.exec(section);
6487
6547
  while (match !== null) {
6488
- blockers.push(match[1]);
6548
+ const slugMatch = match[1].match(/([^/]+)\.md$/);
6549
+ if (slugMatch)
6550
+ slugs.push(slugMatch[1]);
6489
6551
  match = linkPattern.exec(section);
6490
6552
  }
6491
- return blockers;
6553
+ return slugs;
6492
6554
  }
6493
6555
  async function findUnblockedTasks(cwd, fileSystem, directoryFileSorter) {
6494
6556
  const dustPath = `${cwd}/.dust`;
@@ -6500,19 +6562,20 @@ async function findUnblockedTasks(cwd, fileSystem, directoryFileSorter) {
6500
6562
  return { tasks: [], invalidTasks: [] };
6501
6563
  }
6502
6564
  const files = await fileSystem.readdir(tasksPath);
6503
- let mdFiles = files.filter((f) => f.endsWith(".md"));
6504
- if (directoryFileSorter) {
6505
- mdFiles = await directoryFileSorter(tasksPath, mdFiles);
6506
- } else {
6507
- mdFiles.sort((a, b) => {
6508
- const aTime = fileSystem.getFileCreationTime(`${tasksPath}/${a}`);
6509
- const bTime = fileSystem.getFileCreationTime(`${tasksPath}/${b}`);
6510
- return aTime - bTime;
6511
- });
6512
- }
6565
+ const mdFiles = files.filter((f) => f.endsWith(".md"));
6513
6566
  if (mdFiles.length === 0) {
6514
6567
  return { tasks: [], invalidTasks: [] };
6515
6568
  }
6569
+ let timestamps;
6570
+ if (directoryFileSorter) {
6571
+ const results = await directoryFileSorter(tasksPath, mdFiles);
6572
+ timestamps = new Map(results.map((r) => [r.file, r.lastCommittedAt]));
6573
+ } else {
6574
+ timestamps = new Map(mdFiles.map((f) => {
6575
+ const ms = fileSystem.getFileCreationTime(`${tasksPath}/${f}`);
6576
+ return [f, ms > 0 ? new Date(ms).toISOString() : null];
6577
+ }));
6578
+ }
6516
6579
  const taskFiles = [];
6517
6580
  for (const file of mdFiles) {
6518
6581
  const filePath = `${tasksPath}/${file}`;
@@ -6533,16 +6596,22 @@ async function findUnblockedTasks(cwd, fileSystem, directoryFileSorter) {
6533
6596
  });
6534
6597
  }
6535
6598
  }
6536
- const existingTasks = new Set(validTaskFiles.map((t) => t.file));
6599
+ const taskNodes = validTaskFiles.map(({ file, content }) => ({
6600
+ slug: file.replace(/\.md$/, ""),
6601
+ file,
6602
+ content,
6603
+ blockedBy: extractBlockedBySlugs(content),
6604
+ lastCommittedAt: timestamps.get(file) ?? null
6605
+ }));
6606
+ const ordered = computeExecutionOrder(taskNodes);
6607
+ const existingSlugs = new Set(taskNodes.map((t) => t.slug));
6537
6608
  const tasks = [];
6538
- for (const { file, content } of validTaskFiles) {
6539
- const blockers = extractBlockedBy(content);
6540
- const hasIncompleteBlocker = blockers.some((blocker) => existingTasks.has(blocker));
6609
+ for (const { node } of ordered) {
6610
+ const hasIncompleteBlocker = node.blockedBy.some((slug) => existingSlugs.has(slug));
6541
6611
  if (!hasIncompleteBlocker) {
6542
- const title = extractTitle(content);
6543
- const openingSentence = extractOpeningSentence(content);
6544
- const relativePath = `.dust/tasks/${file}`;
6545
- tasks.push({ path: relativePath, title, openingSentence });
6612
+ const title = extractTitle(node.content);
6613
+ const openingSentence = extractOpeningSentence(node.content);
6614
+ tasks.push({ path: `.dust/tasks/${node.file}`, title, openingSentence });
6546
6615
  }
6547
6616
  }
6548
6617
  return { tasks, invalidTasks };
@@ -6841,6 +6910,40 @@ async function executeTask(task, runParameters, onAgentEvent, context, agentName
6841
6910
  return "claude_error";
6842
6911
  }
6843
6912
  }
6913
+ function selectShellRunner(spawnFn, options, loopDeps) {
6914
+ if (options.docker && options.containerRuntime) {
6915
+ return buildContainerShellRunner(spawnFn, options.containerRuntime, options.docker);
6916
+ }
6917
+ return loopDeps.shellRunner ?? defaultShellRunner;
6918
+ }
6919
+ function buildContainerShellRunner(spawnFn, containerRuntime, docker) {
6920
+ const runConfig = {
6921
+ imageTag: docker.imageTag,
6922
+ repoPath: docker.repoPath,
6923
+ homeDir: docker.homeDir,
6924
+ gitProxyUrl: docker.gitProxyUrl
6925
+ };
6926
+ const baseArgs = containerRuntime.buildRunArgs(runConfig);
6927
+ return {
6928
+ run: (command, _cwd) => new Promise((resolve) => {
6929
+ const proc = spawnFn(containerRuntime.runCommand, [
6930
+ ...baseArgs,
6931
+ "sh",
6932
+ "-c",
6933
+ command
6934
+ ]);
6935
+ const chunks = [];
6936
+ proc.stdout?.on("data", (data) => chunks.push(data.toString()));
6937
+ proc.stderr?.on("data", (data) => chunks.push(data.toString()));
6938
+ proc.on("close", (code) => {
6939
+ resolve({ exitCode: code ?? 1, output: chunks.join("") });
6940
+ });
6941
+ proc.on("error", (error) => {
6942
+ resolve({ exitCode: 1, output: error.message });
6943
+ });
6944
+ })
6945
+ };
6946
+ }
6844
6947
  async function runOneIteration(dependencies, loopDependencies, onLoopEvent, onAgentEvent, options = {}) {
6845
6948
  const { context, fileSystem, settings } = dependencies;
6846
6949
  const { spawn: spawn2, run: run2 } = loopDependencies;
@@ -6890,7 +6993,7 @@ async function runOneIteration(dependencies, loopDependencies, onLoopEvent, onAg
6890
6993
  const taskTitle = task.title ?? task.path;
6891
6994
  log2(`found ${tasks.length} task(s), picking: ${taskTitle}`);
6892
6995
  onLoopEvent({ type: "loop.tasks_found" });
6893
- const shellRunner = loopDependencies.shellRunner ?? defaultShellRunner;
6996
+ const shellRunner = selectShellRunner(spawn2, options, loopDependencies);
6894
6997
  const preflightResult = await runPreflightChecks(context.cwd, settings.dustCommand, settings.installCommand, shellRunner, onLoopEvent, onAgentEvent, taskTitle);
6895
6998
  if (preflightResult.failed) {
6896
6999
  return handleCheckFailure(preflightResult.output, settings.dustCommand, { run: run2, prompt: "", spawnOptions, onRawEvent }, onAgentEvent, context, agentName, agentType, logger);
@@ -11384,6 +11487,8 @@ function validateIdeaOpenQuestions(artifact) {
11384
11487
  const topLevelStructureMessage = "Open Questions must use `### Question?` headings and `#### Option` headings at the top level. Put supporting markdown (including lists and code blocks) under an option heading. Run `dust new idea` to see the expected format.";
11385
11488
  let inOpenQuestions = false;
11386
11489
  let currentQuestionLine = null;
11490
+ let currentQuestionText = null;
11491
+ let currentQuestionOptionNames = new Set;
11387
11492
  let inOption = false;
11388
11493
  let inCodeBlock = false;
11389
11494
  for (let i = 0;i < lines.length; i++) {
@@ -11407,6 +11512,8 @@ function validateIdeaOpenQuestions(artifact) {
11407
11512
  violations.push(...validateH2Heading(filePath, line, i + 1, inOpenQuestions, currentQuestionLine));
11408
11513
  inOpenQuestions = line === "## Open Questions";
11409
11514
  currentQuestionLine = null;
11515
+ currentQuestionText = null;
11516
+ currentQuestionOptionNames = new Set;
11410
11517
  inOption = false;
11411
11518
  inCodeBlock = false;
11412
11519
  continue;
@@ -11422,6 +11529,7 @@ function validateIdeaOpenQuestions(artifact) {
11422
11529
  line: currentQuestionLine
11423
11530
  });
11424
11531
  }
11532
+ currentQuestionOptionNames = new Set;
11425
11533
  if (!trimmedLine.endsWith("?")) {
11426
11534
  violations.push({
11427
11535
  file: filePath,
@@ -11429,12 +11537,24 @@ function validateIdeaOpenQuestions(artifact) {
11429
11537
  line: i + 1
11430
11538
  });
11431
11539
  currentQuestionLine = null;
11540
+ currentQuestionText = null;
11432
11541
  } else {
11433
11542
  currentQuestionLine = i + 1;
11543
+ currentQuestionText = trimmedLine.slice(4);
11434
11544
  }
11435
11545
  continue;
11436
11546
  }
11437
11547
  if (line.startsWith("#### ")) {
11548
+ const optionName = trimmedLine.slice(5);
11549
+ if (currentQuestionOptionNames.has(optionName)) {
11550
+ violations.push({
11551
+ file: filePath,
11552
+ message: `Duplicate option "${optionName}" under question "${currentQuestionText}" — each option must have a unique name`,
11553
+ line: i + 1
11554
+ });
11555
+ } else {
11556
+ currentQuestionOptionNames.add(optionName);
11557
+ }
11438
11558
  currentQuestionLine = null;
11439
11559
  inOption = true;
11440
11560
  continue;
@@ -11810,6 +11930,9 @@ function validateArtifacts(context) {
11810
11930
  }
11811
11931
  for (const artifacts of Object.values(byType)) {
11812
11932
  for (const artifact of artifacts) {
11933
+ const frontMatterViolation = validateNoFrontMatter(artifact);
11934
+ if (frontMatterViolation)
11935
+ violations.push(frontMatterViolation);
11813
11936
  const openingSentenceViolation = validateOpeningSentence(artifact);
11814
11937
  if (openingSentenceViolation)
11815
11938
  violations.push(openingSentenceViolation);
@@ -12158,82 +12281,153 @@ async function check(dependencies, shellRunner, clock, _setInterval, _clearInter
12158
12281
  return { exitCode };
12159
12282
  }
12160
12283
 
12284
+ // lib/cli/commands/codex-hook.ts
12285
+ var KNOWN_HOOK_EVENTS = [
12286
+ "PreToolUse",
12287
+ "PermissionRequest",
12288
+ "PostToolUse",
12289
+ "SessionStart",
12290
+ "UserPromptSubmit",
12291
+ "Stop"
12292
+ ];
12293
+ async function readStdinUtf8() {
12294
+ const chunks = [];
12295
+ for await (const chunk of process.stdin) {
12296
+ chunks.push(chunk);
12297
+ }
12298
+ return Buffer.concat(chunks).toString("utf8");
12299
+ }
12300
+ var defaultCodexHookDependencies = {
12301
+ readStdin: readStdinUtf8
12302
+ };
12303
+ function isKnownEvent(value) {
12304
+ return typeof value === "string" && KNOWN_HOOK_EVENTS.includes(value);
12305
+ }
12306
+ async function handleSessionStart(dependencies) {
12307
+ const { context, fileSystem, settings } = dependencies;
12308
+ const agentInstructions = await loadAgentInstructions(context.cwd, fileSystem, "codex");
12309
+ const additionalContext = agentGreeting({
12310
+ bin: settings.dustCommand,
12311
+ agentName: "Codex",
12312
+ hooksInstalled: false,
12313
+ isClaudeCodeWeb: false,
12314
+ hasIdeaFile: true,
12315
+ agentInstructions
12316
+ });
12317
+ return JSON.stringify({
12318
+ continue: true,
12319
+ hookSpecificOutput: {
12320
+ hookEventName: "SessionStart",
12321
+ additionalContext
12322
+ },
12323
+ systemMessage: "dust agent loaded"
12324
+ });
12325
+ }
12326
+ function handleNoOp() {
12327
+ return JSON.stringify({ continue: true });
12328
+ }
12329
+ async function codexHook(dependencies, hookDependencies = defaultCodexHookDependencies) {
12330
+ const { context } = dependencies;
12331
+ const raw = await hookDependencies.readStdin();
12332
+ let payload;
12333
+ try {
12334
+ payload = JSON.parse(raw);
12335
+ } catch {
12336
+ context.stderr("dust codex hook: failed to parse stdin as JSON");
12337
+ return { exitCode: 1 };
12338
+ }
12339
+ if (!payload || typeof payload !== "object") {
12340
+ context.stderr("dust codex hook: stdin payload must be a JSON object");
12341
+ return { exitCode: 1 };
12342
+ }
12343
+ const eventName = payload.hook_event_name;
12344
+ if (!isKnownEvent(eventName)) {
12345
+ context.stderr(`dust codex hook: unknown hook_event_name: ${JSON.stringify(eventName)}`);
12346
+ return { exitCode: 1 };
12347
+ }
12348
+ const response = eventName === "SessionStart" ? await handleSessionStart(dependencies) : handleNoOp();
12349
+ context.stdout(response);
12350
+ return { exitCode: 0 };
12351
+ }
12352
+
12161
12353
  // lib/bundled-core-principles.ts
12162
12354
  var BUNDLED_PRINCIPLES = [
12163
12355
  {
12164
- slug: "batteries-included",
12165
- content: `# Batteries Included
12166
-
12167
- Dust should provide everything that is required (within reason) for an agent to be productive in an arbitrary codebase.
12356
+ slug: "design-for-testability",
12357
+ content: `# Design for Testability
12168
12358
 
12169
- An agent working autonomously should not be blocked because a tool or configuration is missing. For example, dust should ship custom lint rules for different linters, even though those linters are not dependencies of dust itself. If an agent needs a capability to do its job well in a typical codebase, dust should provide it out of the box.
12359
+ Design code to be testable first; good structure follows naturally.
12170
12360
 
12171
- This means accepting some breadth of scope — bundling configs, rules, and utilities that target external tools — in exchange for agents that can start producing useful work immediately without manual setup.
12361
+ Testability should be a primary design driver, not a quality to be retrofitted. When code is designed to be testable from the start, it naturally becomes decoupled, explicit in its dependencies, and clear in its interfaces.
12172
12362
 
12173
- ## Applicability
12363
+ The discipline of testability forces good design: functions become pure, dependencies become explicit, side effects become isolated. Rather than viewing testability as a tax on production code, recognize it as a compass that points toward better architecture.
12174
12364
 
12175
- Internal
12365
+ This is particularly important in agent-driven development. Agents cannot manually verify their changes—they rely entirely on tests. Code that resists testing resists autonomous modification.
12176
12366
 
12177
12367
  ## Parent Principle
12178
12368
 
12179
- - [Agent Autonomy](agent-autonomy.md)
12369
+ - [Decoupled Code](decoupled-code.md)
12180
12370
 
12181
12371
  ## Sub-Principles
12372
+
12373
+ - (none)
12182
12374
  `
12183
12375
  },
12184
12376
  {
12185
- slug: "some-big-design-up-front",
12186
- content: `# Some Big Design Up Front
12187
-
12188
- AI agents lower the cost of architectural exploration, making heavier upfront investment rational during the idea phase.
12377
+ slug: "fast-feedback-loops",
12378
+ content: `# Fast Feedback Loops
12189
12379
 
12190
- Agile's rejection of "big design up front" (BDUF) was largely economic: detailed architecture was expensive to produce and often wrong. AI agents change that equation — they can explore multiple variants, prototype them, and measure trade-offs cheaply. When evaluating alternatives costs less, the expected value of avoiding large structural mistakes increases.
12380
+ The primary feedback loop — write code, run checks, see results — should be as fast as possible.
12191
12381
 
12192
- This doesn't mean returning to traditional BDUF. Uncertainty about future requirements still limits what prediction can achieve. The insight is that the optimal amount of upfront work has shifted, not that prediction became reliable.
12382
+ Fast feedback is the foundation of productive development, for both humans and agents. When tests, linters, and type checks run in seconds rather than minutes, developers iterate more frequently and catch problems earlier. Agents especially benefit because they operate in tight loops of change-and-verify; slow feedback wastes tokens and context window space on waiting rather than working.
12193
12383
 
12194
- The model is hybrid: thorough AI-assisted exploration during ideas, followed by straightforward execution during tasks. "Lightweight" refers to task-level planning, not idea-level exploration. Invest heavily in understanding alternatives during the idea phase, then decompose into atomic tasks once the direction is clear.
12384
+ Dust should help projects measure the speed of their feedback loops, identify bottlenecks, and keep them fast as the codebase grows. This includes promoting practices like unit tests over integration tests for speed, incremental compilation, and check parallelisation.
12195
12385
 
12196
- ## Convergence Criteria
12386
+ ## Parent Principle
12197
12387
 
12198
- Exploration should continue until clear trade-offs are identified and the chosen approach can be articulated against alternatives. This is convergence-based, not time-boxed — simple ideas converge quickly, complex architectural decisions require more exploration.
12388
+ - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12199
12389
 
12200
- When exploration feels "done":
12390
+ ## Sub-Principles
12201
12391
 
12202
- - Multiple approaches have been considered
12203
- - Trade-offs between approaches are understood
12204
- - The chosen direction has clear justification
12205
- - Remaining uncertainty is about requirements, not design
12392
+ - (none)
12393
+ `
12394
+ },
12395
+ {
12396
+ slug: "test-isolation",
12397
+ content: `# Test Isolation
12206
12398
 
12207
- If a task requires significant design decisions during execution, it wasn't ready to be a task.
12399
+ Tests should not interfere with one another. Each test must be independently runnable and produce the same result regardless of execution order or which other tests run alongside it.
12208
12400
 
12209
- ## Documenting Alternatives
12401
+ This means:
12402
+ - No shared mutable state between tests
12403
+ - No reliance on test execution order
12404
+ - No file system or environment pollution
12405
+ - Each test sets up its own dependencies
12210
12406
 
12211
- Ideas should document the alternatives considered and why they were ruled out. This creates a decision log that helps future agents and humans understand context. Include alternatives in the idea body or Open Questions sections.
12407
+ Test isolation enables parallel execution, makes failures easier to diagnose, and prevents cascading false failures when one test breaks.
12212
12408
 
12213
12409
  ## Parent Principle
12214
12410
 
12215
- - [Lightweight Planning](lightweight-planning.md)
12411
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
12216
12412
 
12217
12413
  ## Sub-Principles
12218
12414
 
12219
- - (none)
12415
+ - [Environment-Independent Tests](environment-independent-tests.md)
12220
12416
  `
12221
12417
  },
12222
12418
  {
12223
- slug: "design-for-testability",
12224
- content: `# Design for Testability
12225
-
12226
- Design code to be testable first; good structure follows naturally.
12419
+ slug: "boy-scout-rule",
12420
+ content: `# Boy Scout Rule
12227
12421
 
12228
- Testability should be a primary design driver, not a quality to be retrofitted. When code is designed to be testable from the start, it naturally becomes decoupled, explicit in its dependencies, and clear in its interfaces.
12422
+ Always leave the code better than you found it.
12229
12423
 
12230
- The discipline of testability forces good design: functions become pure, dependencies become explicit, side effects become isolated. Rather than viewing testability as a tax on production code, recognize it as a compass that points toward better architecture.
12424
+ When working in any area of the codebase, take the opportunity to make small improvements — clearer names, removed dead code, better structure — even if they're not directly related to the task at hand. These incremental improvements compound over time, preventing gradual decay and keeping the codebase healthy without requiring dedicated cleanup efforts.
12231
12425
 
12232
- This is particularly important in agent-driven development. Agents cannot manually verify their changes—they rely entirely on tests. Code that resists testing resists autonomous modification.
12426
+ The Boy Scout Rule is not a license for large-scale refactoring during unrelated work. Improvements should be small, obvious, and low-risk. If a cleanup is too large to include alongside the current task, capture it as a separate task instead.
12233
12427
 
12234
12428
  ## Parent Principle
12235
12429
 
12236
- - [Decoupled Code](decoupled-code.md)
12430
+ - [Maintainable Codebase](maintainable-codebase.md)
12237
12431
 
12238
12432
  ## Sub-Principles
12239
12433
 
@@ -12241,51 +12435,35 @@ This is particularly important in agent-driven development. Agents cannot manual
12241
12435
  `
12242
12436
  },
12243
12437
  {
12244
- slug: "readable-test-data",
12245
- content: `# Readable Test Data
12438
+ slug: "atomic-commits",
12439
+ content: `# Atomic Commits
12246
12440
 
12247
- Test data setup should use natural structures that mirror what they represent.
12441
+ Each commit should tell a complete story, bundling implementation changes with their corresponding documentation updates.
12248
12442
 
12249
- ## Why it matters
12443
+ When a task is completed, the commit deletes the task file, updates relevant facts to reflect the new reality, and removes any ideas that have been realized. This discipline ensures that any point in the commit history represents a coherent, self-documenting state of the project.
12250
12444
 
12251
- When test data is easy to read, tests become self-documenting. A file system hierarchy expressed as a nested object immediately conveys structure, while a flat Map with path strings requires mental parsing to understand the relationships.
12445
+ Clean commit history is essential because archaeology depends on it. Future humans and AI agents will traverse history to understand why decisions were made and how the system evolved.
12252
12446
 
12253
- ## In practice
12447
+ ## Parent Principle
12254
12448
 
12255
- Prefer literal structures that visually match the domain:
12449
+ - [Repository Hygiene](repository-hygiene.md)
12256
12450
 
12257
- \`\`\`javascript
12258
- // Avoid: flat paths that obscure hierarchy
12259
- const fs = createFileSystemEmulator({
12260
- files: new Map([['/project/.dust/principles/my-goal.md', '# My Goal']]),
12261
- existingPaths: new Set(['/project/.dust/ideas']),
12262
- })
12451
+ ## Sub-Principles
12263
12452
 
12264
- // Prefer: nested object that mirrors file system structure
12265
- const fs = createFileSystemEmulator({
12266
- project: {
12267
- '.dust': {
12268
- principles: {
12269
- 'my-goal.md': '# My Goal'
12270
- },
12271
- ideas: {}
12272
- }
12273
- }
12274
- })
12275
- \`\`\`
12453
+ - [Traceable Decisions](traceable-decisions.md)
12454
+ `
12455
+ },
12456
+ {
12457
+ slug: "co-located-tests",
12458
+ content: `# Co-located Tests
12276
12459
 
12277
- The nested form:
12278
- - Shows parent-child relationships through indentation
12279
- - Makes empty directories explicit with empty objects
12280
- - Requires no mental path concatenation to understand structure
12281
-
12282
- ## How to evaluate
12460
+ Test files should live next to the code they test.
12283
12461
 
12284
- Work supports this principle when test setup data uses structures that visually resemble what they represent, reducing cognitive load for readers.
12462
+ When tests are co-located with their source files, developers can immediately see what's tested and what isn't. Finding the test for a module becomes trivial—it's right there in the same directory. This proximity encourages writing tests as part of the development flow rather than as an afterthought, and makes it natural to update tests when modifying code.
12285
12463
 
12286
12464
  ## Parent Principle
12287
12465
 
12288
- - [Make Changes with Confidence](make-changes-with-confidence.md)
12466
+ - [Intuitive Directory Structure](intuitive-directory-structure.md)
12289
12467
 
12290
12468
  ## Sub-Principles
12291
12469
 
@@ -12293,26 +12471,20 @@ Work supports this principle when test setup data uses structures that visually
12293
12471
  `
12294
12472
  },
12295
12473
  {
12296
- slug: "agent-specific-enhancement",
12297
- content: `# Agent-Specific Enhancement
12298
-
12299
- Dust should detect and enhance the experience for specific agents while remaining agnostic at its core.
12300
-
12301
- While Dust has [Agent-Agnostic Design](agent-agnostic-design.md) and works with any capable agent, it can still optimize the "agent DX" (developer experience) when it detects a specific agent is being used. This means:
12474
+ slug: "broken-windows",
12475
+ content: `# Broken Windows
12302
12476
 
12303
- - **Detection** - Dust may detect which agent is running (e.g., Claude Code, Aider, Cursor) through environment variables, configuration, or other signals
12304
- - **Enhancement** - Once detected, Dust can tailor its output format, prompts, or context to leverage that agent's specific strengths
12305
- - **Graceful fallback** - When no specific agent is detected, Dust provides a generic experience that works with any agent
12477
+ Don't leave broken windows unrepaired.
12306
12478
 
12307
- This principle complements Agent-Agnostic Design: the core functionality never requires a specific agent, but the experience improves when one is recognized.
12479
+ A broken window — a bad name, a hack, a TODO that lingers, a test that's been skipped — signals that nobody cares. That signal invites more neglect. One shortcut becomes two, then ten, and the codebase quietly rots from the inside.
12308
12480
 
12309
- ## Applicability
12481
+ When you spot a broken window, fix it immediately if the fix is small. If it's too large, capture it as a task so it doesn't get forgotten. The key is to never normalise the damage. Even a comment acknowledging the problem ("this needs fixing because...") is better than silent acceptance.
12310
12482
 
12311
- Internal
12483
+ This principle complements the [Boy Scout Rule](boy-scout-rule.md): the Boy Scout Rule encourages proactive improvement, while Broken Windows warns against tolerating known problems. Together they keep entropy at bay.
12312
12484
 
12313
12485
  ## Parent Principle
12314
12486
 
12315
- - [Agent Autonomy](agent-autonomy.md)
12487
+ - [Maintainable Codebase](maintainable-codebase.md)
12316
12488
 
12317
12489
  ## Sub-Principles
12318
12490
 
@@ -12320,76 +12492,45 @@ Internal
12320
12492
  `
12321
12493
  },
12322
12494
  {
12323
- slug: "context-optimised-code",
12324
- content: `# Context-Optimised Code
12495
+ slug: "trunk-based-development",
12496
+ content: `# Trunk-Based Development
12325
12497
 
12326
- Code should be structured so that agents can understand and modify it within their context window constraints.
12498
+ Dust is designed to support a non-branching workflow where developers commit directly to a single main branch.
12327
12499
 
12328
- Large files, deeply nested abstractions, and sprawling dependency chains all work against agents. A 3,000-line file cannot be fully loaded into context. A function that requires understanding six levels of indirection demands more context than one that is self-contained. Context-optimised code favours small files, shallow abstractions, explicit dependencies, and co-located related logic.
12500
+ In trunk-based development, teams collaborate on code in one primary branch rather than maintaining multiple long-lived feature branches. This eliminates merge conflicts, enables continuous integration, and keeps the codebase continuously releasable.
12329
12501
 
12330
- Dust should help projects identify files that are too large, modules that are too tangled, and patterns that make agent comprehension harder than it needs to be. This is not just about file size — it is about ensuring that the unit of code an agent needs to understand fits comfortably within the window available.
12502
+ The \`dust loop claude\` command embodies this philosophy: agents pull from main, implement a task, and push directly back to main. There are no feature branches, no pull requests, no merge queues. Each commit is atomic and complete.
12503
+
12504
+ This approach scales through discipline rather than isolation. Feature flags and incremental changes replace long-running branches. The repository history becomes a linear sequence of working states.
12505
+
12506
+ See: https://trunkbaseddevelopment.com/
12331
12507
 
12332
12508
  ## Parent Principle
12333
12509
 
12334
- - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12510
+ - [Repository Hygiene](repository-hygiene.md)
12335
12511
 
12336
12512
  ## Sub-Principles
12337
12513
 
12338
- - (none)
12514
+ (none)
12339
12515
  `
12340
12516
  },
12341
12517
  {
12342
- slug: "self-diagnosing-tests",
12343
- content: `# Self-Diagnosing Tests
12344
-
12345
- When a big test fails, it should be self-evident how to diagnose and fix the failure.
12346
-
12347
- The more moving parts a test has — end-to-end, system, integration — the more critical this becomes. A test that fails with \`expected true, received false\` forces the developer (or agent) to re-run, add logging, and guess. A test that fails with a rich diff showing the actual state versus the expected state turns diagnosis into reading.
12348
-
12349
- ## Anti-patterns
12350
-
12351
- **Boolean flattening** — collapsing a rich value into true/false before asserting:
12352
- \`\`\`javascript
12353
- // Bad: "expected true, received false" — what events arrived?
12354
- expect(events.some(e => e.type === 'check-passed')).toBe(true)
12355
-
12356
- // Good: shows the actual event types on failure
12357
- expect(events.map(e => e.type)).toContain('check-passed')
12358
- \`\`\`
12359
-
12360
- **Length-only assertions** — checking count without showing contents:
12361
- \`\`\`javascript
12362
- // Bad: "expected 2, received 0" — what requests were captured?
12363
- expect(requests.length).toBe(2)
12364
-
12365
- // Good: shows the actual requests on failure
12366
- expect(requests).toHaveLength(2) // vitest shows the array
12367
- \`\`\`
12368
-
12369
- **Silent guards** — using \`if\` where an assertion belongs:
12370
- \`\`\`javascript
12371
- // Bad: silently passes when settings is undefined
12372
- if (settings) {
12373
- expect(JSON.parse(settings).key).toBeDefined()
12374
- }
12375
-
12376
- // Good: fails explicitly if settings is missing
12377
- expect(settings).toBeDefined()
12378
- const parsed = JSON.parse(settings!)
12379
- expect(parsed.key).toBeDefined()
12380
- \`\`\`
12381
-
12382
- ## The test
12518
+ slug: "environment-independent-tests",
12519
+ content: `# Environment-Independent Tests
12383
12520
 
12384
- If a test fails, can a developer who has never seen the code identify the problem from the failure output alone without re-running, adding console.logs, or reading the test source? The closer to "yes", the better.
12521
+ Tests must produce the same result regardless of where they run. A test that passes locally but fails in CI (or vice versa) is a broken test.
12385
12522
 
12386
- ## How to evaluate
12523
+ Concretely, tests should never depend on:
12524
+ - Ambient environment variables (e.g. \`CLAUDECODE\`, \`CI\`, \`HOME\`)
12525
+ - The current working directory or filesystem layout of the host machine
12526
+ - Network availability or external services
12527
+ - The identity of the user or agent running the tests
12387
12528
 
12388
- Work supports this principle when every assertion in a system or integration test would, on failure, reveal the actual state richly enough to guide a fix. Bare boolean checks, length-only assertions, and silent conditional guards are violations.
12529
+ When a function's behavior depends on environment variables, the test must explicitly control those variables (via \`stubEnv\`, dependency injection, or passing an \`env\` parameter) rather than relying on whatever happens to be set in the current shell.
12389
12530
 
12390
12531
  ## Parent Principle
12391
12532
 
12392
- - [Make Changes with Confidence](make-changes-with-confidence.md)
12533
+ - [Test Isolation](test-isolation.md)
12393
12534
 
12394
12535
  ## Sub-Principles
12395
12536
 
@@ -12397,67 +12538,53 @@ Work supports this principle when every assertion in a system or integration tes
12397
12538
  `
12398
12539
  },
12399
12540
  {
12400
- slug: "ideal-agent-developer-experience",
12401
- content: `# Ideal Agent Developer Experience
12402
-
12403
- The agent is the developer. The human is the CEO. Dust is the PM.
12404
-
12405
- With today's AI coding assistants, the human is stuck in a tight loop with agents — constantly directing, reviewing, and course-correcting. Dust is designed to relieve humans from this tight loop. Like an assistant to a CEO, dust predominantly brings fully-researched questions and well-prepared work to the human, rather than expecting the human to drive every decision. The human checks in less frequently, and when they do, they make high-leverage strategic calls rather than micromanaging implementation.
12406
-
12407
- For this to work, the agent's development environment must be excellent. The agent reads the code, writes changes, runs the checks, and iterates until the task is done. Everything about the codebase and its tooling either helps or hinders that process. Comprehensive tests are the agent's only way to verify correctness. Fast feedback loops are the agent's iteration speed. Structured logs are the agent's eyes into runtime behaviour. Small, well-organised files are what fit in the agent's context window. Exploratory and debugging tools are how the agent navigates and diagnoses without trial and error.
12408
-
12409
- Each sub-principle represents a different aspect of the ideal agent developer setup. The better these are, the less the human needs to be in the loop.
12410
-
12411
- ## Parent Principle
12412
-
12413
- - [Human-AI Collaboration](human-ai-collaboration.md)
12541
+ slug: "comprehensive-assertions",
12542
+ content: `# Comprehensive Assertions
12414
12543
 
12415
- ## Sub-Principles
12544
+ Assert the whole, not the parts.
12416
12545
 
12417
- - [Comprehensive Test Coverage](comprehensive-test-coverage.md)
12418
- - [Fast Feedback Loops](fast-feedback-loops.md)
12419
- - [Slow Feedback Coping](slow-feedback-coping.md)
12420
- - [Development Traceability](development-traceability.md)
12421
- - [Context-Optimised Code](context-optimised-code.md)
12422
- - [Exploratory Tooling](exploratory-tooling.md)
12423
- - [Debugging Tooling](debugging-tooling.md)
12424
- - [Self-Contained Repository](self-contained-repository.md)
12425
- `
12426
- },
12427
- {
12428
- slug: "broken-windows",
12429
- content: `# Broken Windows
12546
+ When you break a complex object into many small assertions, a failure tells you *one thing that's wrong*. When you assert against the whole expected value, the diff tells you *what actually happened versus what you expected* — the full picture, in one glance.
12430
12547
 
12431
- Don't leave broken windows unrepaired.
12548
+ Small assertions are like yes/no questions to a witness. A whole-object assertion is like asking "tell me what you saw."
12432
12549
 
12433
- A broken window — a bad name, a hack, a TODO that lingers, a test that's been skipped — signals that nobody cares. That signal invites more neglect. One shortcut becomes two, then ten, and the codebase quietly rots from the inside.
12550
+ ## In practice
12434
12551
 
12435
- When you spot a broken window, fix it immediately if the fix is small. If it's too large, capture it as a task so it doesn't get forgotten. The key is to never normalise the damage. Even a comment acknowledging the problem ("this needs fixing because...") is better than silent acceptance.
12552
+ Collapse multiple partial assertions into one comprehensive assertion:
12436
12553
 
12437
- This principle complements the [Boy Scout Rule](boy-scout-rule.md): the Boy Scout Rule encourages proactive improvement, while Broken Windows warns against tolerating known problems. Together they keep entropy at bay.
12554
+ \`\`\`javascript
12555
+ // Fragmented — each failure is a narrow keyhole
12556
+ expect(result.name).toBe("Alice");
12557
+ expect(result.age).toBe(30);
12558
+ expect(result.role).toBe("admin");
12438
12559
 
12439
- ## Parent Principle
12560
+ // Whole — a failure diff tells the full story
12561
+ expect(result).toEqual({
12562
+ name: "Alice",
12563
+ age: 30,
12564
+ role: "admin",
12565
+ });
12566
+ \`\`\`
12440
12567
 
12441
- - [Maintainable Codebase](maintainable-codebase.md)
12568
+ If \`role\` is \`"user"\` and \`age\` is \`29\`, the fragmented version stops at the first failure. The whole-object assertion shows both discrepancies at once, in context.
12442
12569
 
12443
- ## Sub-Principles
12570
+ The same applies to arrays:
12444
12571
 
12445
- - (none)
12446
- `
12447
- },
12448
- {
12449
- slug: "progressive-disclosure",
12450
- content: `# Progressive Disclosure
12572
+ \`\`\`javascript
12573
+ // Avoid: partial assertions that hide the actual state
12574
+ expect(array).toContain('apples')
12575
+ expect(array).toContain('oranges')
12451
12576
 
12452
- Dust should reveal details progressively as a way of achieving context window efficiency.
12577
+ // Prefer: one assertion that reveals the full picture on failure
12578
+ expect(array).toEqual(['apples', 'oranges'])
12579
+ \`\`\`
12453
12580
 
12454
- Not all information is needed at once. A task list showing just titles is sufficient for choosing what to work on. Full task details are only needed when actively implementing. Linked principles and facts can be followed when deeper context is required.
12581
+ ## How to evaluate
12455
12582
 
12456
- This layered approach keeps initial reads lightweight while preserving access to complete information when needed.
12583
+ Work supports this principle when test failures tell a rich story — showing the complete actual value alongside the complete expected value, so the reader can understand what happened without re-running anything.
12457
12584
 
12458
12585
  ## Parent Principle
12459
12586
 
12460
- - [Context Window Efficiency](context-window-efficiency.md)
12587
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
12461
12588
 
12462
12589
  ## Sub-Principles
12463
12590
 
@@ -12465,76 +12592,84 @@ This layered approach keeps initial reads lightweight while preserving access to
12465
12592
  `
12466
12593
  },
12467
12594
  {
12468
- slug: "lightweight-planning",
12469
- content: `# Lightweight Planning
12470
-
12471
- Dust aims to be a minimal, low-overhead planning system that stays relevant over time.
12595
+ slug: "maintainable-codebase",
12596
+ content: `# Maintainable Codebase
12472
12597
 
12473
- Planning artifacts are simple markdown files that live alongside code. Ideas are intentionally vague until implementation is imminent. Tasks are small and completable in single commits. Facts document current reality rather than aspirational states.
12598
+ The dust codebase should be easy to understand, modify, and extend.
12474
12599
 
12475
- The system avoids the staleness problem by deferring detail until the last responsible moment and deleting completed work rather than archiving it.
12600
+ This principle governs how we develop and maintain dust itself, separate from the principles that describe what dust offers its users. A well-maintained codebase enables rapid iteration, reduces bugs, and makes contributions easier.
12476
12601
 
12477
12602
  ## Parent Principle
12478
12603
 
12479
- - [Human-AI Collaboration](human-ai-collaboration.md)
12604
+ - [Agentic Flow State](agentic-flow-state.md)
12480
12605
 
12481
12606
  ## Sub-Principles
12482
12607
 
12483
- - [Task-First Workflow](task-first-workflow.md)
12484
- - [Some Big Design Up Front](some-big-design-up-front.md)
12608
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
12609
+ - [Minimal Dependencies](minimal-dependencies.md)
12610
+ - [Intuitive Directory Structure](intuitive-directory-structure.md)
12611
+ - [Repository Hygiene](repository-hygiene.md)
12612
+ - [Naming Matters](naming-matters.md)
12613
+ - [Reasonably DRY](reasonably-dry.md)
12614
+ - [Make the Change Easy](make-the-change-easy.md)
12615
+ - [Boy Scout Rule](boy-scout-rule.md)
12616
+ - [Broken Windows](broken-windows.md)
12485
12617
  `
12486
12618
  },
12487
12619
  {
12488
- slug: "comprehensive-test-coverage",
12489
- content: `# Comprehensive Test Coverage
12620
+ slug: "context-window-efficiency",
12621
+ content: `# Context Window Efficiency
12490
12622
 
12491
- A project's test suite is its primary safety net, and agents depend on it even more than humans do.
12623
+ Dust should be designed with short attention spans in mind.
12492
12624
 
12493
- Agents cannot manually verify that their changes work. They rely entirely on automated tests to confirm correctness. Gaps in test coverage become gaps in agent capability — areas where changes are risky and feedback is absent. Comprehensive coverage means every meaningful behaviour is tested, so agents can make changes anywhere in the codebase with confidence.
12625
+ AI agents operate within limited context windows. Every token consumed by planning artifacts is a token unavailable for reasoning about code. Dust keeps artifacts concise and scannable so agents can quickly understand what needs to be done without wading through verbose documentation.
12494
12626
 
12495
- Dust should help projects measure and improve their test coverage, flag untested areas, and encourage a culture where new code comes with new tests.
12627
+ This means favoring brevity over completeness, using consistent structures that are fast to parse, and avoiding redundant information across files.
12496
12628
 
12497
12629
  ## Parent Principle
12498
12630
 
12499
- - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12631
+ - [Agent Autonomy](agent-autonomy.md)
12500
12632
 
12501
12633
  ## Sub-Principles
12502
12634
 
12503
- - (none)
12635
+ - [Progressive Disclosure](progressive-disclosure.md)
12504
12636
  `
12505
12637
  },
12506
12638
  {
12507
- slug: "intuitive-directory-structure",
12508
- content: `# Intuitive Directory Structure
12639
+ slug: "human-ai-collaboration",
12640
+ content: `# Human-AI Collaboration
12509
12641
 
12510
- Code should be organized around related concerns in clearly named directories.
12642
+ Dust exists to enable effective collaboration between humans and AI agents on complex projects.
12511
12643
 
12512
- When files that serve similar purposes are grouped together, the codebase becomes easier to navigate and understand. A developer looking for "commands" should find them in a \`commands\` directory. Utilities should live with utilities. This organization reduces cognitive load and makes the project structure self-documenting.
12644
+ The human is the CEO — they set direction, make strategic decisions, and check in when it matters. Dust is the PM — it manages the work, prepares context, and brings fully-researched questions to the human rather than expecting them to drive every detail. Agents are the developers — they read code, write changes, and iterate autonomously.
12645
+
12646
+ Today's AI coding tools keep humans in a tight loop with agents. Dust is designed to loosen that loop, so humans spend less time directing and more time deciding.
12513
12647
 
12514
12648
  ## Parent Principle
12515
12649
 
12516
- - [Maintainable Codebase](maintainable-codebase.md)
12650
+ - [Agentic Flow State](agentic-flow-state.md)
12517
12651
 
12518
12652
  ## Sub-Principles
12519
12653
 
12520
- - [Co-located Tests](co-located-tests.md)
12654
+ - [Agent Autonomy](agent-autonomy.md)
12655
+ - [Easy Adoption](easy-adoption.md)
12656
+ - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12657
+ - [Lightweight Planning](lightweight-planning.md)
12521
12658
  `
12522
12659
  },
12523
12660
  {
12524
- slug: "small-units",
12525
- content: `# Small Units
12526
-
12527
- Ideas, principles, facts, and tasks should each be as discrete and fine-grained as possible.
12661
+ slug: "functional-core-imperative-shell",
12662
+ content: `# Functional Core, Imperative Shell
12528
12663
 
12529
- Small, focused documents enable precise relationships between them. A task can link to exactly the principles it serves. A fact can describe one specific aspect of the system. This granularity reduces ambiguity.
12664
+ Separate code into a pure "functional core" and a thin "imperative shell." The core takes values in and returns values out, with no side effects. The shell handles I/O and wires things together.
12530
12665
 
12531
- Tasks especially benefit from being small. A narrowly scoped task gives agents or humans the best chance of delivering exactly what was intended, in a single atomic commit.
12666
+ Purely functional code makes some things easier to understand: because values don't change, you can call functions and know that only their return value matters—they don't change anything outside themselves.
12532
12667
 
12533
- Note: This principle directly supports [Lightweight Planning](lightweight-planning.md), which explicitly mentions that "Tasks are small and completable in single commits."
12668
+ The functional core contains business logic as pure functions that take values and return values. The imperative shell sits at the boundary, reading input, calling into the core, and performing side effects with the results. This keeps the majority of code easy to test (no mocks or stubs needed for pure functions) and makes the I/O surface area small and explicit.
12534
12669
 
12535
12670
  ## Parent Principle
12536
12671
 
12537
- - [Agent Autonomy](agent-autonomy.md)
12672
+ - [Decoupled Code](decoupled-code.md)
12538
12673
 
12539
12674
  ## Sub-Principles
12540
12675
 
@@ -12542,35 +12677,28 @@ Note: This principle directly supports [Lightweight Planning](lightweight-planni
12542
12677
  `
12543
12678
  },
12544
12679
  {
12545
- slug: "fast-feedback",
12546
- content: `# Fast Feedback
12547
-
12548
- Dust should provide fast feedback loops for developers.
12549
-
12550
- Scripts and tooling should execute quickly so developers can iterate rapidly. Slow feedback discourages frequent validation and leads to larger, riskier changes. Fast feedback enables small, confident steps.
12551
-
12552
- ## Parent Principle
12680
+ slug: "keep-unit-tests-pure",
12681
+ content: `# Keep Unit Tests Pure
12553
12682
 
12554
- - [Make Changes with Confidence](make-changes-with-confidence.md)
12683
+ Unit tests (those run very frequently as part of a tight feedback loop) should be pure and side-effect free. A test is **not** a unit test if it:
12555
12684
 
12556
- ## Sub-Principles
12685
+ - Accesses a database
12686
+ - Communicates over a network
12687
+ - Touches the file system
12688
+ - Cannot run concurrently with other tests
12689
+ - Requires special environment setup
12557
12690
 
12558
- - (none)
12559
- `
12560
- },
12561
- {
12562
- slug: "dependency-injection",
12563
- content: `# Dependency Injection
12691
+ "Unit tests" here means tests run frequently during development — not system tests, which intentionally exercise the full stack including I/O. Pure unit tests exercise only business logic, not infrastructure.
12564
12692
 
12565
- Avoid global mocks. Dependency injection is almost always preferable to testing code that depends directly on globals.
12693
+ The value of pure unit tests is that they are fast, deterministic, and isolate business logic from infrastructure concerns. When unit tests pass but integration or system tests fail, developers can immediately narrow the problem to the boundary layer — a diagnostic "binary chop" that accelerates debugging.
12566
12694
 
12567
- When code depends on global state or singletons, testing requires mocking those globals—which introduces hidden coupling, complicates test setup, and risks interference between tests. Dependency injection makes dependencies explicit: they're passed in as arguments, making the code's requirements visible and enabling tests to supply controlled implementations.
12695
+ ## Migration Guidance
12568
12696
 
12569
- This approach improves testability (each test controls its own dependencies), readability (dependencies are declared upfront), and flexibility (swapping implementations doesn't require changing the consuming code). It also makes refactoring safer since dependencies are explicit rather than implicit.
12697
+ Where existing tests are impure (e.g. they spawn processes, write temporary files, or make network calls), prefer converting them to use in-memory alternatives — stubs, fakes, or dependency-injected doubles — rather than leaving them as-is. Opportunistic migration is fine; a big-bang rewrite is not required.
12570
12698
 
12571
12699
  ## Parent Principle
12572
12700
 
12573
- - [Decoupled Code](decoupled-code.md)
12701
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
12574
12702
 
12575
12703
  ## Sub-Principles
12576
12704
 
@@ -12578,16 +12706,16 @@ This approach improves testability (each test controls its own dependencies), re
12578
12706
  `
12579
12707
  },
12580
12708
  {
12581
- slug: "reproducible-checks",
12582
- content: `# Reproducible Checks
12709
+ slug: "runtime-agnostic-tests",
12710
+ content: `# Runtime Agnostic Tests
12583
12711
 
12584
- Every check must produce the same result regardless of who runs it, when, or on what machine. If a check passes for one developer but fails for another, the check is broken.
12712
+ Dust's test suite should work across JavaScript runtimes.
12585
12713
 
12586
- Concretely, checks should pin their tool versions via the project's dependency manager (e.g. \`devDependencies\`) rather than relying on \`npx\`/\`bunx\` to fetch the latest version at runtime. Unpinned versions introduce non-determinism — a check that passed yesterday may fail today due to a tool upgrade that nobody chose to adopt.
12714
+ Tests should use standard JavaScript testing patterns that work across Node.js, Bun, and other runtimes. Avoiding runtime-specific test APIs ensures the project can leverage different runtimes' advantages while maintaining broad compatibility.
12587
12715
 
12588
12716
  ## Parent Principle
12589
12717
 
12590
- - [Make Changes with Confidence](make-changes-with-confidence.md)
12718
+ - [Minimal Dependencies](minimal-dependencies.md)
12591
12719
 
12592
12720
  ## Sub-Principles
12593
12721
 
@@ -12595,18 +12723,18 @@ Concretely, checks should pin their tool versions via the project's dependency m
12595
12723
  `
12596
12724
  },
12597
12725
  {
12598
- slug: "slow-feedback-coping",
12599
- content: `# Slow Feedback Coping
12726
+ slug: "unsurprising-ux",
12727
+ content: `# Unsurprising UX
12600
12728
 
12601
- Some feedback is unavoidably slow — dust should offer coping strategies rather than pretending it can be eliminated.
12729
+ The user interface should be as "guessable" as possible.
12602
12730
 
12603
- Integration tests, end-to-end tests, deployment pipelines, and external API calls all take time. Pretending they can be made instant is unrealistic. Instead, dust should help developers and agents cope with slow feedback effectively: by structuring work so that fast checks catch most problems early, by batching slow checks intelligently, by providing clear progress indicators, and by ensuring that when slow feedback does arrive, it is actionable and specific.
12731
+ Following the [Principle of Least Astonishment](https://en.wikipedia.org/wiki/Principle_of_least_astonishment), users form expectations about how a tool will behave based on conventions, prior experience, and intuition. Dust's interface (including the CLI) should match those expectations wherever possible. If users are observed trying to use the interface in ways we didn't anticipate, the interface should be adjusted to meet their expectations even if that means supporting many ways of achieving the same result.
12604
12732
 
12605
- Strategies include separating fast and slow test suites, running slow checks asynchronously or in CI, caching expensive operations, and designing workflows that minimise how often slow feedback is needed.
12733
+ Surprising behavior erodes trust and slows people down. Unsurprising behavior lets users stay in flow.
12606
12734
 
12607
12735
  ## Parent Principle
12608
12736
 
12609
- - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12737
+ - [Easy Adoption](easy-adoption.md)
12610
12738
 
12611
12739
  ## Sub-Principles
12612
12740
 
@@ -12614,127 +12742,113 @@ Strategies include separating fast and slow test suites, running slow checks asy
12614
12742
  `
12615
12743
  },
12616
12744
  {
12617
- slug: "make-changes-with-confidence",
12618
- content: `# Make Changes with Confidence
12745
+ slug: "unit-test-coverage",
12746
+ content: `# Unit Test Coverage
12619
12747
 
12620
- Developers should be able to modify code without fear of breaking existing behavior.
12748
+ Complete unit test coverage ensures low-level tests give users direct feedback as they change the code.
12621
12749
 
12622
- Tests, type checking, and other automated verification enable safe refactoring and evolution of the codebase. When changes break something, fast feedback identifies the problem before it spreads. This confidence encourages continuous improvement rather than fragile, stagnant code.
12750
+ Excluding system tests from coverage reporting focuses attention on unit tests - the tests that provide the fastest, most specific feedback. When coverage tools only measure unit tests, developers can quickly identify which parts of the codebase lack fine-grained test protection.
12623
12751
 
12624
12752
  ## Parent Principle
12625
12753
 
12626
- - [Maintainable Codebase](maintainable-codebase.md)
12754
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
12627
12755
 
12628
12756
  ## Sub-Principles
12629
12757
 
12630
- - [Comprehensive Assertions](comprehensive-assertions.md)
12631
- - [Decoupled Code](decoupled-code.md)
12632
- - [Fast Feedback](fast-feedback.md)
12633
- - [Lint Everything](lint-everything.md)
12634
- - [Readable Test Data](readable-test-data.md)
12635
- - [Reproducible Checks](reproducible-checks.md)
12636
- - [Stop the Line](stop-the-line.md)
12637
- - [Keep Unit Tests Pure](keep-unit-tests-pure.md)
12638
- - [Test Isolation](test-isolation.md)
12639
- - [Self-Diagnosing Tests](self-diagnosing-tests.md)
12640
- - [Unit Test Coverage](unit-test-coverage.md)
12758
+ - (none)
12641
12759
  `
12642
12760
  },
12643
12761
  {
12644
- slug: "test-isolation",
12645
- content: `# Test Isolation
12762
+ slug: "cross-platform-compatibility",
12763
+ content: `# Cross-Platform Compatibility
12646
12764
 
12647
- Tests should not interfere with one another. Each test must be independently runnable and produce the same result regardless of execution order or which other tests run alongside it.
12765
+ Dust should work consistently across operating systems: Linux, macOS, and Windows.
12648
12766
 
12649
12767
  This means:
12650
- - No shared mutable state between tests
12651
- - No reliance on test execution order
12652
- - No file system or environment pollution
12653
- - Each test sets up its own dependencies
12768
+ - Avoiding platform-specific shell commands or syntax
12769
+ - Using cross-platform path handling
12770
+ - Testing on multiple platforms when possible
12771
+ - Documenting any platform-specific limitations
12654
12772
 
12655
- Test isolation enables parallel execution, makes failures easier to diagnose, and prevents cascading false failures when one test breaks.
12773
+ Cross-platform support broadens adoption and ensures teams with mixed environments can collaborate effectively.
12656
12774
 
12657
12775
  ## Parent Principle
12658
12776
 
12659
- - [Make Changes with Confidence](make-changes-with-confidence.md)
12777
+ - [Easy Adoption](easy-adoption.md)
12660
12778
 
12661
12779
  ## Sub-Principles
12662
12780
 
12663
- - [Environment-Independent Tests](environment-independent-tests.md)
12781
+ - (none)
12664
12782
  `
12665
12783
  },
12666
12784
  {
12667
- slug: "repository-hygiene",
12668
- content: `# Repository Hygiene
12785
+ slug: "vcs-independence",
12786
+ content: `# VCS Independence
12669
12787
 
12670
- Dust repositories should maintain a clean, organized state with minimal noise.
12788
+ Dust should work independently of any specific version control system.
12671
12789
 
12672
- This includes proper gitignore configuration to exclude build artifacts, dependencies, editor files, and other generated content from version control. A well-maintained repository makes it easier for both humans and AI to navigate and understand the codebase.
12790
+ While git is common, dust's core functionality should not require git. This enables use in repositories using other VCS (Mercurial, SVN, Perforce) or in non-VCS workflows.
12673
12791
 
12674
12792
  ## Parent Principle
12675
12793
 
12676
- - [Maintainable Codebase](maintainable-codebase.md)
12794
+ - [Easy Adoption](easy-adoption.md)
12677
12795
 
12678
12796
  ## Sub-Principles
12679
12797
 
12680
- - [Atomic Commits](atomic-commits.md)
12681
- - [Trunk-Based Development](trunk-based-development.md)
12798
+ - (none)
12682
12799
  `
12683
12800
  },
12684
12801
  {
12685
- slug: "agentic-flow-state",
12686
- content: `# Agentic Flow State
12687
-
12688
- Flow is the mental state where work becomes effortless - where you're fully immersed, losing track of time, operating at peak performance. Psychologist Mihaly Csikszentmihalyi identified three conditions that create flow: clear goals, immediate feedback, and challenge-skill balance.
12802
+ slug: "self-contained-repository",
12803
+ content: `# Self-Contained Repository
12689
12804
 
12690
- For AI agents, achieving flow state means staying engaged and productive without interruption. Agents enter flow when they have optimal context, comprehensive guard rails, and minimal friction. Context window optimization ensures agents have exactly what they need without cognitive overload. In-session guard rails prevent agents from straying off course or making mistakes that break their momentum.
12805
+ Where possible, developers and agents should have everything they need to be productive, within the repository.
12691
12806
 
12692
- Dust's design targets these conditions directly:
12807
+ No third-party tools should be required beyond those that can be installed with a single command defined in the repository. Setup instructions, scripts, configuration, and dependencies should all live in version control so that cloning the repo and running a single install command is sufficient to start working. This eliminates onboarding friction, reduces "works on my machine" issues, and is especially important for agents — who cannot browse the web to find missing tools or ask colleagues how to set things up.
12693
12808
 
12694
- - **Clear goals**: Task files and lightweight planning give you a concrete target. You know exactly what you're building next.
12695
- - **Immediate feedback**: Fast feedback loops let you see results quickly. Each change confirms you're on track or shows you what to adjust.
12696
- - **Challenge-skill balance**: Small units of work and agent autonomy keep you in the zone - challenged enough to stay engaged, supported enough to succeed.
12697
- - **Context window efficiency**: Progressive disclosure and artifact summarization ensure agents have the right context without overflow.
12698
- - **Comprehensive guard rails**: Lint rules, type checks, and automated validation catch mistakes before they compound.
12809
+ ## Applicability
12699
12810
 
12700
- Everything dust does serves flow. When agents stay in flow, they produce better work, sustain their momentum, and complete tasks autonomously.
12811
+ Internal
12701
12812
 
12702
12813
  ## Parent Principle
12703
12814
 
12704
- - (none)
12815
+ - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12705
12816
 
12706
12817
  ## Sub-Principles
12707
12818
 
12708
- - [Human-AI Collaboration](human-ai-collaboration.md)
12709
- - [Maintainable Codebase](maintainable-codebase.md)
12819
+ - (none)
12710
12820
  `
12711
12821
  },
12712
12822
  {
12713
- slug: "stop-the-line",
12714
- content: `# Stop the Line
12823
+ slug: "minimal-dependencies",
12824
+ content: `# Minimal Dependencies
12715
12825
 
12716
- Any worker — human or agent — should halt and fix a problem the moment they detect it, rather than letting defects propagate downstream.
12826
+ Dust should avoid coupling to specific tools so we can switch to better alternatives as they emerge.
12717
12827
 
12718
- Originating from the Toyota production system, "Stop the Line" empowers every participant to pause work immediately upon identifying a defect, failing check, or safety hazard. Problems are cheaper to fix at their source than after they've compounded through later stages. In the context of dust, this means agents and humans alike should treat broken checks, test failures, and lint errors as blockers that demand immediate attention — not warnings to be deferred.
12828
+ By keeping dependencies minimal and using standard APIs where possible, we maintain the freedom to adopt new tools without major rewrites. This applies to runtimes, test frameworks, build tools, and other infrastructure choices.
12719
12829
 
12720
12830
  ## Parent Principle
12721
12831
 
12722
- - [Make Changes with Confidence](make-changes-with-confidence.md)
12832
+ - [Maintainable Codebase](maintainable-codebase.md)
12723
12833
 
12724
12834
  ## Sub-Principles
12725
12835
 
12726
- - (none)
12836
+ - [Runtime Agnostic Tests](runtime-agnostic-tests.md)
12727
12837
  `
12728
12838
  },
12729
12839
  {
12730
- slug: "agent-context-inference",
12731
- content: `# Agent Context Inference
12840
+ slug: "agent-specific-enhancement",
12841
+ content: `# Agent-Specific Enhancement
12732
12842
 
12733
- Terse human prompts should trigger the correct agent action.
12843
+ Dust should detect and enhance the experience for specific agents while remaining agnostic at its core.
12734
12844
 
12735
- When a human gives a brief instruction like "the button should be green", the agent should be able to infer what to do. The agent shouldn't require the human to specify file paths, component names, or implementation details that can be discovered from the repository.
12845
+ While Dust has [Agent-Agnostic Design](agent-agnostic-design.md) and works with any capable agent, it can still optimize the "agent DX" (developer experience) when it detects a specific agent is being used. This means:
12736
12846
 
12737
- This reduces friction for humans and makes agent interactions feel more natural. The burden of context discovery shifts to the agent, which can use dust's CLI and repository structure to find what it needs.
12847
+ - **Detection** - Dust may detect which agent is running (e.g., Claude Code, Aider, Cursor) through environment variables, configuration, or other signals
12848
+ - **Enhancement** - Once detected, Dust can tailor its output format, prompts, or context to leverage that agent's specific strengths
12849
+ - **Graceful fallback** - When no specific agent is detected, Dust provides a generic experience that works with any agent
12850
+
12851
+ This principle complements Agent-Agnostic Design: the core functionality never requires a specific agent, but the experience improves when one is recognized.
12738
12852
 
12739
12853
  ## Applicability
12740
12854
 
@@ -12750,40 +12864,57 @@ Internal
12750
12864
  `
12751
12865
  },
12752
12866
  {
12753
- slug: "naming-matters",
12754
- content: `# Naming Matters
12867
+ slug: "self-diagnosing-tests",
12868
+ content: `# Self-Diagnosing Tests
12755
12869
 
12756
- Good naming reduces waste by eliminating confusion and making code self-documenting.
12870
+ When a big test fails, it should be self-evident how to diagnose and fix the failure.
12757
12871
 
12758
- Poor names cause rework, bugs, and communication overhead. When names don't clearly convey meaning, developers waste time deciphering code, misunderstand intentions, and introduce defects. Well-chosen names serve as documentation that never goes stale, reducing the need for explanatory comments and enabling both humans and AI agents to navigate the codebase efficiently.
12872
+ The more moving parts a test has — end-to-end, system, integration — the more critical this becomes. A test that fails with \`expected true, received false\` forces the developer (or agent) to re-run, add logging, and guess. A test that fails with a rich diff showing the actual state versus the expected state turns diagnosis into reading.
12759
12873
 
12760
- ## Parent Principle
12874
+ ## Anti-patterns
12761
12875
 
12762
- - [Maintainable Codebase](maintainable-codebase.md)
12876
+ **Boolean flattening** — collapsing a rich value into true/false before asserting:
12877
+ \`\`\`javascript
12878
+ // Bad: "expected true, received false" — what events arrived?
12879
+ expect(events.some(e => e.type === 'check-passed')).toBe(true)
12763
12880
 
12764
- ## Sub-Principles
12881
+ // Good: shows the actual event types on failure
12882
+ expect(events.map(e => e.type)).toContain('check-passed')
12883
+ \`\`\`
12765
12884
 
12766
- - [Consistent Naming](consistent-naming.md)
12767
- - [Clarity Over Brevity](clarity-over-brevity.md)
12768
- `
12769
- },
12770
- {
12771
- slug: "stubs-over-mocks",
12772
- content: `# Stubs Over Mocks
12885
+ **Length-only assertions** — checking count without showing contents:
12886
+ \`\`\`javascript
12887
+ // Bad: "expected 2, received 0" — what requests were captured?
12888
+ expect(requests.length).toBe(2)
12773
12889
 
12774
- Prefer hand-rolled stubs over mocks, in unit tests. Stubs keep tests focused on observable behavior instead of implementation details.
12890
+ // Good: shows the actual requests on failure
12891
+ expect(requests).toHaveLength(2) // vitest shows the array
12892
+ \`\`\`
12775
12893
 
12776
- Mocks tend to encode a script of “expected calls” (what was invoked, in what order, with what arguments). That makes tests brittle: harmless refactors (changing internal decomposition, adding caching, batching calls, reordering operations) can break tests even when the externally visible behavior is unchanged. You end up maintaining tests that police how the code works rather than what it does.
12894
+ **Silent guards** — using \`if\` where an assertion belongs:
12895
+ \`\`\`javascript
12896
+ // Bad: silently passes when settings is undefined
12897
+ if (settings) {
12898
+ expect(JSON.parse(settings).key).toBeDefined()
12899
+ }
12777
12900
 
12778
- Stubs (and especially in-memory emulators) push tests toward the contract: provide inputs, run the code, assert outputs and side effects. When a test fails, it’s usually because a behavior changed, not because the internal call choreography shifted. That improves signal-to-noise, reduces rewrites during refactors, and makes it easier to evolve the implementation.
12901
+ // Good: fails explicitly if settings is missing
12902
+ expect(settings).toBeDefined()
12903
+ const parsed = JSON.parse(settings!)
12904
+ expect(parsed.key).toBeDefined()
12905
+ \`\`\`
12779
12906
 
12780
- For external dependencies (databases, queues, object stores, HTTP services), the default choice should be an in-memory emulator: a drop-in replacement that is faithful enough to the real interface/semantics but runs entirely in-process. It gives most of the benefits of integration testing—realistic state transitions, error modes, concurrency behavior where relevant—without the cost, flakiness, and setup burden of booting real infrastructure. It also keeps the test environment hermetic (no network, no shared state), which improves determinism and makes tests fast.
12907
+ ## The test
12781
12908
 
12782
- Still use mocks selectively—mainly to assert something is called (e.g., telemetry emission, "at most once" notifications, payment capture guarded by a feature flag) or when a dependency is impossible to emulate. But for most cases, stubs and in-memory emulators produce tests that are clearer, more resilient to refactoring, and better aligned with the system's actual contracts.
12909
+ If a test fails, can a developer who has never seen the code identify the problem from the failure output alone — without re-running, adding console.logs, or reading the test source? The closer to "yes", the better.
12910
+
12911
+ ## How to evaluate
12912
+
12913
+ Work supports this principle when every assertion in a system or integration test would, on failure, reveal the actual state richly enough to guide a fix. Bare boolean checks, length-only assertions, and silent conditional guards are violations.
12783
12914
 
12784
12915
  ## Parent Principle
12785
12916
 
12786
- - [Decoupled Code](decoupled-code.md)
12917
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
12787
12918
 
12788
12919
  ## Sub-Principles
12789
12920
 
@@ -12791,18 +12922,18 @@ Still use mocks selectively—mainly to assert something is called (e.g., teleme
12791
12922
  `
12792
12923
  },
12793
12924
  {
12794
- slug: "functional-core-imperative-shell",
12795
- content: `# Functional Core, Imperative Shell
12925
+ slug: "slow-feedback-coping",
12926
+ content: `# Slow Feedback Coping
12796
12927
 
12797
- Separate code into a pure "functional core" and a thin "imperative shell." The core takes values in and returns values out, with no side effects. The shell handles I/O and wires things together.
12928
+ Some feedback is unavoidably slow — dust should offer coping strategies rather than pretending it can be eliminated.
12798
12929
 
12799
- Purely functional code makes some things easier to understand: because values don't change, you can call functions and know that only their return value matters—they don't change anything outside themselves.
12930
+ Integration tests, end-to-end tests, deployment pipelines, and external API calls all take time. Pretending they can be made instant is unrealistic. Instead, dust should help developers and agents cope with slow feedback effectively: by structuring work so that fast checks catch most problems early, by batching slow checks intelligently, by providing clear progress indicators, and by ensuring that when slow feedback does arrive, it is actionable and specific.
12800
12931
 
12801
- The functional core contains business logic as pure functions that take values and return values. The imperative shell sits at the boundary, reading input, calling into the core, and performing side effects with the results. This keeps the majority of code easy to test (no mocks or stubs needed for pure functions) and makes the I/O surface area small and explicit.
12932
+ Strategies include separating fast and slow test suites, running slow checks asynchronously or in CI, caching expensive operations, and designing workflows that minimise how often slow feedback is needed.
12802
12933
 
12803
12934
  ## Parent Principle
12804
12935
 
12805
- - [Decoupled Code](decoupled-code.md)
12936
+ - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12806
12937
 
12807
12938
  ## Sub-Principles
12808
12939
 
@@ -12810,51 +12941,61 @@ The functional core contains business logic as pure functions that take values a
12810
12941
  `
12811
12942
  },
12812
12943
  {
12813
- slug: "development-traceability",
12814
- content: `# Development Traceability
12944
+ slug: "agentic-flow-state",
12945
+ content: `# Agentic Flow State
12815
12946
 
12816
- Structured logging and tracing help agents understand system behaviour without resorting to ad-hoc testing cycles.
12947
+ Flow is the mental state where work becomes effortless - where you're fully immersed, losing track of time, operating at peak performance. Psychologist Mihaly Csikszentmihalyi identified three conditions that create flow: clear goals, immediate feedback, and challenge-skill balance.
12817
12948
 
12818
- When something goes wrong, agents often resort to adding temporary log statements, running the code, reading the output, and repeating — a slow and wasteful debugging loop. Good traceability means the system already records what happened and why, through structured logs, trace IDs, and observable state. This lets agents diagnose issues by reading existing output rather than generating new experiments.
12949
+ For AI agents, achieving flow state means staying engaged and productive without interruption. Agents enter flow when they have optimal context, comprehensive guard rails, and minimal friction. Context window optimization ensures agents have exactly what they need without cognitive overload. In-session guard rails prevent agents from straying off course or making mistakes that break their momentum.
12819
12950
 
12820
- Dust should encourage projects to adopt structured logging, promote traceability as a first-class concern, and provide tools that surface relevant trace information when agents need it.
12951
+ Dust's design targets these conditions directly:
12821
12952
 
12822
- ## Applicability
12953
+ - **Clear goals**: Task files and lightweight planning give you a concrete target. You know exactly what you're building next.
12954
+ - **Immediate feedback**: Fast feedback loops let you see results quickly. Each change confirms you're on track or shows you what to adjust.
12955
+ - **Challenge-skill balance**: Small units of work and agent autonomy keep you in the zone - challenged enough to stay engaged, supported enough to succeed.
12956
+ - **Context window efficiency**: Progressive disclosure and artifact summarization ensure agents have the right context without overflow.
12957
+ - **Comprehensive guard rails**: Lint rules, type checks, and automated validation catch mistakes before they compound.
12823
12958
 
12824
- Internal
12959
+ Everything dust does serves flow. When agents stay in flow, they produce better work, sustain their momentum, and complete tasks autonomously.
12825
12960
 
12826
12961
  ## Parent Principle
12827
12962
 
12828
- - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12963
+ - (none)
12829
12964
 
12830
12965
  ## Sub-Principles
12831
12966
 
12832
- - (none)
12967
+ - [Human-AI Collaboration](human-ai-collaboration.md)
12968
+ - [Maintainable Codebase](maintainable-codebase.md)
12833
12969
  `
12834
12970
  },
12835
12971
  {
12836
- slug: "keep-unit-tests-pure",
12837
- content: `# Keep Unit Tests Pure
12972
+ slug: "reproducible-checks",
12973
+ content: `# Reproducible Checks
12838
12974
 
12839
- Unit tests (those run very frequently as part of a tight feedback loop) should be pure and side-effect free. A test is **not** a unit test if it:
12975
+ Every check must produce the same result regardless of who runs it, when, or on what machine. If a check passes for one developer but fails for another, the check is broken.
12840
12976
 
12841
- - Accesses a database
12842
- - Communicates over a network
12843
- - Touches the file system
12844
- - Cannot run concurrently with other tests
12845
- - Requires special environment setup
12977
+ Concretely, checks should pin their tool versions via the project's dependency manager (e.g. \`devDependencies\`) rather than relying on \`npx\`/\`bunx\` to fetch the latest version at runtime. Unpinned versions introduce non-determinism — a check that passed yesterday may fail today due to a tool upgrade that nobody chose to adopt.
12846
12978
 
12847
- "Unit tests" here means tests run frequently during development — not system tests, which intentionally exercise the full stack including I/O. Pure unit tests exercise only business logic, not infrastructure.
12979
+ ## Parent Principle
12848
12980
 
12849
- The value of pure unit tests is that they are fast, deterministic, and isolate business logic from infrastructure concerns. When unit tests pass but integration or system tests fail, developers can immediately narrow the problem to the boundary layer — a diagnostic "binary chop" that accelerates debugging.
12981
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
12850
12982
 
12851
- ## Migration Guidance
12983
+ ## Sub-Principles
12852
12984
 
12853
- Where existing tests are impure (e.g. they spawn processes, write temporary files, or make network calls), prefer converting them to use in-memory alternatives — stubs, fakes, or dependency-injected doubles — rather than leaving them as-is. Opportunistic migration is fine; a big-bang rewrite is not required.
12985
+ - (none)
12986
+ `
12987
+ },
12988
+ {
12989
+ slug: "task-first-workflow",
12990
+ content: `# Task-First Workflow
12991
+
12992
+ Work should be captured as a task before implementation begins, creating traceability between intent and outcome.
12993
+
12994
+ This discipline ensures that every change has a documented purpose. The commit history shows pairs of "Add task" followed by implementation, making it easy to understand why each change was made. It also prevents scope creep by defining boundaries before work starts.
12854
12995
 
12855
12996
  ## Parent Principle
12856
12997
 
12857
- - [Make Changes with Confidence](make-changes-with-confidence.md)
12998
+ - [Lightweight Planning](lightweight-planning.md)
12858
12999
 
12859
13000
  ## Sub-Principles
12860
13001
 
@@ -12862,78 +13003,96 @@ Where existing tests are impure (e.g. they spawn processes, write temporary file
12862
13003
  `
12863
13004
  },
12864
13005
  {
12865
- slug: "co-located-tests",
12866
- content: `# Co-located Tests
13006
+ slug: "ideal-agent-developer-experience",
13007
+ content: `# Ideal Agent Developer Experience
12867
13008
 
12868
- Test files should live next to the code they test.
13009
+ The agent is the developer. The human is the CEO. Dust is the PM.
12869
13010
 
12870
- When tests are co-located with their source files, developers can immediately see what's tested and what isn't. Finding the test for a module becomes trivial—it's right there in the same directory. This proximity encourages writing tests as part of the development flow rather than as an afterthought, and makes it natural to update tests when modifying code.
13011
+ With today's AI coding assistants, the human is stuck in a tight loop with agents — constantly directing, reviewing, and course-correcting. Dust is designed to relieve humans from this tight loop. Like an assistant to a CEO, dust predominantly brings fully-researched questions and well-prepared work to the human, rather than expecting the human to drive every decision. The human checks in less frequently, and when they do, they make high-leverage strategic calls rather than micromanaging implementation.
13012
+
13013
+ For this to work, the agent's development environment must be excellent. The agent reads the code, writes changes, runs the checks, and iterates until the task is done. Everything about the codebase and its tooling either helps or hinders that process. Comprehensive tests are the agent's only way to verify correctness. Fast feedback loops are the agent's iteration speed. Structured logs are the agent's eyes into runtime behaviour. Small, well-organised files are what fit in the agent's context window. Exploratory and debugging tools are how the agent navigates and diagnoses without trial and error.
13014
+
13015
+ Each sub-principle represents a different aspect of the ideal agent developer setup. The better these are, the less the human needs to be in the loop.
12871
13016
 
12872
13017
  ## Parent Principle
12873
13018
 
12874
- - [Intuitive Directory Structure](intuitive-directory-structure.md)
13019
+ - [Human-AI Collaboration](human-ai-collaboration.md)
12875
13020
 
12876
13021
  ## Sub-Principles
12877
13022
 
12878
- - (none)
13023
+ - [Comprehensive Test Coverage](comprehensive-test-coverage.md)
13024
+ - [Fast Feedback Loops](fast-feedback-loops.md)
13025
+ - [Slow Feedback Coping](slow-feedback-coping.md)
13026
+ - [Development Traceability](development-traceability.md)
13027
+ - [Context-Optimised Code](context-optimised-code.md)
13028
+ - [Exploratory Tooling](exploratory-tooling.md)
13029
+ - [Debugging Tooling](debugging-tooling.md)
13030
+ - [Self-Contained Repository](self-contained-repository.md)
12879
13031
  `
12880
13032
  },
12881
13033
  {
12882
- slug: "human-ai-collaboration",
12883
- content: `# Human-AI Collaboration
13034
+ slug: "agent-context-inference",
13035
+ content: `# Agent Context Inference
12884
13036
 
12885
- Dust exists to enable effective collaboration between humans and AI agents on complex projects.
13037
+ Terse human prompts should trigger the correct agent action.
12886
13038
 
12887
- The human is the CEO — they set direction, make strategic decisions, and check in when it matters. Dust is the PM — it manages the work, prepares context, and brings fully-researched questions to the human rather than expecting them to drive every detail. Agents are the developers — they read code, write changes, and iterate autonomously.
13039
+ When a human gives a brief instruction like "the button should be green", the agent should be able to infer what to do. The agent shouldn't require the human to specify file paths, component names, or implementation details that can be discovered from the repository.
12888
13040
 
12889
- Today's AI coding tools keep humans in a tight loop with agents. Dust is designed to loosen that loop, so humans spend less time directing and more time deciding.
13041
+ This reduces friction for humans and makes agent interactions feel more natural. The burden of context discovery shifts to the agent, which can use dust's CLI and repository structure to find what it needs.
13042
+
13043
+ ## Applicability
13044
+
13045
+ Internal
12890
13046
 
12891
13047
  ## Parent Principle
12892
13048
 
12893
- - [Agentic Flow State](agentic-flow-state.md)
13049
+ - [Agent Autonomy](agent-autonomy.md)
12894
13050
 
12895
13051
  ## Sub-Principles
12896
13052
 
12897
- - [Agent Autonomy](agent-autonomy.md)
12898
- - [Easy Adoption](easy-adoption.md)
12899
- - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
12900
- - [Lightweight Planning](lightweight-planning.md)
13053
+ - (none)
12901
13054
  `
12902
13055
  },
12903
13056
  {
12904
- slug: "vcs-independence",
12905
- content: `# VCS Independence
13057
+ slug: "agent-autonomy",
13058
+ content: `# Agent Autonomy
12906
13059
 
12907
- Dust should work independently of any specific version control system.
13060
+ Dust exists to enable AI agents to produce work autonomously.
12908
13061
 
12909
- While git is common, dust's core functionality should not require git. This enables use in repositories using other VCS (Mercurial, SVN, Perforce) or in non-VCS workflows.
13062
+ With sufficient planning and small enough units, this works much better in practice.
12910
13063
 
12911
13064
  ## Parent Principle
12912
13065
 
12913
- - [Easy Adoption](easy-adoption.md)
13066
+ - [Human-AI Collaboration](human-ai-collaboration.md)
12914
13067
 
12915
13068
  ## Sub-Principles
12916
13069
 
12917
- - (none)
13070
+ - [Actionable Errors](actionable-errors.md)
13071
+ - [Batteries Included](batteries-included.md)
13072
+ - [Agent-Agnostic Design](agent-agnostic-design.md)
13073
+ - [Agent Context Inference](agent-context-inference.md)
13074
+ - [Agent-Specific Enhancement](agent-specific-enhancement.md)
13075
+ - [Context Window Efficiency](context-window-efficiency.md)
13076
+ - [Small Units](small-units.md)
12918
13077
  `
12919
13078
  },
12920
13079
  {
12921
- slug: "environment-independent-tests",
12922
- content: `# Environment-Independent Tests
13080
+ slug: "stubs-over-mocks",
13081
+ content: `# Stubs Over Mocks
12923
13082
 
12924
- Tests must produce the same result regardless of where they run. A test that passes locally but fails in CI (or vice versa) is a broken test.
13083
+ Prefer hand-rolled stubs over mocks, in unit tests. Stubs keep tests focused on observable behavior instead of implementation details.
12925
13084
 
12926
- Concretely, tests should never depend on:
12927
- - Ambient environment variables (e.g. \`CLAUDECODE\`, \`CI\`, \`HOME\`)
12928
- - The current working directory or filesystem layout of the host machine
12929
- - Network availability or external services
12930
- - The identity of the user or agent running the tests
13085
+ Mocks tend to encode a script of “expected calls” (what was invoked, in what order, with what arguments). That makes tests brittle: harmless refactors (changing internal decomposition, adding caching, batching calls, reordering operations) can break tests even when the externally visible behavior is unchanged. You end up maintaining tests that police how the code works rather than what it does.
12931
13086
 
12932
- When a function's behavior depends on environment variables, the test must explicitly control those variables (via \`stubEnv\`, dependency injection, or passing an \`env\` parameter) rather than relying on whatever happens to be set in the current shell.
13087
+ Stubs (and especially in-memory emulators) push tests toward the contract: provide inputs, run the code, assert outputs and side effects. When a test fails, it’s usually because a behavior changed, not because the internal call choreography shifted. That improves signal-to-noise, reduces rewrites during refactors, and makes it easier to evolve the implementation.
13088
+
13089
+ For external dependencies (databases, queues, object stores, HTTP services), the default choice should be an in-memory emulator: a drop-in replacement that is faithful enough to the real interface/semantics but runs entirely in-process. It gives most of the benefits of integration testing—realistic state transitions, error modes, concurrency behavior where relevant—without the cost, flakiness, and setup burden of booting real infrastructure. It also keeps the test environment hermetic (no network, no shared state), which improves determinism and makes tests fast.
13090
+
13091
+ Still use mocks selectively—mainly to assert something is called (e.g., telemetry emission, "at most once" notifications, payment capture guarded by a feature flag) or when a dependency is impossible to emulate. But for most cases, stubs and in-memory emulators produce tests that are clearer, more resilient to refactoring, and better aligned with the system's actual contracts.
12933
13092
 
12934
13093
  ## Parent Principle
12935
13094
 
12936
- - [Test Isolation](test-isolation.md)
13095
+ - [Decoupled Code](decoupled-code.md)
12937
13096
 
12938
13097
  ## Sub-Principles
12939
13098
 
@@ -12964,91 +13123,93 @@ Internal
12964
13123
  `
12965
13124
  },
12966
13125
  {
12967
- slug: "atomic-commits",
12968
- content: `# Atomic Commits
12969
-
12970
- Each commit should tell a complete story, bundling implementation changes with their corresponding documentation updates.
13126
+ slug: "consistent-naming",
13127
+ content: `# Consistent Naming
12971
13128
 
12972
- When a task is completed, the commit deletes the task file, updates relevant facts to reflect the new reality, and removes any ideas that have been realized. This discipline ensures that any point in the commit history represents a coherent, self-documenting state of the project.
13129
+ Names should follow established conventions within each category to reduce cognitive load.
12973
13130
 
12974
- Clean commit history is essential because archaeology depends on it. Future humans and AI agents will traverse history to understand why decisions were made and how the system evolved.
13131
+ Principles use Title Case. File names use kebab-case. Commands use lowercase with hyphens. When naming conventions exist, follow them. When they don't, establish one and apply it consistently. Inconsistent naming creates friction for both humans and AI agents trying to predict or recall identifiers.
12975
13132
 
12976
13133
  ## Parent Principle
12977
13134
 
12978
- - [Repository Hygiene](repository-hygiene.md)
13135
+ - [Naming Matters](naming-matters.md)
12979
13136
 
12980
13137
  ## Sub-Principles
12981
13138
 
12982
- - [Traceable Decisions](traceable-decisions.md)
13139
+ - (none)
12983
13140
  `
12984
13141
  },
12985
13142
  {
12986
- slug: "trunk-based-development",
12987
- content: `# Trunk-Based Development
12988
-
12989
- Dust is designed to support a non-branching workflow where developers commit directly to a single main branch.
12990
-
12991
- In trunk-based development, teams collaborate on code in one primary branch rather than maintaining multiple long-lived feature branches. This eliminates merge conflicts, enables continuous integration, and keeps the codebase continuously releasable.
13143
+ slug: "lightweight-planning",
13144
+ content: `# Lightweight Planning
12992
13145
 
12993
- The \`dust loop claude\` command embodies this philosophy: agents pull from main, implement a task, and push directly back to main. There are no feature branches, no pull requests, no merge queues. Each commit is atomic and complete.
13146
+ Dust aims to be a minimal, low-overhead planning system that stays relevant over time.
12994
13147
 
12995
- This approach scales through discipline rather than isolation. Feature flags and incremental changes replace long-running branches. The repository history becomes a linear sequence of working states.
13148
+ Planning artifacts are simple markdown files that live alongside code. Ideas are intentionally vague until implementation is imminent. Tasks are small and completable in single commits. Facts document current reality rather than aspirational states.
12996
13149
 
12997
- See: https://trunkbaseddevelopment.com/
13150
+ The system avoids the staleness problem by deferring detail until the last responsible moment and deleting completed work rather than archiving it.
12998
13151
 
12999
13152
  ## Parent Principle
13000
13153
 
13001
- - [Repository Hygiene](repository-hygiene.md)
13154
+ - [Human-AI Collaboration](human-ai-collaboration.md)
13002
13155
 
13003
13156
  ## Sub-Principles
13004
13157
 
13005
- (none)
13158
+ - [Task-First Workflow](task-first-workflow.md)
13159
+ - [Some Big Design Up Front](some-big-design-up-front.md)
13006
13160
  `
13007
13161
  },
13008
13162
  {
13009
- slug: "comprehensive-assertions",
13010
- content: `# Comprehensive Assertions
13163
+ slug: "easy-adoption",
13164
+ content: `# Easy Adoption
13011
13165
 
13012
- Assert the whole, not the parts.
13166
+ Dust should be trivially easy to adopt in any repository.
13013
13167
 
13014
- When you break a complex object into many small assertions, a failure tells you *one thing that's wrong*. When you assert against the whole expected value, the diff tells you *what actually happened versus what you expected* — the full picture, in one glance.
13168
+ Getting started with Dust should require minimal friction. A developer should be able to bootstrap Dust in their repository with a single command, without needing to install dependencies, configure build tools, or understand the internals.
13015
13169
 
13016
- Small assertions are like yes/no questions to a witness. A whole-object assertion is like asking "tell me what you saw."
13170
+ This lowers the barrier to entry and encourages experimentation.
13017
13171
 
13018
- ## In practice
13172
+ ## Parent Principle
13019
13173
 
13020
- Collapse multiple partial assertions into one comprehensive assertion:
13174
+ - [Human-AI Collaboration](human-ai-collaboration.md)
13021
13175
 
13022
- \`\`\`javascript
13023
- // Fragmented — each failure is a narrow keyhole
13024
- expect(result.name).toBe("Alice");
13025
- expect(result.age).toBe(30);
13026
- expect(result.role).toBe("admin");
13176
+ ## Sub-Principles
13027
13177
 
13028
- // Whole — a failure diff tells the full story
13029
- expect(result).toEqual({
13030
- name: "Alice",
13031
- age: 30,
13032
- role: "admin",
13033
- });
13034
- \`\`\`
13178
+ - [Cross-Platform Compatibility](cross-platform-compatibility.md)
13179
+ - [Unsurprising UX](unsurprising-ux.md)
13180
+ - [VCS Independence](vcs-independence.md)
13181
+ `
13182
+ },
13183
+ {
13184
+ slug: "intuitive-directory-structure",
13185
+ content: `# Intuitive Directory Structure
13035
13186
 
13036
- If \`role\` is \`"user"\` and \`age\` is \`29\`, the fragmented version stops at the first failure. The whole-object assertion shows both discrepancies at once, in context.
13187
+ Code should be organized around related concerns in clearly named directories.
13037
13188
 
13038
- The same applies to arrays:
13189
+ When files that serve similar purposes are grouped together, the codebase becomes easier to navigate and understand. A developer looking for "commands" should find them in a \`commands\` directory. Utilities should live with utilities. This organization reduces cognitive load and makes the project structure self-documenting.
13039
13190
 
13040
- \`\`\`javascript
13041
- // Avoid: partial assertions that hide the actual state
13042
- expect(array).toContain('apples')
13043
- expect(array).toContain('oranges')
13191
+ ## Parent Principle
13044
13192
 
13045
- // Prefer: one assertion that reveals the full picture on failure
13046
- expect(array).toEqual(['apples', 'oranges'])
13047
- \`\`\`
13193
+ - [Maintainable Codebase](maintainable-codebase.md)
13048
13194
 
13049
- ## How to evaluate
13195
+ ## Sub-Principles
13050
13196
 
13051
- Work supports this principle when test failures tell a rich story — showing the complete actual value alongside the complete expected value, so the reader can understand what happened without re-running anything.
13197
+ - [Co-located Tests](co-located-tests.md)
13198
+ `
13199
+ },
13200
+ {
13201
+ slug: "lint-everything",
13202
+ content: `# Lint Everything
13203
+
13204
+ Prefer static analysis over runtime checks. Every error caught by a linter is an error that never reaches tests, and every error caught by tests is an error that never reaches production.
13205
+
13206
+ Lint markdown, lint types, lint formatting. If it can be checked statically, check it. Linters are fast, deterministic, and catch entire categories of bugs before code even runs.
13207
+
13208
+ This project lints:
13209
+ - TypeScript (type checking and style)
13210
+ - Markdown (broken links, required sections)
13211
+ - Task files (structure validation)
13212
+ - Principle hierarchy (parent/child consistency)
13052
13213
 
13053
13214
  ## Parent Principle
13054
13215
 
@@ -13056,26 +13217,22 @@ Work supports this principle when test failures tell a rich story — showing th
13056
13217
 
13057
13218
  ## Sub-Principles
13058
13219
 
13059
- - (none)
13220
+ - (none)
13060
13221
  `
13061
13222
  },
13062
13223
  {
13063
- slug: "cross-platform-compatibility",
13064
- content: `# Cross-Platform Compatibility
13224
+ slug: "progressive-disclosure",
13225
+ content: `# Progressive Disclosure
13065
13226
 
13066
- Dust should work consistently across operating systems: Linux, macOS, and Windows.
13227
+ Dust should reveal details progressively as a way of achieving context window efficiency.
13067
13228
 
13068
- This means:
13069
- - Avoiding platform-specific shell commands or syntax
13070
- - Using cross-platform path handling
13071
- - Testing on multiple platforms when possible
13072
- - Documenting any platform-specific limitations
13229
+ Not all information is needed at once. A task list showing just titles is sufficient for choosing what to work on. Full task details are only needed when actively implementing. Linked principles and facts can be followed when deeper context is required.
13073
13230
 
13074
- Cross-platform support broadens adoption and ensures teams with mixed environments can collaborate effectively.
13231
+ This layered approach keeps initial reads lightweight while preserving access to complete information when needed.
13075
13232
 
13076
13233
  ## Parent Principle
13077
13234
 
13078
- - [Easy Adoption](easy-adoption.md)
13235
+ - [Context Window Efficiency](context-window-efficiency.md)
13079
13236
 
13080
13237
  ## Sub-Principles
13081
13238
 
@@ -13083,18 +13240,14 @@ Cross-platform support broadens adoption and ensures teams with mixed environmen
13083
13240
  `
13084
13241
  },
13085
13242
  {
13086
- slug: "exploratory-tooling",
13087
- content: `# Exploratory Tooling
13088
-
13089
- Agents need tools to efficiently explore and understand unfamiliar codebases.
13090
-
13091
- When an agent encounters a new codebase — or an unfamiliar corner of a familiar one — it needs to quickly build a mental model: what exists, how it fits together, and where to make changes. Without good exploratory tools, agents waste context on trial-and-error searches, reading irrelevant files, and forming incorrect assumptions.
13243
+ slug: "context-optimised-code",
13244
+ content: `# Context-Optimised Code
13092
13245
 
13093
- Dust should promote and integrate tools that help agents explore: dependency graphs, module overviews, search utilities tuned for code navigation, and summaries of project structure. The goal is to make the "orientation" phase of any task as short and reliable as possible.
13246
+ Code should be structured so that agents can understand and modify it within their context window constraints.
13094
13247
 
13095
- ## Applicability
13248
+ Large files, deeply nested abstractions, and sprawling dependency chains all work against agents. A 3,000-line file cannot be fully loaded into context. A function that requires understanding six levels of indirection demands more context than one that is self-contained. Context-optimised code favours small files, shallow abstractions, explicit dependencies, and co-located related logic.
13096
13249
 
13097
- Internal
13250
+ Dust should help projects identify files that are too large, modules that are too tangled, and patterns that make agent comprehension harder than it needs to be. This is not just about file size — it is about ensuring that the unit of code an agent needs to understand fits comfortably within the window available.
13098
13251
 
13099
13252
  ## Parent Principle
13100
13253
 
@@ -13106,16 +13259,37 @@ Internal
13106
13259
  `
13107
13260
  },
13108
13261
  {
13109
- slug: "reasonably-dry",
13110
- content: `# Reasonably DRY
13262
+ slug: "some-big-design-up-front",
13263
+ content: `# Some Big Design Up Front
13111
13264
 
13112
- Don't repeat yourself is a good principle, but don't overdo it.
13265
+ AI agents lower the cost of architectural exploration, making heavier upfront investment rational during the idea phase.
13113
13266
 
13114
- Extracting shared code too eagerly can create tight coupling, obscure intent, and make changes harder. When two pieces of code look similar but serve different purposes or are likely to evolve independently, duplication is the better choice. The cost of a wrong abstraction is higher than the cost of a little repetition. Extract shared code when the duplication is truly about the same concept and has proven stable, not just because two things happen to look alike right now.
13267
+ Agile's rejection of "big design up front" (BDUF) was largely economic: detailed architecture was expensive to produce and often wrong. AI agents change that equation: they can explore multiple variants, prototype them, and measure trade-offs cheaply. When evaluating alternatives costs less, the expected value of avoiding large structural mistakes increases.
13268
+
13269
+ This doesn't mean returning to traditional BDUF. Uncertainty about future requirements still limits what prediction can achieve. The insight is that the optimal amount of upfront work has shifted, not that prediction became reliable.
13270
+
13271
+ The model is hybrid: thorough AI-assisted exploration during ideas, followed by straightforward execution during tasks. "Lightweight" refers to task-level planning, not idea-level exploration. Invest heavily in understanding alternatives during the idea phase, then decompose into atomic tasks once the direction is clear.
13272
+
13273
+ ## Convergence Criteria
13274
+
13275
+ Exploration should continue until clear trade-offs are identified and the chosen approach can be articulated against alternatives. This is convergence-based, not time-boxed — simple ideas converge quickly, while complex architectural decisions require more exploration.
13276
+
13277
+ When exploration feels "done":
13278
+
13279
+ - Multiple approaches have been considered
13280
+ - Trade-offs between approaches are understood
13281
+ - The chosen direction has clear justification
13282
+ - Remaining uncertainty is about requirements, not design
13283
+
13284
+ If a task requires significant design decisions during execution, it wasn't ready to be a task.
13285
+
13286
+ ## Documenting Alternatives
13287
+
13288
+ Ideas should document the alternatives considered and why they were ruled out. This creates a decision log that helps future agents and humans understand context. Include alternatives in the idea body or Open Questions sections.
13115
13289
 
13116
13290
  ## Parent Principle
13117
13291
 
13118
- - [Maintainable Codebase](maintainable-codebase.md)
13292
+ - [Lightweight Planning](lightweight-planning.md)
13119
13293
 
13120
13294
  ## Sub-Principles
13121
13295
 
@@ -13123,16 +13297,16 @@ Extracting shared code too eagerly can create tight coupling, obscure intent, an
13123
13297
  `
13124
13298
  },
13125
13299
  {
13126
- slug: "runtime-agnostic-tests",
13127
- content: `# Runtime Agnostic Tests
13300
+ slug: "traceable-decisions",
13301
+ content: `# Traceable Decisions
13128
13302
 
13129
- Dust's test suite should work across JavaScript runtimes.
13303
+ The commit history should explain why changes were made, not just what changed.
13130
13304
 
13131
- Tests should use standard JavaScript testing patterns that work across Node.js, Bun, and other runtimes. Avoiding runtime-specific test APIs ensures the project can leverage different runtimes' advantages while maintaining broad compatibility.
13305
+ Commit messages should capture intent and context that would otherwise be lost. Future maintainers (human or AI) will traverse history to understand the reasoning behind decisions. A commit that says "Fix bug" is less valuable than one that explains what was broken and why the fix is correct.
13132
13306
 
13133
13307
  ## Parent Principle
13134
13308
 
13135
- - [Minimal Dependencies](minimal-dependencies.md)
13309
+ - [Atomic Commits](atomic-commits.md)
13136
13310
 
13137
13311
  ## Sub-Principles
13138
13312
 
@@ -13140,16 +13314,16 @@ Tests should use standard JavaScript testing patterns that work across Node.js,
13140
13314
  `
13141
13315
  },
13142
13316
  {
13143
- slug: "task-first-workflow",
13144
- content: `# Task-First Workflow
13317
+ slug: "fast-feedback",
13318
+ content: `# Fast Feedback
13145
13319
 
13146
- Work should be captured as a task before implementation begins, creating traceability between intent and outcome.
13320
+ Dust should provide fast feedback loops for developers.
13147
13321
 
13148
- This discipline ensures that every change has a documented purpose. The commit history shows pairs of "Add task" followed by implementation, making it easy to understand why each change was made. It also prevents scope creep by defining boundaries before work starts.
13322
+ Scripts and tooling should execute quickly so developers can iterate rapidly. Slow feedback discourages frequent validation and leads to larger, riskier changes. Fast feedback enables small, confident steps.
13149
13323
 
13150
13324
  ## Parent Principle
13151
13325
 
13152
- - [Lightweight Planning](lightweight-planning.md)
13326
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
13153
13327
 
13154
13328
  ## Sub-Principles
13155
13329
 
@@ -13157,58 +13331,63 @@ This discipline ensures that every change has a documented purpose. The commit h
13157
13331
  `
13158
13332
  },
13159
13333
  {
13160
- slug: "agent-autonomy",
13161
- content: `# Agent Autonomy
13334
+ slug: "decoupled-code",
13335
+ content: `# Decoupled Code
13162
13336
 
13163
- Dust exists to enable AI agents to produce work autonomously.
13337
+ Code should be organized into independent units with explicit dependencies.
13164
13338
 
13165
- With sufficient planning and small enough units, this works much better in practice.
13339
+ Decoupled code is easier to test, understand, and modify. Dependencies are passed in rather than hard-coded, enabling units to be tested in isolation and composed flexibly. This reduces the blast radius of changes and makes the system more maintainable.
13166
13340
 
13167
13341
  ## Parent Principle
13168
13342
 
13169
- - [Human-AI Collaboration](human-ai-collaboration.md)
13343
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
13170
13344
 
13171
13345
  ## Sub-Principles
13172
13346
 
13173
- - [Actionable Errors](actionable-errors.md)
13174
- - [Batteries Included](batteries-included.md)
13175
- - [Agent-Agnostic Design](agent-agnostic-design.md)
13176
- - [Agent Context Inference](agent-context-inference.md)
13177
- - [Agent-Specific Enhancement](agent-specific-enhancement.md)
13178
- - [Context Window Efficiency](context-window-efficiency.md)
13179
- - [Small Units](small-units.md)
13347
+ - [Dependency Injection](dependency-injection.md)
13348
+ - [Stubs Over Mocks](stubs-over-mocks.md)
13349
+ - [Functional Core, Imperative Shell](functional-core-imperative-shell.md)
13350
+ - [Design for Testability](design-for-testability.md)
13180
13351
  `
13181
13352
  },
13182
13353
  {
13183
- slug: "clarity-over-brevity",
13184
- content: `# Clarity Over Brevity
13354
+ slug: "make-changes-with-confidence",
13355
+ content: `# Make Changes with Confidence
13185
13356
 
13186
- Names should be descriptive and self-documenting, even if longer.
13357
+ Developers should be able to modify code without fear of breaking existing behavior.
13187
13358
 
13188
- Abbreviated names like \`ctx\`, \`deps\`, \`fs\`, or \`args\` save a few keystrokes but obscure meaning. Full names like \`context\`, \`dependencies\`, \`fileSystem\`, and \`arguments\` make code immediately understandable without requiring readers to decode conventions. This is especially valuable when AI agents or new contributors read the codebase for the first time.
13359
+ Tests, type checking, and other automated verification enable safe refactoring and evolution of the codebase. When changes break something, fast feedback identifies the problem before it spreads. This confidence encourages continuous improvement rather than fragile, stagnant code.
13189
13360
 
13190
13361
  ## Parent Principle
13191
13362
 
13192
- - [Naming Matters](naming-matters.md)
13363
+ - [Maintainable Codebase](maintainable-codebase.md)
13193
13364
 
13194
13365
  ## Sub-Principles
13195
13366
 
13196
- - (none)
13367
+ - [Comprehensive Assertions](comprehensive-assertions.md)
13368
+ - [Decoupled Code](decoupled-code.md)
13369
+ - [Fast Feedback](fast-feedback.md)
13370
+ - [Lint Everything](lint-everything.md)
13371
+ - [Readable Test Data](readable-test-data.md)
13372
+ - [Reproducible Checks](reproducible-checks.md)
13373
+ - [Stop the Line](stop-the-line.md)
13374
+ - [Keep Unit Tests Pure](keep-unit-tests-pure.md)
13375
+ - [Test Isolation](test-isolation.md)
13376
+ - [Self-Diagnosing Tests](self-diagnosing-tests.md)
13377
+ - [Unit Test Coverage](unit-test-coverage.md)
13197
13378
  `
13198
13379
  },
13199
13380
  {
13200
- slug: "fast-feedback-loops",
13201
- content: `# Fast Feedback Loops
13202
-
13203
- The primary feedback loop — write code, run checks, see results — should be as fast as possible.
13381
+ slug: "clarity-over-brevity",
13382
+ content: `# Clarity Over Brevity
13204
13383
 
13205
- Fast feedback is the foundation of productive development, for both humans and agents. When tests, linters, and type checks run in seconds rather than minutes, developers iterate more frequently and catch problems earlier. Agents especially benefit because they operate in tight loops of change-and-verify; slow feedback wastes tokens and context window space on waiting rather than working.
13384
+ Names should be descriptive and self-documenting, even if longer.
13206
13385
 
13207
- Dust should help projects measure the speed of their feedback loops, identify bottlenecks, and keep them fast as the codebase grows. This includes promoting practices like unit tests over integration tests for speed, incremental compilation, and check parallelisation.
13386
+ Abbreviated names like \`ctx\`, \`deps\`, \`fs\`, or \`args\` save a few keystrokes but obscure meaning. Full names like \`context\`, \`dependencies\`, \`fileSystem\`, and \`arguments\` make code immediately understandable without requiring readers to decode conventions. This is especially valuable when AI agents or new contributors read the codebase for the first time.
13208
13387
 
13209
13388
  ## Parent Principle
13210
13389
 
13211
- - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
13390
+ - [Naming Matters](naming-matters.md)
13212
13391
 
13213
13392
  ## Sub-Principles
13214
13393
 
@@ -13216,18 +13395,24 @@ Dust should help projects measure the speed of their feedback loops, identify bo
13216
13395
  `
13217
13396
  },
13218
13397
  {
13219
- slug: "make-the-change-easy",
13220
- content: `# Make the Change Easy
13398
+ slug: "agent-agnostic-design",
13399
+ content: `# Agent-Agnostic Design
13400
+
13401
+ Dust should work with multiple agents without favoring one.
13402
+
13403
+ Rather than implementing agents, Dust generates prompts and context that can be passed to any capable agent. This keeps Dust lightweight and allows teams to use whatever agent tooling they prefer.
13404
+
13405
+ Dust may have built-in support for invoking popular agents (Claude, Aider, Codex, etc.), but the choice of agent should always be made by the user at runtime - never hard-coded into repository configuration.
13221
13406
 
13222
- For each desired change, make the change easy, then make the easy change.
13407
+ Note: Supporting multiple agents directly contributes to [Easy Adoption](easy-adoption.md), since teams can use their preferred agent tools without being locked into a specific platform.
13223
13408
 
13224
- This principle, articulated by Kent Beck, recognizes that the hardest part of a change is often not the change itself but the state of the code receiving it. When code resists a change, the right response is to first refactor until the change becomes straightforward, and only then make it. The warning - "this may be hard" - acknowledges that preparing the ground takes real effort, but the result is a change that fits naturally rather than one forced in against the grain.
13409
+ ## Applicability
13225
13410
 
13226
- Work that supports this principle includes refactoring before feature work, improving abstractions that make a category of changes simpler, and resisting the urge to bolt changes onto code that isn't ready for them.
13411
+ Internal
13227
13412
 
13228
13413
  ## Parent Principle
13229
13414
 
13230
- - [Maintainable Codebase](maintainable-codebase.md)
13415
+ - [Agent Autonomy](agent-autonomy.md)
13231
13416
 
13232
13417
  ## Sub-Principles
13233
13418
 
@@ -13235,20 +13420,51 @@ Work that supports this principle includes refactoring before feature work, impr
13235
13420
  `
13236
13421
  },
13237
13422
  {
13238
- slug: "self-contained-repository",
13239
- content: `# Self-Contained Repository
13423
+ slug: "readable-test-data",
13424
+ content: `# Readable Test Data
13240
13425
 
13241
- Where possible, developers and agents should have everything they need to be productive, within the repository.
13426
+ Test data setup should use natural structures that mirror what they represent.
13242
13427
 
13243
- No third-party tools should be required beyond those that can be installed with a single command defined in the repository. Setup instructions, scripts, configuration, and dependencies should all live in version control so that cloning the repo and running a single install command is sufficient to start working. This eliminates onboarding friction, reduces "works on my machine" issues, and is especially important for agents — who cannot browse the web to find missing tools or ask colleagues how to set things up.
13428
+ ## Why it matters
13244
13429
 
13245
- ## Applicability
13430
+ When test data is easy to read, tests become self-documenting. A file system hierarchy expressed as a nested object immediately conveys structure, while a flat Map with path strings requires mental parsing to understand the relationships.
13246
13431
 
13247
- Internal
13432
+ ## In practice
13433
+
13434
+ Prefer literal structures that visually match the domain:
13435
+
13436
+ \`\`\`javascript
13437
+ // Avoid: flat paths that obscure hierarchy
13438
+ const fs = createFileSystemEmulator({
13439
+ files: new Map([['/project/.dust/principles/my-goal.md', '# My Goal']]),
13440
+ existingPaths: new Set(['/project/.dust/ideas']),
13441
+ })
13442
+
13443
+ // Prefer: nested object that mirrors file system structure
13444
+ const fs = createFileSystemEmulator({
13445
+ project: {
13446
+ '.dust': {
13447
+ principles: {
13448
+ 'my-goal.md': '# My Goal'
13449
+ },
13450
+ ideas: {}
13451
+ }
13452
+ }
13453
+ })
13454
+ \`\`\`
13455
+
13456
+ The nested form:
13457
+ - Shows parent-child relationships through indentation
13458
+ - Makes empty directories explicit with empty objects
13459
+ - Requires no mental path concatenation to understand structure
13460
+
13461
+ ## How to evaluate
13462
+
13463
+ Work supports this principle when test setup data uses structures that visually resemble what they represent, reducing cognitive load for readers.
13248
13464
 
13249
13465
  ## Parent Principle
13250
13466
 
13251
- - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
13467
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
13252
13468
 
13253
13469
  ## Sub-Principles
13254
13470
 
@@ -13256,16 +13472,16 @@ Internal
13256
13472
  `
13257
13473
  },
13258
13474
  {
13259
- slug: "traceable-decisions",
13260
- content: `# Traceable Decisions
13475
+ slug: "reasonably-dry",
13476
+ content: `# Reasonably DRY
13261
13477
 
13262
- The commit history should explain why changes were made, not just what changed.
13478
+ Don't repeat yourself is a good principle, but don't overdo it.
13263
13479
 
13264
- Commit messages should capture intent and context that would otherwise be lost. Future maintainers (human or AI) will traverse history to understand the reasoning behind decisions. A commit that says "Fix bug" is less valuable than one that explains what was broken and why the fix is correct.
13480
+ Extracting shared code too eagerly can create tight coupling, obscure intent, and make changes harder. When two pieces of code look similar but serve different purposes or are likely to evolve independently, duplication is the better choice. The cost of a wrong abstraction is higher than the cost of a little repetition. Extract shared code when the duplication is truly about the same concept and has proven stable, not just because two things happen to look alike right now.
13265
13481
 
13266
13482
  ## Parent Principle
13267
13483
 
13268
- - [Atomic Commits](atomic-commits.md)
13484
+ - [Maintainable Codebase](maintainable-codebase.md)
13269
13485
 
13270
13486
  ## Sub-Principles
13271
13487
 
@@ -13273,16 +13489,21 @@ Commit messages should capture intent and context that would otherwise be lost.
13273
13489
  `
13274
13490
  },
13275
13491
  {
13276
- slug: "unit-test-coverage",
13277
- content: `# Unit Test Coverage
13492
+ slug: "actionable-errors",
13493
+ content: `# Actionable Errors
13278
13494
 
13279
- Complete unit test coverage ensures low-level tests give users direct feedback as they change the code.
13495
+ Error messages should tell you what to do next, not just what went wrong.
13280
13496
 
13281
- Excluding system tests from coverage reporting focuses attention on unit tests - the tests that provide the fastest, most specific feedback. When coverage tools only measure unit tests, developers can quickly identify which parts of the codebase lack fine-grained test protection.
13497
+ When something fails, the message should provide:
13498
+ - A clear description of the problem
13499
+ - Specific guidance on how to fix it
13500
+ - Context needed to take the next step
13501
+
13502
+ This is especially important for AI agents, who need concrete instructions to recover autonomously. A good error message turns a dead end into a signpost.
13282
13503
 
13283
13504
  ## Parent Principle
13284
13505
 
13285
- - [Make Changes with Confidence](make-changes-with-confidence.md)
13506
+ - [Agent Autonomy](agent-autonomy.md)
13286
13507
 
13287
13508
  ## Sub-Principles
13288
13509
 
@@ -13290,84 +13511,70 @@ Excluding system tests from coverage reporting focuses attention on unit tests -
13290
13511
  `
13291
13512
  },
13292
13513
  {
13293
- slug: "decoupled-code",
13294
- content: `# Decoupled Code
13514
+ slug: "make-the-change-easy",
13515
+ content: `# Make the Change Easy
13295
13516
 
13296
- Code should be organized into independent units with explicit dependencies.
13517
+ For each desired change, make the change easy, then make the easy change.
13297
13518
 
13298
- Decoupled code is easier to test, understand, and modify. Dependencies are passed in rather than hard-coded, enabling units to be tested in isolation and composed flexibly. This reduces the blast radius of changes and makes the system more maintainable.
13519
+ This principle, articulated by Kent Beck, recognizes that the hardest part of a change is often not the change itself but the state of the code receiving it. When code resists a change, the right response is to first refactor until the change becomes straightforward, and only then make it. The warning - "this may be hard" - acknowledges that preparing the ground takes real effort, but the result is a change that fits naturally rather than one forced in against the grain.
13520
+
13521
+ Work that supports this principle includes refactoring before feature work, improving abstractions that make a category of changes simpler, and resisting the urge to bolt changes onto code that isn't ready for them.
13299
13522
 
13300
13523
  ## Parent Principle
13301
13524
 
13302
- - [Make Changes with Confidence](make-changes-with-confidence.md)
13525
+ - [Maintainable Codebase](maintainable-codebase.md)
13303
13526
 
13304
13527
  ## Sub-Principles
13305
13528
 
13306
- - [Dependency Injection](dependency-injection.md)
13307
- - [Stubs Over Mocks](stubs-over-mocks.md)
13308
- - [Functional Core, Imperative Shell](functional-core-imperative-shell.md)
13309
- - [Design for Testability](design-for-testability.md)
13529
+ - (none)
13310
13530
  `
13311
13531
  },
13312
13532
  {
13313
- slug: "lint-everything",
13314
- content: `# Lint Everything
13533
+ slug: "dependency-injection",
13534
+ content: `# Dependency Injection
13315
13535
 
13316
- Prefer static analysis over runtime checks. Every error caught by a linter is an error that never reaches tests, and every error caught by tests is an error that never reaches production.
13536
+ Avoid global mocks. Dependency injection is almost always preferable to testing code that depends directly on globals.
13317
13537
 
13318
- Lint markdown, lint types, lint formatting. If it can be checked statically, check it. Linters are fast, deterministic, and catch entire categories of bugs before code even runs.
13538
+ When code depends on global state or singletons, testing requires mocking those globals—which introduces hidden coupling, complicates test setup, and risks interference between tests. Dependency injection makes dependencies explicit: they're passed in as arguments, making the code's requirements visible and enabling tests to supply controlled implementations.
13319
13539
 
13320
- This project lints:
13321
- - TypeScript (type checking and style)
13322
- - Markdown (broken links, required sections)
13323
- - Task files (structure validation)
13324
- - Principle hierarchy (parent/child consistency)
13540
+ This approach improves testability (each test controls its own dependencies), readability (dependencies are declared upfront), and flexibility (swapping implementations doesn't require changing the consuming code). It also makes refactoring safer since dependencies are explicit rather than implicit.
13325
13541
 
13326
13542
  ## Parent Principle
13327
13543
 
13328
- - [Make Changes with Confidence](make-changes-with-confidence.md)
13544
+ - [Decoupled Code](decoupled-code.md)
13329
13545
 
13330
13546
  ## Sub-Principles
13331
13547
 
13332
- (none)
13548
+ - (none)
13333
13549
  `
13334
13550
  },
13335
13551
  {
13336
- slug: "maintainable-codebase",
13337
- content: `# Maintainable Codebase
13552
+ slug: "repository-hygiene",
13553
+ content: `# Repository Hygiene
13338
13554
 
13339
- The dust codebase should be easy to understand, modify, and extend.
13555
+ Dust repositories should maintain a clean, organized state with minimal noise.
13340
13556
 
13341
- This principle governs how we develop and maintain dust itself, separate from the principles that describe what dust offers its users. A well-maintained codebase enables rapid iteration, reduces bugs, and makes contributions easier.
13557
+ This includes proper gitignore configuration to exclude build artifacts, dependencies, editor files, and other generated content from version control. A well-maintained repository makes it easier for both humans and AI to navigate and understand the codebase.
13342
13558
 
13343
13559
  ## Parent Principle
13344
13560
 
13345
- - [Agentic Flow State](agentic-flow-state.md)
13561
+ - [Maintainable Codebase](maintainable-codebase.md)
13346
13562
 
13347
13563
  ## Sub-Principles
13348
13564
 
13349
- - [Make Changes with Confidence](make-changes-with-confidence.md)
13350
- - [Minimal Dependencies](minimal-dependencies.md)
13351
- - [Intuitive Directory Structure](intuitive-directory-structure.md)
13352
- - [Repository Hygiene](repository-hygiene.md)
13353
- - [Naming Matters](naming-matters.md)
13354
- - [Reasonably DRY](reasonably-dry.md)
13355
- - [Make the Change Easy](make-the-change-easy.md)
13356
- - [Boy Scout Rule](boy-scout-rule.md)
13357
- - [Broken Windows](broken-windows.md)
13565
+ - [Atomic Commits](atomic-commits.md)
13566
+ - [Trunk-Based Development](trunk-based-development.md)
13358
13567
  `
13359
13568
  },
13360
13569
  {
13361
- slug: "agent-agnostic-design",
13362
- content: `# Agent-Agnostic Design
13363
-
13364
- Dust should work with multiple agents without favoring one.
13570
+ slug: "batteries-included",
13571
+ content: `# Batteries Included
13365
13572
 
13366
- Rather than implementing agents, Dust generates prompts and context that can be passed to any capable agent. This keeps Dust lightweight and allows teams to use whatever agent tooling they prefer.
13573
+ Dust should provide everything that is required (within reason) for an agent to be productive in an arbitrary codebase.
13367
13574
 
13368
- Dust may have built-in support for invoking popular agents (Claude, Aider, Codex, etc.), but the choice of agent should always be made by the user at runtime - never hard-coded into repository configuration.
13575
+ An agent working autonomously should not be blocked because a tool or configuration is missing. For example, dust should ship custom lint rules for different linters, even though those linters are not dependencies of dust itself. If an agent needs a capability to do its job well in a typical codebase, dust should provide it out of the box.
13369
13576
 
13370
- Note: Supporting multiple agents directly contributes to [Easy Adoption](easy-adoption.md), since teams can use their preferred agent tools without being locked into a specific platform.
13577
+ This means accepting some breadth of scope — bundling configs, rules, and utilities that target external tools — in exchange for agents that can start producing useful work immediately without manual setup.
13371
13578
 
13372
13579
  ## Applicability
13373
13580
 
@@ -13378,47 +13585,25 @@ Internal
13378
13585
  - [Agent Autonomy](agent-autonomy.md)
13379
13586
 
13380
13587
  ## Sub-Principles
13381
-
13382
- - (none)
13383
13588
  `
13384
13589
  },
13385
13590
  {
13386
- slug: "easy-adoption",
13387
- content: `# Easy Adoption
13388
-
13389
- Dust should be trivially easy to adopt in any repository.
13390
-
13391
- Getting started with Dust should require minimal friction. A developer should be able to bootstrap Dust in their repository with a single command, without needing to install dependencies, configure build tools, or understand the internals.
13392
-
13393
- This lowers the barrier to entry and encourages experimentation.
13394
-
13395
- ## Parent Principle
13396
-
13397
- - [Human-AI Collaboration](human-ai-collaboration.md)
13591
+ slug: "development-traceability",
13592
+ content: `# Development Traceability
13398
13593
 
13399
- ## Sub-Principles
13594
+ Structured logging and tracing help agents understand system behaviour without resorting to ad-hoc testing cycles.
13400
13595
 
13401
- - [Cross-Platform Compatibility](cross-platform-compatibility.md)
13402
- - [Unsurprising UX](unsurprising-ux.md)
13403
- - [VCS Independence](vcs-independence.md)
13404
- `
13405
- },
13406
- {
13407
- slug: "actionable-errors",
13408
- content: `# Actionable Errors
13596
+ When something goes wrong, agents often resort to adding temporary log statements, running the code, reading the output, and repeating — a slow and wasteful debugging loop. Good traceability means the system already records what happened and why, through structured logs, trace IDs, and observable state. This lets agents diagnose issues by reading existing output rather than generating new experiments.
13409
13597
 
13410
- Error messages should tell you what to do next, not just what went wrong.
13598
+ Dust should encourage projects to adopt structured logging, promote traceability as a first-class concern, and provide tools that surface relevant trace information when agents need it.
13411
13599
 
13412
- When something fails, the message should provide:
13413
- - A clear description of the problem
13414
- - Specific guidance on how to fix it
13415
- - Context needed to take the next step
13600
+ ## Applicability
13416
13601
 
13417
- This is especially important for AI agents, who need concrete instructions to recover autonomously. A good error message turns a dead end into a signpost.
13602
+ Internal
13418
13603
 
13419
13604
  ## Parent Principle
13420
13605
 
13421
- - [Agent Autonomy](agent-autonomy.md)
13606
+ - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
13422
13607
 
13423
13608
  ## Sub-Principles
13424
13609
 
@@ -13426,16 +13611,22 @@ This is especially important for AI agents, who need concrete instructions to re
13426
13611
  `
13427
13612
  },
13428
13613
  {
13429
- slug: "consistent-naming",
13430
- content: `# Consistent Naming
13614
+ slug: "exploratory-tooling",
13615
+ content: `# Exploratory Tooling
13431
13616
 
13432
- Names should follow established conventions within each category to reduce cognitive load.
13617
+ Agents need tools to efficiently explore and understand unfamiliar codebases.
13433
13618
 
13434
- Principles use Title Case. File names use kebab-case. Commands use lowercase with hyphens. When naming conventions exist, follow them. When they don't, establish one and apply it consistently. Inconsistent naming creates friction for both humans and AI agents trying to predict or recall identifiers.
13619
+ When an agent encounters a new codebase — or an unfamiliar corner of a familiar one — it needs to quickly build a mental model: what exists, how it fits together, and where to make changes. Without good exploratory tools, agents waste context on trial-and-error searches, reading irrelevant files, and forming incorrect assumptions.
13620
+
13621
+ Dust should promote and integrate tools that help agents explore: dependency graphs, module overviews, search utilities tuned for code navigation, and summaries of project structure. The goal is to make the "orientation" phase of any task as short and reliable as possible.
13622
+
13623
+ ## Applicability
13624
+
13625
+ Internal
13435
13626
 
13436
13627
  ## Parent Principle
13437
13628
 
13438
- - [Naming Matters](naming-matters.md)
13629
+ - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
13439
13630
 
13440
13631
  ## Sub-Principles
13441
13632
 
@@ -13443,54 +13634,57 @@ Principles use Title Case. File names use kebab-case. Commands use lowercase wit
13443
13634
  `
13444
13635
  },
13445
13636
  {
13446
- slug: "minimal-dependencies",
13447
- content: `# Minimal Dependencies
13637
+ slug: "small-units",
13638
+ content: `# Small Units
13448
13639
 
13449
- Dust should avoid coupling to specific tools so we can switch to better alternatives as they emerge.
13640
+ Ideas, principles, facts, and tasks should each be as discrete and fine-grained as possible.
13450
13641
 
13451
- By keeping dependencies minimal and using standard APIs where possible, we maintain the freedom to adopt new tools without major rewrites. This applies to runtimes, test frameworks, build tools, and other infrastructure choices.
13642
+ Small, focused documents enable precise relationships between them. A task can link to exactly the principles it serves. A fact can describe one specific aspect of the system. This granularity reduces ambiguity.
13643
+
13644
+ Tasks especially benefit from being small. A narrowly scoped task gives agents or humans the best chance of delivering exactly what was intended, in a single atomic commit.
13645
+
13646
+ Note: This principle directly supports [Lightweight Planning](lightweight-planning.md), which explicitly mentions that "Tasks are small and completable in single commits."
13452
13647
 
13453
13648
  ## Parent Principle
13454
13649
 
13455
- - [Maintainable Codebase](maintainable-codebase.md)
13650
+ - [Agent Autonomy](agent-autonomy.md)
13456
13651
 
13457
13652
  ## Sub-Principles
13458
13653
 
13459
- - [Runtime Agnostic Tests](runtime-agnostic-tests.md)
13654
+ - (none)
13460
13655
  `
13461
13656
  },
13462
13657
  {
13463
- slug: "context-window-efficiency",
13464
- content: `# Context Window Efficiency
13465
-
13466
- Dust should be designed with short attention spans in mind.
13658
+ slug: "naming-matters",
13659
+ content: `# Naming Matters
13467
13660
 
13468
- AI agents operate within limited context windows. Every token consumed by planning artifacts is a token unavailable for reasoning about code. Dust keeps artifacts concise and scannable so agents can quickly understand what needs to be done without wading through verbose documentation.
13661
+ Good naming reduces waste by eliminating confusion and making code self-documenting.
13469
13662
 
13470
- This means favoring brevity over completeness, using consistent structures that are fast to parse, and avoiding redundant information across files.
13663
+ Poor names cause rework, bugs, and communication overhead. When names don't clearly convey meaning, developers waste time deciphering code, misunderstand intentions, and introduce defects. Well-chosen names serve as documentation that never goes stale, reducing the need for explanatory comments and enabling both humans and AI agents to navigate the codebase efficiently.
13471
13664
 
13472
13665
  ## Parent Principle
13473
13666
 
13474
- - [Agent Autonomy](agent-autonomy.md)
13667
+ - [Maintainable Codebase](maintainable-codebase.md)
13475
13668
 
13476
13669
  ## Sub-Principles
13477
13670
 
13478
- - [Progressive Disclosure](progressive-disclosure.md)
13671
+ - [Consistent Naming](consistent-naming.md)
13672
+ - [Clarity Over Brevity](clarity-over-brevity.md)
13479
13673
  `
13480
13674
  },
13481
13675
  {
13482
- slug: "boy-scout-rule",
13483
- content: `# Boy Scout Rule
13676
+ slug: "comprehensive-test-coverage",
13677
+ content: `# Comprehensive Test Coverage
13484
13678
 
13485
- Always leave the code better than you found it.
13679
+ A project's test suite is its primary safety net, and agents depend on it even more than humans do.
13486
13680
 
13487
- When working in any area of the codebase, take the opportunity to make small improvements — clearer names, removed dead code, better structure — even if they're not directly related to the task at hand. These incremental improvements compound over time, preventing gradual decay and keeping the codebase healthy without requiring dedicated cleanup efforts.
13681
+ Agents cannot manually verify that their changes work. They rely entirely on automated tests to confirm correctness. Gaps in test coverage become gaps in agent capability — areas where changes are risky and feedback is absent. Comprehensive coverage means every meaningful behaviour is tested, so agents can make changes anywhere in the codebase with confidence.
13488
13682
 
13489
- The Boy Scout Rule is not a license for large-scale refactoring during unrelated work. Improvements should be small, obvious, and low-risk. If a cleanup is too large to include alongside the current task, capture it as a separate task instead.
13683
+ Dust should help projects measure and improve their test coverage, flag untested areas, and encourage a culture where new code comes with new tests.
13490
13684
 
13491
13685
  ## Parent Principle
13492
13686
 
13493
- - [Maintainable Codebase](maintainable-codebase.md)
13687
+ - [Ideal Agent Developer Experience](ideal-agent-developer-experience.md)
13494
13688
 
13495
13689
  ## Sub-Principles
13496
13690
 
@@ -13498,18 +13692,16 @@ The Boy Scout Rule is not a license for large-scale refactoring during unrelated
13498
13692
  `
13499
13693
  },
13500
13694
  {
13501
- slug: "unsurprising-ux",
13502
- content: `# Unsurprising UX
13503
-
13504
- The user interface should be as "guessable" as possible.
13695
+ slug: "stop-the-line",
13696
+ content: `# Stop the Line
13505
13697
 
13506
- Following the [Principle of Least Astonishment](https://en.wikipedia.org/wiki/Principle_of_least_astonishment), users form expectations about how a tool will behave based on conventions, prior experience, and intuition. Dust's interface (including the CLI) should match those expectations wherever possible. If users are observed trying to use the interface in ways we didn't anticipate, the interface should be adjusted to meet their expectations — even if that means supporting many ways of achieving the same result.
13698
+ Any worker — human or agent — should halt and fix a problem the moment they detect it, rather than letting defects propagate downstream.
13507
13699
 
13508
- Surprising behavior erodes trust and slows people down. Unsurprising behavior lets users stay in flow.
13700
+ Originating from the Toyota production system, "Stop the Line" empowers every participant to pause work immediately upon identifying a defect, failing check, or safety hazard. Problems are cheaper to fix at their source than after they've compounded through later stages. In the context of dust, this means agents and humans alike should treat broken checks, test failures, and lint errors as blockers that demand immediate attention — not warnings to be deferred.
13509
13701
 
13510
13702
  ## Parent Principle
13511
13703
 
13512
- - [Easy Adoption](easy-adoption.md)
13704
+ - [Make Changes with Confidence](make-changes-with-confidence.md)
13513
13705
 
13514
13706
  ## Sub-Principles
13515
13707
 
@@ -13801,16 +13993,15 @@ async function init(dependencies) {
13801
13993
  throw error;
13802
13994
  }
13803
13995
  }
13804
- const runner = dustCommand.split(" ")[0];
13805
13996
  context.stdout("");
13806
13997
  context.stdout(`${colors.bold}\uD83D\uDE80 Next steps:${colors.reset} Commit the changes if you are happy, then get planning!`);
13807
13998
  context.stdout("");
13808
13999
  context.stdout(`${colors.dim}If this is a new repository, you can start adding ideas or tasks right away:${colors.reset}`);
13809
- context.stdout(` ${colors.cyan}>${colors.reset} ${runner} claude "Idea: friendly UI for non-technical users"`);
13810
- context.stdout(` ${colors.cyan}>${colors.reset} ${runner} codex "Task: set up code coverage"`);
14000
+ context.stdout(` ${colors.cyan}>${colors.reset} claude "Idea: friendly UI for non-technical users"`);
14001
+ context.stdout(` ${colors.cyan}>${colors.reset} codex "Task: set up code coverage"`);
13811
14002
  context.stdout("");
13812
14003
  context.stdout(`${colors.dim}If this is an existing codebase, you might want to backfill principles and facts:${colors.reset}`);
13813
- context.stdout(` ${colors.cyan}>${colors.reset} ${runner} claude "Add principles and facts based on the code in this repository"`);
14004
+ context.stdout(` ${colors.cyan}>${colors.reset} claude "Add principles and facts based on the code in this repository"`);
13814
14005
  return { exitCode: 0 };
13815
14006
  }
13816
14007
 
@@ -14321,7 +14512,8 @@ async function runLoop(dependencies, loopDependencies) {
14321
14512
  let completedIterations = 0;
14322
14513
  const iterationOptions = {
14323
14514
  hooksInstalled,
14324
- docker: dockerConfig
14515
+ docker: dockerConfig,
14516
+ containerRuntime
14325
14517
  };
14326
14518
  if (eventsUrl) {
14327
14519
  iterationOptions.onRawEvent = createHeartbeatThrottler(onAgentEvent, loopDependencies.agentType ?? "claude");
@@ -14805,6 +14997,9 @@ function runLoopClaude(commandDependencies) {
14805
14997
  function runLoopCodex(commandDependencies) {
14806
14998
  return loopCodex(commandDependencies, createCodexDependencies());
14807
14999
  }
15000
+ function runCodexHook(commandDependencies) {
15001
+ return codexHook(commandDependencies, defaultCodexHookDependencies);
15002
+ }
14808
15003
  var commandRegistry = {
14809
15004
  init,
14810
15005
  lint: lintMarkdown,
@@ -14819,6 +15014,7 @@ var commandRegistry = {
14819
15014
  audit,
14820
15015
  "bucket worker": bucketWorker,
14821
15016
  "bucket tool": bucketTool,
15017
+ "codex hook": runCodexHook,
14822
15018
  "core principle": corePrinciple,
14823
15019
  focus,
14824
15020
  "new task": newTask,