@cardor/agent-harness-kit 0.16.10 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,30 +38,32 @@ These three calls are **not optional**. The dashboard cannot display what you do
38
38
 
39
39
  ### 1. Log every tool call you make
40
40
 
41
- After **each** tool invocation (Read, Edit, Write, Bash), immediately call:
41
+ After **each** tool invocation (Read, Edit, Write, Bash), call:
42
42
 
43
43
  ```
44
- actions.write(actionId, 'tools_used', '<ToolName>: <args-summary> why')
44
+ actions.record_tool(actionId, '<ToolName>', '<args-summary>', '<why>')
45
45
  ```
46
46
 
47
47
  Examples:
48
- - `Read: src/auth/middleware.ts understand existing JWT pattern`
49
- - `Bash: npm test -- --testPathPattern=auth verify auth tests pass`
50
- - `Edit: src/auth/middleware.ts:45-78 add refresh token validation`
48
+ - `actions.record_tool(actionId, 'Read', 'src/auth/middleware.ts', 'understand existing JWT pattern')`
49
+ - `actions.record_tool(actionId, 'Bash', 'npm test --testPathPattern=auth', 'verify auth tests pass')`
50
+ - `actions.record_tool(actionId, 'Edit', 'src/auth/middleware.ts:45-78', 'add refresh token validation')`
51
51
 
52
52
  ### 2. Log every file you touch
53
53
 
54
- After **each** file modification (Edit, Write), immediately call:
54
+ After **each** file modification (Edit, Write), call:
55
55
 
56
56
  ```
57
- actions.write(actionId, 'files_modified', '<file-path> what changed and why')
57
+ actions.record_file(actionId, '<file-path>', '<operation>', '<what changed and why>')
58
58
  ```
59
59
 
60
- Example: `src/auth/middleware.ts added refresh token expiry check in validateToken()`
60
+ Operations: `created` | `modified` | `deleted`
61
+
62
+ Example: `actions.record_file(actionId, 'src/auth/middleware.ts', 'modified', 'added refresh token expiry check in validateToken()')`
61
63
 
62
64
  ### 3. Do not complete your action without both logs being up to date
63
65
 
64
- If you touched 5 files and made 12 tool calls, there must be 5 `files_modified` entries and 12 `tools_used` entries before you call `actions.complete`.
66
+ If you touched 5 files and made 12 tool calls, there must be 5 `actions.record_file` calls and 12 `actions.record_tool` calls before you call `actions.complete`.
65
67
 
66
68
  ---
67
69
 
@@ -125,8 +127,8 @@ actions.complete(actionId, 'Implementation done — N files modified, tests pass
125
127
 
126
128
  - **Read the plan and analysis first.** Never implement cold.
127
129
  - **Only write to `{{writablePaths}}`.** No exceptions.
128
- - **Log every file you touch.** No silent modifications.
129
- - **Log every tool call.** Use `actions.write(actionId, 'tools_used', ...)` after each Read, Edit, Write, Bash invocation.
130
+ - **Log every file you touch.** Call `actions.record_file(actionId, path, operation, notes)` after each Edit/Write.
131
+ - **Log every tool call.** Call `actions.record_tool(actionId, toolName, args, summary)` after each Read, Edit, Write, Bash invocation.
130
132
  - **Leave tests green.** If tests fail after your changes, fix them before completing.
131
133
  - **Do not refactor beyond the task scope.** Implement what was asked, nothing more.
132
134
  - **If blocked, say so.** Do not invent workarounds for unclear requirements.
@@ -37,18 +37,18 @@ These calls are **not optional**. The dashboard cannot display what you do not r
37
37
 
38
38
  ### Log every tool call you make
39
39
 
40
- After **each** tool invocation (Read, Bash, grep, docs.search), immediately call:
40
+ After **each** tool invocation (Read, Bash, grep, docs.search), call:
41
41
 
42
42
  ```
43
- actions.write(actionId, 'tools_used', '<ToolName>: <args-summary> why')
43
+ actions.record_tool(actionId, '<ToolName>', '<args-summary>', '<why>')
44
44
  ```
45
45
 
46
46
  Examples:
47
- - `Read: src/auth/middleware.ts find existing JWT pattern`
48
- - `Bash: grep -r "refreshToken" src/ locate all refresh token usages`
49
- - `docs.search: "authentication middleware" check project docs for auth guidance`
47
+ - `actions.record_tool(actionId, 'Read', 'src/auth/middleware.ts', 'find existing JWT pattern')`
48
+ - `actions.record_tool(actionId, 'Bash', 'grep -r "refreshToken" src/', 'locate all refresh token usages')`
49
+ - `actions.record_tool(actionId, 'docs.search', 'authentication middleware', 'check project docs for auth guidance')`
50
50
 
51
- **Every single tool call must be logged.** No silent reads. The audit trail in the dashboard is built entirely from these entries.
51
+ **Every single tool call must be logged.** No silent reads. The Tools dashboard is built entirely from these `actions.record_tool` calls.
52
52
 
53
53
  ---
54
54
 
@@ -32,16 +32,16 @@ These calls are **not optional**. The dashboard cannot display what you do not r
32
32
 
33
33
  ### Log every tool call you make
34
34
 
35
- After **each** tool invocation (Bash, tasks.get, tasks.claim, actions.get), immediately call:
35
+ After **each** tool invocation (Bash, tasks.get, tasks.claim, actions.get), call:
36
36
 
37
37
  ```
38
- actions.write(actionId, 'tools_used', '<ToolName>: <args-summary> why')
38
+ actions.record_tool(actionId, '<ToolName>', '<args-summary>', '<why>')
39
39
  ```
40
40
 
41
41
  Examples:
42
- - `Bash: bash health.sh verify codebase health before starting`
43
- - `tasks.get: pending find next task to claim`
44
- - `actions.get: taskId=abc123 read action history to resume in-progress task`
42
+ - `actions.record_tool(actionId, 'Bash', 'bash health.sh', 'verify codebase health before starting')`
43
+ - `actions.record_tool(actionId, 'tasks.get', 'pending', 'find next task to claim')`
44
+ - `actions.record_tool(actionId, 'actions.get', 'taskId=abc123', 'read action history to resume in-progress task')`
45
45
 
46
46
  **Log every call.** This applies from the moment you have an `actionId` (after step 3 below).
47
47
 
@@ -32,28 +32,25 @@ These calls are **not optional**. The dashboard cannot display what you do not r
32
32
 
33
33
  ### 1. Log every tool call you make
34
34
 
35
- After **each** tool invocation (Read, Bash), immediately call:
35
+ After **each** tool invocation (Read, Bash), call:
36
36
 
37
37
  ```
38
- actions.write(actionId, 'tools_used', '<ToolName>: <args-summary> why')
38
+ actions.record_tool(actionId, '<ToolName>', '<args-summary>', '<why>')
39
39
  ```
40
40
 
41
41
  Examples:
42
- - `Read: src/auth/middleware.ts verify refresh token logic matches criterion 2`
43
- - `Bash: npm test -- --testPathPattern=auth confirm all auth tests pass`
42
+ - `actions.record_tool(actionId, 'Read', 'src/auth/middleware.ts', 'verify refresh token logic matches criterion 2')`
43
+ - `actions.record_tool(actionId, 'Bash', 'npm test --testPathPattern=auth', 'confirm all auth tests pass')`
44
44
 
45
45
  ### 2. Mark every acceptance criterion as you verify it
46
46
 
47
- For **each** criterion (0-based index), call this immediately after you evaluate it:
47
+ For **each** criterion, call this immediately after you evaluate it, passing the criterion's `id` as returned by `tasks.get`:
48
48
 
49
49
  ```
50
- tasks.acceptance_update(taskId, criterionIndex, true|false)
50
+ tasks.acceptance.update(criterionId)
51
51
  ```
52
52
 
53
- - `true` = criterion is fully met
54
- - `false` = criterion is not met
55
-
56
- If the task has 3 criteria, you must make exactly 3 `tasks.acceptance_update` calls — one per criterion. Skipping any of them leaves the dashboard showing criteria as unverified.
53
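+ If the task has 3 criteria and all 3 are met, you must make exactly 3 `tasks.acceptance.update` calls — one per met criterion. The call marks a criterion as met, so do not call it for a criterion that fails. Skipping the call for a met criterion leaves the dashboard showing it as unverified.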
57
54
 
58
55
  ---
59
56
 
@@ -124,7 +121,7 @@ Then notify lead so the builder can be re-assigned.
124
121
 
125
122
  - **Run health.sh before approving.** No exceptions.
126
123
  - **Check every acceptance criterion.** Not just the obvious ones.
127
- - **Call `tasks.acceptance_update()` for each criterion.** Both met and unmet — never skip this step.
124
+ - **Call `tasks.acceptance.update()` for each criterion.** Never skip this step.
128
125
  - **Never self-approve partial work.** All criteria must be met, not most.
129
126
  - **Be specific when blocking.** The builder must know exactly what to fix.
130
127
  - **Do not fix issues yourself.** Your job is to verify, not to implement.
package/dist/cli.js CHANGED
@@ -149,14 +149,17 @@ If it exits non-zero, stop and report the issue. Do not proceed with tasks until
149
149
  The harness exposes tools via MCP server on port ${port}. Use these instead of reading files directly.
150
150
 
151
151
  \`\`\`
152
- actions.start taskId agent \u2192 start an action, returns actionId
153
- actions.write actionId section text \u2192 record a section (result, tools_used, ...)
154
- actions.complete actionId summary \u2192 close the action
155
- actions.get taskId \u2192 full action history for a task
156
- tasks.get [status] \u2192 list tasks (pending | in_progress | done | blocked)
157
- tasks.claim id \u2192 atomically claim a pending task
158
- tasks.update id status \u2192 change task status
159
- docs.search query \u2192 search ${docsPath} for relevant content
152
+ actions.start taskId agent \u2192 start an action, returns actionId
153
+ actions.write actionId section text \u2192 record a section (result, blockers, ...)
154
+ actions.record_tool actionId toolName [argsJson] [summary] \u2192 log a tool call to the Tools dashboard
155
+ actions.record_file actionId filePath operation [notes] \u2192 log a file touch to the Files dashboard
156
+ actions.complete actionId summary \u2192 close the action
157
+ actions.get taskId \u2192 full action history for a task
158
+ tasks.get [status] \u2192 list tasks (pending | in_progress | done | blocked)
159
+ tasks.claim id \u2192 atomically claim a pending task
160
+ tasks.update id status \u2192 change task status
161
+ tasks.acceptance.update criterionId \u2192 mark an acceptance criterion as met
162
+ docs.search query \u2192 search ${docsPath} for relevant content
160
163
  \`\`\`
161
164
 
162
165
  ## Workflow
@@ -169,7 +172,8 @@ docs.search query \u2192 search ${docsPath} for relevant c
169
172
 
170
173
  2. WORK (lead \u2192 explorer \u2192 builder \u2192 reviewer)
171
174
  - Each agent calls actions.start(taskId, agentName) \u2192 actionId
172
- - Records work with actions.write(actionId, section, content)
175
+ - After EVERY tool call: actions.record_tool(actionId, toolName, args, summary)
176
+ - After EVERY file change: actions.record_file(actionId, filePath, operation, notes)
173
177
  - Closes with actions.complete(actionId, summary)
174
178
 
175
179
  3. CLOSE
@@ -1161,12 +1165,27 @@ async function runHealth(cwd2) {
1161
1165
  // src/commands/init.ts
1162
1166
  import { mkdirSync as mkdirSync6, writeFileSync as writeFileSync7 } from "fs";
1163
1167
  import { homedir } from "os";
1164
- import { join as join9 } from "path";
1168
+ import { join as join10 } from "path";
1165
1169
  import * as p2 from "@clack/prompts";
1166
1170
  import pc6 from "picocolors";
1167
1171
 
1168
1172
  // src/commands/init-helpers.ts
1173
+ import { existsSync as existsSync7, readFileSync as readFileSync5 } from "fs";
1174
+ import { join as join9 } from "path";
1169
1175
  import pc5 from "picocolors";
1176
+ function readProjectNameFromPackageJson(cwd2) {
1177
+ try {
1178
+ const pkgPath2 = join9(cwd2, "package.json");
1179
+ if (!existsSync7(pkgPath2)) return null;
1180
+ const content = readFileSync5(pkgPath2, "utf8");
1181
+ const pkg2 = JSON.parse(content);
1182
+ const name = pkg2?.name;
1183
+ if (typeof name === "string" && name.trim()) return name.trim();
1184
+ return null;
1185
+ } catch {
1186
+ return null;
1187
+ }
1188
+ }
1170
1189
  function applyConfigDefaults(params) {
1171
1190
  return {
1172
1191
  provider: params.provider,
@@ -1241,7 +1260,8 @@ function printWelcomeMessage(projectName) {
1241
1260
 
1242
1261
  // src/commands/init.ts
1243
1262
  async function runInit(cwd2, flags) {
1244
- const projectName = flags.name || "my-project";
1263
+ const detectedName = flags.name ?? readProjectNameFromPackageJson(cwd2);
1264
+ const projectName = detectedName || "my-project";
1245
1265
  printWelcomeMessage(projectName);
1246
1266
  let name;
1247
1267
  if (flags.name) {
@@ -1373,9 +1393,9 @@ async function runInit(cwd2, flags) {
1373
1393
  let installDir = cwd2;
1374
1394
  if (globalInstallation) {
1375
1395
  if (provider === "claude-code") {
1376
- installDir = join9(homedir(), ".claude");
1396
+ installDir = join10(homedir(), ".claude");
1377
1397
  } else {
1378
- installDir = join9(homedir(), ".config", "opencode");
1398
+ installDir = join10(homedir(), ".config", "opencode");
1379
1399
  }
1380
1400
  }
1381
1401
  const configContent = configTs({
@@ -1386,8 +1406,8 @@ async function runInit(cwd2, flags) {
1386
1406
  tasksAdapter,
1387
1407
  port: config.tools.mcp.port
1388
1408
  });
1389
- writeFileSync7(join9(installDir, "agent-harness-kit.config.ts"), configContent, "utf8");
1390
- mkdirSync6(join9(installDir, config.storage.dir), { recursive: true });
1409
+ writeFileSync7(join10(installDir, "agent-harness-kit.config.ts"), configContent, "utf8");
1410
+ mkdirSync6(join10(installDir, config.storage.dir), { recursive: true });
1391
1411
  const db = openDB(config, installDir);
1392
1412
  await materializer.scaffold(config, { cwd: installDir, firstTask });
1393
1413
  if (firstTask) {
@@ -1469,8 +1489,8 @@ async function runMigrate(cwd2, opts) {
1469
1489
  }
1470
1490
 
1471
1491
  // src/core/mcp-server.ts
1472
- import { readdirSync, readFileSync as readFileSync5, statSync } from "fs";
1473
- import { join as join10, resolve as resolve7 } from "path";
1492
+ import { readdirSync, readFileSync as readFileSync6, statSync } from "fs";
1493
+ import { join as join11, resolve as resolve7 } from "path";
1474
1494
  import { Server } from "@modelcontextprotocol/sdk/server";
1475
1495
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
1476
1496
  import {
@@ -1741,7 +1761,7 @@ function searchDocs(docsPath, query, maxResults = 10) {
1741
1761
  for (const file of files) {
1742
1762
  if (results.length >= maxResults) break;
1743
1763
  try {
1744
- const content = readFileSync5(file, "utf8");
1764
+ const content = readFileSync6(file, "utf8");
1745
1765
  const lines = content.split("\n");
1746
1766
  for (let i = 0; i < lines.length; i++) {
1747
1767
  const lower = lines[i].toLowerCase();
@@ -1762,7 +1782,7 @@ function collectMarkdownFiles(dir) {
1762
1782
  const files = [];
1763
1783
  try {
1764
1784
  for (const entry of readdirSync(dir)) {
1765
- const full = join10(dir, entry);
1785
+ const full = join11(dir, entry);
1766
1786
  const stat = statSync(full);
1767
1787
  if (stat.isDirectory()) {
1768
1788
  files.push(...collectMarkdownFiles(full));
@@ -1867,13 +1887,13 @@ async function runStatus(cwd2, opts) {
1867
1887
  }
1868
1888
 
1869
1889
  // src/commands/sync.ts
1870
- import { existsSync as existsSync7, readFileSync as readFileSync6 } from "fs";
1871
- import { join as join11, resolve as resolve8 } from "path";
1890
+ import { existsSync as existsSync8, readFileSync as readFileSync7 } from "fs";
1891
+ import { join as join12, resolve as resolve8 } from "path";
1872
1892
  import pc9 from "picocolors";
1873
1893
  async function runSync(cwd2, opts) {
1874
1894
  const config = await loadConfig(cwd2);
1875
1895
  const direction = opts.direction ?? "both";
1876
- const featureListPath = resolve8(join11(cwd2, config.storage.dir, "feature_list.json"));
1896
+ const featureListPath = resolve8(join12(cwd2, config.storage.dir, "feature_list.json"));
1877
1897
  const db = openDB(config, cwd2);
1878
1898
  try {
1879
1899
  if (direction === "in" || direction === "both") {
@@ -1887,13 +1907,13 @@ async function runSync(cwd2, opts) {
1887
1907
  }
1888
1908
  }
1889
1909
  async function syncIn(featureListPath, db, dryRun) {
1890
- if (!existsSync7(featureListPath)) {
1910
+ if (!existsSync8(featureListPath)) {
1891
1911
  console.log(pc9.dim(`feature_list.json not found at ${featureListPath} \u2014 skipping in-sync`));
1892
1912
  return;
1893
1913
  }
1894
1914
  let seeds;
1895
1915
  try {
1896
- seeds = JSON.parse(readFileSync6(featureListPath, "utf8"));
1916
+ seeds = JSON.parse(readFileSync7(featureListPath, "utf8"));
1897
1917
  } catch (err) {
1898
1918
  console.error(pc9.red(`Failed to parse feature_list.json: ${err}`));
1899
1919
  process.exit(1);
@@ -1971,14 +1991,14 @@ async function runTaskAdd(cwd2) {
1971
1991
 
1972
1992
  // src/commands/task/done.ts
1973
1993
  import { spawnSync as spawnSync2 } from "child_process";
1974
- import { existsSync as existsSync8 } from "fs";
1994
+ import { existsSync as existsSync9 } from "fs";
1975
1995
  import { resolve as resolve9 } from "path";
1976
1996
  import pc11 from "picocolors";
1977
1997
  async function runTaskDone(cwd2, idOrSlug) {
1978
1998
  const config = await loadConfig(cwd2);
1979
1999
  if (config.health.required) {
1980
2000
  const scriptPath = resolve9(cwd2, config.health.scriptPath);
1981
- if (existsSync8(scriptPath)) {
2001
+ if (existsSync9(scriptPath)) {
1982
2002
  const result = spawnSync2("bash", [scriptPath], { cwd: cwd2, stdio: "pipe", encoding: "utf8" });
1983
2003
  if (result.status !== 0) {
1984
2004
  console.error(pc11.red("\u2717 Health check failed \u2014 cannot mark task as done."));
@@ -2049,10 +2069,10 @@ async function runTaskList(cwd2, opts) {
2049
2069
 
2050
2070
  // src/core/package-data.ts
2051
2071
  import { createRequire as createRequire2 } from "module";
2052
- import { dirname as dirname5, join as join12 } from "path";
2072
+ import { dirname as dirname5, join as join13 } from "path";
2053
2073
  import { fileURLToPath as fileURLToPath3 } from "url";
2054
2074
  var require2 = createRequire2(import.meta.url);
2055
- var pkgPath = join12(dirname5(fileURLToPath3(import.meta.url)), "..", "package.json");
2075
+ var pkgPath = join13(dirname5(fileURLToPath3(import.meta.url)), "..", "package.json");
2056
2076
  var pkg = require2(pkgPath);
2057
2077
 
2058
2078
  // src/core/update-check.ts
@@ -2101,14 +2121,14 @@ function stripAnsi2(str2) {
2101
2121
  }
2102
2122
 
2103
2123
  // src/commands/reset.ts
2104
- import { existsSync as existsSync9, readdirSync as readdirSync2, rmSync } from "fs";
2105
- import { join as join13, resolve as resolve10 } from "path";
2124
+ import { existsSync as existsSync10, readdirSync as readdirSync2, rmSync } from "fs";
2125
+ import { join as join14, resolve as resolve10 } from "path";
2106
2126
  import * as p5 from "@clack/prompts";
2107
2127
  import pc14 from "picocolors";
2108
2128
  async function resetAgentMds(cwd2, provider) {
2109
2129
  const agentDir = provider === "claude-code" ? ".claude/agents" : ".opencode/agents";
2110
2130
  const agentDirPath = resolve10(cwd2, agentDir);
2111
- if (!existsSync9(agentDirPath)) {
2131
+ if (!existsSync10(agentDirPath)) {
2112
2132
  console.log(pc14.yellow(` Skipping agent files \u2014 directory not found: ${agentDirPath}`));
2113
2133
  return;
2114
2134
  }
@@ -2139,7 +2159,7 @@ async function resetAgentMds(cwd2, provider) {
2139
2159
  }
2140
2160
  if (confirm3) {
2141
2161
  try {
2142
- const filePath = join13(agentDirPath, file);
2162
+ const filePath = join14(agentDirPath, file);
2143
2163
  rmSync(filePath, { force: true });
2144
2164
  console.log(pc14.green(` Removed ${file}`));
2145
2165
  } catch {
@@ -2164,7 +2184,7 @@ async function runReset(cwd2, opts) {
2164
2184
  let resetDb = false;
2165
2185
  let resetFeatureList = false;
2166
2186
  let resetAgentMdsFlag = false;
2167
- if (existsSync9(dbPath)) {
2187
+ if (existsSync10(dbPath)) {
2168
2188
  if (opts.force) {
2169
2189
  resetDb = true;
2170
2190
  } else {
@@ -2179,7 +2199,7 @@ async function runReset(cwd2, opts) {
2179
2199
  resetDb = confirm3;
2180
2200
  }
2181
2201
  }
2182
- if (existsSync9(featureListPath)) {
2202
+ if (existsSync10(featureListPath)) {
2183
2203
  if (opts.force) {
2184
2204
  resetFeatureList = true;
2185
2205
  } else {