@maintainabilityai/research-runner 0.1.43 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/runner/skills.js +350 -49
  2. package/package.json +1 -1
@@ -902,6 +902,11 @@ const handleSelfReviewCodeSecurity = makeCodeReviewHandler('code-security');
902
902
  // ─────────────────────────────────────────────────────────────────────
903
903
  const KnowledgeCodeInput = zod_1.z.object({
904
904
  okrId: zod_1.z.string().min(1),
905
+ // Bug-Q phase 2 — `runId` is the cache key for the clone retained
906
+ // between this skill and `knowledge-code-read`. Falls back to the
907
+ // RUN_ID env var when omitted (the runner already sets it from
908
+ // session context); failing both yields a clear error.
909
+ runId: zod_1.z.string().min(1).optional(),
905
910
  repoUrl: zod_1.z.string().min(1),
906
911
  repoStatus: zod_1.z.enum(['connected', 'not-connected', 'create', 'unreachable']),
907
912
  ref: zod_1.z.string().optional(),
@@ -980,34 +985,187 @@ function walkRepo(rootDir, maxFiles) {
980
985
  recurse(rootDir, '');
981
986
  return out;
982
987
  }
988
+ // ─────────────────────────────────────────────────────────────────────
989
+ // Bug-Q phase 2 — brownfield clone cache.
990
+ // ─────────────────────────────────────────────────────────────────────
991
+ // Until phase 2, `knowledge-code` cloned + classified + deleted the
992
+ // tree in one invocation. That left the agent with structural metadata
993
+ // only — no way to read actual file contents to ground its design.
994
+ // Codex audit round 2 (B1) flagged this: the prompt asks for
995
+ // `src/state/profileStore.ts`-level paths against a substrate that
996
+ // returns only top-dirs and language counts.
997
+ //
998
+ // Phase 2 splits the lifecycle:
999
+ // 1. `knowledge-code` clones + walks + classifies + RETAINS the clone
1000
+ // in a per-runId tmpdir cache.
1001
+ // 2. `knowledge-code-read` reads files FROM that cache (with a
1002
+ // ref-checked re-clone fallback so a stale or missing
1003
+ // cache doesn't break the agent).
1004
+ //
1005
+ // The cache key is `(runId, owner, name)` — one clone per (session, repo)
1006
+ // pair. Workflow runners are sandboxed per-job so tmpdir starts empty,
1007
+ // so cross-run pollution is impossible. Local dev / tests can stale the
1008
+ // cache; `.cache-meta.json` carries `ref` + `sha` so the read skill can
1009
+ // detect staleness and re-clone.
1010
+ //
1011
+ // SECURITY: `knowledge-code-read` enforces a path perimeter — relative
1012
+ // paths only, no `..` segments, resolved path must be a child of the
1013
+ // clone root. Any escape attempt is rejected without reading bytes.
1014
/**
 * Build the cache directory for one (runId, owner, name) clone:
 * `<os.tmpdir()>/knowledge-code-cache/<runId>/<owner>-<name>`.
 *
 * Every component is reduced to `[A-Za-z0-9._-]` before joining. `runId`
 * arrives from skill input / the RUN_ID env var and the result is later
 * passed to a recursive `rmSync`, so a value such as `../../x` must never be
 * able to move the directory outside the cache root. Values that are already
 * filesystem-safe map to exactly the same path as before.
 *
 * @param {string} runId - Session identifier scoping the cache.
 * @param {string} owner - Repository owner.
 * @param {string} name - Repository name.
 * @returns {string} Absolute path of the cache directory (not created here).
 */
function knowledgeCodeCacheDir(runId, owner, name) {
    const safeSegment = (value) => {
        // Path separators and any other unexpected character become `_`.
        const cleaned = String(value).replace(/[^A-Za-z0-9._-]/g, '_');
        // '', '.' and '..' are still special to the filesystem — rewrite them
        // into an ordinary name instead of letting them through.
        return /^\.*$/.test(cleaned) ? `_${cleaned.replace(/\./g, '_')}` : cleaned;
    };
    return path.join(os.tmpdir(), 'knowledge-code-cache', safeSegment(runId), `${safeSegment(owner)}-${safeSegment(name)}`);
}
1019
/**
 * Load the cache metadata file, or return null when it is absent, is not a
 * regular file, or does not contain a JSON object.
 *
 * `lstat` is used on purpose: the file lives inside the cloned tree, so a
 * repository could ship a symlink under the same name. Such a link is never
 * followed or trusted.
 */
function readCloneCacheMeta(metaPath) {
    try {
        if (!fs.lstatSync(metaPath).isFile()) {
            return null;
        }
        const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
        return meta !== null && typeof meta === 'object' ? meta : null;
    }
    catch {
        // Missing or unreadable meta — the caller falls back to a fresh clone.
        return null;
    }
}
/**
 * Return a shallow clone of `repoUrl` at `ref` from the per-run cache,
 * cloning it first when no matching cache entry exists.
 *
 * A cache entry is reused only when its metadata records the same `ref`,
 * `owner` and `name` (the directory name `${owner}-${name}` alone is
 * ambiguous, e.g. `a-b/c` vs `a/b-c`) and carries a string `sha`.
 *
 * @param {string} runId - Session identifier scoping the cache.
 * @param {string} repoUrl - URL handed to `git clone`.
 * @param {string} ref - Branch/tag to clone; `'HEAD'` or empty means the default branch.
 * @param {string} owner - Repository owner.
 * @param {string} name - Repository name.
 * @returns {{ok: boolean, path: string, sha: string, reused: boolean, error?: string}}
 *   On failure `ok` is false, `path`/`sha` are empty and `error` holds the
 *   message from the failed `git clone`. Clone failures are reported through
 *   the result object rather than thrown.
 */
function ensureClone(runId, repoUrl, ref, owner, name) {
    const cacheDir = knowledgeCodeCacheDir(runId, owner, name);
    const metaPath = path.join(cacheDir, '.cache-meta.json');
    const cached = readCloneCacheMeta(metaPath);
    if (cached
        && cached.ref === ref
        && cached.owner === owner
        && cached.name === name
        && typeof cached.sha === 'string') {
        return { ok: true, path: cacheDir, sha: cached.sha, reused: true };
    }
    // Clean out a stale cache before re-cloning to avoid mixing two refs.
    try {
        fs.rmSync(cacheDir, { recursive: true, force: true });
    }
    catch { /* ignore */ }
    fs.mkdirSync(path.dirname(cacheDir), { recursive: true });
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { execFileSync } = require('node:child_process');
    const cloneArgs = ['clone', '--depth=1', '--filter=blob:limit=10m'];
    if (ref && ref !== 'HEAD') {
        cloneArgs.push('--branch', ref);
    }
    // `--` ends option parsing so a URL beginning with `-` can never be
    // interpreted as a git option.
    cloneArgs.push('--', repoUrl, cacheDir);
    try {
        execFileSync('git', cloneArgs, { stdio: ['ignore', 'pipe', 'pipe'], timeout: 60_000 });
    }
    catch (err) {
        // Remove whatever a partial clone left behind before reporting failure.
        try {
            fs.rmSync(cacheDir, { recursive: true, force: true });
        }
        catch { /* ignore */ }
        return { ok: false, path: '', sha: '', reused: false, error: err instanceof Error ? err.message : String(err) };
    }
    let sha = '';
    try {
        sha = execFileSync('git', ['rev-parse', 'HEAD'], { cwd: cacheDir, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }).trim();
    }
    catch { /* sha stays empty */ }
    try {
        // The checkout may already contain an entry with this name, possibly a
        // symlink. Removing it unlinks the entry itself (never a link target),
        // and the exclusive `wx` flag refuses to write through anything that
        // still exists at the path.
        fs.rmSync(metaPath, { force: true });
        fs.writeFileSync(metaPath, JSON.stringify({ owner, name, ref, sha, clonedAt: new Date().toISOString() }), { encoding: 'utf8', flag: 'wx' });
    }
    catch { /* meta write failure is non-fatal — next call will just re-clone */ }
    return { ok: true, path: cacheDir, sha, reused: false };
}
1066
/**
 * Classify a repo-relative file path into a coarse role.
 *
 * Rules are evaluated in order and the first match wins:
 * `test` → `route` → `doc` → `config` → `source` → `other`.
 * Matching is done on the lower-cased path and uses `/` as the separator.
 *
 * @param {string} filePath - Repo-relative path of the file.
 * @param {string} lang - Detected language, or `'unknown'`.
 * @returns {'test'|'route'|'doc'|'config'|'source'|'other'} The role.
 */
function classifyRole(filePath, lang) {
    const lower = filePath.toLowerCase();
    // Tests — `__tests__/` dir, `.test.` / `.spec.` infixes, top-level
    // `test/` / `tests/`, plus the pytest (`test_*.py`, `*_test.py`) and
    // Go (`*_test.go`) naming conventions.
    if (/(^|\/)__tests__\//.test(lower)
        || /\.(test|spec)\.(t|j)sx?$/.test(lower)
        || /\.(test|spec)\.py$/.test(lower)
        || /(^|\/)test_[^/]+\.py$/.test(lower)
        || /(^|\/)[^/]+_test\.(py|go)$/.test(lower)
        || /^test(s)?\//.test(lower)) {
        return 'test';
    }
    // Routes — files in `routes/`, `pages/`, `app/`, or files named
    // `*.route(s).*`.
    if (/(^|\/)(routes|pages|app)\//.test(lower)
        || /\.routes?\.(t|j)sx?$/.test(lower)) {
        return 'route';
    }
    // Docs — `.md`, or anything under a top-level `docs/` / `doc/`.
    if (/\.md$/i.test(lower)
        || /^docs?\//.test(lower)) {
        return 'doc';
    }
    // Config — known manifests, dot-files, tsconfig variants and
    // `*.config.{js,jsx,ts,tsx}` files.
    const base = path.basename(filePath);
    if (MANIFEST_FILES.has(base)
        || /^\.[\w.-]+$/.test(base) // .eslintrc, .gitignore, …
        || /(^|\/)tsconfig(\.[^.]+)?\.json$/.test(lower)
        || /(^|\/)[^/]+\.config\.(t|j)sx?$/.test(lower)) {
        return 'config';
    }
    // Anything left with a recognised language is plain source code.
    if (lang && lang !== 'unknown') {
        return 'source';
    }
    return 'other';
}
983
1101
  /**
984
1102
  * Guess the primary BAR-level language + framework from the manifest +
985
- * file mix. For greenfield scaffolding the agent can override these from
986
- * BAR-app.yaml calm-node hints; this is just the brownfield read.
1103
+ * file mix, AND surface bounded file/test/route/module inventories the
1104
+ * agent + workflow gate can use to ground brownfield decisions.
1105
+ *
1106
+ * Bug-Q phase 2 (Codex audit round 2 / B1) extended the return shape
1107
+ * with `files[]`, `tests[]`, `routes[]`, `modules[]`. Before phase 2,
1108
+ * the only structural outputs were `topDirs` + `languages` + manifest
1109
+ * count — enough for the agent to KNOW what kind of repo it was, not
1110
+ * enough to GROUND specific file-level design choices.
987
1111
  */
988
- function classifyRepo(files) {
1112
+ function classifyRepo(filesRaw) {
989
1113
  const topDirs = new Set();
990
1114
  const languages = {};
991
1115
  const packageManifests = [];
992
- for (const f of files) {
1116
+ const files = [];
1117
+ const tests = [];
1118
+ const routes = [];
1119
+ const moduleCounts = {};
1120
+ for (const f of filesRaw) {
993
1121
  const slashIdx = f.indexOf('/');
994
1122
  if (slashIdx > 0) {
995
1123
  topDirs.add(f.slice(0, slashIdx));
996
1124
  }
997
1125
  const ext = path.extname(f).toLowerCase();
998
- const lang = LANG_EXTS[ext];
999
- if (lang) {
1126
+ const lang = LANG_EXTS[ext] ?? 'unknown';
1127
+ if (LANG_EXTS[ext]) {
1000
1128
  languages[lang] = (languages[lang] ?? 0) + 1;
1001
1129
  }
1002
1130
  const base = path.basename(f);
1003
1131
  if (MANIFEST_FILES.has(base)) {
1004
1132
  packageManifests.push(f);
1005
1133
  }
1134
+ const role = classifyRole(f, lang);
1135
+ files.push({ path: f, lang, role });
1136
+ if (role === 'test') {
1137
+ tests.push(f);
1138
+ }
1139
+ if (role === 'route') {
1140
+ routes.push(f);
1141
+ }
1142
+ // Modules — top-level subdirectory of `src/` if present, otherwise
1143
+ // top-level repo subdir. Skips files at the repo root (those aren't
1144
+ // module-organized).
1145
+ const srcMatch = /^src\/([^/]+)\//.exec(f);
1146
+ if (srcMatch) {
1147
+ moduleCounts[srcMatch[1]] = (moduleCounts[srcMatch[1]] ?? 0) + 1;
1148
+ }
1149
+ else if (slashIdx > 0) {
1150
+ const topDir = f.slice(0, slashIdx);
1151
+ // Avoid double-counting top-level dirs that are clearly not
1152
+ // modules (tests, docs, config dirs, infra dirs).
1153
+ if (!['tests', 'test', '__tests__', 'docs', 'doc', '.github', '.vscode', 'scripts'].includes(topDir)) {
1154
+ moduleCounts[topDir] = (moduleCounts[topDir] ?? 0) + 1;
1155
+ }
1156
+ }
1006
1157
  }
1158
+ const modules = Object.entries(moduleCounts)
1159
+ .map(([name, fileCount]) => ({ name, fileCount }))
1160
+ .sort((a, b) => b.fileCount - a.fileCount);
1007
1161
  return {
1008
1162
  topDirs: Array.from(topDirs).sort(),
1009
1163
  languages,
1010
1164
  packageManifests: packageManifests.sort(),
1165
+ files,
1166
+ tests: tests.sort(),
1167
+ routes: routes.sort(),
1168
+ modules,
1011
1169
  };
1012
1170
  }
1013
1171
  /**
@@ -1078,55 +1236,42 @@ const handleKnowledgeCode = async (input) => {
1078
1236
  };
1079
1237
  }
1080
1238
  // ─── Brownfield branch (connected) ─────────────────────────────────
1081
- // Shallow git clone (`--depth=1`) into a tmp dir, walk + classify.
1082
- // Cleanup on exit (process-scoped tmpdir). On clone failure we degrade
1083
- // to a soft-refuse rather than crash the agent can still attempt
1084
- // partial grounding from the SKILL response shape.
1239
+ // Bug-Q phase 2 uses the per-runId clone cache (`ensureClone`)
1240
+ // so `knowledge-code-read` can read the same files later in the
1241
+ // session without re-cloning. The cache stays for the runner-job
1242
+ // tmpdir lifetime (workflow runners get a clean tmpdir per job, so
1243
+ // cross-run pollution is impossible).
1085
1244
  if (!gh) {
1086
1245
  return { ok: false, reason: 'repo-url-not-github', repo: repoUrl };
1087
1246
  }
1088
- const { execFileSync } = await Promise.resolve().then(() => __importStar(require('node:child_process')));
1089
- const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), `knowledge-code-${gh.name}-`));
1090
- const cloneTarget = path.join(tmpRoot, gh.name);
1091
- const cloneRef = ref ?? 'HEAD';
1092
- const cloneArgs = ['clone', '--depth=1', '--filter=blob:limit=10m'];
1093
- if (ref && ref !== 'HEAD') {
1094
- cloneArgs.push('--branch', ref);
1095
- }
1096
- cloneArgs.push(repoUrl, cloneTarget);
1097
- let cloneOk = true;
1098
- let cloneError = '';
1099
- try {
1100
- execFileSync('git', cloneArgs, { stdio: ['ignore', 'pipe', 'pipe'], timeout: 60_000 });
1101
- }
1102
- catch (err) {
1103
- cloneOk = false;
1104
- cloneError = err instanceof Error ? err.message : String(err);
1247
+ // Resolve the session runId — explicit input wins; fall back to
1248
+ // RUN_ID env var (the runner sets this from session context).
1249
+ const runId = parsed.data.runId ?? process.env.RUN_ID;
1250
+ if (!runId) {
1251
+ return {
1252
+ ok: false,
1253
+ reason: 'missing-run-id',
1254
+ repo: repoSlug,
1255
+ remediation: "knowledge-code needs a session runId to scope the clone cache. Either pass `runId` in the skill input, or set the RUN_ID env var before invoking (the agent does this automatically via session-context export — see agent.md step 1b).",
1256
+ };
1105
1257
  }
1106
- if (!cloneOk) {
1107
- // Clean up the empty tmpdir before bailing.
1108
- try {
1109
- fs.rmSync(tmpRoot, { recursive: true, force: true });
1110
- }
1111
- catch { /* ignore */ }
1258
+ const cloneRef = ref ?? 'HEAD';
1259
+ const cloneResult = ensureClone(runId, repoUrl, cloneRef, gh.owner, gh.name);
1260
+ if (!cloneResult.ok) {
1112
1261
  const auditMetadata = { phase: 'what', repo: repoSlug, mode: 'brownfield-clone-failed', repo_status: 'connected', okr_id: okrId };
1113
1262
  return {
1114
1263
  ok: false,
1115
1264
  reason: 'clone-failed',
1116
1265
  repo: repoSlug,
1117
- remediation: `git clone failed for ${repoUrl}. Verify the GitHub App install is approved on this repo and the ref (${cloneRef}) exists. Underlying error: ${cloneError}`,
1266
+ remediation: `git clone failed for ${repoUrl}. Verify the GitHub App install is approved on this repo and the ref (${cloneRef}) exists. Underlying error: ${cloneResult.error ?? 'unknown'}`,
1118
1267
  auditMetadata,
1119
1268
  };
1120
1269
  }
1121
- // Resolve the actual SHA so the response is reproducible.
1122
- let sha = '';
1123
- try {
1124
- sha = execFileSync('git', ['rev-parse', 'HEAD'], { cwd: cloneTarget, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }).trim();
1125
- }
1126
- catch { /* sha stays empty */ }
1270
+ const cloneTarget = cloneResult.path;
1271
+ const sha = cloneResult.sha;
1127
1272
  const cap = maxFiles ?? 200;
1128
- const files = walkRepo(cloneTarget, cap);
1129
- const structure = classifyRepo(files);
1273
+ const filesRaw = walkRepo(cloneTarget, cap);
1274
+ const structure = classifyRepo(filesRaw);
1130
1275
  // Best-effort entrypoint detection from the most-common manifest +
1131
1276
  // top-level layout. Conservative: only mark something as an entrypoint
1132
1277
  // when we have positive signal (manifest field OR conventional path).
@@ -1166,13 +1311,16 @@ const handleKnowledgeCode = async (input) => {
1166
1311
  catch { /* manifest unreadable / non-JSON; skip */ }
1167
1312
  }
1168
1313
  }
1169
- // Clean up the cloned tree the SKILL is a one-shot read, no need to
1170
- // keep ~10MB of git data per invocation.
1171
- try {
1172
- fs.rmSync(tmpRoot, { recursive: true, force: true });
1173
- }
1174
- catch { /* ignore */ }
1314
+ // Bug-Q phase 2 — DO NOT delete the clone here. `knowledge-code-read`
1315
+ // will reuse it through `ensureClone`. Workflow-runner tmpdir is wiped
1316
+ // when the job ends, so cleanup happens for free at the right scope.
1175
1317
  const primaryLanguage = Object.entries(structure.languages).sort((a, b) => b[1] - a[1])[0]?.[0] ?? 'unknown';
1318
+ // Bug-Q phase 2 — surface the file/test/route/module inventory in the
1319
+ // audit payload so the workflow path-citation gate can cross-check
1320
+ // every brownfield path cited in code-design.md against what actually
1321
+ // exists in the clone. `inventory_paths` is the flat list of file
1322
+ // paths (sorted) the workflow uses as its membership set.
1323
+ const inventoryPaths = structure.files.map(f => f.path).sort();
1176
1324
  const auditMetadata = {
1177
1325
  phase: 'what',
1178
1326
  repo: repoSlug,
@@ -1180,9 +1328,15 @@ const handleKnowledgeCode = async (input) => {
1180
1328
  repo_status: 'connected',
1181
1329
  okr_id: okrId,
1182
1330
  sha: sha.slice(0, 12),
1183
- file_count: files.length,
1331
+ file_count: filesRaw.length,
1184
1332
  primary_language: primaryLanguage,
1185
1333
  manifests: structure.packageManifests.length,
1334
+ test_count: structure.tests.length,
1335
+ route_count: structure.routes.length,
1336
+ module_count: structure.modules.length,
1337
+ // Inventory: flat path list — bounded by the `maxFiles` cap above.
1338
+ // Workflow gate consumes this to validate cited paths.
1339
+ inventory_paths: inventoryPaths,
1186
1340
  };
1187
1341
  return {
1188
1342
  ok: true,
@@ -1194,6 +1348,147 @@ const handleKnowledgeCode = async (input) => {
1194
1348
  };
1195
1349
  };
1196
1350
  // ─────────────────────────────────────────────────────────────────────
1351
+ // knowledge-code-read — Bug-Q phase 2 (Codex audit round 2 / B1).
1352
+ // ─────────────────────────────────────────────────────────────────────
1353
+ // `knowledge-code` returns structural metadata; this skill returns
1354
+ // bounded file CONTENTS so the agent can ground design with real code,
1355
+ // not paraphrased guesses. Same session-scoped clone cache as
1356
+ // `knowledge-code` — the read is essentially free after the initial
1357
+ // clone.
1358
+ //
1359
+ // SECURITY PERIMETER: the runner only reads paths that resolve INSIDE
1360
+ // the cloned repo. Path-traversal attempts (`../`, absolute paths) are
1361
+ // rejected without reading bytes. The clone is a shallow git clone in
1362
+ // an isolated tmpdir; even if a malicious file in the repo contained
1363
+ // a symlink to /etc/passwd, the `realpath` check below would refuse.
1364
+ //
1365
+ // CONTENT BOUNDS: max 10 KB per response; binary files (any NUL byte)
1366
+ // rejected. The agent is meant to read CODE, not blobs.
1367
+ //
1368
+ // AUDIT: every read auto-emits a skill_call event with file + bytes
1369
+ // returned, so the chain captures exactly which files the agent
1370
+ // consulted while writing the design.
1371
// Input contract for `knowledge-code-read`. `runId` is optional because the
// handler falls back to the RUN_ID env var.
const KnowledgeCodeReadInput = zod_1.z.object({
    okrId: zod_1.z.string().min(1),
    runId: zod_1.z.string().min(1).optional(),
    repoUrl: zod_1.z.string().min(1),
    ref: zod_1.z.string().optional(),
    filePath: zod_1.z.string().min(1),
});
const KNOWLEDGE_CODE_READ_MAX_BYTES = 10_240; // 10 KB cap per response
/**
 * Pick the cut-off (in bytes) for returning at most `limit` bytes of `buf`
 * without slicing through a multi-byte UTF-8 sequence. Backs up by at most
 * three bytes (the longest run of continuation bytes in valid UTF-8); if the
 * data is not valid UTF-8 the plain `limit` is used.
 */
function knowledgeCodeReadCutoff(buf, limit) {
    if (buf.length <= limit) {
        return buf.length;
    }
    for (let end = limit; end > limit - 4 && end > 0; end -= 1) {
        // 0b10xxxxxx marks a continuation byte; anything else starts a character.
        if ((buf[end] & 0xc0) !== 0x80) {
            return end;
        }
    }
    return limit;
}
/**
 * `knowledge-code-read` skill: return bounded text content of one file from
 * the cached clone of a GitHub repo.
 *
 * Rejections (all returned as `{ ok: false, reason }`, never thrown):
 * invalid input, non-GitHub URL, missing run id, absolute or `..`-escaping
 * paths, anything under `.git/` (it may hold a credentialed remote URL),
 * paths resolving outside the clone, directories, and binary files.
 *
 * @param {unknown} input - Raw skill input, validated against `KnowledgeCodeReadInput`.
 * @returns {Promise<object>} On success: `repo`, `file`, `sha`, `content`,
 *   `lang`, `lineCount`, `truncated`, `bytesReturned`, `bytesTotal`,
 *   `auditMetadata`. Byte figures are real byte counts of the file data.
 */
const handleKnowledgeCodeRead = async (input) => {
    const parsed = KnowledgeCodeReadInput.safeParse(input);
    if (!parsed.success) {
        return { ok: false, reason: `bad-input: ${parsed.error.message}` };
    }
    const { okrId, repoUrl, ref, filePath } = parsed.data;
    const gh = parseGithubUrl(repoUrl);
    if (!gh) {
        return { ok: false, reason: 'repo-url-not-github', repo: repoUrl };
    }
    const runId = parsed.data.runId ?? process.env.RUN_ID;
    if (!runId) {
        return {
            ok: false,
            reason: 'missing-run-id',
            remediation: "knowledge-code-read needs a session runId to find the clone cache shared with knowledge-code. Pass `runId` in input or set the RUN_ID env var (the agent does this via session-context export).",
        };
    }
    // Security perimeter — reject obvious escape attempts BEFORE touching
    // the filesystem.
    if (path.isAbsolute(filePath)) {
        return { ok: false, reason: `path-rejected: absolute paths are forbidden (${filePath})` };
    }
    // `path.normalize` collapses inner `..` (e.g. `foo/../../bar` -> `../bar`),
    // so any `..` segment that survives is an attempt to climb out. Comparing
    // whole segments keeps ordinary names such as `..foo` readable.
    const normalized = path.normalize(filePath);
    const segments = normalized.split(path.sep);
    if (segments.includes('..')) {
        return { ok: false, reason: `path-rejected: path-traversal segments forbidden (${filePath} -> ${normalized})` };
    }
    // Git metadata is not source code, and `.git/config` stores the remote
    // URL, which may embed an access token.
    if (segments[0].toLowerCase() === '.git') {
        return { ok: false, reason: `path-rejected: git metadata is not readable (${filePath})` };
    }
    // Reuse the cached clone; `ensureClone` clones fresh when no matching
    // cache entry exists.
    const cloneResult = ensureClone(runId, repoUrl, ref ?? 'HEAD', gh.owner, gh.name);
    if (!cloneResult.ok) {
        return {
            ok: false,
            reason: 'clone-failed',
            repo: `${gh.owner}/${gh.name}`,
            remediation: `Could not access clone for ${repoUrl}. Underlying error: ${cloneResult.error ?? 'unknown'}`,
        };
    }
    const absPath = path.join(cloneResult.path, normalized);
    // Resolve symlinks and verify the target is still inside the clone root,
    // so a symlink committed to the repo cannot point the read elsewhere.
    let realPath;
    try {
        realPath = fs.realpathSync.native(absPath);
    }
    catch {
        return { ok: false, reason: `file-not-found: ${filePath} not in ${gh.owner}/${gh.name}@${cloneResult.sha.slice(0, 12)}` };
    }
    const realClone = fs.realpathSync.native(cloneResult.path);
    if (!realPath.startsWith(realClone + path.sep) && realPath !== realClone) {
        return { ok: false, reason: `path-escape: resolved path falls outside the cloned repo (${filePath} -> ${realPath})` };
    }
    // Repeat the `.git` check on the resolved path: an in-repo symlink could
    // otherwise alias `.git/config` under an innocent-looking name.
    if (path.relative(realClone, realPath).split(path.sep)[0].toLowerCase() === '.git') {
        return { ok: false, reason: `path-rejected: git metadata is not readable (${filePath})` };
    }
    let stat;
    try {
        stat = fs.statSync(realPath);
    }
    catch {
        return { ok: false, reason: `file-not-found: ${filePath}` };
    }
    if (stat.isDirectory()) {
        return { ok: false, reason: `path-is-directory: ${filePath} is a directory; knowledge-code-read returns file contents only` };
    }
    let buf;
    try {
        buf = fs.readFileSync(realPath);
    }
    catch (err) {
        return { ok: false, reason: `read-failed: ${err instanceof Error ? err.message : String(err)}` };
    }
    // Heuristic: a NUL byte in the first 8 KB is treated as a binary signal.
    if (buf.subarray(0, Math.min(buf.length, 8192)).includes(0)) {
        return { ok: false, reason: `binary-file: ${filePath} contains NUL bytes; knowledge-code-read returns text only` };
    }
    const totalBytes = buf.length;
    const truncated = totalBytes > KNOWLEDGE_CODE_READ_MAX_BYTES;
    // Cut on a character boundary so the excerpt never ends in U+FFFD, and
    // report the number of BYTES returned (string length counts UTF-16 units).
    const returned = buf.subarray(0, knowledgeCodeReadCutoff(buf, KNOWLEDGE_CODE_READ_MAX_BYTES));
    const bytesReturned = returned.length;
    const content = returned.toString('utf8');
    const lang = LANG_EXTS[path.extname(filePath).toLowerCase()] ?? 'unknown';
    const lineCount = content.split('\n').length;
    const repo = `${gh.owner}/${gh.name}`;
    const auditMetadata = {
        phase: 'what',
        repo,
        file: normalized,
        sha: cloneResult.sha.slice(0, 12),
        bytes_returned: bytesReturned,
        bytes_total: totalBytes,
        truncated,
        lang,
        okr_id: okrId,
    };
    return {
        ok: true,
        repo,
        file: normalized,
        sha: cloneResult.sha,
        content,
        lang,
        lineCount,
        truncated,
        bytesReturned,
        bytesTotal: totalBytes,
        auditMetadata,
    };
};
1491
+ // ─────────────────────────────────────────────────────────────────────
1197
1492
  // Search skills — thin wrappers over the existing search nodes
1198
1493
  // ─────────────────────────────────────────────────────────────────────
1199
1494
  const SearchQueriesInput = zod_1.z.object({
@@ -2050,6 +2345,12 @@ exports.SKILLS = {
2050
2345
  // targetCodeRepoStatus: brownfield (clone + classify), greenfield
2051
2346
  // (scaffolding hints, no clone), refuse (not-connected / unreachable).
2052
2347
  'knowledge-code': handleKnowledgeCode,
2348
+ // Bug-Q phase 2 — knowledge-code-read returns bounded file CONTENT
2349
+ // from the brownfield clone retained by knowledge-code. Lets the
2350
+ // agent ground design decisions in real code excerpts (Codex audit
2351
+ // round 2 / B1: agent was hallucinating brownfield file paths
2352
+ // because the substrate was structural metadata only).
2353
+ 'knowledge-code-read': handleKnowledgeCodeRead,
2053
2354
  'tavily-search': handleTavilySearch,
2054
2355
  'arxiv-search': handleArxivSearch,
2055
2356
  'uspto-search': handleUsptoSearch,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.43",
3
+ "version": "0.1.44",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",