@maintainabilityai/research-runner 0.1.42 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runner/skills.js +368 -49
- package/package.json +1 -1
package/dist/runner/skills.js
CHANGED
|
@@ -902,6 +902,11 @@ const handleSelfReviewCodeSecurity = makeCodeReviewHandler('code-security');
|
|
|
902
902
|
// ─────────────────────────────────────────────────────────────────────
|
|
903
903
|
const KnowledgeCodeInput = zod_1.z.object({
|
|
904
904
|
okrId: zod_1.z.string().min(1),
|
|
905
|
+
// Bug-Q phase 2 — `runId` is the cache key for the clone retained
|
|
906
|
+
// between this skill and `knowledge-code-read`. Falls back to the
|
|
907
|
+
// RUN_ID env var when omitted (the runner already sets it from
|
|
908
|
+
// session context); failing both yields a clear error.
|
|
909
|
+
runId: zod_1.z.string().min(1).optional(),
|
|
905
910
|
repoUrl: zod_1.z.string().min(1),
|
|
906
911
|
repoStatus: zod_1.z.enum(['connected', 'not-connected', 'create', 'unreachable']),
|
|
907
912
|
ref: zod_1.z.string().optional(),
|
|
@@ -980,34 +985,187 @@ function walkRepo(rootDir, maxFiles) {
|
|
|
980
985
|
recurse(rootDir, '');
|
|
981
986
|
return out;
|
|
982
987
|
}
|
|
988
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
989
|
+
// Bug-Q phase 2 — brownfield clone cache.
|
|
990
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
991
|
+
// Until phase 2, `knowledge-code` cloned + classified + deleted the
|
|
992
|
+
// tree in one invocation. That left the agent with structural metadata
|
|
993
|
+
// only — no way to read actual file contents to ground its design.
|
|
994
|
+
// Codex audit round 2 (B1) flagged this: the prompt asks for
|
|
995
|
+
// `src/state/profileStore.ts`-level paths against a substrate that
|
|
996
|
+
// returns only top-dirs and language counts.
|
|
997
|
+
//
|
|
998
|
+
// Phase 2 splits the lifecycle:
|
|
999
|
+
// 1. `knowledge-code` clones + walks + classifies + RETAINS the clone
|
|
1000
|
+
// in a per-runId tmpdir cache.
|
|
1001
|
+
// 2. `knowledge-code-read` reads files FROM that cache (with a
|
|
1002
|
+
// content-addressable re-clone fallback so a stale or expired
|
|
1003
|
+
// cache doesn't break the agent).
|
|
1004
|
+
//
|
|
1005
|
+
// The cache key is `(runId, owner, name)` — one clone per (session, repo)
|
|
1006
|
+
// pair. Workflow runners are sandboxed per-job so tmpdir starts empty,
|
|
1007
|
+
// so cross-run pollution is impossible. Local dev / tests can stale the
|
|
1008
|
+
// cache; `.cache-meta.json` carries `ref` + `sha` so the read skill can
|
|
1009
|
+
// detect staleness and re-clone.
|
|
1010
|
+
//
|
|
1011
|
+
// SECURITY: `knowledge-code-read` enforces a path perimeter — relative
|
|
1012
|
+
// paths only, no `..` segments, resolved path must be a child of the
|
|
1013
|
+
// clone root. Any escape attempt is rejected without reading bytes.
|
|
1014
|
+
/**
 * Build the directory that holds the retained clone for one (run, repo) pair.
 *
 * The result is always
 * `<os.tmpdir()>/knowledge-code-cache/<runId>/<owner>-<name>`.
 *
 * `runId` may come straight from skill input or the RUN_ID env var, and
 * `ensureClone` recursively deletes the returned directory before it
 * re-clones. Each component is therefore reduced to a single safe path
 * segment first, so a value such as `../../x` can never move the result
 * outside the cache root. Ordinary ids (letters, digits, `.`, `_`, `-`)
 * are left untouched.
 *
 * @param {string} runId - Session/run identifier that scopes the cache.
 * @param {string} owner - Repository owner.
 * @param {string} name - Repository name.
 * @returns {string} Path of the cache directory (not created here).
 */
function knowledgeCodeCacheDir(runId, owner, name) {
    const toSegment = (value) => {
        // Anything outside the safe set (path separators included) becomes `_`.
        const cleaned = String(value).replace(/[^A-Za-z0-9._-]/g, '_');
        // A segment made only of dots (`.`, `..`) would still be a directory
        // reference after path.join, so neutralise those too.
        return /^\.+$/.test(cleaned) ? cleaned.replace(/\./g, '_') : cleaned;
    };
    return path.join(os.tmpdir(), 'knowledge-code-cache', toSegment(runId), `${toSegment(owner)}-${toSegment(name)}`);
}
|
|
1019
|
+
/**
 * Return a shallow clone of `repoUrl` at `ref` from the per-run cache,
 * cloning it first when the cache is missing or was made for another ref.
 *
 * Never throws for clone problems; failures are reported in the result.
 *
 * @param {string} runId - Run identifier used to scope the cache directory.
 * @param {string} repoUrl - Repository URL handed to `git clone`.
 * @param {string} ref - Branch/tag to clone; `'HEAD'` (or empty) means the
 *   remote's default branch.
 * @param {string} owner - Repository owner (cache key + stored in meta).
 * @param {string} name - Repository name (cache key + stored in meta).
 * @returns {{ok: boolean, path: string, sha: string, reused: boolean, error?: string}}
 *   `reused` is true on a cache hit. On failure `ok` is false, `path`/`sha`
 *   are empty and `error` carries the underlying message.
 */
function ensureClone(runId, repoUrl, ref, owner, name) {
    const cacheDir = knowledgeCodeCacheDir(runId, owner, name);
    // NOTE(review): the meta file sits inside the clone's work tree, so a
    // directory walk of the clone may list it — confirm walkRepo skips it.
    const metaPath = path.join(cacheDir, '.cache-meta.json');
    // Cache hit if meta exists AND ref matches.
    if (fs.existsSync(metaPath)) {
        try {
            const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
            if (meta.ref === ref && typeof meta.sha === 'string') {
                return { ok: true, path: cacheDir, sha: meta.sha, reused: true };
            }
        }
        catch { /* unreadable meta — re-clone */ }
    }
    // Clean out a stale cache before re-cloning to avoid mixing two refs.
    try {
        fs.rmSync(cacheDir, { recursive: true, force: true });
    }
    catch { /* ignore */ }
    fs.mkdirSync(path.dirname(cacheDir), { recursive: true });
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { execFileSync } = require('node:child_process');
    const cloneArgs = ['clone', '--depth=1', '--filter=blob:limit=10m'];
    if (ref && ref !== 'HEAD') {
        cloneArgs.push('--branch', ref);
    }
    // `--` ends option parsing, so neither the URL nor the target directory
    // can ever be interpreted by git as a command-line option.
    cloneArgs.push('--', repoUrl, cacheDir);
    try {
        execFileSync('git', cloneArgs, { stdio: ['ignore', 'pipe', 'pipe'], timeout: 60_000 });
    }
    catch (err) {
        // Remove whatever a failed clone left behind so the next call starts clean.
        try {
            fs.rmSync(cacheDir, { recursive: true, force: true });
        }
        catch { /* ignore */ }
        return { ok: false, path: '', sha: '', reused: false, error: err instanceof Error ? err.message : String(err) };
    }
    let sha = '';
    try {
        sha = execFileSync('git', ['rev-parse', 'HEAD'], { cwd: cacheDir, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }).trim();
    }
    catch { /* sha stays empty */ }
    try {
        fs.writeFileSync(metaPath, JSON.stringify({ owner, name, ref, sha, clonedAt: new Date().toISOString() }), 'utf8');
    }
    catch { /* meta write failure is non-fatal — next call will just re-clone */ }
    return { ok: true, path: cacheDir, sha, reused: false };
}
|
|
1066
|
+
/**
 * Assign a coarse role to a repository file from its path.
 *
 * Checks run in a fixed priority order and the first match wins:
 * test → route → doc → config → source → other.
 *
 * @param {string} filePath - Repo-relative path using `/` separators.
 * @param {string} lang - Detected language, or `'unknown'`.
 * @returns {'test'|'route'|'doc'|'config'|'source'|'other'}
 */
function classifyRole(filePath, lang) {
    const lowered = filePath.toLowerCase();
    const hits = (patterns, subject) => patterns.some((re) => re.test(subject));

    // Path-only rules, highest priority first.
    const pathRules = [
        // `__tests__/` anywhere, `.test.` / `.spec.` files, top-level `test/` or `tests/`.
        ['test', [/(^|\/)__tests__\//, /\.(test|spec)\.(t|j)sx?$/, /\.(test|spec)\.py$/, /^test(s)?\//]],
        // `routes/`, `pages/`, `app/` directories, or `*.route(s).*` files.
        ['route', [/(^|\/)(routes|pages|app)\//, /\.routes?\.(t|j)sx?$/]],
        // Markdown anywhere, or anything under top-level `doc/` / `docs/`.
        ['doc', [/\.md$/i, /^docs?\//]],
    ];
    for (const [role, patterns] of pathRules) {
        if (hits(patterns, lowered)) {
            return role;
        }
    }

    // Config: known manifests, dot-files (.eslintrc, .gitignore, …),
    // tsconfig variants and `*.config.{js,ts,jsx,tsx}`.
    const fileName = path.basename(filePath);
    const isDotFile = /^\.[\w.-]+$/.test(fileName);
    const isConfigPath = hits([/(^|\/)tsconfig(\.[^.]+)?\.json$/, /(^|\/)[^/]+\.config\.(t|j)sx?$/], lowered);
    if (MANIFEST_FILES.has(fileName) || isDotFile || isConfigPath) {
        return 'config';
    }

    // Anything left is source when its language is recognised.
    const hasKnownLang = Boolean(lang) && lang !== 'unknown';
    return hasKnownLang ? 'source' : 'other';
}
|
|
983
1101
|
/**
|
|
984
1102
|
* Guess the primary BAR-level language + framework from the manifest +
|
|
985
|
-
* file mix
|
|
986
|
-
*
|
|
1103
|
+
* file mix, AND surface bounded file/test/route/module inventories the
|
|
1104
|
+
* agent + workflow gate can use to ground brownfield decisions.
|
|
1105
|
+
*
|
|
1106
|
+
* Bug-Q phase 2 (Codex audit round 2 / B1) extended the return shape
|
|
1107
|
+
* with `files[]`, `tests[]`, `routes[]`, `modules[]`. Before phase 2,
|
|
1108
|
+
* the only structural outputs were `topDirs` + `languages` + manifest
|
|
1109
|
+
* count — enough for the agent to KNOW what kind of repo it was, not
|
|
1110
|
+
* enough to GROUND specific file-level design choices.
|
|
987
1111
|
*/
|
|
988
|
-
function classifyRepo(
|
|
1112
|
+
function classifyRepo(filesRaw) {
    // Input:  filesRaw — array of repo-relative file paths using `/` separators.
    // Output: { topDirs, languages, packageManifests, files, tests, routes, modules }
    //   topDirs           sorted names of first-level directories
    //   languages         { [lang]: fileCount } for files with a known extension
    //   packageManifests  sorted paths whose basename is in MANIFEST_FILES
    //   files             [{ path, lang, role }] in input order
    //   tests / routes    sorted paths whose role is 'test' / 'route'
    //   modules           [{ name, fileCount }] sorted by fileCount, descending
    const topDirs = new Set();
    const languages = {};
    const packageManifests = [];
    const files = [];
    const tests = [];
    const routes = [];
    // Directory names come from the repository, so they are counted in a Map:
    // with a plain object a directory called `constructor`, `toString` or
    // `__proto__` would collide with Object.prototype and corrupt the count.
    const moduleCounts = new Map();
    const countModule = (moduleName) => {
        moduleCounts.set(moduleName, (moduleCounts.get(moduleName) ?? 0) + 1);
    };
    // Top-level dirs that are clearly not modules (tests, docs, config dirs,
    // infra dirs).
    const nonModuleTopDirs = new Set(['tests', 'test', '__tests__', 'docs', 'doc', '.github', '.vscode', 'scripts']);
    for (const f of filesRaw) {
        const slashIdx = f.indexOf('/');
        if (slashIdx > 0) {
            topDirs.add(f.slice(0, slashIdx));
        }
        const ext = path.extname(f).toLowerCase();
        const knownLang = LANG_EXTS[ext];
        const lang = knownLang ?? 'unknown';
        if (knownLang) {
            languages[lang] = (languages[lang] ?? 0) + 1;
        }
        const base = path.basename(f);
        if (MANIFEST_FILES.has(base)) {
            packageManifests.push(f);
        }
        const role = classifyRole(f, lang);
        files.push({ path: f, lang, role });
        if (role === 'test') {
            tests.push(f);
        }
        if (role === 'route') {
            routes.push(f);
        }
        // Modules — top-level subdirectory of `src/` if present, otherwise
        // top-level repo subdir. Skips files at the repo root (those aren't
        // module-organized).
        const srcMatch = /^src\/([^/]+)\//.exec(f);
        if (srcMatch) {
            countModule(srcMatch[1]);
        }
        else if (slashIdx > 0) {
            const topDir = f.slice(0, slashIdx);
            if (!nonModuleTopDirs.has(topDir)) {
                countModule(topDir);
            }
        }
    }
    const modules = Array.from(moduleCounts, ([name, fileCount]) => ({ name, fileCount }))
        .sort((a, b) => b.fileCount - a.fileCount);
    return {
        topDirs: Array.from(topDirs).sort(),
        languages,
        packageManifests: packageManifests.sort(),
        files,
        tests: tests.sort(),
        routes: routes.sort(),
        modules,
    };
}
|
|
1013
1171
|
/**
|
|
@@ -1078,55 +1236,42 @@ const handleKnowledgeCode = async (input) => {
|
|
|
1078
1236
|
};
|
|
1079
1237
|
}
|
|
1080
1238
|
// ─── Brownfield branch (connected) ─────────────────────────────────
|
|
1081
|
-
//
|
|
1082
|
-
//
|
|
1083
|
-
//
|
|
1084
|
-
//
|
|
1239
|
+
// Bug-Q phase 2 — uses the per-runId clone cache (`ensureClone`)
|
|
1240
|
+
// so `knowledge-code-read` can read the same files later in the
|
|
1241
|
+
// session without re-cloning. The cache stays for the runner-job
|
|
1242
|
+
// tmpdir lifetime (workflow runners get a clean tmpdir per job, so
|
|
1243
|
+
// cross-run pollution is impossible).
|
|
1085
1244
|
if (!gh) {
|
|
1086
1245
|
return { ok: false, reason: 'repo-url-not-github', repo: repoUrl };
|
|
1087
1246
|
}
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
const
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
let cloneError = '';
|
|
1099
|
-
try {
|
|
1100
|
-
execFileSync('git', cloneArgs, { stdio: ['ignore', 'pipe', 'pipe'], timeout: 60_000 });
|
|
1101
|
-
}
|
|
1102
|
-
catch (err) {
|
|
1103
|
-
cloneOk = false;
|
|
1104
|
-
cloneError = err instanceof Error ? err.message : String(err);
|
|
1247
|
+
// Resolve the session runId — explicit input wins; fall back to
|
|
1248
|
+
// RUN_ID env var (the runner sets this from session context).
|
|
1249
|
+
const runId = parsed.data.runId ?? process.env.RUN_ID;
|
|
1250
|
+
if (!runId) {
|
|
1251
|
+
return {
|
|
1252
|
+
ok: false,
|
|
1253
|
+
reason: 'missing-run-id',
|
|
1254
|
+
repo: repoSlug,
|
|
1255
|
+
remediation: "knowledge-code needs a session runId to scope the clone cache. Either pass `runId` in the skill input, or set the RUN_ID env var before invoking (the agent does this automatically via session-context export — see agent.md step 1b).",
|
|
1256
|
+
};
|
|
1105
1257
|
}
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
fs.rmSync(tmpRoot, { recursive: true, force: true });
|
|
1110
|
-
}
|
|
1111
|
-
catch { /* ignore */ }
|
|
1258
|
+
const cloneRef = ref ?? 'HEAD';
|
|
1259
|
+
const cloneResult = ensureClone(runId, repoUrl, cloneRef, gh.owner, gh.name);
|
|
1260
|
+
if (!cloneResult.ok) {
|
|
1112
1261
|
const auditMetadata = { phase: 'what', repo: repoSlug, mode: 'brownfield-clone-failed', repo_status: 'connected', okr_id: okrId };
|
|
1113
1262
|
return {
|
|
1114
1263
|
ok: false,
|
|
1115
1264
|
reason: 'clone-failed',
|
|
1116
1265
|
repo: repoSlug,
|
|
1117
|
-
remediation: `git clone failed for ${repoUrl}. Verify the GitHub App install is approved on this repo and the ref (${cloneRef}) exists. Underlying error: ${
|
|
1266
|
+
remediation: `git clone failed for ${repoUrl}. Verify the GitHub App install is approved on this repo and the ref (${cloneRef}) exists. Underlying error: ${cloneResult.error ?? 'unknown'}`,
|
|
1118
1267
|
auditMetadata,
|
|
1119
1268
|
};
|
|
1120
1269
|
}
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
try {
|
|
1124
|
-
sha = execFileSync('git', ['rev-parse', 'HEAD'], { cwd: cloneTarget, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }).trim();
|
|
1125
|
-
}
|
|
1126
|
-
catch { /* sha stays empty */ }
|
|
1270
|
+
const cloneTarget = cloneResult.path;
|
|
1271
|
+
const sha = cloneResult.sha;
|
|
1127
1272
|
const cap = maxFiles ?? 200;
|
|
1128
|
-
const
|
|
1129
|
-
const structure = classifyRepo(
|
|
1273
|
+
const filesRaw = walkRepo(cloneTarget, cap);
|
|
1274
|
+
const structure = classifyRepo(filesRaw);
|
|
1130
1275
|
// Best-effort entrypoint detection from the most-common manifest +
|
|
1131
1276
|
// top-level layout. Conservative: only mark something as an entrypoint
|
|
1132
1277
|
// when we have positive signal (manifest field OR conventional path).
|
|
@@ -1166,13 +1311,16 @@ const handleKnowledgeCode = async (input) => {
|
|
|
1166
1311
|
catch { /* manifest unreadable / non-JSON; skip */ }
|
|
1167
1312
|
}
|
|
1168
1313
|
}
|
|
1169
|
-
//
|
|
1170
|
-
//
|
|
1171
|
-
|
|
1172
|
-
fs.rmSync(tmpRoot, { recursive: true, force: true });
|
|
1173
|
-
}
|
|
1174
|
-
catch { /* ignore */ }
|
|
1314
|
+
// Bug-Q phase 2 — DO NOT delete the clone here. `knowledge-code-read`
|
|
1315
|
+
// will reuse it through `ensureClone`. Workflow-runner tmpdir is wiped
|
|
1316
|
+
// when the job ends, so cleanup happens for free at the right scope.
|
|
1175
1317
|
const primaryLanguage = Object.entries(structure.languages).sort((a, b) => b[1] - a[1])[0]?.[0] ?? 'unknown';
|
|
1318
|
+
// Bug-Q phase 2 — surface the file/test/route/module inventory in the
|
|
1319
|
+
// audit payload so the workflow path-citation gate can cross-check
|
|
1320
|
+
// every brownfield path cited in code-design.md against what actually
|
|
1321
|
+
// exists in the clone. `inventory_paths` is the flat list of file
|
|
1322
|
+
// paths (sorted) the workflow uses as its membership set.
|
|
1323
|
+
const inventoryPaths = structure.files.map(f => f.path).sort();
|
|
1176
1324
|
const auditMetadata = {
|
|
1177
1325
|
phase: 'what',
|
|
1178
1326
|
repo: repoSlug,
|
|
@@ -1180,9 +1328,15 @@ const handleKnowledgeCode = async (input) => {
|
|
|
1180
1328
|
repo_status: 'connected',
|
|
1181
1329
|
okr_id: okrId,
|
|
1182
1330
|
sha: sha.slice(0, 12),
|
|
1183
|
-
file_count:
|
|
1331
|
+
file_count: filesRaw.length,
|
|
1184
1332
|
primary_language: primaryLanguage,
|
|
1185
1333
|
manifests: structure.packageManifests.length,
|
|
1334
|
+
test_count: structure.tests.length,
|
|
1335
|
+
route_count: structure.routes.length,
|
|
1336
|
+
module_count: structure.modules.length,
|
|
1337
|
+
// Inventory: flat path list — bounded by the `maxFiles` cap above.
|
|
1338
|
+
// Workflow gate consumes this to validate cited paths.
|
|
1339
|
+
inventory_paths: inventoryPaths,
|
|
1186
1340
|
};
|
|
1187
1341
|
return {
|
|
1188
1342
|
ok: true,
|
|
@@ -1194,6 +1348,147 @@ const handleKnowledgeCode = async (input) => {
|
|
|
1194
1348
|
};
|
|
1195
1349
|
};
|
|
1196
1350
|
// ─────────────────────────────────────────────────────────────────────
|
|
1351
|
+
// knowledge-code-read — Bug-Q phase 2 (Codex audit round 2 / B1).
|
|
1352
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
1353
|
+
// `knowledge-code` returns structural metadata; this skill returns
|
|
1354
|
+
// bounded file CONTENTS so the agent can ground design with real code,
|
|
1355
|
+
// not paraphrased guesses. Same session-scoped clone cache as
|
|
1356
|
+
// `knowledge-code` — the read is essentially free after the initial
|
|
1357
|
+
// clone.
|
|
1358
|
+
//
|
|
1359
|
+
// SECURITY PERIMETER: the runner only reads paths that resolve INSIDE
|
|
1360
|
+
// the cloned repo. Path-traversal attempts (`../`, absolute paths) are
|
|
1361
|
+
// rejected without reading bytes. The clone is a shallow git clone in
|
|
1362
|
+
// an isolated tmpdir; even if a malicious file in the repo contained
|
|
1363
|
+
// a symlink to /etc/passwd, the `realpath` check below would refuse.
|
|
1364
|
+
//
|
|
1365
|
+
// CONTENT BOUNDS: max 10 KB per response; binary files (any NUL byte)
|
|
1366
|
+
// rejected. The agent is meant to read CODE, not blobs.
|
|
1367
|
+
//
|
|
1368
|
+
// AUDIT: every read auto-emits a skill_call event with file + bytes
|
|
1369
|
+
// returned, so the chain captures exactly which files the agent
|
|
1370
|
+
// consulted while writing the design.
|
|
1371
|
+
// Input contract for the `knowledge-code-read` skill; the handler validates
// its raw input against this schema with `safeParse` before doing anything.
const KnowledgeCodeReadInput = zod_1.z.object({
    // Required; echoed into the audit metadata as `okr_id`.
    okrId: zod_1.z.string().min(1),
    // Optional; the handler falls back to `process.env.RUN_ID` when omitted
    // and fails with 'missing-run-id' if neither is available.
    runId: zod_1.z.string().min(1).optional(),
    // Required; the handler passes it through `parseGithubUrl` and rejects
    // it with 'repo-url-not-github' when that returns nothing.
    repoUrl: zod_1.z.string().min(1),
    // Optional; the handler substitutes 'HEAD' when omitted.
    ref: zod_1.z.string().optional(),
    // Required; must be relative to the clone root — the handler rejects
    // absolute paths and paths that normalise to a `..` traversal.
    filePath: zod_1.z.string().min(1),
});
|
|
1378
|
+
const KNOWLEDGE_CODE_READ_MAX_BYTES = 10_240; // 10 KB cap per response
/**
 * `knowledge-code-read` skill: return the (bounded) text of one file from
 * the cached clone of a GitHub repository.
 *
 * Never throws for expected failures; every rejection is returned as
 * `{ ok: false, reason, ... }`.
 *
 * @param {unknown} input - Raw skill input, validated against
 *   `KnowledgeCodeReadInput`.
 * @returns {Promise<object>} On success: `{ ok: true, repo, file, sha,
 *   content, lang, lineCount, truncated, bytesReturned, bytesTotal,
 *   auditMetadata }`. `bytesReturned` / `bytesTotal` are byte counts of the
 *   file on disk, not string lengths.
 */
const handleKnowledgeCodeRead = async (input) => {
    const parsed = KnowledgeCodeReadInput.safeParse(input);
    if (!parsed.success) {
        return { ok: false, reason: `bad-input: ${parsed.error.message}` };
    }
    const { okrId, repoUrl, ref, filePath } = parsed.data;
    const gh = parseGithubUrl(repoUrl);
    if (!gh) {
        return { ok: false, reason: 'repo-url-not-github', repo: repoUrl };
    }
    // Explicit input wins; otherwise fall back to the RUN_ID env var.
    const runId = parsed.data.runId ?? process.env.RUN_ID;
    if (!runId) {
        return {
            ok: false,
            reason: 'missing-run-id',
            remediation: "knowledge-code-read needs a session runId to find the clone cache shared with knowledge-code. Pass `runId` in input or set the RUN_ID env var (the agent does this via session-context export).",
        };
    }
    // Security perimeter — reject obvious escape attempts BEFORE touching
    // the filesystem so the audit chain captures the rejection cleanly.
    if (path.isAbsolute(filePath)) {
        return { ok: false, reason: `path-rejected: absolute paths are forbidden (${filePath})` };
    }
    // Normalize and re-check — a path like `foo/../../bar` would resolve
    // upwards even though the literal string contains no leading `../`.
    // `path.normalize` collapses it; we then reject if it starts with `..`.
    const normalized = path.normalize(filePath);
    if (normalized.startsWith('..') || normalized === '..' || normalized.includes(`${path.sep}..${path.sep}`)) {
        return { ok: false, reason: `path-rejected: path-traversal segments forbidden (${filePath} -> ${normalized})` };
    }
    // Reuse the cached clone; ensureClone clones fresh when it is missing.
    const cloneResult = ensureClone(runId, repoUrl, ref ?? 'HEAD', gh.owner, gh.name);
    if (!cloneResult.ok) {
        return {
            ok: false,
            reason: 'clone-failed',
            repo: `${gh.owner}/${gh.name}`,
            remediation: `Could not access clone for ${repoUrl}. Underlying error: ${cloneResult.error ?? 'unknown'}`,
        };
    }
    const absPath = path.join(cloneResult.path, normalized);
    // Final check — resolve symlinks and verify the real path is still
    // inside the clone root, so a symlink committed to the repo cannot
    // point the read at a file outside it.
    let realPath;
    try {
        realPath = fs.realpathSync.native(absPath);
    }
    catch {
        return { ok: false, reason: `file-not-found: ${filePath} not in ${gh.owner}/${gh.name}@${cloneResult.sha.slice(0, 12)}` };
    }
    const realClone = fs.realpathSync.native(cloneResult.path);
    if (!realPath.startsWith(realClone + path.sep) && realPath !== realClone) {
        return { ok: false, reason: `path-escape: resolved path falls outside the cloned repo (${filePath} -> ${realPath})` };
    }
    let stat;
    try {
        stat = fs.statSync(realPath);
    }
    catch {
        return { ok: false, reason: `file-not-found: ${filePath}` };
    }
    if (stat.isDirectory()) {
        return { ok: false, reason: `path-is-directory: ${filePath} is a directory; knowledge-code-read returns file contents only` };
    }
    // Read + reject binary + truncate.
    let buf;
    try {
        buf = fs.readFileSync(realPath);
    }
    catch (err) {
        return { ok: false, reason: `read-failed: ${err instanceof Error ? err.message : String(err)}` };
    }
    // Heuristic: a NUL byte in the first 8 KB is a strong binary signal.
    if (buf.subarray(0, 8192).includes(0)) {
        return { ok: false, reason: `binary-file: ${filePath} contains NUL bytes; knowledge-code-read returns text only` };
    }
    const totalBytes = buf.length;
    const truncated = totalBytes > KNOWLEDGE_CODE_READ_MAX_BYTES;
    let bytesReturned = totalBytes;
    if (truncated) {
        bytesReturned = KNOWLEDGE_CODE_READ_MAX_BYTES;
        // Do not cut a multi-byte UTF-8 character in half: if the first
        // excluded byte is a continuation byte (10xxxxxx), back up to the
        // start of that character. A UTF-8 character has at most three
        // continuation bytes, so the back-off is bounded for non-UTF-8 data.
        for (let step = 0; step < 3 && bytesReturned > 0 && (buf[bytesReturned] & 0xC0) === 0x80; step += 1) {
            bytesReturned -= 1;
        }
    }
    const content = buf.subarray(0, bytesReturned).toString('utf8');
    const lang = LANG_EXTS[path.extname(filePath).toLowerCase()] ?? 'unknown';
    const lineCount = content.split('\n').length;
    const repo = `${gh.owner}/${gh.name}`;
    const auditMetadata = {
        phase: 'what',
        repo,
        file: normalized,
        sha: cloneResult.sha.slice(0, 12),
        // Bytes of the file actually included in `content` (not the JS
        // string length, which counts UTF-16 code units).
        bytes_returned: bytesReturned,
        bytes_total: totalBytes,
        truncated,
        lang,
        okr_id: okrId,
    };
    return {
        ok: true,
        repo,
        file: normalized,
        sha: cloneResult.sha,
        content,
        lang,
        lineCount,
        truncated,
        bytesReturned,
        bytesTotal: totalBytes,
        auditMetadata,
    };
};
|
|
1491
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
1197
1492
|
// Search skills — thin wrappers over the existing search nodes
|
|
1198
1493
|
// ─────────────────────────────────────────────────────────────────────
|
|
1199
1494
|
const SearchQueriesInput = zod_1.z.object({
|
|
@@ -1970,6 +2265,24 @@ const handleAuditVerifyChain = async (input) => {
|
|
|
1970
2265
|
const sealed = signedCount > 0;
|
|
1971
2266
|
const agentEventCount = lines.length - workflowUnsignedCount;
|
|
1972
2267
|
let sealVerified = false;
|
|
2268
|
+
// Bug-Q / Q3 (Codex audit round 2) — a chain that USES per-epoch
|
|
2269
|
+
// signing (any event carries `signer_epoch`) MUST be sealed AND seal-
|
|
2270
|
+
// verified. Without this guard, an attacker could hand-craft a chain
|
|
2271
|
+
// where event 1 is signed (forcing `chainUsesPerEpochSigning=true`)
|
|
2272
|
+
// but every subsequent event is unsigned — `signedCount > 0` would
|
|
2273
|
+
// be true and the per-event check below would pass each unsigned
|
|
2274
|
+
// event as `legitimateUnsigned` if attribution were faked. Equally,
|
|
2275
|
+
// a chain where the runner reports `sealed=true` but the legacy
|
|
2276
|
+
// `chainUsesPerEpochSigning=false` path runs is the gold-product
|
|
2277
|
+
// promise we make to the marketing page. Legacy chains (no event
|
|
2278
|
+
// carries signer_epoch) keep the prior allowance — they predate
|
|
2279
|
+
// Bug O and a user audit-replaying them is intentionally tolerant.
|
|
2280
|
+
if (chainUsesPerEpochSigning && !sealed) {
|
|
2281
|
+
return {
|
|
2282
|
+
ok: false,
|
|
2283
|
+
reason: `per-epoch-chain-not-sealed: chain references signer_epoch (per-epoch signing contract) but no events carry signatures; gold-product contract requires per-epoch chains to be fully sealed`,
|
|
2284
|
+
};
|
|
2285
|
+
}
|
|
1973
2286
|
if (sealed) {
|
|
1974
2287
|
if (signedCount !== agentEventCount) {
|
|
1975
2288
|
return { ok: false, reason: `partial-signatures: ${signedCount}/${agentEventCount} agent-emitted events signed (chain tampered; ${workflowUnsignedCount} workflow-emitted unsigned by-design)` };
|
|
@@ -2032,6 +2345,12 @@ exports.SKILLS = {
|
|
|
2032
2345
|
// targetCodeRepoStatus: brownfield (clone + classify), greenfield
|
|
2033
2346
|
// (scaffolding hints, no clone), refuse (not-connected / unreachable).
|
|
2034
2347
|
'knowledge-code': handleKnowledgeCode,
|
|
2348
|
+
// Bug-Q phase 2 — knowledge-code-read returns bounded file CONTENT
|
|
2349
|
+
// from the brownfield clone retained by knowledge-code. Lets the
|
|
2350
|
+
// agent ground design decisions in real code excerpts (Codex audit
|
|
2351
|
+
// round 2 / B1: agent was hallucinating brownfield file paths
|
|
2352
|
+
// because the substrate was structural metadata only).
|
|
2353
|
+
'knowledge-code-read': handleKnowledgeCodeRead,
|
|
2035
2354
|
'tavily-search': handleTavilySearch,
|
|
2036
2355
|
'arxiv-search': handleArxivSearch,
|
|
2037
2356
|
'uspto-search': handleUsptoSearch,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@maintainabilityai/research-runner",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.44",
|
|
4
4
|
"description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "MaintainabilityAI",
|