npm - @maintainabilityai/research-runner - Versions diffs - 0.1.22 → 0.1.25 - Mend

@maintainabilityai/research-runner 0.1.22 → 0.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/runner/skills.js +114 -0
package/dist/search/arxiv-client.js +6 -1
package/package.json +1 -1

package/dist/runner/skills.js CHANGED

@@ -468,6 +468,36 @@ const SearchQueriesInput = zod_1.z.object({
     queries: zod_1.z.array(zod_1.z.string().min(1)).min(1),
     maxResults: zod_1.z.number().int().positive().optional(),
 });
+/**
+ * Decide whether a per-query envelope set means "the provider was reachable
+ * at least once" (=> `ok: true`) or "every single query failed" (=> `ok:
+ * false, reason: all-queries-failed`).
+ *
+ * Why this matters: previously the handlers returned `ok: true` even when
+ * 100% of queries failed (because `runTavilySearch` etc. use
+ * `Promise.allSettled` and never throw). That made `result_count: 0`
+ * ambiguous — could be "API reached, no matches" OR "firewall blocked
+ * every call." The agentic-SDLC evidence-honesty gate (§11.1.7) counts
+ * ok=true as a successful provider call; this fix is what makes that
+ * count actually meaningful.
+ *
+ * Returns `null` when at least one query reached the provider (the
+ * skill returns ok:true). Otherwise returns the failure reason string
+ * the skill should surface in `reason`.
+ */
+function detectAllQueriesFailed(envelopes, skill) {
+    if (envelopes.length === 0) {
+        return null;
+    }
+    const allErrored = envelopes.every(e => e.error !== undefined && e.error.length > 0);
+    if (!allErrored) {
+        return null;
+    }
+    const firstError = envelopes[0].error ?? 'unknown';
+    // `all-queries-failed:` prefix is load-bearing for the audit-validate gate's
+    // pattern matching of firewall-block vs query-quality failures.
+    return `all-queries-failed: ${skill} — ${firstError}`;
+}
 const handleTavilySearch = async (input) => {
     const parsed = SearchQueriesInput.safeParse(input);
     if (!parsed.success) {
@@ -483,6 +513,10 @@ const handleTavilySearch = async (input) => {
             queries: parsed.data.queries,
             maxResultsPerQuery: parsed.data.maxResults,
         });
+        const failure = detectAllQueriesFailed(res.envelopes, 'tavily-search');
+        if (failure) {
+            return { ok: false, reason: failure, envelopes: res.envelopes };
+        }
         return { ok: true, envelopes: res.envelopes, results: res.results };
     }
     catch (err) {
@@ -499,6 +533,10 @@ const handleArxivSearch = async (input) => {
             queries: parsed.data.queries,
             maxResultsPerQuery: parsed.data.maxResults,
         });
+        const failure = detectAllQueriesFailed(res.envelopes, 'arxiv-search');
+        if (failure) {
+            return { ok: false, reason: failure, envelopes: res.envelopes };
+        }
         return { ok: true, envelopes: res.envelopes, results: res.results };
     }
     catch (err) {
@@ -520,6 +558,10 @@ const handleUsptoSearch = async (input) => {
             queries: parsed.data.queries,
             maxResultsPerQuery: parsed.data.maxResults,
         });
+        const failure = detectAllQueriesFailed(res.envelopes, 'uspto-search');
+        if (failure) {
+            return { ok: false, reason: failure, envelopes: res.envelopes };
+        }
         return { ok: true, envelopes: res.envelopes, results: res.results };
     }
     catch (err) {
@@ -536,6 +578,10 @@ const handleHackerNewsSearch = async (input) => {
             queries: parsed.data.queries,
             hitsPerQuery: parsed.data.maxResults,
         });
+        const failure = detectAllQueriesFailed(res.envelopes, 'hackernews-search');
+        if (failure) {
+            return { ok: false, reason: failure, envelopes: res.envelopes };
+        }
         return { ok: true, envelopes: res.envelopes, results: res.results };
     }
     catch (err) {
@@ -747,6 +793,73 @@ const handleAuditEmitEvent = async (input) => {
     return { ok: false, reason: 'audit-write-failed-after-retries' };
 };
 // ─────────────────────────────────────────────────────────────────────
+// Audit verify-chain — CI defense against forged audit logs
+// ─────────────────────────────────────────────────────────────────────
+const AuditVerifyInput = zod_1.z.object({
+    okrId: zod_1.z.string().min(1),
+    runId: zod_1.z.string().min(1),
+});
+/**
+ * `audit-verify-chain` — replay the hash chain over an existing audit
+ * JSONL, returning `{ok: true, chainHead, eventCount}` if the chain is
+ * intact or `{ok: false, reason}` on the first integrity failure.
+ *
+ * Why this skill exists: an agent that loses access to the runner could
+ * (and on PR #105 did) self-write the JSONL with fabricated hashes. The
+ * audit-and-drift workflow calls this skill after each run; verdict
+ * fails + `chain-forgery-detected` label is applied on `ok:false`. The
+ * verification rules are identical to `verifyChain()` in audit-emitter.ts:
+ *   - first event prev_event_hash === null
+ *   - each prev_event_hash === preceding event.event_hash
+ *   - each event_hash === sha256(canonicalStringify(event-with-empty-hash))
+ *   - event_id is monotonic from 1
+ */
+const handleAuditVerifyChain = async (input) => {
+    const parsed = AuditVerifyInput.safeParse(input);
+    if (!parsed.success) {
+        return { ok: false, reason: `bad-input: ${parsed.error.message}` };
+    }
+    const { okrId, runId } = parsed.data;
+    const filePath = path.join(meshPath(), 'okrs', okrId, 'audit', 'events', `${runId}.jsonl`);
+    if (!fs.existsSync(filePath)) {
+        return { ok: false, reason: `audit-jsonl-missing: ${filePath}` };
+    }
+    let lines;
+    try {
+        lines = fs.readFileSync(filePath, 'utf8').split('\n').filter(l => l.trim().length > 0);
+    }
+    catch (err) {
+        return { ok: false, reason: `read-failed: ${err.message}` };
+    }
+    let prev = null;
+    for (let i = 0; i < lines.length; i++) {
+        let event;
+        try {
+            event = JSON.parse(lines[i]);
+        }
+        catch (err) {
+            return { ok: false, reason: `bad-jsonl-line-${i + 1}: ${err.message}` };
+        }
+        if (event.event_id !== i + 1) {
+            return { ok: false, reason: `event-id-mismatch-line-${i + 1}: expected ${i + 1} got ${event.event_id}` };
+        }
+        if (event.prev_event_hash !== prev) {
+            return { ok: false, reason: `prev-hash-mismatch-line-${i + 1}: expected ${prev ?? 'null'} got ${event.prev_event_hash ?? 'null'}` };
+        }
+        const recordedHash = event.event_hash;
+        if (typeof recordedHash !== 'string') {
+            return { ok: false, reason: `missing-event-hash-line-${i + 1}` };
+        }
+        const draft = { ...event, event_hash: '' };
+        const recomputed = sha256(canonicalStringify(draft));
+        if (recordedHash !== recomputed) {
+            return { ok: false, reason: `forged-hash-line-${i + 1}: recorded=${recordedHash.slice(0, 16)}… recomputed=${recomputed.slice(0, 16)}…` };
+        }
+        prev = recordedHash;
+    }
+    return { ok: true, chainHead: prev, eventCount: lines.length };
+};
+// ─────────────────────────────────────────────────────────────────────
 // Registry + dispatcher
 // ─────────────────────────────────────────────────────────────────────
 exports.SKILLS = {
@@ -763,6 +876,7 @@ exports.SKILLS = {
     'dedupe-and-rank': handleDedupeAndRank,
     'format-research-issue-update': handleFormatResearchIssueUpdate,
     'audit-emit-event': handleAuditEmitEvent,
+    'audit-verify-chain': handleAuditVerifyChain,
 };
 function isSkillName(name) {
     return Object.prototype.hasOwnProperty.call(exports.SKILLS, name);

package/dist/search/arxiv-client.js CHANGED

@@ -14,7 +14,12 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.arxivSearch = arxivSearch;
 exports.parseArxivAtom = parseArxivAtom;
-const DEFAULT_ENDPOINT = 'http://export.arxiv.org/api/query';
+// HTTPS — arXiv has supported it for years. Using plain HTTP previously
+// caused agentic-SDLC runs to be blocked by the Copilot Coding Agent
+// firewall, which allow-lists `https://export.arxiv.org/` (the canonical
+// HTTPS form); a plain-http GET against the same host is a protocol-
+// mismatch and rejected as `http block`. See B-PR1f forensics.
+const DEFAULT_ENDPOINT = 'https://export.arxiv.org/api/query';
 async function arxivSearch(opts) {
     const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
     const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@maintainabilityai/research-runner",
-  "version": "0.1.22",
+  "version": "0.1.25",
   "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
   "license": "MIT",
   "author": "MaintainabilityAI",