@maintainabilityai/research-runner 0.1.22 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runner/skills.js +114 -0
- package/dist/search/arxiv-client.js +6 -1
- package/package.json +1 -1
package/dist/runner/skills.js
CHANGED
|
@@ -468,6 +468,36 @@ const SearchQueriesInput = zod_1.z.object({
|
|
|
468
468
|
queries: zod_1.z.array(zod_1.z.string().min(1)).min(1),
|
|
469
469
|
maxResults: zod_1.z.number().int().positive().optional(),
|
|
470
470
|
});
|
|
471
|
+
/**
 * Decide whether a per-query envelope set means "the provider was reachable
 * at least once" (=> `ok: true`) or "every single query failed" (=> `ok:
 * false, reason: all-queries-failed`).
 *
 * Why this matters: previously the handlers returned `ok: true` even when
 * 100% of queries failed (because `runTavilySearch` etc. use
 * `Promise.allSettled` and never throw). That made `result_count: 0`
 * ambiguous — could be "API reached, no matches" OR "firewall blocked
 * every call." The agentic-SDLC evidence-honesty gate (§11.1.7) counts
 * ok=true as a successful provider call; this fix is what makes that
 * count actually meaningful.
 *
 * An envelope counts as failed only when its `error` is present (neither
 * `undefined` nor `null`) and has a non-zero `length`.
 *
 * @param {Array<{error?: (string|null)}>} envelopes - One envelope per query;
 *   only the `error` field is inspected here.
 * @param {string} skill - Skill name embedded in the failure reason.
 * @returns {(string|null)} `null` when the list is empty or at least one
 *   envelope carries no error (the skill returns ok:true). Otherwise the
 *   failure reason string the skill should surface in `reason`, built from
 *   the first envelope's error.
 */
function detectAllQueriesFailed(envelopes, skill) {
    if (envelopes.length === 0) {
        return null;
    }
    // `!= null` rather than `!== undefined`: an explicit `error: null` means
    // "no error" and must not blow up on the `.length` access.
    const hasError = (envelope) => envelope?.error != null && envelope.error.length > 0;
    if (!envelopes.every(hasError)) {
        return null;
    }
    // Every envelope has a non-empty error at this point, so the first one is
    // always usable as the representative message.
    const [{ error: firstError }] = envelopes;
    // `all-queries-failed:` prefix is load-bearing for the audit-validate gate's
    // pattern matching of firewall-block vs query-quality failures.
    return `all-queries-failed: ${skill} — ${firstError}`;
}
|
|
471
501
|
const handleTavilySearch = async (input) => {
|
|
472
502
|
const parsed = SearchQueriesInput.safeParse(input);
|
|
473
503
|
if (!parsed.success) {
|
|
@@ -483,6 +513,10 @@ const handleTavilySearch = async (input) => {
|
|
|
483
513
|
queries: parsed.data.queries,
|
|
484
514
|
maxResultsPerQuery: parsed.data.maxResults,
|
|
485
515
|
});
|
|
516
|
+
const failure = detectAllQueriesFailed(res.envelopes, 'tavily-search');
|
|
517
|
+
if (failure) {
|
|
518
|
+
return { ok: false, reason: failure, envelopes: res.envelopes };
|
|
519
|
+
}
|
|
486
520
|
return { ok: true, envelopes: res.envelopes, results: res.results };
|
|
487
521
|
}
|
|
488
522
|
catch (err) {
|
|
@@ -499,6 +533,10 @@ const handleArxivSearch = async (input) => {
|
|
|
499
533
|
queries: parsed.data.queries,
|
|
500
534
|
maxResultsPerQuery: parsed.data.maxResults,
|
|
501
535
|
});
|
|
536
|
+
const failure = detectAllQueriesFailed(res.envelopes, 'arxiv-search');
|
|
537
|
+
if (failure) {
|
|
538
|
+
return { ok: false, reason: failure, envelopes: res.envelopes };
|
|
539
|
+
}
|
|
502
540
|
return { ok: true, envelopes: res.envelopes, results: res.results };
|
|
503
541
|
}
|
|
504
542
|
catch (err) {
|
|
@@ -520,6 +558,10 @@ const handleUsptoSearch = async (input) => {
|
|
|
520
558
|
queries: parsed.data.queries,
|
|
521
559
|
maxResultsPerQuery: parsed.data.maxResults,
|
|
522
560
|
});
|
|
561
|
+
const failure = detectAllQueriesFailed(res.envelopes, 'uspto-search');
|
|
562
|
+
if (failure) {
|
|
563
|
+
return { ok: false, reason: failure, envelopes: res.envelopes };
|
|
564
|
+
}
|
|
523
565
|
return { ok: true, envelopes: res.envelopes, results: res.results };
|
|
524
566
|
}
|
|
525
567
|
catch (err) {
|
|
@@ -536,6 +578,10 @@ const handleHackerNewsSearch = async (input) => {
|
|
|
536
578
|
queries: parsed.data.queries,
|
|
537
579
|
hitsPerQuery: parsed.data.maxResults,
|
|
538
580
|
});
|
|
581
|
+
const failure = detectAllQueriesFailed(res.envelopes, 'hackernews-search');
|
|
582
|
+
if (failure) {
|
|
583
|
+
return { ok: false, reason: failure, envelopes: res.envelopes };
|
|
584
|
+
}
|
|
539
585
|
return { ok: true, envelopes: res.envelopes, results: res.results };
|
|
540
586
|
}
|
|
541
587
|
catch (err) {
|
|
@@ -747,6 +793,73 @@ const handleAuditEmitEvent = async (input) => {
|
|
|
747
793
|
return { ok: false, reason: 'audit-write-failed-after-retries' };
|
|
748
794
|
};
|
|
749
795
|
// ─────────────────────────────────────────────────────────────────────
|
|
796
|
+
// Audit verify-chain — CI defense against forged audit logs
|
|
797
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
798
|
+
// Input schema for `audit-verify-chain`: both identifiers must be non-empty strings.
const AuditVerifyInput = zod_1.z.object({
    okrId: zod_1.z.string().min(1),
    runId: zod_1.z.string().min(1),
});
/**
 * `audit-verify-chain` — replay the hash chain over an existing audit
 * JSONL, returning `{ok: true, chainHead, eventCount}` if the chain is
 * intact or `{ok: false, reason}` on the first integrity failure.
 *
 * Why this skill exists: an agent that loses access to the runner could
 * (and on PR #105 did) self-write the JSONL with fabricated hashes. The
 * audit-and-drift workflow calls this skill after each run; verdict
 * fails + `chain-forgery-detected` label is applied on `ok:false`. The
 * verification rules mirror `verifyChain()` in audit-emitter.ts:
 *   - first event prev_event_hash === null
 *   - each prev_event_hash === preceding event.event_hash
 *   - each event_hash === sha256(canonicalStringify(event-with-empty-hash))
 *   - event_id is monotonic from 1
 *
 * The file read is `<meshPath()>/okrs/<okrId>/audit/events/<runId>.jsonl`.
 * Blank lines are ignored. A file with no non-blank lines verifies as
 * `{ok: true, chainHead: null, eventCount: 0}`.
 *
 * NOTE(review): `okrId` / `runId` are joined into the path without any
 * containment check; confirm callers cannot pass `..` segments.
 *
 * @param {unknown} input - Expected shape `{ okrId: string, runId: string }`.
 * @returns {Promise<object>} `{ok: true, chainHead, eventCount}` or
 *   `{ok: false, reason}`; never rejects for malformed file content.
 */
const handleAuditVerifyChain = async (input) => {
    const parsed = AuditVerifyInput.safeParse(input);
    if (!parsed.success) {
        return { ok: false, reason: `bad-input: ${parsed.error.message}` };
    }
    const { okrId, runId } = parsed.data;
    const filePath = path.join(meshPath(), 'okrs', okrId, 'audit', 'events', `${runId}.jsonl`);
    if (!fs.existsSync(filePath)) {
        return { ok: false, reason: `audit-jsonl-missing: ${filePath}` };
    }
    let lines;
    try {
        lines = fs.readFileSync(filePath, 'utf8').split('\n').filter(l => l.trim().length > 0);
    }
    catch (err) {
        return { ok: false, reason: `read-failed: ${err.message}` };
    }
    // Hash of the previous event; `null` is what the first event must point at.
    let prev = null;
    for (let i = 0; i < lines.length; i++) {
        let event;
        try {
            event = JSON.parse(lines[i]);
        }
        catch (err) {
            return { ok: false, reason: `bad-jsonl-line-${i + 1}: ${err.message}` };
        }
        // A line such as `null` or `42` is valid JSON but not an event. Without
        // this guard `null` would throw on the property reads below and the
        // handler would reject instead of reporting an integrity failure.
        if (event === null || typeof event !== 'object') {
            return { ok: false, reason: `bad-jsonl-line-${i + 1}: not a JSON object` };
        }
        if (event.event_id !== i + 1) {
            return { ok: false, reason: `event-id-mismatch-line-${i + 1}: expected ${i + 1} got ${event.event_id}` };
        }
        if (event.prev_event_hash !== prev) {
            return { ok: false, reason: `prev-hash-mismatch-line-${i + 1}: expected ${prev ?? 'null'} got ${event.prev_event_hash ?? 'null'}` };
        }
        const recordedHash = event.event_hash;
        if (typeof recordedHash !== 'string') {
            return { ok: false, reason: `missing-event-hash-line-${i + 1}` };
        }
        // The hash is recomputed over the event with `event_hash` blanked out.
        const draft = { ...event, event_hash: '' };
        const recomputed = sha256(canonicalStringify(draft));
        if (recordedHash !== recomputed) {
            return { ok: false, reason: `forged-hash-line-${i + 1}: recorded=${recordedHash.slice(0, 16)}… recomputed=${recomputed.slice(0, 16)}…` };
        }
        prev = recordedHash;
    }
    return { ok: true, chainHead: prev, eventCount: lines.length };
};
|
|
862
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
750
863
|
// Registry + dispatcher
|
|
751
864
|
// ─────────────────────────────────────────────────────────────────────
|
|
752
865
|
exports.SKILLS = {
|
|
@@ -763,6 +876,7 @@ exports.SKILLS = {
|
|
|
763
876
|
'dedupe-and-rank': handleDedupeAndRank,
|
|
764
877
|
'format-research-issue-update': handleFormatResearchIssueUpdate,
|
|
765
878
|
'audit-emit-event': handleAuditEmitEvent,
|
|
879
|
+
'audit-verify-chain': handleAuditVerifyChain,
|
|
766
880
|
};
|
|
767
881
|
function isSkillName(name) {
|
|
768
882
|
return Object.prototype.hasOwnProperty.call(exports.SKILLS, name);
|
package/dist/search/arxiv-client.js
CHANGED
|
@@ -14,7 +14,12 @@
|
|
|
14
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
15
|
exports.arxivSearch = arxivSearch;
|
|
16
16
|
exports.parseArxivAtom = parseArxivAtom;
|
|
17
|
-
const DEFAULT_ENDPOINT = 'http://export.arxiv.org/api/query';
|
|
17
|
+
// HTTPS — arXiv has supported it for years. Using plain HTTP previously
|
|
18
|
+
// caused agentic-SDLC runs to be blocked by the Copilot Coding Agent
|
|
19
|
+
// firewall, which allow-lists `https://export.arxiv.org/` (the canonical
|
|
20
|
+
// HTTPS form); a plain-http GET against the same host is a protocol-
|
|
21
|
+
// mismatch and rejected as `http block`. See B-PR1f forensics.
|
|
22
|
+
const DEFAULT_ENDPOINT = 'https://export.arxiv.org/api/query';
|
|
18
23
|
async function arxivSearch(opts) {
|
|
19
24
|
const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
|
|
20
25
|
const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@maintainabilityai/research-runner",
|
|
3
|
-
"version": "0.1.22",
|
|
3
|
+
"version": "0.1.25",
|
|
4
4
|
"description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "MaintainabilityAI",
|