@maintainabilityai/research-runner 0.1.25 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* session-context — env-var-backed run identity for skill auto-emission (B28).
|
|
3
|
+
*
|
|
4
|
+
* Every agentic-SDLC run flows through a single GitHub Actions job. That job
|
|
5
|
+
* already exports `MESH_PATH` for the runner; B28 extends the contract with
|
|
6
|
+
* four more env vars so the runner can auto-emit `skill_call` audit events
|
|
7
|
+
* without the agent having to call `audit-emit-event` after every skill.
|
|
8
|
+
*
|
|
9
|
+
* | env var | shape |
|
|
10
|
+
* |-----------------------|-----------------------------------------|
|
|
11
|
+
* | `OKR_ID` | non-empty string |
|
|
12
|
+
* | `RUN_ID` | non-empty string |
|
|
13
|
+
* | `INTENT_THREAD_UUID` | non-empty string (UUID expected but not validated here) |
|
|
14
|
+
* | `PHASE` | `'why' \| 'how' \| 'what'` |
|
|
15
|
+
*
|
|
16
|
+
* If ANY var is missing or `PHASE` is not one of the three canonical values,
|
|
17
|
+
* `readSessionContext()` returns `null` and the runner falls back to legacy
|
|
18
|
+
* behavior — the agent emits audit events explicitly via the `audit-emit-event`
|
|
19
|
+
* skill (or doesn't, and the workflow's chain-verify catches the gap). This
|
|
20
|
+
* preserves backward compatibility with pre-B28 chains while letting new runs
|
|
21
|
+
* benefit from deterministic emission.
|
|
22
|
+
*
|
|
23
|
+
* The auto-emission itself happens in `runSkill()` (skills.ts) — this module
|
|
24
|
+
* is just the env-var reader so it stays testable in isolation.
|
|
25
|
+
*/
|
|
26
|
+
export type RunPhase = 'why' | 'how' | 'what';
|
|
27
|
+
export interface SessionContext {
|
|
28
|
+
okrId: string; // value of the `OKR_ID` env var (always non-empty)
|
|
29
|
+
runId: string; // value of the `RUN_ID` env var (always non-empty)
|
|
30
|
+
intentThreadUuid: string; // value of `INTENT_THREAD_UUID`; not validated as a UUID
|
|
31
|
+
phase: RunPhase; // value of `PHASE`, already checked against the canonical phases
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Read the four session-context env vars. Returns null if any are absent or
|
|
35
|
+
* `PHASE` is invalid — callers MUST handle null as "no auto-emission, run
|
|
36
|
+
* the skill anyway." Never throws.
|
|
37
|
+
*/
|
|
38
|
+
export declare function readSessionContext(): SessionContext | null;
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* session-context — env-var-backed run identity for skill auto-emission (B28).
|
|
4
|
+
*
|
|
5
|
+
* Every agentic-SDLC run flows through a single GitHub Actions job. That job
|
|
6
|
+
* already exports `MESH_PATH` for the runner; B28 extends the contract with
|
|
7
|
+
* four more env vars so the runner can auto-emit `skill_call` audit events
|
|
8
|
+
* without the agent having to call `audit-emit-event` after every skill.
|
|
9
|
+
*
|
|
10
|
+
* | env var | shape |
|
|
11
|
+
* |-----------------------|-----------------------------------------|
|
|
12
|
+
* | `OKR_ID` | non-empty string |
|
|
13
|
+
* | `RUN_ID` | non-empty string |
|
|
14
|
+
* | `INTENT_THREAD_UUID` | non-empty string (UUID expected but not validated here) |
|
|
15
|
+
* | `PHASE` | `'why' \| 'how' \| 'what'` |
|
|
16
|
+
*
|
|
17
|
+
* If ANY var is missing or `PHASE` is not one of the three canonical values,
|
|
18
|
+
* `readSessionContext()` returns `null` and the runner falls back to legacy
|
|
19
|
+
* behavior — the agent emits audit events explicitly via the `audit-emit-event`
|
|
20
|
+
* skill (or doesn't, and the workflow's chain-verify catches the gap). This
|
|
21
|
+
* preserves backward compatibility with pre-B28 chains while letting new runs
|
|
22
|
+
* benefit from deterministic emission.
|
|
23
|
+
*
|
|
24
|
+
* The auto-emission itself happens in `runSkill()` (skills.ts) — this module
|
|
25
|
+
* is just the env-var reader so it stays testable in isolation.
|
|
26
|
+
*/
|
|
27
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
28
|
+
exports.readSessionContext = readSessionContext;
|
|
29
|
+
/** Canonical phase names accepted in the `PHASE` env var. */
const PHASES = ['why', 'how', 'what'];
/**
 * Report whether `value` is one of the canonical run phases.
 *
 * @param value candidate phase (typically a raw env-var string).
 * @returns `true` only for an exact, case-sensitive match against `PHASES`.
 */
function isRunPhase(value) {
    return PHASES.some((candidate) => candidate === value);
}
|
|
33
|
+
/**
 * Build the run's session context from the process environment.
 *
 * Reads `OKR_ID`, `RUN_ID`, `INTENT_THREAD_UUID` and `PHASE`. The result is
 * `null` when any of the four is unset or empty, or when `PHASE` is not a
 * canonical phase. Callers treat `null` as "no auto-emission, run the skill
 * anyway".
 *
 * @returns `{ okrId, runId, intentThreadUuid, phase }`, or `null`.
 */
function readSessionContext() {
    const {
        OKR_ID: okrId,
        RUN_ID: runId,
        INTENT_THREAD_UUID: intentThreadUuid,
        PHASE: phase,
    } = process.env;
    const allPresent = Boolean(okrId && runId && intentThreadUuid && phase);
    // Short-circuit keeps the phase check from running on an incomplete env.
    if (!allPresent || !isRunPhase(phase)) {
        return null;
    }
    return { okrId, runId, intentThreadUuid, phase };
}
|
package/dist/runner/skills.d.ts
CHANGED
|
@@ -2,12 +2,21 @@
|
|
|
2
2
|
* Shape every skill returns. Tagged union so the agent can branch on `ok`.
|
|
3
3
|
* Handlers MUST NOT throw — they return `{ok: false, reason}` instead so
|
|
4
4
|
* the calling agent can keep going (per SKILL.md error contracts).
|
|
5
|
+
*
|
|
6
|
+
* Optional `auditMetadata` field (B28): structured key/value pairs that the
|
|
7
|
+
* auto-emitter merges into the `skill_call` event payload. Handlers use it
|
|
8
|
+
* to declare audit-worthy details (search-skill `queries` + `result_count`,
|
|
9
|
+
* etc.) without the agent having to re-author them in an audit-emit-event
|
|
10
|
+
* call. Canonical fields (`skill`, `ok`, `duration_ms`, `reason`) always
|
|
11
|
+
* win on collision so handlers can't accidentally overwrite them.
|
|
5
12
|
*/
|
|
6
13
|
export type SkillResult = ({
|
|
7
14
|
ok: true;
|
|
15
|
+
auditMetadata?: Record<string, unknown>;
|
|
8
16
|
} & Record<string, unknown>) | {
|
|
9
17
|
ok: false;
|
|
10
18
|
reason: string;
|
|
19
|
+
auditMetadata?: Record<string, unknown>;
|
|
11
20
|
};
|
|
12
21
|
export type SkillHandler = (input: unknown) => Promise<SkillResult>;
|
|
13
22
|
export declare const SKILLS: Record<string, SkillHandler>;
|
package/dist/runner/skills.js
CHANGED
|
@@ -64,6 +64,7 @@ exports.readStdin = readStdin;
|
|
|
64
64
|
*/
|
|
65
65
|
const node_crypto_1 = require("node:crypto");
|
|
66
66
|
const fs = __importStar(require("node:fs"));
|
|
67
|
+
const os = __importStar(require("node:os"));
|
|
67
68
|
const path = __importStar(require("node:path"));
|
|
68
69
|
const yaml = __importStar(require("js-yaml"));
|
|
69
70
|
const zod_1 = require("zod");
|
|
@@ -72,6 +73,7 @@ const arxiv_search_1 = require("./nodes/arxiv-search");
|
|
|
72
73
|
const hackernews_search_1 = require("./nodes/hackernews-search");
|
|
73
74
|
const uspto_search_1 = require("./nodes/uspto-search");
|
|
74
75
|
const dedupe_and_rank_1 = require("./nodes/dedupe-and-rank");
|
|
76
|
+
const session_context_1 = require("./session-context");
|
|
75
77
|
// ─────────────────────────────────────────────────────────────────────
|
|
76
78
|
// Mesh path resolution
|
|
77
79
|
// ─────────────────────────────────────────────────────────────────────
|
|
@@ -462,6 +464,118 @@ const handleKnowledgeResearch = async (input) => {
|
|
|
462
464
|
return { ok: true, findings, whitespace, references, rawBody: body };
|
|
463
465
|
};
|
|
464
466
|
// ─────────────────────────────────────────────────────────────────────
|
|
467
|
+
// Context skills — per-BAR slices of mesh state for PRD agent grounding
|
|
468
|
+
//
|
|
469
|
+
// The prd-agent invokes these AFTER `knowledge-mesh-bar` so the heavy
|
|
470
|
+
// lifting (CALM, threats, ADRs, controls) is already in its working set.
|
|
471
|
+
// These return a focused, persona-specific slice the agent's Architect /
|
|
472
|
+
// Security / Quality lenses each consume in turn during synthesis.
|
|
473
|
+
//
|
|
474
|
+
// Contract: input `{platformId, barIds}` — both required. If any BAR
|
|
475
|
+
// isn't resolvable in the mesh, we return ok:false (HOW agent halts per
|
|
476
|
+
// the "PRDs MUST be grounded" hard rule rather than fabricating).
|
|
477
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
478
|
+
const ContextInput = zod_1.z.object({
|
|
479
|
+
platformId: zod_1.z.string().min(1),
|
|
480
|
+
barIds: zod_1.z.array(zod_1.z.string().min(1)).min(1),
|
|
481
|
+
});
|
|
482
|
+
/**
 * Map every BAR id onto its directory in the mesh.
 *
 * Lookup stops at the first id `findBarDir` cannot resolve, so callers
 * never synthesize against a partial scope.
 *
 * @param barIds BAR ids to look up under the mesh root.
 * @returns `{ ok: true, found }` with one `{ barId, barDir, platformSlug }`
 *   entry per id in input order, or `{ ok: false, reason }` naming the
 *   first unresolved id.
 */
function resolveBarsOrFail(barIds) {
    const meshRoot = meshPath();
    const located = [];
    for (const barId of barIds) {
        const hit = findBarDir(meshRoot, barId);
        if (hit) {
            located.push({ barId, barDir: hit.barDir, platformSlug: hit.platformSlug });
            continue;
        }
        return { ok: false, reason: `bar-not-found: ${barId}` };
    }
    return { ok: true, found: located };
}
|
|
499
|
+
/**
 * `context-architecture` skill: per-BAR architecture slice.
 *
 * For every requested BAR it reads `architecture/bar.arch.json` (CALM
 * model), `architecture/fitness-functions.yaml`, and lists the Markdown
 * files under `architecture/ADRs`. Each ADR is reported as `{ id, title }`,
 * where `id` is the file name without `.md` and `title` is the first
 * `# ` heading (falling back to the file name). ADR files that cannot be
 * read are skipped.
 *
 * @param input expected shape `{ platformId, barIds }`, validated by `ContextInput`.
 * @returns `{ ok: true, scope, bars }`, or `{ ok: false, reason }` on bad
 *   input or an unresolvable BAR.
 */
const handleContextArchitecture = async (input) => {
    const validation = ContextInput.safeParse(input);
    if (!validation.success) {
        return { ok: false, reason: `bad-input: ${validation.error.message}` };
    }
    const scope = validation.data;
    const lookup = resolveBarsOrFail(scope.barIds);
    if (!lookup.ok) {
        return lookup;
    }
    const bars = lookup.found.map(({ barId, barDir, platformSlug }) => {
        const calmModel = readJson(path.join(barDir, 'architecture', 'bar.arch.json'));
        const fitnessFunctions = readYaml(path.join(barDir, 'architecture', 'fitness-functions.yaml'));
        const adrDir = path.join(barDir, 'architecture', 'ADRs');
        const adrs = [];
        for (const fileName of readDirShallow(adrDir)) {
            if (fileName.endsWith('.md')) {
                try {
                    const text = fs.readFileSync(path.join(adrDir, fileName), 'utf8');
                    const heading = text.match(/^#\s+(.+)/m);
                    const title = (heading?.[1] ?? fileName).trim();
                    adrs.push({ id: fileName.replace(/\.md$/, ''), title });
                }
                catch {
                    // Unreadable ADR: leave it out of the slice.
                }
            }
        }
        return { barId, platformId: platformSlug, slice: { calmModel, fitnessFunctions, adrs } };
    });
    return { ok: true, scope, bars };
};
|
|
534
|
+
/**
 * `context-security` skill: per-BAR security slice.
 *
 * For every requested BAR it reads `architecture/threat-model.yaml`
 * (threats) and `security/security-controls.yaml` (controls).
 *
 * @param input expected shape `{ platformId, barIds }`, validated by `ContextInput`.
 * @returns `{ ok: true, scope, bars }`, or `{ ok: false, reason }` on bad
 *   input or an unresolvable BAR.
 */
const handleContextSecurity = async (input) => {
    const validation = ContextInput.safeParse(input);
    if (!validation.success) {
        return { ok: false, reason: `bad-input: ${validation.error.message}` };
    }
    const scope = validation.data;
    const lookup = resolveBarsOrFail(scope.barIds);
    if (!lookup.ok) {
        return lookup;
    }
    const bars = lookup.found.map(({ barId, barDir, platformSlug }) => ({
        barId,
        platformId: platformSlug,
        slice: {
            threats: readYaml(path.join(barDir, 'architecture', 'threat-model.yaml')),
            controls: readYaml(path.join(barDir, 'security', 'security-controls.yaml')),
        },
    }));
    return { ok: true, scope, bars };
};
|
|
556
|
+
/**
 * `context-quality` skill: per-BAR quality slice.
 *
 * For every requested BAR it reads `architecture/quality-attributes.yaml`
 * and `architecture/fitness-functions.yaml`.
 *
 * @param input expected shape `{ platformId, barIds }`, validated by `ContextInput`.
 * @returns `{ ok: true, scope, bars }`, or `{ ok: false, reason }` on bad
 *   input or an unresolvable BAR.
 */
const handleContextQuality = async (input) => {
    const validation = ContextInput.safeParse(input);
    if (!validation.success) {
        return { ok: false, reason: `bad-input: ${validation.error.message}` };
    }
    const scope = validation.data;
    const lookup = resolveBarsOrFail(scope.barIds);
    if (!lookup.ok) {
        return lookup;
    }
    const bars = lookup.found.map(({ barId, barDir, platformSlug }) => ({
        barId,
        platformId: platformSlug,
        slice: {
            qualityAttributes: readYaml(path.join(barDir, 'architecture', 'quality-attributes.yaml')),
            fitnessFunctions: readYaml(path.join(barDir, 'architecture', 'fitness-functions.yaml')),
        },
    }));
    return { ok: true, scope, bars };
};
|
|
578
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
465
579
|
// Search skills — thin wrappers over the existing search nodes
|
|
466
580
|
// ─────────────────────────────────────────────────────────────────────
|
|
467
581
|
const SearchQueriesInput = zod_1.z.object({
|
|
@@ -505,7 +619,7 @@ const handleTavilySearch = async (input) => {
|
|
|
505
619
|
}
|
|
506
620
|
const apiKey = process.env.TAVILY_API_KEY;
|
|
507
621
|
if (!apiKey) {
|
|
508
|
-
return { ok: false, reason: 'tavily-api-key-missing' };
|
|
622
|
+
return { ok: false, reason: 'tavily-api-key-missing', auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
|
|
509
623
|
}
|
|
510
624
|
try {
|
|
511
625
|
const res = await (0, tavily_search_1.runTavilySearch)({
|
|
@@ -513,14 +627,15 @@ const handleTavilySearch = async (input) => {
|
|
|
513
627
|
queries: parsed.data.queries,
|
|
514
628
|
maxResultsPerQuery: parsed.data.maxResults,
|
|
515
629
|
});
|
|
630
|
+
const auditMetadata = { queries: parsed.data.queries, result_count: res.results.length };
|
|
516
631
|
const failure = detectAllQueriesFailed(res.envelopes, 'tavily-search');
|
|
517
632
|
if (failure) {
|
|
518
|
-
return { ok: false, reason: failure, envelopes: res.envelopes };
|
|
633
|
+
return { ok: false, reason: failure, envelopes: res.envelopes, auditMetadata };
|
|
519
634
|
}
|
|
520
|
-
return { ok: true, envelopes: res.envelopes, results: res.results };
|
|
635
|
+
return { ok: true, envelopes: res.envelopes, results: res.results, auditMetadata };
|
|
521
636
|
}
|
|
522
637
|
catch (err) {
|
|
523
|
-
return { ok: false, reason: `tavily-failed: ${err.message}
|
|
638
|
+
return { ok: false, reason: `tavily-failed: ${err.message}`, auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
|
|
524
639
|
}
|
|
525
640
|
};
|
|
526
641
|
const handleArxivSearch = async (input) => {
|
|
@@ -533,14 +648,15 @@ const handleArxivSearch = async (input) => {
|
|
|
533
648
|
queries: parsed.data.queries,
|
|
534
649
|
maxResultsPerQuery: parsed.data.maxResults,
|
|
535
650
|
});
|
|
651
|
+
const auditMetadata = { queries: parsed.data.queries, result_count: res.results.length };
|
|
536
652
|
const failure = detectAllQueriesFailed(res.envelopes, 'arxiv-search');
|
|
537
653
|
if (failure) {
|
|
538
|
-
return { ok: false, reason: failure, envelopes: res.envelopes };
|
|
654
|
+
return { ok: false, reason: failure, envelopes: res.envelopes, auditMetadata };
|
|
539
655
|
}
|
|
540
|
-
return { ok: true, envelopes: res.envelopes, results: res.results };
|
|
656
|
+
return { ok: true, envelopes: res.envelopes, results: res.results, auditMetadata };
|
|
541
657
|
}
|
|
542
658
|
catch (err) {
|
|
543
|
-
return { ok: false, reason: `arxiv-failed: ${err.message}
|
|
659
|
+
return { ok: false, reason: `arxiv-failed: ${err.message}`, auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
|
|
544
660
|
}
|
|
545
661
|
};
|
|
546
662
|
const handleUsptoSearch = async (input) => {
|
|
@@ -550,7 +666,7 @@ const handleUsptoSearch = async (input) => {
|
|
|
550
666
|
}
|
|
551
667
|
const apiKey = process.env.USPTO_API_KEY;
|
|
552
668
|
if (!apiKey) {
|
|
553
|
-
return { ok: false, reason: 'uspto-api-key-missing' };
|
|
669
|
+
return { ok: false, reason: 'uspto-api-key-missing', auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
|
|
554
670
|
}
|
|
555
671
|
try {
|
|
556
672
|
const res = await (0, uspto_search_1.runUsptoSearch)({
|
|
@@ -558,14 +674,15 @@ const handleUsptoSearch = async (input) => {
|
|
|
558
674
|
queries: parsed.data.queries,
|
|
559
675
|
maxResultsPerQuery: parsed.data.maxResults,
|
|
560
676
|
});
|
|
677
|
+
const auditMetadata = { queries: parsed.data.queries, result_count: res.results.length };
|
|
561
678
|
const failure = detectAllQueriesFailed(res.envelopes, 'uspto-search');
|
|
562
679
|
if (failure) {
|
|
563
|
-
return { ok: false, reason: failure, envelopes: res.envelopes };
|
|
680
|
+
return { ok: false, reason: failure, envelopes: res.envelopes, auditMetadata };
|
|
564
681
|
}
|
|
565
|
-
return { ok: true, envelopes: res.envelopes, results: res.results };
|
|
682
|
+
return { ok: true, envelopes: res.envelopes, results: res.results, auditMetadata };
|
|
566
683
|
}
|
|
567
684
|
catch (err) {
|
|
568
|
-
return { ok: false, reason: `uspto-failed: ${err.message}
|
|
685
|
+
return { ok: false, reason: `uspto-failed: ${err.message}`, auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
|
|
569
686
|
}
|
|
570
687
|
};
|
|
571
688
|
const handleHackerNewsSearch = async (input) => {
|
|
@@ -578,14 +695,15 @@ const handleHackerNewsSearch = async (input) => {
|
|
|
578
695
|
queries: parsed.data.queries,
|
|
579
696
|
hitsPerQuery: parsed.data.maxResults,
|
|
580
697
|
});
|
|
698
|
+
const auditMetadata = { queries: parsed.data.queries, result_count: res.results.length };
|
|
581
699
|
const failure = detectAllQueriesFailed(res.envelopes, 'hackernews-search');
|
|
582
700
|
if (failure) {
|
|
583
|
-
return { ok: false, reason: failure, envelopes: res.envelopes };
|
|
701
|
+
return { ok: false, reason: failure, envelopes: res.envelopes, auditMetadata };
|
|
584
702
|
}
|
|
585
|
-
return { ok: true, envelopes: res.envelopes, results: res.results };
|
|
703
|
+
return { ok: true, envelopes: res.envelopes, results: res.results, auditMetadata };
|
|
586
704
|
}
|
|
587
705
|
catch (err) {
|
|
588
|
-
return { ok: false, reason: `hackernews-failed: ${err.message}
|
|
706
|
+
return { ok: false, reason: `hackernews-failed: ${err.message}`, auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
|
|
589
707
|
}
|
|
590
708
|
};
|
|
591
709
|
// ─────────────────────────────────────────────────────────────────────
|
|
@@ -699,8 +817,21 @@ const AuditEmitInput = zod_1.z.object({
|
|
|
699
817
|
phase: zod_1.z.enum(['why', 'how', 'what']),
|
|
700
818
|
intentThreadUuid: zod_1.z.string().min(1),
|
|
701
819
|
});
|
|
702
|
-
|
|
703
|
-
|
|
820
|
+
/**
|
|
821
|
+
* Audit-JSONL file-lock retry budget. Sized for parallel auto-emission:
|
|
822
|
+
* the agent often fires 4 search skills concurrently, each completing in
|
|
823
|
+
* ~500ms–3s. When their handlers return at similar times, all 4 try to
|
|
824
|
+
* grab the JSONL lock simultaneously. Pre-B28a.v1.1 the budget was
|
|
825
|
+
* `3 × 50ms linear = 300ms max` which silently dropped 3 of 4 events on
|
|
826
|
+
* PR #108. New budget: 20 retries with exponential 2^n backoff capped at
|
|
827
|
+
* 500ms each (sequence: 100, 200, 400, 500, 500, 500, …) ≈ 9.2s total
|
|
828
|
+
* wait — comfortably tolerates 4–8 parallel skill invocations while
|
|
829
|
+
* staying well under the runner's overall step timeout. Total emission
|
|
830
|
+
* latency stays unchanged in the happy-path single-writer case.
|
|
831
|
+
*/
|
|
832
|
+
const LOCK_RETRY_LIMIT = 20;
|
|
833
|
+
const LOCK_RETRY_BASE_MS = 100;
|
|
834
|
+
const LOCK_RETRY_MAX_MS = 500;
|
|
704
835
|
/** Recursive key-sorted JSON stringify so the event hash is canonical. */
|
|
705
836
|
function canonicalStringify(value) {
|
|
706
837
|
if (value === null || typeof value !== 'object') {
|
|
@@ -719,6 +850,89 @@ function sha256(text) {
|
|
|
719
850
|
async function sleep(ms) {
|
|
720
851
|
return new Promise(resolve => setTimeout(resolve, ms));
|
|
721
852
|
}
|
|
853
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
854
|
+
// Knight's Seal v1 — per-run ephemeral Ed25519 signing (B27)
|
|
855
|
+
//
|
|
856
|
+
// Each run gets an ephemeral Ed25519 keypair generated on first
|
|
857
|
+
// `audit-emit-event` call. The PUBLIC key is persisted beside the audit
|
|
858
|
+
// JSONL so verify-chain (and future external auditors) can validate
|
|
859
|
+
// signatures forever. The PRIVATE key lives in `os.tmpdir()` for the
|
|
860
|
+
// duration of the run — NEVER inside the mesh repo (so a careless
|
|
861
|
+
// `git add` can't commit it).
|
|
862
|
+
//
|
|
863
|
+
// Per-event flow:
|
|
864
|
+
// 1. Build event with event_hash='' and signature=''
|
|
865
|
+
// 2. event_hash = sha256(canonical(event)) ← chain integrity
|
|
866
|
+
// 3. signature = Ed25519(privKey, event_hash) ← nonrepudiation
|
|
867
|
+
// 4. Persist {...event, event_hash, signature}
|
|
868
|
+
//
|
|
869
|
+
// Verify flow (in audit-verify-chain):
|
|
870
|
+
// 1. Recompute event_hash (set signature='' AND event_hash='')
|
|
871
|
+
// 2. Match recorded event_hash (current chain check)
|
|
872
|
+
// 3. Verify Ed25519(pubKey, recorded event_hash, recorded signature)
|
|
873
|
+
//
|
|
874
|
+
// Backward compat: a chain with NO signature fields is reported as
|
|
875
|
+
// `sealed: false, sealVerified: false` but still passes if hashes are
|
|
876
|
+
// intact. A chain with PARTIAL signatures is treated as tampering.
|
|
877
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
878
|
+
/**
 * Path of the run's public key inside the mesh:
 * `<mesh>/okrs/<okrId>/audit/keys/<runId>.pub.pem`.
 *
 * @param okrId OKR id, used verbatim as a path segment.
 * @param runId run id, used verbatim in the file name.
 */
function knightSealPubKeyPath(okrId, runId) {
    const meshRoot = meshPath();
    const fileName = `${runId}.pub.pem`;
    return path.join(meshRoot, 'okrs', okrId, 'audit', 'keys', fileName);
}
|
|
881
|
+
/**
 * Path of the run's private key under the OS temp directory:
 * `<tmpdir>/.research-runner-keys/<okrId>--<runId>.priv.pem`.
 *
 * The key is kept outside the mesh repo so it cannot be committed by
 * accident. Every character of either id outside `[A-Za-z0-9_-]` is
 * replaced with `_` before it goes into the file name.
 *
 * @param okrId OKR id.
 * @param runId run id.
 */
function knightSealPrivKeyPath(okrId, runId) {
    const tmpRoot = os.tmpdir();
    const toFileSafe = (segment) => segment.replace(/[^A-Za-z0-9_-]/g, '_');
    const fileName = `${toFileSafe(okrId)}--${toFileSafe(runId)}.priv.pem`;
    return path.join(tmpRoot, '.research-runner-keys', fileName);
}
|
|
886
|
+
/**
 * Return the Ed25519 keypair that signs this run's audit events, creating
 * and persisting whatever part of it is missing.
 *
 * - Private key present in the tmp key directory: it is reused. The public
 *   key is read from the mesh when its file exists. If that file is missing,
 *   the public key is re-derived from the private key and written back, so
 *   events already signed during this run keep verifying. (Before this
 *   change a missing public file made the function discard the private key
 *   and start over with a brand-new pair.)
 * - No private key: a fresh keypair is generated and both halves are written.
 *
 * The private key file is written with mode 0o600 inside a directory created
 * with mode 0o700; both modes only take effect when the entry is created.
 *
 * @param okrId OKR id, used to build both key paths.
 * @param runId run id, used to build both key paths.
 * @returns `{ privKey, pubKey }` as `KeyObject`s.
 * @throws on filesystem errors or when a stored PEM cannot be parsed.
 */
function loadOrCreateRunKeypair(okrId, runId) {
    const privPath = knightSealPrivKeyPath(okrId, runId);
    const pubPath = knightSealPubKeyPath(okrId, runId);
    // Writes the SPKI PEM next to the audit JSONL, creating `audit/keys/` if needed.
    const persistPublicKey = (key) => {
        fs.mkdirSync(path.dirname(pubPath), { recursive: true });
        fs.writeFileSync(pubPath, key.export({ type: 'spki', format: 'pem' }), 'utf8');
    };
    if (fs.existsSync(privPath)) {
        const privKey = node_crypto_1.createPrivateKey({ key: fs.readFileSync(privPath, 'utf8'), format: 'pem' });
        if (fs.existsSync(pubPath)) {
            const pubKey = node_crypto_1.createPublicKey({ key: fs.readFileSync(pubPath, 'utf8'), format: 'pem' });
            return { privKey, pubKey };
        }
        // Public file lost mid-run: rebuild it from the private key rather
        // than rotating the key and orphaning earlier signatures.
        const pubKey = node_crypto_1.createPublicKey(privKey);
        persistPublicKey(pubKey);
        return { privKey, pubKey };
    }
    const { privateKey, publicKey } = node_crypto_1.generateKeyPairSync('ed25519');
    fs.mkdirSync(path.dirname(privPath), { recursive: true, mode: 0o700 });
    fs.writeFileSync(privPath, privateKey.export({ type: 'pkcs8', format: 'pem' }), { encoding: 'utf8', mode: 0o600 });
    persistPublicKey(publicKey);
    return { privKey: privateKey, pubKey: publicKey };
}
|
|
910
|
+
/**
 * Load the run's persisted public key, if there is one.
 *
 * @param okrId OKR id used to locate the key file.
 * @param runId run id used to locate the key file.
 * @returns the public `KeyObject`, or `null` when the file does not exist
 *   or cannot be read/parsed as a PEM public key.
 */
function tryLoadRunPublicKey(okrId, runId) {
    const pubPath = knightSealPubKeyPath(okrId, runId);
    if (fs.existsSync(pubPath)) {
        try {
            const pem = fs.readFileSync(pubPath, 'utf8');
            return node_crypto_1.createPublicKey({ key: pem, format: 'pem' });
        }
        catch {
            return null;
        }
    }
    return null;
}
|
|
923
|
+
/**
 * Sign an event hash with the run's private key.
 *
 * The message that gets signed is the UTF-8 bytes of the hex digest string
 * itself (not the decoded digest bytes).
 *
 * @param privKey private `KeyObject` to sign with.
 * @param eventHashHex hex string of the event hash.
 * @returns the signature, hex-encoded.
 */
function signEventHash(privKey, eventHashHex) {
    const message = Buffer.from(eventHashHex, 'utf8');
    // `null` algorithm: the digest is dictated by the key type.
    const signatureBytes = node_crypto_1.sign(null, message, privKey);
    return signatureBytes.toString('hex');
}
|
|
928
|
+
/**
 * Check a hex signature against an event hash.
 *
 * The verified message is the UTF-8 bytes of the hex digest string, which
 * mirrors what `signEventHash` signs.
 *
 * @param pubKey public `KeyObject` to verify with.
 * @param eventHashHex hex string of the recorded event hash.
 * @param signatureHex hex-encoded signature.
 * @returns `true` when the signature verifies; `false` when it does not or
 *   when verification throws (e.g. unusable key or arguments).
 */
function verifyEventSignature(pubKey, eventHashHex, signatureHex) {
    let verified = false;
    try {
        const message = Buffer.from(eventHashHex, 'utf8');
        const signatureBytes = Buffer.from(signatureHex, 'hex');
        verified = node_crypto_1.verify(null, message, pubKey, signatureBytes);
    }
    catch {
        verified = false;
    }
    return verified;
}
|
|
722
936
|
/**
|
|
723
937
|
* `audit-emit-event` — append one hash-chained event to
|
|
724
938
|
* `<mesh>/okrs/<id>/audit/events/<runId>.jsonl`.
|
|
@@ -747,7 +961,12 @@ const handleAuditEmitEvent = async (input) => {
|
|
|
747
961
|
}
|
|
748
962
|
catch (err) {
|
|
749
963
|
if (err.code === 'EEXIST') {
|
|
750
|
-
|
|
964
|
+
// Exponential backoff capped at LOCK_RETRY_MAX_MS. With 20
|
|
965
|
+
// attempts the wait sequence is 100, 200, 400, 500, 500, … ≈
|
|
966
|
+
// 9.2s total — enough headroom for 4–8 parallel auto-emissions
|
|
967
|
+
// from skills firing concurrently (B28a.v1.1).
|
|
968
|
+
const wait = Math.min(LOCK_RETRY_BASE_MS * (2 ** attempt), LOCK_RETRY_MAX_MS);
|
|
969
|
+
await sleep(wait);
|
|
751
970
|
continue;
|
|
752
971
|
}
|
|
753
972
|
return { ok: false, reason: `audit-lock-failed: ${err.message}` };
|
|
@@ -763,6 +982,8 @@ const handleAuditEmitEvent = async (input) => {
|
|
|
763
982
|
nextEventId = last.event_id + 1;
|
|
764
983
|
}
|
|
765
984
|
}
|
|
985
|
+
const { privKey, pubKey } = loadOrCreateRunKeypair(okrId, runId);
|
|
986
|
+
const publicKeyPem = pubKey.export({ type: 'spki', format: 'pem' });
|
|
766
987
|
const draft = {
|
|
767
988
|
event_id: nextEventId,
|
|
768
989
|
ts: new Date().toISOString(),
|
|
@@ -773,12 +994,19 @@ const handleAuditEmitEvent = async (input) => {
|
|
|
773
994
|
event_kind: eventKind,
|
|
774
995
|
payload,
|
|
775
996
|
prev_event_hash: prevHash,
|
|
997
|
+
// Embed public key on event 1 so a single-line audit excerpt
|
|
998
|
+
// still names its signer. Subsequent events reference the same
|
|
999
|
+
// committed key on disk; embedding on every line would balloon
|
|
1000
|
+
// the JSONL with no integrity gain.
|
|
1001
|
+
public_key: nextEventId === 1 ? publicKeyPem : null,
|
|
776
1002
|
event_hash: '',
|
|
1003
|
+
signature: '',
|
|
777
1004
|
};
|
|
778
1005
|
const hash = sha256(canonicalStringify(draft));
|
|
779
|
-
const
|
|
1006
|
+
const signature = signEventHash(privKey, hash);
|
|
1007
|
+
const finalEvent = { ...draft, event_hash: hash, signature };
|
|
780
1008
|
fs.appendFileSync(filePath, JSON.stringify(finalEvent) + '\n', 'utf8');
|
|
781
|
-
return { ok: true, chainHead: hash, eventId: nextEventId };
|
|
1009
|
+
return { ok: true, chainHead: hash, eventId: nextEventId, sealed: true };
|
|
782
1010
|
}
|
|
783
1011
|
finally {
|
|
784
1012
|
if (lockFd !== null) {
|
|
@@ -831,6 +1059,11 @@ const handleAuditVerifyChain = async (input) => {
|
|
|
831
1059
|
catch (err) {
|
|
832
1060
|
return { ok: false, reason: `read-failed: ${err.message}` };
|
|
833
1061
|
}
|
|
1062
|
+
const pubKey = tryLoadRunPublicKey(okrId, runId);
|
|
1063
|
+
// Track signature state across the whole chain. v1 contract: either
|
|
1064
|
+
// EVERY event is signed (sealed=true) or NO event is signed (legacy
|
|
1065
|
+
// pre-B27 chain, sealed=false). Partial signatures = tampering.
|
|
1066
|
+
let signedCount = 0;
|
|
834
1067
|
let prev = null;
|
|
835
1068
|
for (let i = 0; i < lines.length; i++) {
|
|
836
1069
|
let event;
|
|
@@ -850,14 +1083,41 @@ const handleAuditVerifyChain = async (input) => {
|
|
|
850
1083
|
if (typeof recordedHash !== 'string') {
|
|
851
1084
|
return { ok: false, reason: `missing-event-hash-line-${i + 1}` };
|
|
852
1085
|
}
|
|
853
|
-
const
|
|
1086
|
+
const recordedSignature = typeof event.signature === 'string' ? event.signature : null;
|
|
1087
|
+
// Recompute hash with BOTH event_hash and signature zeroed, since
|
|
1088
|
+
// both are filled in after the hash is computed at write time.
|
|
1089
|
+
const draft = { ...event, event_hash: '', signature: recordedSignature !== null ? '' : undefined };
|
|
1090
|
+
if (recordedSignature === null) {
|
|
1091
|
+
delete draft.signature;
|
|
1092
|
+
}
|
|
854
1093
|
const recomputed = sha256(canonicalStringify(draft));
|
|
855
1094
|
if (recordedHash !== recomputed) {
|
|
856
1095
|
return { ok: false, reason: `forged-hash-line-${i + 1}: recorded=${recordedHash.slice(0, 16)}… recomputed=${recomputed.slice(0, 16)}…` };
|
|
857
1096
|
}
|
|
1097
|
+
if (recordedSignature !== null) {
|
|
1098
|
+
signedCount++;
|
|
1099
|
+
}
|
|
858
1100
|
prev = recordedHash;
|
|
859
1101
|
}
|
|
860
|
-
|
|
1102
|
+
// Knight's Seal verification: enforce all-or-nothing.
|
|
1103
|
+
const sealed = signedCount > 0;
|
|
1104
|
+
let sealVerified = false;
|
|
1105
|
+
if (sealed) {
|
|
1106
|
+
if (signedCount !== lines.length) {
|
|
1107
|
+
return { ok: false, reason: `partial-signatures: ${signedCount}/${lines.length} events signed (chain tampered)` };
|
|
1108
|
+
}
|
|
1109
|
+
if (!pubKey) {
|
|
1110
|
+
return { ok: false, reason: `public-key-missing: events are signed but no <runId>.pub.pem found in audit/keys/` };
|
|
1111
|
+
}
|
|
1112
|
+
for (let i = 0; i < lines.length; i++) {
|
|
1113
|
+
const event = JSON.parse(lines[i]);
|
|
1114
|
+
if (!verifyEventSignature(pubKey, event.event_hash, event.signature)) {
|
|
1115
|
+
return { ok: false, reason: `signature-mismatch-line-${i + 1}: Ed25519 verify failed` };
|
|
1116
|
+
}
|
|
1117
|
+
}
|
|
1118
|
+
sealVerified = true;
|
|
1119
|
+
}
|
|
1120
|
+
return { ok: true, chainHead: prev, eventCount: lines.length, sealed, sealVerified };
|
|
861
1121
|
};
|
|
862
1122
|
// ─────────────────────────────────────────────────────────────────────
|
|
863
1123
|
// Registry + dispatcher
|
|
@@ -869,6 +1129,9 @@ exports.SKILLS = {
|
|
|
869
1129
|
'knowledge-mesh-threats': handleKnowledgeMeshThreats,
|
|
870
1130
|
'knowledge-mesh-adrs': handleKnowledgeMeshAdrs,
|
|
871
1131
|
'knowledge-research': handleKnowledgeResearch,
|
|
1132
|
+
'context-architecture': handleContextArchitecture,
|
|
1133
|
+
'context-security': handleContextSecurity,
|
|
1134
|
+
'context-quality': handleContextQuality,
|
|
872
1135
|
'tavily-search': handleTavilySearch,
|
|
873
1136
|
'arxiv-search': handleArxivSearch,
|
|
874
1137
|
'uspto-search': handleUsptoSearch,
|
|
@@ -881,12 +1144,65 @@ exports.SKILLS = {
|
|
|
881
1144
|
function isSkillName(name) {
|
|
882
1145
|
return Object.prototype.hasOwnProperty.call(exports.SKILLS, name);
|
|
883
1146
|
}
|
|
1147
|
+
/**
|
|
1148
|
+
* Skills whose name exactly matches one of these entries never trigger
|
|
1149
|
+
* audit-event auto-emission — they're the audit-event surface itself
|
|
1150
|
+
* (writer + reader). Letting them auto-emit would create either infinite
|
|
1151
|
+
* recursion (audit-emit-event audit-emitting itself) or a meaningless
|
|
1152
|
+
* `skill_call` event for a read-only verify operation.
|
|
1153
|
+
*/
|
|
1154
|
+
const NO_AUTO_EMIT_SKILLS = new Set(['audit-emit-event', 'audit-verify-chain']);
|
|
884
1155
|
/**
 * Dispatch a skill by name and, when a session context is configured,
 * record the invocation as a `skill_call` audit event.
 *
 * Flow:
 *  1. Reject names that are not own keys of `SKILLS` with `unknown-skill`.
 *  2. Run the handler and time it.
 *  3. Unless the skill is listed in `NO_AUTO_EMIT_SKILLS`, read the session
 *     context; if present, emit one `skill_call` event whose payload is the
 *     handler's `auditMetadata` overlaid with `skill`, `ok`, `duration_ms`
 *     and (on failure) `reason`.
 *  4. Return the handler's result unchanged.
 *
 * Audit emission is best-effort: a failed or throwing emit is reported on
 * stderr as a `::warning::` line and never replaces the skill result.
 *
 * @param name skill name to run.
 * @param input raw input forwarded to the handler.
 * @returns the handler's result, or `{ ok: false, reason }` for an unknown skill.
 */
async function runSkill(name, input) {
    // Own-property check: a bare `SKILLS[name]` lookup also resolves
    // inherited members such as `toString` or `constructor`, which would
    // then be invoked as if they were skill handlers.
    if (!isSkillName(name)) {
        return { ok: false, reason: `unknown-skill: ${name}` };
    }
    const handler = exports.SKILLS[name];
    const t0 = Date.now();
    const result = await handler(input);
    const duration_ms = Date.now() - t0;
    if (NO_AUTO_EMIT_SKILLS.has(name)) {
        return result;
    }
    const ctx = (0, session_context_1.readSessionContext)();
    if (!ctx) {
        // Legacy mode: no session context, so no auto-emission.
        return result;
    }
    // Handler metadata goes first so the canonical fields below always win
    // on a key collision.
    const extras = result.auditMetadata ?? {};
    const payload = { ...extras, skill: name, ok: result.ok, duration_ms };
    if (!result.ok) {
        payload.reason = result.reason;
    }
    try {
        const emit = await handleAuditEmitEvent({
            okrId: ctx.okrId,
            runId: ctx.runId,
            phase: ctx.phase,
            intentThreadUuid: ctx.intentThreadUuid,
            eventKind: 'skill_call',
            payload,
        });
        if (!emit.ok) {
            process.stderr.write(`::warning::audit auto-emit failed for skill ${name}: ${emit.reason}\n`);
        }
    }
    catch (err) {
        // The thrown value is not guaranteed to be an Error instance.
        const detail = err instanceof Error ? err.message : String(err);
        process.stderr.write(`::warning::audit auto-emit threw for skill ${name}: ${detail}\n`);
    }
    return result;
}
|
|
891
1207
|
/**
|
|
892
1208
|
* Read all of stdin as a UTF-8 string. Returns '' immediately on TTY
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@maintainabilityai/research-runner",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.31",
|
|
4
4
|
"description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "MaintainabilityAI",
|