@maintainabilityai/research-runner 0.1.23 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ /**
2
+ * session-context — env-var-backed run identity for skill auto-emission (B28).
3
+ *
4
+ * Every agentic-SDLC run flows through a single GitHub Actions job. That job
5
+ * already exports `MESH_PATH` for the runner; B28 extends the contract with
6
+ * four more env vars so the runner can auto-emit `skill_call` audit events
7
+ * without the agent having to call `audit-emit-event` after every skill.
8
+ *
9
+ * | env var | shape |
10
+ * |-----------------------|-----------------------------------------|
11
+ * | `OKR_ID` | non-empty string |
12
+ * | `RUN_ID` | non-empty string |
13
+ * | `INTENT_THREAD_UUID` | non-empty string (UUID expected but not validated here) |
14
+ * | `PHASE` | `'why' \| 'how' \| 'what'` |
15
+ *
16
+ * If ANY var is missing or empty, or `PHASE` is not one of the three canonical values,
17
+ * `readSessionContext()` returns `null` and the runner falls back to legacy
18
+ * behavior — the agent emits audit events explicitly via the `audit-emit-event`
19
+ * skill (or doesn't, and the workflow's chain-verify catches the gap). This
20
+ * preserves backward compatibility with pre-B28 chains while letting new runs
21
+ * benefit from deterministic emission.
22
+ *
23
+ * The auto-emission itself happens in `runSkill()` (skills.ts) — this module
24
+ * is just the env-var reader so it stays testable in isolation.
25
+ */
26
+ export type RunPhase = 'why' | 'how' | 'what';
27
+ export interface SessionContext {
28
+ okrId: string;
29
+ runId: string;
30
+ intentThreadUuid: string;
31
+ phase: RunPhase;
32
+ }
33
+ /**
34
+ * Read the four session-context env vars. Returns null if any are absent or empty, or
35
+ * `PHASE` is invalid — callers MUST handle null as "no auto-emission, run
36
+ * the skill anyway." Never throws.
37
+ */
38
+ export declare function readSessionContext(): SessionContext | null;
@@ -0,0 +1,50 @@
1
+ "use strict";
2
+ /**
3
+ * session-context — env-var-backed run identity for skill auto-emission (B28).
4
+ *
5
+ * Every agentic-SDLC run flows through a single GitHub Actions job. That job
6
+ * already exports `MESH_PATH` for the runner; B28 extends the contract with
7
+ * four more env vars so the runner can auto-emit `skill_call` audit events
8
+ * without the agent having to call `audit-emit-event` after every skill.
9
+ *
10
+ * | env var | shape |
11
+ * |-----------------------|-----------------------------------------|
12
+ * | `OKR_ID` | non-empty string |
13
+ * | `RUN_ID` | non-empty string |
14
+ * | `INTENT_THREAD_UUID` | non-empty string (UUID expected but not validated here) |
15
+ * | `PHASE` | `'why' \| 'how' \| 'what'` |
16
+ *
17
+ * If ANY var is missing or empty, or `PHASE` is not one of the three canonical values,
18
+ * `readSessionContext()` returns `null` and the runner falls back to legacy
19
+ * behavior — the agent emits audit events explicitly via the `audit-emit-event`
20
+ * skill (or doesn't, and the workflow's chain-verify catches the gap). This
21
+ * preserves backward compatibility with pre-B28 chains while letting new runs
22
+ * benefit from deterministic emission.
23
+ *
24
+ * The auto-emission itself happens in `runSkill()` (skills.ts) — this module
25
+ * is just the env-var reader so it stays testable in isolation.
26
+ */
27
+ Object.defineProperty(exports, "__esModule", { value: true });
28
+ exports.readSessionContext = readSessionContext;
29
+ const PHASES = ['why', 'how', 'what'];
30
+ function isRunPhase(value) {
31
+ return PHASES.includes(value);
32
+ }
33
+ /**
34
+ * Read the four session-context env vars. Returns null if any are absent or empty, or
35
+ * `PHASE` is invalid — callers MUST handle null as "no auto-emission, run
36
+ * the skill anyway." Never throws.
37
+ */
38
+ function readSessionContext() {
39
+ const okrId = process.env.OKR_ID;
40
+ const runId = process.env.RUN_ID;
41
+ const intentThreadUuid = process.env.INTENT_THREAD_UUID;
42
+ const phase = process.env.PHASE;
43
+ if (!okrId || !runId || !intentThreadUuid || !phase) {
44
+ return null;
45
+ }
46
+ if (!isRunPhase(phase)) {
47
+ return null;
48
+ }
49
+ return { okrId, runId, intentThreadUuid, phase };
50
+ }
@@ -2,12 +2,21 @@
2
2
  * Shape every skill returns. Tagged union so the agent can branch on `ok`.
3
3
  * Handlers MUST NOT throw — they return `{ok: false, reason}` instead so
4
4
  * the calling agent can keep going (per SKILL.md error contracts).
5
+ *
6
+ * Optional `auditMetadata` field (B28): structured key/value pairs that the
7
+ * auto-emitter merges into the `skill_call` event payload. Handlers use it
8
+ * to declare audit-worthy details (search-skill `queries` + `result_count`,
9
+ * etc.) without the agent having to re-author them in an audit-emit-event
10
+ * call. Canonical fields (`skill`, `ok`, `duration_ms`, and — on failure — `reason`) always
11
+ * win on collision so handlers can't accidentally overwrite them.
5
12
  */
6
13
  export type SkillResult = ({
7
14
  ok: true;
15
+ auditMetadata?: Record<string, unknown>;
8
16
  } & Record<string, unknown>) | {
9
17
  ok: false;
10
18
  reason: string;
19
+ auditMetadata?: Record<string, unknown>;
11
20
  };
12
21
  export type SkillHandler = (input: unknown) => Promise<SkillResult>;
13
22
  export declare const SKILLS: Record<string, SkillHandler>;
@@ -64,6 +64,7 @@ exports.readStdin = readStdin;
64
64
  */
65
65
  const node_crypto_1 = require("node:crypto");
66
66
  const fs = __importStar(require("node:fs"));
67
+ const os = __importStar(require("node:os"));
67
68
  const path = __importStar(require("node:path"));
68
69
  const yaml = __importStar(require("js-yaml"));
69
70
  const zod_1 = require("zod");
@@ -72,6 +73,7 @@ const arxiv_search_1 = require("./nodes/arxiv-search");
72
73
  const hackernews_search_1 = require("./nodes/hackernews-search");
73
74
  const uspto_search_1 = require("./nodes/uspto-search");
74
75
  const dedupe_and_rank_1 = require("./nodes/dedupe-and-rank");
76
+ const session_context_1 = require("./session-context");
75
77
  // ─────────────────────────────────────────────────────────────────────
76
78
  // Mesh path resolution
77
79
  // ─────────────────────────────────────────────────────────────────────
@@ -462,6 +464,118 @@ const handleKnowledgeResearch = async (input) => {
462
464
  return { ok: true, findings, whitespace, references, rawBody: body };
463
465
  };
464
466
  // ─────────────────────────────────────────────────────────────────────
467
+ // Context skills — per-BAR slices of mesh state for PRD agent grounding
468
+ //
469
+ // The prd-agent invokes these AFTER `knowledge-mesh-bar` so the heavy
470
+ // lifting (CALM, threats, ADRs, controls) is already in its working set.
471
+ // These return a focused, persona-specific slice the agent's Architect /
472
+ // Security / Quality lenses each consume in turn during synthesis.
473
+ //
474
+ // Contract: input `{platformId, barIds}` — both required. If any BAR
475
+ // isn't resolvable in the mesh, we return ok:false (HOW agent halts per
476
+ // the "PRDs MUST be grounded" hard rule rather than fabricating).
477
+ // ─────────────────────────────────────────────────────────────────────
478
+ const ContextInput = zod_1.z.object({
479
+ platformId: zod_1.z.string().min(1),
480
+ barIds: zod_1.z.array(zod_1.z.string().min(1)).min(1),
481
+ });
482
+ /**
483
+ * Resolve a list of BAR ids to mesh paths. Returns ok:false on the first
484
+ * unresolvable id so the agent fails fast rather than synthesizing
485
+ * against a partial scope.
486
+ */
487
+ function resolveBarsOrFail(barIds) {
488
+ const mesh = meshPath();
489
+ const found = [];
490
+ for (const barId of barIds) {
491
+ const r = findBarDir(mesh, barId);
492
+ if (!r) {
493
+ return { ok: false, reason: `bar-not-found: ${barId}` };
494
+ }
495
+ found.push({ barId, barDir: r.barDir, platformSlug: r.platformSlug });
496
+ }
497
+ return { ok: true, found };
498
+ }
499
+ /**
500
+ * `context-architecture` — CALM model + ADRs + fitness functions, scoped to
501
+ * the OKR's affected BARs. The Architect persona uses this to ground FRs
502
+ * against declared nodes and flag CALM-drift.
503
+ */
504
+ const handleContextArchitecture = async (input) => {
505
+ const parsed = ContextInput.safeParse(input);
506
+ if (!parsed.success) {
507
+ return { ok: false, reason: `bad-input: ${parsed.error.message}` };
508
+ }
509
+ const resolved = resolveBarsOrFail(parsed.data.barIds);
510
+ if (!resolved.ok) {
511
+ return resolved;
512
+ }
513
+ const bars = [];
514
+ for (const { barId, barDir, platformSlug } of resolved.found) {
515
+ const calmModel = readJson(path.join(barDir, 'architecture', 'bar.arch.json'));
516
+ const fitnessFunctions = readYaml(path.join(barDir, 'architecture', 'fitness-functions.yaml'));
517
+ const adrDir = path.join(barDir, 'architecture', 'ADRs');
518
+ const adrs = [];
519
+ for (const name of readDirShallow(adrDir)) {
520
+ if (!name.endsWith('.md')) {
521
+ continue;
522
+ }
523
+ try {
524
+ const body = fs.readFileSync(path.join(adrDir, name), 'utf8');
525
+ const titleMatch = body.match(/^#\s+(.+)/m);
526
+ adrs.push({ id: name.replace(/\.md$/, ''), title: (titleMatch?.[1] ?? name).trim() });
527
+ }
528
+ catch { /* skip */ }
529
+ }
530
+ bars.push({ barId, platformId: platformSlug, slice: { calmModel, fitnessFunctions, adrs } });
531
+ }
532
+ return { ok: true, scope: parsed.data, bars };
533
+ };
534
+ /**
535
+ * `context-security` — threats + controls, scoped to the affected BARs.
536
+ * The Security persona maps SRs to STRIDE THR-NNN + OWASP A0X + NIST
537
+ * controls from this slice.
538
+ */
539
+ const handleContextSecurity = async (input) => {
540
+ const parsed = ContextInput.safeParse(input);
541
+ if (!parsed.success) {
542
+ return { ok: false, reason: `bad-input: ${parsed.error.message}` };
543
+ }
544
+ const resolved = resolveBarsOrFail(parsed.data.barIds);
545
+ if (!resolved.ok) {
546
+ return resolved;
547
+ }
548
+ const bars = [];
549
+ for (const { barId, barDir, platformSlug } of resolved.found) {
550
+ const threats = readYaml(path.join(barDir, 'architecture', 'threat-model.yaml'));
551
+ const controls = readYaml(path.join(barDir, 'security', 'security-controls.yaml'));
552
+ bars.push({ barId, platformId: platformSlug, slice: { threats, controls } });
553
+ }
554
+ return { ok: true, scope: parsed.data, bars };
555
+ };
556
+ /**
557
+ * `context-quality` — quality attributes + fitness functions, scoped to the
558
+ * affected BARs. The Quality persona uses this to land NFRs (perf, SLO,
559
+ * reliability) anchored to declared QA targets.
560
+ */
561
+ const handleContextQuality = async (input) => {
562
+ const parsed = ContextInput.safeParse(input);
563
+ if (!parsed.success) {
564
+ return { ok: false, reason: `bad-input: ${parsed.error.message}` };
565
+ }
566
+ const resolved = resolveBarsOrFail(parsed.data.barIds);
567
+ if (!resolved.ok) {
568
+ return resolved;
569
+ }
570
+ const bars = [];
571
+ for (const { barId, barDir, platformSlug } of resolved.found) {
572
+ const qualityAttributes = readYaml(path.join(barDir, 'architecture', 'quality-attributes.yaml'));
573
+ const fitnessFunctions = readYaml(path.join(barDir, 'architecture', 'fitness-functions.yaml'));
574
+ bars.push({ barId, platformId: platformSlug, slice: { qualityAttributes, fitnessFunctions } });
575
+ }
576
+ return { ok: true, scope: parsed.data, bars };
577
+ };
578
+ // ─────────────────────────────────────────────────────────────────────
465
579
  // Search skills — thin wrappers over the existing search nodes
466
580
  // ─────────────────────────────────────────────────────────────────────
467
581
  const SearchQueriesInput = zod_1.z.object({
@@ -505,7 +619,7 @@ const handleTavilySearch = async (input) => {
505
619
  }
506
620
  const apiKey = process.env.TAVILY_API_KEY;
507
621
  if (!apiKey) {
508
- return { ok: false, reason: 'tavily-api-key-missing' };
622
+ return { ok: false, reason: 'tavily-api-key-missing', auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
509
623
  }
510
624
  try {
511
625
  const res = await (0, tavily_search_1.runTavilySearch)({
@@ -513,14 +627,15 @@ const handleTavilySearch = async (input) => {
513
627
  queries: parsed.data.queries,
514
628
  maxResultsPerQuery: parsed.data.maxResults,
515
629
  });
630
+ const auditMetadata = { queries: parsed.data.queries, result_count: res.results.length };
516
631
  const failure = detectAllQueriesFailed(res.envelopes, 'tavily-search');
517
632
  if (failure) {
518
- return { ok: false, reason: failure, envelopes: res.envelopes };
633
+ return { ok: false, reason: failure, envelopes: res.envelopes, auditMetadata };
519
634
  }
520
- return { ok: true, envelopes: res.envelopes, results: res.results };
635
+ return { ok: true, envelopes: res.envelopes, results: res.results, auditMetadata };
521
636
  }
522
637
  catch (err) {
523
- return { ok: false, reason: `tavily-failed: ${err.message}` };
638
+ return { ok: false, reason: `tavily-failed: ${err.message}`, auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
524
639
  }
525
640
  };
526
641
  const handleArxivSearch = async (input) => {
@@ -533,14 +648,15 @@ const handleArxivSearch = async (input) => {
533
648
  queries: parsed.data.queries,
534
649
  maxResultsPerQuery: parsed.data.maxResults,
535
650
  });
651
+ const auditMetadata = { queries: parsed.data.queries, result_count: res.results.length };
536
652
  const failure = detectAllQueriesFailed(res.envelopes, 'arxiv-search');
537
653
  if (failure) {
538
- return { ok: false, reason: failure, envelopes: res.envelopes };
654
+ return { ok: false, reason: failure, envelopes: res.envelopes, auditMetadata };
539
655
  }
540
- return { ok: true, envelopes: res.envelopes, results: res.results };
656
+ return { ok: true, envelopes: res.envelopes, results: res.results, auditMetadata };
541
657
  }
542
658
  catch (err) {
543
- return { ok: false, reason: `arxiv-failed: ${err.message}` };
659
+ return { ok: false, reason: `arxiv-failed: ${err.message}`, auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
544
660
  }
545
661
  };
546
662
  const handleUsptoSearch = async (input) => {
@@ -550,7 +666,7 @@ const handleUsptoSearch = async (input) => {
550
666
  }
551
667
  const apiKey = process.env.USPTO_API_KEY;
552
668
  if (!apiKey) {
553
- return { ok: false, reason: 'uspto-api-key-missing' };
669
+ return { ok: false, reason: 'uspto-api-key-missing', auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
554
670
  }
555
671
  try {
556
672
  const res = await (0, uspto_search_1.runUsptoSearch)({
@@ -558,14 +674,15 @@ const handleUsptoSearch = async (input) => {
558
674
  queries: parsed.data.queries,
559
675
  maxResultsPerQuery: parsed.data.maxResults,
560
676
  });
677
+ const auditMetadata = { queries: parsed.data.queries, result_count: res.results.length };
561
678
  const failure = detectAllQueriesFailed(res.envelopes, 'uspto-search');
562
679
  if (failure) {
563
- return { ok: false, reason: failure, envelopes: res.envelopes };
680
+ return { ok: false, reason: failure, envelopes: res.envelopes, auditMetadata };
564
681
  }
565
- return { ok: true, envelopes: res.envelopes, results: res.results };
682
+ return { ok: true, envelopes: res.envelopes, results: res.results, auditMetadata };
566
683
  }
567
684
  catch (err) {
568
- return { ok: false, reason: `uspto-failed: ${err.message}` };
685
+ return { ok: false, reason: `uspto-failed: ${err.message}`, auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
569
686
  }
570
687
  };
571
688
  const handleHackerNewsSearch = async (input) => {
@@ -578,14 +695,15 @@ const handleHackerNewsSearch = async (input) => {
578
695
  queries: parsed.data.queries,
579
696
  hitsPerQuery: parsed.data.maxResults,
580
697
  });
698
+ const auditMetadata = { queries: parsed.data.queries, result_count: res.results.length };
581
699
  const failure = detectAllQueriesFailed(res.envelopes, 'hackernews-search');
582
700
  if (failure) {
583
- return { ok: false, reason: failure, envelopes: res.envelopes };
701
+ return { ok: false, reason: failure, envelopes: res.envelopes, auditMetadata };
584
702
  }
585
- return { ok: true, envelopes: res.envelopes, results: res.results };
703
+ return { ok: true, envelopes: res.envelopes, results: res.results, auditMetadata };
586
704
  }
587
705
  catch (err) {
588
- return { ok: false, reason: `hackernews-failed: ${err.message}` };
706
+ return { ok: false, reason: `hackernews-failed: ${err.message}`, auditMetadata: { queries: parsed.data.queries, result_count: 0 } };
589
707
  }
590
708
  };
591
709
  // ─────────────────────────────────────────────────────────────────────
@@ -719,6 +837,89 @@ function sha256(text) {
719
837
  async function sleep(ms) {
720
838
  return new Promise(resolve => setTimeout(resolve, ms));
721
839
  }
840
+ // ─────────────────────────────────────────────────────────────────────
841
+ // Knight's Seal v1 — per-run ephemeral Ed25519 signing (B27)
842
+ //
843
+ // Each run gets an ephemeral Ed25519 keypair generated on first
844
+ // `audit-emit-event` call. The PUBLIC key is persisted beside the audit
845
+ // JSONL so verify-chain (and future external auditors) can validate
846
+ // signatures forever. The PRIVATE key lives in `os.tmpdir()` for the
847
+ // duration of the run — NEVER inside the mesh repo (so a careless
848
+ // `git add` can't commit it).
849
+ //
850
+ // Per-event flow:
851
+ // 1. Build event with event_hash='' and signature=''
852
+ // 2. event_hash = sha256(canonical(event)) ← chain integrity
853
+ // 3. signature = Ed25519(privKey, event_hash) ← nonrepudiation
854
+ // 4. Persist {...event, event_hash, signature}
855
+ //
856
+ // Verify flow (in audit-verify-chain):
857
+ // 1. Recompute event_hash (set signature='' AND event_hash='')
858
+ // 2. Match recorded event_hash (current chain check)
859
+ // 3. Verify Ed25519(pubKey, recorded event_hash, recorded signature)
860
+ //
861
+ // Backward compat: a chain with NO signature fields is reported as
862
+ // `sealed: false, sealVerified: false` but still passes if hashes are
863
+ // intact. A chain with PARTIAL signatures is treated as tampering.
864
+ // ─────────────────────────────────────────────────────────────────────
865
+ function knightSealPubKeyPath(okrId, runId) {
866
+ return path.join(meshPath(), 'okrs', okrId, 'audit', 'keys', `${runId}.pub.pem`);
867
+ }
868
+ function knightSealPrivKeyPath(okrId, runId) {
869
+ // Tmpdir-scoped to avoid any chance of `git add`-ing a private key.
870
+ // Filename collision-resistant via okrId+runId.
871
+ return path.join(os.tmpdir(), '.research-runner-keys', `${okrId.replace(/[^A-Za-z0-9_-]/g, '_')}--${runId.replace(/[^A-Za-z0-9_-]/g, '_')}.priv.pem`);
872
+ }
873
+ /**
874
+ * Load the run's private key from tmp, or generate + persist a fresh
875
+ * keypair if this is the first event for the run. Returns both KeyObjects.
876
+ */
877
+ function loadOrCreateRunKeypair(okrId, runId) {
878
+ const privPath = knightSealPrivKeyPath(okrId, runId);
879
+ const pubPath = knightSealPubKeyPath(okrId, runId);
880
+ if (fs.existsSync(privPath) && fs.existsSync(pubPath)) {
881
+ const privPem = fs.readFileSync(privPath, 'utf8');
882
+ const pubPem = fs.readFileSync(pubPath, 'utf8');
883
+ return {
884
+ privKey: (0, node_crypto_1.createPrivateKey)({ key: privPem, format: 'pem' }),
885
+ pubKey: (0, node_crypto_1.createPublicKey)({ key: pubPem, format: 'pem' }),
886
+ };
887
+ }
888
+ const { privateKey, publicKey } = (0, node_crypto_1.generateKeyPairSync)('ed25519');
889
+ const privPem = privateKey.export({ type: 'pkcs8', format: 'pem' });
890
+ const pubPem = publicKey.export({ type: 'spki', format: 'pem' });
891
+ fs.mkdirSync(path.dirname(privPath), { recursive: true });
892
+ fs.writeFileSync(privPath, privPem, { encoding: 'utf8', mode: 0o600 });
893
+ fs.mkdirSync(path.dirname(pubPath), { recursive: true });
894
+ fs.writeFileSync(pubPath, pubPem, 'utf8');
895
+ return { privKey: privateKey, pubKey: publicKey };
896
+ }
897
+ /** Returns null if no public key has been persisted for this run yet. */
898
+ function tryLoadRunPublicKey(okrId, runId) {
899
+ const pubPath = knightSealPubKeyPath(okrId, runId);
900
+ if (!fs.existsSync(pubPath)) {
901
+ return null;
902
+ }
903
+ try {
904
+ return (0, node_crypto_1.createPublicKey)({ key: fs.readFileSync(pubPath, 'utf8'), format: 'pem' });
905
+ }
906
+ catch {
907
+ return null;
908
+ }
909
+ }
910
+ function signEventHash(privKey, eventHashHex) {
911
+ // Ed25519 signs raw bytes — we sign the UTF-8 bytes of the hex digest,
912
+ // which is the canonical chain anchor. Output: 64-byte signature, hex.
913
+ return (0, node_crypto_1.sign)(null, Buffer.from(eventHashHex, 'utf8'), privKey).toString('hex');
914
+ }
915
+ function verifyEventSignature(pubKey, eventHashHex, signatureHex) {
916
+ try {
917
+ return (0, node_crypto_1.verify)(null, Buffer.from(eventHashHex, 'utf8'), pubKey, Buffer.from(signatureHex, 'hex'));
918
+ }
919
+ catch {
920
+ return false;
921
+ }
922
+ }
722
923
  /**
723
924
  * `audit-emit-event` — append one hash-chained event to
724
925
  * `<mesh>/okrs/<id>/audit/events/<runId>.jsonl`.
@@ -763,6 +964,8 @@ const handleAuditEmitEvent = async (input) => {
763
964
  nextEventId = last.event_id + 1;
764
965
  }
765
966
  }
967
+ const { privKey, pubKey } = loadOrCreateRunKeypair(okrId, runId);
968
+ const publicKeyPem = pubKey.export({ type: 'spki', format: 'pem' });
766
969
  const draft = {
767
970
  event_id: nextEventId,
768
971
  ts: new Date().toISOString(),
@@ -773,12 +976,19 @@ const handleAuditEmitEvent = async (input) => {
773
976
  event_kind: eventKind,
774
977
  payload,
775
978
  prev_event_hash: prevHash,
979
+ // Embed public key on event 1 so a single-line audit excerpt
980
+ // still names its signer. Subsequent events reference the same
981
+ // committed key on disk; embedding on every line would balloon
982
+ // the JSONL with no integrity gain.
983
+ public_key: nextEventId === 1 ? publicKeyPem : null,
776
984
  event_hash: '',
985
+ signature: '',
777
986
  };
778
987
  const hash = sha256(canonicalStringify(draft));
779
- const finalEvent = { ...draft, event_hash: hash };
988
+ const signature = signEventHash(privKey, hash);
989
+ const finalEvent = { ...draft, event_hash: hash, signature };
780
990
  fs.appendFileSync(filePath, JSON.stringify(finalEvent) + '\n', 'utf8');
781
- return { ok: true, chainHead: hash, eventId: nextEventId };
991
+ return { ok: true, chainHead: hash, eventId: nextEventId, sealed: true };
782
992
  }
783
993
  finally {
784
994
  if (lockFd !== null) {
@@ -793,6 +1003,105 @@ const handleAuditEmitEvent = async (input) => {
793
1003
  return { ok: false, reason: 'audit-write-failed-after-retries' };
794
1004
  };
795
1005
  // ─────────────────────────────────────────────────────────────────────
1006
+ // Audit verify-chain — CI defense against forged audit logs
1007
+ // ─────────────────────────────────────────────────────────────────────
1008
+ const AuditVerifyInput = zod_1.z.object({
1009
+ okrId: zod_1.z.string().min(1),
1010
+ runId: zod_1.z.string().min(1),
1011
+ });
1012
+ /**
1013
+ * `audit-verify-chain` — replay the hash chain over an existing audit
1014
+ * JSONL, returning `{ok: true, chainHead, eventCount, sealed, sealVerified}` if the chain is
1015
+ * intact or `{ok: false, reason}` on the first integrity failure.
1016
+ *
1017
+ * Why this skill exists: an agent that loses access to the runner could
1018
+ * (and on PR #105 did) self-write the JSONL with fabricated hashes. The
1019
+ * audit-and-drift workflow calls this skill after each run; verdict
1020
+ * fails + `chain-forgery-detected` label is applied on `ok:false`. The
1021
+ * verification rules are identical to `verifyChain()` in audit-emitter.ts:
1022
+ * - first event prev_event_hash === null
1023
+ * - each prev_event_hash === preceding event.event_hash
1024
+ * - each event_hash === sha256(canonicalStringify(event with event_hash — and signature, when present — set to ''))
1025
+ * - event_id is monotonic from 1
1026
+ */
1027
+ const handleAuditVerifyChain = async (input) => {
1028
+ const parsed = AuditVerifyInput.safeParse(input);
1029
+ if (!parsed.success) {
1030
+ return { ok: false, reason: `bad-input: ${parsed.error.message}` };
1031
+ }
1032
+ const { okrId, runId } = parsed.data;
1033
+ const filePath = path.join(meshPath(), 'okrs', okrId, 'audit', 'events', `${runId}.jsonl`);
1034
+ if (!fs.existsSync(filePath)) {
1035
+ return { ok: false, reason: `audit-jsonl-missing: ${filePath}` };
1036
+ }
1037
+ let lines;
1038
+ try {
1039
+ lines = fs.readFileSync(filePath, 'utf8').split('\n').filter(l => l.trim().length > 0);
1040
+ }
1041
+ catch (err) {
1042
+ return { ok: false, reason: `read-failed: ${err.message}` };
1043
+ }
1044
+ const pubKey = tryLoadRunPublicKey(okrId, runId);
1045
+ // Track signature state across the whole chain. v1 contract: either
1046
+ // EVERY event is signed (sealed=true) or NO event is signed (legacy
1047
+ // pre-B27 chain, sealed=false). Partial signatures = tampering.
1048
+ let signedCount = 0;
1049
+ let prev = null;
1050
+ for (let i = 0; i < lines.length; i++) {
1051
+ let event;
1052
+ try {
1053
+ event = JSON.parse(lines[i]);
1054
+ }
1055
+ catch (err) {
1056
+ return { ok: false, reason: `bad-jsonl-line-${i + 1}: ${err.message}` };
1057
+ }
1058
+ if (event.event_id !== i + 1) {
1059
+ return { ok: false, reason: `event-id-mismatch-line-${i + 1}: expected ${i + 1} got ${event.event_id}` };
1060
+ }
1061
+ if (event.prev_event_hash !== prev) {
1062
+ return { ok: false, reason: `prev-hash-mismatch-line-${i + 1}: expected ${prev ?? 'null'} got ${event.prev_event_hash ?? 'null'}` };
1063
+ }
1064
+ const recordedHash = event.event_hash;
1065
+ if (typeof recordedHash !== 'string') {
1066
+ return { ok: false, reason: `missing-event-hash-line-${i + 1}` };
1067
+ }
1068
+ const recordedSignature = typeof event.signature === 'string' ? event.signature : null;
1069
+ // Recompute hash with BOTH event_hash and signature zeroed, since
1070
+ // both are filled in after the hash is computed at write time.
1071
+ const draft = { ...event, event_hash: '', signature: recordedSignature !== null ? '' : undefined };
1072
+ if (recordedSignature === null) {
1073
+ delete draft.signature;
1074
+ }
1075
+ const recomputed = sha256(canonicalStringify(draft));
1076
+ if (recordedHash !== recomputed) {
1077
+ return { ok: false, reason: `forged-hash-line-${i + 1}: recorded=${recordedHash.slice(0, 16)}… recomputed=${recomputed.slice(0, 16)}…` };
1078
+ }
1079
+ if (recordedSignature !== null) {
1080
+ signedCount++;
1081
+ }
1082
+ prev = recordedHash;
1083
+ }
1084
+ // Knight's Seal verification: enforce all-or-nothing.
1085
+ const sealed = signedCount > 0;
1086
+ let sealVerified = false;
1087
+ if (sealed) {
1088
+ if (signedCount !== lines.length) {
1089
+ return { ok: false, reason: `partial-signatures: ${signedCount}/${lines.length} events signed (chain tampered)` };
1090
+ }
1091
+ if (!pubKey) {
1092
+ return { ok: false, reason: `public-key-missing: events are signed but no <runId>.pub.pem found in audit/keys/` };
1093
+ }
1094
+ for (let i = 0; i < lines.length; i++) {
1095
+ const event = JSON.parse(lines[i]);
1096
+ if (!verifyEventSignature(pubKey, event.event_hash, event.signature)) {
1097
+ return { ok: false, reason: `signature-mismatch-line-${i + 1}: Ed25519 verify failed` };
1098
+ }
1099
+ }
1100
+ sealVerified = true;
1101
+ }
1102
+ return { ok: true, chainHead: prev, eventCount: lines.length, sealed, sealVerified };
1103
+ };
1104
+ // ─────────────────────────────────────────────────────────────────────
796
1105
  // Registry + dispatcher
797
1106
  // ─────────────────────────────────────────────────────────────────────
798
1107
  exports.SKILLS = {
@@ -802,6 +1111,9 @@ exports.SKILLS = {
802
1111
  'knowledge-mesh-threats': handleKnowledgeMeshThreats,
803
1112
  'knowledge-mesh-adrs': handleKnowledgeMeshAdrs,
804
1113
  'knowledge-research': handleKnowledgeResearch,
1114
+ 'context-architecture': handleContextArchitecture,
1115
+ 'context-security': handleContextSecurity,
1116
+ 'context-quality': handleContextQuality,
805
1117
  'tavily-search': handleTavilySearch,
806
1118
  'arxiv-search': handleArxivSearch,
807
1119
  'uspto-search': handleUsptoSearch,
@@ -809,16 +1121,61 @@ exports.SKILLS = {
809
1121
  'dedupe-and-rank': handleDedupeAndRank,
810
1122
  'format-research-issue-update': handleFormatResearchIssueUpdate,
811
1123
  'audit-emit-event': handleAuditEmitEvent,
1124
+ 'audit-verify-chain': handleAuditVerifyChain,
812
1125
  };
813
1126
  function isSkillName(name) {
814
1127
  return Object.prototype.hasOwnProperty.call(exports.SKILLS, name);
815
1128
  }
1129
+ /**
1130
+ * Skills whose name exactly matches one of these entries never trigger
1131
+ * audit-event auto-emission — they're the audit-event surface itself
1132
+ * (writer + reader). Letting them auto-emit would create either infinite
1133
+ * recursion (audit-emit-event audit-emitting itself) or a meaningless
1134
+ * `skill_call` event for a read-only verify operation.
1135
+ */
1136
+ const NO_AUTO_EMIT_SKILLS = new Set(['audit-emit-event', 'audit-verify-chain']);
816
1137
  async function runSkill(name, input) {
817
1138
  const handler = exports.SKILLS[name];
818
1139
  if (!handler) {
819
1140
  return { ok: false, reason: `unknown-skill: ${name}` };
820
1141
  }
821
- return handler(input);
1142
+ const t0 = Date.now();
1143
+ const result = await handler(input);
1144
+ const duration_ms = Date.now() - t0;
1145
+ // B28 — Court Recorder Auto-Logging. When the workflow has set the
1146
+ // session-context env vars (OKR_ID / RUN_ID / INTENT_THREAD_UUID / PHASE),
1147
+ // the runner deterministically emits a `skill_call` event for every
1148
+ // handler invocation. The agent CANNOT skip this — there's nothing to
1149
+ // skip; the emission happens inside the runner before the result is
1150
+ // returned to the caller. Falls back to legacy mode (no auto-emit) when
1151
+ // context env vars are absent so pre-B28 chains keep working unchanged.
1152
+ if (!NO_AUTO_EMIT_SKILLS.has(name)) {
1153
+ const ctx = (0, session_context_1.readSessionContext)();
1154
+ if (ctx) {
1155
+ // Merge handler-declared auditMetadata first so canonical fields
1156
+ // (skill / ok / duration_ms / reason) always win on collision —
1157
+ // handlers can't accidentally lie about what they were called.
1158
+ const extras = result.auditMetadata ?? {};
1159
+ const payload = { ...extras, skill: name, ok: result.ok, duration_ms };
1160
+ if (!result.ok) {
1161
+ payload.reason = result.reason;
1162
+ }
1163
+ // Best-effort: an audit-write failure must not shadow the real skill
1164
+ // result. The chain-verify CI gate is the catch-net for missed events.
1165
+ try {
1166
+ await handleAuditEmitEvent({
1167
+ okrId: ctx.okrId,
1168
+ runId: ctx.runId,
1169
+ phase: ctx.phase,
1170
+ intentThreadUuid: ctx.intentThreadUuid,
1171
+ eventKind: 'skill_call',
1172
+ payload,
1173
+ });
1174
+ }
1175
+ catch { /* swallow — chain-verify catches gaps */ }
1176
+ }
1177
+ }
1178
+ return result;
822
1179
  }
823
1180
  /**
824
1181
  * Read all of stdin as a UTF-8 string. Returns '' immediately on TTY
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.23",
3
+ "version": "0.1.29",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",