@maintainabilityai/research-runner 0.1.19 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -48,6 +48,7 @@ const fs = __importStar(require("node:fs"));
48
48
  const path = __importStar(require("node:path"));
49
49
  const archeologist_1 = require("./runner/archeologist");
50
50
  const prd_1 = require("./runner/prd");
51
+ const skills_1 = require("./runner/skills");
51
52
  const PKG = JSON.parse(fs.readFileSync(path.resolve(__dirname, '..', 'package.json'), 'utf8'));
52
53
  function parseFlags(argv) {
53
54
  const flags = {};
@@ -191,17 +192,54 @@ async function prdCmd(argv) {
191
192
  });
192
193
  }
193
194
  /**
   * Print the CLI banner and usage text to stdout. The skill list is built
   * from the keys of `skills_1.SKILLS`, each prefixed with `skill-` and
   * ordered by the default `Array.prototype.sort()` comparison.
   */
  function help() {
195
+ const skillNames = Object.keys(skills_1.SKILLS).map(n => `skill-${n}`).sort().join('\n ');
194
196
  process.stdout.write(`research-runner v${PKG.version}
195
197
 
196
198
  Usage:
197
199
  research-runner archeologist --brief "<topic>" --scope-level <platform|bar> --scope-id ID [--path research|archaeology] [...]
198
200
  research-runner prd --research-pr <url|path> --scope-level <platform|bar> --scope-id ID [...]
201
+ research-runner skill-<name> # one-shot skill subcommand; reads JSON from stdin, writes JSON to stdout
202
+
203
+ Skills (called by .agent.md tools: declarations under \$MESH_PATH):
204
+ ${skillNames}
199
205
 
200
206
  See README.md for the full flag surface.
201
207
  `);
202
208
  }
209
/**
 * Write `payload` to stdout as one JSON line and terminate with `code` only
 * after the write has been handed off.
 *
 * Calling `process.exit()` directly after `process.stdout.write()` can drop
 * the output when stdout is written asynchronously (for example a pipe on
 * Windows), so the exit is deferred to the write callback.
 */
function writeJsonLineAndExit(payload, code) {
    return new Promise((resolve) => {
        process.stdout.write(JSON.stringify(payload) + '\n', () => {
            process.exit(code);
            // Only reachable when process.exit is stubbed (e.g. in tests).
            resolve();
        });
    });
}
/**
 * skill-* dispatcher. Reads a JSON object from stdin, calls runSkill,
 * writes the result as JSON to stdout. Exits non-zero on `ok: false` so
 * the calling agent (or shell wrapper) can detect failure via exit code
 * in addition to the structured `{ok: false, reason}` payload.
 *
 * Failure payloads emitted here:
 *  - `unknown-skill: <name>` when `skillName` is not a registered skill;
 *  - `bad-stdin-json: <parser message>` when stdin is non-empty but is not
 *    valid JSON.
 * Empty or whitespace-only stdin is treated as the empty object `{}`.
 *
 * @param skillName Skill name with the `skill-` prefix already removed.
 */
async function skillCmd(skillName) {
    if (!(0, skills_1.isSkillName)(skillName)) {
        await writeJsonLineAndExit({ ok: false, reason: `unknown-skill: ${skillName}` }, 1);
        return;
    }
    const stdinRaw = await (0, skills_1.readStdin)();
    let input = {};
    if (stdinRaw.trim().length > 0) {
        try {
            input = JSON.parse(stdinRaw);
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            await writeJsonLineAndExit({ ok: false, reason: `bad-stdin-json: ${detail}` }, 1);
            return;
        }
    }
    const result = await (0, skills_1.runSkill)(skillName, input);
    if (result.ok === false) {
        // Flush the structured failure before forcing the non-zero exit.
        await writeJsonLineAndExit(result, 1);
        return;
    }
    process.stdout.write(JSON.stringify(result) + '\n');
}
203
237
  async function main() {
204
238
  const [, , subcommand, ...rest] = process.argv;
239
+ if (subcommand && subcommand.startsWith('skill-')) {
240
+ await skillCmd(subcommand.slice('skill-'.length));
241
+ return;
242
+ }
205
243
  switch (subcommand) {
206
244
  case 'archeologist':
207
245
  await archeologistCmd(rest);
@@ -0,0 +1,52 @@
1
/**
 * Shape of the CloudEvents v1.0 envelope produced by this package's emitter.
 *
 * Every property declared here is required by this type. That includes
 * `time`, `datacontenttype` and `subject`, which the CloudEvents spec itself
 * lists as optional attributes. `dataschema` is not part of this shape.
 *
 * See https://github.com/cloudevents/spec/blob/v1.0.2/cloudevents/spec.md
 */
export interface CloudEventsEnvelope<T = Record<string, unknown>> {
  /** CloudEvents spec version. Fixed for our emitter. */
  specversion: '1.0';
  /** Event-source identifier — e.g. `maintainabilityai/research-runner`. */
  source: string;
  /** Event type — maps to `audit-emitter` event_kind plus an `mai.` prefix. */
  type: string;
  /** Unique id — UUID v4 generated by the emitter. */
  id: string;
  /** ISO timestamp when the event was emitted. */
  time: string;
  /** MIME type of `data`. Always `application/json` for our emitter. */
  datacontenttype: 'application/json';
  /** OKR phase that contextualizes this event. */
  subject: string;
  /** The raw audit-emitter event payload. */
  data: T;
}
26
/**
 * Arguments accepted by `buildCloudEventsEnvelope`: the audit event to wrap
 * plus the metadata that ends up in the envelope attributes.
 */
export interface CourtRecorderInput<T> {
  /** Identifier of the emitting component, e.g. `maintainabilityai/research-runner`. Copied to the envelope `source`. */
  source: string;
  /** `audit-emitter` event kind; the envelope `type` is `mai.<eventKind>`. */
  eventKind: string;
  /** OKR phase (`why` | `how` | `what` | `setup` etc.); becomes the envelope `subject`. */
  phase: string;
  /** Original audit-emitter event payload; becomes the envelope `data`. */
  payload: T;
  /** Timestamp override. When omitted the emitter uses `new Date().toISOString()`. */
  time?: string;
  /** Id override. When omitted the emitter uses `crypto.randomUUID()`. */
  id?: string;
}
40
+ /**
41
+ * Build a single CloudEvents v1.0 envelope around an audit-event payload.
42
+ * Stateless + deterministic given an injected `id` + `time` — handy for
43
+ * unit tests.
+ *
+ * @param input Source, event kind, phase and payload to wrap; `id` and `time` are optional overrides.
+ * @returns An envelope whose `data` is `input.payload`.
44
+ */
45
+ export declare function buildCloudEventsEnvelope<T>(input: CourtRecorderInput<T>): CloudEventsEnvelope<T>;
46
+ /**
47
+ * Serialize an envelope to a single JSONL line (NO trailing newline; the
48
+ * caller appends `\n` when writing). Throws if the envelope can't be
49
+ * round-tripped through JSON.stringify — that signals a non-serializable
50
+ * payload, which is a programmer bug, not a runtime condition.
+ *
+ * @param env Envelope to serialize.
+ * @returns The JSON text for `env`, without a trailing newline.
51
+ */
52
+ export declare function serializeCloudEventsEnvelope<T>(env: CloudEventsEnvelope<T>): string;
@@ -0,0 +1,77 @@
1
+ "use strict";
2
// Module-interop helper: reuses `this.__createBinding` when one already exists.
// Otherwise exposes `m[k]` on `o` under `k2` (defaulting to `k`): through
// Object.defineProperty — swapping in an enumerable getter when `m[k]` has no
// descriptor or the descriptor check below fails — or by plain assignment
// when Object.create is unavailable.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Module-interop helper: reuses `this.__setModuleDefault` when one already
// exists. Otherwise stores `v` as the "default" property of `o` — as an
// enumerable defineProperty value when Object.create exists, else by plain
// assignment.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Module-interop helper: reuses `this.__importStar` when one already exists.
// The returned function hands back `mod` untouched when `mod.__esModule` is
// truthy; otherwise it builds a fresh object that binds every own key of
// `mod` except "default" and sets "default" to `mod` itself.
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in/hasOwnProperty fallback when that is missing.
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.buildCloudEventsEnvelope = buildCloudEventsEnvelope;
37
+ exports.serializeCloudEventsEnvelope = serializeCloudEventsEnvelope;
38
+ /**
39
+ * Court Recorder — CloudEvents v1.0 envelope emitter for the agentic-SDLC
40
+ * audit chain. Wraps `audit-emitter`'s raw event payload in a CloudEvents
41
+ * 1.0 envelope so the resulting JSONL is SIEM-compatible without further
42
+ * transformation.
43
+ *
44
+ * Phase B-PR4 ships the emitter shape; agents will adopt it in their
45
+ * `audit-emit-event` Skill backend (B-PR1a). Each line of
46
+ * `okrs/<id>/audit/events/<run-id>.jsonl` is a fully-valid CloudEvents
47
+ * envelope whose `data` property carries the original audit-event JSON.
48
+ *
49
+ * Spec: vscode-extension/design/agentic-sdlc.md §11 (Court Recorder).
50
+ */
51
+ const crypto = __importStar(require("node:crypto"));
52
/**
 * Wrap an audit-event payload in a CloudEvents v1.0 envelope.
 *
 * The function keeps no state. When the caller supplies both `id` and
 * `time`, the output depends only on the input, which makes it easy to pin
 * in unit tests; otherwise a random UUID and the current ISO timestamp are
 * filled in.
 *
 * @param input Source, event kind, phase, payload and optional `id`/`time`.
 * @returns The envelope, with `type` set to `mai.<eventKind>`, `subject` set
 *          to the phase and `data` set to the payload.
 */
function buildCloudEventsEnvelope(input) {
    const { source, eventKind } = input;
    // Fall back to generated values only when the caller did not inject them.
    const id = input.id ?? crypto.randomUUID();
    const time = input.time ?? new Date().toISOString();
    const { phase, payload } = input;
    return {
        specversion: '1.0',
        source,
        type: `mai.${eventKind}`,
        id,
        time,
        datacontenttype: 'application/json',
        subject: phase,
        data: payload,
    };
}
69
+ /**
70
+ * Serialize an envelope to a single JSONL line (NO trailing newline; the
71
+ * caller appends `\n` when writing). Throws if the envelope can't be
72
+ * round-tripped through JSON.stringify — that signals a non-serializable
73
+ * payload, which is a programmer bug, not a runtime condition.
74
+ */
75
+ function serializeCloudEventsEnvelope(env) {
76
+ return JSON.stringify(env);
77
+ }
@@ -12,6 +12,78 @@
12
12
  * cleanly in GitHub PR bodies + the rendered docs site.
13
13
  */
14
14
  import type { GuardrailMode, LlmProvider } from '../schemas';
15
/**
 * Governance fields for v4 agentic-SDLC runs that are anchored to an OKR.
 *
 * The whole block is optional on the Hatter's Tag input: legacy CI-only runs
 * have no OKR anchor, leave it out, and the emitted YAML omits these keys.
 * OKR-anchored runs (Phase B+) fill it in so that `intent_thread_uuid` can be
 * used to walk the audit chain across repositories.
 *
 * See vscode-extension/design/agentic-sdlc.md §4.4 (intent_thread_uuid
 * lifecycle) and §11.1 (Hatter's Tag full provenance schema).
 */
export interface HattersTagOkrContext {
  /** Correlation key shared across repos; created when the OKR is created. */
  intent_thread_uuid: string;
  /** Run id of the preceding phase whose artifact fed this run (null on Why). */
  parent_intent_thread?: string | null;
  /** Human-readable OKR card id, e.g. OKR-2026Q1-IMDB-001-celeb-api. */
  okr_id: string;
  /** Phase that produced this artifact. */
  phase: 'why' | 'how' | 'what';
  /** Governance tier, frozen at run start to mitigate tier creep (§6.2). */
  governance_tier: 'autonomous' | 'supervised' | 'restricted';
}
37
/**
 * Attestation of who authored and who reviewed an artifact (v4 §11.1).
 *
 * Every field is optional, and legacy runs without distinct agent DIDs omit
 * the block altogether. From Phase B on, `author_did` is stamped from the
 * GitHub App installation id plus the agent name, and reviewers add their
 * entries to `reviewer_dids[]` once their structured-review pack has run.
 */
export interface HattersTagAttestation {
  /** DID of the authoring agent. */
  author_did?: string;
  /** Id and version of the prompt pack the author cited. */
  author_prompt_pack_version?: string;
  /** SHA256 of the author's system prompt as of run time. */
  author_system_prompt_sha?: string;
  /** DIDs of the reviewer-agent sessions that scored this PR. */
  reviewer_dids?: string[];
  /** Scores per reviewer, keyed by reviewer name. */
  reviewer_scores?: {
    architect?: number | null;
    security?: number | null;
  };
}
58
/**
 * Evidence-mode block (v4 §11.1.7). It makes the author state whether the
 * artifact's citations come from a fresh provider search or from
 * cached/reused sources.
 *
 * Background: in the first round of agentic runs (run #21) the SKILL.md
 * context was loaded but no live skill backends existed. The agent silently
 * fell back to reading repo files and produced an updated artifact that
 * LOOKED grounded yet cited zero live signals. The validator workflow
 * (B-PR1c) therefore cross-checks `fresh_provider_search_performed === true`
 * against the `skill_call` events for the four search providers in the audit
 * JSONL. A mismatch yields the `degraded-evidence` label, which blocks
 * governance-pass promotion.
 *
 * Modes:
 *   live   — every cited source came from a fresh provider call this run
 *   cached — agent reused prior research without re-running providers
 *   mixed  — some cited sources are fresh, some carried forward
 *
 * `degraded_reason` is REQUIRED by the gate for `cached` / `mixed` so it can
 * surface a human-readable cause (e.g. "tavily-skill-backend-missing",
 * "rate-limited", "rerun-after-review"). This type does not enforce that
 * requirement; the field is declared optional.
 */
export interface HattersTagEvidence {
  /** Which of the three modes above applies to this run. */
  evidence_mode: 'live' | 'cached' | 'mixed';
  /** True iff at least one of the four search providers (tavily/arxiv/uspto/hackernews) was successfully called this run. */
  fresh_provider_search_performed: boolean;
  /** Free-text cause, used when evidence_mode !== 'live'. */
  degraded_reason?: string;
}
15
87
  export interface HattersTagInput {
16
88
  run_id: string;
17
89
  /** Git SHA of the mesh repo at run start. */
@@ -47,6 +119,12 @@ export interface HattersTagInput {
47
119
  };
48
120
  /** ISO timestamp the artifact was published (PR created). */
49
121
  published_at: string;
122
+ /** v4: OKR-anchored runs populate this; legacy CI-only runs omit it. */
123
+ okr?: HattersTagOkrContext;
124
+ /** v4: Phase B+ agent runs populate this; legacy runs omit it. */
125
+ attestation?: HattersTagAttestation;
126
+ /** v4 §11.1.7: agentic runs populate this; legacy CI runs omit it. */
127
+ evidence?: HattersTagEvidence;
50
128
  }
51
129
  /**
   * Build the full Hatter's Tag block, including heading + fenced YAML.
   *
   * @param input Provenance fields to render into the block.
   * @returns The block as a single string.
   */
52
130
  export declare function buildHattersTag(input: HattersTagInput): string;
@@ -1,6 +1,13 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.buildHattersTag = buildHattersTag;
4
/**
 * Report whether an attestation object carries anything worth emitting.
 *
 * String fields count when truthy, `reviewer_dids` counts when it has at
 * least one entry, and `reviewer_scores` counts when `architect` or
 * `security` is neither null nor undefined (so a score of 0 counts).
 *
 * @param a Attestation object to inspect.
 * @returns `true` if any field is populated, otherwise `false`.
 */
function hasAnyAttestationField(a) {
    if (a.author_did || a.author_prompt_pack_version || a.author_system_prompt_sha) {
        return true;
    }
    if (a.reviewer_dids && a.reviewer_dids.length > 0) {
        return true;
    }
    const scores = a.reviewer_scores;
    if (!scores) {
        return false;
    }
    return scores.architect != null || scores.security != null;
}
4
11
  /** Build the full Hatter's Tag block, including heading + fenced YAML. */
5
12
  function buildHattersTag(input) {
6
13
  const lines = [];
@@ -29,6 +36,57 @@ function buildHattersTag(input) {
29
36
  lines.push(` iterations: ${input.grounding.iterations}`);
30
37
  lines.push(` passed: ${input.grounding.passed}`);
31
38
  }
39
+ if (input.okr) {
40
+ lines.push('okr:');
41
+ lines.push(` intent_thread_uuid: ${input.okr.intent_thread_uuid}`);
42
+ if (input.okr.parent_intent_thread !== undefined) {
43
+ lines.push(` parent_intent_thread: ${input.okr.parent_intent_thread ?? 'null'}`);
44
+ }
45
+ lines.push(` okr_id: ${input.okr.okr_id}`);
46
+ lines.push(` phase: ${input.okr.phase}`);
47
+ lines.push(` governance_tier: ${input.okr.governance_tier}`);
48
+ }
49
+ if (input.attestation && hasAnyAttestationField(input.attestation)) {
50
+ lines.push('attestation:');
51
+ if (input.attestation.author_did) {
52
+ lines.push(` author_did: ${input.attestation.author_did}`);
53
+ }
54
+ if (input.attestation.author_prompt_pack_version) {
55
+ lines.push(` author_prompt_pack_version: ${input.attestation.author_prompt_pack_version}`);
56
+ }
57
+ if (input.attestation.author_system_prompt_sha) {
58
+ lines.push(` author_system_prompt_sha: ${input.attestation.author_system_prompt_sha}`);
59
+ }
60
+ if (input.attestation.reviewer_dids && input.attestation.reviewer_dids.length > 0) {
61
+ lines.push(' reviewer_dids:');
62
+ for (const did of input.attestation.reviewer_dids) {
63
+ lines.push(` - ${did}`);
64
+ }
65
+ }
66
+ if (input.attestation.reviewer_scores
67
+ && (input.attestation.reviewer_scores.architect != null
68
+ || input.attestation.reviewer_scores.security != null)) {
69
+ lines.push(' reviewer_scores:');
70
+ if (input.attestation.reviewer_scores.architect != null) {
71
+ lines.push(` architect: ${input.attestation.reviewer_scores.architect}`);
72
+ }
73
+ if (input.attestation.reviewer_scores.security != null) {
74
+ lines.push(` security: ${input.attestation.reviewer_scores.security}`);
75
+ }
76
+ }
77
+ }
78
+ if (input.evidence) {
79
+ lines.push('evidence:');
80
+ lines.push(` evidence_mode: ${input.evidence.evidence_mode}`);
81
+ lines.push(` fresh_provider_search_performed: ${input.evidence.fresh_provider_search_performed}`);
82
+ if (input.evidence.degraded_reason) {
83
+ // Escape any colons / hashes that would break the bare YAML scalar.
84
+ const escaped = /[:#]/.test(input.evidence.degraded_reason)
85
+ ? JSON.stringify(input.evidence.degraded_reason)
86
+ : input.evidence.degraded_reason;
87
+ lines.push(` degraded_reason: ${escaped}`);
88
+ }
89
+ }
32
90
  lines.push('audit:');
33
91
  lines.push(` event_count: ${input.audit.event_count}`);
34
92
  lines.push(` chain_root_hash: ${input.audit.chain_root_hash}`);
@@ -0,0 +1,20 @@
1
/**
 * Result shape shared by every skill: a union tagged on `ok`, so the calling
 * agent can branch on success vs. failure.
 *
 * Handlers MUST NOT throw. A failure is reported as `{ok: false, reason}` so
 * the calling agent can keep going (per SKILL.md error contracts). A success
 * may carry any additional properties alongside `ok: true`.
 */
export type SkillResult =
  | ({ ok: true } & Record<string, unknown>)
  | { ok: false; reason: string };
12
/** Async skill implementation: receives an untyped input and resolves to a `SkillResult`. */
+ export type SkillHandler = (input: unknown) => Promise<SkillResult>;
13
/** Registry of skill handlers, keyed by skill name. */
+ export declare const SKILLS: Record<string, SkillHandler>;
14
/** Reports whether `name` is a known skill name — presumably a key of `SKILLS`; confirm against the implementation. */
+ export declare function isSkillName(name: string): boolean;
15
/** Runs the skill called `name` with `input` and resolves to its `SkillResult`. */
+ export declare function runSkill(name: string, input: unknown): Promise<SkillResult>;
16
+ /**
17
+ * Read all of stdin as a UTF-8 string. Returns '' immediately on TTY
18
+ * (no piped input) — handlers will reject via zod with a helpful message.
+ *
+ * @returns A promise for the stdin contents, or '' when stdin is a TTY.
19
+ */
20
+ export declare function readStdin(): Promise<string>;