npm - @maintainabilityai/research-runner - Versions diffs - 0.1.19 → 0.1.22 - Mend

@maintainabilityai/research-runner 0.1.19 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/cli.js +38 -0
package/dist/runner/court-recorder.d.ts +52 -0
package/dist/runner/court-recorder.js +77 -0
package/dist/runner/hatters-tag-builder.d.ts +78 -0
package/dist/runner/hatters-tag-builder.js +58 -0
package/dist/runner/skills.d.ts +20 -0
package/dist/runner/skills.js +792 -0
package/dist/schemas/audit-event.d.ts +144 -0
package/dist/schemas/audit-event.js +11 -0
package/package.json +1 -1

package/dist/cli.js CHANGED Viewed

@@ -48,6 +48,7 @@ const fs = __importStar(require("node:fs"));
 const path = __importStar(require("node:path"));
 const archeologist_1 = require("./runner/archeologist");
 const prd_1 = require("./runner/prd");
+const skills_1 = require("./runner/skills");
 const PKG = JSON.parse(fs.readFileSync(path.resolve(__dirname, '..', 'package.json'), 'utf8'));
 function parseFlags(argv) {
     const flags = {};
@@ -191,17 +192,54 @@ async function prdCmd(argv) {
     });
 }
+/** Print the version banner, usage lines, and the sorted list of `skill-<name>` subcommands to stdout. */
 function help() {
+    // One `skill-<key>` label per key of SKILLS, sorted; the '\n  ' separator
+    // puts each label on its own indented line of the help text.
+    const skillNames = Object.keys(skills_1.SKILLS).map(n => `skill-${n}`).sort().join('\n  ');
     process.stdout.write(`research-runner v${PKG.version}
 Usage:
   research-runner archeologist --brief "<topic>" --scope-level <platform|bar> --scope-id ID [--path research|archaeology] [...]
   research-runner prd --research-pr <url|path> --scope-level <platform|bar> --scope-id ID [...]
+  research-runner skill-<name>   # one-shot skill subcommand; reads JSON from stdin, writes JSON to stdout
+Skills (called by .agent.md tools: declarations under \$MESH_PATH):
+  ${skillNames}
 See README.md for the full flag surface.
 `);
 }
+/**
+ * skill-* dispatcher. Reads a JSON object from stdin, calls runSkill,
+ * writes the result as JSON to stdout. Exits non-zero on `ok: false` so
+ * the calling agent (or shell wrapper) can detect failure via exit code
+ * in addition to the structured `{ok: false, reason}` payload.
+ */
+async function skillCmd(skillName) {
+    if (!(0, skills_1.isSkillName)(skillName)) {
+        process.stdout.write(JSON.stringify({ ok: false, reason: `unknown-skill: ${skillName}` }) + '\n');
+        process.exit(1);
+    }
+    const stdinRaw = await (0, skills_1.readStdin)();
+    let input = {};
+    if (stdinRaw.trim().length > 0) {
+        try {
+            input = JSON.parse(stdinRaw);
+        }
+        catch (err) {
+            process.stdout.write(JSON.stringify({ ok: false, reason: `bad-stdin-json: ${err.message}` }) + '\n');
+            process.exit(1);
+        }
+    }
+    const result = await (0, skills_1.runSkill)(skillName, input);
+    process.stdout.write(JSON.stringify(result) + '\n');
+    if (result.ok === false) {
+        process.exit(1);
+    }
+}
 async function main() {
     const [, , subcommand, ...rest] = process.argv;
+    if (subcommand && subcommand.startsWith('skill-')) {
+        await skillCmd(subcommand.slice('skill-'.length));
+        return;
+    }
     switch (subcommand) {
         case 'archeologist':
             await archeologistCmd(rest);

package/dist/runner/court-recorder.d.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * CloudEvents v1.0 envelope in the shape this emitter produces. `time`,
+ * `datacontenttype` and `subject` — listed as optional attributes by the
+ * spec — are declared as required members here; `dataschema` is not
+ * part of this shape.
+ *
+ * See https://github.com/cloudevents/spec/blob/v1.0.2/cloudevents/spec.md
+ */
+export interface CloudEventsEnvelope<T = Record<string, unknown>> {
+    /** CloudEvents spec version. Fixed for our emitter. */
+    specversion: '1.0';
+    /**
+     * Event-source identifier — e.g. `maintainabilityai/research-runner`.
+     * NOTE(review): originally described as reverse-DNS, which the example
+     * does not match — confirm the intended format.
+     */
+    source: string;
+    /** Event type — the `audit-emitter` event kind with an `mai.` prefix. */
+    type: string;
+    /** Unique id — caller-supplied, otherwise a random UUID from the emitter. */
+    id: string;
+    /** ISO timestamp of emission, or the caller-supplied override. */
+    time: string;
+    /** MIME type of `data`. Always `application/json` for our emitter. */
+    datacontenttype: 'application/json';
+    /** OKR phase that contextualizes this event. */
+    subject: string;
+    /** The raw audit-emitter event payload. */
+    data: T;
+}
+/** Input accepted by `buildCloudEventsEnvelope`. */
+export interface CourtRecorderInput<T> {
+    /**
+     * Source identifier — e.g. `maintainabilityai/research-runner`. Copied
+     * to the envelope's `source`.
+     * NOTE(review): originally described as reverse-DNS, which the example
+     * does not match — confirm the intended format.
+     */
+    source: string;
+    /** Event kind from `audit-emitter` — wrapped into `mai.<kind>` for CE `type`. */
+    eventKind: string;
+    /** OKR phase (`why` | `how` | `what` | `setup` etc.) → goes in `subject`. */
+    phase: string;
+    /** The original audit-emitter event payload. Becomes `data`. */
+    payload: T;
+    /** Override timestamp (defaults to `new Date().toISOString()`). */
+    time?: string;
+    /** Override id (defaults to `crypto.randomUUID()`). */
+    id?: string;
+}
+/**
+ * Build a single CloudEvents v1.0 envelope around an audit-event payload.
+ * Stateless + deterministic given an injected `id` + `time` — handy for
+ * unit tests.
+ *
+ * @param input Source, event kind, phase and payload, plus optional
+ *              `id` / `time` overrides.
+ * @returns The envelope, with `type` set to `mai.<eventKind>`, `subject`
+ *          set to the phase and `data` set to the payload.
+ */
+export declare function buildCloudEventsEnvelope<T>(input: CourtRecorderInput<T>): CloudEventsEnvelope<T>;
+/**
+ * Serialize an envelope to a single JSONL line (NO trailing newline; the
+ * caller appends `\n` when writing).
+ *
+ * Serialization is delegated to `JSON.stringify`: it throws for circular
+ * references or BigInt values — a programmer bug, not a runtime
+ * condition — while properties holding `undefined`, functions or symbols
+ * are omitted from the output rather than rejected.
+ *
+ * @param env Envelope to serialize.
+ * @returns The JSON text of `env` on one line.
+ */
+export declare function serializeCloudEventsEnvelope<T>(env: CloudEventsEnvelope<T>): string;

package/dist/runner/court-recorder.js ADDED Viewed

@@ -0,0 +1,77 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.buildCloudEventsEnvelope = buildCloudEventsEnvelope;
+exports.serializeCloudEventsEnvelope = serializeCloudEventsEnvelope;
+/**
+ * Court Recorder — CloudEvents v1.0 envelope emitter for the agentic-SDLC
+ * audit chain. Wraps `audit-emitter`'s raw event payload in a CloudEvents
+ * 1.0 envelope so the resulting JSONL is SIEM-compatible without further
+ * transformation.
+ *
+ * Phase B-PR4 ships the emitter shape; agents will adopt it in their
+ * `audit-emit-event` Skill backend (B-PR1a). Each line of
+ * `okrs/<id>/audit/events/<run-id>.jsonl` is a fully-valid CloudEvents
+ * envelope whose `data` property carries the original audit-event JSON.
+ *
+ * Spec: vscode-extension/design/agentic-sdlc.md §11 (Court Recorder).
+ */
+const crypto = __importStar(require("node:crypto"));
+/**
+ * Build a single CloudEvents v1.0 envelope around an audit-event payload.
+ * Stateless + deterministic given an injected `id` + `time` — handy for
+ * unit tests.
+ */
+function buildCloudEventsEnvelope(input) {
+    return {
+        specversion: '1.0',
+        source: input.source,
+        type: `mai.${input.eventKind}`,
+        id: input.id ?? crypto.randomUUID(),
+        time: input.time ?? new Date().toISOString(),
+        datacontenttype: 'application/json',
+        subject: input.phase,
+        data: input.payload,
+    };
+}
+/**
+ * Serialize an envelope to a single JSONL line (NO trailing newline; the
+ * caller appends `\n` when writing). Throws if the envelope can't be
+ * round-tripped through JSON.stringify — that signals a non-serializable
+ * payload, which is a programmer bug, not a runtime condition.
+ */
+function serializeCloudEventsEnvelope(env) {
+    return JSON.stringify(env);
+}

package/dist/runner/hatters-tag-builder.d.ts CHANGED Viewed

@@ -12,6 +12,78 @@
  * cleanly in GitHub PR bodies + the rendered docs site.
  */
 import type { GuardrailMode, LlmProvider } from '../schemas';
+/**
+ * v4 agentic-SDLC governance fields. The `okr` block as a whole is
+ * OPTIONAL on the Hatter's Tag input — legacy CI-only runs (without an
+ * OKR anchor) omit it entirely and the emitted YAML skips those keys.
+ * When the block IS supplied, every member except `parent_intent_thread`
+ * is required. OKR-anchored runs (Phase B+) populate it so the audit
+ * chain can be walked across repositories via `intent_thread_uuid`.
+ *
+ * See vscode-extension/design/agentic-sdlc.md §4.4 (intent_thread_uuid
+ * lifecycle) and §11.1 (Hatter's Tag full provenance schema).
+ */
+export interface HattersTagOkrContext {
+    /** Cross-repo audit correlation key, generated at OKR-create time. */
+    intent_thread_uuid: string;
+    /**
+     * Run id of the prior phase that produced the input artifact (null on Why).
+     * The builder writes a literal `null` for a null value and omits the key
+     * only when the property is undefined.
+     */
+    parent_intent_thread?: string | null;
+    /** OKR card id (human-readable name, e.g. OKR-2026Q1-IMDB-001-celeb-api). */
+    okr_id: string;
+    /** Which phase produced this artifact. */
+    phase: 'why' | 'how' | 'what';
+    /** Tier frozen at run start (mitigates tier creep, §6.2). */
+    governance_tier: 'autonomous' | 'supervised' | 'restricted';
+}
+/**
+ * Author + reviewer attestation block (v4 §11.1). All optional; legacy
+ * runs without distinct agent DIDs omit it. Phase B+ stamps `author_did`
+ * from the GitHub App installation id + agent name; reviewers fill in
+ * `reviewer_dids[]` after their structured-review pack runs.
+ *
+ * The builder skips the whole `attestation:` block when no field carries
+ * a value: empty strings, an empty `reviewer_dids` array and null /
+ * undefined scores all count as absent.
+ */
+export interface HattersTagAttestation {
+    /** DID of the agent that authored this artifact. */
+    author_did?: string;
+    /** Prompt pack id + version cited by the author. */
+    author_prompt_pack_version?: string;
+    /** SHA256 of the author's system prompt at run time. */
+    author_system_prompt_sha?: string;
+    /** DIDs of reviewer-agent sessions that scored this PR. */
+    reviewer_dids?: string[];
+    /** Reviewer scores keyed by reviewer name; a null score is treated as "not scored" and not emitted. */
+    reviewer_scores?: {
+        architect?: number | null;
+        security?: number | null;
+    };
+}
+/**
+ * Evidence-mode block (v4 §11.1.7) — forces authors to be honest about
+ * whether the artifact's citations came from a fresh provider search or
+ * from cached/reused sources.
+ *
+ * Why this exists: the first round of agentic runs (run #21) loaded the
+ * SKILL.md context but had no live skill backends, so the agent silently
+ * fell back to reading repo files and produced an updated artifact that
+ * LOOKED grounded but cited zero live signals. The validator workflow
+ * (B-PR1c) cross-checks `fresh_provider_search_performed === true`
+ * against the audit JSONL's `skill_call` events for the four search
+ * providers — mismatch ⇒ `degraded-evidence` label ⇒ governance-pass
+ * promotion blocked.
+ *
+ *   live     — every cited source came from a fresh provider call this run
+ *   cached   — agent reused prior research without re-running providers
+ *   mixed    — some cited sources are fresh, some carried forward
+ *
+ * `degraded_reason` is REQUIRED on `cached` / `mixed` so the gate can
+ * surface a human-readable cause (e.g. "tavily-skill-backend-missing",
+ * "rate-limited", "rerun-after-review").
+ * NOTE(review): the type leaves the field optional and the builder's
+ * evidence section performs no such check — enforcement presumably lives
+ * in the validator workflow; confirm.
+ */
+export interface HattersTagEvidence {
+    evidence_mode: 'live' | 'cached' | 'mixed';
+    /** True iff at least one of the four search providers (tavily/arxiv/uspto/hackernews) was successfully called this run. */
+    fresh_provider_search_performed: boolean;
+    /** Free-text cause when evidence_mode !== 'live'. */
+    degraded_reason?: string;
+}
 export interface HattersTagInput {
     run_id: string;
     /** Git SHA of the mesh repo at run start. */
@@ -47,6 +119,12 @@ export interface HattersTagInput {
     };
     /** ISO timestamp the artifact was published (PR created). */
     published_at: string;
+    /** v4: OKR-anchored runs populate this; legacy CI-only runs omit it. */
+    okr?: HattersTagOkrContext;
+    /** v4: Phase B+ agent runs populate this; legacy runs omit it. */
+    attestation?: HattersTagAttestation;
+    /** v4 §11.1.7: agentic runs populate this; legacy CI runs omit it. */
+    evidence?: HattersTagEvidence;
 }
 /** Build the full Hatter's Tag block, including heading + fenced YAML. */
 export declare function buildHattersTag(input: HattersTagInput): string;

package/dist/runner/hatters-tag-builder.js CHANGED Viewed

@@ -1,6 +1,13 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.buildHattersTag = buildHattersTag;
+function hasAnyAttestationField(a) {
+    return !!(a.author_did
+        || a.author_prompt_pack_version
+        || a.author_system_prompt_sha
+        || (a.reviewer_dids && a.reviewer_dids.length > 0)
+        || (a.reviewer_scores && (a.reviewer_scores.architect != null || a.reviewer_scores.security != null)));
+}
 /** Build the full Hatter's Tag block, including heading + fenced YAML. */
 function buildHattersTag(input) {
     const lines = [];
@@ -29,6 +36,57 @@ function buildHattersTag(input) {
         lines.push(`  iterations: ${input.grounding.iterations}`);
         lines.push(`  passed: ${input.grounding.passed}`);
     }
+    if (input.okr) {
+        lines.push('okr:');
+        lines.push(`  intent_thread_uuid: ${input.okr.intent_thread_uuid}`);
+        if (input.okr.parent_intent_thread !== undefined) {
+            lines.push(`  parent_intent_thread: ${input.okr.parent_intent_thread ?? 'null'}`);
+        }
+        lines.push(`  okr_id: ${input.okr.okr_id}`);
+        lines.push(`  phase: ${input.okr.phase}`);
+        lines.push(`  governance_tier: ${input.okr.governance_tier}`);
+    }
+    if (input.attestation && hasAnyAttestationField(input.attestation)) {
+        lines.push('attestation:');
+        if (input.attestation.author_did) {
+            lines.push(`  author_did: ${input.attestation.author_did}`);
+        }
+        if (input.attestation.author_prompt_pack_version) {
+            lines.push(`  author_prompt_pack_version: ${input.attestation.author_prompt_pack_version}`);
+        }
+        if (input.attestation.author_system_prompt_sha) {
+            lines.push(`  author_system_prompt_sha: ${input.attestation.author_system_prompt_sha}`);
+        }
+        if (input.attestation.reviewer_dids && input.attestation.reviewer_dids.length > 0) {
+            lines.push('  reviewer_dids:');
+            for (const did of input.attestation.reviewer_dids) {
+                lines.push(`    - ${did}`);
+            }
+        }
+        if (input.attestation.reviewer_scores
+            && (input.attestation.reviewer_scores.architect != null
+                || input.attestation.reviewer_scores.security != null)) {
+            lines.push('  reviewer_scores:');
+            if (input.attestation.reviewer_scores.architect != null) {
+                lines.push(`    architect: ${input.attestation.reviewer_scores.architect}`);
+            }
+            if (input.attestation.reviewer_scores.security != null) {
+                lines.push(`    security: ${input.attestation.reviewer_scores.security}`);
+            }
+        }
+    }
+    if (input.evidence) {
+        lines.push('evidence:');
+        lines.push(`  evidence_mode: ${input.evidence.evidence_mode}`);
+        lines.push(`  fresh_provider_search_performed: ${input.evidence.fresh_provider_search_performed}`);
+        if (input.evidence.degraded_reason) {
+            // Escape any colons / hashes that would break the bare YAML scalar.
+            const escaped = /[:#]/.test(input.evidence.degraded_reason)
+                ? JSON.stringify(input.evidence.degraded_reason)
+                : input.evidence.degraded_reason;
+            lines.push(`  degraded_reason: ${escaped}`);
+        }
+    }
     lines.push('audit:');
     lines.push(`  event_count: ${input.audit.event_count}`);
     lines.push(`  chain_root_hash: ${input.audit.chain_root_hash}`);

package/dist/runner/skills.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * Shape every skill returns. Tagged union so the agent can branch on `ok`.
+ * Handlers MUST NOT throw — they return `{ok: false, reason}` instead so
+ * the calling agent can keep going (per SKILL.md error contracts).
+ */
+export type SkillResult = ({
+    ok: true;
+} & Record<string, unknown>) | {
+    ok: false;
+    reason: string;
+};
+/** A skill implementation: receives the (untyped) input value and resolves to a `SkillResult`. */
+export type SkillHandler = (input: unknown) => Promise<SkillResult>;
+/** Registry of skill handlers keyed by skill name; the CLI exposes each key as a `skill-<name>` subcommand. */
+export declare const SKILLS: Record<string, SkillHandler>;
+/** Whether `name` is a known skill; the CLI reports `unknown-skill` and exits non-zero when this returns false. */
+export declare function isSkillName(name: string): boolean;
+/** Run the skill called `name` with `input` and resolve to its `SkillResult`. */
+export declare function runSkill(name: string, input: unknown): Promise<SkillResult>;
+/**
+ * Read all of stdin as a UTF-8 string. Returns '' immediately on TTY
+ * (no piped input) — handlers will reject via zod with a helpful message.
+ * NOTE(review): the CLI substitutes `{}` for empty or whitespace-only
+ * stdin before calling the handler, so whether the call is rejected
+ * depends on that handler's schema — confirm.
+ */
+export declare function readStdin(): Promise<string>;