npm - @danielblomma/cortex-mcp - Versions diffs - 2.0.4 → 2.0.6 - Mend

@danielblomma/cortex-mcp 2.0.4 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/bin/cortex.mjs +74 -25
package/package.json +1 -1
package/scaffold/mcp/package-lock.json +63 -4
package/scaffold/mcp/package.json +4 -1
package/scaffold/mcp/src/cli/stage.ts +325 -0
package/scaffold/mcp/src/core/workflow/artifact-io.ts +156 -0
package/scaffold/mcp/src/core/workflow/capabilities.ts +100 -0
package/scaffold/mcp/src/core/workflow/default-workflows.ts +83 -0
package/scaffold/mcp/src/core/workflow/enforcement.ts +206 -0
package/scaffold/mcp/src/core/workflow/envelope.ts +220 -0
package/scaffold/mcp/src/core/workflow/index.ts +8 -0
package/scaffold/mcp/src/core/workflow/mcp-tools.ts +208 -0
package/scaffold/mcp/src/core/workflow/run-lifecycle.ts +165 -0
package/scaffold/mcp/src/core/workflow/schemas.ts +125 -0
package/scaffold/mcp/src/hooks/pre-tool-use.ts +30 -0
package/scaffold/mcp/src/server.ts +75 -0
package/scaffold/mcp/tests/workflow-cli.test.mjs +293 -0
package/scaffold/mcp/tests/workflow-enforcement.test.mjs +370 -0
package/scaffold/mcp/tests/workflow-envelope.test.mjs +247 -0
package/scaffold/mcp/tests/workflow-mcp-tools.test.mjs +293 -0
package/scaffold/mcp/tests/workflow.test.mjs +283 -0
package/scaffold/scripts/bootstrap.sh +1 -1
package/scaffold/scripts/doctor.sh +6 -6
package/scaffold/scripts/embed.sh +2 -2
package/scaffold/scripts/load-ryu.sh +3 -3
package/scaffold/scripts/memory-compile.mjs +1 -1
package/scaffold/scripts/memory-lint.mjs +1 -1
package/scaffold/scripts/watch.sh +2 -7

package/scaffold/mcp/src/core/workflow/enforcement.ts ADDED Viewed

@@ -0,0 +1,206 @@
+import { isAbsolute, relative } from "node:path";
+import { minimatch } from "minimatch";
+import { readRunState } from "./artifact-io.js";
+import { DEFAULT_CAPABILITIES, type CapabilityDefinition } from "./capabilities.js";
+import { workflowDefinitionSchema, type WorkflowDefinition } from "./schemas.js";
+import { DEFAULT_WORKFLOWS } from "./default-workflows.js";
+/**
+ * Pre-tool-use enforcement for the harness. Pure function: takes the tool
+ * call shape Claude Code emits, looks up the active workflow stage's
+ * capability, returns allow/deny + reason. The hook wires this into the
+ * stdin/exit-code dance.
+ *
+ * "Active task" is identified by env var CORTEX_ACTIVE_TASK_ID. The
+ * harness sets this when invoking an agent for a stage; outside the
+ * harness, the env var is unset and this evaluator is a no-op (returns
+ * { allowed: true }).
+ */
+/** A single tool invocation to be checked: the tool's name plus its raw input object. */
+export type ToolCall = {
+  /** Tool identifier; compared against a capability's `tools_allowed` and the mutating/reading tool sets. */
+  toolName: string;
+  /** Raw tool arguments. A target path is looked up under `file_path`, `path`, or `notebook_path`. */
+  toolInput: Record<string, unknown>;
+};
+/** Verdict of an enforcement check. A denial always carries a reason; an allow may carry one. */
+export type EnforcementResult =
+  | { allowed: true; reason?: string }
+  | { allowed: false; reason: string };
+/** Inputs to `evaluateToolCall`. */
+export type EvaluateOptions = {
+  /** Directory handed to `readRunState`, and the base used to make absolute target paths relative. */
+  cwd: string;
+  /** Task whose run state is looked up. */
+  taskId: string;
+  /** The tool call being evaluated. */
+  call: ToolCall;
+  /** Workflow registry keyed by workflow id; `DEFAULT_WORKFLOWS` is used when omitted. */
+  workflows?: Record<string, WorkflowDefinition>;
+  /** Capability registry keyed by capability key; `DEFAULT_CAPABILITIES` is used when omitted. */
+  capabilities?: Record<string, CapabilityDefinition>;
+};
+/**
+ * Tool names that are pure mutations of the file system. Edits and writes
+ * gate against `write_globs`. Bash is deliberately not listed here: paths
+ * cannot be reliably extracted from arbitrary shell, so
+ * `evaluateAgainstCapability` gives it a dedicated rule instead of glob
+ * matching — Bash is denied for a capability whose `write_globs` is empty
+ * unless `tools_allowed` names it explicitly.
+ */
+const MUTATING_TOOLS = new Set(["Edit", "Write", "MultiEdit", "NotebookEdit"]);
+/**
+ * Tool names that read but do not mutate. Gate against `read_globs`.
+ */
+const READING_TOOLS = new Set(["Read", "Grep", "Glob", "NotebookRead"]);
+/**
+ * Decides whether `options.call` may run for the task `options.taskId`.
+ *
+ * Resolution chain: run state -> workflow -> current stage -> capability.
+ * The call is allowed outright when there is nothing to enforce (no run
+ * state, run not in progress, or the stage declares no capability). It is
+ * denied when the chain cannot be resolved (unknown workflow, stage missing
+ * from the workflow, capability missing from the registry). Otherwise the
+ * verdict is whatever `evaluateAgainstCapability` returns.
+ *
+ * @param options - Working directory, task id, the tool call, and optional
+ *   workflow/capability registries (defaulting to `DEFAULT_WORKFLOWS` and
+ *   `DEFAULT_CAPABILITIES`).
+ * @returns The allow/deny verdict.
+ * @throws Whatever `workflowDefinitionSchema.parse` raises for a malformed
+ *   workflow definition; the error is not caught here.
+ */
+export function evaluateToolCall(options: EvaluateOptions): EnforcementResult {
+  const { cwd, taskId, call } = options;
+  const run = readRunState(cwd, taskId);
+  if (!run) {
+    return { allowed: true, reason: "no run state — harness not active" };
+  }
+  const activeStageName = run.current_stage;
+  if (run.outcome !== "in_progress" || !activeStageName) {
+    const reason = `run not in progress (outcome=${run.outcome}) — no capability gate to apply`;
+    return { allowed: true, reason };
+  }
+  const workflowRegistry = options.workflows ?? DEFAULT_WORKFLOWS;
+  const workflow = workflowRegistry[run.workflow_id];
+  if (!workflow) {
+    const reason = `unknown workflow_id ${run.workflow_id}; cannot resolve capability for current stage`;
+    return { allowed: false, reason };
+  }
+  // Validation only: the parsed value is discarded, the point is to fail
+  // loudly on a corrupt definition before trusting its stages.
+  workflowDefinitionSchema.parse(workflow);
+  const activeStage = workflow.stages.find((candidate) => candidate.name === activeStageName);
+  if (!activeStage) {
+    const reason = `current stage ${activeStageName} is not defined in workflow ${workflow.id}`;
+    return { allowed: false, reason };
+  }
+  if (!activeStage.capability) {
+    return { allowed: true, reason: "stage has no capability declared" };
+  }
+  const capabilityRegistry = options.capabilities ?? DEFAULT_CAPABILITIES;
+  const capability = capabilityRegistry[activeStage.capability];
+  if (!capability) {
+    const reason = `capability ${activeStage.capability} (referenced by stage ${activeStage.name}) is not in the registry`;
+    return { allowed: false, reason };
+  }
+  return evaluateAgainstCapability(capability, call, cwd);
+}
+/**
+ * Applies one capability's rules to a tool call. Checks run in this order:
+ *
+ *  1. Tool allow-list: a non-empty `tools_allowed` must contain the tool.
+ *  2. Bash: denied when the capability has no `write_globs` and does not
+ *     list Bash in `tools_allowed`; allowed otherwise. No path matching is
+ *     attempted for Bash.
+ *  3. Mutating tools: need a non-empty `write_globs`, a target path in the
+ *     tool input, and a glob match on that path.
+ *  4. Reading tools: need a non-empty `read_globs`; a call without a target
+ *     path is allowed, a call with one must match a glob.
+ *  5. Any other tool is allowed.
+ *
+ * @param capability - Capability the current stage runs under.
+ * @param call - Tool call being checked.
+ * @param cwd - Base directory used to make absolute target paths relative.
+ * @returns `{ allowed: true }` or a denial with a human-readable reason.
+ */
+function evaluateAgainstCapability(
+  capability: CapabilityDefinition,
+  call: ToolCall,
+  cwd: string,
+): EnforcementResult {
+  const deny = (reason: string): EnforcementResult => ({ allowed: false, reason });
+  const allow: EnforcementResult = { allowed: true };
+  const { toolName, toolInput } = call;
+  // 1. An empty allow-list means "no tool restriction".
+  const toolListIsRestrictive = capability.tools_allowed.length > 0;
+  if (toolListIsRestrictive && !capability.tools_allowed.includes(toolName)) {
+    return deny(`capability ${capability.name} does not allow tool ${toolName}`);
+  }
+  // 2. Bash. Reaching this point with Bash absent from tools_allowed is only
+  // possible when tools_allowed is empty, since step 1 rejects it otherwise.
+  if (toolName === "Bash") {
+    const hasNoWriteGlobs = capability.write_globs.length === 0;
+    const bashIsListed = capability.tools_allowed.includes("Bash");
+    if (hasNoWriteGlobs && !bashIsListed) {
+      return deny(
+        `capability ${capability.name} is read-only; Bash can mutate the filesystem and is not allow-listed`,
+      );
+    }
+    return allow;
+  }
+  // 3. File-mutating tools are gated by write_globs.
+  if (MUTATING_TOOLS.has(toolName)) {
+    if (capability.write_globs.length === 0) {
+      return deny(`capability ${capability.name} is read-only; ${toolName} cannot run`);
+    }
+    const writeTarget = extractFilePath(toolInput);
+    if (!writeTarget) {
+      // Without a path there is nothing to match, so fail closed.
+      return deny(
+        `${toolName} did not include a file_path; cannot verify against capability ${capability.name}`,
+      );
+    }
+    const writeRel = toRepoRelative(cwd, writeTarget);
+    if (matchesAnyGlob(writeRel, capability.write_globs)) {
+      return allow;
+    }
+    return deny(
+      `path ${writeRel} is outside capability ${capability.name}'s write_globs (${capability.write_globs.join(", ")})`,
+    );
+  }
+  // 4. Read-only tools are gated by read_globs.
+  if (READING_TOOLS.has(toolName)) {
+    if (capability.read_globs.length === 0) {
+      return deny(`capability ${capability.name} does not permit any read operations`);
+    }
+    const readTarget = extractFilePath(toolInput);
+    if (!readTarget) {
+      // Grep/Glob may be called without a path; any read access is enough.
+      return allow;
+    }
+    const readRel = toRepoRelative(cwd, readTarget);
+    if (matchesAnyGlob(readRel, capability.read_globs)) {
+      return allow;
+    }
+    return deny(
+      `path ${readRel} is outside capability ${capability.name}'s read_globs (${capability.read_globs.join(", ")})`,
+    );
+  }
+  // 5. Tools in neither set pass once they clear the allow-list.
+  return allow;
+}
+/**
+ * Pulls the target path out of a tool's input object.
+ *
+ * The keys `file_path`, `path`, and `notebook_path` are consulted in that
+ * order; the first one holding a non-empty string wins.
+ *
+ * @param toolInput - Raw tool arguments.
+ * @returns The path string, or `null` when none of the keys holds one.
+ */
+function extractFilePath(toolInput: Record<string, unknown>): string | null {
+  const firstPath = ["file_path", "path", "notebook_path"]
+    .map((key) => toolInput[key])
+    .find((value): value is string => typeof value === "string" && value.length > 0);
+  return firstPath ?? null;
+}
+/**
+ * Converts a tool-supplied path into a normalized path relative to `cwd`,
+ * the form the capability globs are matched against.
+ *
+ * Relative inputs are interpreted relative to `cwd` and normalized, so
+ * `./src/a.ts` becomes `src/a.ts` and `src/../docs/x.md` becomes
+ * `docs/x.md`. A path that leaves `cwd` is never rewritten into the repo:
+ * an absolute input is returned unchanged, and a relative input comes back
+ * with its leading `..` segments intact.
+ *
+ * @param cwd - Repository root the globs are relative to.
+ * @param targetPath - Absolute or `cwd`-relative path taken from the tool input.
+ * @returns The normalized repo-relative path, or one of the outside-repo
+ *   forms described above.
+ */
+function toRepoRelative(cwd: string, targetPath: string): string {
+  const wasAbsolute = isAbsolute(targetPath);
+  // An empty cwd would turn `${cwd}/x` into a filesystem-root path; "."
+  // keeps the join relative to the process working directory instead.
+  const base = cwd === "" ? "." : cwd;
+  // path.relative resolves both arguments, collapsing "." and ".." segments.
+  const rel = relative(base, wasAbsolute ? targetPath : `${base}/${targetPath}`);
+  // Only a whole leading ".." segment (or a result that is still absolute,
+  // e.g. another Windows drive) means "outside"; "..cache/x" is inside.
+  const isOutside =
+    rel === ".." || rel.startsWith("../") || rel.startsWith("..\\") || isAbsolute(rel);
+  return isOutside && wasAbsolute ? targetPath : rel;
+}
+/**
+ * Reports whether `path` matches at least one of `globs`.
+ *
+ * Matching is delegated to `minimatch` with `dot: true` and
+ * `nocase: false`. An empty glob list never matches.
+ *
+ * @param path - Path to test (repo-relative in the callers of this helper).
+ * @param globs - Glob patterns to try, in order.
+ * @returns `true` as soon as one pattern matches, otherwise `false`.
+ */
+function matchesAnyGlob(path: string, globs: string[]): boolean {
+  for (const glob of globs) {
+    if (minimatch(path, glob, { dot: true, nocase: false })) {
+      return true;
+    }
+  }
+  return false;
+}

package/scaffold/mcp/src/core/workflow/envelope.ts ADDED Viewed

@@ -0,0 +1,220 @@
+import { readFileSync } from "node:fs";
+import { artifactPath, readRunState } from "./artifact-io.js";
+import { workflowDefinitionSchema, type WorkflowDefinition } from "./schemas.js";
+/**
+ * Composes the prompt one stage's agent sees. Pure function over the
+ * persisted run state plus the workflow definition — no agent invocation,
+ * no MCP, no daemon. The harness later wraps this into an MCP call or a
+ * CLI invocation.
+ *
+ * Design: the agent gets four sections in a fixed order so it can anchor
+ * on them reliably:
+ *
+ *   TASK     — what the developer asked for, copied verbatim from RunState
+ *   STAGE    — what *this* stage is supposed to produce
+ *   HANDOFFS — every prior-stage artifact the new stage declared in `reads`,
+ *              inlined raw (frontmatter + body) so the agent sees structured
+ *              outcomes alongside the reasoning
+ *   OUTPUT   — exact frontmatter contract the agent must satisfy plus the
+ *              expected artifact filename
+ *
+ * Capability constraints (which files the agent may edit, which tools it
+ * may call) are NOT enforced by the prompt — they're enforced by hooks
+ * downstream. The capability key is surfaced in the prompt as a label so
+ * the agent knows under what role it's running, but the real gate is
+ * pre-tool-use.
+ */
+/** Result of `composeStageEnvelope`: the rendered prompt plus the stage contract it was built from. */
+export type ComposedEnvelope = {
+  /** The full prompt the agent will receive. */
+  prompt: string;
+  /** Expected artifact filename the agent must produce. */
+  expectedArtifact: string;
+  /** Frontmatter keys the agent must populate (beyond stage/status/references). */
+  requiredFields: string[];
+  /** Capability key the stage runs under (informational). */
+  capability: string | null;
+};
+/** Inputs to `composeStageEnvelope`. */
+export type ComposeStageEnvelopeOptions = {
+  /** Directory handed to `readRunState` and `artifactPath` to locate the run's files. */
+  cwd: string;
+  /** Task whose run state and artifacts are read. */
+  taskId: string;
+  /** Workflow definition; its `id` must equal the run's `workflow_id`. */
+  workflow: WorkflowDefinition;
+  /**
+   * Defaults to the run's current_stage. Pass an explicit stageName when
+   * dry-running an envelope without driving state forward.
+   */
+  stageName?: string;
+};
+/**
+ * Builds the prompt envelope for one stage of a run.
+ *
+ * The workflow definition is validated, the run state is loaded, and the
+ * target stage is taken from `options.stageName` or, failing that, the
+ * run's `current_stage`. Every stage named in the target stage's `reads`
+ * must already have a recorded, non-pending artifact; each artifact file is
+ * read from disk and inlined as a handoff block.
+ *
+ * @param options - Working directory, task id, workflow, optional stage name.
+ * @returns The rendered prompt together with the stage's expected artifact
+ *   name, required frontmatter fields, and capability key (`null` if none).
+ * @throws Error when no run state exists, the workflow id differs from the
+ *   run's, no stage can be determined, the stage or one of its `reads` is
+ *   not defined in the workflow, a required artifact has not been produced,
+ *   or an artifact file cannot be read.
+ */
+export function composeStageEnvelope(
+  options: ComposeStageEnvelopeOptions,
+): ComposedEnvelope {
+  const { cwd, taskId } = options;
+  const workflow = workflowDefinitionSchema.parse(options.workflow);
+  const state = readRunState(cwd, taskId);
+  if (!state) {
+    throw new Error(`No run state found for task ${taskId}. Call createRun() first.`);
+  }
+  if (state.workflow_id !== workflow.id) {
+    throw new Error(
+      `Workflow mismatch: run was started with ${state.workflow_id}, envelope was composed with ${workflow.id}`,
+    );
+  }
+  const stageName = options.stageName ?? state.current_stage;
+  if (!stageName) {
+    throw new Error(
+      `Run ${taskId} is not at any stage (outcome=${state.outcome}). Cannot compose envelope.`,
+    );
+  }
+  const stage = workflow.stages.find((candidate) => candidate.name === stageName);
+  if (!stage) {
+    throw new Error(`Stage ${stageName} is not defined in workflow ${workflow.id}`);
+  }
+  // One handoff block per declared read, in declaration order. The checks
+  // run per entry, so the first problematic read is the one reported.
+  const handoffs = stage.reads.map((readName) => {
+    const isKnownStage = workflow.stages.some((candidate) => candidate.name === readName);
+    if (!isKnownStage) {
+      throw new Error(`Stage ${stageName} declares reads from unknown stage ${readName}`);
+    }
+    const record = state.stages.find((entry) => entry.name === readName);
+    if (!record || record.status === "pending" || !record.artifact) {
+      throw new Error(
+        `Stage ${stageName} requires artifact from ${readName}, but it has not been produced yet`,
+      );
+    }
+    const path = artifactPath(cwd, taskId, record.artifact);
+    let contents: string;
+    try {
+      contents = readFileSync(path, "utf8");
+    } catch (err) {
+      const detail = err instanceof Error ? err.message : String(err);
+      throw new Error(`Failed to read handoff artifact for ${readName} at ${path}: ${detail}`);
+    }
+    return renderHandoff(readName, record.artifact, contents);
+  });
+  const envelope: ComposedEnvelope = {
+    prompt: "",
+    expectedArtifact: stage.artifact,
+    requiredFields: stage.required_fields,
+    capability: stage.capability ?? null,
+  };
+  envelope.prompt = renderPrompt({
+    taskDescription: state.task_description,
+    workflowId: workflow.id,
+    workflowDescription: workflow.description,
+    stageName: stage.name,
+    stageDescription: stage.description,
+    expectedArtifact: envelope.expectedArtifact,
+    requiredFields: envelope.requiredFields,
+    capability: envelope.capability,
+    handoffs,
+  });
+  return envelope;
+}
+/**
+ * Wraps one prior-stage artifact in begin/end marker lines so it can be
+ * inlined into the prompt.
+ *
+ * @param stageName - Stage that produced the artifact.
+ * @param artifactName - Artifact filename, shown in the opening marker.
+ * @param rawArtifact - Complete file contents; surrounding whitespace is trimmed.
+ * @returns Three newline-joined parts: opening marker, trimmed contents, closing marker.
+ */
+function renderHandoff(
+  stageName: string,
+  artifactName: string,
+  rawArtifact: string,
+): string {
+  const opening = `--- handoff:${stageName} (${artifactName}) ---`;
+  const closing = `--- end handoff:${stageName} ---`;
+  return `${opening}\n${rawArtifact.trim()}\n${closing}`;
+}
+/** Everything `renderPrompt` needs, already resolved to plain values. */
+type RenderPromptOptions = {
+  /** Developer's task text; trimmed before rendering. */
+  taskDescription: string;
+  /** Workflow id shown on the `Workflow:` line. */
+  workflowId: string;
+  /** Workflow description shown after the id. */
+  workflowDescription: string;
+  /** Stage name used in the STAGE heading. */
+  stageName: string;
+  /** Stage description; trimmed before rendering. */
+  stageDescription: string;
+  /** Filename the agent is told to produce. */
+  expectedArtifact: string;
+  /** Extra frontmatter keys rendered as a bullet list. */
+  requiredFields: string[];
+  /** Capability key, or `null` when the stage declares none. */
+  capability: string | null;
+  /** Pre-rendered handoff blocks, one per prior-stage artifact. */
+  handoffs: string[];
+};
+/**
+ * Renders the stage prompt as four markdown sections in fixed order —
+ * TASK, STAGE, HANDOFFS, OUTPUT — separated by one blank line.
+ *
+ * @param o - Resolved values for every placeholder in the prompt.
+ * @returns The complete prompt text.
+ */
+function renderPrompt(o: RenderPromptOptions): string {
+  const lines = (...parts: string[]): string => parts.join("\n");
+  const taskSection = lines(
+    `# TASK`,
+    ``,
+    o.taskDescription.trim(),
+    ``,
+    `Workflow: ${o.workflowId} — ${o.workflowDescription}`,
+  );
+  // A falsy capability (null or empty string) gets the "no constraint" note.
+  const capabilityNote = o.capability
+    ? `Running under capability: \`${o.capability}\` (file and tool restrictions are enforced by Cortex hooks at tool-use time, not by you).`
+    : `No capability constraint declared for this stage.`;
+  const stageSection = lines(
+    `# STAGE: ${o.stageName}`,
+    ``,
+    o.stageDescription.trim(),
+    ``,
+    capabilityNote,
+  );
+  const hasHandoffs = o.handoffs.length > 0;
+  const handoffSection = hasHandoffs
+    ? lines(
+        `# HANDOFFS`,
+        ``,
+        `The following stages have already run. Each artifact below is the complete file as it lives on disk; use the frontmatter for structured outcomes and the body for reasoning.`,
+        ``,
+        ...o.handoffs,
+      )
+    : lines(`# HANDOFFS`, ``, `_No prior-stage artifacts; this is the first stage._`);
+  const requiredList =
+    o.requiredFields.length > 0
+      ? o.requiredFields.map((field) => `- \`${field}\``).join("\n")
+      : `_No additional required fields beyond the harness defaults._`;
+  const outputSection = lines(
+    `# OUTPUT`,
+    ``,
+    `Produce a single markdown file named \`${o.expectedArtifact}\` with YAML frontmatter on top.`,
+    ``,
+    `Required frontmatter fields (in addition to \`stage\`, \`status\`, \`references\`, \`written_at\` which the harness manages):`,
+    ``,
+    requiredList,
+    ``,
+    `Body: clear, well-structured markdown explaining your reasoning. Cite handoff artifacts by stage name when relevant.`,
+    ``,
+    `If you cannot complete this stage (missing context, blocking concern, conflicting prior decisions), set \`status: blocked\` in frontmatter and explain why in the body — do not fabricate work.`,
+  );
+  return [taskSection, stageSection, handoffSection, outputSection].join("\n\n");
+}

package/scaffold/mcp/src/core/workflow/index.ts ADDED Viewed

@@ -0,0 +1,8 @@
+export * from "./schemas.js";
+export * from "./artifact-io.js";
+export * from "./run-lifecycle.js";
+export * from "./envelope.js";
+export * from "./default-workflows.js";
+export * from "./mcp-tools.js";
+export * from "./capabilities.js";
+export * from "./enforcement.js";

package/scaffold/mcp/src/core/workflow/mcp-tools.ts ADDED Viewed

@@ -0,0 +1,208 @@
+import { z } from "zod";
+import { advanceStage, createRun, getRunState } from "./run-lifecycle.js";
+import { composeStageEnvelope } from "./envelope.js";
+import { DEFAULT_WORKFLOWS } from "./default-workflows.js";
+import {
+  stageStatusSchema,
+  type StageStatus,
+  type WorkflowDefinition,
+} from "./schemas.js";
+/**
+ * Pure runner functions that back the cortex.workflow.* MCP tools.
+ * Kept separate from server.ts so they can be unit-tested without spinning
+ * up an MCP server. server.ts is a thin shim that registers each runner
+ * under its tool name and serializes the result through buildToolResult.
+ */
+/**
+ * Slug validator shared by the task, workflow and stage identifiers below:
+ * 1-80 characters drawn from lowercase ASCII letters, digits and hyphens,
+ * beginning and ending with a letter or digit.
+ *
+ * The "interior plus final character" group is optional so that a
+ * single-character slug such as `a` is accepted, in line with `.min(1)`.
+ */
+const slugSchema = z
+  .string()
+  .min(1)
+  .max(80)
+  .regex(/^[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$/);
+/** Input schema consumed by `runWorkflowStart`. */
+export const WorkflowStartInput = z.object({
+  /** Slug identifying the new run. */
+  task_id: slugSchema,
+  /** What the developer asked for; 1 to 2000 characters. */
+  task_description: z.string().min(1).max(2000),
+  /** Workflow to run; defaults to "secure-build" when omitted. */
+  workflow_id: slugSchema.default("secure-build"),
+});
+/** Parsed (output) shape of `WorkflowStartInput`. */
+export type WorkflowStartInputT = z.infer<typeof WorkflowStartInput>;
+/** Input schema consumed by `runWorkflowAdvance`. */
+export const WorkflowAdvanceInput = z.object({
+  /** Slug of the run to advance. */
+  task_id: slugSchema,
+  /**
+   * Required for safety: must equal the run's current_stage.
+   * NOTE(review): `runWorkflowAdvance` itself only checks that the stage is
+   * defined in the workflow; the current-stage comparison is presumably
+   * done inside `advanceStage` — confirm in run-lifecycle.ts.
+   */
+  stage: slugSchema,
+  /**
+   * Stage frontmatter as a free-form object. `stage` and `status` are
+   * always overwritten by `runWorkflowAdvance`. `references` is kept when
+   * supplied as an array (non-string entries are dropped) and otherwise
+   * derived from the stage's `reads`. `written_at` is described as
+   * harness-managed, but its handling is not visible in this module.
+   * Stage-specific fields like `approved` or `score` are passed through.
+   */
+  frontmatter: z.record(z.string(), z.unknown()).default({}),
+  /** Markdown body of the stage artifact; must be non-empty. */
+  body: z.string().min(1),
+  /** Final stage status. Defaults to "complete". Use "blocked" or "failed" to halt the run. */
+  status: stageStatusSchema.optional(),
+  /** Optional structured outcome surfaced into state.json for fast lookup by later stages. */
+  outcome: z.record(z.string(), z.unknown()).optional(),
+});
+/** Parsed (output) shape of `WorkflowAdvanceInput`. */
+export type WorkflowAdvanceInputT = z.infer<typeof WorkflowAdvanceInput>;
+/** Input schema consumed by `runWorkflowStatus`. */
+export const WorkflowStatusInput = z.object({
+  /** Slug of the run to look up. */
+  task_id: slugSchema,
+});
+/** Parsed (output) shape of `WorkflowStatusInput`. */
+export type WorkflowStatusInputT = z.infer<typeof WorkflowStatusInput>;
+/** Input schema consumed by `runWorkflowEnvelope`. */
+export const WorkflowEnvelopeInput = z.object({
+  /** Slug of the run whose envelope is composed. */
+  task_id: slugSchema,
+  /** Defaults to the run's current stage. */
+  stage: slugSchema.optional(),
+});
+/** Parsed (output) shape of `WorkflowEnvelopeInput`. */
+export type WorkflowEnvelopeInputT = z.infer<typeof WorkflowEnvelopeInput>;
+/**
+ * Determines the project root directory.
+ *
+ * `CORTEX_PROJECT_ROOT` wins when it is set to something other than
+ * whitespace (the value is trimmed); otherwise the process working
+ * directory is used. Per the original note, the MCP server is started with
+ * cwd = project root and the variable set to the same value (see
+ * bin/cortex.mjs `mcp` command), while tests pass cwd explicitly.
+ *
+ * @returns The trimmed environment value, or `process.cwd()`.
+ */
+export function resolveProjectRoot(): string {
+  const configuredRoot = (process.env.CORTEX_PROJECT_ROOT ?? "").trim();
+  return configuredRoot.length > 0 ? configuredRoot : process.cwd();
+}
+/** Per-call context shared by the `runWorkflow*` functions. */
+export type WorkflowToolContext = {
+  /** Directory passed through to the run-lifecycle and envelope helpers. */
+  cwd: string;
+  /** Workflow registry keyed by workflow id; `DEFAULT_WORKFLOWS` is used when omitted. */
+  workflows?: Record<string, WorkflowDefinition>;
+};
+/**
+ * Looks up a workflow definition by id.
+ *
+ * Only the registry's own keys count. A plain index lookup would also find
+ * inherited `Object.prototype` members — `constructor` is a valid slug —
+ * and hand back a value that is not a workflow definition.
+ *
+ * @param workflowId - Id to resolve.
+ * @param registry - Registry to search; `DEFAULT_WORKFLOWS` when undefined.
+ * @returns The matching workflow definition.
+ * @throws Error naming the unknown id and listing the available ids.
+ */
+function resolveWorkflow(
+  workflowId: string,
+  registry: Record<string, WorkflowDefinition> | undefined,
+): WorkflowDefinition {
+  const workflows = registry ?? DEFAULT_WORKFLOWS;
+  const isRegistered = Object.prototype.hasOwnProperty.call(workflows, workflowId);
+  const workflow = isRegistered ? workflows[workflowId] : undefined;
+  if (!workflow) {
+    throw new Error(
+      `Unknown workflow_id: ${workflowId}. Available: ${Object.keys(workflows).join(", ") || "<none>"}`,
+    );
+  }
+  return workflow;
+}
+/**
+ * Starts a run and composes the envelope for its current stage.
+ *
+ * @param input - Parsed `WorkflowStartInput`.
+ * @param ctx - Working directory and optional workflow registry.
+ * @returns The state returned by `createRun` and the composed envelope.
+ * @throws Error when the workflow id is unknown; errors from `createRun`
+ *   and `composeStageEnvelope` propagate unchanged.
+ */
+export function runWorkflowStart(
+  input: WorkflowStartInputT,
+  ctx: WorkflowToolContext,
+) {
+  const { cwd } = ctx;
+  const { task_id: taskId, task_description: taskDescription } = input;
+  const workflow = resolveWorkflow(input.workflow_id, ctx.workflows);
+  // The run must exist before the envelope is composed: composing reads the
+  // persisted run state.
+  const state = createRun({ cwd, taskId, workflow, taskDescription });
+  const envelope = composeStageEnvelope({ cwd, taskId, workflow });
+  return { state, envelope };
+}
+/**
+ * Records the artifact for a stage and advances the run.
+ *
+ * The frontmatter handed to `advanceStage` is the caller's frontmatter with
+ * `stage` and `status` overwritten. `references` is the caller's array
+ * reduced to its string entries when one was supplied, otherwise the
+ * artifact names of the stages listed in the stage's `reads`.
+ *
+ * @param input - Parsed `WorkflowAdvanceInput`.
+ * @param ctx - Working directory and optional workflow registry.
+ * @returns The new run state, plus the next stage's envelope when the run
+ *   is still in progress (`null` otherwise).
+ * @throws Error when no run state exists, the run's workflow id is unknown,
+ *   or `input.stage` is not defined in the workflow; errors from
+ *   `advanceStage` and `composeStageEnvelope` propagate unchanged.
+ */
+export function runWorkflowAdvance(
+  input: WorkflowAdvanceInputT,
+  ctx: WorkflowToolContext,
+) {
+  const { cwd } = ctx;
+  const taskId = input.task_id;
+  const current = getRunState(cwd, taskId);
+  if (!current) {
+    throw new Error(
+      `No run state found for task ${taskId}. Call cortex.workflow.start first.`,
+    );
+  }
+  const workflow = resolveWorkflow(current.workflow_id, ctx.workflows);
+  const stage = workflow.stages.find((candidate) => candidate.name === input.stage);
+  if (!stage) {
+    throw new Error(`Stage ${input.stage} is not defined in workflow ${workflow.id}`);
+  }
+  const finalStatus: StageStatus = input.status ?? "complete";
+  // A caller-supplied array wins even when it filters down to empty; the
+  // derived list is only the fallback for a missing or non-array value.
+  const suppliedReferences = input.frontmatter.references;
+  const references = Array.isArray(suppliedReferences)
+    ? suppliedReferences.filter((ref): ref is string => typeof ref === "string")
+    : deriveReferencesFromReads(stage.reads, workflow);
+  const nextState = advanceStage({
+    cwd,
+    taskId,
+    workflow,
+    stageName: input.stage,
+    artifactName: stage.artifact,
+    frontmatter: {
+      ...input.frontmatter,
+      stage: input.stage,
+      status: finalStatus,
+      references,
+    },
+    body: input.body,
+    outcome: input.outcome,
+    status: finalStatus,
+  });
+  // While the run is still going, hand back the next envelope as well so the
+  // caller does not need a follow-up round-trip to learn what comes next.
+  const runContinues = nextState.outcome === "in_progress" && Boolean(nextState.current_stage);
+  const nextEnvelope: ReturnType<typeof composeStageEnvelope> | null = runContinues
+    ? composeStageEnvelope({ cwd, taskId, workflow })
+    : null;
+  return { state: nextState, next_envelope: nextEnvelope };
+}
+/**
+ * Maps stage names to the artifact filenames those stages produce.
+ *
+ * @param reads - Stage names, typically a stage's `reads` list.
+ * @param workflow - Workflow whose stages are searched.
+ * @returns Artifact names in the order of `reads`; names with no matching
+ *   stage are skipped.
+ */
+function deriveReferencesFromReads(
+  reads: string[],
+  workflow: WorkflowDefinition,
+): string[] {
+  return reads.flatMap((readName) => {
+    const source = workflow.stages.find((candidate) => candidate.name === readName);
+    return source ? [source.artifact] : [];
+  });
+}
+/**
+ * Fetches the run state for a task.
+ *
+ * @param input - Parsed `WorkflowStatusInput`.
+ * @param ctx - Working directory (the workflow registry is not consulted).
+ * @returns An object whose `state` is whatever `getRunState` returned.
+ */
+export function runWorkflowStatus(
+  input: WorkflowStatusInputT,
+  ctx: WorkflowToolContext,
+) {
+  return { state: getRunState(ctx.cwd, input.task_id) };
+}
+/**
+ * Composes the envelope for a stage of an existing run without advancing it.
+ *
+ * @param input - Parsed `WorkflowEnvelopeInput`; when `stage` is omitted,
+ *   `composeStageEnvelope` falls back to the run's current stage.
+ * @param ctx - Working directory and optional workflow registry.
+ * @returns An object holding the composed envelope.
+ * @throws Error when no run state exists or the run's workflow id is
+ *   unknown; errors from `composeStageEnvelope` propagate unchanged.
+ */
+export function runWorkflowEnvelope(
+  input: WorkflowEnvelopeInputT,
+  ctx: WorkflowToolContext,
+) {
+  const { cwd } = ctx;
+  const taskId = input.task_id;
+  const run = getRunState(cwd, taskId);
+  if (!run) {
+    throw new Error(
+      `No run state found for task ${taskId}. Call cortex.workflow.start first.`,
+    );
+  }
+  const workflow = resolveWorkflow(run.workflow_id, ctx.workflows);
+  return {
+    envelope: composeStageEnvelope({ cwd, taskId, workflow, stageName: input.stage }),
+  };
+}