npm - @danielblomma/cortex-mcp - Versions diffs - 2.0.4 → 2.0.6 - Mend

@danielblomma/cortex-mcp 2.0.4 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/bin/cortex.mjs +74 -25
package/package.json +1 -1
package/scaffold/mcp/package-lock.json +63 -4
package/scaffold/mcp/package.json +4 -1
package/scaffold/mcp/src/cli/stage.ts +325 -0
package/scaffold/mcp/src/core/workflow/artifact-io.ts +156 -0
package/scaffold/mcp/src/core/workflow/capabilities.ts +100 -0
package/scaffold/mcp/src/core/workflow/default-workflows.ts +83 -0
package/scaffold/mcp/src/core/workflow/enforcement.ts +206 -0
package/scaffold/mcp/src/core/workflow/envelope.ts +220 -0
package/scaffold/mcp/src/core/workflow/index.ts +8 -0
package/scaffold/mcp/src/core/workflow/mcp-tools.ts +208 -0
package/scaffold/mcp/src/core/workflow/run-lifecycle.ts +165 -0
package/scaffold/mcp/src/core/workflow/schemas.ts +125 -0
package/scaffold/mcp/src/hooks/pre-tool-use.ts +30 -0
package/scaffold/mcp/src/server.ts +75 -0
package/scaffold/mcp/tests/workflow-cli.test.mjs +293 -0
package/scaffold/mcp/tests/workflow-enforcement.test.mjs +370 -0
package/scaffold/mcp/tests/workflow-envelope.test.mjs +247 -0
package/scaffold/mcp/tests/workflow-mcp-tools.test.mjs +293 -0
package/scaffold/mcp/tests/workflow.test.mjs +283 -0
package/scaffold/scripts/bootstrap.sh +1 -1
package/scaffold/scripts/doctor.sh +6 -6
package/scaffold/scripts/embed.sh +2 -2
package/scaffold/scripts/load-ryu.sh +3 -3
package/scaffold/scripts/memory-compile.mjs +1 -1
package/scaffold/scripts/memory-lint.mjs +1 -1
package/scaffold/scripts/watch.sh +2 -7

package/scaffold/mcp/src/core/workflow/run-lifecycle.ts ADDED Viewed

@@ -0,0 +1,165 @@
+import {
+  readRunState,
+  writeRunState,
+  writeStageArtifact,
+} from "./artifact-io.js";
+import {
+  runStateSchema,
+  stageArtifactFrontmatterSchema,
+  workflowDefinitionSchema,
+  type RunState,
+  type StageRecord,
+  type StageStatus,
+  type WorkflowDefinition,
+} from "./schemas.js";
+/**
+ * Lifecycle helpers for one workflow run. The harness composes envelopes
+ * and invokes agents elsewhere; these primitives only manipulate the
+ * persisted state under .agents/<task-id>/. Pure functions on top of
+ * artifact-io.ts so unit tests can hit them without spawning agents.
+ */
+export type CreateRunOptions = { // Inputs accepted by createRun().
+  cwd: string; // Forwarded to writeRunState() as the first argument.
+  taskId: string; // Stored as `task_id` in the run state.
+  workflow: WorkflowDefinition; // Re-validated with workflowDefinitionSchema before use.
+  taskDescription: string; // Stored as `task_description` in the run state.
+  now?: () => Date; // Clock override; defaults to `() => new Date()`.
+};
+export function createRun(options: CreateRunOptions): RunState { // Builds, validates and persists the initial state of a new run.
+  const workflow = workflowDefinitionSchema.parse(options.workflow); // Throws if the definition fails schema validation.
+  const now = (options.now ?? (() => new Date()))(); // Injected clock if given, otherwise the current time.
+  const startedAt = now.toISOString();
+  const stages: StageRecord[] = workflow.stages.map((stage) => ({ // One "pending" record per stage, in workflow order.
+    name: stage.name,
+    status: "pending" as StageStatus,
+  }));
+  const state: RunState = {
+    schema_version: 1,
+    task_id: options.taskId,
+    workflow_id: workflow.id,
+    workflow_version: workflow.version,
+    task_description: options.taskDescription,
+    current_stage: workflow.stages[0].name, // The schema requires at least one stage, so index 0 exists after parse().
+    outcome: "in_progress",
+    started_at: startedAt,
+    completed_at: null,
+    stages,
+  };
+  // Validate before write so a malformed input never reaches disk.
+  const validated = runStateSchema.parse(state);
+  writeRunState(options.cwd, validated); // NOTE(review): no existing-run check here; presumably this replaces any prior state.json — confirm in artifact-io.
+  return validated; // The same validated object that was handed to writeRunState().
+}
+export function getRunState(cwd: string, taskId: string): RunState | null { // Read-only accessor; delegates entirely to readRunState().
+  return readRunState(cwd, taskId); // Result is passed through unchanged (a RunState or null).
+}
+export type AdvanceStageOptions = { // Inputs accepted by advanceStage().
+  cwd: string; // Forwarded to the artifact-io read/write helpers.
+  taskId: string; // Identifies the run whose state is read and updated.
+  workflow: WorkflowDefinition; // Must have the same `id` the run was started with.
+  /** The stage we just finished. Must equal state.current_stage. */
+  stageName: string;
+  /** Filename of the artifact to write (e.g. "plan.md"). */
+  artifactName: string;
+  /**
+   * Frontmatter for the artifact, minus the auto-injected `written_at`.
+   * advanceStage() always overwrites `stage` with `stageName`, and fills
+   * `written_at` with the completion timestamp only when it is not supplied.
+   */
+  frontmatter: Omit<
+    import("./schemas.js").StageArtifactFrontmatter,
+    "written_at"
+  > & { written_at?: string };
+  /** Markdown body of the artifact. */
+  body: string;
+  /** Per-stage outcome surfaced into state.json for fast lookup. */
+  outcome?: Record<string, unknown>;
+  /**
+   * Final status to record for this stage. Defaults to "complete".
+   * "blocked" and "failed" also become the outcome of the whole run.
+   */
+  status?: StageStatus;
+  now?: () => Date; // Clock override; defaults to `() => new Date()`.
+};
+/**
+ * Marks `stageName` as finished, writes its artifact under .agents/<task-id>/,
+ * and advances `current_stage` to the next stage (or marks the run complete
+ * if this was the final stage).
+ *
+ * All preconditions are checked before anything is written: the run must
+ * exist, must belong to the supplied workflow, must currently be at
+ * `stageName`, and `stageName` must be defined in the supplied workflow.
+ * The last check matters because a missing stage yields index -1, which
+ * would make "next stage" resolve to the workflow's FIRST stage and send
+ * the run back to the beginning.
+ *
+ * A `status` of "blocked" or "failed" ends the run with that outcome and
+ * clears `current_stage`; any other status moves on to the next stage, or
+ * completes the run when there is none.
+ *
+ * Not atomic: the artifact is written before state.json, so if the state
+ * write fails, a retry for the same stage writes the artifact again. After
+ * a successful call `current_stage` has moved on (or is null), so repeating
+ * the call for the same stage normally fails the current-stage guard.
+ *
+ * @returns The validated run state that was persisted.
+ * @throws Error when a precondition above fails. Schema validation errors
+ *   for the workflow, the frontmatter, or the resulting state also propagate.
+ */
+export function advanceStage(options: AdvanceStageOptions): RunState {
+  const workflow = workflowDefinitionSchema.parse(options.workflow);
+  const state = readRunState(options.cwd, options.taskId);
+  if (!state) {
+    throw new Error(
+      `No run state found for task ${options.taskId}. Call createRun() first.`,
+    );
+  }
+  if (state.workflow_id !== workflow.id) {
+    throw new Error(
+      `Workflow mismatch: run was started with ${state.workflow_id}, advance was called with ${workflow.id}`,
+    );
+  }
+  if (state.current_stage !== options.stageName) {
+    throw new Error(
+      `Cannot advance stage ${options.stageName}: run is currently at ${
+        state.current_stage ?? "<finished>"
+      }`,
+    );
+  }
+  // Resolve the stage's position before touching disk. The run state and the
+  // supplied definition can disagree (e.g. the definition changed after the
+  // run started); without this guard, index -1 would make stages[0] the
+  // "next" stage.
+  const stageIndex = workflow.stages.findIndex((s) => s.name === options.stageName);
+  if (stageIndex === -1) {
+    throw new Error(
+      `Cannot advance stage ${options.stageName}: it is not defined in workflow ${workflow.id}`,
+    );
+  }
+  const nextStage = workflow.stages[stageIndex + 1] ?? null;
+  const now = (options.now ?? (() => new Date()))();
+  const completedAt = now.toISOString();
+  // `stage` is forced to the stage being advanced; `written_at` falls back to
+  // the completion timestamp when the caller did not provide one.
+  const frontmatter = stageArtifactFrontmatterSchema.parse({
+    ...options.frontmatter,
+    stage: options.stageName,
+    written_at: options.frontmatter.written_at ?? completedAt,
+  });
+  writeStageArtifact(
+    options.cwd,
+    options.taskId,
+    options.artifactName,
+    frontmatter,
+    options.body,
+  );
+  const finalStatus = options.status ?? "complete";
+  const updatedStages: StageRecord[] = state.stages.map((record) => {
+    if (record.name !== options.stageName) return record;
+    return {
+      ...record,
+      status: finalStatus,
+      artifact: options.artifactName,
+      // Stages have no separate start event here, so fall back to the run's start.
+      started_at: record.started_at ?? state.started_at,
+      completed_at: completedAt,
+      outcome: options.outcome,
+    };
+  });
+  // Blocked/failed stages end the run with that outcome; otherwise the run
+  // continues while a next stage exists and completes when it does not.
+  const runOutcome: RunState["outcome"] =
+    finalStatus === "blocked" || finalStatus === "failed"
+      ? finalStatus
+      : nextStage
+        ? "in_progress"
+        : "complete";
+  const next: RunState = {
+    ...state,
+    current_stage:
+      runOutcome === "in_progress" && nextStage ? nextStage.name : null,
+    outcome: runOutcome,
+    completed_at: runOutcome === "in_progress" ? null : completedAt,
+    stages: updatedStages,
+  };
+  // Validate before write so a malformed state never reaches disk.
+  const validated = runStateSchema.parse(next);
+  writeRunState(options.cwd, validated);
+  return validated;
+}

package/scaffold/mcp/src/core/workflow/schemas.ts ADDED Viewed

@@ -0,0 +1,125 @@
+import { z } from "zod";
+/**
+ * Schemas for the Cortex Harness workflow engine.
+ *
+ * See docs/harness-vision.md for the design. In short:
+ *
+ *   .agents/<task-id>/
+ *     plan.md            # frontmatter + body
+ *     review.md
+ *     changes.md
+ *     mutation-report.md
+ *     security-report.md
+ *     state.json         # current run state
+ *
+ * All artifacts are markdown with YAML frontmatter; state.json is the only
+ * JSON file. Both are tracked in git so a PR carries the evidence trail.
+ */
+/**
+ * Shared slug validator: 1–80 characters drawn from lowercase ASCII letters,
+ * digits and hyphens, starting and ending with a letter or digit. The tail of
+ * the pattern is optional so that a single-character slug, which `.min(1)`
+ * permits, is accepted instead of being rejected by the regex.
+ */
+const slugSchema = z
+  .string()
+  .min(1)
+  .max(80)
+  .regex(
+    /^[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$/,
+    "Must be lowercase alphanumeric with hyphens (no leading/trailing hyphen)",
+  );
+/**
+ * Static definition of a single stage in a workflow. Authored at the
+ * organization level (in cortex-web later) and synced down to projects.
+ *
+ * `artifact` must be a lowercase `.md` filename (letters, digits and hyphens
+ * before the extension). `reads` and `required_fields` become empty arrays
+ * when omitted; `capability` is optional; `description` is capped at 500
+ * characters.
+ */
+export const stageDefinitionSchema = z.object({
+  name: slugSchema,
+  artifact: z.string().min(1).regex(/^[a-z0-9][a-z0-9-]*\.md$/),
+  /** Stage names this stage may read artifacts from. Empty = no inputs. */
+  reads: z.array(slugSchema).default([]),
+  /** Required frontmatter fields the produced artifact must populate. */
+  required_fields: z.array(z.string().min(1)).default([]),
+  /** Capability key the stage runs under. References a separate capability registry. */
+  capability: z.string().min(1).optional(),
+  /** Human-readable summary surfaced in dashboards and audit. */
+  description: z.string().min(1).max(500),
+});
+export type StageDefinition = z.infer<typeof stageDefinitionSchema>;
+/**
+ * A complete workflow: ordered stages plus a stable identifier.
+ *
+ * Requires at least one stage and an integer `version` of 1 or higher.
+ * NOTE(review): stage names are not checked for uniqueness by this schema.
+ */
+export const workflowDefinitionSchema = z.object({
+  id: slugSchema,
+  description: z.string().min(1).max(500),
+  version: z.number().int().min(1),
+  stages: z.array(stageDefinitionSchema).min(1),
+});
+export type WorkflowDefinition = z.infer<typeof workflowDefinitionSchema>;
+/**
+ * Status of a single stage inside a run.
+ *
+ * This is a superset of the run-level `outcome` values in runStateSchema,
+ * which has no "pending". The same enum is reused for the `status` field of
+ * artifact frontmatter.
+ */
+export const stageStatusSchema = z.enum([
+  "pending",
+  "in_progress",
+  "complete",
+  "blocked",
+  "failed",
+]);
+export type StageStatus = z.infer<typeof stageStatusSchema>;
+/**
+ * Per-stage record inside state.json. Holds outcome metadata that the next
+ * stage's envelope composer needs without re-parsing every artifact.
+ *
+ * Only `name` and `status` are required. The timestamps, when present, must
+ * pass zod's `.datetime()` string check; `outcome` is a free-form
+ * string-keyed record.
+ */
+export const stageRecordSchema = z.object({
+  name: slugSchema,
+  status: stageStatusSchema,
+  artifact: z.string().min(1).optional(),
+  started_at: z.string().datetime().optional(),
+  completed_at: z.string().datetime().optional(),
+  /** Frontmatter outcome surfaced for fast lookup (e.g. approved=true on review). */
+  outcome: z.record(z.string(), z.unknown()).optional(),
+});
+export type StageRecord = z.infer<typeof stageRecordSchema>;
+/**
+ * The full state of one workflow run, persisted as
+ * .agents/<task-id>/state.json. Written only on stage boundaries so it
+ * never churns mid-tick.
+ *
+ * `schema_version` is pinned to the literal 1. `current_stage` and
+ * `completed_at` are nullable rather than optional, so both keys are always
+ * present. `task_description` is limited to 2000 characters, and `stages`
+ * must contain at least one record.
+ */
+export const runStateSchema = z.object({
+  schema_version: z.literal(1),
+  task_id: slugSchema,
+  workflow_id: slugSchema,
+  workflow_version: z.number().int().min(1),
+  task_description: z.string().min(1).max(2000),
+  current_stage: slugSchema.nullable(),
+  outcome: z.enum(["in_progress", "complete", "failed", "blocked"]),
+  started_at: z.string().datetime(),
+  completed_at: z.string().datetime().nullable(),
+  stages: z.array(stageRecordSchema).min(1),
+});
+export type RunState = z.infer<typeof runStateSchema>;
+/**
+ * The required-by-convention frontmatter shape every stage artifact carries.
+ * Stages may add additional structured fields; these four are the ones the
+ * harness itself relies on.
+ *
+ * `.passthrough()` keeps keys not listed here, so stage-specific fields
+ * survive parsing. `references` becomes an empty array when omitted.
+ */
+export const stageArtifactFrontmatterSchema = z
+  .object({
+    stage: slugSchema,
+    status: stageStatusSchema,
+    /** Sister-artifacts this artifact references (relative filenames). */
+    references: z.array(z.string().min(1)).default([]),
+    /** ISO 8601; injected by the harness, not the agent. */
+    written_at: z.string().datetime(),
+  })
+  .passthrough();
+export type StageArtifactFrontmatter = z.infer<typeof stageArtifactFrontmatterSchema>;

package/scaffold/mcp/src/hooks/pre-tool-use.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import {
   resolveDaemonEntry,
   sendHeartbeat,
 } from "./shared.js";
+import { evaluateToolCall } from "../core/workflow/enforcement.js";
 /**
  * PreToolUse hook for Claude Code.
@@ -46,6 +47,35 @@ async function main(): Promise<void> {
     });
   }
+  // Workflow capability gate. Runs before policy.check so harness-level
+  // restrictions block before the daemon's general policy machinery sees
+  // the call. No-op when CORTEX_ACTIVE_TASK_ID is unset.
+  const activeTaskId = process.env.CORTEX_ACTIVE_TASK_ID?.trim();
+  if (activeTaskId) {
+    try {
+      const verdict = evaluateToolCall({
+        cwd,
+        taskId: activeTaskId,
+        call: { toolName: tool, toolInput: input.tool_input ?? {} },
+      });
+      if (!verdict.allowed) {
+        process.stderr.write(
+          `[cortex] Blocked by harness capability: ${verdict.reason}\n`,
+        );
+        process.exit(2);
+      }
+    } catch (err) {
+      // Capability evaluation should never crash the hook — if it does,
+      // log and fall through to the existing policy.check rather than
+      // accidentally blocking a legitimate tool.
+      process.stderr.write(
+        `[cortex] capability evaluation failed (${
+          err instanceof Error ? err.message : String(err)
+        }); deferring to policy.check\n`,
+      );
+    }
+  }
   const payload: PolicyCheckPayload = {
     tool,
     cwd,

package/scaffold/mcp/src/server.ts CHANGED Viewed

@@ -11,6 +11,17 @@ import {
   getSessionEventHook,
   loadPlugins,
 } from "./plugin.js";
+import {
+  WorkflowStartInput,
+  WorkflowAdvanceInput,
+  WorkflowStatusInput,
+  WorkflowEnvelopeInput,
+  resolveProjectRoot,
+  runWorkflowAdvance,
+  runWorkflowEnvelope,
+  runWorkflowStart,
+  runWorkflowStatus,
+} from "./core/workflow/mcp-tools.js";
 type ToolPayload = Record<string, unknown>;
@@ -322,6 +333,70 @@ function registerTools(server: McpServer): void {
       return reloadContextGraph(parsed.force);
     })
   );
+  server.registerTool(
+    "cortex.workflow.start",
+    {
+      description:
+        "Start a Cortex Harness workflow run for a task. Creates .agents/<task_id>/state.json and returns the first stage's envelope (the prompt the agent should answer).",
+      inputSchema: WorkflowStartInput,
+    },
+    async (input) => executeInstrumentedTool(
+      "cortex.workflow.start",
+      input,
+      async () => runWorkflowStart(WorkflowStartInput.parse(input ?? {}), {
+        cwd: resolveProjectRoot(),
+      }) as ToolPayload,
+    ),
+  );
+  server.registerTool(
+    "cortex.workflow.advance",
+    {
+      description:
+        "Complete the current stage of a workflow run by writing its artifact and advancing the run pointer. Returns the new run state plus the next stage's envelope (or null when the run is finished, blocked, or failed).",
+      inputSchema: WorkflowAdvanceInput,
+    },
+    async (input) => executeInstrumentedTool(
+      "cortex.workflow.advance",
+      input,
+      async () => runWorkflowAdvance(WorkflowAdvanceInput.parse(input ?? {}), {
+        cwd: resolveProjectRoot(),
+      }) as ToolPayload,
+    ),
+  );
+  server.registerTool(
+    "cortex.workflow.status",
+    {
+      description:
+        "Read the current run state for a task (current stage, completed stages, outcome). Returns null state when no run exists for the given task_id.",
+      inputSchema: WorkflowStatusInput,
+    },
+    async (input) => executeInstrumentedTool(
+      "cortex.workflow.status",
+      input,
+      async () => runWorkflowStatus(WorkflowStatusInput.parse(input ?? {}), {
+        cwd: resolveProjectRoot(),
+      }) as ToolPayload,
+    ),
+  );
+  server.registerTool(
+    "cortex.workflow.envelope",
+    {
+      description:
+        "Compose the prompt envelope for a workflow stage without advancing the run. Defaults to the run's current_stage; pass `stage` to dry-run a different stage.",
+      inputSchema: WorkflowEnvelopeInput,
+    },
+    async (input) => executeInstrumentedTool(
+      "cortex.workflow.envelope",
+      input,
+      async () => runWorkflowEnvelope(WorkflowEnvelopeInput.parse(input ?? {}), {
+        cwd: resolveProjectRoot(),
+      }) as ToolPayload,
+    ),
+  );
 }
 let shutdownCalled = false;