@danielblomma/cortex-mcp 2.0.4 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ import {
2
+ readRunState,
3
+ writeRunState,
4
+ writeStageArtifact,
5
+ } from "./artifact-io.js";
6
+ import {
7
+ runStateSchema,
8
+ stageArtifactFrontmatterSchema,
9
+ workflowDefinitionSchema,
10
+ type RunState,
11
+ type StageRecord,
12
+ type StageStatus,
13
+ type WorkflowDefinition,
14
+ } from "./schemas.js";
15
+
16
+ /**
17
+ * Lifecycle helpers for one workflow run. The harness composes envelopes
18
+ * and invokes agents elsewhere; these primitives only manipulate the
19
+ * persisted state under .agents/<task-id>/. Pure functions on top of
20
+ * artifact-io.ts so unit tests can hit them without spawning agents.
21
+ */
22
+
23
/** Inputs accepted by `createRun`. */
export type CreateRunOptions = {
  /** Forwarded to `writeRunState` as the first argument when persisting. */
  cwd: string;
  /** Recorded as `task_id` in the run state. */
  taskId: string;
  /** Workflow to run; re-validated with `workflowDefinitionSchema` before use. */
  workflow: WorkflowDefinition;
  /** Recorded as `task_description` in the run state. */
  taskDescription: string;
  /** Clock override; when omitted, `new Date()` supplies the start time. */
  now?: () => Date;
};
30
+
31
/**
 * Builds the initial state for a workflow run and persists it.
 *
 * Every stage of the workflow starts as "pending", `current_stage` points at
 * the first stage, and the run outcome is "in_progress".
 *
 * @param options - Location, task identity, workflow definition and an
 *   optional clock override.
 * @returns The validated run state that was handed to `writeRunState`.
 * @throws When the workflow definition or the assembled state fails schema
 *   validation (raised by the schemas' `.parse()` calls).
 */
export function createRun(options: CreateRunOptions): RunState {
  const { cwd, taskId, taskDescription } = options;
  const definition = workflowDefinitionSchema.parse(options.workflow);

  const clock = options.now ?? (() => new Date());
  const startedAt = clock().toISOString();

  // One pending record per stage, in workflow order.
  const pendingStages: StageRecord[] = [];
  for (const stage of definition.stages) {
    pendingStages.push({ name: stage.name, status: "pending" as StageStatus });
  }

  const draft: RunState = {
    schema_version: 1,
    task_id: taskId,
    workflow_id: definition.id,
    workflow_version: definition.version,
    task_description: taskDescription,
    current_stage: definition.stages[0].name,
    outcome: "in_progress",
    started_at: startedAt,
    completed_at: null,
    stages: pendingStages,
  };

  // Validation happens before the write so malformed input never reaches disk.
  const persisted = runStateSchema.parse(draft);
  writeRunState(cwd, persisted);
  return persisted;
}
59
+
60
/**
 * Looks up the run state for `taskId`.
 *
 * Thin wrapper: both arguments are forwarded to `readRunState` and its result
 * is returned unchanged.
 *
 * @returns Whatever `readRunState` yields — a `RunState` or `null`.
 */
export function getRunState(cwd: string, taskId: string): RunState | null {
  const persisted = readRunState(cwd, taskId);
  return persisted;
}
63
+
64
/** Inputs accepted by `advanceStage`. */
export type AdvanceStageOptions = {
  /** Forwarded to the artifact-io helpers as the first argument. */
  cwd: string;
  /** Task whose run state is read and updated. */
  taskId: string;
  /** Workflow definition; its `id` must match the run's `workflow_id`. */
  workflow: WorkflowDefinition;
  /** The stage that just finished. Must equal the run's `current_stage`. */
  stageName: string;
  /** Filename of the artifact to write (e.g. "plan.md"). */
  artifactName: string;
  /**
   * Artifact frontmatter. `written_at` may be omitted, in which case the
   * completion timestamp is used; `stage` is always overwritten with
   * `stageName`.
   */
  frontmatter: Omit<
    import("./schemas.js").StageArtifactFrontmatter,
    "written_at"
  > & { written_at?: string };
  /** Markdown body of the artifact. */
  body: string;
  /** Per-stage outcome copied into the stage's record in the run state. */
  outcome?: Record<string, unknown>;
  /** Final status recorded for this stage. Defaults to "complete". */
  status?: StageStatus;
  /** Clock override; when omitted, `new Date()` supplies the completion time. */
  now?: () => Date;
};
85
+
86
/**
 * Marks `stageName` as finished, writes its artifact under .agents/<task-id>/,
 * and advances `current_stage` to the next stage (or marks the run complete
 * if this was the final stage). Idempotent only at the artifact layer —
 * calling twice for the same stage will overwrite the artifact and the
 * state.json record.
 *
 * A final status of "blocked" or "failed" ends the run with that outcome and
 * clears `current_stage`; any other status advances to the next stage.
 *
 * All validation (frontmatter and the next run state) happens before anything
 * is written, so a validation failure never leaves an artifact on disk
 * without a matching state update.
 *
 * @param options - See {@link AdvanceStageOptions}.
 * @returns The validated run state that was handed to `writeRunState`.
 * @throws Error when no run state exists for the task, when the workflow id
 *   differs from the one the run was started with, when `stageName` is not
 *   the run's current stage, or when `stageName` is not defined in the
 *   supplied workflow definition.
 * @throws When the workflow, frontmatter or next state fails schema
 *   validation (raised by the schemas' `.parse()` calls).
 */
export function advanceStage(options: AdvanceStageOptions): RunState {
  const workflow = workflowDefinitionSchema.parse(options.workflow);
  const state = readRunState(options.cwd, options.taskId);
  if (!state) {
    throw new Error(
      `No run state found for task ${options.taskId}. Call createRun() first.`,
    );
  }
  if (state.workflow_id !== workflow.id) {
    throw new Error(
      `Workflow mismatch: run was started with ${state.workflow_id}, advance was called with ${workflow.id}`,
    );
  }
  if (state.current_stage !== options.stageName) {
    throw new Error(
      `Cannot advance stage ${options.stageName}: run is currently at ${
        state.current_stage ?? "<finished>"
      }`,
    );
  }

  // The supplied definition can share the run's id yet lack this stage (e.g.
  // an edited stage list). Without this guard findIndex() returns -1 and
  // `stages[-1 + 1]` would silently rewind the run to the first stage.
  const stageIndex = workflow.stages.findIndex((s) => s.name === options.stageName);
  if (stageIndex === -1) {
    throw new Error(
      `Cannot advance stage ${options.stageName}: it is not defined in workflow ${workflow.id}`,
    );
  }
  const nextStage = workflow.stages[stageIndex + 1] ?? null;
  const finalStatus = options.status ?? "complete";

  const now = (options.now ?? (() => new Date()))();
  const completedAt = now.toISOString();

  // `stage` always reflects the stage being advanced; `written_at` falls back
  // to the completion timestamp when the caller did not provide one.
  const frontmatter = stageArtifactFrontmatterSchema.parse({
    ...options.frontmatter,
    stage: options.stageName,
    written_at: options.frontmatter.written_at ?? completedAt,
  });

  const updatedStages: StageRecord[] = state.stages.map((record) => {
    if (record.name !== options.stageName) return record;
    return {
      ...record,
      status: finalStatus,
      artifact: options.artifactName,
      // Stages that never recorded their own start inherit the run's start.
      started_at: record.started_at ?? state.started_at,
      completed_at: completedAt,
      outcome: options.outcome,
    };
  });

  const runOutcome: RunState["outcome"] =
    finalStatus === "blocked" || finalStatus === "failed"
      ? finalStatus
      : nextStage
        ? "in_progress"
        : "complete";

  const next: RunState = {
    ...state,
    current_stage:
      runOutcome === "in_progress" && nextStage ? nextStage.name : null,
    outcome: runOutcome,
    completed_at: runOutcome === "in_progress" ? null : completedAt,
    stages: updatedStages,
  };

  // Validate the new state BEFORE writing anything so the artifact and
  // state.json cannot diverge because of a validation error.
  const validated = runStateSchema.parse(next);

  writeStageArtifact(
    options.cwd,
    options.taskId,
    options.artifactName,
    frontmatter,
    options.body,
  );
  writeRunState(options.cwd, validated);
  return validated;
}
@@ -0,0 +1,125 @@
1
+ import { z } from "zod";
2
+
3
+ /**
4
+ * Schemas for the Cortex Harness workflow engine.
5
+ *
6
+ * See docs/harness-vision.md for the design. In short:
7
+ *
8
+ * .agents/<task-id>/
9
+ * plan.md # frontmatter + body
10
+ * review.md
11
+ * changes.md
12
+ * mutation-report.md
13
+ * security-report.md
14
+ * state.json # current run state
15
+ *
16
+ * All artifacts are markdown with YAML frontmatter; state.json is the only
17
+ * JSON file. Both are tracked in git so a PR carries the evidence trail.
18
+ */
19
+
20
/**
 * Identifier slug: 1–80 characters, lowercase alphanumerics and hyphens, with
 * no leading or trailing hyphen.
 *
 * The tail group is optional so a single-character slug such as "a" is
 * accepted, as `.min(1)` and the error message promise. The previous pattern
 * required two separate alphanumeric characters and rejected it.
 */
const slugSchema = z
  .string()
  .min(1)
  .max(80)
  .regex(
    /^[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$/,
    "Must be lowercase alphanumeric with hyphens (no leading/trailing hyphen)",
  );
28
+
29
/** Artifact filename: lowercase alphanumerics/hyphens followed by `.md`. */
const stageArtifactFilenameSchema = z
  .string()
  .min(1)
  .regex(/^[a-z0-9][a-z0-9-]*\.md$/);

/**
 * Static definition of a single stage in a workflow. Authored at the
 * organization level (in cortex-web later) and synced down to projects.
 */
export const stageDefinitionSchema = z.object({
  /** Stage identifier (slug). */
  name: slugSchema,
  /** Filename of the artifact this stage produces. */
  artifact: stageArtifactFilenameSchema,
  /** Stage names this stage may read artifacts from. Empty = no inputs. */
  reads: z.array(slugSchema).default([]),
  /** Required frontmatter fields the produced artifact must populate. */
  required_fields: z.array(z.string().min(1)).default([]),
  /** Capability key the stage runs under. References a separate capability registry. */
  capability: z.string().min(1).optional(),
  /** Human-readable summary surfaced in dashboards and audit. */
  description: z.string().min(1).max(500),
});

/** Parsed (output) shape of {@link stageDefinitionSchema}. */
export type StageDefinition = z.infer<typeof stageDefinitionSchema>;
47
+
48
/** Ordered stage list; a workflow must declare at least one stage. */
const workflowStagesSchema = z.array(stageDefinitionSchema).min(1);

/**
 * A complete workflow: ordered stages plus a stable identifier.
 */
export const workflowDefinitionSchema = z.object({
  /** Stable workflow identifier (slug). */
  id: slugSchema,
  /** Human-readable summary, 1–500 characters. */
  description: z.string().min(1).max(500),
  /** Positive integer version of the definition. */
  version: z.number().int().min(1),
  /** Stages in execution order. */
  stages: workflowStagesSchema,
});

/** Parsed (output) shape of {@link workflowDefinitionSchema}. */
export type WorkflowDefinition = z.infer<typeof workflowDefinitionSchema>;
59
+
60
/** The literal values a stage status may take, in declaration order. */
const STAGE_STATUS_VALUES = [
  "pending",
  "in_progress",
  "complete",
  "blocked",
  "failed",
] as const;

/**
 * Status of a single stage inside a run.
 */
export const stageStatusSchema = z.enum(STAGE_STATUS_VALUES);

/** Union of the string literals accepted by {@link stageStatusSchema}. */
export type StageStatus = z.infer<typeof stageStatusSchema>;
72
+
73
/** Optional ISO 8601 datetime string used for a stage's start/finish marks. */
const optionalStageTimestampSchema = z.string().datetime().optional();

/**
 * Per-stage record inside state.json. Holds outcome metadata that the next
 * stage's envelope composer needs without re-parsing every artifact.
 */
export const stageRecordSchema = z.object({
  /** Stage identifier (slug). */
  name: slugSchema,
  /** Current status of the stage. */
  status: stageStatusSchema,
  /** Artifact filename, once one has been recorded for the stage. */
  artifact: z.string().min(1).optional(),
  /** When the stage started, if recorded. */
  started_at: optionalStageTimestampSchema,
  /** When the stage finished, if recorded. */
  completed_at: optionalStageTimestampSchema,
  /** Frontmatter outcome surfaced for fast lookup (e.g. approved=true on review). */
  outcome: z.record(z.string(), z.unknown()).optional(),
});

/** Parsed (output) shape of {@link stageRecordSchema}. */
export type StageRecord = z.infer<typeof stageRecordSchema>;
88
+
89
/** Overall outcome values a run may carry. */
const runOutcomeSchema = z.enum(["in_progress", "complete", "failed", "blocked"]);

/** ISO 8601 datetime string used for run-level timestamps. */
const runTimestampSchema = z.string().datetime();

/**
 * The full state of one workflow run, persisted as
 * .agents/<task-id>/state.json. Written only on stage boundaries so it
 * never churns mid-tick.
 */
export const runStateSchema = z.object({
  /** Format marker; only the literal 1 is accepted. */
  schema_version: z.literal(1),
  /** Task identifier (slug). */
  task_id: slugSchema,
  /** Identifier of the workflow the run follows. */
  workflow_id: slugSchema,
  /** Positive integer version of that workflow. */
  workflow_version: z.number().int().min(1),
  /** Task description, 1–2000 characters. */
  task_description: z.string().min(1).max(2000),
  /** Name of the stage the run is at, or null when there is none. */
  current_stage: slugSchema.nullable(),
  /** Overall run outcome. */
  outcome: runOutcomeSchema,
  /** When the run started. */
  started_at: runTimestampSchema,
  /** When the run ended, or null while it has not. */
  completed_at: runTimestampSchema.nullable(),
  /** One record per stage; at least one is required. */
  stages: z.array(stageRecordSchema).min(1),
});

/** Parsed (output) shape of {@link runStateSchema}. */
export type RunState = z.infer<typeof runStateSchema>;
108
+
109
/** The four frontmatter fields the harness itself relies on. */
const harnessFrontmatterShape = {
  /** Stage that produced the artifact (slug). */
  stage: slugSchema,
  /** Status reported in the artifact. */
  status: stageStatusSchema,
  /** Sister-artifacts this artifact references (relative filenames). */
  references: z.array(z.string().min(1)).default([]),
  /** ISO 8601; injected by the harness, not the agent. */
  written_at: z.string().datetime(),
};

/**
 * The required-by-convention frontmatter shape every stage artifact carries.
 * Stages may add additional structured fields; these four are the ones the
 * harness itself relies on. `.passthrough()` keeps those extra fields in the
 * parsed result instead of stripping them.
 */
export const stageArtifactFrontmatterSchema = z
  .object(harnessFrontmatterShape)
  .passthrough();

/** Parsed (output) shape of {@link stageArtifactFrontmatterSchema}. */
export type StageArtifactFrontmatter = z.infer<typeof stageArtifactFrontmatterSchema>;
@@ -8,6 +8,7 @@ import {
8
8
  resolveDaemonEntry,
9
9
  sendHeartbeat,
10
10
  } from "./shared.js";
11
+ import { evaluateToolCall } from "../core/workflow/enforcement.js";
11
12
 
12
13
  /**
13
14
  * PreToolUse hook for Claude Code.
@@ -46,6 +47,35 @@ async function main(): Promise<void> {
46
47
  });
47
48
  }
48
49
 
50
+ // Workflow capability gate. Runs before policy.check so harness-level
51
+ // restrictions block before the daemon's general policy machinery sees
52
+ // the call. No-op when CORTEX_ACTIVE_TASK_ID is unset.
53
+ const activeTaskId = process.env.CORTEX_ACTIVE_TASK_ID?.trim();
54
+ if (activeTaskId) {
55
+ try {
56
+ const verdict = evaluateToolCall({
57
+ cwd,
58
+ taskId: activeTaskId,
59
+ call: { toolName: tool, toolInput: input.tool_input ?? {} },
60
+ });
61
+ if (!verdict.allowed) {
62
+ process.stderr.write(
63
+ `[cortex] Blocked by harness capability: ${verdict.reason}\n`,
64
+ );
65
+ process.exit(2);
66
+ }
67
+ } catch (err) {
68
+ // Capability evaluation should never crash the hook — if it does,
69
+ // log and fall through to the existing policy.check rather than
70
+ // accidentally blocking a legitimate tool.
71
+ process.stderr.write(
72
+ `[cortex] capability evaluation failed (${
73
+ err instanceof Error ? err.message : String(err)
74
+ }); deferring to policy.check\n`,
75
+ );
76
+ }
77
+ }
78
+
49
79
  const payload: PolicyCheckPayload = {
50
80
  tool,
51
81
  cwd,
@@ -11,6 +11,17 @@ import {
11
11
  getSessionEventHook,
12
12
  loadPlugins,
13
13
  } from "./plugin.js";
14
+ import {
15
+ WorkflowStartInput,
16
+ WorkflowAdvanceInput,
17
+ WorkflowStatusInput,
18
+ WorkflowEnvelopeInput,
19
+ resolveProjectRoot,
20
+ runWorkflowAdvance,
21
+ runWorkflowEnvelope,
22
+ runWorkflowStart,
23
+ runWorkflowStatus,
24
+ } from "./core/workflow/mcp-tools.js";
14
25
 
15
26
  type ToolPayload = Record<string, unknown>;
16
27
 
@@ -322,6 +333,70 @@ function registerTools(server: McpServer): void {
322
333
  return reloadContextGraph(parsed.force);
323
334
  })
324
335
  );
336
+
337
+ server.registerTool(
338
+ "cortex.workflow.start",
339
+ {
340
+ description:
341
+ "Start a Cortex Harness workflow run for a task. Creates .agents/<task_id>/state.json and returns the first stage's envelope (the prompt the agent should answer).",
342
+ inputSchema: WorkflowStartInput,
343
+ },
344
+ async (input) => executeInstrumentedTool(
345
+ "cortex.workflow.start",
346
+ input,
347
+ async () => runWorkflowStart(WorkflowStartInput.parse(input ?? {}), {
348
+ cwd: resolveProjectRoot(),
349
+ }) as ToolPayload,
350
+ ),
351
+ );
352
+
353
+ server.registerTool(
354
+ "cortex.workflow.advance",
355
+ {
356
+ description:
357
+ "Complete the current stage of a workflow run by writing its artifact and advancing the run pointer. Returns the new run state plus the next stage's envelope (or null when the run is finished, blocked, or failed).",
358
+ inputSchema: WorkflowAdvanceInput,
359
+ },
360
+ async (input) => executeInstrumentedTool(
361
+ "cortex.workflow.advance",
362
+ input,
363
+ async () => runWorkflowAdvance(WorkflowAdvanceInput.parse(input ?? {}), {
364
+ cwd: resolveProjectRoot(),
365
+ }) as ToolPayload,
366
+ ),
367
+ );
368
+
369
+ server.registerTool(
370
+ "cortex.workflow.status",
371
+ {
372
+ description:
373
+ "Read the current run state for a task (current stage, completed stages, outcome). Returns null state when no run exists for the given task_id.",
374
+ inputSchema: WorkflowStatusInput,
375
+ },
376
+ async (input) => executeInstrumentedTool(
377
+ "cortex.workflow.status",
378
+ input,
379
+ async () => runWorkflowStatus(WorkflowStatusInput.parse(input ?? {}), {
380
+ cwd: resolveProjectRoot(),
381
+ }) as ToolPayload,
382
+ ),
383
+ );
384
+
385
+ server.registerTool(
386
+ "cortex.workflow.envelope",
387
+ {
388
+ description:
389
+ "Compose the prompt envelope for a workflow stage without advancing the run. Defaults to the run's current_stage; pass `stage` to dry-run a different stage.",
390
+ inputSchema: WorkflowEnvelopeInput,
391
+ },
392
+ async (input) => executeInstrumentedTool(
393
+ "cortex.workflow.envelope",
394
+ input,
395
+ async () => runWorkflowEnvelope(WorkflowEnvelopeInput.parse(input ?? {}), {
396
+ cwd: resolveProjectRoot(),
397
+ }) as ToolPayload,
398
+ ),
399
+ );
325
400
  }
326
401
 
327
402
  let shutdownCalled = false;