npm - @plaited/acp-harness - Versions diffs - 0.3.2 → 0.4.0 - Mend

@plaited/acp-harness 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/README.md +53 -31
package/bin/cli.ts +15 -0
package/package.json +5 -7
package/src/acp-client.ts +7 -4
package/src/adapter-check.ts +0 -1
package/src/adapter-scaffold.ts +16 -15
package/src/calibrate.ts +28 -8
package/src/capture.ts +114 -33
package/src/grader-loader.ts +3 -3
package/src/harness.ts +4 -0
package/src/headless-cli.ts +433 -0
package/src/headless-history-builder.ts +141 -0
package/src/headless-output-parser.ts +251 -0
package/src/headless-session-manager.ts +389 -0
package/src/headless.schemas.ts +241 -0
package/src/headless.ts +71 -0
package/src/headless.types.ts +19 -0
package/src/integration_tests/acp-claude.spec.ts +170 -0
package/src/integration_tests/acp-gemini.spec.ts +174 -0
package/src/schemas.ts +88 -36
package/src/summarize.ts +4 -8
package/src/tests/acp-client.spec.ts +1 -1
package/src/tests/capture-cli.spec.ts +188 -0
package/src/tests/capture-helpers.spec.ts +229 -67
package/src/tests/constants.spec.ts +121 -0
package/src/tests/fixtures/grader-exec.py +3 -3
package/src/tests/fixtures/grader-module.ts +2 -2
package/src/tests/grader-loader.spec.ts +5 -5
package/src/tests/headless.spec.ts +460 -0
package/src/tests/schemas-cli.spec.ts +142 -0
package/src/tests/schemas.spec.ts +657 -0
package/src/tests/summarize-helpers.spec.ts +3 -3
package/src/tests/trials-cli.spec.ts +145 -0
package/src/trials.ts +6 -19
package/src/validate-refs.ts +1 -1
package/src/tests/acp-integration.docker.ts +0 -214

package/src/headless.schemas.ts ADDED Viewed

@@ -0,0 +1,241 @@
+/**
+ * Zod schemas for headless ACP adapter configuration.
+ *
+ * @remarks
+ * These schemas define how to interact with ANY headless CLI agent via a
+ * schema-driven approach. No hardcoded agent-specific logic - the schema
+ * defines everything: command, flags, output parsing rules.
+ *
+ * @packageDocumentation
+ */
+import { z } from 'zod'
+// ============================================================================
+// Output Event Mapping Schema
+// ============================================================================
+/**
+ * Schema for the criterion that selects which CLI output events a mapping applies to.
+ *
+ * @remarks
+ * Uses JSONPath-like patterns to match events in CLI JSON output
+ * and map them to ACP session update types.
+ *
+ * Both fields are required. This schema only checks that `path` and `value`
+ * are strings; it does not verify that `path` is a well-formed JSONPath
+ * expression. How the two are compared is decided by the consumer of this
+ * config (presumably the output parser - confirm there).
+ */
+export const OutputEventMatchSchema = z.object({
+  /** JSONPath to match event type in CLI output (e.g., "$.type"). Required. */
+  path: z.string(),
+  /** Value to match at the path (e.g., "tool_use"). Required; always a string. */
+  value: z.string(),
+})
+/** Inferred TypeScript type of {@link OutputEventMatchSchema}. */
+export type OutputEventMatch = z.infer<typeof OutputEventMatchSchema>
+/**
+ * Schema for extracting content from matched events.
+ *
+ * @remarks
+ * Paths can be:
+ * - JSONPath expressions (e.g., "$.message.text")
+ * - Literal strings in single quotes (e.g., "'pending'")
+ *
+ * Every field is optional, so an empty object is a valid value. The schema
+ * only checks that provided fields are strings; neither the JSONPath syntax
+ * nor the single-quote literal syntax is validated here.
+ */
+export const OutputEventExtractSchema = z.object({
+  /** JSONPath to extract main content. Optional. */
+  content: z.string().optional(),
+  /** JSONPath to extract title (for tool calls). Optional. */
+  title: z.string().optional(),
+  /** JSONPath to extract status (or literal like "'pending'"). Optional. */
+  status: z.string().optional(),
+})
+/** Inferred TypeScript type of {@link OutputEventExtractSchema}. */
+export type OutputEventExtract = z.infer<typeof OutputEventExtractSchema>
+/**
+ * Schema for mapping CLI output events to ACP update types.
+ *
+ * @remarks
+ * Each mapping specifies:
+ * 1. How to match events (match.path + match.value)
+ * 2. What ACP update type to emit (emitAs)
+ * 3. What content to extract (extract)
+ *
+ * `match` and `emitAs` are required; `extract` may be omitted entirely.
+ */
+export const OutputEventMappingSchema = z.object({
+  /** Matching criteria for CLI output. Required. */
+  match: OutputEventMatchSchema,
+  /**
+   * ACP session update type to emit. Required; must be exactly one of
+   * 'thought', 'tool_call', 'message' or 'plan' - any other string fails validation.
+   */
+  emitAs: z.enum(['thought', 'tool_call', 'message', 'plan']),
+  /** Content extraction configuration. Optional. */
+  extract: OutputEventExtractSchema.optional(),
+})
+/** Inferred TypeScript type of {@link OutputEventMappingSchema}. */
+export type OutputEventMapping = z.infer<typeof OutputEventMappingSchema>
+// ============================================================================
+// Prompt Configuration Schema
+// ============================================================================
+/**
+ * Schema for how to pass prompts to the CLI.
+ *
+ * @remarks
+ * Both fields are optional, so an empty object is a valid prompt configuration.
+ */
+export const PromptConfigSchema = z.object({
+  /** Flag to pass prompt (e.g., "-p", "--prompt"). Omit for stdin. */
+  flag: z.string().optional(),
+  /** Format for stdin input in stream mode. Optional; either 'text' or 'json'. */
+  stdinFormat: z.enum(['text', 'json']).optional(),
+})
+/** Inferred TypeScript type of {@link PromptConfigSchema}. */
+export type PromptConfig = z.infer<typeof PromptConfigSchema>
+// ============================================================================
+// Output Configuration Schema
+// ============================================================================
+/**
+ * Schema for output format configuration.
+ *
+ * @remarks
+ * Describes a flag/value pair; both fields are required strings.
+ */
+export const OutputConfigSchema = z.object({
+  /** Flag for output format (e.g., "--output-format"). Required. */
+  flag: z.string(),
+  /** Value for output format (e.g., "stream-json"). Required. */
+  value: z.string(),
+})
+/** Inferred TypeScript type of {@link OutputConfigSchema}. */
+export type OutputConfig = z.infer<typeof OutputConfigSchema>
+// ============================================================================
+// Resume Configuration Schema
+// ============================================================================
+/**
+ * Schema for session resume support (stream mode).
+ *
+ * @remarks
+ * Both fields are required strings whenever a resume configuration is
+ * supplied. `sessionIdPath` is not checked for JSONPath well-formedness here.
+ */
+export const ResumeConfigSchema = z.object({
+  /** Flag to resume session (e.g., "--resume"). Required. */
+  flag: z.string(),
+  /** JSONPath to extract session ID from output. Required. */
+  sessionIdPath: z.string(),
+})
+/** Inferred TypeScript type of {@link ResumeConfigSchema}. */
+export type ResumeConfig = z.infer<typeof ResumeConfigSchema>
+// ============================================================================
+// Result Configuration Schema
+// ============================================================================
+/**
+ * Schema for final result extraction.
+ *
+ * @remarks
+ * All three fields are required strings. `matchPath` and `matchValue` identify
+ * the output event that carries the final result; `contentPath` says where
+ * in that event the result content lives. Path syntax is not validated here.
+ */
+export const ResultConfigSchema = z.object({
+  /** JSONPath to match result type (e.g., "$.type"). Required. */
+  matchPath: z.string(),
+  /** Value indicating final result (e.g., "result"). Required. */
+  matchValue: z.string(),
+  /** JSONPath to extract result content. Required. */
+  contentPath: z.string(),
+})
+/** Inferred TypeScript type of {@link ResultConfigSchema}. */
+export type ResultConfig = z.infer<typeof ResultConfigSchema>
+// ============================================================================
+// Main Adapter Schema
+// ============================================================================
+/**
+ * Schema for headless ACP adapter configuration.
+ *
+ * @remarks
+ * This schema defines everything needed to interact with a headless CLI agent:
+ * - Command and flags to spawn
+ * - How to pass prompts
+ * - How to parse output
+ * - Session handling mode
+ *
+ * Required fields: `version`, `name`, `command`, `sessionMode`, `prompt`,
+ * `output`, `outputEvents`, `result`. Optional fields: `autoApprove`,
+ * `resume`, `cwdFlag`, `historyTemplate`.
+ *
+ * The "stream mode only" / "iterative mode only" notes on `resume` and
+ * `historyTemplate` describe intended usage; the schema itself does not
+ * cross-check them against `sessionMode`.
+ *
+ * Example (Claude), with the `outputEvents` entries and the result content
+ * path elided:
+ * ```json
+ * {
+ *   "version": 1,
+ *   "name": "claude-headless",
+ *   "command": ["claude"],
+ *   "sessionMode": "stream",
+ *   "prompt": { "flag": "-p" },
+ *   "output": { "flag": "--output-format", "value": "stream-json" },
+ *   "outputEvents": [...],
+ *   "result": { "matchPath": "$.type", "matchValue": "result", "contentPath": "..." }
+ * }
+ * ```
+ */
+export const HeadlessAdapterSchema = z.object({
+  /** Schema version for forward compatibility. Must be the literal number 1. */
+  version: z.literal(1),
+  /** Human-readable adapter name */
+  name: z.string(),
+  /**
+   * Base command to spawn (e.g., ["claude"], ["gemini"]).
+   * NOTE(review): the array length is not checked, so an empty array passes
+   * validation - consider whether at least one element should be required.
+   */
+  command: z.array(z.string()),
+  /**
+   * Session mode determines how multi-turn conversations work:
+   * - 'stream': Keep process alive, multi-turn via stdin
+   * - 'iterative': New process per turn, accumulate context in prompt
+   */
+  sessionMode: z.enum(['stream', 'iterative']),
+  /** How to pass the prompt */
+  prompt: PromptConfigSchema,
+  /** Output format configuration */
+  output: OutputConfigSchema,
+  /** Flags for auto-approval in headless mode (e.g., ["--allowedTools", "*"]). Optional. */
+  autoApprove: z.array(z.string()).optional(),
+  /** Session resume support (intended for stream mode; not enforced by this schema). Optional. */
+  resume: ResumeConfigSchema.optional(),
+  /** Working directory flag (if CLI needs explicit --cwd). Optional. */
+  cwdFlag: z.string().optional(),
+  /** Output event mappings - how to parse CLI output into ACP updates. Required; may be empty. */
+  outputEvents: z.array(OutputEventMappingSchema),
+  /** Final result extraction configuration. Required. */
+  result: ResultConfigSchema,
+  /** Template for formatting conversation history (intended for iterative mode; not enforced by this schema). Optional. */
+  historyTemplate: z.string().optional(),
+})
+/** Inferred TypeScript type of {@link HeadlessAdapterSchema}. */
+export type HeadlessAdapterConfig = z.infer<typeof HeadlessAdapterSchema>
+// ============================================================================
+// Validation Helpers
+// ============================================================================
+/**
+ * Validates and parses a headless adapter configuration.
+ *
+ * @remarks
+ * Thin wrapper around `HeadlessAdapterSchema.parse`. Use this when an invalid
+ * configuration should abort the caller; use the safe-parse variant of the
+ * schema when the failure needs to be inspected instead of thrown.
+ *
+ * @param config - Raw configuration object (e.g., from JSON file)
+ * @returns Validated HeadlessAdapterConfig
+ * @throws ZodError if validation fails
+ */
+export const parseHeadlessConfig = (config: unknown): HeadlessAdapterConfig => {
+  return HeadlessAdapterSchema.parse(config)
+}
+/**
+ * Safely validates a headless adapter configuration.
+ *
+ * @remarks
+ * Thin wrapper around `HeadlessAdapterSchema.safeParse`; validation failures
+ * are reported in the returned object rather than thrown.
+ *
+ * @param config - Raw configuration object
+ * @returns Zod safe-parse result: `{ success: true, data }` when the
+ * configuration is valid, otherwise `{ success: false, error }`
+ */
+export const safeParseHeadlessConfig = (config: unknown) => {
+  return HeadlessAdapterSchema.safeParse(config)
+}

package/src/headless.ts ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * Headless ACP adapter factory - schema-driven adapter for any CLI agent.
+ *
+ * @remarks
+ * Re-exports public API from the headless module. The headless adapter enables
+ * capturing trajectories from ANY headless CLI agent by defining a schema
+ * that describes how to interact with the CLI.
+ *
+ * **CLI Usage:**
+ * ```bash
+ * acp-harness headless --schema ./my-agent.json
+ * ```
+ *
+ * **Programmatic Usage:**
+ * ```typescript
+ * import { parseHeadlessConfig, createSessionManager } from '@plaited/acp-harness/headless'
+ *
+ * const schema = parseHeadlessConfig(jsonConfig)
+ * const sessions = createSessionManager({ schema })
+ * ```
+ *
+ * @packageDocumentation
+ */
+// Schema definitions and parsing
+export {
+  HeadlessAdapterSchema,
+  OutputConfigSchema,
+  OutputEventExtractSchema,
+  OutputEventMappingSchema,
+  OutputEventMatchSchema,
+  PromptConfigSchema,
+  parseHeadlessConfig,
+  ResultConfigSchema,
+  ResumeConfigSchema,
+  safeParseHeadlessConfig,
+} from './headless.schemas.ts'
+// Types
+export type {
+  HeadlessAdapterConfig,
+  OutputConfig,
+  OutputEventExtract,
+  OutputEventMapping,
+  OutputEventMatch,
+  PromptConfig,
+  ResultConfig,
+  ResumeConfig,
+} from './headless.types.ts'
+// CLI entry point
+export { headless } from './headless-cli.ts'
+export type { HistoryBuilder, HistoryBuilderConfig, HistoryTurn } from './headless-history-builder.ts'
+// History builder
+export { createHistoryBuilder } from './headless-history-builder.ts'
+export type {
+  OutputParser,
+  ParsedResult,
+  ParsedUpdate,
+  ResultParseResult,
+  SessionUpdateType,
+} from './headless-output-parser.ts'
+// Output parser
+export { createOutputParser, jsonPath, jsonPathString } from './headless-output-parser.ts'
+export type {
+  PromptResult,
+  Session,
+  SessionManager,
+  SessionManagerConfig,
+  UpdateCallback,
+} from './headless-session-manager.ts'
+// Session manager
+export { createSessionManager } from './headless-session-manager.ts'

package/src/headless.types.ts ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Type exports for headless ACP adapter.
+ *
+ * @remarks
+ * Re-exports all types from the schemas module for external consumers.
+ *
+ * @packageDocumentation
+ */
+export type {
+  HeadlessAdapterConfig,
+  OutputConfig,
+  OutputEventExtract,
+  OutputEventMapping,
+  OutputEventMatch,
+  PromptConfig,
+  ResultConfig,
+  ResumeConfig,
+} from './headless.schemas.ts'

package/src/integration_tests/acp-claude.spec.ts ADDED Viewed

@@ -0,0 +1,170 @@
+/**
+ * Headless Adapter Integration Tests - Claude Code
+ *
+ * @remarks
+ * These tests verify the headless ACP adapter works correctly with Claude Code
+ * using the schema-driven approach from `.claude/skills/acp-adapters/schemas/`.
+ *
+ * Run locally with API key:
+ * ```bash
+ * ANTHROPIC_API_KEY=sk-... bun test ./src/integration_tests/acp-claude.spec.ts
+ * ```
+ *
+ * Prerequisites:
+ * 1. Claude CLI installed (`bunx @anthropic-ai/claude-code`)
+ * 2. API key: `ANTHROPIC_API_KEY` environment variable
+ *
+ * These tests make real API calls and consume credits.
+ *
+ * MCP servers are auto-discovered from project root via:
+ * - `.mcp.json` - MCP server configuration
+ */
+import { afterAll, beforeAll, describe, expect, setDefaultTimeout, test } from 'bun:test'
+import { join } from 'node:path'
+import { type ACPClient, createACPClient } from '../acp-client.ts'
+import { createPrompt, summarizeResponse } from '../acp-helpers.ts'
+// Long timeout for real agent interactions (2 minutes)
+setDefaultTimeout(120000)
+// Use project root as cwd - agents discover MCP servers from config files
+const PROJECT_ROOT = process.cwd()
+// Schema path for Claude headless adapter
+const SCHEMA_PATH = join(PROJECT_ROOT, '.claude/skills/acp-adapters/schemas/claude-headless.json')
+// Get API key from environment
+const API_KEY = process.env.ANTHROPIC_API_KEY ?? ''
+// Skip all tests if no API key is available
+const describeWithApiKey = API_KEY ? describe : describe.skip
+describeWithApiKey('Headless Adapter Integration - Claude', () => {
+  let client: ACPClient
+  beforeAll(async () => {
+    // Use headless adapter with Claude schema
+    client = createACPClient({
+      command: ['bun', 'src/headless-cli.ts', '--', '--schema', SCHEMA_PATH],
+      timeout: 120000, // 2 min timeout for initialization
+      env: {
+        ANTHROPIC_API_KEY: API_KEY,
+      },
+    })
+    await client.connect()
+  })
+  afterAll(async () => {
+    await client?.disconnect()
+  })
+  test('connects and initializes via headless adapter', () => {
+    expect(client.isConnected()).toBe(true)
+    const initResult = client.getInitializeResult()
+    expect(initResult).toBeDefined()
+    expect(initResult?.protocolVersion).toBeDefined()
+  })
+  test('reports agent capabilities', () => {
+    const capabilities = client.getCapabilities()
+    expect(capabilities).toBeDefined()
+  })
+  test('creates session with project cwd', async () => {
+    // Session uses project root - agent discovers MCP servers from .mcp.json
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    expect(session).toBeDefined()
+    expect(session.id).toBeDefined()
+    expect(typeof session.id).toBe('string')
+  })
+  test('sends prompt and receives response', async () => {
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    // Simple prompt that doesn't require tools
+    const { result, updates } = await client.promptSync(
+      session.id,
+      createPrompt('What is 2 + 2? Reply with just the number.'),
+    )
+    expect(result).toBeDefined()
+    expect(updates).toBeInstanceOf(Array)
+    // Summarize and verify response structure
+    const summary = summarizeResponse(updates)
+    expect(summary.text).toBeDefined()
+    expect(summary.text.length).toBeGreaterThan(0)
+  })
+  test('streaming prompt yields updates', async () => {
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    const events: string[] = []
+    for await (const event of client.prompt(session.id, createPrompt('Say "hello" and nothing else.'))) {
+      events.push(event.type)
+      if (event.type === 'complete') {
+        expect(event.result).toBeDefined()
+      }
+    }
+    expect(events).toContain('complete')
+  })
+  test('uses MCP server from project config', async () => {
+    // This test verifies that Claude discovers MCP servers from .mcp.json
+    // The bun-docs MCP server is configured at project root
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    // Query the bun-docs MCP server (configured in .mcp.json)
+    const { updates } = await client.promptSync(
+      session.id,
+      createPrompt(
+        'Use the bun-docs MCP server to search for information about Bun.serve(). ' +
+          'What are the key options for creating an HTTP server with Bun?',
+      ),
+    )
+    const summary = summarizeResponse(updates)
+    // Response should contain Bun server-related information
+    expect(summary.text.length).toBeGreaterThan(0)
+    // Should mention server/HTTP-related concepts from Bun docs
+    expect(summary.text.toLowerCase()).toMatch(/serve|server|http|port|fetch|handler/)
+  })
+  test('multi-turn conversation maintains context', async () => {
+    // Multi-turn: multiple prompts to same session via headless adapter
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    // Turn 1: Establish context
+    const { updates: turn1Updates } = await client.promptSync(
+      session.id,
+      createPrompt('Remember this number: 42. Just confirm you have it.'),
+    )
+    const turn1Summary = summarizeResponse(turn1Updates)
+    expect(turn1Summary.text).toMatch(/42|forty.?two|remember/i)
+    // Turn 2: Reference previous context
+    const { updates: turn2Updates } = await client.promptSync(
+      session.id,
+      createPrompt('What number did I ask you to remember? Reply with just the number.'),
+    )
+    const turn2Summary = summarizeResponse(turn2Updates)
+    expect(turn2Summary.text).toMatch(/42/)
+  })
+})

package/src/integration_tests/acp-gemini.spec.ts ADDED Viewed

@@ -0,0 +1,174 @@
+/**
+ * Headless Adapter Integration Tests - Gemini CLI
+ *
+ * @remarks
+ * These tests verify the headless ACP adapter works correctly with Gemini CLI
+ * using the schema-driven approach from `.claude/skills/acp-adapters/schemas/`.
+ *
+ * Run locally with API key:
+ * ```bash
+ * GEMINI_API_KEY=... bun test ./src/integration_tests/acp-gemini.spec.ts
+ * ```
+ *
+ * Prerequisites:
+ * 1. Gemini CLI installed (`npm install -g @google/gemini-cli`)
+ * 2. API key: `GEMINI_API_KEY` environment variable
+ *
+ * These tests make real API calls and consume credits.
+ *
+ * MCP servers are auto-discovered from project root via:
+ * - `.gemini/settings.json` - Gemini MCP server configuration
+ */
+import { afterAll, beforeAll, describe, expect, setDefaultTimeout, test } from 'bun:test'
+import { join } from 'node:path'
+import { type ACPClient, createACPClient } from '../acp-client.ts'
+import { createPrompt, summarizeResponse } from '../acp-helpers.ts'
+// Long timeout for real agent interactions (2 minutes)
+setDefaultTimeout(120000)
+// Use project root as cwd - agents discover MCP servers from config files
+const PROJECT_ROOT = process.cwd()
+// Schema path for Gemini headless adapter
+const SCHEMA_PATH = join(PROJECT_ROOT, '.claude/skills/acp-adapters/schemas/gemini-headless.json')
+// Gemini CLI is given its API key via GEMINI_API_KEY
+// Falls back to an empty string when unset, which skips the whole suite below
+const GEMINI_API_KEY = process.env.GEMINI_API_KEY ?? ''
+// Skip all tests if no API key is available
+const describeWithApiKey = GEMINI_API_KEY ? describe : describe.skip
+describeWithApiKey('Headless Adapter Integration - Gemini', () => {
+  let client: ACPClient
+  beforeAll(async () => {
+    // Use headless adapter with Gemini schema
+    // Forward GEMINI_API_KEY to the adapter process through the client's `env` option
+    client = createACPClient({
+      command: ['bun', 'src/headless-cli.ts', '--', '--schema', SCHEMA_PATH],
+      timeout: 120000, // 2 min timeout for initialization
+      env: {
+        GEMINI_API_KEY,
+      },
+    })
+    await client.connect()
+  })
+  afterAll(async () => {
+    await client?.disconnect()
+  })
+  test('connects and initializes via headless adapter', () => {
+    expect(client.isConnected()).toBe(true)
+    const initResult = client.getInitializeResult()
+    expect(initResult).toBeDefined()
+    expect(initResult?.protocolVersion).toBeDefined()
+  })
+  test('reports agent capabilities', () => {
+    const capabilities = client.getCapabilities()
+    expect(capabilities).toBeDefined()
+  })
+  test('creates session with project cwd', async () => {
+    // Session uses project root - agent discovers MCP servers from .gemini/settings.json
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    expect(session).toBeDefined()
+    expect(session.id).toBeDefined()
+    expect(typeof session.id).toBe('string')
+  })
+  test('sends prompt and receives response', async () => {
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    // Simple prompt that doesn't require tools
+    const { result, updates } = await client.promptSync(
+      session.id,
+      createPrompt('What is 2 + 2? Reply with just the number.'),
+    )
+    expect(result).toBeDefined()
+    expect(updates).toBeInstanceOf(Array)
+    // Summarize and verify response structure
+    const summary = summarizeResponse(updates)
+    expect(summary.text).toBeDefined()
+    expect(summary.text.length).toBeGreaterThan(0)
+    // Should contain "4" somewhere in the response
+    expect(summary.text).toMatch(/4/)
+  })
+  test('streaming prompt yields updates', async () => {
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    const events: string[] = []
+    for await (const event of client.prompt(session.id, createPrompt('Say "hello" and nothing else.'))) {
+      events.push(event.type)
+      if (event.type === 'complete') {
+        expect(event.result).toBeDefined()
+      }
+    }
+    expect(events).toContain('complete')
+  })
+  test('uses MCP server from project config', async () => {
+    // This test verifies that Gemini discovers MCP servers from .gemini/settings.json
+    // The agent-client-protocol MCP server is configured at project root
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    // Query the agent-client-protocol MCP server (configured in .gemini/settings.json)
+    const { updates } = await client.promptSync(
+      session.id,
+      createPrompt(
+        'Use the agent-client-protocol MCP server to search for information about ACP. ' +
+          'What is the Agent Client Protocol and what problem does it solve?',
+      ),
+    )
+    const summary = summarizeResponse(updates)
+    // Response should contain ACP-related information
+    expect(summary.text.length).toBeGreaterThan(0)
+    // Should mention protocol/agent-related concepts
+    expect(summary.text.toLowerCase()).toMatch(/agent|protocol|client|json-rpc|stdio/)
+  })
+  test('multi-turn conversation maintains context (iterative mode)', async () => {
+    // Multi-turn via headless adapter in iterative mode (history accumulation)
+    const session = await client.createSession({
+      cwd: PROJECT_ROOT,
+    })
+    // Turn 1: Establish context
+    const { updates: turn1Updates } = await client.promptSync(
+      session.id,
+      createPrompt('Remember this number: 42. Just confirm you have it.'),
+    )
+    const turn1Summary = summarizeResponse(turn1Updates)
+    expect(turn1Summary.text).toMatch(/42|forty.?two|remember/i)
+    // Turn 2: Reference previous context
+    const { updates: turn2Updates } = await client.promptSync(
+      session.id,
+      createPrompt('What number did I ask you to remember? Reply with just the number.'),
+    )
+    const turn2Summary = summarizeResponse(turn2Updates)
+    expect(turn2Summary.text).toMatch(/42/)
+  })
+})