npm - @poncho-ai/harness - Versions diffs - 0.2.0 - Mend

@poncho-ai/harness 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/.turbo/turbo-build.log +14 -0
package/.turbo/turbo-test.log +22 -0
package/CHANGELOG.md +16 -0
package/LICENSE +21 -0
package/dist/index.d.ts +416 -0
package/dist/index.js +3015 -0
package/package.json +53 -0
package/src/agent-parser.ts +127 -0
package/src/anthropic-client.ts +134 -0
package/src/config.ts +141 -0
package/src/default-tools.ts +89 -0
package/src/harness.ts +522 -0
package/src/index.ts +17 -0
package/src/latitude-capture.ts +108 -0
package/src/local-tools.ts +108 -0
package/src/mcp.ts +287 -0
package/src/memory.ts +700 -0
package/src/model-client.ts +44 -0
package/src/model-factory.ts +14 -0
package/src/openai-client.ts +169 -0
package/src/skill-context.ts +259 -0
package/src/skill-tools.ts +357 -0
package/src/state.ts +1017 -0
package/src/telemetry.ts +108 -0
package/src/tool-dispatcher.ts +69 -0
package/test/agent-parser.test.ts +39 -0
package/test/harness.test.ts +716 -0
package/test/mcp.test.ts +82 -0
package/test/memory.test.ts +50 -0
package/test/model-factory.test.ts +16 -0
package/test/state.test.ts +43 -0
package/test/telemetry.test.ts +57 -0
package/tsconfig.json +8 -0

package/src/harness.ts ADDED Viewed

@@ -0,0 +1,522 @@
+import { randomUUID } from "node:crypto";
+import type {
+  AgentEvent,
+  Message,
+  RunInput,
+  RunResult,
+  ToolContext,
+  ToolDefinition,
+} from "@poncho-ai/sdk";
+import { parseAgentFile, renderAgentPrompt, type ParsedAgent } from "./agent-parser.js";
+import { loadPonchoConfig, resolveMemoryConfig, type PonchoConfig } from "./config.js";
+import { createDefaultTools, createWriteTool } from "./default-tools.js";
+import { LatitudeCapture } from "./latitude-capture.js";
+import {
+  createMemoryStore,
+  createMemoryTools,
+  type MemoryStore,
+} from "./memory.js";
+import { LocalMcpBridge } from "./mcp.js";
+import type { ModelClient, ModelResponse } from "./model-client.js";
+import { createModelClient } from "./model-factory.js";
+import { buildSkillContextWindow, loadSkillMetadata } from "./skill-context.js";
+import { createSkillTools } from "./skill-tools.js";
+import { ToolDispatcher } from "./tool-dispatcher.js";
+/**
+ * Construction options for `AgentHarness`. Every field is optional.
+ */
+export interface HarnessOptions {
+  /** Directory the agent operates in; the constructor falls back to `process.cwd()`. */
+  workingDir?: string;
+  /** Deployment environment; the constructor falls back to `"development"`. */
+  environment?: "development" | "staging" | "production";
+  /** Extra tools registered with the dispatcher at construction time. */
+  toolDefinitions?: Array<ToolDefinition>;
+  /**
+   * Consulted before executing a tool whose definition sets `requiresApproval`.
+   * Return (or resolve to) `true` to allow the call. When no handler is
+   * supplied, such calls are denied.
+   */
+  approvalHandler?: (request: {
+    approvalId: string;
+    runId: string;
+    step: number;
+    tool: string;
+    input: Record<string, unknown>;
+  }) => boolean | Promise<boolean>;
+}
+/**
+ * Aggregate value returned by `AgentHarness.runToCompletion`.
+ */
+export interface HarnessRunOutput {
+  /** Identifier carried by the `run:started` event (empty string if none was seen). */
+  runId: string;
+  /** Every event yielded by the run, in emission order. */
+  events: Array<AgentEvent>;
+  /** Input history, the task as a user message, and the final assistant reply on completion. */
+  messages: Array<Message>;
+  /** Final outcome of the run; an error-status result when the run did not complete. */
+  result: RunResult;
+}
+/** Current wall-clock time in epoch milliseconds. */
+const now = (): number => Date.now();
+/** Upper bound on how many conversation messages are sent to the model per call. */
+const MAX_CONTEXT_MESSAGES = 40;
+/**
+ * Limits the conversation to its most recent `MAX_CONTEXT_MESSAGES` entries.
+ * The input array itself is returned (not a copy) when it already fits.
+ */
+const trimMessageWindow = (messages: Message[]): Message[] => {
+  if (messages.length <= MAX_CONTEXT_MESSAGES) {
+    return messages;
+  }
+  // A negative start index counts from the end, keeping only the tail.
+  return messages.slice(-MAX_CONTEXT_MESSAGES);
+};
+/**
+ * Extra system-prompt section appended by `AgentHarness.run` only when the
+ * harness environment is "development". The text is sent to the model verbatim.
+ */
+const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
+You are running locally in development mode. Treat this as an editable agent workspace.
+When users ask about customization:
+- Explain and edit \`poncho.config.js\` for model/provider, storage+memory, auth, telemetry, and MCP settings.
+- Help create or update local skills under \`skills/<skill-name>/SKILL.md\`.
+- For executable skills, add JavaScript/TypeScript scripts under \`skills/<skill-name>/scripts/\` and run them via \`run_skill_script\`.
+- For setup, skills, MCP, auth, storage, telemetry, or "how do I..." questions, proactively read \`README.md\` with \`read_file\` before answering.
+- Prefer quoting concrete commands and examples from \`README.md\` over guessing.
+- Keep edits minimal, preserve unrelated settings/code, and summarize what changed.`;
+/**
+ * Runs an agent defined in a working directory: parses the agent file, loads
+ * the Poncho config, registers built-in/skill/memory/MCP tools with a
+ * dispatcher, and drives the model/tool loop while emitting `AgentEvent`s.
+ */
+export class AgentHarness {
+  private readonly workingDir: string;
+  private readonly environment: HarnessOptions["environment"];
+  // Starts as a default "anthropic" client; replaced in initialize() once the agent's provider is known.
+  private modelClient: ModelClient;
+  private readonly dispatcher = new ToolDispatcher();
+  private readonly approvalHandler?: HarnessOptions["approvalHandler"];
+  private skillContextWindow = "";
+  private memoryStore?: MemoryStore;
+  // Doubles as the "already initialized" marker checked by run().
+  private parsedAgent?: ParsedAgent;
+  private mcpBridge?: LocalMcpBridge;
+  /**
+   * Looks up a built-in tool flag from config. A value set for the current
+   * environment (falling back to "development") wins over `tools.defaults`.
+   *
+   * @returns the configured flag, or `undefined` when neither level sets it.
+   */
+  private getConfiguredToolFlag(
+    config: PonchoConfig | undefined,
+    name: keyof NonNullable<NonNullable<PonchoConfig["tools"]>["defaults"]>,
+  ): boolean | undefined {
+    const defaults = config?.tools?.defaults;
+    const environment = this.environment ?? "development";
+    const envOverrides = config?.tools?.byEnvironment?.[environment];
+    return envOverrides?.[name] ?? defaults?.[name];
+  }
+  /**
+   * Decides whether a built-in tool should be registered.
+   * `write_file` needs both the environment gate (shouldEnableWriteTool) and a
+   * config flag that is not explicitly `false`; `list_directory` and
+   * `read_file` are enabled unless explicitly `false`; any other name is enabled.
+   */
+  private isBuiltInToolEnabled(config: PonchoConfig | undefined, name: string): boolean {
+    if (name === "write_file") {
+      const allowedByEnvironment = this.shouldEnableWriteTool();
+      const configured = this.getConfiguredToolFlag(config, "write_file");
+      return allowedByEnvironment && configured !== false;
+    }
+    if (name === "list_directory") {
+      const configured = this.getConfiguredToolFlag(config, "list_directory");
+      return configured !== false;
+    }
+    if (name === "read_file") {
+      const configured = this.getConfiguredToolFlag(config, "read_file");
+      return configured !== false;
+    }
+    return true;
+  }
+  /**
+   * Registers a tool only when the dispatcher has no tool of the same name,
+   * so tools registered earlier (e.g. via the constructor) are not replaced.
+   */
+  private registerIfMissing(tool: ToolDefinition): void {
+    if (!this.dispatcher.get(tool.name)) {
+      this.dispatcher.register(tool);
+    }
+  }
+  /** Registers the enabled default tools, then the write tool if it is enabled. */
+  private registerConfiguredBuiltInTools(config: PonchoConfig | undefined): void {
+    for (const tool of createDefaultTools(this.workingDir)) {
+      if (this.isBuiltInToolEnabled(config, tool.name)) {
+        this.registerIfMissing(tool);
+      }
+    }
+    if (this.isBuiltInToolEnabled(config, "write_file")) {
+      this.registerIfMissing(createWriteTool(this.workingDir));
+    }
+  }
+  /**
+   * Environment gate for the write tool. The `PONCHO_FS_WRITE` env var
+   * (case-insensitive) forces it on with "1"/"true"/"yes" and off with
+   * "0"/"false"/"no"; otherwise it is on for every environment except "production".
+   */
+  private shouldEnableWriteTool(): boolean {
+    const override = process.env.PONCHO_FS_WRITE?.toLowerCase();
+    if (override === "1" || override === "true" || override === "yes") {
+      return true;
+    }
+    if (override === "0" || override === "false" || override === "no") {
+      return false;
+    }
+    return this.environment !== "production";
+  }
+  /**
+   * Stores options and registers any caller-supplied tools. No file or network
+   * work is started here; that happens in initialize().
+   */
+  constructor(options: HarnessOptions = {}) {
+    this.workingDir = options.workingDir ?? process.cwd();
+    this.environment = options.environment ?? "development";
+    this.modelClient = createModelClient("anthropic");
+    this.approvalHandler = options.approvalHandler;
+    if (options.toolDefinitions?.length) {
+      this.dispatcher.registerMany(options.toolDefinitions);
+    }
+  }
+  /**
+   * Loads the agent definition and config, then wires up the model client,
+   * skills, optional memory, and MCP tools. Called lazily by run() when the
+   * agent has not been parsed yet.
+   *
+   * NOTE(review): `parsedAgent` is assigned on the first line, so if a later
+   * step here throws, a subsequent run() will skip initialize() and proceed
+   * with whatever was set up before the failure — confirm this is intended.
+   */
+  async initialize(): Promise<void> {
+    this.parsedAgent = await parseAgentFile(this.workingDir);
+    const config = await loadPonchoConfig(this.workingDir);
+    this.registerConfiguredBuiltInTools(config);
+    const provider = this.parsedAgent.frontmatter.model?.provider ?? "anthropic";
+    const memoryConfig = resolveMemoryConfig(config);
+    // Config values take precedence over the corresponding environment variables.
+    const latitudeCapture = new LatitudeCapture({
+      apiKey:
+        config?.telemetry?.latitude?.apiKey ?? process.env.LATITUDE_API_KEY,
+      projectId:
+        config?.telemetry?.latitude?.projectId ??
+        process.env.LATITUDE_PROJECT_ID,
+      path:
+        config?.telemetry?.latitude?.path ??
+        config?.telemetry?.latitude?.documentPath ??
+        process.env.LATITUDE_PATH ??
+        process.env.LATITUDE_DOCUMENT_PATH,
+      defaultPath: `agents/${this.parsedAgent.frontmatter.name}/model-call`,
+    });
+    this.modelClient = createModelClient(provider, { latitudeCapture });
+    const bridge = new LocalMcpBridge(config);
+    this.mcpBridge = bridge;
+    const extraSkillPaths = config?.skillPaths;
+    const skillMetadata = await loadSkillMetadata(this.workingDir, extraSkillPaths);
+    this.skillContextWindow = buildSkillContextWindow(skillMetadata);
+    this.dispatcher.registerMany(createSkillTools(skillMetadata));
+    // Memory store and its tools exist only when memory is enabled in config.
+    if (memoryConfig?.enabled) {
+      this.memoryStore = createMemoryStore(
+        this.parsedAgent.frontmatter.name,
+        memoryConfig,
+        { workingDir: this.workingDir },
+      );
+      this.dispatcher.registerMany(
+        createMemoryTools(this.memoryStore, {
+          maxRecallConversations: memoryConfig.maxRecallConversations,
+        }),
+      );
+    }
+    await bridge.startLocalServers();
+    this.dispatcher.registerMany(await bridge.loadTools());
+  }
+  /** Stops local MCP servers if a bridge was created; a no-op before initialize(). */
+  async shutdown(): Promise<void> {
+    await this.mcpBridge?.stopLocalServers();
+  }
+  /** Returns the tools currently registered with the dispatcher. */
+  listTools(): ToolDefinition[] {
+    return this.dispatcher.list();
+  }
+  /**
+   * Executes one agent run as a stream of events.
+   *
+   * Each step calls the model; a response without tool calls ends the run with
+   * `run:completed`. Otherwise the requested tools are approval-checked,
+   * executed as a batch, and their results appended to the conversation before
+   * the next step. The run ends with `run:error` on timeout (checked at the
+   * start of each step) or when the step limit is reached.
+   *
+   * @param input task text plus optional prior messages and parameters.
+   * @throws Error when a streaming model client finishes without a "final" event.
+   */
+  async *run(input: RunInput): AsyncGenerator<AgentEvent> {
+    if (!this.parsedAgent) {
+      await this.initialize();
+    }
+    const agent = this.parsedAgent as ParsedAgent;
+    const runId = `run_${randomUUID()}`;
+    const start = now();
+    // Limits default to 50 steps and 300 seconds when the agent file sets none.
+    const maxSteps = agent.frontmatter.limits?.maxSteps ?? 50;
+    const timeoutMs = (agent.frontmatter.limits?.timeout ?? 300) * 1000;
+    // Copy so the caller's message array is never mutated.
+    const messages: Message[] = [...(input.messages ?? [])];
+    // Local record of emitted events; it is only written to within this method.
+    const events: AgentEvent[] = [];
+    const systemPrompt = renderAgentPrompt(agent, {
+      parameters: input.parameters,
+      runtime: {
+        runId,
+        agentId: agent.frontmatter.name,
+        environment: this.environment,
+        workingDir: this.workingDir,
+      },
+    });
+    const developmentContext =
+      this.environment === "development" ? `\n\n${DEVELOPMENT_MODE_CONTEXT}` : "";
+    const promptWithSkills = this.skillContextWindow
+      ? `${systemPrompt}${developmentContext}\n\n${this.skillContextWindow}`
+      : `${systemPrompt}${developmentContext}`;
+    const mainMemory = this.memoryStore
+      ? await this.memoryStore.getMainMemory()
+      : undefined;
+    // Cap persistent memory at 4000 characters before injecting it into the prompt.
+    const boundedMainMemory =
+      mainMemory && mainMemory.content.length > 4000
+        ? `${mainMemory.content.slice(0, 4000)}\n...[truncated]`
+        : mainMemory?.content;
+    const memoryContext =
+      boundedMainMemory && boundedMainMemory.trim().length > 0
+        ? `
+## Persistent Memory
+${boundedMainMemory.trim()}`
+        : "";
+    // Final system prompt: agent prompt (+ dev context, skills, memory) followed by integrity rules.
+    const integrityPrompt = `${promptWithSkills}${memoryContext}
+## Execution Integrity
+- Do not claim that you executed a tool unless you actually emitted a tool call in this run.
+- Do not fabricate "Tool Used" or "Tool Result" logs as plain text.
+- Never output faux execution transcripts, markdown tool logs, or "Tool Used/Result" sections.
+- If no suitable tool is available, explicitly say that and ask for guidance.`;
+    // Records the event locally and hands it back so it can be yielded in one expression.
+    const pushEvent = (event: AgentEvent): AgentEvent => {
+      events.push(event);
+      return event;
+    };
+    yield pushEvent({
+      type: "run:started",
+      runId,
+      agentId: agent.frontmatter.name,
+    });
+    // The task is appended after any prior messages as the newest user turn.
+    messages.push({
+      role: "user",
+      content: input.task,
+      metadata: { timestamp: now(), id: randomUUID() },
+    });
+    let responseText = "";
+    let totalInputTokens = 0;
+    let totalOutputTokens = 0;
+    for (let step = 1; step <= maxSteps; step += 1) {
+      // The timeout is only evaluated between steps; a step in progress is not interrupted here.
+      if (now() - start > timeoutMs) {
+        yield pushEvent({
+          type: "run:error",
+          runId,
+          error: {
+            code: "TIMEOUT",
+            message: `Run exceeded timeout of ${Math.floor(timeoutMs / 1000)}s`,
+          },
+        });
+        return;
+      }
+      const stepStart = now();
+      yield pushEvent({ type: "step:started", step });
+      yield pushEvent({ type: "model:request", tokens: 0 });
+      const modelCallInput = {
+        modelName: agent.frontmatter.model?.name ?? "claude-opus-4-5",
+        temperature: agent.frontmatter.model?.temperature,
+        maxTokens: agent.frontmatter.model?.maxTokens,
+        systemPrompt: integrityPrompt,
+        // Only the most recent messages are sent; the full history stays in `messages`.
+        messages: trimMessageWindow(messages),
+        tools: this.dispatcher.list(),
+      };
+      let modelResponse: ModelResponse | undefined;
+      let streamedAnyChunk = false;
+      // Prefer streaming when the client supports it; non-empty chunks are forwarded as they arrive.
+      if (this.modelClient.generateStream) {
+        for await (const streamEvent of this.modelClient.generateStream(modelCallInput)) {
+          if (streamEvent.type === "chunk" && streamEvent.content.length > 0) {
+            streamedAnyChunk = true;
+            yield pushEvent({ type: "model:chunk", content: streamEvent.content });
+          }
+          if (streamEvent.type === "final") {
+            modelResponse = streamEvent.response;
+          }
+        }
+      } else {
+        modelResponse = await this.modelClient.generate(modelCallInput);
+      }
+      if (!modelResponse) {
+        throw new Error("Model response ended without final payload");
+      }
+      totalInputTokens += modelResponse.usage.input;
+      totalOutputTokens += modelResponse.usage.output;
+      // When nothing was streamed, emit the whole text as a single chunk so consumers still see it.
+      if (!streamedAnyChunk && modelResponse.text) {
+        yield pushEvent({ type: "model:chunk", content: modelResponse.text });
+      }
+      yield pushEvent({
+        type: "model:response",
+        usage: {
+          input: modelResponse.usage.input,
+          output: modelResponse.usage.output,
+          cached: 0,
+        },
+      });
+      // No tool calls means the model produced its final answer: finish the run.
+      if (modelResponse.toolCalls.length === 0) {
+        responseText = modelResponse.text;
+        yield pushEvent({
+          type: "step:completed",
+          step,
+          duration: now() - stepStart,
+        });
+        const result: RunResult = {
+          status: "completed",
+          response: responseText,
+          steps: step,
+          tokens: {
+            input: totalInputTokens,
+            output: totalOutputTokens,
+            cached: 0,
+          },
+          duration: now() - start,
+        };
+        yield pushEvent({ type: "run:completed", runId, result });
+        return;
+      }
+      const toolContext: ToolContext = {
+        runId,
+        agentId: agent.frontmatter.name,
+        step,
+        workingDir: this.workingDir,
+        parameters: input.parameters ?? {},
+      };
+      const toolResultsForModel: Array<{
+        type: "tool_result";
+        tool_use_id: string;
+        content: string;
+      }> = [];
+      const approvedCalls: Array<{
+        id: string;
+        name: string;
+        input: Record<string, unknown>;
+      }> = [];
+      // Approval pass: calls that need approval and are not granted get an error result and are not executed.
+      for (const call of modelResponse.toolCalls) {
+        yield pushEvent({ type: "tool:started", tool: call.name, input: call.input });
+        const definition = this.dispatcher.get(call.name);
+        if (definition?.requiresApproval) {
+          const approvalId = `approval_${randomUUID()}`;
+          yield pushEvent({
+            type: "tool:approval:required",
+            tool: call.name,
+            input: call.input,
+            approvalId,
+          });
+          // Without a handler the call is denied by default.
+          const approved = this.approvalHandler
+            ? await this.approvalHandler({
+                tool: call.name,
+                input: call.input,
+                runId,
+                step,
+                approvalId,
+              })
+            : false;
+          if (!approved) {
+            yield pushEvent({
+              type: "tool:approval:denied",
+              approvalId,
+              reason: "No approval handler granted execution",
+            });
+            yield pushEvent({
+              type: "tool:error",
+              tool: call.name,
+              error: "Tool execution denied by approval policy",
+              recoverable: true,
+            });
+            toolResultsForModel.push({
+              type: "tool_result",
+              tool_use_id: call.id,
+              content: "Tool error: Tool execution denied by approval policy",
+            });
+            continue;
+          }
+          yield pushEvent({ type: "tool:approval:granted", approvalId });
+        }
+        approvedCalls.push({
+          id: call.id,
+          name: call.name,
+          input: call.input,
+        });
+      }
+      const batchStart = now();
+      const batchResults =
+        approvedCalls.length > 0
+          ? await this.dispatcher.executeBatch(approvedCalls, toolContext)
+          : [];
+      for (const result of batchResults) {
+        if (result.error) {
+          yield pushEvent({
+            type: "tool:error",
+            tool: result.tool,
+            error: result.error,
+            recoverable: true,
+          });
+          toolResultsForModel.push({
+            type: "tool_result",
+            tool_use_id: result.callId,
+            content: `Tool error: ${result.error}`,
+          });
+        } else {
+          yield pushEvent({
+            type: "tool:completed",
+            tool: result.tool,
+            output: result.output,
+            // Measured from the start of the batch, not per individual tool.
+            duration: now() - batchStart,
+          });
+          toolResultsForModel.push({
+            type: "tool_result",
+            tool_use_id: result.callId,
+            content: JSON.stringify(result.output ?? null),
+          });
+        }
+      }
+      // Record the assistant turn (text, or a placeholder when it only called tools) and the tool results.
+      messages.push({
+        role: "assistant",
+        content: modelResponse.text || `[tool calls: ${modelResponse.toolCalls.length}]`,
+        metadata: { timestamp: now(), id: randomUUID(), step },
+      });
+      messages.push({
+        role: "tool",
+        content: JSON.stringify(toolResultsForModel),
+        metadata: { timestamp: now(), id: randomUUID(), step },
+      });
+      yield pushEvent({
+        type: "step:completed",
+        step,
+        duration: now() - stepStart,
+      });
+    }
+    // Step budget exhausted without a final answer.
+    // NOTE(review): unlike every other event, this one is yielded without pushEvent — confirm whether that is intentional.
+    yield {
+      type: "run:error",
+      runId,
+      error: {
+        code: "MAX_STEPS_EXCEEDED",
+        message: `Run reached maximum of ${maxSteps} steps`,
+      },
+    };
+  }
+  /**
+   * Drains run() and returns the collected events together with a final result.
+   *
+   * The returned `messages` contain the input history, the task, and — on
+   * completion — the final assistant response; intermediate tool messages are
+   * not included. On `run:error` the result has status "error" with steps,
+   * tokens and duration all reported as zero. Exceptions thrown by run() are
+   * not caught here.
+   */
+  async runToCompletion(input: RunInput): Promise<HarnessRunOutput> {
+    const events: AgentEvent[] = [];
+    let runId = "";
+    let finalResult: RunResult | undefined;
+    const messages: Message[] = [...(input.messages ?? [])];
+    messages.push({ role: "user", content: input.task });
+    for await (const event of this.run(input)) {
+      events.push(event);
+      if (event.type === "run:started") {
+        runId = event.runId;
+      }
+      if (event.type === "run:completed") {
+        finalResult = event.result;
+        messages.push({
+          role: "assistant",
+          content: event.result.response ?? "",
+        });
+      }
+      if (event.type === "run:error") {
+        finalResult = {
+          status: "error",
+          response: event.error.message,
+          steps: 0,
+          tokens: { input: 0, output: 0, cached: 0 },
+          duration: 0,
+        };
+      }
+    }
+    return {
+      runId,
+      events,
+      messages,
+      // Fallback for a generator that ended without a terminal run event.
+      result:
+        finalResult ??
+        ({
+          status: "error",
+          response: "Run ended unexpectedly",
+          steps: 0,
+          tokens: { input: 0, output: 0, cached: 0 },
+          duration: 0,
+        } satisfies RunResult),
+    };
+  }
+}

package/src/index.ts ADDED Viewed

@@ -0,0 +1,17 @@
+export * from "./agent-parser.js";
+export * from "./config.js";
+export * from "./default-tools.js";
+export * from "./harness.js";
+export * from "./latitude-capture.js";
+export * from "./memory.js";
+export * from "./mcp.js";
+export * from "./model-client.js";
+export * from "./model-factory.js";
+export * from "./openai-client.js";
+export * from "./skill-context.js";
+export * from "./skill-tools.js";
+export * from "./state.js";
+export * from "./telemetry.js";
+export * from "./tool-dispatcher.js";
+export { defineTool } from "@poncho-ai/sdk";
+export type { ToolDefinition } from "@poncho-ai/sdk";

package/src/latitude-capture.ts ADDED Viewed

@@ -0,0 +1,108 @@
+/**
+ * Settings accepted by `LatitudeCapture`. Missing values fall back to the
+ * `LATITUDE_*` environment variables read in its constructor.
+ */
+export interface LatitudeCaptureConfig {
+  /** Latitude API key; falls back to `LATITUDE_API_KEY`. Capture is disabled without one. */
+  apiKey?: string;
+  /** Project id as a number or numeric string; falls back to `LATITUDE_PROJECT_ID`. */
+  projectId?: number | string;
+  /** Explicit document path; takes precedence over env vars and `defaultPath`. */
+  path?: string;
+  /** Path used only when neither `path` nor a path env var is set. */
+  defaultPath?: string;
+}
+/**
+ * Normalizes a document path: trims surrounding whitespace, replaces every
+ * character other than letters, digits, `-`, `_`, `/` and `.` with `-`, and
+ * collapses consecutive dashes into one.
+ */
+const sanitizePath = (value: string): string => {
+  const trimmed = value.trim();
+  const dashed = trimmed.replace(/[^a-zA-Z0-9\-_/\.]/g, "-");
+  return dashed.replace(/-+/g, "-");
+};
+/** Minimal surface of the Latitude telemetry client that this module relies on. */
+type TelemetryCapturer = {
+  capture: <T>(
+    context: { projectId: number; path: string },
+    fn: () => Promise<T>,
+  ) => Promise<T>;
+};
+/**
+ * Optional wrapper that routes model calls through Latitude telemetry.
+ *
+ * Capture is active only when an API key, a numeric project id and a path are
+ * all available; otherwise `capture` simply runs the supplied function. The
+ * telemetry SDK is loaded lazily on the first captured call.
+ */
+export class LatitudeCapture {
+  private readonly apiKey?: string;
+  // Memoized so the SDK is imported and instrumented at most once per instance.
+  private telemetryPromise?: Promise<TelemetryCapturer | undefined>;
+  private readonly projectId?: number;
+  private readonly path?: string;
+  /**
+   * Resolves settings from `config`, falling back to `LATITUDE_API_KEY`,
+   * `LATITUDE_PROJECT_ID`, `LATITUDE_PATH` and `LATITUDE_DOCUMENT_PATH`.
+   * Without an API key nothing else is resolved and capture stays disabled.
+   */
+  constructor(config?: LatitudeCaptureConfig) {
+    this.apiKey = config?.apiKey ?? process.env.LATITUDE_API_KEY;
+    if (!this.apiKey) {
+      return;
+    }
+    const rawProjectId = config?.projectId ?? process.env.LATITUDE_PROJECT_ID;
+    const projectIdNumber =
+      typeof rawProjectId === "number"
+        ? rawProjectId
+        : rawProjectId
+          ? Number.parseInt(rawProjectId, 10)
+          : Number.NaN;
+    // A non-numeric id leaves projectId undefined, which disables capture.
+    this.projectId = Number.isFinite(projectIdNumber) ? projectIdNumber : undefined;
+    const rawPath =
+      config?.path ??
+      process.env.LATITUDE_PATH ??
+      process.env.LATITUDE_DOCUMENT_PATH ??
+      config?.defaultPath;
+    this.path = rawPath ? sanitizePath(rawPath) : undefined;
+  }
+  /**
+   * Dynamically imports the telemetry SDK and the provider SDKs it instruments.
+   * Anthropic instrumentation is skipped when
+   * `LATITUDE_DISABLE_ANTHROPIC_INSTRUMENTATION` is "true".
+   *
+   * @returns the telemetry client, or `undefined` when there is no API key or setup fails.
+   */
+  private async initializeTelemetry(): Promise<TelemetryCapturer | undefined> {
+    if (!this.apiKey) {
+      return undefined;
+    }
+    try {
+      const [{ LatitudeTelemetry }, AnthropicSdk, { default: OpenAI }] = await Promise.all([
+        import("@latitude-data/telemetry"),
+        import("@anthropic-ai/sdk"),
+        import("openai"),
+      ]);
+      const disableAnthropicInstrumentation =
+        process.env.LATITUDE_DISABLE_ANTHROPIC_INSTRUMENTATION === "true";
+      return new LatitudeTelemetry(this.apiKey, {
+        instrumentations: {
+          ...(disableAnthropicInstrumentation
+            ? {}
+            : { anthropic: AnthropicSdk as unknown }),
+          openai: OpenAI as unknown,
+        },
+      });
+    } catch {
+      // If instrumentation setup fails, skip Latitude capture and run normally.
+      return undefined;
+    }
+  }
+  /**
+   * Runs `fn`, wrapped in a Latitude capture scope when telemetry is available.
+   *
+   * `fn` is invoked at most once. If it rejects, that rejection is propagated
+   * unchanged; if telemetry fails after `fn` has run, the outcome of `fn` is
+   * still returned. Only when telemetry fails before ever calling `fn` is `fn`
+   * run directly as a fallback.
+   *
+   * @param fn the operation to execute (typically a model call).
+   * @returns whatever `fn` resolves to.
+   */
+  async capture<T>(fn: () => Promise<T>): Promise<T> {
+    if (!this.apiKey || !this.projectId || !this.path) {
+      return await fn();
+    }
+    if (!this.telemetryPromise) {
+      this.telemetryPromise = this.initializeTelemetry();
+    }
+    const telemetry = await this.telemetryPromise;
+    if (!telemetry) {
+      return await fn();
+    }
+    // Remember the single invocation of fn so a failure is never "retried" by the fallback below.
+    let outcome: Promise<T> | undefined;
+    const runOnce = (): Promise<T> => {
+      if (!outcome) {
+        // The async wrapper turns a synchronous throw from fn into a rejection of `outcome`.
+        outcome = (async () => await fn())();
+      }
+      return outcome;
+    };
+    try {
+      return await telemetry.capture(
+        {
+          projectId: this.projectId,
+          path: this.path,
+        },
+        runOnce,
+      );
+    } catch {
+      // Telemetry must never break runtime model calls.
+      if (outcome) {
+        // fn already ran: surface its own result or error instead of executing it a second time.
+        return await outcome;
+      }
+      return await fn();
+    }
+  }
+}