npm - joonecli - Versions diffs - 0.1.0 → 0.2.0 - Mend

joonecli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (184) hide show

package/README.md +12 -12
package/dist/__tests__/optimizations.test.js.map +1 -1
package/dist/__tests__/promptBuilder.test.js +14 -20
package/dist/__tests__/promptBuilder.test.js.map +1 -1
package/dist/agents/agentRegistry.d.ts +37 -0
package/dist/agents/agentRegistry.js +58 -0
package/dist/agents/agentRegistry.js.map +1 -0
package/dist/agents/agentSpec.d.ts +54 -0
package/dist/agents/agentSpec.js +9 -0
package/dist/agents/agentSpec.js.map +1 -0
package/dist/agents/builtinAgents.d.ts +20 -0
package/{src/agents/builtinAgents.ts → dist/agents/builtinAgents.js} +84 -101
package/dist/agents/builtinAgents.js.map +1 -0
package/dist/cli/config.d.ts +4 -0
package/dist/cli/config.js.map +1 -1
package/dist/cli/index.js +29 -2
package/dist/cli/index.js.map +1 -1
package/dist/cli/postinstall.d.ts +2 -0
package/dist/cli/postinstall.js +25 -0
package/dist/cli/postinstall.js.map +1 -0
package/dist/commands/builtinCommands.d.ts +21 -0
package/dist/commands/builtinCommands.js +241 -0
package/dist/commands/builtinCommands.js.map +1 -0
package/dist/commands/commandRegistry.d.ts +92 -0
package/dist/commands/commandRegistry.js +128 -0
package/dist/commands/commandRegistry.js.map +1 -0
package/dist/core/agentLoop.d.ts +7 -2
package/dist/core/agentLoop.js +35 -13
package/dist/core/agentLoop.js.map +1 -1
package/dist/core/autoSave.d.ts +41 -0
package/dist/core/autoSave.js +69 -0
package/dist/core/autoSave.js.map +1 -0
package/dist/core/compactor.d.ts +66 -0
package/dist/core/compactor.js +170 -0
package/dist/core/compactor.js.map +1 -0
package/dist/core/contextGuard.d.ts +38 -0
package/dist/core/contextGuard.js +122 -0
package/dist/core/contextGuard.js.map +1 -0
package/dist/core/events.d.ts +45 -0
package/dist/core/events.js +8 -0
package/dist/core/events.js.map +1 -0
package/dist/core/promptBuilder.d.ts +16 -1
package/dist/core/promptBuilder.js +27 -14
package/dist/core/promptBuilder.js.map +1 -1
package/dist/core/sessionResumer.js +3 -3
package/dist/core/sessionResumer.js.map +1 -1
package/dist/core/sessionStore.js +3 -2
package/dist/core/sessionStore.js.map +1 -1
package/dist/core/subAgent.d.ts +56 -0
package/dist/core/subAgent.js +240 -0
package/dist/core/subAgent.js.map +1 -0
package/dist/core/tokenCounter.d.ts +8 -1
package/dist/core/tokenCounter.js +28 -0
package/dist/core/tokenCounter.js.map +1 -1
package/dist/debug_google.d.ts +1 -0
package/dist/debug_google.js +23 -0
package/dist/debug_google.js.map +1 -0
package/dist/middleware/permission.js +1 -0
package/dist/middleware/permission.js.map +1 -1
package/dist/test_google.d.ts +1 -0
package/dist/test_google.js +32 -89
package/dist/test_google.js.map +1 -0
package/dist/tools/browser.js +4 -1
package/dist/tools/browser.js.map +1 -1
package/dist/tools/index.d.ts +2 -1
package/dist/tools/index.js +11 -3
package/dist/tools/index.js.map +1 -1
package/dist/tools/installHostDeps.d.ts +2 -0
package/dist/tools/installHostDeps.js +37 -0
package/dist/tools/installHostDeps.js.map +1 -0
package/dist/tools/router.js +3 -0
package/dist/tools/router.js.map +1 -1
package/dist/tools/spawnAgent.d.ts +19 -0
package/dist/tools/spawnAgent.js +132 -0
package/dist/tools/spawnAgent.js.map +1 -0
package/dist/tracing/sessionTracer.d.ts +1 -0
package/dist/tracing/sessionTracer.js +4 -1
package/dist/tracing/sessionTracer.js.map +1 -1
package/dist/ui/App.js +94 -6
package/dist/ui/App.js.map +1 -1
package/dist/ui/components/ActionLog.d.ts +7 -0
package/dist/ui/components/ActionLog.js +63 -0
package/dist/ui/components/ActionLog.js.map +1 -0
package/dist/ui/components/FileBrowser.d.ts +2 -0
package/dist/ui/components/FileBrowser.js +41 -0
package/dist/ui/components/FileBrowser.js.map +1 -0
package/package.json +5 -6
package/AGENTS.md +0 -56
package/Handover.md +0 -115
package/PROGRESS.md +0 -160
package/docs/01_insights_and_patterns.md +0 -27
package/docs/02_edge_cases_and_mitigations.md +0 -143
package/docs/03_initial_implementation_plan.md +0 -66
package/docs/04_tech_stack_proposal.md +0 -20
package/docs/05_prd.md +0 -87
package/docs/06_user_stories.md +0 -72
package/docs/07_system_architecture.md +0 -138
package/docs/08_roadmap.md +0 -200
package/e2b/Dockerfile +0 -26
package/src/__tests__/bootstrap.test.ts +0 -111
package/src/__tests__/config.test.ts +0 -97
package/src/__tests__/m55.test.ts +0 -238
package/src/__tests__/middleware.test.ts +0 -219
package/src/__tests__/modelFactory.test.ts +0 -63
package/src/__tests__/optimizations.test.ts +0 -201
package/src/__tests__/promptBuilder.test.ts +0 -141
package/src/__tests__/sandbox.test.ts +0 -102
package/src/__tests__/security.test.ts +0 -122
package/src/__tests__/streaming.test.ts +0 -82
package/src/__tests__/toolRouter.test.ts +0 -52
package/src/__tests__/tools.test.ts +0 -146
package/src/__tests__/tracing.test.ts +0 -196
package/src/agents/agentRegistry.ts +0 -69
package/src/agents/agentSpec.ts +0 -67
package/src/cli/config.ts +0 -124
package/src/cli/index.ts +0 -730
package/src/cli/modelFactory.ts +0 -174
package/src/cli/providers.ts +0 -107
package/src/commands/builtinCommands.ts +0 -293
package/src/commands/commandRegistry.ts +0 -194
package/src/core/agentLoop.d.ts.map +0 -1
package/src/core/agentLoop.ts +0 -312
package/src/core/autoSave.ts +0 -95
package/src/core/compactor.ts +0 -252
package/src/core/contextGuard.ts +0 -129
package/src/core/errors.ts +0 -202
package/src/core/promptBuilder.d.ts.map +0 -1
package/src/core/promptBuilder.ts +0 -139
package/src/core/reasoningRouter.ts +0 -121
package/src/core/retry.ts +0 -75
package/src/core/sessionResumer.ts +0 -90
package/src/core/sessionStore.ts +0 -215
package/src/core/subAgent.ts +0 -339
package/src/core/tokenCounter.ts +0 -64
package/src/evals/dataset.ts +0 -67
package/src/evals/evaluator.ts +0 -81
package/src/hitl/bridge.ts +0 -160
package/src/middleware/commandSanitizer.ts +0 -60
package/src/middleware/loopDetection.ts +0 -63
package/src/middleware/permission.ts +0 -72
package/src/middleware/pipeline.ts +0 -75
package/src/middleware/preCompletion.ts +0 -94
package/src/middleware/types.ts +0 -45
package/src/sandbox/bootstrap.ts +0 -121
package/src/sandbox/manager.ts +0 -239
package/src/sandbox/sync.ts +0 -157
package/src/skills/loader.ts +0 -143
package/src/skills/tools.ts +0 -99
package/src/skills/types.ts +0 -13
package/src/test_cache.ts +0 -72
package/src/test_google.js +0 -40
package/src/test_google.ts +0 -40
package/src/tools/askUser.ts +0 -47
package/src/tools/browser.ts +0 -137
package/src/tools/index.d.ts.map +0 -1
package/src/tools/index.ts +0 -237
package/src/tools/registry.ts +0 -198
package/src/tools/router.ts +0 -78
package/src/tools/security.ts +0 -220
package/src/tools/spawnAgent.ts +0 -158
package/src/tools/webSearch.ts +0 -142
package/src/tracing/analyzer.ts +0 -265
package/src/tracing/langsmith.ts +0 -63
package/src/tracing/sessionTracer.ts +0 -202
package/src/tracing/types.ts +0 -49
package/src/types/valyu.d.ts +0 -37
package/src/ui/App.tsx +0 -404
package/src/ui/components/HITLPrompt.tsx +0 -119
package/src/ui/components/Header.tsx +0 -51
package/src/ui/components/MessageBubble.tsx +0 -46
package/src/ui/components/StatusBar.tsx +0 -138
package/src/ui/components/StreamingText.tsx +0 -48
package/src/ui/components/ToolCallPanel.tsx +0 -80
package/tests/commands/commands.test.ts +0 -356
package/tests/core/compactor.test.ts +0 -217
package/tests/core/retryAndErrors.test.ts +0 -164
package/tests/core/sessionResumer.test.ts +0 -95
package/tests/core/sessionStore.test.ts +0 -84
package/tests/core/stability.test.ts +0 -165
package/tests/core/subAgent.test.ts +0 -238
package/tests/hitl/hitlBridge.test.ts +0 -115
package/tsconfig.json +0 -16
package/vitest.config.ts +0 -10
package/vitest.out +0 -48

package/src/core/subAgent.ts DELETED Viewed

@@ -1,339 +0,0 @@
-/**
- * Sub-Agent Manager
- *
- * Spawns and orchestrates isolated sub-agents for scoped tasks.
- * Each sub-agent gets its own ExecutionHarness with a separate conversation
- * history. Only the final SubAgentResult is returned to the main agent,
- * discarding the sub-agent's internal conversation to save context.
- *
- * Supports both synchronous (blocking) and asynchronous (non-blocking) modes.
- *
- * Safety:
- * - Depth limit of 1: sub-agents cannot spawn other sub-agents
- * - maxTurns cap per agent prevents doom-loops
- * - Concurrent async agent cap of 3 prevents resource exhaustion
- * - Per-agent token budget tracking
- */
-import { AgentSpec, SubAgentResult } from "../agents/agentSpec.js";
-import { AgentRegistry } from "../agents/agentRegistry.js";
-import { DynamicToolInterface, ToolResult } from "../tools/index.js";
-import { ContextState } from "../core/promptBuilder.js";
-import { countMessageTokens } from "../core/tokenCounter.js";
-import { BaseMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage } from "@langchain/core/messages";
-import { BaseChatModel } from "@langchain/core/language_models/chat_models";
-import { Runnable } from "@langchain/core/runnables";
-// ─── Constants ──────────────────────────────────────────────────────────────────
-/** Turn cap applied when neither the caller nor the agent spec supplies one. */
-const DEFAULT_MAX_TURNS = 10;
-/** Upper bound on async sub-agents that may be unfinished at the same time. */
-const MAX_CONCURRENT_ASYNC = 3;
-/** Age, measured from task start, after which an async task entry is dropped (5 minutes). */
-const ASYNC_EXPIRY_MS = 5 * 60 * 1000;
-// ─── Async Task State ───────────────────────────────────────────────────────────
-/** Bookkeeping record for one sub-agent run started in the background. */
-interface AsyncTask {
-  /** Identifier handed back to the caller when the task was started. */
-  taskId: string;
-  /** Name of the agent spec the task runs. */
-  agentName: string;
-  /** Task text the sub-agent was given. */
-  taskDescription: string;
-  /** Timestamp (ms since epoch) taken when the task was registered. */
-  startedAt: number;
-  /** Set to true once `result` has been stored. */
-  completed: boolean;
-  /** Final result; absent while the run is still in flight. */
-  result?: SubAgentResult;
-  /** The in-flight run itself. */
-  promise: Promise<SubAgentResult>;
-}
-// ─── SubAgentManager ────────────────────────────────────────────────────────────
-/**
- * Spawns sub-agents and runs each one in its own conversation.
- *
- * Every run builds a fresh message history, loops over LLM calls and tool
- * executions until the model stops requesting tools or the turn cap is hit,
- * and returns a single SubAgentResult. The history itself never leaves
- * `runAgent`.
- */
-export class SubAgentManager {
-  private registry: AgentRegistry;
-  private allTools: DynamicToolInterface[];
-  private llm: Runnable | BaseChatModel;
-  /** Background runs keyed by task ID; entries are removed on retrieval or expiry. */
-  private asyncTasks: Map<string, AsyncTask> = new Map();
-  /** Incremented for every async spawn so task IDs stay unique within this manager. */
-  private taskCounter = 0;
-  /**
-   * @param registry - Lookup of agent specs by name.
-   * @param tools - Candidate tools for sub-agents. `spawn_agent` and
-   *   `check_agent` are dropped so a sub-agent cannot spawn further agents.
-   * @param llm - Model used for every run; tools are bound per run when the
-   *   object exposes a `bindTools` function.
-   */
-  constructor(
-    registry: AgentRegistry,
-    tools: DynamicToolInterface[],
-    llm: Runnable | BaseChatModel,
-  ) {
-    this.registry = registry;
-    // Filter out spawn_agent and check_agent to prevent recursive nesting (depth-1 limit)
-    this.allTools = tools.filter(
-      (t) => t.name !== "spawn_agent" && t.name !== "check_agent"
-    );
-    this.llm = llm;
-  }
-  /**
-   * Synchronous spawn — resolves only once the sub-agent has finished.
-   *
-   * @param agentName - Name of a registered agent spec.
-   * @param task - Task text handed to the sub-agent.
-   * @param maxTurnsOverride - Optional turn cap that takes precedence over the spec's.
-   * @returns The run's result; an unknown agent name yields a "failure" result
-   *   rather than a thrown error.
-   */
-  async spawn(
-    agentName: string,
-    task: string,
-    maxTurnsOverride?: number
-  ): Promise<SubAgentResult> {
-    const spec = this.registry.get(agentName);
-    if (!spec) {
-      return this.makeErrorResult(
-        agentName,
-        task,
-        `Unknown agent "${agentName}". Available: ${this.registry.getNames().join(", ")}`
-      );
-    }
-    return this.runAgent(spec, task, maxTurnsOverride);
-  }
-  /**
-   * Asynchronous spawn — starts the run in the background and returns a task ID
-   * that can later be passed to `getResult`.
-   *
-   * @param agentName - Name of a registered agent spec.
-   * @param task - Task text handed to the sub-agent.
-   * @param maxTurnsOverride - Optional turn cap that takes precedence over the spec's.
-   * @returns The ID of the newly registered task.
-   * @throws Error when MAX_CONCURRENT_ASYNC unfinished tasks are already
-   *   registered, or when `agentName` is not in the registry.
-   */
-  async spawnAsync(
-    agentName: string,
-    task: string,
-    maxTurnsOverride?: number
-  ): Promise<string> {
-    // Drop stale entries first so they do not count against the cap.
-    this.cleanupExpired();
-    const activeCount = Array.from(this.asyncTasks.values())
-      .filter((t) => !t.completed).length;
-    if (activeCount >= MAX_CONCURRENT_ASYNC) {
-      throw new Error(
-        `Maximum concurrent async agents reached (${MAX_CONCURRENT_ASYNC}). ` +
-        `Wait for existing tasks to complete or check them with check_agent.`
-      );
-    }
-    const spec = this.registry.get(agentName);
-    if (!spec) {
-      throw new Error(
-        `Unknown agent "${agentName}". Available: ${this.registry.getNames().join(", ")}`
-      );
-    }
-    const taskId = `task_${++this.taskCounter}_${Date.now()}`;
-    const promise = this.runAgent(spec, task, maxTurnsOverride)
-      // Nothing awaits this promise, so a rejection would be unhandled and the
-      // task would never be marked complete. Convert it into a failure result.
-      .catch((err: unknown) =>
-        this.makeErrorResult(
-          spec.name,
-          task,
-          `Sub-agent error: ${SubAgentManager.describeError(err)}`
-        )
-      )
-      .then((result) => {
-        // The entry may already be gone if it expired while the run was in flight.
-        const asyncTask = this.asyncTasks.get(taskId);
-        if (asyncTask) {
-          asyncTask.result = result;
-          asyncTask.completed = true;
-        }
-        return result;
-      });
-    this.asyncTasks.set(taskId, {
-      taskId,
-      agentName,
-      taskDescription: task,
-      promise,
-      startedAt: Date.now(),
-      completed: false,
-    });
-    return taskId;
-  }
-  /**
-   * Check the status or get the result of an async task.
-   *
-   * @param taskId - ID returned by `spawnAsync`.
-   * @returns The result if the task has completed (the entry is removed as a
-   *   side effect), otherwise a human-readable status or "unknown task" string.
-   */
-  async getResult(taskId: string): Promise<SubAgentResult | string> {
-    const asyncTask = this.asyncTasks.get(taskId);
-    if (!asyncTask) {
-      return `Unknown task ID: ${taskId}. No such async task exists.`;
-    }
-    if (asyncTask.completed && asyncTask.result) {
-      // Results are handed out once; clean up the task on retrieval.
-      this.asyncTasks.delete(taskId);
-      return asyncTask.result;
-    }
-    const elapsed = Math.round((Date.now() - asyncTask.startedAt) / 1000);
-    return `Task "${asyncTask.taskDescription}" (agent: ${asyncTask.agentName}) ` +
-      `is still running (${elapsed}s elapsed).`;
-  }
-  /**
-   * Core execution loop for a sub-agent.
-   * Creates an isolated conversation and runs a multi-turn loop.
-   *
-   * @param spec - Agent spec providing the system prompt, optional tool
-   *   allow-list and optional turn cap.
-   * @param task - Task text; used both in the system prompt and as the first
-   *   human message.
-   * @param maxTurnsOverride - Optional turn cap; falls back to `spec.maxTurns`,
-   *   then DEFAULT_MAX_TURNS.
-   * @returns "success" when the model stopped requesting tools, "partial" when
-   *   the turn cap ended the loop first, "failure" when the loop threw.
-   */
-  private async runAgent(
-    spec: AgentSpec,
-    task: string,
-    maxTurnsOverride?: number
-  ): Promise<SubAgentResult> {
-    const startTime = Date.now();
-    const maxTurns = maxTurnsOverride ?? spec.maxTurns ?? DEFAULT_MAX_TURNS;
-    // Resolve available tools for this agent
-    const allowedNames = spec.tools;
-    const agentTools = allowedNames
-      ? this.allTools.filter((t) => allowedNames.includes(t.name))
-      : this.allTools;
-    // Create isolated conversation history
-    const systemPrompt = new SystemMessage(
-      `${spec.systemPrompt}\n\n--- Current Task ---\n${task}`
-    );
-    const history: BaseMessage[] = [
-      new HumanMessage(task),
-    ];
-    let promptTokens = 0;
-    let completionTokens = 0;
-    let toolCallCount = 0;
-    let turnsUsed = 0;
-    let lastResponse = "";
-    // True only when the model ended the run itself by answering without tool calls.
-    let finished = false;
-    const filesModified: Set<string> = new Set();
-    // Build LangChain tool declarations for binding
-    const toolDeclarations = agentTools.map((t) => ({
-      name: t.name,
-      description: t.description,
-      schema: t.schema,
-    }));
-    try {
-      // Bind tools to the LLM for this sub-agent session
-      let boundLlm: any;
-      if ("bindTools" in this.llm && typeof (this.llm as any).bindTools === "function") {
-        boundLlm = (this.llm as any).bindTools(toolDeclarations);
-      } else {
-        boundLlm = this.llm;
-      }
-      for (let turn = 0; turn < maxTurns; turn++) {
-        turnsUsed++;
-        // Build the full message array
-        const messages = [systemPrompt, ...history];
-        // The whole history is re-sent every turn, so it is re-counted every turn.
-        promptTokens += countMessageTokens(messages);
-        // Invoke the LLM
-        const aiMessage = (await boundLlm.invoke(messages)) as AIMessage;
-        completionTokens += countMessageTokens([aiMessage]);
-        history.push(aiMessage);
-        // Keep the most recent non-empty text, whether content is a plain
-        // string or a list of content parts.
-        const text = SubAgentManager.extractText(aiMessage.content);
-        if (text.length > 0) {
-          lastResponse = text;
-        }
-        // Check for tool calls
-        if (!aiMessage.tool_calls || aiMessage.tool_calls.length === 0) {
-          // No tool calls — agent is done
-          finished = true;
-          break;
-        }
-        // Execute tool calls
-        for (const call of aiMessage.tool_calls) {
-          // A ToolMessage needs the call's ID; calls without one are skipped.
-          if (!call.id) continue;
-          const tool = agentTools.find((t) => t.name === call.name);
-          if (!tool) {
-            history.push(new ToolMessage({
-              content: `Error: Tool "${call.name}" is not available to this sub-agent.`,
-              tool_call_id: call.id,
-            }));
-            continue;
-          }
-          toolCallCount++;
-          try {
-            const result = await tool.execute(call.args);
-            const output = typeof result === "string" ? result : (result as ToolResult).content;
-            // Track file modifications
-            const writtenPath: unknown = call.args?.path;
-            if (call.name === "write_file" && typeof writtenPath === "string" && writtenPath) {
-              filesModified.add(writtenPath);
-            }
-            history.push(new ToolMessage({
-              content: output,
-              tool_call_id: call.id,
-            }));
-          } catch (err: unknown) {
-            // Report the failure to the model instead of aborting the run.
-            history.push(new ToolMessage({
-              content: `Tool error: ${SubAgentManager.describeError(err)}`,
-              tool_call_id: call.id,
-            }));
-          }
-        }
-      }
-      // Decide from how the loop ended, not from the turn count: a run that
-      // answers on its final allowed turn has still completed.
-      const outcome = finished ? "success" : "partial";
-      return {
-        agentName: spec.name,
-        taskDescription: task,
-        outcome,
-        result: lastResponse || "(Sub-agent produced no text output)",
-        filesModified: Array.from(filesModified),
-        toolCallCount,
-        tokenUsage: { prompt: promptTokens, completion: completionTokens },
-        duration: Date.now() - startTime,
-        turnsUsed,
-      };
-    } catch (error: unknown) {
-      return this.makeErrorResult(
-        spec.name,
-        task,
-        `Sub-agent error: ${SubAgentManager.describeError(error)}`,
-        { promptTokens, completionTokens, toolCallCount, turnsUsed, startTime, filesModified }
-      );
-    }
-  }
-  /**
-   * Collects the text of a message's content.
-   * Strings are returned as-is; for arrays, string parts and parts with a
-   * string `text` field are concatenated; anything else yields "".
-   */
-  private static extractText(content: unknown): string {
-    if (typeof content === "string") {
-      return content;
-    }
-    if (!Array.isArray(content)) {
-      return "";
-    }
-    const pieces: string[] = [];
-    for (const part of content) {
-      if (typeof part === "string") {
-        pieces.push(part);
-      } else if (typeof part === "object" && part !== null && "text" in part) {
-        const text = (part as { text: unknown }).text;
-        if (typeof text === "string") {
-          pieces.push(text);
-        }
-      }
-    }
-    return pieces.join("");
-  }
-  /**
-   * Turns a caught value into a message string without assuming it is an Error.
-   */
-  private static describeError(err: unknown): string {
-    if (err instanceof Error) {
-      return err.message;
-    }
-    if (typeof err === "object" && err !== null && "message" in err) {
-      return String((err as { message: unknown }).message);
-    }
-    return String(err);
-  }
-  /**
-   * Creates a SubAgentResult with outcome "failure".
-   *
-   * @param agentName - Agent the failure is attributed to.
-   * @param task - Task text the agent was given.
-   * @param errorMsg - Message stored as the result text.
-   * @param partial - Counters gathered before the failure; when omitted all
-   *   counters and the duration are reported as 0.
-   */
-  private makeErrorResult(
-    agentName: string,
-    task: string,
-    errorMsg: string,
-    partial?: {
-      promptTokens: number;
-      completionTokens: number;
-      toolCallCount: number;
-      turnsUsed: number;
-      startTime: number;
-      filesModified: Set<string>;
-    }
-  ): SubAgentResult {
-    return {
-      agentName,
-      taskDescription: task,
-      outcome: "failure",
-      result: errorMsg,
-      filesModified: partial ? Array.from(partial.filesModified) : [],
-      toolCallCount: partial?.toolCallCount ?? 0,
-      tokenUsage: {
-        prompt: partial?.promptTokens ?? 0,
-        completion: partial?.completionTokens ?? 0,
-      },
-      duration: partial ? Date.now() - partial.startTime : 0,
-      turnsUsed: partial?.turnsUsed ?? 0,
-    };
-  }
-  /**
-   * Clean up expired async tasks.
-   *
-   * Expiry is measured from `startedAt`, so an entry is dropped once it is
-   * older than ASYNC_EXPIRY_MS whether or not its run has finished; after that
-   * its result can no longer be retrieved through `getResult`.
-   */
-  private cleanupExpired(): void {
-    const now = Date.now();
-    for (const [taskId, task] of this.asyncTasks.entries()) {
-      if (now - task.startedAt > ASYNC_EXPIRY_MS) {
-        this.asyncTasks.delete(taskId);
-      }
-    }
-  }
-}

package/src/core/tokenCounter.ts DELETED Viewed

@@ -1,64 +0,0 @@
-import { BaseMessage } from "@langchain/core/messages";
-/**
- * Character-based token estimation.
- *
- * Token counts are approximated from string length instead of a real
- * tokenizer, at roughly 4 characters per token for English text. That keeps
- * tiktoken out of the dependency list; the estimate is intended for capacity
- * threshold decisions (about 90% accurate for English, per the original
- * author's note), not for exact accounting.
- *
- * Swap in tiktoken with the model-specific encoding if production-grade
- * accuracy is required.
- */
-const CHARS_PER_TOKEN = 4;
-/**
- * Approximates the number of tokens in a string.
- *
- * @param text - The string to measure.
- * @returns `text.length` divided by CHARS_PER_TOKEN, rounded up (0 for "").
- */
-export function estimateTokens(text: string): number {
-  const charCount = text.length;
-  return Math.ceil(charCount / CHARS_PER_TOKEN);
-}
-/**
- * Sums the estimated token count of every message in a list.
- *
- * String content is estimated directly. For array content, string parts and
- * parts carrying a string `text` field are estimated; other parts add nothing.
- * Each message also contributes a flat 4 tokens for role/name overhead.
- *
- * @param messages - Messages to measure.
- * @returns The estimated total; 0 for an empty list.
- */
-export function countMessageTokens(messages: BaseMessage[]): number {
-  // Flat allowance added once per message.
-  const perMessageOverhead = 4;
-  return messages.reduce((runningTotal, message) => {
-    const content = message.content;
-    let contentTokens = 0;
-    if (typeof content === "string") {
-      contentTokens = estimateTokens(content);
-    } else if (Array.isArray(content)) {
-      // Multi-part message: only the textual parts are counted.
-      for (const segment of content) {
-        if (typeof segment === "string") {
-          contentTokens += estimateTokens(segment);
-        } else if ("text" in segment && typeof segment.text === "string") {
-          contentTokens += estimateTokens(segment.text);
-        }
-      }
-    }
-    return runningTotal + contentTokens + perMessageOverhead;
-  }, 0);
-}
-/**
- * Reports whether a conversation has reached a given share of the context window.
- *
- * @param messages - The current conversation messages.
- * @param maxTokens - The model's context window size.
- * @param threshold - Fraction of `maxTokens` at which the check trips (default 0.8).
- * @returns True when the estimated token count is at or above `maxTokens * threshold`.
- */
-export function isNearCapacity(
-  messages: BaseMessage[],
-  maxTokens: number,
-  threshold = 0.8
-): boolean {
-  const triggerPoint = maxTokens * threshold;
-  return countMessageTokens(messages) >= triggerPoint;
-}

package/src/evals/dataset.ts DELETED Viewed

@@ -1,67 +0,0 @@
-import { Client } from "langsmith";
-// LangSmith client shared by this module; constructed with no explicit options.
-const client = new Client();
-// Dataset name used both to look the dataset up and to create it.
-const DATASET_NAME = "joone-baseline-v1";
-/**
- * Definition of our baseline evaluation dataset.
- *
- * Each entry pairs `inputs` (the instruction text given to the agent) with
- * `outputs` (the expectations stored alongside it). Every entry names an
- * `expected_file`; the other output keys vary per entry.
- */
-const BASELINE_EXAMPLES = [
-  // Compute a value in Python and write it to a file; expects an exact file body.
-  {
-    inputs: {
-      instruction: "Write a python script that calculates the 10th fibonacci number and saves the result to /workspace/fib_result.txt",
-    },
-    outputs: {
-      expected_file: "/workspace/fib_result.txt",
-      expected_content: "55\n", // 0,1,1,2,3,5,8,13,21,34,55
-    },
-  },
-  // Multi-step task: write a module, write its test, then run the test via the bash tool.
-  {
-    inputs: {
-      instruction: `Create a TypeScript file at /workspace/math.ts with a function 'add(a: number, b: number)' that returns their sum.
-Then write a test file at /workspace/math.test.ts using the 'node:assert' module.
-Finally, use the bash tool to run 'npx tsx math.test.ts' to verify it passes.`,
-    },
-    outputs: {
-      expected_file: "/workspace/math.ts",
-      expected_test_execution: true,
-    },
-  },
-  // Directory listing captured to a file; only the file's presence is recorded.
-  {
-    inputs: {
-      instruction: "List all files in the current project root directory and save the output to /workspace/ls.txt",
-    },
-    outputs: {
-      expected_file: "/workspace/ls.txt",
-    },
-  },
-];
-/**
- * Makes sure the baseline dataset exists in LangSmith, creating and seeding it
- * from BASELINE_EXAMPLES when the lookup reports it as missing.
- *
- * @returns The dataset name (DATASET_NAME), whether it already existed or was just created.
- * @throws Re-throws any lookup error that is not a "not found" / HTTP 404 error,
- *   and any error raised while creating or seeding the dataset.
- */
-export async function ensureBaselineDataset(): Promise<string> {
-  try {
-    const existing = await client.readDataset({ datasetName: DATASET_NAME });
-    console.log(`[Eval] Dataset '${DATASET_NAME}' already exists (ID: ${existing.id}).`);
-    return DATASET_NAME;
-  } catch (error: any) {
-    // Only a missing dataset is recoverable; everything else propagates.
-    const datasetMissing = error?.message?.includes("not found") || error?.status === 404;
-    if (!datasetMissing) {
-      throw error;
-    }
-    console.log(`[Eval] Creating dataset '${DATASET_NAME}' from scratch...`);
-    const created = await client.createDataset(DATASET_NAME, {
-      description: "Baseline tasks to evaluate Joone's core sandbox, tool routing, and reasoning precision.",
-    });
-    // Examples are uploaded one at a time, in declaration order.
-    for (const { inputs, outputs } of BASELINE_EXAMPLES) {
-      await client.createExample(inputs, outputs, { datasetId: created.id });
-    }
-    console.log(`[Eval] Successfully seeded dataset '${DATASET_NAME}' with ${BASELINE_EXAMPLES.length} examples.`);
-    return DATASET_NAME;
-  }
-}

package/src/evals/evaluator.ts DELETED Viewed

@@ -1,81 +0,0 @@
-import { Run, Example } from "langsmith";
-import { EvaluationResult } from "langsmith/evaluation";
-/**
- * Custom evaluator: Success Validator
- * Scores a run by whether its trace carries an error.
- *
- * @param run - The run to inspect; only `run.error` is read.
- * @param example - Unused.
- * @returns Score 0 with the error as the comment when `run.error` is truthy, otherwise score 1.
- */
-export async function successEvaluator(run: Run, example?: Example): Promise<EvaluationResult> {
-  // A truthy error field means the harness threw an unhandled exception.
-  if (run.error) {
-    return {
-      key: "execution_success",
-      score: 0,
-      comment: run.error,
-    };
-  }
-  return {
-    key: "execution_success",
-    score: 1,
-    comment: "Agent completed execution loop cleanly.",
-  };
-}
-/**
- * Custom evaluator: Cache Efficiency
- * Reports the share of prompt tokens that were served from the prompt cache.
- *
- * Reads `run.outputs.metrics` and expects `totalTokens`, `promptTokens`,
- * `cacheCreationTokens` and `cacheReadTokens` on it.
- *
- * @param run - The run to inspect; only `run.outputs.metrics` is read.
- * @param example - Unused.
- * @returns Under the key "cache_hit_rate":
- *   - score null when metrics are missing, `totalTokens` is falsy, or the
- *     prompt token count is missing, non-numeric or not positive;
- *   - score 0 when both cache counters are zero;
- *   - otherwise `cacheReadTokens / promptTokens`.
- */
-export async function cacheEfficiencyEvaluator(run: Run, example?: Example): Promise<EvaluationResult> {
-  const outputs = run.outputs || {};
-  const metrics = outputs.metrics; // We will attach metrics to the harness output
-  if (!metrics || !metrics.totalTokens) {
-    return {
-      key: "cache_hit_rate",
-      score: null, // N/A (e.g., OpenAI or missing data)
-      comment: "No token metrics found in run output.",
-    };
-  }
-  const creationTokens = metrics.cacheCreationTokens || 0;
-  const readTokens = metrics.cacheReadTokens || 0;
-  if (creationTokens === 0 && readTokens === 0) {
-    return {
-      key: "cache_hit_rate",
-      score: 0,
-      comment: "Prompt caching is not active or not supported by this provider.",
-    };
-  }
-  const totalInputTokens = Number(metrics.promptTokens);
-  // Dividing by a missing or zero prompt count would produce NaN or Infinity
-  // as the score; report the rate as not available instead.
-  if (!Number.isFinite(totalInputTokens) || totalInputTokens <= 0) {
-    return {
-      key: "cache_hit_rate",
-      score: null,
-      comment: "Prompt token count is missing or zero; cache hit rate cannot be computed.",
-    };
-  }
-  const hitRate = readTokens / totalInputTokens;
-  return {
-    key: "cache_hit_rate",
-    score: hitRate,
-    comment: `Cache Hit Rate: ${(hitRate * 100).toFixed(1)}% (${readTokens} / ${totalInputTokens} input tokens)`,
-  };
-}
-/**
- * Custom evaluator: Output Artifact Check
- * Checks whether the example's expected file is listed in the run's file manifest.
- *
- * @param run - The run to inspect; only `run.outputs.fileManifest` is read.
- * @param example - Dataset example; `outputs.expected_file` names the file to look for.
- * @returns Score null when the example names no expected file, otherwise 1 if
- *   the manifest includes it and 0 if it does not.
- */
-export async function filePresenceEvaluator(run: Run, example?: Example): Promise<EvaluationResult> {
-  const expectedFile = example?.outputs?.expected_file;
-  if (!expectedFile) {
-    return { key: "expected_file_created", score: null };
-  }
-  // A missing outputs object or manifest is treated as an empty manifest.
-  const fileManifest = (run.outputs || {}).fileManifest || [];
-  const didCreate = fileManifest.includes(expectedFile);
-  const comment = didCreate
-    ? `File ${expectedFile} created successfully.`
-    : `Failed to create expected file: ${expectedFile}`;
-  return {
-    key: "expected_file_created",
-    score: didCreate ? 1 : 0,
-    comment,
-  };
-}
-}