npm - @bluecopa/harness - Versions diffs - 1.0.0 → 2.0.0 - Mend

@bluecopa/harness 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

package/README.md +212 -117
package/dist/arc/index.d.ts +796 -0
package/dist/arc/index.js +2863 -0
package/dist/arc/index.js.map +1 -0
package/dist/observability/otel.d.ts +36 -0
package/dist/observability/otel.js +73 -0
package/dist/observability/otel.js.map +1 -0
package/dist/shared-types-DRxnerLT.d.ts +138 -0
package/dist/skills/index.d.ts +67 -0
package/dist/skills/index.js +282 -0
package/dist/skills/index.js.map +1 -0
package/package.json +26 -2
package/AGENTS.md +0 -18
package/docs/guides/observability.md +0 -32
package/docs/guides/providers.md +0 -51
package/docs/guides/skills.md +0 -25
package/docs/security/skill-sandbox-threat-model.md +0 -20
package/src/agent/create-agent.ts +0 -884
package/src/agent/create-tools.ts +0 -33
package/src/agent/step-executor.ts +0 -15
package/src/agent/types.ts +0 -57
package/src/context/llm-compaction-strategy.ts +0 -37
package/src/context/prepare-step.ts +0 -65
package/src/context/token-tracker.ts +0 -26
package/src/extracted/manifest.json +0 -10
package/src/extracted/prompts/compaction.md +0 -5
package/src/extracted/prompts/system.md +0 -5
package/src/extracted/tools.json +0 -82
package/src/hooks/hook-runner.ts +0 -22
package/src/hooks/tool-wrappers.ts +0 -64
package/src/interfaces/compaction-strategy.ts +0 -18
package/src/interfaces/hooks.ts +0 -24
package/src/interfaces/sandbox-provider.ts +0 -29
package/src/interfaces/session-store.ts +0 -48
package/src/interfaces/tool-provider.ts +0 -70
package/src/loop/bridge.ts +0 -363
package/src/loop/context-store.ts +0 -207
package/src/loop/lcm-tool-loop.ts +0 -163
package/src/loop/vercel-agent-loop.ts +0 -279
package/src/observability/context.ts +0 -17
package/src/observability/metrics.ts +0 -27
package/src/observability/otel.ts +0 -105
package/src/observability/tracing.ts +0 -13
package/src/optimization/agent-evaluator.ts +0 -40
package/src/optimization/config-serializer.ts +0 -16
package/src/optimization/optimization-runner.ts +0 -39
package/src/optimization/trace-collector.ts +0 -33
package/src/permissions/permission-manager.ts +0 -34
package/src/providers/composite-tool-provider.ts +0 -72
package/src/providers/control-plane-e2b-executor.ts +0 -218
package/src/providers/e2b-tool-provider.ts +0 -68
package/src/providers/local-tool-provider.ts +0 -190
package/src/providers/skill-sandbox-provider.ts +0 -46
package/src/sessions/file-session-store.ts +0 -61
package/src/sessions/in-memory-session-store.ts +0 -39
package/src/sessions/session-manager.ts +0 -44
package/src/skills/skill-loader.ts +0 -52
package/src/skills/skill-manager.ts +0 -175
package/src/skills/skill-router.ts +0 -99
package/src/skills/skill-types.ts +0 -26
package/src/subagents/subagent-manager.ts +0 -22
package/src/subagents/task-tool.ts +0 -13
package/tests/integration/agent-loop-basic.spec.ts +0 -56
package/tests/integration/agent-skill-default-from-sandbox.spec.ts +0 -66
package/tests/integration/concurrency-single-turn.spec.ts +0 -35
package/tests/integration/otel-metrics-emission.spec.ts +0 -62
package/tests/integration/otel-trace-propagation.spec.ts +0 -48
package/tests/integration/parity-benchmark.spec.ts +0 -45
package/tests/integration/provider-local-smoke.spec.ts +0 -63
package/tests/integration/session-resume.spec.ts +0 -30
package/tests/integration/skill-install-rollback.spec.ts +0 -64
package/tests/integration/skill-sandbox-file-blob.spec.ts +0 -54
package/tests/integration/skills-progressive-disclosure.spec.ts +0 -61
package/tests/integration/streaming-compaction-boundary.spec.ts +0 -43
package/tests/integration/structured-messages-agent.spec.ts +0 -265
package/tests/integration/subagent-isolation.spec.ts +0 -24
package/tests/security/skill-sandbox-isolation.spec.ts +0 -51
package/tests/unit/create-tools-schema-parity.spec.ts +0 -22
package/tests/unit/extracted-manifest.spec.ts +0 -41
package/tests/unit/interfaces-contract.spec.ts +0 -101
package/tests/unit/structured-messages.spec.ts +0 -176
package/tests/unit/token-tracker.spec.ts +0 -22
package/tsconfig.json +0 -14
package/vitest.config.ts +0 -7

package/src/loop/bridge.ts DELETED Viewed

@@ -1,363 +0,0 @@
-import type { ToolProvider, ToolResult } from '../interfaces/tool-provider';
-// ── Request / response types ──
-/** Payload passed to `BridgeConfig.onLlmRequest` for a request of type `llm`. */
-export interface LlmRequest {
-  /** Request id; the answer is written to `resp_<id>.json` in the bridge directory. */
-  id: string;
-  model: string;
-  prompt: string;
-}
-/** Payload passed to `BridgeConfig.onWebFetchRequest` for a request of type `web_fetch`. */
-export interface WebFetchRequest {
-  id: string;
-  url: string;
-}
-/** Payload passed to `BridgeConfig.onWebSearchRequest` for a request of type `web_search`. */
-export interface WebSearchRequest {
-  id: string;
-  query: string;
-}
-/** Payload passed to `BridgeConfig.onAskUserRequest` for a request of type `ask_user`. */
-export interface AskUserRequest {
-  id: string;
-  question: string;
-}
-/** Payload passed to `BridgeConfig.onTellUserRequest` for a request of type `tell_user`. */
-export interface TellUserRequest {
-  id: string;
-  message: string;
-}
-/**
- * One parsed line of `requests.jsonl`, discriminated by `type`.
- *
- * Each member is the matching request interface plus its `type` tag, so the
- * field lists are declared once and the union cannot drift from the interfaces.
- */
-export type BridgeRequest =
-  | ({ type: 'llm' } & LlmRequest)
-  | ({ type: 'web_fetch' } & WebFetchRequest)
-  | ({ type: 'web_search' } & WebSearchRequest)
-  | ({ type: 'ask_user' } & AskUserRequest)
-  | ({ type: 'tell_user' } & TellUserRequest);
-// ── Activity log entry ──
-/**
- * One parsed line of `activity.jsonl`, as written by the sandbox-side Python
- * module and forwarded to `BridgeConfig.onActivity`.
- */
-export interface ActivityEntry {
-  /** Timestamp written by the Python side via `time.time()`. */
-  ts: number;
-  /** Whether the line marks the start or the end of a tool invocation. */
-  event: 'tool_start' | 'tool_end';
-  /** Tool name, e.g. a bridge request type or `bash` / `read_file` / `write_file`. */
-  tool: string;
-  /** Truncated or summarised input, present on some `tool_start` entries. */
-  input?: string;
-  /** Truncated or summarised output, present on `tool_end` entries. */
-  output?: string;
-  /** Process exit code; written by the Python `bash` helper on `tool_end`. */
-  exit_code?: number;
-  /** Model name; present when an `llm` request's payload is logged on `tool_start`. */
-  model?: string;
-  /** Wall-clock duration in milliseconds, present on `tool_end` entries. */
-  duration_ms?: number;
-}
-// ── Config ──
-/** Construction options for `SandboxBridge`. */
-export interface BridgeConfig {
-  /** Provider used for every sandbox operation (`bash`, `readFile`, `writeFile`). */
-  toolProvider: ToolProvider;
-  /** Directory inside the sandbox holding the bridge files. Defaults to `DEFAULT_BRIDGE_DIR`. */
-  bridgeDir?: string;
-  /** Delay between polls in milliseconds. Defaults to `DEFAULT_POLL_INTERVAL`. */
-  pollIntervalMs?: number;
-  /** Answers `llm` requests; when absent the sandbox receives an `ERROR: ...` string. */
-  onLlmRequest?(req: LlmRequest): Promise<string>;
-  /** Answers `web_fetch` requests; when absent the sandbox receives an `ERROR: ...` string. */
-  onWebFetchRequest?(req: WebFetchRequest): Promise<string>;
-  /** Answers `web_search` requests; when absent the sandbox receives an `ERROR: ...` string. */
-  onWebSearchRequest?(req: WebSearchRequest): Promise<string>;
-  /** Answers `ask_user` requests; when absent the sandbox receives an `ERROR: ...` string. */
-  onAskUserRequest?(req: AskUserRequest): Promise<string>;
-  /** Handles `tell_user` requests; the sandbox receives `ok` whether or not this is set. */
-  onTellUserRequest?(req: TellUserRequest): Promise<void>;
-  /** Receives each new activity-log entry; when absent `activity.jsonl` is not read. */
-  onActivity?(entry: ActivityEntry): void;
-}
-/** Bridge directory used when `BridgeConfig.bridgeDir` is not set. */
-const DEFAULT_BRIDGE_DIR = '/var/run/bridge';
-/** Poll delay in milliseconds used when `BridgeConfig.pollIntervalMs` is not set. */
-const DEFAULT_POLL_INTERVAL = 200;
-// ── Python module template ──
-/**
- * Render the source of the sandbox-side `harness_bridge` Python module.
- *
- * The module appends requests to `requests.jsonl`, waits for the matching
- * `resp_<id>.json`, and logs every operation to `activity.jsonl`.
- *
- * @param bridgeDir Directory the module uses for its files; embedded as a
- *   JSON-encoded string literal.
- * @returns The Python source text.
- */
-function bridgePythonModule(bridgeDir: string): string {
-  return `"""
-harness_bridge — file-based IPC for sandbox REPL scripts.
-All external I/O (LLM calls, web fetch, user interaction) is routed through
-request/response files that the harness polls and fulfills.
-Every operation is logged to activity.jsonl for real-time observability.
-"""
-import json, time, os, uuid, subprocess
-BRIDGE_DIR = ${JSON.stringify(bridgeDir)}
-ACTIVITY_FILE = os.path.join(BRIDGE_DIR, "activity.jsonl")
-REQUESTS_FILE = os.path.join(BRIDGE_DIR, "requests.jsonl")
-def _log_activity(event, detail):
-    entry = {"ts": time.time(), "event": event, **detail}
-    with open(ACTIVITY_FILE, "a") as f:
-        f.write(json.dumps(entry) + "\\n")
-def _call(req_type, payload):
-    req_id = str(uuid.uuid4())[:8]
-    _log_activity("tool_start", {"tool": req_type, **{k: str(v)[:200] for k, v in payload.items()}})
-    start = time.time()
-    with open(REQUESTS_FILE, "a") as f:
-        f.write(json.dumps({"id": req_id, "type": req_type, **payload}) + "\\n")
-    resp_file = os.path.join(BRIDGE_DIR, f"resp_{req_id}.json")
-    while True:
-        try:
-            with open(resp_file) as f:
-                result = json.load(f)["output"]
-            break
-        except (FileNotFoundError, ValueError):
-            # Not there yet, or the harness is still writing it: wait and retry.
-            time.sleep(0.1)
-    elapsed = int((time.time() - start) * 1000)
-    _log_activity("tool_end", {"tool": req_type, "output": str(result)[:500], "duration_ms": elapsed})
-    return result
-def llm_query(prompt, model="claude-sonnet-4-5"):
-    """Send a prompt to an LLM. The harness fulfills this via its API key."""
-    return _call("llm", {"model": model, "prompt": prompt})
-def web_fetch(url):
-    """Fetch a URL. The harness fulfills this (sandbox has no network)."""
-    return _call("web_fetch", {"url": url})
-def web_search(query):
-    """Web search. The harness fulfills this (sandbox has no network)."""
-    return _call("web_search", {"query": query})
-def ask_user(question):
-    """Ask the user a question. The harness prompts in the terminal."""
-    return _call("ask_user", {"question": question})
-def tell_user(message):
-    """Display a message to the user. The harness renders it."""
-    _call("tell_user", {"message": message})
-def bash(command):
-    """Run a shell command locally in the sandbox."""
-    _log_activity("tool_start", {"tool": "bash", "input": command[:200]})
-    start = time.time()
-    try:
-        r = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=300)
-    except subprocess.TimeoutExpired:
-        # Close the activity entry so observers do not see a dangling tool_start.
-        elapsed = int((time.time() - start) * 1000)
-        _log_activity("tool_end", {"tool": "bash", "output": "ERROR: timed out after 300s", "duration_ms": elapsed})
-        raise
-    output = r.stdout if r.returncode == 0 else f"ERROR (exit {r.returncode}): {r.stderr}"
-    elapsed = int((time.time() - start) * 1000)
-    _log_activity("tool_end", {"tool": "bash", "exit_code": r.returncode, "output": output[:1000], "duration_ms": elapsed})
-    return output
-def read_file(path):
-    """Read a file from the sandbox filesystem."""
-    _log_activity("tool_start", {"tool": "read_file", "input": path})
-    start = time.time()
-    try:
-        with open(path) as f:
-            content = f.read()
-        elapsed = int((time.time() - start) * 1000)
-        _log_activity("tool_end", {"tool": "read_file", "output": f"{len(content)} chars", "duration_ms": elapsed})
-        return content
-    except Exception as e:
-        elapsed = int((time.time() - start) * 1000)
-        _log_activity("tool_end", {"tool": "read_file", "output": f"ERROR: {e}", "duration_ms": elapsed})
-        return f"ERROR: {e}"
-def write_file(path, content):
-    """Write a file to the sandbox filesystem."""
-    _log_activity("tool_start", {"tool": "write_file", "input": f"{path} ({len(content)} chars)"})
-    start = time.time()
-    try:
-        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
-        with open(path, "w") as f:
-            f.write(content)
-        elapsed = int((time.time() - start) * 1000)
-        _log_activity("tool_end", {"tool": "write_file", "output": "ok", "duration_ms": elapsed})
-    except Exception as e:
-        elapsed = int((time.time() - start) * 1000)
-        _log_activity("tool_end", {"tool": "write_file", "output": f"ERROR: {e}", "duration_ms": elapsed})
-        raise
-`;
-}
-// ── Bridge implementation ──
-/**
- * Harness-side half of the file-based bridge.
- *
- * Installs `harness_bridge.py` into the sandbox, runs a script there, and while
- * the script runs polls `requests.jsonl` (answering each request through the
- * configured callbacks and a `resp_<id>.json` file) and `activity.jsonl`
- * (forwarded to `onActivity`).
- */
-export class SandboxBridge {
-  private readonly tp: ToolProvider;
-  private readonly bridgeDir: string;
-  private readonly pollInterval: number;
-  private readonly config: BridgeConfig;
-  /** Number of non-empty `requests.jsonl` lines already consumed. */
-  private processedLineCount = 0;
-  /** Number of non-empty `activity.jsonl` lines already consumed. */
-  private activityLineCount = 0;
-  constructor(config: BridgeConfig) {
-    this.config = config;
-    this.tp = config.toolProvider;
-    this.bridgeDir = config.bridgeDir ?? DEFAULT_BRIDGE_DIR;
-    this.pollInterval = config.pollIntervalMs ?? DEFAULT_POLL_INTERVAL;
-  }
-  /** Inject the bridge Python module and create the bridge directory in the sandbox. */
-  async setup(): Promise<void> {
-    const dir = SandboxBridge.shellArg(this.bridgeDir);
-    await this.tp.bash(`mkdir -p ${dir}`);
-    await this.tp.writeFile(
-      `${this.bridgeDir}/harness_bridge.py`,
-      bridgePythonModule(this.bridgeDir)
-    );
-    // Clear any stale files from a previous run (the glob stays outside the quotes)
-    await this.tp.bash(`rm -f ${dir}/requests.jsonl ${dir}/activity.jsonl ${dir}/resp_*.json`);
-    this.processedLineCount = 0;
-    this.activityLineCount = 0;
-  }
-  /**
-   * Run the REPL script and poll for bridge requests until it completes.
-   *
-   * @param scriptPath Script to run with `python3`. NOTE(review): interpolated
-   *   into the shell command verbatim, as before; callers must pass a trusted,
-   *   shell-safe value.
-   * @returns The script's combined stdout/stderr on success, otherwise a
-   *   string starting with `REPL ERROR: `.
-   */
-  async pollUntilComplete(scriptPath: string): Promise<string> {
-    const dir = SandboxBridge.shellArg(this.bridgeDir);
-    const run: { done: boolean; result?: ToolResult } = { done: false };
-    // Start the script (long-running — use large timeout). Both outcomes are
-    // handled here, so the promise can never reject unobserved.
-    void this.tp
-      .bash(
-        `cd /workspace 2>/dev/null; PYTHONPATH=${dir}:$PYTHONPATH python3 ${scriptPath} 2>&1`,
-        { timeout: 600_000 }
-      )
-      .then(
-        (result) => {
-          run.result = result;
-          run.done = true;
-        },
-        (err: unknown) => {
-          run.result = {
-            success: false,
-            output: '',
-            error: err instanceof Error ? err.message : String(err),
-          };
-          run.done = true;
-        }
-      );
-    // Poll loop
-    while (!run.done) {
-      await this.pollOnce();
-      await sleep(this.pollInterval);
-    }
-    // Final poll to catch any trailing requests/activity
-    await this.pollOnce();
-    const result = run.result;
-    if (!result) return 'REPL ERROR: script finished without a result';
-    return result.success
-      ? result.output
-      : `REPL ERROR: ${result.error ?? result.output}`;
-  }
-  // ── internal polling ──
-  private async pollOnce(): Promise<void> {
-    await Promise.all([
-      this.pollRequests(),
-      this.pollActivity(),
-    ]);
-  }
-  /** Read `requests.jsonl` and fulfil every request not seen on an earlier poll. */
-  private async pollRequests(): Promise<void> {
-    const result = await this.tp.readFile(`${this.bridgeDir}/requests.jsonl`);
-    if (!result.success || !result.output.trim()) return;
-    const { entries, nextCount } = SandboxBridge.readNewEntries(
-      result.output,
-      this.processedLineCount
-    );
-    this.processedLineCount = nextCount;
-    for (const entry of entries) {
-      // A request without a usable id cannot be answered, so it is ignored.
-      if (!SandboxBridge.hasSafeId(entry)) continue;
-      await this.fulfillRequest(entry as BridgeRequest);
-    }
-  }
-  /** Read `activity.jsonl` and forward every entry not seen on an earlier poll. */
-  private async pollActivity(): Promise<void> {
-    if (!this.config.onActivity) return;
-    const result = await this.tp.readFile(`${this.bridgeDir}/activity.jsonl`);
-    if (!result.success || !result.output.trim()) return;
-    const { entries, nextCount } = SandboxBridge.readNewEntries(
-      result.output,
-      this.activityLineCount
-    );
-    this.activityLineCount = nextCount;
-    for (const entry of entries) {
-      try {
-        this.config.onActivity?.(entry as ActivityEntry);
-      } catch {
-        // Best-effort, as before: a failing observer must not abort the run.
-      }
-    }
-  }
-  /** Compute the answer for one request and write its response file. */
-  private async fulfillRequest(req: BridgeRequest): Promise<void> {
-    let output: string;
-    try {
-      output = await this.resolveOutput(req);
-    } catch (err) {
-      output = `ERROR: ${err instanceof Error ? err.message : String(err)}`;
-    }
-    // Write response file so the Python side unblocks
-    await this.tp.writeFile(
-      `${this.bridgeDir}/resp_${req.id}.json`,
-      JSON.stringify({ output })
-    );
-  }
-  /** Dispatch a request to its callback, or return the matching `ERROR: ...` text. */
-  private async resolveOutput(req: BridgeRequest): Promise<string> {
-    switch (req.type) {
-      case 'llm':
-        if (!this.config.onLlmRequest) return 'ERROR: LLM callback not configured';
-        return this.config.onLlmRequest({
-          id: req.id,
-          model: req.model,
-          prompt: req.prompt,
-        });
-      case 'web_fetch':
-        if (!this.config.onWebFetchRequest) return 'ERROR: WebFetch callback not configured';
-        return this.config.onWebFetchRequest({
-          id: req.id,
-          url: req.url,
-        });
-      case 'web_search':
-        if (!this.config.onWebSearchRequest) return 'ERROR: WebSearch callback not configured';
-        return this.config.onWebSearchRequest({
-          id: req.id,
-          query: req.query,
-        });
-      case 'ask_user':
-        if (!this.config.onAskUserRequest) return 'ERROR: AskUser callback not configured';
-        return this.config.onAskUserRequest({
-          id: req.id,
-          question: req.question,
-        });
-      case 'tell_user':
-        if (this.config.onTellUserRequest) {
-          await this.config.onTellUserRequest({
-            id: req.id,
-            message: req.message,
-          });
-        }
-        return 'ok';
-      default: {
-        // Reachable at runtime because requests are parsed from unvalidated JSON.
-        const unhandled: never = req;
-        return `ERROR: unknown request type "${String((unhandled as { type?: unknown }).type)}"`;
-      }
-    }
-  }
-  /**
-   * Parse the JSONL lines that come after the first `alreadyRead` non-empty lines.
-   *
-   * A final line that fails to parse may have been read mid-write, so it is
-   * left unread and retried on the next poll; an unparseable line followed by
-   * further lines is skipped as malformed.
-   */
-  private static readNewEntries(
-    output: string,
-    alreadyRead: number
-  ): { entries: unknown[]; nextCount: number } {
-    const lines = output.split('\n').filter(Boolean);
-    const entries: unknown[] = [];
-    // Clamp so a shortened file resynchronises the cursor, as the original slice did.
-    let next = Math.min(alreadyRead, lines.length);
-    for (; next < lines.length; next++) {
-      try {
-        entries.push(JSON.parse(lines[next] ?? ''));
-      } catch {
-        if (next === lines.length - 1) break;
-      }
-    }
-    return { entries, nextCount: next };
-  }
-  /** True when `value` is an object whose `id` is a non-empty string with no path separators. */
-  private static hasSafeId(value: unknown): boolean {
-    if (typeof value !== 'object' || value === null) return false;
-    const id = (value as { id?: unknown }).id;
-    return typeof id === 'string' && id.length > 0 && !/[\/\\\0]/.test(id);
-  }
-  /** Return `value` unchanged when it is plainly shell-safe, otherwise single-quoted. */
-  private static shellArg(value: string): string {
-    if (/^[A-Za-z0-9_@%+=:,.~\/-]+$/.test(value)) return value;
-    return "'" + value.replace(/'/g, "'\\''") + "'";
-  }
-}
-/** Resolve after `ms` milliseconds. */
-function sleep(ms: number): Promise<void> {
-  return new Promise<void>((done) => {
-    setTimeout(done, ms);
-  });
-}

package/src/loop/context-store.ts DELETED Viewed

@@ -1,207 +0,0 @@
-import type { AgentMessage } from '../agent/types';
-/** Construction options for `LosslessContextStore`; every field is optional. */
-export interface ContextStoreConfig {
-  /**
-   * Maximum token budget for the trimmed view. Default: 150_000.
-   * Tokens are estimated from character counts, not measured by a tokenizer.
-   */
-  maxTokenBudget?: number;
-  /**
-   * Fraction of budget that triggers trimming. Default: 0.80.
-   * The view is trimmed once the estimate exceeds `maxTokenBudget * trimThreshold`.
-   */
-  trimThreshold?: number;
-  /**
-   * Minimum char length before a tool output gets stubbed. Default: 500.
-   * Applies only to tool messages outside the most recent ("hot") part of the history.
-   */
-  stubThreshold?: number;
-}
-/** Token budget used when `maxTokenBudget` is not set. */
-const DEFAULT_MAX_BUDGET = 150_000;
-/** Trim trigger fraction used when `trimThreshold` is not set. */
-const DEFAULT_TRIM_THRESHOLD = 0.80;
-/** Stub length threshold (chars) used when `stubThreshold` is not set. */
-const DEFAULT_STUB_THRESHOLD = 500;
-/** Divisor that converts a character count into an estimated token count. */
-const CHARS_PER_TOKEN = 4;
-/** Share of the token budget given to the most recent messages; the rest goes to older ones. */
-const HOT_ZONE_RATIO = 0.60;
-/**
- * Lossless context store.
- *
- * Stores every AgentMessage verbatim in `raw`. Produces a trimmed `view`
- * that fits within a token budget by stubbing only mechanical overhead
- * (long tool outputs, base64 blobs, ANSI codes) in the cold zone.
- *
- * User and assistant text messages are **never** modified. Older messages
- * that do not fit the cold-zone budget are left out of the view; they remain
- * available through `getRaw()`.
- */
-export class LosslessContextStore {
-  private raw: AgentMessage[] = [];
-  private readonly maxBudget: number;
-  private readonly trimThreshold: number;
-  private readonly stubThreshold: number;
-  constructor(config: ContextStoreConfig = {}) {
-    this.maxBudget = config.maxTokenBudget ?? DEFAULT_MAX_BUDGET;
-    this.trimThreshold = config.trimThreshold ?? DEFAULT_TRIM_THRESHOLD;
-    this.stubThreshold = config.stubThreshold ?? DEFAULT_STUB_THRESHOLD;
-  }
-  /**
-   * Record new messages.
-   *
-   * When `messages` starts with everything already stored (the common case —
-   * the agent resends its full history each call), only the new tail is
-   * appended. Otherwise the conversation was reset or diverged and the store
-   * is replaced with `messages`. An empty array is ignored.
-   */
-  ingest(messages: AgentMessage[]): void {
-    if (messages.length === 0) return;
-    if (messages.length >= this.raw.length && this.isPrefixMatch(messages)) {
-      this.raw.push(...messages.slice(this.raw.length));
-    } else {
-      this.raw = [...messages];
-    }
-  }
-  /** Trimmed view that fits within the token budget. */
-  getView(): AgentMessage[] {
-    const estimated = this.estimateTokens();
-    const threshold = this.maxBudget * this.trimThreshold;
-    // Under threshold — return as-is (shallow copy)
-    if (estimated <= threshold) {
-      return [...this.raw];
-    }
-    return this.buildTrimmedView();
-  }
-  /** Full unmodified history (shallow copy). */
-  getRaw(): AgentMessage[] {
-    return [...this.raw];
-  }
-  /** Approximate token count of the raw store. */
-  estimateTokens(): number {
-    return this.estimateTokensFor(this.raw);
-  }
-  /** Message counts and estimated token counts of the raw store and the view, for logging. */
-  stats(): { raw: number; view: number; tokensRaw: number; tokensView: number } {
-    const view = this.getView();
-    return {
-      raw: this.raw.length,
-      view: view.length,
-      tokensRaw: this.estimateTokensFor(this.raw),
-      tokensView: this.estimateTokensFor(view),
-    };
-  }
-  // ── internals ──
-  /**
-   * True when `incoming` starts with every stored message (same role and content).
-   * The caller guarantees `incoming` is at least as long as the store.
-   */
-  private isPrefixMatch(incoming: AgentMessage[]): boolean {
-    for (let i = 0; i < this.raw.length; i++) {
-      const existing = this.raw[i]!;
-      const candidate = incoming[i]!;
-      if (existing.role !== candidate.role || existing.content !== candidate.content) {
-        return false;
-      }
-    }
-    return true;
-  }
-  /** Estimate tokens for a list: total chars (content + role + 4 per message) over CHARS_PER_TOKEN. */
-  private estimateTokensFor(msgs: AgentMessage[]): number {
-    let chars = 0;
-    for (const m of msgs) {
-      chars += m.content.length + m.role.length + 4; // role + separators
-    }
-    return Math.ceil(chars / CHARS_PER_TOKEN);
-  }
-  /**
-   * Build the over-budget view: the newest messages are kept verbatim (hot
-   * zone), older ones (cold zone) are kept, stubbed or left out.
-   */
-  private buildTrimmedView(): AgentMessage[] {
-    const hotBudgetTokens = Math.floor(this.maxBudget * HOT_ZONE_RATIO);
-    const coldBudgetTokens = this.maxBudget - hotBudgetTokens;
-    // ── 1. Determine hot zone boundary (work backward from end) ──
-    const newestIdx = this.raw.length - 1;
-    let hotTokens = 0;
-    let hotStart = this.raw.length;
-    for (let i = newestIdx; i >= 0; i--) {
-      const msgTokens = this.estimateMessageTokens(this.raw[i]!);
-      // The newest message is always hot, even if it alone exceeds the hot
-      // budget; otherwise it would be handled as cold and could be stubbed or dropped.
-      if (i !== newestIdx && hotTokens + msgTokens > hotBudgetTokens) break;
-      hotTokens += msgTokens;
-      hotStart = i;
-    }
-    const hotZone = this.raw.slice(hotStart);
-    const coldZone = this.raw.slice(0, hotStart);
-    if (coldZone.length === 0) return [...hotZone];
-    // ── Pass 1: collect the tool names that appear in the hot zone ──
-    const liveToolIds = new Set<string>();
-    for (const m of hotZone) {
-      // Tool results are formatted as "ToolName: output" by create-agent
-      if (m.role === 'tool') {
-        const colonIdx = m.content.indexOf(':');
-        if (colonIdx > 0) {
-          liveToolIds.add(m.content.slice(0, colonIdx));
-        }
-      }
-    }
-    // ── Pass 2 & 3: trim cold zone ──
-    const trimmedCold: AgentMessage[] = [];
-    let coldTokens = 0;
-    // Keep a message only while the cold budget allows it.
-    const keepIfFits = (msg: AgentMessage): void => {
-      const tokens = this.estimateMessageTokens(msg);
-      if (coldTokens + tokens <= coldBudgetTokens) {
-        trimmedCold.push(msg);
-        coldTokens += tokens;
-      }
-    };
-    for (const m of coldZone) {
-      // User, assistant and system messages are never modified
-      if (m.role === 'user' || m.role === 'assistant' || m.role === 'system') {
-        keepIfFits(m);
-        continue;
-      }
-      // Any role other than 'tool' is left out of the view, as before.
-      if (m.role !== 'tool') continue;
-      // Tool results in cold zone
-      const colonIdx = m.content.indexOf(':');
-      const hasName = colonIdx > 0;
-      const toolName = hasName ? m.content.slice(0, colonIdx) : '';
-      // +2 skips the ": " separator of the "ToolName: output" format
-      const toolOutput = hasName ? m.content.slice(colonIdx + 2) : m.content;
-      // Pass 3a: drop large outputs of tools that do not appear in the hot zone
-      if (!liveToolIds.has(toolName) && toolOutput.length > this.stubThreshold * 2) {
-        continue; // drop entirely
-      }
-      // Pass 2: stub large tool outputs
-      let content = m.content;
-      if (toolOutput.length > this.stubThreshold) {
-        const stub = `[output truncated: ${toolOutput.length} chars]`;
-        // Without a name there is no prefix, so the stub does not start with ": "
-        content = hasName ? `${toolName}: ${stub}` : stub;
-      }
-      // Pass 3b: strip base64 data and ANSI codes
-      content = this.stripMechanicalOverhead(content);
-      // Budgeted with the same per-message estimate as every other message
-      keepIfFits({ ...m, content });
-    }
-    return [...trimmedCold, ...hotZone];
-  }
-  /** Estimate tokens for one message: (content + role + 4 chars) over CHARS_PER_TOKEN, rounded up. */
-  private estimateMessageTokens(m: AgentMessage): number {
-    return Math.ceil((m.content.length + m.role.length + 4) / CHARS_PER_TOKEN);
-  }
-  /** Replace long base64 data URIs with a placeholder and remove ANSI escape sequences. */
-  private stripMechanicalOverhead(content: string): string {
-    // Strip base64 data URIs
-    let cleaned = content.replace(/data:[^;]+;base64,[A-Za-z0-9+/=]{100,}/g, '[base64 data removed]');
-    // Strip ANSI escape codes
-    cleaned = cleaned.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, '');
-    return cleaned;
-  }
-}