npm - @bluecopa/harness - Versions diffs - 0.1.0-snapshot.119 → 0.1.0-snapshot.12 - Mend

@bluecopa/harness 0.1.0-snapshot.119 → 0.1.0-snapshot.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

package/AGENTS.md +18 -0
package/README.md +148 -208
package/docs/guides/observability.md +32 -0
package/docs/guides/providers.md +51 -0
package/docs/guides/skills.md +25 -0
package/docs/security/skill-sandbox-threat-model.md +20 -0
package/package.json +1 -28
package/src/agent/create-agent.ts +893 -0
package/src/agent/create-tools.ts +33 -0
package/src/agent/step-executor.ts +15 -0
package/src/agent/types.ts +70 -0
package/src/arc/arc-loop.ts +396 -0
package/src/arc/arc-types.ts +215 -0
package/src/arc/bridge-tools.ts +170 -0
package/src/arc/bridged-tool-provider.ts +80 -0
package/src/arc/consolidation.ts +118 -0
package/src/arc/create-arc-agent.ts +80 -0
package/src/arc/debug.ts +62 -0
package/src/arc/episode-compressor.ts +151 -0
package/src/arc/object-store/fs-object-store.ts +60 -0
package/src/arc/object-store/memory-object-store.ts +41 -0
package/src/arc/object-store/object-store.ts +12 -0
package/src/arc/stores/episode-store.ts +120 -0
package/src/arc/stores/long-term-store.ts +86 -0
package/src/arc/stores/rxdb-setup.ts +112 -0
package/src/arc/stores/session-memo-store.ts +58 -0
package/src/arc/thread-executor.ts +404 -0
package/src/arc/thread-tool.ts +29 -0
package/src/context/llm-compaction-strategy.ts +37 -0
package/src/context/prepare-step.ts +65 -0
package/src/context/token-tracker.ts +26 -0
package/src/extracted/manifest.json +10 -0
package/src/extracted/prompts/compaction.md +5 -0
package/src/extracted/prompts/system.md +5 -0
package/src/extracted/tools.json +82 -0
package/src/hooks/hook-runner.ts +22 -0
package/src/hooks/tool-wrappers.ts +64 -0
package/src/interfaces/compaction-strategy.ts +18 -0
package/src/interfaces/hooks.ts +24 -0
package/src/interfaces/sandbox-provider.ts +29 -0
package/src/interfaces/session-store.ts +48 -0
package/src/interfaces/tool-provider.ts +70 -0
package/src/loop/bridge.ts +363 -0
package/src/loop/context-store.ts +210 -0
package/src/loop/lcm-tool-loop.ts +163 -0
package/src/loop/vercel-agent-loop.ts +285 -0
package/src/observability/context.ts +17 -0
package/src/observability/metrics.ts +27 -0
package/src/observability/otel.ts +105 -0
package/src/observability/tracing.ts +13 -0
package/src/optimization/agent-evaluator.ts +40 -0
package/src/optimization/config-serializer.ts +16 -0
package/src/optimization/optimization-runner.ts +39 -0
package/src/optimization/trace-collector.ts +33 -0
package/src/permissions/permission-manager.ts +34 -0
package/src/providers/composite-tool-provider.ts +72 -0
package/src/providers/control-plane-e2b-executor.ts +218 -0
package/src/providers/e2b-tool-provider.ts +68 -0
package/src/providers/local-tool-provider.ts +190 -0
package/src/providers/skill-sandbox-provider.ts +46 -0
package/src/sessions/file-session-store.ts +61 -0
package/src/sessions/in-memory-session-store.ts +39 -0
package/src/sessions/session-manager.ts +44 -0
package/src/skills/skill-loader.ts +52 -0
package/src/skills/skill-manager.ts +175 -0
package/src/skills/skill-router.ts +99 -0
package/src/skills/skill-types.ts +26 -0
package/src/subagents/subagent-manager.ts +22 -0
package/src/subagents/task-tool.ts +13 -0
package/tests/integration/agent-loop-basic.spec.ts +56 -0
package/tests/integration/agent-skill-default-from-sandbox.spec.ts +67 -0
package/tests/integration/concurrency-single-turn.spec.ts +35 -0
package/tests/integration/otel-metrics-emission.spec.ts +62 -0
package/tests/integration/otel-trace-propagation.spec.ts +48 -0
package/tests/integration/parity-benchmark.spec.ts +45 -0
package/tests/integration/provider-local-smoke.spec.ts +63 -0
package/tests/integration/session-resume.spec.ts +30 -0
package/tests/integration/skill-install-rollback.spec.ts +64 -0
package/tests/integration/skill-sandbox-file-blob.spec.ts +54 -0
package/tests/integration/skills-progressive-disclosure.spec.ts +61 -0
package/tests/integration/streaming-compaction-boundary.spec.ts +43 -0
package/tests/integration/structured-messages-agent.spec.ts +265 -0
package/tests/integration/subagent-isolation.spec.ts +24 -0
package/tests/security/skill-sandbox-isolation.spec.ts +51 -0
package/tests/unit/create-tools-schema-parity.spec.ts +22 -0
package/tests/unit/extracted-manifest.spec.ts +41 -0
package/tests/unit/interfaces-contract.spec.ts +101 -0
package/tests/unit/structured-messages.spec.ts +176 -0
package/tests/unit/token-tracker.spec.ts +22 -0
package/tsconfig.json +14 -0
package/vitest.config.ts +7 -0
package/dist/arc/app-adapter.d.ts +0 -108
package/dist/arc/app-adapter.js +0 -423
package/dist/arc/app-adapter.js.map +0 -1
package/dist/arc/create-arc-agent.d.ts +0 -50
package/dist/arc/create-arc-agent.js +0 -4317
package/dist/arc/create-arc-agent.js.map +0 -1
package/dist/arc/profile-builder.d.ts +0 -49
package/dist/arc/profile-builder.js +0 -171
package/dist/arc/profile-builder.js.map +0 -1
package/dist/loop/vercel-agent-loop.d.ts +0 -125
package/dist/loop/vercel-agent-loop.js +0 -345
package/dist/loop/vercel-agent-loop.js.map +0 -1
package/dist/types-HplqyDx-.d.ts +0 -873

package/AGENTS.md ADDED Viewed

@@ -0,0 +1,18 @@
+# AGENTS.md
+Guidance for agents working in `harness/`.
+Reference: https://agents.md/
+## Scope
+`harness/` contains the TypeScript agent framework core.
+## Rules
+- Keep API changes explicit and typed.
+- Maintain deterministic behavior in agent loop, compaction, and tool execution.
+- Preserve compatibility of extracted tool schemas unless intentionally versioned.
+## Commands
+```bash
+pnpm install
+pnpm test
+```

package/README.md CHANGED Viewed

@@ -2,17 +2,9 @@
 Provider-agnostic TypeScript agent framework with Claude-code-compatible tool semantics.
-Published on npm as **`@bluecopa/harness`**.
+The harness provides the core loop that drives an AI agent: send messages to an LLM, execute the tool calls it returns, feed results back, and repeat until the LLM produces a final text response.
-Two execution modes: a simple single-agent loop (`createAgent` + `VercelAgentLoop`) and a process-based orchestrator (`ArcLoop`) that dispatches parallel processes with context management, memory, and resilience.
-## Install
-```bash
-pnpm add @bluecopa/harness
-```
-## Development
+## Quickstart
 ```bash
 pnpm install
@@ -21,11 +13,9 @@ pnpm test
 ## Architecture
-### Single-Agent Loop
 ```
 ┌──────────────┐     ┌──────────────┐     ┌──────────────────┐
-│  createAgent │────►│  AgentLoop   │────►│  LLM (Claude)    │
+│  createAgent │────▶│  AgentLoop   │────▶│  LLM (Claude)    │
 │  (turn loop) │     │  (nextAction)│     │                  │
 └──────┬───────┘     └──────────────┘     └──────────────────┘
        │                                           │
@@ -37,82 +27,20 @@ pnpm test
 └──────────────┘
 ```
-### ArcLoop Orchestrator
+1. `createAgent` drives a deterministic step loop
+2. Each step calls `loop.nextAction(messages)` to get the LLM's decision
+3. If it's a tool call, the harness executes it via `ToolProvider` and appends the result
+4. If it's a final action, the loop ends and returns the result
-```
-Orchestrator (ArcLoop — Opus 4.6 by default)
-  │  tools: Thread, Check, Cancel, Remember, ReadEpisode
-  │
-  │  Turn 1 (parallel):
-  ├──► Process 0 ("read auth", model=fast)        ─┐
-  ├──► Process 1 ("read routes", model=fast)       ─┼──► Episodes
-  ├──► Process 2 ("read tests", model=fast)        ─┘
-  │
-  │  Turn 2 (dispatch dependent work):
-  ├──► Thread("fix bug", context=[ep0,ep1,ep2])    ──► Episode
-  │
-  │  Turn 3 (parallel):
-  ├──► Thread("run tests", context=[ep3])          ─┐
-  ├──► Thread("update docs", context=[ep3])        ─┘
-  │
-  └──► Final text response
-```
+## Using with the sandbox
-Full architecture doc: [`docs/arc.md`](../docs/arc.md)
----
-## ToolProvider
-The contract for tool execution. All agent modes use this interface.
+The most common setup connects the harness to a running sandbox service via `ControlPlaneE2BExecutor`:
-```typescript
-interface ToolProvider {
-  bash(command: string, options?: BashOptions): Promise<ToolResult>;
-  readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
-  writeFile(path: string, content: string): Promise<ToolResult>;
-  editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
-  glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
-  grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
-  webFetch?(options: WebFetchOptions): Promise<ToolResult>;
-  webSearch?(query: string): Promise<ToolResult>;
-  capabilities(): ToolProviderCapabilities;
-}
-interface ToolResult {
-  success: boolean;
-  output: string;
-  error?: string;
-}
-```
-Built-in implementations:
-| Provider | Description |
-|----------|-------------|
-| `LocalToolProvider` | Runs tools on the local filesystem |
-| `E2BToolProvider` | Routes tools to a sandbox VM via `ControlPlaneE2BExecutor` |
-| `CompositeToolProvider` | Combines multiple providers (e.g. local filesystem + sandbox bash) |
-## SandboxProvider
-Higher-level sandbox operations beyond basic tool calls:
-```typescript
-interface SandboxProvider {
-  exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
-  readSandboxFile(path: string): Promise<SandboxFileBlob>;
-  writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
-}
-```
-Used by `SkillManager` for executing skill scripts in isolated VMs.
-## Connecting to a Sandbox
-```typescript
-import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
+```ts
+import { createAgent } from './src/agent/create-agent';
 import { E2BToolProvider } from './src/providers/e2b-tool-provider';
+import { ControlPlaneE2BExecutor } from './src/providers/control-plane-e2b-executor';
+import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
 // Connect to sandbox service
 const executor = new ControlPlaneE2BExecutor({
@@ -122,172 +50,187 @@ const executor = new ControlPlaneE2BExecutor({
 });
 await executor.initialize();  // creates a Firecracker VM
-const toolProvider = new E2BToolProvider(executor);
+// Build and run the agent
+const agent = createAgent({
+  toolProvider: new E2BToolProvider(executor),
+  loop: new VercelAgentLoop(),  // needs ANTHROPIC_API_KEY
+});
-// ... use with createAgent or ArcLoop
+const result = await agent.run('create a bar chart of sales data');
+console.log(result.output);   // LLM's final response
+console.log(result.steps);    // number of tool steps
-await executor.destroy();  // tears down the VM
+await executor.destroy();     // tears down the VM
 ```
-From environment variables: `ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY`.
+For a complete working example, see [`examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts).
+### From environment variables
----
+`ControlPlaneE2BExecutor.fromEnv()` reads `SAMYX_BASE_URL` and `SAMYX_API_KEY` automatically:
-## Single-Agent Mode (`createAgent`)
+```ts
+const executor = ControlPlaneE2BExecutor.fromEnv();
+```
+## Using locally (no sandbox)
-For simple tasks that don't need orchestration:
+For development without a sandbox service, use `LocalToolProvider` which runs tools on the local machine:
-```typescript
+```ts
 import { createAgent } from './src/agent/create-agent';
 import { LocalToolProvider } from './src/providers/local-tool-provider';
 const agent = createAgent({
   toolProvider: new LocalToolProvider(process.cwd()),
-  loop: new VercelAgentLoop(),  // needs ANTHROPIC_API_KEY
+  loop: new VercelAgentLoop(),
 });
 const result = await agent.run('list all TypeScript files');
-console.log(result.output);
 ```
-### Configuration
+## Key modules
+### Agent creation (`src/agent/create-agent.ts`)
-| Option | Type | Default | Description |
-|--------|------|---------|-------------|
-| `toolProvider` | `ToolProvider` | required | Executes tool calls |
-| `loop` | `AgentLoop` | `VercelAgentLoop` | LLM decision loop |
-| `sandboxProvider` | `SandboxProvider` | — | Higher-level sandbox operations |
-| `maxSteps` | `number` | 30 | Max tool steps per run |
-| `telemetry` | `HarnessTelemetry` | — | OpenTelemetry-style tracing |
-| `skillIndexPath` | `string` | — | Path to skill index JSON for routing |
+`createAgent(options)` returns an agent with a `.run(prompt, options?)` method. Options:
-### VercelAgentLoop
+| Option | Type | Description |
+|--------|------|-------------|
+| `toolProvider` | `ToolProvider` | Required. Executes tool calls |
+| `loop` | `AgentLoop` | LLM decision loop (default: `VercelAgentLoop`) |
+| `sandboxProvider` | `SandboxProvider` | Optional. Higher-level sandbox ops (file download, exec with env) |
+| `maxSteps` | `number` | Max tool steps per run (default: 30) |
+| `telemetry` | `HarnessTelemetry` | Optional. OpenTelemetry-style tracing |
+| `skillIndexPath` | `string` | Optional. Path to skill index JSON |
-Calls Claude via the Vercel AI SDK. Supports parallel tool calls and configurable system prompt.
+### Agent loop (`src/loop/vercel-agent-loop.ts`)
-```typescript
+`VercelAgentLoop` calls Claude via the Vercel AI SDK. It supports:
+- Parallel tool calls (returns `ToolBatchAction` when the LLM requests multiple tools at once)
+- Configurable system prompt
+- Model selection via `HARNESS_MODEL` env var (default: `claude-sonnet-4-5`)
+```ts
 const loop = new VercelAgentLoop({
   systemPrompt: 'You are a helpful coding assistant.',
-  model: 'claude-sonnet-4-5',  // or HARNESS_MODEL env var
 });
 ```
-### LCMToolLoop
+### Tool provider (`src/interfaces/tool-provider.ts`)
-Wraps another loop to add Lossless Context Management and optional REPL orchestration:
+The contract for tool execution:
-```typescript
-import { LCMToolLoop } from './src/loop/lcm-tool-loop';
-import { VercelAgentLoop } from './src/loop/vercel-agent-loop';
+```ts
+interface ToolProvider {
+  bash(command: string, options?: BashOptions): Promise<ToolResult>;
+  readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
+  writeFile(path: string, content: string): Promise<ToolResult>;
+  editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
+  glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
+  grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
+  webFetch?(options: WebFetchOptions): Promise<ToolResult>;
+  webSearch?(query: string): Promise<ToolResult>;
+  capabilities(): ToolProviderCapabilities;
+}
-const loop = new LCMToolLoop({
-  innerLoop: new VercelAgentLoop(),
-  toolProvider: mySandboxProvider,
-  enableRepl: true,           // default: true
-  bridgeDir: '/var/run/bridge',
-  onActivity: (entry) => console.log(entry),
-  onLlmRequest: async (prompt) => callLLM(prompt),
-  onWebFetchRequest: async (url) => fetch(url),
-});
+interface ToolResult {
+  success: boolean;
+  output: string;
+  error?: string;
+}
 ```
-**Standard mode**: Lossless context trimming — the LLM always sees a coherent, budget-fitting view of the full conversation.
-**REPL mode**: When the LLM returns a Bash action with the REPL marker, the loop writes a Python script into the sandbox, injects the bridge module, runs the script, and polls for sub-requests (LLM, web_fetch, ask_user) that the harness fulfills.
+Built-in implementations:
----
+| Provider | Description |
+|----------|-------------|
+| `LocalToolProvider` | Runs tools on the local filesystem |
+| `E2BToolProvider` | Routes tools to an E2B-compatible executor over HTTP |
+| `CompositeToolProvider` | Combines multiple providers (e.g. sandbox + web) |
-## ArcLoop (Orchestrator Mode)
+### Action types (`src/agent/types.ts`)
-For complex tasks that benefit from parallel processes, context management, and memory:
+The LLM returns one of these action types each turn:
-```typescript
-import { createArcAgent } from './src/arc/create-arc-agent';
-const agent = await createArcAgent({
-  toolProvider: myToolProvider,
-  episodeStore: myEpisodeStore,       // required
-  sessionMemoStore: mySessionMemoStore, // required
-  longTermStore: myLongTermStore,       // required
-  taskId: 'task-1',
-  sessionId: 'session-1',
-});
+```ts
+// Single tool call
+interface ToolCallAction {
+  type: 'tool';
+  name: 'Bash' | 'Read' | 'Write' | 'Edit' | 'Glob' | 'Grep' | ...;
+  args: Record<string, unknown>;
+}
-// Streaming
-for await (const event of agent.stream(messages, signal)) {
-  if (event.type === 'text_delta') process.stdout.write(event.text);
-  if (event.type === 'process_dispatched') console.log(`  → ${event.action}`);
-  if (event.type === 'done') console.log(`Done in ${event.stats.durationMs}ms`);
+// Multiple independent tool calls (executed in parallel)
+interface ToolBatchAction {
+  type: 'tool_batch';
+  calls: ToolCallAction[];
 }
-// Non-streaming
-const result = await agent.run(messages, signal);
+// Final text response (ends the loop)
+interface FinalAction {
+  type: 'final';
+  content: string;
+}
 ```
-### ArcLoopConfig
-| Option | Type | Default | Description |
-|--------|------|---------|-------------|
-| `model` | `string` | `'claude-opus-4-6'` | Orchestrator model (ID or tier name) |
-| `modelMap` | `Record<ModelTier, string>` | haiku/sonnet/opus | Maps fast/medium/strong to model IDs |
-| `apiKey` | `string` | — | Anthropic API key |
-| `systemPrompt` | `string` | built-in | Custom orchestrator system prompt |
-| `maxTurns` | `number` | 30 | Max orchestrator turns |
-| `processTimeout` | `number` | 120_000 | Per-process timeout (ms) |
-| `processMaxSteps` | `number` | 20 | Per-process max tool steps |
-| `contextWindowSize` | `number` | 200_000 | Context window in tokens |
-| `outputReserve` | `number` | 20_000 | Tokens reserved for output |
-| `autoMemory` | `boolean` | true | Auto-detect patterns from episodes |
-| `episodeStore` | `EpisodeStore` | required | Stores episode summaries + traces |
-| `sessionMemoStore` | `SessionMemoStore` | required | Stores session memos |
-| `longTermStore` | `LongTermStore` | required | Stores long-term memories |
-| `taskId` | `string` | required | Task identifier |
-| `sessionId` | `string` | required | Session identifier |
-| `toolProvider` | `ToolProvider` | required | Tool execution |
-| `processTools` | `Record<string, AnyTool>` | builtinTools | Tools available inside processes |
-| `extraOrchestratorTools` | `Record<string, AnyTool>` | — | Custom orchestrator tools |
-| `onOrchestratorTool` | `function` | — | Handler for custom orchestrator tools |
-| `resilience` | `ResiliencePolicy` | — | Composable resilience pipeline |
-| `traceWriter` | `function` | — | Callback for trace event emission |
-### Resilience
-```typescript
-import { resilience } from './src/arc/resilience';
-const pipeline = resilience()
-  .retry({ maxRetries: 2, baseDelay: 1000 })
-  .timeout({ durationMs: 30_000 })
-  .circuitBreaker({ failureThreshold: 5 })
-  .build();
-const agent = await createArcAgent({
-  // ...config
-  resilience: pipeline,
-});
+### LCM tool loop (`src/loop/lcm-tool-loop.ts`)
+`LCMToolLoop` wraps another loop to add LCM-based tool routing, REPL script execution, and bridge-based tool dispatch. Used in the chat-assistant example.
+### Sandbox provider (`src/interfaces/sandbox-provider.ts`)
+Higher-level sandbox operations beyond basic tool calls:
+```ts
+interface SandboxProvider {
+  exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
+  readSandboxFile(path: string): Promise<SandboxFileBlob>;
+  writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
+}
 ```
-### Trace Emission
+### Observability (`src/observability/otel.ts`)
+`HarnessTelemetry` provides OpenTelemetry-style spans and metrics for agent runs.
+### Arc: Orchestrator + Thread Architecture (`src/arc/`)
+`ArcLoop` is an `AgentLoop` implementation where an orchestrator LLM dispatches bounded threads via a single `Thread` tool. Threads produce episodes (summary + full trace). The orchestrator only sees summaries, keeping its context small.
+```ts
+import { createArcAgent } from './src/arc/create-arc-agent';
+import { InMemoryEpisodeStore } from './src/arc/stores/episode-store';
+import { InMemorySessionMemoStore } from './src/arc/stores/session-memo-store';
+import { InMemoryLongTermStore } from './src/arc/stores/long-term-store';
-```typescript
-const traces: TraceEvent[] = [];
-const agent = await createArcAgent({
-  // ...config
-  traceWriter: (event) => traces.push(event),
+const agent = createArcAgent({
+  toolProvider: new LocalToolProvider(process.cwd()),
+  episodeStore: new InMemoryEpisodeStore(),
+  sessionMemoStore: new InMemorySessionMemoStore(),
+  longTermStore: new InMemoryLongTermStore(),
+  taskId: 'task-1',
+  sessionId: 'session-1',
 });
+const result = await agent.run('Fix the authentication bug');
 ```
-Traces can be validated against the formal model: `cd verify && cargo run -- trace file.ndjson`
+Key features:
+- **Parallel threads**: orchestrator calls Thread N times in one turn → all run concurrently
+- **Four-tier memory**: thread context → episodes → session memos → long-term
+- **Per-thread models**: Haiku for reads, Sonnet for implementation
+- **Template compression**: zero-LLM-call episode summaries
+- **Async consolidation**: non-blocking background distillation
----
+Full architecture doc: [`docs/arc.md`](../docs/arc.md)
-## Package Layout
+## Package layout
 ```
 src/
 ├── agent/          # createAgent, step executor, types
-├── arc/            # ArcLoop orchestrator, processes, memory, resilience
-│   ├── resilience/ # Retry, circuit breaker, timeout, bulkhead, fallback
+├── arc/            # ArcLoop orchestrator, threads, memory hierarchy
 │   ├── stores/     # RxDB + in-memory store implementations
 │   └── object-store/ # Pluggable cloud sync (fs, memory)
 ├── interfaces/     # ToolProvider, SandboxProvider, AgentLoop contracts
@@ -297,20 +240,17 @@ src/
 ├── hooks/          # Pre/post tool call hooks
 ├── permissions/    # Tool permission checks
 ├── sessions/       # Session persistence
-├── subagents/      # Subagent spawning
+├── subagents/      # Subagent spawning and task tools
 ├── skills/         # Skill index, routing, and management
 ├── optimization/   # Benchmark runner
 └── observability/  # OpenTelemetry integration
-verify/             # Rust formal verification (Stateright model checker)
-testing/            # Adversarial scenario replay harness
-tests/              # Vitest test suite
 ```
 ## Documentation
-- [Arc architecture](../docs/arc.md) — process model, context window, memory, resilience, verification
-- [Testing](../docs/testing.md) — test layers, running tests, writing new tests
-- [Sandbox setup](../docs/PUBLIC_SANDBOX.md) — deploying the sandbox service
-- [Release process](../docs/RELEASE.md) — versioning and publishing
-- [Example](../examples/chat-assistant/src/chat.ts) — complete working chat assistant
+- **Arc architecture**: [`docs/arc.md`](../docs/arc.md)
+- Provider guide: `docs/guides/providers.md`
+- Skills guide: `docs/guides/skills.md`
+- Observability guide: `docs/guides/observability.md`
+- Release process: `../docs/RELEASE.md`
+- Full example: [`../examples/chat-assistant/src/chat.ts`](../examples/chat-assistant/src/chat.ts)

package/docs/guides/observability.md ADDED Viewed

@@ -0,0 +1,32 @@
+# Observability Guide
+Harness emits OpenTelemetry-style traces and metrics through `HarnessTelemetry`.
+## Spans
+- `agent.run`
+- `agent.step`
+- `tool.call`
+- `context.compaction`
+- `skill.exec`
+- `subagent.run`
+## Metrics
+- `agent_steps_total`
+- `tool_calls_total`
+- `tool_call_duration_ms`
+- `compactions_total`
+- `agent_errors_total`
+## Correlation Fields
+Attach these fields to logs where available:
+- `trace_id`
+- `span_id`
+- `run_id`
+- `session_id`
+## Disable Mode
+Create telemetry in disabled mode for zero-impact execution:
+```ts
+const telemetry = new HarnessTelemetry(false);
+```

package/docs/guides/providers.md ADDED Viewed

@@ -0,0 +1,51 @@
+# Providers Guide
+## ToolProvider
+Implement the `ToolProvider` interface to expose agent tools (`Bash`, `Read`, `Write`, `Edit`, `Glob`, `Grep`).
+Included foundations:
+- `LocalToolProvider`
+- `CompositeToolProvider`
+- `E2BToolProvider` (executor-backed adapter)
+## SandboxProvider
+Use `SandboxProvider` for infrastructure actions (skill execution, setup/install tasks). Keep it separate from `ToolProvider`.
+The current sandbox file contract is binary-first:
+```ts
+type SandboxFileBlob = {
+  data: Uint8Array;
+  mimeType?: string;
+  filename?: string;
+};
+interface SandboxProvider {
+  exec(command: string, options?: SandboxExecOptions): Promise<SandboxExecResult>;
+  readSandboxFile(path: string): Promise<SandboxFileBlob>;
+  writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void>;
+}
+```
+Use `mimeType`/`filename` for transport metadata (for example, raw download endpoints). Keep file contents in `data` as bytes.
+## Capability Routing
+`CompositeToolProvider` routes calls to the first provider that advertises each capability.
+## Default Skill Sandbox
+`SkillManager` now defaults to the harness-provided `SkillSandboxProvider`:
+```ts
+const skillManager = new SkillManager();
+```
+Default provider env vars:
+- `SAMYX_BASE_URL` or `SANDBOX_BASE_URL`
+- `SAMYX_API_KEY` or `SANDBOX_API_KEY`
+- optional `SANDBOX_TEMPLATE` (default: `ubuntu-22.04`)
+You can still override with a custom provider:
+```ts
+const skillManager = new SkillManager(customSandboxProvider);
+```

package/docs/guides/skills.md ADDED Viewed

@@ -0,0 +1,25 @@
+# Skills Guide
+## Progressive Disclosure
+`SkillManager` stores only summary metadata for prompt injection and loads full `SKILL.md` instructions on invocation.
+## Skill Routing
+`createAgent` uses a `SkillRouter` before invocation:
+- direct skill-name match (word boundary)
+- alias match (for example `excel -> xlsx`, `word -> docx`, `powerpoint -> pptx`)
+- Haiku model fallback for semantic matching
+Environment knobs:
+- `HARNESS_SKILL_ROUTER_MODEL` (default: `claude-3-5-haiku-latest`)
+- `HARNESS_SKILL_ROUTER_THRESHOLD` (default: `0.55`)
+## Install Lifecycle
+Dependency install state transitions:
+- `installing`
+- `ready`
+- `degraded`
+If the install fails, the state becomes `degraded` and the error is surfaced.
+## Security Baseline
+See `docs/security/skill-sandbox-threat-model.md` for path traversal and sandbox boundary rules.

package/docs/security/skill-sandbox-threat-model.md ADDED Viewed

@@ -0,0 +1,20 @@
+# Skill Sandbox Threat Model
+## Scope
+This document defines the baseline security assumptions for skill execution in harness.
+## Trust Boundaries
+- Skill scripts are untrusted input.
+- Sandbox runtime is the security boundary.
+- Host filesystem and host network are outside the trust boundary.
+## Controls
+- Deny host mounts by default.
+- Deny outbound network by default unless explicitly allowed.
+- Use tenant-scoped credentials and ephemeral filesystems.
+- Disallow path traversal (`..`) in skill paths.
+## Required Tests
+- Sandbox escape attempt should fail.
+- Cross-tenant path access should fail.
+- Dependency install failures should degrade skill state and block execution until retry.

package/package.json CHANGED Viewed

@@ -1,35 +1,9 @@
 {
   "name": "@bluecopa/harness",
-  "version": "0.1.0-snapshot.119",
+  "version": "0.1.0-snapshot.12",
   "description": "Provider-agnostic TypeScript agent framework",
   "license": "UNLICENSED",
-  "type": "module",
-  "files": [
-    "dist",
-    "README.md"
-  ],
-  "exports": {
-    "./arc/app-adapter": {
-      "types": "./dist/arc/app-adapter.d.ts",
-      "import": "./dist/arc/app-adapter.js"
-    },
-    "./arc/create-arc-agent": {
-      "types": "./dist/arc/create-arc-agent.d.ts",
-      "import": "./dist/arc/create-arc-agent.js"
-    },
-    "./arc/profile-builder": {
-      "types": "./dist/arc/profile-builder.d.ts",
-      "import": "./dist/arc/profile-builder.js"
-    },
-    "./loop/vercel-agent-loop": {
-      "types": "./dist/loop/vercel-agent-loop.d.ts",
-      "import": "./dist/loop/vercel-agent-loop.js"
-    },
-    "./package.json": "./package.json"
-  },
   "scripts": {
-    "build": "tsup",
-    "prepack": "pnpm run build",
     "test": "vitest run",
     "test:watch": "vitest"
   },
@@ -41,7 +15,6 @@
   },
   "devDependencies": {
     "@types/node": "^24.3.0",
-    "tsup": "^8.5.1",
     "typescript": "^5.9.2",
     "vitest": "^3.2.4"
   },