npm - @purista/harness - Versions diffs - 1.0.0 → 1.1.0 - Mend

@purista/harness 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/README.md +15 -0
package/dist/agents/index.d.ts +5 -3
package/dist/agents/index.js +58 -6
package/dist/errors/catalog.d.ts +11 -4
package/dist/eval/index.d.ts +57 -0
package/dist/eval/index.js +181 -0
package/dist/harness/defineHarness.d.ts +38 -18
package/dist/harness/defineHarness.js +23 -2
package/dist/index.d.ts +2 -0
package/dist/index.js +2 -0
package/dist/memory/sandbox/index.d.ts +17 -0
package/dist/memory/sandbox/index.js +122 -0
package/dist/models/registry.js +32 -7
package/dist/ports/capabilities.d.ts +24 -2
package/dist/ports/harness-context.d.ts +4 -1
package/dist/ports/index.d.ts +1 -0
package/dist/ports/index.js +1 -0
package/dist/ports/memory/facade.d.ts +5 -0
package/dist/ports/memory/facade.js +123 -0
package/dist/ports/memory/telemetry.d.ts +16 -0
package/dist/ports/memory/telemetry.js +77 -0
package/dist/ports/memory/types.d.ts +204 -0
package/dist/ports/memory/types.js +1 -0
package/dist/ports/memory/validation.d.ts +19 -0
package/dist/ports/memory/validation.js +160 -0
package/dist/ports/memory.d.ts +3 -0
package/dist/ports/memory.js +3 -0
package/dist/sessions/index.d.ts +2 -0
package/dist/sessions/index.js +275 -68
package/dist/telemetry/shim.d.ts +20 -0
package/dist/telemetry/shim.js +28 -0
package/dist/testing/fakeMemoryAdapter.d.ts +16 -0
package/dist/testing/fakeMemoryAdapter.js +110 -0
package/dist/testing/index.d.ts +3 -0
package/dist/testing/index.js +2 -0
package/package.json +8 -3

package/README.md CHANGED Viewed

@@ -3,6 +3,21 @@
 Self-hosted enterprise agent harness for typed tools, agents, workflows, state,
 sandboxing, streaming, and OpenTelemetry instrumentation.
+The core package also exports provider-neutral eval helpers:
+- `evaluatePromptCandidates(...)` compares prompt candidates against a fixed
+  item set and deterministic or custom scorers.
+- `evaluateDeterministicScorer(...)` runs JSON Pointer based deterministic
+  scorer definitions without provider calls. It is exported from the main
+  package and re-exported from `@purista/harness/testing`.
+Telemetry defaults to dual GenAI and OpenInference attributes with no content
+capture. `InvokeOptions.traceparent` and `tracestate` accept inbound W3C Trace
+Context so application traces can parent harness run spans.
+See [Evaluating Prompts](https://github.com/puristajs/harness/blob/main/docs/guides/evaluating-prompts.md)
+for the execution model, scorer limits, and privacy behavior.
 ## Install
 ```bash

package/dist/agents/index.d.ts CHANGED Viewed

@@ -1,9 +1,10 @@
 import type { Logger } from '../logger/index.js';
 import type { JsonValue } from '../models/json.js';
 import type { Message } from '../models/state.js';
-import type { AgentDefinition, ResolvedSkill, RunEvent, SessionMemory, ToolsConfig } from '../harness/defineHarness.js';
+import type { AgentDefinition, ResolvedSkill, RunEvent, ToolsConfig } from '../harness/defineHarness.js';
+import type { MemoryFacade } from '../ports/memory.js';
 import type { SandboxSession } from '../sandbox/index.js';
-import type { TelemetryShim } from '../telemetry/index.js';
+import { type TelemetryShim } from '../telemetry/index.js';
 import { type McpRunnerRegistry } from '../tools/mcp/runner.js';
 export declare function runDefaultAgent(args: {
     harnessName: string;
@@ -19,7 +20,7 @@ export declare function runDefaultAgent(args: {
     customTools: ToolsConfig;
     mcpRegistry?: McpRunnerRegistry;
     session: SandboxSession;
-    memory: SessionMemory;
+    memory: MemoryFacade;
     mountedSkills: Set<string>;
     historyWindow?: number;
     maxSteps: number;
@@ -28,6 +29,7 @@ export declare function runDefaultAgent(args: {
     logger: Logger;
     telemetry: TelemetryShim;
     emitEvent?: (event: RunEvent) => Promise<void>;
+    metadata?: Readonly<Record<string, JsonValue>>;
 }): Promise<{
     output: JsonValue;
     emitted: Message[];

package/dist/agents/index.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { z } from 'zod';
 import { ATTR_GEN_AI_AGENT_ID, ATTR_GEN_AI_AGENT_NAME, ATTR_GEN_AI_TOOL_CALL_ID, ATTR_GEN_AI_TOOL_NAME, ATTR_GEN_AI_TOOL_TYPE } from '@opentelemetry/semantic-conventions/incubating';
 import { AgentLoopBudgetError, HarnessError, OperationCancelledError, OperationTimeoutError, PermissionDeniedError, ToolError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
+import { createMetrics } from '../telemetry/index.js';
 import { buildSkillIndex, mountSkillsOnce } from '../skills/index.js';
 import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } from '../tools/index.js';
 import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
@@ -31,14 +32,40 @@ export async function runDefaultAgent(args) {
         'harness.run.id': args.runId,
         ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
         'harness.agent.id': args.agentId,
+        'gen_ai.operation.name': 'invoke_agent',
+        'openinference.span.kind': 'AGENT',
+        'metadata.agent_name': args.agentId,
+        'metadata.agent_id': args.agentId,
         [ATTR_GEN_AI_AGENT_NAME]: args.agentId,
-        [ATTR_GEN_AI_AGENT_ID]: args.runId,
+        [ATTR_GEN_AI_AGENT_ID]: args.agentId,
         'harness.agent.model': args.agent.model,
-        'harness.agent.has_handler': args.agent.handler !== undefined
+        'harness.agent.has_handler': args.agent.handler !== undefined,
+        ...metadataSpanAttrs(args.metadata)
     };
-    const execute = () => runDefaultAgentInner(args);
+    const metrics = createMetrics(args.telemetry, agentAttrs);
+    const execute = () => runDefaultAgentInner({ ...args, metrics });
     return args.telemetry.span(`invoke_agent ${args.agentId}`, agentAttrs, execute);
 }
+function metadataSpanAttrs(metadata) {
+    const attrs = {};
+    for (const [key, value] of Object.entries(metadata ?? {})) {
+        if (!/^[a-zA-Z][a-zA-Z0-9_.-]{0,63}$/.test(key))
+            continue;
+        if (typeof value === 'string') {
+            if (value.length <= 256)
+                attrs[`harness.metadata.${key}`] = value;
+            continue;
+        }
+        if (typeof value === 'number' && Number.isFinite(value)) {
+            attrs[`harness.metadata.${key}`] = value;
+            continue;
+        }
+        if (typeof value === 'boolean') {
+            attrs[`harness.metadata.${key}`] = value;
+        }
+    }
+    return attrs;
+}
 async function runDefaultAgentInner(args) {
     args.signal.throwIfAborted();
     const inputSchema = args.agent.input ?? z.string();
@@ -57,13 +84,15 @@ async function runDefaultAgentInner(args) {
             runId: args.runId,
             sessionId: args.sessionId,
             history: { list: async () => args.history },
-            memory: args.memory
+            memory: args.memory,
+            metadata: args.metadata ?? {},
+            metrics: args.metrics
         });
         const validated = parseAgentSchema(outputSchema, output, 'agent_output');
         return { output: validated, emitted: [{ id: `msg_${Date.now()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
     }
     const baseInstructions = typeof args.agent.instructions === 'function'
-        ? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory })
+        ? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, metadata: args.metadata ?? {}, metrics: args.metrics })
         : args.agent.instructions;
     const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
     const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
@@ -117,6 +146,7 @@ async function runDefaultAgentInner(args) {
         if (toolCalls.length === 0) {
             const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
             emitted.push({ id: `msg_${Date.now()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
+            await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: validated, usage: response.usage });
             await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
             return { output: validated, emitted };
         }
@@ -159,7 +189,25 @@ async function runDefaultAgentInner(args) {
                     }
                     const tsTool = tool;
                     const parsed = tsTool.input.parse(input);
-                    const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tsTool.handler({ signal, sandbox: withSandboxTelemetry(args, canonical), logger: args.logger, telemetry: args.telemetry, runId: args.runId, sessionId: args.sessionId, agentId: args.agentId, toolId: canonical }, parsed));
+                    const out = await withToolSignal(args.signal, args.toolTimeoutMs, (signal) => tsTool.handler({
+                        signal,
+                        sandbox: withSandboxTelemetry(args, canonical),
+                        logger: args.logger,
+                        telemetry: args.telemetry,
+                        metrics: createMetrics(args.telemetry, {
+                            'harness.name': args.harnessName,
+                            'harness.session.id': args.sessionId,
+                            'harness.run.id': args.runId,
+                            ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
+                            'harness.agent.id': args.agentId,
+                            'harness.tool.id': canonical
+                        }),
+                        memory: args.memory,
+                        runId: args.runId,
+                        sessionId: args.sessionId,
+                        agentId: args.agentId,
+                        toolId: canonical
+                    }, parsed));
                     return { output: tsTool.output.parse(out) };
                 });
             }
@@ -226,6 +274,10 @@ async function withToolSpan(args, toolId, callId, toolKind, mcpAttrs, fn) {
         ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
         'harness.agent.id': args.agentId,
         'harness.tool.id': toolId,
+        'gen_ai.operation.name': 'execute_tool',
+        'openinference.span.kind': 'TOOL',
+        'tool.name': toolId,
+        'tool.call.id': callId,
         [ATTR_GEN_AI_TOOL_NAME]: toolId,
         [ATTR_GEN_AI_TOOL_CALL_ID]: callId,
         [ATTR_GEN_AI_TOOL_TYPE]: toolKind,

package/dist/errors/catalog.d.ts CHANGED Viewed

@@ -22,9 +22,14 @@ export declare class ValidationError extends HarnessError {
         /** Model provider response shape is invalid. */  | 'model_response'
         /** Session memory key is invalid. */  | 'memory_key'
         /** Session memory value is invalid or non-serializable. */  | 'memory_value'
+        /** Session memory scope is invalid or unsupported. */  | 'memory_scope'
+        /** Session memory options are invalid or unsupported. */  | 'memory_write_options'
+        /** Session memory listing options are invalid. */  | 'memory_list_options'
+        /** Session memory search query is invalid. */  | 'memory_search_query'
         /** Message envelope validation failed. */  | 'message'
         /** Session history shape validation failed. */  | 'session_history'
-        /** Invocation options are invalid. */  | 'invoke_options';
+        /** Invocation options are invalid. */  | 'invoke_options'
+        /** Evaluation helper input is invalid. */  | 'eval_input';
         issues: unknown;
     }, cause?: unknown);
 }
@@ -139,21 +144,23 @@ export declare class SessionBusyError extends HarnessError {
 /** State backend operation failed. */
 export declare class StateError extends HarnessError {
     constructor(message: string, meta: {
-        op: 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents';
+        op: 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents' | 'memory.get' | 'memory.set' | 'memory.delete' | 'memory.list' | 'memory.search';
         reason?: 'duplicate_message_id' | string;
+        adapter?: 'memory' | string;
+        memory_provider?: string;
     }, cause?: unknown);
 }
 /** Timed execution budget expired. */
 export declare class OperationTimeoutError extends HarnessError {
     constructor(message: string, meta: {
-        scope: 'run' | 'model' | 'tool' | 'sandbox_run';
+        scope: 'run' | 'model' | 'tool' | 'sandbox_run' | 'memory';
         timeout_ms: number;
     }, cause?: unknown);
 }
 /** Operation cancelled by abort signal or explicit cancellation path. */
 export declare class OperationCancelledError extends HarnessError {
     constructor(message: string, meta: {
-        scope: 'run' | 'workflow' | 'agent' | 'model' | 'tool' | 'sandbox';
+        scope: 'run' | 'workflow' | 'agent' | 'model' | 'tool' | 'sandbox' | 'memory';
     }, cause?: unknown);
 }
 /** MCP transport/protocol failure. */

package/dist/eval/index.d.ts ADDED Viewed

@@ -0,0 +1,57 @@
+import type { JsonValue } from '../models/json.js';
+export type DeterministicScorerDefinition = {
+    type: 'regex';
+    path: string;
+    pattern: string;
+    flags?: 'i' | 'm' | 'im';
+} | {
+    type: 'json-schema';
+    schema: JsonValue;
+} | {
+    type: 'contains';
+    path: string;
+    value: string;
+    caseInsensitive?: boolean;
+} | {
+    type: 'attribute-equality';
+    leftPath: string;
+    rightPath: string;
+};
+export interface ScorerTarget {
+    input: unknown;
+    output: unknown;
+    expected?: unknown;
+    context?: unknown[];
+}
+export interface ScorerResult {
+    score: number;
+    passed: boolean;
+    evidence?: JsonValue;
+}
+export interface PromptCandidate<I = unknown> {
+    id: string;
+    prompt: string;
+    metadata?: Record<string, JsonValue>;
+}
+export interface EvaluationItem<I = unknown> {
+    id: string;
+    input: I;
+    expected?: unknown;
+    context?: unknown[];
+}
+export interface CandidateScore {
+    candidateId: string;
+    meanScore: number;
+    passRate: number;
+    itemCount: number;
+    scorerCount: number;
+}
+export interface EvaluatePromptCandidatesInput<I = unknown> {
+    candidates: PromptCandidate<I>[];
+    items: EvaluationItem<I>[];
+    scorer: (target: ScorerTarget, signal: AbortSignal) => Promise<ScorerResult>;
+    runCandidate: (candidate: PromptCandidate<I>, item: EvaluationItem<I>, signal: AbortSignal) => Promise<unknown>;
+    signal: AbortSignal;
+}
+export declare function evaluateDeterministicScorer(definition: DeterministicScorerDefinition, target: ScorerTarget): ScorerResult;
+export declare function evaluatePromptCandidates<I = unknown>(input: EvaluatePromptCandidatesInput<I>): Promise<CandidateScore[]>;

package/dist/eval/index.js ADDED Viewed

@@ -0,0 +1,181 @@
+import { ValidationError } from '../errors/index.js';
+export function evaluateDeterministicScorer(definition, target) {
+    switch (definition.type) {
+        case 'regex': {
+            const selected = readPointer(target.output, definition.path);
+            if (!selected.found)
+                return missingPointer(definition.path);
+            return binary(new RegExp(definition.pattern, definition.flags ?? '').test(String(selected.value)));
+        }
+        case 'contains': {
+            const selected = readPointer(target.output, definition.path);
+            if (!selected.found)
+                return missingPointer(definition.path);
+            const haystack = String(selected.value);
+            const needle = definition.value;
+            return binary(definition.caseInsensitive
+                ? haystack.toLocaleLowerCase().includes(needle.toLocaleLowerCase())
+                : haystack.includes(needle));
+        }
+        case 'attribute-equality': {
+            const left = readPointer(target.output, definition.leftPath);
+            if (!left.found)
+                return missingPointer(definition.leftPath);
+            const right = readPointer(target.output, definition.rightPath);
+            if (!right.found)
+                return missingPointer(definition.rightPath);
+            return deepEqual(left.value, right.value)
+                ? binary(true)
+                : { score: 0, passed: false, evidence: { left: toJsonValue(left.value), right: toJsonValue(right.value) } };
+        }
+        case 'json-schema': {
+            const result = validateJsonSchema(definition.schema, target.output);
+            return result.passed
+                ? binary(true)
+                : { score: 0, passed: false, evidence: { reason: 'schema_validation_failed', issues: result.issues } };
+        }
+    }
+}
+export async function evaluatePromptCandidates(input) {
+    if (input.candidates.length === 0) {
+        throw new ValidationError('At least one prompt candidate is required.', { where: 'eval_input', issues: { candidates: 'empty' } });
+    }
+    if (input.items.length === 0) {
+        throw new ValidationError('At least one evaluation item is required.', { where: 'eval_input', issues: { items: 'empty' } });
+    }
+    const scores = [];
+    for (const candidate of input.candidates) {
+        input.signal.throwIfAborted();
+        let total = 0;
+        let passed = 0;
+        let scorerCount = 0;
+        for (const item of input.items) {
+            input.signal.throwIfAborted();
+            const output = await input.runCandidate(candidate, item, input.signal);
+            const target = {
+                input: item.input,
+                output
+            };
+            if (item.expected !== undefined)
+                target.expected = item.expected;
+            if (item.context !== undefined)
+                target.context = item.context;
+            const result = await input.scorer(target, input.signal);
+            total += result.score;
+            passed += result.passed ? 1 : 0;
+            scorerCount += 1;
+        }
+        scores.push({
+            candidateId: candidate.id,
+            meanScore: total / scorerCount,
+            passRate: passed / scorerCount,
+            itemCount: input.items.length,
+            scorerCount
+        });
+    }
+    return scores.sort((a, b) => {
+        if (a.meanScore !== b.meanScore)
+            return b.meanScore - a.meanScore;
+        if (a.passRate !== b.passRate)
+            return b.passRate - a.passRate;
+        return a.candidateId.localeCompare(b.candidateId);
+    });
+}
+function binary(passed) {
+    return { score: passed ? 1 : 0, passed };
+}
+function missingPointer(path) {
+    return { score: 0, passed: false, evidence: { reason: 'missing_pointer', path } };
+}
+function readPointer(value, pointer) {
+    if (pointer === '')
+        return { found: true, value };
+    if (!pointer.startsWith('/'))
+        return { found: false };
+    let current = value;
+    for (const rawPart of pointer.slice(1).split('/')) {
+        const part = rawPart.replace(/~1/g, '/').replace(/~0/g, '~');
+        if (Array.isArray(current)) {
+            const index = Number(part);
+            if (!Number.isInteger(index) || index < 0 || index >= current.length)
+                return { found: false };
+            current = current[index];
+            continue;
+        }
+        if (!isRecord(current) || !(part in current))
+            return { found: false };
+        current = current[part];
+    }
+    return { found: true, value: current };
+}
+function validateJsonSchema(schema, value) {
+    const issues = [];
+    validateSchemaAt(schema, value, '', issues);
+    return { passed: issues.length === 0, issues };
+}
+function validateSchemaAt(schema, value, path, issues) {
+    if (!isRecord(schema))
+        return;
+    if ('const' in schema && !deepEqual(value, schema['const'])) {
+        issues.push({ path, reason: 'const', expected: toJsonValue(schema['const']), actual: toJsonValue(value) });
+        return;
+    }
+    if (Array.isArray(schema['enum']) && !schema['enum'].some((entry) => deepEqual(entry, value))) {
+        issues.push({ path, reason: 'enum', actual: toJsonValue(value) });
+        return;
+    }
+    const type = typeof schema['type'] === 'string' ? schema['type'] : undefined;
+    if (type && !matchesType(value, type)) {
+        issues.push({ path, reason: 'type', expected: type, actual: typeof value });
+        return;
+    }
+    if (type === 'object' || schema['properties']) {
+        if (!isRecord(value)) {
+            issues.push({ path, reason: 'type', expected: 'object', actual: typeof value });
+            return;
+        }
+        const required = Array.isArray(schema['required']) ? schema['required'].filter((entry) => typeof entry === 'string') : [];
+        for (const key of required) {
+            if (!(key in value))
+                issues.push({ path: `${path}/${key}`, reason: 'required' });
+        }
+        const properties = isRecord(schema['properties']) ? schema['properties'] : {};
+        for (const [key, childSchema] of Object.entries(properties)) {
+            if (key in value)
+                validateSchemaAt(childSchema, value[key], `${path}/${key}`, issues);
+        }
+        if (schema['additionalProperties'] === false) {
+            for (const key of Object.keys(value)) {
+                if (!(key in properties))
+                    issues.push({ path: `${path}/${key}`, reason: 'additional_properties' });
+            }
+        }
+    }
+}
+function matchesType(value, type) {
+    switch (type) {
+        case 'object': return isRecord(value);
+        case 'array': return Array.isArray(value);
+        case 'string': return typeof value === 'string';
+        case 'number': return typeof value === 'number' && Number.isFinite(value);
+        case 'integer': return Number.isInteger(value);
+        case 'boolean': return typeof value === 'boolean';
+        case 'null': return value === null;
+        default: return true;
+    }
+}
+function isRecord(value) {
+    return value !== null && typeof value === 'object' && !Array.isArray(value);
+}
+function deepEqual(a, b) {
+    return JSON.stringify(a) === JSON.stringify(b);
+}
+function toJsonValue(value) {
+    if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean')
+        return value;
+    if (Array.isArray(value))
+        return value.map((entry) => toJsonValue(entry));
+    if (isRecord(value))
+        return Object.fromEntries(Object.entries(value).map(([key, entry]) => [key, toJsonValue(entry)]));
+    return String(value);
+}

package/dist/harness/defineHarness.d.ts CHANGED Viewed

@@ -2,10 +2,12 @@ import { z } from 'zod';
 import { type Logger } from '../logger/index.js';
 import type { ModelAlias, ModelCapability, TokenUsage } from '../ports/model-provider.js';
 import type { StateStore } from '../ports/state.js';
-import type { TelemetryShim } from '../telemetry/index.js';
+import type { Metrics, TelemetryShim } from '../telemetry/index.js';
 import type { HarnessAdapterContext } from '../ports/harness-context.js';
+import type { MemoryAdapter, MemoryFacade, SessionMemory } from '../ports/memory.js';
 import type { JsonValue } from '../models/json.js';
 import type { Message } from '../models/state.js';
+import type { RunStatus } from '../models/state.js';
 import type { HarnessError } from '../errors/harness-error.js';
 import { type Sandbox } from '../sandbox/index.js';
 import type { ModelHandle } from '../models/registry.js';
@@ -13,12 +15,13 @@ import { type AdapterCapability, type DurableRuntimeAdapter, type HarnessInspect
 /** Stable harness version string for diagnostics and generated documentation. */
 export declare const HARNESS_VERSION = "0.0.0";
 /** OpenTelemetry capture controls used by the harness. */
+export type TelemetryFlavor = 'dual' | 'gen_ai_only' | 'openinference_only';
+export type ContentCaptureMode = 'NO_CONTENT' | 'SPAN_ONLY' | 'EVENT_ONLY' | 'SPAN_AND_EVENT';
 export interface TelemetryOptions {
-    /**
-     * When `true`, emitted telemetry may include full prompt/message content.
-     * The default is `false` to avoid accidental sensitive-content capture.
-     */
-    captureContent?: boolean;
+    /** Backend emission shape. */
+    flavor?: TelemetryFlavor;
+    /** Span/event content capture mode. */
+    contentCaptureMode?: ContentCaptureMode;
 }
 /** Default harness budgets and execution behavior. */
 export interface HarnessDefaults {
@@ -51,6 +54,12 @@ export interface InvokeOptions {
     timeoutMs?: number;
     /** Optional history-window override for this call only. */
     historyWindow?: number;
+    /** Optional W3C Trace Context parent. */
+    traceparent?: string;
+    /** Optional W3C Trace Context state. */
+    tracestate?: string;
+    /** Scalar metadata exposed to handlers and telemetry sanitizers. */
+    metadata?: Record<string, JsonValue>;
 }
 /** Canonical built-in tool names provided by the harness. */
 export type BuiltinToolName = 'bash' | 'read' | 'write' | 'edit' | 'glob' | 'grep' | 'list';
@@ -102,17 +111,6 @@ export interface ResolvedSkill {
     /** Absolute directory mounted into `/skills/<name>`. */
     directory: string;
 }
-/** Sandbox-backed per-session memory facade. */
-export interface SessionMemory {
-    /** Reads `/memory/<key>.json` and returns the parsed JSON value if present. */
-    read<T = JsonValue>(key: string): Promise<T | undefined>;
-    /** Writes JSON-serializable data to `/memory/<key>.json`. */
-    write(key: string, value: JsonValue): Promise<void>;
-    /** Deletes `/memory/<key>.json` if it exists. */
-    delete(key: string): Promise<void>;
-    /** Lists known memory keys without the `.json` suffix. */
-    list(): Promise<string[]>;
-}
 /** Conversation history accessor for a single session thread. */
 export interface ConversationHistory {
     /** Returns persisted conversation messages for the session. */
@@ -127,6 +125,8 @@ export interface ToolHandlerContext {
     sandbox: import('../sandbox/index.js').SandboxSession;
     logger: Logger;
     telemetry: TelemetryShim;
+    metrics: Metrics;
+    memory: MemoryFacade;
     runId: string;
     sessionId: string;
     agentId: string;
@@ -258,7 +258,9 @@ export interface AgentContextMinimal<S extends BuilderState, I> {
     sessionId: string;
     runId: string;
     history: ConversationHistory;
-    memory: SessionMemory;
+    memory: MemoryFacade;
+    metadata: Readonly<Record<string, JsonValue>>;
+    metrics: Metrics;
 }
 /** Full context passed to workflow handlers. */
 export interface WorkflowContext<S extends BuilderState, I, O> {
@@ -270,6 +272,9 @@ export interface WorkflowContext<S extends BuilderState, I, O> {
     signal: AbortSignal;
     runId: string;
     sessionId: string;
+    metadata: Readonly<Record<string, JsonValue>>;
+    memory: MemoryFacade;
+    metrics: Metrics;
     output?: O;
 }
 /** Full context passed to custom agent handlers. */
@@ -442,6 +447,7 @@ export interface Session<S extends BuilderState> {
     };
     memory: SessionMemory;
     history: ConversationHistory;
+    getRunSummary(runId: string): Promise<RunSummary | undefined>;
     clearHistory(): Promise<void>;
     replaceHistory(messages: ReadonlyArray<Omit<Message, 'id' | 'timestamp'>>): Promise<void>;
     close(): Promise<void>;
@@ -454,6 +460,18 @@ export interface SerializedError {
     message: string;
     meta?: Record<string, unknown>;
 }
+export interface RunSummary {
+    runId: string;
+    sessionId: string;
+    status: RunStatus;
+    startedAt: string;
+    finishedAt?: string;
+    tokenTotals: TokenUsage;
+    modelCalls: number;
+    toolCalls: number;
+    agentCalls: number;
+    error?: SerializedError;
+}
 /** Harness streaming events emitted from `session.workflows.<id>.stream(...)`. */
 export type RunEvent = {
     type: 'run.started';
@@ -512,6 +530,7 @@ export type RunEvent = {
     runId: string;
     agentId?: string;
     object: JsonValue;
+    usage?: TokenUsage;
 } | {
     type: 'model.embedding.completed';
     runId: string;
@@ -538,6 +557,7 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
     logger(logger: Logger): HarnessBuilder<S>;
     state(store: StateStore): HarnessBuilder<S>;
     sandbox(sandbox?: Sandbox<any>): HarnessBuilder<S>;
+    memory(adapter: MemoryAdapter): HarnessBuilder<S>;
     runtime(runtime: DurableRuntimeAdapter): HarnessBuilder<S>;
     requires(capabilities: readonly AdapterCapability[]): HarnessBuilder<S>;
     defaults(defaults: HarnessDefaults): HarnessBuilder<S>;

package/dist/harness/defineHarness.js CHANGED Viewed

@@ -1,5 +1,7 @@
 import { z } from 'zod';
 import { JsonLogger } from '../logger/index.js';
+import { sandboxMemory } from '../memory/sandbox/index.js';
+import { validateMemoryAdapter } from '../ports/memory.js';
 import { InMemoryStateStore } from '../state/in-memory.js';
 import { HarnessConfigError } from '../errors/catalog.js';
 import { autoDetectSandbox } from '../sandbox/index.js';
@@ -26,6 +28,13 @@ class Builder {
     sandbox(sandbox = autoDetectSandbox()) {
         return this.clone({ sandbox });
     }
+    memory(memory) {
+        if (this.configured.memory) {
+            throw new HarnessConfigError('Memory adapter is already configured.', { reason: 'duplicate_adapter', path: 'memory' });
+        }
+        validateMemoryAdapter(memory);
+        return this.clone({ memory });
+    }
     runtime(runtime) {
         return this.clone({ runtime });
     }
@@ -68,7 +77,9 @@ class Builder {
             throw new HarnessConfigError('At least one model alias is required.', { reason: 'missing_models', path: 'models' });
         }
         const sandbox = this.configured.sandbox ?? autoDetectSandbox();
-        const inspection = this.resolveInspection(this.options.name ?? 'agent-harness', sandbox, models);
+        const memory = this.configured.memory ?? sandboxMemory();
+        validateMemoryAdapter(memory);
+        const inspection = this.resolveInspection(this.options.name ?? 'agent-harness', sandbox, memory, models);
         const missing = missingCapabilities(inspection.requiredCapabilities, inspection.capabilities);
         if (missing.length > 0) {
             throw new HarnessConfigError('Required adapter capabilities are not available.', {
@@ -83,6 +94,7 @@ class Builder {
             ...(this.configured.telemetry ? { telemetry: this.configured.telemetry } : {}),
             state: this.configured.state ?? new InMemoryStateStore(),
             sandbox,
+            memory,
             defaults: {
                 agentMaxIterations: this.configured.defaults?.agentMaxIterations ?? 16,
                 runTimeoutMs: this.configured.defaults?.runTimeoutMs ?? 600_000,
@@ -103,7 +115,7 @@ class Builder {
     clone(patch) {
         return new Builder(this.options, { ...this.configured, ...patch });
     }
-    resolveInspection(name, sandbox, models) {
+    resolveInspection(name, sandbox, memory, models) {
         const adapters = [];
         const sandboxCapabilities = hasAdapterCapabilities(sandbox) ? uniqueCapabilities(sandbox.capabilities) : [];
         adapters.push({
@@ -111,6 +123,15 @@ class Builder {
             id: getAdapterId(sandbox, 'sandbox'),
             capabilities: sandboxCapabilities
         });
+        adapters.push({
+            kind: 'memory',
+            id: memory.info.id,
+            capabilities: uniqueCapabilities(memory.info.capabilities),
+            metadata: {
+                packageName: memory.info.packageName,
+                ...(memory.info.version ? { version: memory.info.version } : {})
+            }
+        });
         if (this.configured.runtime) {
             adapters.push({
                 kind: 'runtime',

package/dist/index.d.ts CHANGED Viewed

@@ -9,6 +9,8 @@ export * from './state/in-memory.js';
 export * from './models/json.js';
 export type { SessionRecord, Message, RunRecord, PersistedRunEvent, RunStatus } from './models/state.js';
 export * from './models/registry.js';
+export * from './eval/index.js';
+export * from './memory/sandbox/index.js';
 export * from './sandbox/index.js';
 export * from './tools/mcp/index.js';
 export * from './harness/defineHarness.js';

package/dist/index.js CHANGED Viewed

@@ -7,6 +7,8 @@ export { createDurableWorkflowContext, DurableStepError, DurableRunLeaseError, D
 export * from './state/in-memory.js';
 export * from './models/json.js';
 export * from './models/registry.js';
+export * from './eval/index.js';
+export * from './memory/sandbox/index.js';
 export * from './sandbox/index.js';
 export * from './tools/mcp/index.js';
 export * from './harness/defineHarness.js';