npm - @princetheprogrammerbtw/husk - Versions diffs - 0.1.1 → 0.3.0 - Mend

@princetheprogrammerbtw/husk 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/README.md +76 -11
package/dist/cli/index.js +164 -6
package/dist/cli/index.js.map +1 -1
package/dist/index.d.ts +372 -1
package/dist/index.js +438 -1
package/dist/index.js.map +1 -1
package/dist/otel/index.d.ts +49 -0
package/dist/otel/index.js +75 -0
package/dist/otel/index.js.map +1 -0
package/dist/tracer-y41CTrNG.d.ts +64 -0
package/package.json +2 -1

package/dist/index.d.ts CHANGED Viewed

@@ -1,3 +1,6 @@
+import { T as Tracer } from './tracer-y41CTrNG.js';
+export { N as NoopTracer, b as Span, a as SpanContext, c as SpanKind, S as SpanOptions } from './tracer-y41CTrNG.js';
 /**
  * Husk — core type definitions.
  *
@@ -352,6 +355,169 @@ declare class FileStore implements MemoryStore {
     private fileFor;
 }
+/**
+ * Husk — vector memory types and interfaces.
+ *
+ * Long-term memory for agents, separate from the short-term
+ * Message[] memory in src/core/memory.ts. Vector stores are queried
+ * by semantic similarity: you provide a query, get back the top-K
+ * most similar past items.
+ *
+ * Design choice: the agent accesses vector memory through TOOLS
+ * (MemorySearch, Remember) rather than automatic injection. This
+ * means:
+ * - The model decides when to recall (avoids noisy "here's some
+ *   vaguely related past conversation" injections)
+ * - The same memory store can be used by multiple agents
+ * - Vector memory integrates with the existing tool framework, no
+ *   agent-loop changes
+ *
+ * The VectorStore interface is intentionally simple so users can
+ * plug in their own backend (Chroma, Pinecone, sqlite-vec, etc.).
+ * Husk ships one in-memory backend for v0.3.0.
+ */
+/**
+ * A single memory item: the text, its embedding, and optional
+ * metadata for filtering or display.
+ */
+interface MemoryItem {
+    /** Unique id (caller-provided, allows updates/deletes). */
+    readonly id: string;
+    /** The text content. What the model sees when this is recalled. */
+    readonly content: string;
+    /** Pre-computed embedding vector. */
+    readonly embedding: readonly number[];
+    /** Optional metadata (timestamp, source, tags, etc.). */
+    readonly metadata?: Readonly<Record<string, unknown>>;
+}
+/**
+ * The result of a similarity search: the matched item plus its
+ * similarity score (higher = more similar). Score is implementation-
+ * dependent (cosine similarity for the in-memory backend).
+ */
+interface SearchResult {
+    readonly id: string;
+    readonly content: string;
+    readonly score: number;
+    readonly metadata?: Readonly<Record<string, unknown>>;
+}
+interface VectorStore {
+    /** Add or update a memory item. */
+    upsert(item: MemoryItem): Promise<void>;
+    /** Search for the top-K most similar items to the query embedding. */
+    search(queryEmbedding: readonly number[], topK: number): Promise<readonly SearchResult[]>;
+    /** Remove a memory by id. No-op if not present. */
+    remove(id: string): Promise<void>;
+    /** List all memory ids (for debugging/inspection). */
+    list(): Promise<readonly string[]>;
+    /** Remove all memories. */
+    clear(): Promise<void>;
+    /** Total count of memories. */
+    count(): Promise<number>;
+}
+interface EmbeddingProvider {
+    /** Generate an embedding vector for the given text. */
+    embed(text: string): Promise<readonly number[]>;
+    /** The dimensionality of the vectors this provider produces. */
+    readonly dimensions: number;
+}
+interface MemoryToolOptions {
+    /** The vector store to read/write. */
+    readonly store: VectorStore;
+    /** The embedding provider (used inside the tools). */
+    readonly embedder: EmbeddingProvider;
+    /**
+     * Default top-K for searches when the agent doesn't specify.
+     * Default: 5.
+     */
+    readonly defaultTopK?: number;
+}
+/**
+ * Build the MemorySearch tool: agent calls it with a natural-
+ * language query, gets back the top-K most similar past items.
+ */
+declare function defineMemorySearchTool(options: MemoryToolOptions): ToolDefinition<{
+    query: string;
+    topK?: number;
+}>;
+/**
+ * Build the Remember tool: agent calls it to save a fact/observation
+ * to long-term memory for later recall.
+ */
+declare function defineRememberTool(options: MemoryToolOptions): ToolDefinition<{
+    id: string;
+    content: string;
+}>;
+/**
+ * Husk — in-memory vector store.
+ *
+ * Naive O(n) linear scan with cosine similarity. Fine for thousands
+ * of memories; slow for millions. The VectorStore interface is
+ * pluggable so users can swap in Chroma, Pinecone, sqlite-vec, or
+ * any ANN index for production scale.
+ *
+ * Why we ship this: zero external dependencies, deterministic
+ * behavior for testing, good enough for the common case of
+ * "remember user preferences across sessions" (a few hundred items).
+ *
+ * For very large stores, see:
+ * - chroma (separate server, ~3-line adapter)
+ * - pinecone (managed, REST API)
+ * - sqlite-vec (in-process, single binary)
+ * - hnswlib-node (in-process, true ANN)
+ */
+declare class InMemoryVectorStore implements VectorStore {
+    private readonly items;
+    upsert(item: MemoryItem): Promise<void>;
+    search(queryEmbedding: readonly number[], topK: number): Promise<readonly SearchResult[]>;
+    remove(id: string): Promise<void>;
+    list(): Promise<readonly string[]>;
+    clear(): Promise<void>;
+    count(): Promise<number>;
+}
+/**
+ * Cosine similarity in [-1, 1]. Returns 0 if either vector is zero.
+ * (1.0 = identical direction, 0 = orthogonal, -1 = opposite)
+ */
+declare function cosineSimilarity(a: readonly number[], b: readonly number[]): number;
+/**
+ * Husk — simple embedding provider for testing and offline use.
+ *
+ * Produces deterministic pseudo-embeddings from text by hashing
+ * character n-grams into a fixed-dimension vector. NOT a real
+ * embedding model — semantic quality is poor, but it's:
+ *
+ * - Deterministic (same text → same vector)
+ * - Zero-dependency (no API call, no model file)
+ * - Useful for tests, demos, and offline development
+ *
+ * For real semantic search, use a real EmbeddingProvider:
+ * - OpenAIEmbedder (text-embedding-3-small, 1536 dims)
+ * - sentence-transformers via a small Python sidecar
+ * - CohereEmbedder, VoyageEmbedder, etc.
+ *
+ * The "similarity" this produces is bag-of-chars similarity, not
+ * semantic similarity. Two texts with similar character n-grams
+ * will score high even if they mean different things.
+ */
+interface HashEmbedderOptions {
+    /** Output vector dimensions. Default: 256. */
+    readonly dimensions?: number;
+    /** N-gram size for the hashing. Default: 3 (trigrams). */
+    readonly ngramSize?: number;
+}
+declare class HashEmbedder implements EmbeddingProvider {
+    readonly dimensions: number;
+    private readonly ngramSize;
+    constructor(options?: HashEmbedderOptions);
+    embed(text: string): Promise<readonly number[]>;
+}
 /**
  * Husk — steering prompt builder.
  *
@@ -525,6 +691,48 @@ declare class OpenAIProvider implements Provider {
     chat(request: ChatRequest): Promise<ChatResponse>;
 }
+/**
+ * Husk — Ollama provider adapter.
+ *
+ * Wraps Ollama's OpenAI-compatible Chat Completions API. Because Ollama
+ * exposes an OpenAI-compatible wire format, we can reuse the OpenAI
+ * adapter internally — only the default model name, base URL, and the
+ * provider 'name' field differ.
+ *
+ * Why this exists: local models (llama3.2, deepseek-r1, qwen2.5, etc.)
+ * are a first-class use case. Privacy, cost, and offline-ability all
+ * matter. Ollama is the dominant local-model runtime and uses the
+ * OpenAI API surface, so the adapter is a thin shell.
+ *
+ * Defaults:
+ *   - model: 'llama3.2' (override via constructor)
+ *   - baseURL: 'http://localhost:11434/v1' (override for remote Ollama)
+ *   - apiKey: 'ollama' (Ollama ignores the value but the OpenAI SDK
+ *     requires a non-empty string)
+ *
+ * Usage:
+ *   const agent = new Agent({ model: new OllamaProvider() });
+ *   const result = await agent.run('Explain quantum entanglement');
+ *
+ * For a list of models: `ollama list` (in your terminal).
+ */
+interface OllamaProviderOptions {
+    /** Model id (run `ollama list` to see what's pulled locally). Default: 'llama3.2'. */
+    readonly model?: string;
+    /** Ollama server URL. Default: 'http://localhost:11434/v1'. */
+    readonly baseURL?: string;
+    /** API key — Ollama ignores this but the OpenAI SDK requires it. Default: 'ollama'. */
+    readonly apiKey?: string;
+}
+declare class OllamaProvider implements Provider {
+    readonly name = "ollama";
+    readonly model: string;
+    private readonly inner;
+    constructor(options?: OllamaProviderOptions);
+    chat(request: Parameters<Provider['chat']>[0]): ReturnType<Provider['chat']>;
+}
 /**
  * Husk — tool registry helpers.
  *
@@ -688,6 +896,169 @@ interface GrepInput {
 }
 declare const Grep: ToolDefinition<GrepInput>;
+/**
+ * Husk — eval runner types and API.
+ *
+ * The eval runner lets users assert that an agent's output meets
+ * expectations. Three primitives:
+ *
+ *   1. EvalCase — an input + the expected outcome (an assertion or a set of them)
+ *   2. Assertion — a function that takes the agent's result and returns pass/fail
+ *   3. EvalSuite — a named collection of eval cases, runnable as a unit
+ *
+ * The design choice: assertions are plain functions (sync or async),
+ * not a DSL. Use the 6 built-ins (equals, contains, matches, fn,
+ * notContains, lengthBetween) or write your own. The built-in set is
+ * intentionally tiny — a heavy DSL (think Jest matchers) is a maintainability trap.
+ *
+ * Example:
+ *
+ *   const suite = defineSuite({
+ *     name: 'hello-agent',
+ *     cases: [
+ *       {
+ *         name: 'answers geography',
+ *         input: 'What is the capital of France? Answer in one word.',
+ *         assertions: [
+ *           contains('Paris'),
+ *           matches(/^[A-Z][a-z]+$/),  // single capitalized word
+ *         ],
+ *       },
+ *     ],
+ *   });
+ *
+ *   const results = await runSuite(suite, () => new Agent({ model: ... }));
+ *   console.log(`${results.passed}/${results.total} passed`);
+ */
+/**
+ * A function that checks whether an agent's output meets a criterion.
+ * Returns a pass/fail with an optional message explaining the failure.
+ */
+type Assertion = (result: AgentResult) => AssertionResult | Promise<AssertionResult>;
+interface AssertionResult {
+    /** Whether the assertion passed. */
+    readonly pass: boolean;
+    /** Human-readable name shown in eval reports. */
+    readonly name: string;
+    /** Explanation of the result. Optional in the type, but should be set when pass is false to explain why. */
+    readonly message?: string;
+}
+/** Output exactly equals the expected string. */
+declare function equals(expected: string): Assertion;
+/** Output contains the expected substring (case-sensitive). */
+declare function contains(needle: string): Assertion;
+/** Output matches the expected regex. */
+declare function matches(pattern: RegExp): Assertion;
+/** Output passes a custom predicate. Use this for shape-based checks. */
+declare function fn(name: string, predicate: (output: string) => boolean, message?: string): Assertion;
+/** Output does NOT contain the given substring. */
+declare function notContains(needle: string): Assertion;
+/** Output length is within bounds. */
+declare function lengthBetween(min: number, max: number): Assertion;
+interface EvalCase {
+    /** Human-readable name shown in eval reports. */
+    readonly name: string;
+    /** The input to pass to agent.run(). */
+    readonly input: string;
+    /** Assertions to run on the result. All must pass for the case to pass. */
+    readonly assertions: readonly Assertion[];
+    /**
+     * Optional max iterations override. Lets you cap runaway agents per-case
+     * without affecting other cases in the suite.
+     */
+    readonly maxIterations?: number;
+}
+interface EvalSuite {
+    /** Suite name shown in reports. */
+    readonly name: string;
+    /** Cases in this suite, run sequentially. */
+    readonly cases: readonly EvalCase[];
+}
+interface CaseResult {
+    readonly caseName: string;
+    readonly passed: boolean;
+    readonly assertionResults: readonly AssertionResult[];
+    readonly agentResult: AgentResult;
+    readonly durationMs: number;
+}
+interface SuiteResult {
+    readonly suiteName: string;
+    readonly results: readonly CaseResult[];
+    readonly passed: number;
+    readonly total: number;
+    readonly durationMs: number;
+}
+/**
+ * Husk — eval runner.
+ *
+ * Takes an EvalSuite + a factory that returns an Agent, runs each
+ * case sequentially, applies the assertions, and reports results.
+ *
+ * Why a factory (not an Agent instance): each case might want its
+ * own agent configuration. The factory pattern gives the user full
+ * control without forcing a specific shape.
+ *
+ * Why sequential (not parallel): LLM calls compete for rate limits
+ * and cost $$$. Sequential gives predictable billing and easier
+ * debugging. Parallel mode (slated as a v0.3.0 addition) is not exposed by RunSuiteOptions.
+ *
+ * Failure handling: an agent run that throws an error is reported
+ * as a case failure (not a runner crash). The error message is
+ * included in the assertion results so the user can see what broke.
+ */
+/**
+ * A factory that produces a fresh Agent per case. Called once per
+ * case so each case can have isolated memory, config, etc.
+ */
+type AgentFactory = () => Agent | Promise<Agent>;
+interface RunSuiteOptions {
+    /** Stop on first failing case. Default: false (run all cases regardless). */
+    readonly failFast?: boolean;
+    /** Optional callbacks for runner-level events (case start / case end), e.g. for custom logging. Default: silent. */
+    readonly onCaseStart?: (caseName: string) => void;
+    readonly onCaseEnd?: (result: CaseResult) => void;
+}
+declare function runSuite(suite: EvalSuite, factory: AgentFactory, options?: RunSuiteOptions): Promise<SuiteResult>;
+/**
+ * Build a suite with less boilerplate. Equivalent to constructing
+ * the object inline, but reads more clearly at the call site.
+ */
+declare function defineSuite(suite: {
+    name: string;
+    cases: readonly EvalCase[];
+}): EvalSuite;
+/**
+ * Husk — agent event → tracer mapper.
+ *
+ * Translates the typed AgentEvent stream into tracer spans. The top-
+ * level 'agent:start' begins a trace, each iteration becomes a child
+ * span, and tool calls become their own spans under the iteration.
+ *
+ * Design: spans are created in startSpanOrder. Tool spans nest under
+ * the iteration span. The end of the agent run ends the trace span.
+ *
+ * Usage:
+ *   const mapper = new EventTracer(myTracer);
+ *   agent.onAny(mapper.onEvent.bind(mapper));
+ *   await agent.run(...);  // emits spans to myTracer
+ */
+declare class EventTracer {
+    private readonly tracer;
+    private traceSpan;
+    private iterationSpan;
+    private toolSpans;
+    constructor(tracer: Tracer);
+    /**
+     * Bind as an event handler: `agent.onAny(tracer.onEvent.bind(tracer))`
+     */
+    onEvent: AgentEventHandler;
+}
 /**
  * Husk — public API entry point.
  *
@@ -699,4 +1070,4 @@ declare const Grep: ToolDefinition<GrepInput>;
  */
 declare const VERSION = "0.1.0";
-export { Agent, type AgentConfig, type AgentEvent, AgentEventEmitter, type AgentEventHandler, type AgentResult, AnthropicProvider, type AnthropicProviderOptions, Bash, type BashInput, type ChatChunk, type ChatRequest, type ChatResponse, ConsoleLogger, type ContentBlock, Edit, type EditInput, type Example, FileStore, type FileStoreOptions, Grep, type GrepInput, InMemoryStore, type JSONSchema, type JSONSchemaField, type LogLevel, type Logger, type MemoryStore, type Message, type MessageContent, OpenAIProvider, type OpenAIProviderOptions, type Provider, Read, type ReadInput, type Role, type SteeringConfig, type StopReason, type TextBlock, type TokenUsage, type ToolContext, type ToolDefinition, type ToolResult, type ToolResultBlock, type ToolUseBlock, VERSION, Write, type WriteInput, arrayField, booleanField, buildExampleMessages, buildSystemPrompt, defineTool, integerField, logEventsTo, numberField, objectField, objectSchema, stringField };
+export { Agent, type AgentConfig, type AgentEvent, AgentEventEmitter, type AgentEventHandler, type AgentFactory, type AgentResult, AnthropicProvider, type AnthropicProviderOptions, type Assertion, type AssertionResult, Bash, type BashInput, type CaseResult, type ChatChunk, type ChatRequest, type ChatResponse, ConsoleLogger, type ContentBlock, Edit, type EditInput, type EmbeddingProvider, type EvalCase, type EvalSuite, EventTracer, type Example, FileStore, type FileStoreOptions, Grep, type GrepInput, HashEmbedder, type HashEmbedderOptions, InMemoryStore, InMemoryVectorStore, type JSONSchema, type JSONSchemaField, type LogLevel, type Logger, type MemoryItem, type MemoryStore, type MemoryToolOptions, type Message, type MessageContent, OllamaProvider, type OllamaProviderOptions, OpenAIProvider, type OpenAIProviderOptions, type Provider, Read, type ReadInput, type Role, type RunSuiteOptions, type SearchResult, type SteeringConfig, type StopReason, type SuiteResult, type TextBlock, type TokenUsage, type ToolContext, type ToolDefinition, type ToolResult, type ToolResultBlock, type ToolUseBlock, Tracer, VERSION, type VectorStore, Write, type WriteInput, arrayField, booleanField, buildExampleMessages, buildSystemPrompt, contains, cosineSimilarity, defineMemorySearchTool, defineRememberTool, defineSuite, defineTool, equals, fn, integerField, lengthBetween, logEventsTo, matches, notContains, numberField, objectField, objectSchema, runSuite, stringField };