@princetheprogrammerbtw/husk 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,3 +1,6 @@
1
+ import { T as Tracer } from './tracer-y41CTrNG.js';
2
+ export { N as NoopTracer, b as Span, a as SpanContext, c as SpanKind, S as SpanOptions } from './tracer-y41CTrNG.js';
3
+
1
4
  /**
2
5
  * Husk — core type definitions.
3
6
  *
@@ -352,6 +355,169 @@ declare class FileStore implements MemoryStore {
352
355
  private fileFor;
353
356
  }
354
357
 
358
+ /**
359
+ * Husk — vector memory types and interfaces.
360
+ *
361
+ * Long-term memory for agents, separate from the short-term
362
+ * Message[] memory in src/core/memory.ts. Vector stores are queried
363
+ * by semantic similarity: you provide a query, get back the top-K
364
+ * most similar past items.
365
+ *
366
+ * Design choice: the agent accesses vector memory through TOOLS
367
+ * (MemorySearch, Remember) rather than automatic injection. This
368
+ * means:
369
+ * - The model decides when to recall (avoids noisy "here's some
370
+ * vaguely related past conversation" injections)
371
+ * - The same memory store can be used by multiple agents
372
+ * - Vector memory integrates with the existing tool framework, no
373
+ * agent-loop changes
374
+ *
375
+ * The VectorStore interface is intentionally simple so users can
376
+ * plug in their own backend (Chroma, Pinecone, sqlite-vec, etc.).
377
+ * Husk ships one in-memory backend for v0.3.0.
378
+ */
379
+
380
+ /**
381
+ * A single memory item: the text, its embedding, and optional
382
+ * metadata for filtering or display.
383
+ */
384
+ interface MemoryItem {
385
+ /** Unique id (caller-provided, allows updates/deletes). */
386
+ readonly id: string;
387
+ /** The text content. What the model sees when this is recalled. */
388
+ readonly content: string;
389
+ /** Pre-computed embedding vector. */
390
+ readonly embedding: readonly number[];
391
+ /** Optional metadata (timestamp, source, tags, etc.). */
392
+ readonly metadata?: Readonly<Record<string, unknown>>;
393
+ }
394
+ /**
395
+ * The result of a similarity search: the matched item plus its
396
+ * similarity score (higher = more similar). Score is implementation-
397
+ * dependent (cosine similarity for the in-memory backend).
398
+ */
399
+ interface SearchResult {
400
+ readonly id: string;
401
+ readonly content: string;
402
+ readonly score: number;
403
+ readonly metadata?: Readonly<Record<string, unknown>>;
404
+ }
405
+ interface VectorStore {
406
+ /** Add or update a memory item. */
407
+ upsert(item: MemoryItem): Promise<void>;
408
+ /** Search for the top-K most similar items to the query embedding. */
409
+ search(queryEmbedding: readonly number[], topK: number): Promise<readonly SearchResult[]>;
410
+ /** Remove a memory by id. No-op if not present. */
411
+ remove(id: string): Promise<void>;
412
+ /** List all memory ids (for debugging/inspection). */
413
+ list(): Promise<readonly string[]>;
414
+ /** Remove all memories. */
415
+ clear(): Promise<void>;
416
+ /** Total count of memories. */
417
+ count(): Promise<number>;
418
+ }
419
+ interface EmbeddingProvider {
420
+ /** Generate an embedding vector for the given text. */
421
+ embed(text: string): Promise<readonly number[]>;
422
+ /** The dimensionality of the vectors this provider produces. */
423
+ readonly dimensions: number;
424
+ }
425
+ interface MemoryToolOptions {
426
+ /** The vector store to read/write. */
427
+ readonly store: VectorStore;
428
+ /** The embedding provider (used inside the tools). */
429
+ readonly embedder: EmbeddingProvider;
430
+ /**
431
+ * Default top-K for searches when the agent doesn't specify.
432
+ * Default: 5.
433
+ */
434
+ readonly defaultTopK?: number;
435
+ }
436
+ /**
437
+ * Build the MemorySearch tool: agent calls it with a natural-
438
+ * language query, gets back the top-K most similar past items.
439
+ */
440
+ declare function defineMemorySearchTool(options: MemoryToolOptions): ToolDefinition<{
441
+ query: string;
442
+ topK?: number;
443
+ }>;
444
+ /**
445
+ * Build the Remember tool: agent calls it to save a fact/observation
446
+ * to long-term memory for later recall.
447
+ */
448
+ declare function defineRememberTool(options: MemoryToolOptions): ToolDefinition<{
449
+ id: string;
450
+ content: string;
451
+ }>;
452
+
453
+ /**
454
+ * Husk — in-memory vector store.
455
+ *
456
+ * Naive O(n) linear scan with cosine similarity. Fine for thousands
457
+ * of memories; slow for millions. The VectorStore interface is
458
+ * pluggable so users can swap in Chroma, Pinecone, sqlite-vec, or
459
+ * any ANN index for production scale.
460
+ *
461
+ * Why we ship this: zero external dependencies, deterministic
462
+ * behavior for testing, good enough for the common case of
463
+ * "remember user preferences across sessions" (a few hundred items).
464
+ *
465
+ * For very large stores, see:
466
+ * - chroma (separate server, ~3-line adapter)
467
+ * - pinecone (managed, REST API)
468
+ * - sqlite-vec (in-process, single binary)
469
+ * - hnswlib-node (in-process, true ANN)
470
+ */
471
+
472
+ declare class InMemoryVectorStore implements VectorStore {
473
+ private readonly items;
474
+ upsert(item: MemoryItem): Promise<void>;
475
+ search(queryEmbedding: readonly number[], topK: number): Promise<readonly SearchResult[]>;
476
+ remove(id: string): Promise<void>;
477
+ list(): Promise<readonly string[]>;
478
+ clear(): Promise<void>;
479
+ count(): Promise<number>;
480
+ }
481
+ /**
482
+ * Cosine similarity in [-1, 1]. Returns 0 if either vector is zero.
483
+ * (1.0 = identical direction, 0 = orthogonal, -1 = opposite)
484
+ */
485
+ declare function cosineSimilarity(a: readonly number[], b: readonly number[]): number;
486
+
487
+ /**
488
+ * Husk — simple embedding provider for testing and offline use.
489
+ *
490
+ * Produces deterministic pseudo-embeddings from text by hashing
491
+ * character n-grams into a fixed-dimension vector. NOT a real
492
+ * embedding model — semantic quality is poor, but it's:
493
+ *
494
+ * - Deterministic (same text → same vector)
495
+ * - Zero-dependency (no API call, no model file)
496
+ * - Useful for tests, demos, and offline development
497
+ *
498
+ * For real semantic search, use a real EmbeddingProvider:
499
+ * - OpenAIEmbedder (text-embedding-3-small, 1536 dims)
500
+ * - sentence-transformers via a small Python sidecar
501
+ * - CohereEmbedder, VoyageEmbedder, etc.
502
+ *
503
+ * The "similarity" this produces is bag-of-chars similarity, not
504
+ * semantic similarity. Two texts with similar character n-grams
505
+ * will score high even if they mean different things.
506
+ */
507
+
508
+ interface HashEmbedderOptions {
509
+ /** Output vector dimensions. Default: 256. */
510
+ readonly dimensions?: number;
511
+ /** N-gram size for the hashing. Default: 3 (trigrams). */
512
+ readonly ngramSize?: number;
513
+ }
514
+ declare class HashEmbedder implements EmbeddingProvider {
515
+ readonly dimensions: number;
516
+ private readonly ngramSize;
517
+ constructor(options?: HashEmbedderOptions);
518
+ embed(text: string): Promise<readonly number[]>;
519
+ }
520
+
355
521
  /**
356
522
  * Husk — steering prompt builder.
357
523
  *
@@ -525,6 +691,48 @@ declare class OpenAIProvider implements Provider {
525
691
  chat(request: ChatRequest): Promise<ChatResponse>;
526
692
  }
527
693
 
694
+ /**
695
+ * Husk — Ollama provider adapter.
696
+ *
697
+ * Wraps Ollama's OpenAI-compatible Chat Completions API. Because Ollama
698
+ * exposes the exact same wire format as OpenAI, we can reuse the OpenAI
699
+ * adapter internally — only the default model name, base URL, and the
700
+ * provider 'name' field differ.
701
+ *
702
+ * Why this exists: local models (llama3.2, deepseek-r1, qwen2.5, etc.)
703
+ * are a first-class use case. Privacy, cost, and offline-ability all
704
+ * matter. Ollama is the dominant local-model runtime and uses the
705
+ * OpenAI API surface, so the adapter is a thin shell.
706
+ *
707
+ * Defaults:
708
+ * - model: 'llama3.2' (override via constructor)
709
+ * - baseURL: 'http://localhost:11434/v1' (override for remote Ollama)
710
+ * - apiKey: 'ollama' (Ollama ignores the value but the OpenAI SDK
711
+ * requires a non-empty string)
712
+ *
713
+ * Usage:
714
+ * const agent = new Agent({ model: new OllamaProvider() });
715
+ * const result = await agent.run('Explain quantum entanglement');
716
+ *
717
+ * For a list of models: `ollama list` (in your terminal).
718
+ */
719
+
720
+ interface OllamaProviderOptions {
721
+ /** Model id (run `ollama list` to see what's pulled locally). Default: 'llama3.2'. */
722
+ readonly model?: string;
723
+ /** Ollama server URL. Default: 'http://localhost:11434/v1'. */
724
+ readonly baseURL?: string;
725
+ /** API key — Ollama ignores this but the OpenAI SDK requires it. Default: 'ollama'. */
726
+ readonly apiKey?: string;
727
+ }
728
+ declare class OllamaProvider implements Provider {
729
+ readonly name = "ollama";
730
+ readonly model: string;
731
+ private readonly inner;
732
+ constructor(options?: OllamaProviderOptions);
733
+ chat(request: Parameters<Provider['chat']>[0]): ReturnType<Provider['chat']>;
734
+ }
735
+
528
736
  /**
529
737
  * Husk — tool registry helpers.
530
738
  *
@@ -688,6 +896,169 @@ interface GrepInput {
688
896
  }
689
897
  declare const Grep: ToolDefinition<GrepInput>;
690
898
 
899
+ /**
900
+ * Husk — eval runner types and API.
901
+ *
902
+ * The eval runner lets users assert that an agent's output meets
903
+ * expectations. Three primitives:
904
+ *
905
+ * 1. EvalCase — an input + the expected outcome (an assertion or a set of them)
906
+ * 2. Assertion — a function that takes the agent's result and returns pass/fail
907
+ * 3. EvalSuite — a named collection of eval cases, runnable as a unit
908
+ *
909
+ * The design choice: assertions are plain functions (sync or async), not a DSL.
910
+ * Users can use the 6 built-ins (equals, contains, matches, fn, notContains, lengthBetween) or
911
+ * write their own. The built-in set is intentionally tiny — a heavy DSL
912
+ * (think Jest matchers) is a maintainability trap.
913
+ *
914
+ * Example:
915
+ *
916
+ * const suite = defineSuite({
917
+ * name: 'hello-agent',
918
+ * cases: [
919
+ * {
920
+ * name: 'answers geography',
921
+ * input: 'What is the capital of France? Answer in one word.',
922
+ * assertions: [
923
+ * contains('Paris'),
924
+ * matches(/^[A-Z][a-z]+$/), // single capitalized word
925
+ * ],
926
+ * },
927
+ * ],
928
+ * });
929
+ *
930
+ * const results = await runSuite(suite, () => new Agent({ model: ... }));
931
+ * console.log(`${results.passed}/${results.total} passed`);
932
+ */
933
+
934
+ /**
935
+ * A function that checks whether an agent's output meets a criterion.
936
+ * Returns a pass/fail with an optional message explaining the failure.
937
+ */
938
+ type Assertion = (result: AgentResult) => AssertionResult | Promise<AssertionResult>;
939
+ interface AssertionResult {
940
+ /** Whether the assertion passed. */
941
+ readonly pass: boolean;
942
+ /** Human-readable name shown in eval reports. */
943
+ readonly name: string;
944
+ /** Explanation of the outcome. Optional in the type, but expected when pass is false to explain why. */
945
+ readonly message?: string;
946
+ }
947
+ /** Output exactly equals the expected string. */
948
+ declare function equals(expected: string): Assertion;
949
+ /** Output contains the expected substring (case-sensitive). */
950
+ declare function contains(needle: string): Assertion;
951
+ /** Output matches the expected regex. */
952
+ declare function matches(pattern: RegExp): Assertion;
953
+ /** Output passes a custom predicate. Use this for shape-based checks. */
954
+ declare function fn(name: string, predicate: (output: string) => boolean, message?: string): Assertion;
955
+ /** Output does NOT contain the given substring. */
956
+ declare function notContains(needle: string): Assertion;
957
+ /** Output length is within bounds. */
958
+ declare function lengthBetween(min: number, max: number): Assertion;
959
+ interface EvalCase {
960
+ /** Human-readable name shown in eval reports. */
961
+ readonly name: string;
962
+ /** The input to pass to agent.run(). */
963
+ readonly input: string;
964
+ /** Assertions to run on the result. All must pass for the case to pass. */
965
+ readonly assertions: readonly Assertion[];
966
+ /**
967
+ * Optional max iterations override. Lets you cap runaway agents per-case
968
+ * without affecting other cases in the suite.
969
+ */
970
+ readonly maxIterations?: number;
971
+ }
972
+ interface EvalSuite {
973
+ /** Suite name shown in reports. */
974
+ readonly name: string;
975
+ /** Cases in this suite, run sequentially. */
976
+ readonly cases: readonly EvalCase[];
977
+ }
978
+ interface CaseResult {
979
+ readonly caseName: string;
980
+ readonly passed: boolean;
981
+ readonly assertionResults: readonly AssertionResult[];
982
+ readonly agentResult: AgentResult;
983
+ readonly durationMs: number;
984
+ }
985
+ interface SuiteResult {
986
+ readonly suiteName: string;
987
+ readonly results: readonly CaseResult[];
988
+ readonly passed: number;
989
+ readonly total: number;
990
+ readonly durationMs: number;
991
+ }
992
+
993
+ /**
994
+ * Husk — eval runner.
995
+ *
996
+ * Takes an EvalSuite + a factory that returns an Agent, runs each
997
+ * case sequentially, applies the assertions, and reports results.
998
+ *
999
+ * Why a factory (not an Agent instance): each case might want its
1000
+ * own agent configuration. The factory pattern gives the user full
1001
+ * control without forcing a specific shape.
1002
+ *
1003
+ * Why sequential (not parallel): LLM calls compete for rate limits
1004
+ * and cost $$$. Sequential gives predictable billing and easier
1005
+ * debugging. Parallel mode is not offered as of v0.3.0.
1006
+ *
1007
+ * Failure handling: an agent run that throws an error is reported
1008
+ * as a case failure (not a runner crash). The error message is
1009
+ * included in the assertion results so the user can see what broke.
1010
+ */
1011
+
1012
+ /**
1013
+ * A factory that produces a fresh Agent per case. Called once per
1014
+ * case so each case can have isolated memory, config, etc.
1015
+ */
1016
+ type AgentFactory = () => Agent | Promise<Agent>;
1017
+ interface RunSuiteOptions {
1018
+ /** Stop on first failing case. Default: false (run all cases regardless). */
1019
+ readonly failFast?: boolean;
1020
+ /** Custom logger for runner-level events. Default: silent. */
1021
+ readonly onCaseStart?: (caseName: string) => void;
1022
+ readonly onCaseEnd?: (result: CaseResult) => void;
1023
+ }
1024
+ declare function runSuite(suite: EvalSuite, factory: AgentFactory, options?: RunSuiteOptions): Promise<SuiteResult>;
1025
+ /**
1026
+ * Build a suite with less boilerplate. Equivalent to constructing
1027
+ * the object inline, but reads more clearly at the call site.
1028
+ */
1029
+ declare function defineSuite(suite: {
1030
+ name: string;
1031
+ cases: readonly EvalCase[];
1032
+ }): EvalSuite;
1033
+
1034
+ /**
1035
+ * Husk — agent event → tracer mapper.
1036
+ *
1037
+ * Translates the typed AgentEvent stream into tracer spans. The top-
1038
+ * level 'agent:start' begins a trace, each iteration becomes a child
1039
+ * span, and tool calls become their own spans under the iteration.
1040
+ *
1041
+ * Design: spans are created in startSpanOrder. Tool spans nest under
1042
+ * the iteration span. The end of the agent run ends the trace span.
1043
+ *
1044
+ * Usage:
1045
+ * const mapper = new EventTracer(myTracer);
1046
+ * agent.onAny(mapper.onEvent.bind(mapper));
1047
+ * await agent.run(...); // emits spans to myTracer
1048
+ */
1049
+
1050
+ declare class EventTracer {
1051
+ private readonly tracer;
1052
+ private traceSpan;
1053
+ private iterationSpan;
1054
+ private toolSpans;
1055
+ constructor(tracer: Tracer);
1056
+ /**
1057
+ * Bind as an event handler: `agent.onAny(tracer.onEvent.bind(tracer))`
1058
+ */
1059
+ onEvent: AgentEventHandler;
1060
+ }
1061
+
691
1062
  /**
692
1063
  * Husk — public API entry point.
693
1064
  *
@@ -699,4 +1070,4 @@ declare const Grep: ToolDefinition<GrepInput>;
699
1070
  */
700
1071
  declare const VERSION = "0.1.0";
701
1072
 
702
- export { Agent, type AgentConfig, type AgentEvent, AgentEventEmitter, type AgentEventHandler, type AgentResult, AnthropicProvider, type AnthropicProviderOptions, Bash, type BashInput, type ChatChunk, type ChatRequest, type ChatResponse, ConsoleLogger, type ContentBlock, Edit, type EditInput, type Example, FileStore, type FileStoreOptions, Grep, type GrepInput, InMemoryStore, type JSONSchema, type JSONSchemaField, type LogLevel, type Logger, type MemoryStore, type Message, type MessageContent, OpenAIProvider, type OpenAIProviderOptions, type Provider, Read, type ReadInput, type Role, type SteeringConfig, type StopReason, type TextBlock, type TokenUsage, type ToolContext, type ToolDefinition, type ToolResult, type ToolResultBlock, type ToolUseBlock, VERSION, Write, type WriteInput, arrayField, booleanField, buildExampleMessages, buildSystemPrompt, defineTool, integerField, logEventsTo, numberField, objectField, objectSchema, stringField };
1073
+ export { Agent, type AgentConfig, type AgentEvent, AgentEventEmitter, type AgentEventHandler, type AgentFactory, type AgentResult, AnthropicProvider, type AnthropicProviderOptions, type Assertion, type AssertionResult, Bash, type BashInput, type CaseResult, type ChatChunk, type ChatRequest, type ChatResponse, ConsoleLogger, type ContentBlock, Edit, type EditInput, type EmbeddingProvider, type EvalCase, type EvalSuite, EventTracer, type Example, FileStore, type FileStoreOptions, Grep, type GrepInput, HashEmbedder, type HashEmbedderOptions, InMemoryStore, InMemoryVectorStore, type JSONSchema, type JSONSchemaField, type LogLevel, type Logger, type MemoryItem, type MemoryStore, type MemoryToolOptions, type Message, type MessageContent, OllamaProvider, type OllamaProviderOptions, OpenAIProvider, type OpenAIProviderOptions, type Provider, Read, type ReadInput, type Role, type RunSuiteOptions, type SearchResult, type SteeringConfig, type StopReason, type SuiteResult, type TextBlock, type TokenUsage, type ToolContext, type ToolDefinition, type ToolResult, type ToolResultBlock, type ToolUseBlock, Tracer, VERSION, type VectorStore, Write, type WriteInput, arrayField, booleanField, buildExampleMessages, buildSystemPrompt, contains, cosineSimilarity, defineMemorySearchTool, defineRememberTool, defineSuite, defineTool, equals, fn, integerField, lengthBetween, logEventsTo, matches, notContains, numberField, objectField, objectSchema, runSuite, stringField };