npm - recursive-llm-ts - Versions diffs - 4.4.1 → 4.6.0 - Mend

recursive-llm-ts 4.4.1 → 4.6.0

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (31)

package/README.md +375 -12
package/bin/rlm-go +0 -0
package/dist/bridge-interface.d.ts +19 -2
package/dist/cache.d.ts +78 -0
package/dist/cache.js +246 -0
package/dist/config.d.ts +37 -0
package/dist/config.js +162 -0
package/dist/errors.d.ts +113 -0
package/dist/errors.js +219 -0
package/dist/events.d.ts +126 -0
package/dist/events.js +77 -0
package/dist/index.d.ts +8 -2
package/dist/index.js +38 -1
package/dist/retry.d.ts +56 -0
package/dist/retry.js +185 -0
package/dist/rlm.d.ts +391 -13
package/dist/rlm.js +815 -182
package/dist/streaming.d.ts +96 -0
package/dist/streaming.js +210 -0
package/go/README.md +9 -1
package/go/rlm/context_overflow.go +566 -0
package/go/rlm/context_overflow_test.go +783 -0
package/go/rlm/errors.go +161 -1
package/go/rlm/rlm.go +10 -0
package/go/rlm/structured.go +53 -0
package/go/rlm/textrank.go +273 -0
package/go/rlm/textrank_test.go +335 -0
package/go/rlm/tfidf.go +225 -0
package/go/rlm/tfidf_test.go +272 -0
package/go/rlm/types.go +25 -2
package/package.json +16 -4

package/README.md CHANGED Viewed

@@ -6,16 +6,33 @@ TypeScript/JavaScript package for [Recursive Language Models (RLM)](https://gith
 ## Features
-✨ **Pure Go Implementation** - No Python dependencies required
-🚀 **50x Faster Startup** - Native binary vs Python runtime
-💾 **3x Less Memory** - Efficient Go implementation
-📦 **Single Binary** - Easy distribution and deployment
-🔄 **Unbounded Context** - Process 10M+ tokens without degradation
-🎯 **Provider Agnostic** - Works with OpenAI, Anthropic, Azure, Bedrock, local models
-🔍 **Structured Outputs** - Extract typed data with Zod schemas and parallel execution
-🧠 **Meta-Agent Mode** - Automatically optimize queries for better results
-📊 **Observability** - OpenTelemetry tracing, Langfuse integration, and debug logging
-📁 **File Storage** - Process local directories or S3/MinIO/LocalStack buckets as LLM context
+**Core**
+- **Unbounded Context** - Process 10M+ tokens without degradation via recursive decomposition
+- **Structured Outputs** - Extract typed data with Zod schemas, parallel execution, and instructor-style retry
+- **Streaming** - Progressive text output and partial structured objects via async iterables
+- **Batch Operations** - Process multiple queries in parallel with concurrency control
+**Performance & Resilience**
+- **Pure Go Backend** - 50x faster startup, 3x less memory vs Python
+- **Context Overflow Recovery** - Automatic detection and 6 reduction strategies (mapreduce, truncate, chunked, tfidf, textrank, refine)
+- **Caching** - Exact-match caching with in-memory and file-based backends
+- **Retry & Fallback** - Exponential backoff, jitter, and multi-provider fallback chains
+- **AbortController** - Cancel any operation mid-flight
+**Developer Experience**
+- **Typed Errors** - Rich error hierarchy with codes, retryable flags, and suggestions
+- **Event System** - Monitor LLM calls, cache hits, retries, and errors in real-time
+- **Builder API** - Fluent configuration with full IDE discoverability
+- **Factory Methods** - `RLM.fromEnv()`, `RLM.withDebug()`, `RLM.forAzure()`
+- **Config Validation** - Catches typos and invalid settings at construction time
+- **Result Formatters** - `prettyStats()`, `toJSON()`, `toMarkdown()`
+**Ecosystem**
+- **Provider Agnostic** - Works with OpenAI, Anthropic, Azure, Bedrock, local models
+- **Meta-Agent Mode** - Automatically optimize queries for better results
+- **Observability** - OpenTelemetry tracing, Langfuse integration, and debug logging
+- **File Storage** - Process local directories or S3/MinIO/LocalStack buckets as LLM context
+- **150+ Tests** - Comprehensive Vitest + Go test suites
 ## Installation
@@ -131,6 +148,236 @@ const result = await rlm.structuredCompletion(
 );
 ```
+### Streaming
+Get progressive output with async iterables and AbortController support:
+```typescript
+// Stream text output
+const stream = rlm.streamCompletion('Summarize', longDocument);
+for await (const chunk of stream) {
+  if (chunk.type === 'text') process.stdout.write(chunk.text);
+}
+// Collect all text
+const text = await rlm.streamCompletion('Summarize', doc).toText();
+// Cancel mid-stream
+const controller = new AbortController();
+const stream = rlm.streamCompletion('Summarize', doc, { signal: controller.signal });
+setTimeout(() => controller.abort(), 5000);
+```
+### Caching
+Avoid redundant API calls with exact-match caching:
+```typescript
+const rlm = new RLM('gpt-4o-mini', {
+  api_key: process.env.OPENAI_API_KEY,
+  cache: {
+    enabled: true,
+    strategy: 'exact',   // 'exact' | 'none'
+    maxEntries: 1000,
+    ttl: 3600,            // seconds
+    storage: 'memory',    // 'memory' | 'file'
+  }
+});
+const r1 = await rlm.completion('Summarize', doc); // API call
+const r2 = await rlm.completion('Summarize', doc); // Cache hit!
+console.log(r2.cached); // true
+console.log(rlm.getCacheStats()); // { hits: 1, misses: 1, hitRate: 0.5, ... }
+```
+### Retry & Resilience
+Automatic retry with exponential backoff and provider fallback:
+```typescript
+const rlm = new RLM('gpt-4o-mini', {
+  api_key: process.env.OPENAI_API_KEY,
+  retry: {
+    maxRetries: 3,
+    backoff: 'exponential', // 1s, 2s, 4s with jitter
+    onRetry: (attempt, error, delay) => {
+      console.log(`Retry ${attempt} after ${delay}ms: ${error.message}`);
+    },
+  },
+});
+// Or use standalone retry/fallback utilities:
+import { withRetry, withFallback } from 'recursive-llm-ts';
+const result = await withFallback(
+  (model) => rlm.completion(query, context),
+  { models: ['gpt-4o', 'claude-sonnet-4-20250514', 'gemini-2.0-flash'] }
+);
+```
+### Event System
+Monitor operations in real-time:
+```typescript
+const rlm = new RLM('gpt-4o-mini', { api_key: process.env.OPENAI_API_KEY });
+rlm.on('llm_call', (e) => console.log(`Calling ${e.model}...`));
+rlm.on('llm_response', (e) => console.log(`Response in ${e.duration}ms`));
+rlm.on('cache', (e) => console.log(`Cache ${e.action}`));
+rlm.on('error', (e) => reportToSentry(e.error));
+rlm.on('completion_start', (e) => showSpinner());
+rlm.on('completion_end', (e) => hideSpinner());
+```
+### Builder API
+Fluent configuration with full IDE discoverability:
+```typescript
+const rlm = RLM.builder('gpt-4o-mini')
+  .apiKey(process.env.OPENAI_API_KEY!)
+  .maxDepth(10)
+  .maxIterations(30)
+  .withMetaAgent({ model: 'gpt-4o' })
+  .withDebug()
+  .withCache({ strategy: 'exact' })
+  .withRetry({ maxRetries: 3 })
+  .withFallback(['gpt-4o', 'claude-sonnet-4-20250514'])
+  .build();
+```
+### Factory Methods
+Quick setup for common configurations:
+```typescript
+// From environment variables
+const rlm = RLM.fromEnv('gpt-4o-mini');
+// Debug mode
+const rlm = RLM.withDebug('gpt-4o-mini');
+// Azure OpenAI
+const rlm = RLM.forAzure('my-deployment', {
+  apiBase: 'https://myresource.openai.azure.com',
+  apiVersion: '2024-02-15-preview',
+});
+```
+### Batch Operations
+Process multiple queries in parallel:
+```typescript
+const results = await rlm.batchCompletion([
+  { query: 'Summarize chapter 1', context: ch1 },
+  { query: 'Summarize chapter 2', context: ch2 },
+  { query: 'Summarize chapter 3', context: ch3 },
+], { concurrency: 2 });
+```
+### Error Handling
+Rich error hierarchy with actionable information:
+```typescript
+import {
+  RLMRateLimitError, RLMValidationError,
+  RLMTimeoutError, RLMContextOverflowError
+} from 'recursive-llm-ts';
+try {
+  const result = await rlm.completion(query, context);
+} catch (err) {
+  if (err instanceof RLMContextOverflowError) {
+    console.log(`Context overflow: ${err.requestTokens} tokens > ${err.modelLimit} limit`);
+    // Enable context_overflow config to auto-recover from this
+  } else if (err instanceof RLMRateLimitError) {
+    console.log(`Rate limited. Retry after: ${err.retryAfter}s`);
+  } else if (err instanceof RLMValidationError) {
+    console.log(`Schema mismatch:`, err.zodErrors);
+  } else if (err instanceof RLMTimeoutError) {
+    console.log(`Timed out after ${err.elapsed}ms`);
+  }
+  // All RLM errors have: err.code, err.retryable, err.suggestion
+}
+```
+### Context Overflow Handling
+Automatically detect and recover from context window overflows. When your input exceeds the model's token limit, RLM catches the error and applies a reduction strategy to fit the context within bounds.
+```typescript
+const rlm = new RLM('gpt-4o-mini', {
+  api_key: process.env.OPENAI_API_KEY,
+  context_overflow: {
+    enabled: true,           // Enable overflow recovery (default: true)
+    strategy: 'tfidf',       // Reduction strategy (see table below)
+    max_model_tokens: 32768, // Override auto-detected limit (optional)
+    safety_margin: 0.15,     // Reserve 15% for prompts/overhead (default: 0.15)
+    max_reduction_attempts: 3, // Max retry attempts (default: 3)
+  }
+});
+// Process a document that may exceed the model's context window
+const result = await rlm.completion(
+  'Summarize the key findings',
+  veryLargeDocument  // If too large, auto-reduces and retries
+);
+```
+**Builder API:**
+```typescript
+const rlm = RLM.builder('gpt-4o-mini')
+  .apiKey(process.env.OPENAI_API_KEY!)
+  .withContextOverflow({ strategy: 'textrank', max_model_tokens: 32768 })
+  .build();
+```
+**Strategy Comparison:**
+| Strategy | API Calls | Speed | Quality | Best For |
+|----------|-----------|-------|---------|----------|
+| `mapreduce` | Many (parallel) | Medium | High | General-purpose, large documents |
+| `truncate` | 0 | Fastest | Low | Quick-and-dirty, when beginning of doc matters |
+| `chunked` | Many (sequential) | Slow | High | Detailed extraction from specific sections |
+| `tfidf` | 0 | Fast | Medium | Fast first pass, keyword-rich documents |
+| `textrank` | 0 | Fast | Medium-High | Documents with clear sentence structure |
+| `refine` | Many (sequential) | Slow | Highest | When quality matters most, iterative refinement |
+**Strategy Details:**
+- **`mapreduce`** (default) - Splits context into chunks, summarizes each in parallel via LLM calls, then merges summaries. Good balance of quality and speed.
+- **`truncate`** - Drops tokens from the end to fit the budget. Zero API calls, but loses information. Best when the beginning of the document is most important.
+- **`chunked`** - Processes chunks sequentially, extracting relevant content from each. Higher quality than mapreduce for targeted extraction.
+- **`tfidf`** - Pure Go, zero API calls. Uses TF-IDF scoring to select the most informative sentences. Preserves original document order. Great for a fast, no-cost first pass.
+- **`textrank`** - Pure Go, zero API calls. Graph-based sentence ranking using PageRank over cosine-similarity of TF-IDF vectors. Better at identifying structurally important sentences than plain TF-IDF.
+- **`refine`** - Sequential iterative refinement. Processes chunks one at a time, building and refining an answer progressively. Highest quality but slowest, as each chunk sees the accumulated context.
+### Config Validation
+Catch configuration issues at construction time:
+```typescript
+const rlm = new RLM('gpt-4o-mini', { max_detph: 5 }); // typo!
+const result = rlm.validate();
+// result.issues: [{ level: 'warning', field: 'max_detph', message: 'Unknown config key...' }]
+```
+### Result Formatting
+```typescript
+const result = await rlm.completion(query, context);
+const formatted = rlm.formatResult(result);
+console.log(formatted.prettyStats());
+// "LLM Calls: 3 | Iterations: 12 | Depth: 2"
+console.log(formatted.toMarkdown());
+// Full markdown-formatted result with stats table
+```
 ### Agent Coordinator (Advanced)
 For complex multi-field schemas, use the coordinator API:
@@ -463,6 +710,42 @@ Process a query using files from local or S3 storage as context.
 Extract structured data from file-based context.
+#### `streamCompletion(query, context, options?): RLMStream`
+Stream a completion with progressive text output. Returns an async iterable.
+#### `streamStructuredCompletion<T>(query, context, schema, options?): RLMStream<T>`
+Stream a structured completion with partial object updates.
+#### `batchCompletion(queries, options?): Promise<Array<RLMCompletionResult | Error>>`
+Execute multiple completions in parallel with concurrency control.
+#### `batchStructuredCompletion<T>(queries, options?): Promise<Array<StructuredRLMResult<T> | Error>>`
+Execute multiple structured completions in parallel.
+#### `validate(): ValidationResult`
+Validate the current configuration without making API calls.
+#### `getCacheStats(): CacheStats`
+Get cache performance statistics (hits, misses, hit rate).
+#### `clearCache(): void`
+Clear the completion cache.
+#### `formatResult(result): RLMResultFormatter`
+Create a formatted result with `prettyStats()`, `toJSON()`, `toMarkdown()`.
+#### `on(event, listener) / off(event, listener) / once(event, listener)`
+Register/remove event listeners. Events: `llm_call`, `llm_response`, `error`, `cache`, `completion_start`, `completion_end`, `retry`, `validation_retry`, `meta_agent`, `recursion`.
 #### `cleanup(): Promise<void>`
 Clean up the bridge and free resources.
@@ -508,12 +791,50 @@ interface RLMConfig {
   // Shorthand for observability.debug
   debug?: boolean;
-  // LiteLLM parameters - pass any additional parameters supported by LiteLLM
+  // LiteLLM parameters
   api_version?: string;          // API version (e.g., for Azure)
   timeout?: number;              // Request timeout in seconds
   temperature?: number;          // Sampling temperature
   max_tokens?: number;           // Maximum tokens in response
-  [key: string]: any;            // Any other LiteLLM parameters
+  // Context overflow recovery
+  context_overflow?: ContextOverflowConfig;
+  // Caching, retry, fallback
+  cache?: CacheConfig;           // Cache configuration
+  retry?: RetryConfig;           // Retry configuration
+  fallback?: FallbackConfig;     // Fallback model configuration
+}
+interface CacheConfig {
+  enabled?: boolean;             // Enable caching (default: false)
+  strategy?: 'exact' | 'none';  // Cache strategy (default: 'exact')
+  maxEntries?: number;           // Max cached entries (default: 1000)
+  ttl?: number;                  // Time-to-live in seconds (default: 3600)
+  storage?: 'memory' | 'file';  // Storage backend (default: 'memory')
+  cacheDir?: string;             // Dir for file cache (default: '.rlm-cache')
+}
+interface RetryConfig {
+  maxRetries?: number;           // Max retries (default: 3)
+  backoff?: 'exponential' | 'linear' | 'fixed';
+  baseDelay?: number;            // Base delay ms (default: 1000)
+  maxDelay?: number;             // Max delay ms (default: 30000)
+  jitter?: boolean;              // Add jitter (default: true)
+  onRetry?: (attempt: number, error: Error, delay: number) => void;
+}
+interface FallbackConfig {
+  models?: string[];             // Ordered fallback models
+  strategy?: 'sequential';      // Fallback strategy
+}
+interface ContextOverflowConfig {
+  enabled?: boolean;             // Enable overflow recovery (default: true)
+  max_model_tokens?: number;     // Override auto-detected model limit (0 = auto-detect)
+  strategy?: 'mapreduce' | 'truncate' | 'chunked' | 'tfidf' | 'textrank' | 'refine';
+  safety_margin?: number;        // Fraction to reserve for overhead (default: 0.15)
+  max_reduction_attempts?: number; // Max reduction retries (default: 3)
 }
 interface MetaAgentConfig {
@@ -583,6 +904,22 @@ interface FileStorageResult {
   totalSize: number;
   skipped: Array<{ relativePath: string; reason: string }>;
 }
+// Error hierarchy - all extend RLMError
+class RLMError extends Error {
+  code: string;                  // Machine-readable: "RATE_LIMIT", "VALIDATION", etc.
+  retryable: boolean;            // Whether caller should retry
+  suggestion?: string;           // Human-readable fix suggestion
+}
+class RLMValidationError extends RLMError { expected; received; zodErrors; }
+class RLMRateLimitError extends RLMError { retryAfter?: number; }
+class RLMTimeoutError extends RLMError { elapsed; limit; }
+class RLMProviderError extends RLMError { statusCode; provider; }
+class RLMBinaryError extends RLMError { binaryPath; }
+class RLMConfigError extends RLMError { field; value; }
+class RLMContextOverflowError extends RLMError { modelLimit; requestTokens; }
+class RLMSchemaError extends RLMError { path; constraint; }
+class RLMAbortError extends RLMError {}
 ```
 ## Environment Variables
@@ -882,6 +1219,32 @@ The recursive-llm approach breaks down large contexts into manageable chunks and
 - ✅ **Type-safe** - Full TypeScript type definitions
 - ✅ **Simple API** - Just `npm install` and start using
+## Testing
+```bash
+# Run all tests (Vitest)
+npm test
+# Watch mode
+npm run test:watch
+# Coverage
+npm run test:coverage
+# Type-check
+npm run typecheck
+# Go tests
+cd go && go test ./rlm/... -v
+```
+## Documentation
+- [Quick Start Guide](docs/QUICKSTART.md)
+- [Architecture Overview](docs/ARCHITECTURE.md)
+- [Contributing Guide](CONTRIBUTING.md)
+- [UX/DX Gap Analysis](docs/UX-DX-GAP-ANALYSIS.md)
 ## Publishing
 This package uses automated GitHub Actions workflows to publish to npm. See [RELEASE.md](RELEASE.md) for detailed instructions on publishing new versions.

package/bin/rlm-go CHANGED Viewed

Binary file

package/dist/bridge-interface.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@ export interface RLMStats {
     parsing_retries?: number;
 }
 export interface RLMResult {
-    result: string | any;
+    result: string;
     stats: RLMStats;
     structured_result?: boolean;
     trace_events?: TraceEvent[];
@@ -36,6 +36,18 @@ export interface TraceEvent {
     span_id?: string;
     parent_id?: string;
 }
+export interface ContextOverflowConfig {
+    /** Enable automatic context overflow recovery (default: true) */
+    enabled?: boolean;
+    /** Override detected model token limit (0 = auto-detect from API errors) */
+    max_model_tokens?: number;
+    /** Strategy: 'mapreduce' (default), 'truncate', 'chunked', 'tfidf', 'textrank', or 'refine' */
+    strategy?: 'mapreduce' | 'truncate' | 'chunked' | 'tfidf' | 'textrank' | 'refine';
+    /** Fraction of token budget to reserve for prompts/overhead (default: 0.15) */
+    safety_margin?: number;
+    /** Maximum reduction attempts before giving up (default: 3) */
+    max_reduction_attempts?: number;
+}
 export interface RLMConfig {
     recursive_model?: string;
     api_base?: string;
@@ -46,8 +58,13 @@ export interface RLMConfig {
     go_binary_path?: string;
     meta_agent?: MetaAgentConfig;
     observability?: ObservabilityConfig;
+    context_overflow?: ContextOverflowConfig;
     debug?: boolean;
-    [key: string]: any;
+    api_version?: string;
+    timeout?: number;
+    temperature?: number;
+    max_tokens?: number;
+    structured?: any;
 }
 export interface FileStorageConfig {
     /** Storage type: 'local' or 's3' */

package/dist/cache.d.ts ADDED Viewed

@@ -0,0 +1,78 @@
+/**
+ * Caching layer for recursive-llm-ts completions.
+ *
+ * Provides exact-match caching to avoid redundant API calls for
+ * identical query+context pairs. Supports in-memory and file-based storage.
+ */
+export interface CacheConfig {
+    /** Enable/disable caching (default: false) */
+    enabled?: boolean;
+    /** Cache strategy (default: 'exact') */
+    strategy?: 'exact' | 'none';
+    /** Maximum number of cached entries (default: 1000) */
+    maxEntries?: number;
+    /** Time-to-live in seconds (default: 3600 = 1 hour) */
+    ttl?: number;
+    /** Storage backend (default: 'memory') */
+    storage?: 'memory' | 'file';
+    /** Directory for file-based cache (default: .rlm-cache) */
+    cacheDir?: string;
+}
+export interface CacheStats {
+    hits: number;
+    misses: number;
+    size: number;
+    hitRate: number;
+    evictions: number;
+}
+export interface CacheProvider {
+    get<T>(key: string): T | undefined;
+    set<T>(key: string, value: T, ttl: number): void;
+    has(key: string): boolean;
+    delete(key: string): boolean;
+    clear(): void;
+    size(): number;
+}
+export declare class MemoryCache implements CacheProvider {
+    private store;
+    private maxEntries;
+    constructor(maxEntries?: number);
+    get<T>(key: string): T | undefined;
+    set<T>(key: string, value: T, ttl: number): void;
+    has(key: string): boolean;
+    delete(key: string): boolean;
+    clear(): void;
+    size(): number;
+}
+export declare class FileCache implements CacheProvider {
+    private cacheDir;
+    private maxEntries;
+    constructor(cacheDir?: string, maxEntries?: number);
+    private filePath;
+    get<T>(key: string): T | undefined;
+    set<T>(key: string, value: T, ttl: number): void;
+    has(key: string): boolean;
+    delete(key: string): boolean;
+    clear(): void;
+    size(): number;
+}
+export declare class RLMCache {
+    private provider;
+    private config;
+    private stats;
+    constructor(config?: CacheConfig);
+    /** Check if caching is enabled */
+    get enabled(): boolean;
+    /** Look up a cached result */
+    lookup<T>(model: string, query: string, context: string, extra?: Record<string, unknown>): {
+        hit: boolean;
+        value?: T;
+    };
+    /** Store a result in the cache */
+    store<T>(model: string, query: string, context: string, value: T, extra?: Record<string, unknown>): void;
+    /** Get cache statistics */
+    getStats(): CacheStats;
+    /** Clear the cache */
+    clear(): void;
+    private updateHitRate;
+}