npm - adaptive-memory-multi-model-router - Versions diffs - 1.2.2 - Mend

adaptive-memory-multi-model-router 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

package/README.md +114 -0
package/demo/research-demo.js +266 -0
package/dist/cache/prefixCache.d.ts +114 -0
package/dist/cache/prefixCache.d.ts.map +1 -0
package/dist/cache/prefixCache.js +285 -0
package/dist/cache/prefixCache.js.map +1 -0
package/dist/cache/responseCache.d.ts +58 -0
package/dist/cache/responseCache.d.ts.map +1 -0
package/dist/cache/responseCache.js +153 -0
package/dist/cache/responseCache.js.map +1 -0
package/dist/cli.js +59 -0
package/dist/cost/costTracker.d.ts +95 -0
package/dist/cost/costTracker.d.ts.map +1 -0
package/dist/cost/costTracker.js +240 -0
package/dist/cost/costTracker.js.map +1 -0
package/dist/index.d.ts +723 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +239 -0
package/dist/index.js.map +1 -0
package/dist/memory/episodicMemory.d.ts +82 -0
package/dist/memory/episodicMemory.d.ts.map +1 -0
package/dist/memory/episodicMemory.js +145 -0
package/dist/memory/episodicMemory.js.map +1 -0
package/dist/orchestration/haloOrchestrator.d.ts +102 -0
package/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
package/dist/orchestration/haloOrchestrator.js +207 -0
package/dist/orchestration/haloOrchestrator.js.map +1 -0
package/dist/orchestration/mctsWorkflow.d.ts +85 -0
package/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
package/dist/orchestration/mctsWorkflow.js +210 -0
package/dist/orchestration/mctsWorkflow.js.map +1 -0
package/dist/providers/localProvider.d.ts +102 -0
package/dist/providers/localProvider.d.ts.map +1 -0
package/dist/providers/localProvider.js +338 -0
package/dist/providers/localProvider.js.map +1 -0
package/dist/providers/registry.d.ts +55 -0
package/dist/providers/registry.d.ts.map +1 -0
package/dist/providers/registry.js +138 -0
package/dist/providers/registry.js.map +1 -0
package/dist/routing/advancedRouter.d.ts +68 -0
package/dist/routing/advancedRouter.d.ts.map +1 -0
package/dist/routing/advancedRouter.js +332 -0
package/dist/routing/advancedRouter.js.map +1 -0
package/dist/tools/tmlpdTools.d.ts +101 -0
package/dist/tools/tmlpdTools.d.ts.map +1 -0
package/dist/tools/tmlpdTools.js +368 -0
package/dist/tools/tmlpdTools.js.map +1 -0
package/dist/utils/batchProcessor.d.ts +96 -0
package/dist/utils/batchProcessor.d.ts.map +1 -0
package/dist/utils/batchProcessor.js +170 -0
package/dist/utils/batchProcessor.js.map +1 -0
package/dist/utils/compression.d.ts +61 -0
package/dist/utils/compression.d.ts.map +1 -0
package/dist/utils/compression.js +281 -0
package/dist/utils/compression.js.map +1 -0
package/dist/utils/reliability.d.ts +74 -0
package/dist/utils/reliability.d.ts.map +1 -0
package/dist/utils/reliability.js +177 -0
package/dist/utils/reliability.js.map +1 -0
package/dist/utils/speculativeDecoding.d.ts +117 -0
package/dist/utils/speculativeDecoding.d.ts.map +1 -0
package/dist/utils/speculativeDecoding.js +246 -0
package/dist/utils/speculativeDecoding.js.map +1 -0
package/dist/utils/tokenUtils.d.ts +50 -0
package/dist/utils/tokenUtils.d.ts.map +1 -0
package/dist/utils/tokenUtils.js +124 -0
package/dist/utils/tokenUtils.js.map +1 -0
package/examples/QUICKSTART.md +183 -0
package/notebooks/quickstart.ipynb +157 -0
package/package.json +83 -0
package/python/examples.py +53 -0
package/python/integrations.py +330 -0
package/python/setup.py +28 -0
package/python/tmlpd.py +369 -0
package/qna/REDDIT_GAP_ANALYSIS.md +299 -0
package/qna/TMLPD_QNA.md +751 -0
package/rust/tmlpd.h +268 -0
package/skill/SKILL.md +238 -0
package/src/cache/prefixCache.ts +365 -0
package/src/cache/responseCache.ts +147 -0
package/src/cost/costTracker.ts +302 -0
package/src/index.ts +224 -0
package/src/memory/episodicMemory.ts +185 -0
package/src/orchestration/haloOrchestrator.ts +266 -0
package/src/orchestration/mctsWorkflow.ts +262 -0
package/src/providers/localProvider.ts +406 -0
package/src/providers/registry.ts +164 -0
package/src/routing/advancedRouter.ts +406 -0
package/src/tools/tmlpdTools.ts +433 -0
package/src/utils/batchProcessor.ts +232 -0
package/src/utils/compression.ts +325 -0
package/src/utils/reliability.ts +221 -0
package/src/utils/speculativeDecoding.ts +344 -0
package/src/utils/tokenUtils.ts +145 -0
package/tsconfig.json +18 -0

package/src/utils/compression.ts ADDED Viewed

@@ -0,0 +1,325 @@
+/**
+ * TMLPD Context Compression Utilities
+ *
+ * Strategies for reducing context window usage:
+ * - Smart truncation
+ * - Message summarization
+ * - ISON-style encoding (inspired by Reddit ISON format)
+ * - Context window management
+ */
+/**
+ * A single conversation message as consumed by the truncation helpers in this file.
+ */
+export interface Message {
+  /** Speaker role; `truncateMessages` treats "system" and "user" specially. */
+  role: "system" | "user" | "assistant";
+  /** Message text; this is what the token estimate is computed from. */
+  content: string;
+  /** Optional name; not inspected by the helpers visible here. */
+  name?: string;
+  /** Optional tool-call payloads (untyped); not inspected by the helpers visible here. */
+  tool_calls?: any[];
+  /** Optional tool-call id; not inspected by the helpers visible here. */
+  tool_call_id?: string;
+}
+/** Strategy names accepted by the `strategy` parameter of `truncateMessages`. */
+export type CompressionStrategy = "smart" | "first" | "last" | "isentropy";
+/**
+ * ISON (Intelligence-Sparse Object Notation)
+ *
+ * A lossy compression table that reduces token count by:
+ * - Dropping lowercase articles and politeness words
+ * - Abbreviating common words and phrases
+ * - Normalizing whitespace
+ *
+ * Rules are applied in array order, each one to the output of the previous.
+ * Article removal is case-sensitive, so a capitalized "The" is kept.
+ *
+ * Example: "Please send the report to the team"
+ *      → "send report 2 team"
+ */
+const ISON_REPLACEMENTS: Array<[RegExp, string]> = [
+  // Common phrase abbreviations
+  [/\bthe\b/g, ""],
+  [/\ba\b/g, ""],
+  [/\ban\b/g, ""],
+  [/\bwith\b/g, "w/"],
+  [/\bwithout\b/g, "w/o"],
+  [/\band\b/g, "&"],
+  [/\bor\b/g, "|"],
+  [/\bfor\b/g, "4"],
+  [/\bto\b/g, "2"],
+  // NOTE: "into" and "to" share the token "2", so decoding cannot tell them apart.
+  [/\binto\b/g, "2"],
+  [/\bfrom\b/g, "fr"],
+  [/\bplease\b/gi, ""],
+  [/\bthank you\b/gi, "thx"],
+  [/\byou are\b/gi, "u r"],
+  [/\byou can\b/gi, "u c"],
+  [/\bcan you\b/gi, "c?"],
+  [/\bhow do\b/gi, "how 2"],
+  [/\bwhat is\b/gi, "wat"],
+  [/\bwhat are\b/gi, "wat"],
+  // Whitespace normalization
+  [/\s+/g, " "],
+  [/^\s+|\s+$/g, ""],
+  // Collapse runs of 3+ identical letters ("soooo" -> "soo"). Restricted to
+  // letters so numeric values such as "1000" are not silently changed.
+  [/([A-Za-z])\1{2,}/g, "$1$1"],
+  // Shorten common technical terms
+  [/\binformation\b/gi, "info"],
+  [/\bprocessing\b/gi, "proc"],
+  [/\bdevelopment\b/gi, "dev"],
+  [/\bapplication\b/gi, "app"],
+  [/\bconfiguration\b/gi, "config"],
+  [/\brepresentation\b/gi, "repr"],
+  [/\bunderstanding\b/gi, "unders"],
+  [/\brecommendation\b/gi, "rec"],
+];
+/**
+ * Best-effort inverse of the ISON abbreviation table.
+ *
+ * Rules are applied in array order, so longer tokens must come before the
+ * shorter tokens they contain ("w/o" before "w/", "how 2" before "2").
+ * Decoding is lossy: dropped words are not restored, and a literal "2", "4"
+ * or "&" in the input is expanded as if it were an abbreviation.
+ */
+const ISON_UNREPLACEMENTS: Array<[RegExp, string]> = [
+  // "w/o" must be expanded before "w/", otherwise it decodes to "with o".
+  [/w\/o/g, "without "],
+  [/w\//g, "with "],
+  [/&/g, " and "],
+  // "how 2" must be expanded before the bare "2" rule consumes its digit.
+  [/how 2\b/gi, "how do"],
+  [/\b4\b/g, " for "],
+  [/\b2\b/g, " to "],
+  [/\bfr\b/g, "from "],
+  [/\bthx\b/gi, "thank you"],
+  [/\bu r\b/gi, "you are"],
+  [/\bu c\b/gi, "you can"],
+  // No trailing \b: "?" is a non-word character, so a boundary after it
+  // would only match when a word character follows immediately.
+  [/\bc\?/g, "can you"],
+  [/\bwat\b/g, "what is"],
+];
+/**
+ * Outcome of `compressText`: token estimates before and after encoding.
+ */
+export interface CompressionResult {
+  /** Estimated token count of the input text. */
+  original_tokens: number;
+  /** Estimated token count of the encoded text. */
+  compressed_tokens: number;
+  /** compressed_tokens divided by original_tokens (lower means more compression). */
+  ratio: number;
+  /** The ISON-encoded text. */
+  compressed_text: string;
+}
+/**
+ * Apply every ISON replacement rule, in table order, to `text` and return
+ * the result with whitespace collapsed to single spaces and trimmed.
+ */
+export function isonEncode(text: string): string {
+  const substituted = ISON_REPLACEMENTS.reduce(
+    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
+    text
+  );
+  // Final pass: squeeze any whitespace runs left behind and trim the ends.
+  return substituted.replace(/\s+/g, " ").trim();
+}
+/**
+ * Decode ISON encoded text.
+ *
+ * Applies the un-replacement table in order, then collapses runs of spaces
+ * and trims the ends. Several expansions are padded with spaces (e.g. "&"
+ * becomes " and "), which would otherwise leave doubled, leading or
+ * trailing spaces in the decoded text.
+ *
+ * Decoding is best-effort: words removed by encoding cannot be restored.
+ *
+ * @param text - ISON-encoded text
+ * @returns The expanded text with single spaces between words
+ */
+export function isonDecode(text: string): string {
+  let result = text;
+  // Apply un-replacements
+  for (const [pattern, replacement] of ISON_UNREPLACEMENTS) {
+    result = result.replace(pattern, replacement);
+  }
+  // Only spaces are squeezed; other whitespace in the input is left alone.
+  return result.replace(/ {2,}/g, " ").trim();
+}
+/**
+ * Compress text using ISON encoding and report the estimated token savings.
+ *
+ * @param text - Text to compress
+ * @returns Token estimates before and after encoding, their ratio, and the
+ *   encoded text. When the input has no tokens (empty or whitespace-only)
+ *   the ratio is reported as 1 instead of NaN from a 0/0 division.
+ */
+export function compressText(text: string): CompressionResult {
+  const original_tokens = estimateTokens(text);
+  const compressed = isonEncode(text);
+  const compressed_tokens = estimateTokens(compressed);
+  return {
+    original_tokens,
+    compressed_tokens,
+    ratio: original_tokens === 0 ? 1 : compressed_tokens / original_tokens,
+    compressed_text: compressed
+  };
+}
+/**
+ * Rough token estimate: the number of whitespace-delimited words times 1.3,
+ * rounded up. Empty or whitespace-only text yields 0.
+ */
+function estimateTokens(text: string): number {
+  const wordCount = (text.match(/\S+/g) ?? []).length;
+  return Math.ceil(wordCount * 1.3);
+}
+/** Tokens charged per message for role/format framing. */
+const MESSAGE_OVERHEAD_TOKENS = 4;
+/** Message content as text; non-string content is serialized as JSON. */
+function messageText(msg: Message): string {
+  return typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content);
+}
+/** Estimated cost of one message: content tokens plus framing overhead. */
+function messageCost(msg: Message): number {
+  return estimateTokens(messageText(msg)) + MESSAGE_OVERHEAD_TOKENS;
+}
+/** "first": keep messages from the start until the budget is exhausted. */
+function truncateKeepingFirst(messages: Message[], max_tokens: number): Message[] {
+  const kept: Message[] = [];
+  let used = 0;
+  for (const msg of messages) {
+    const cost = messageCost(msg);
+    if (used + cost <= max_tokens) {
+      kept.push(msg);
+      used += cost;
+      continue;
+    }
+    if (msg.role === "system" && kept.length === 0) {
+      // Always keep a leading system message, shortened to the budget.
+      const systemContent = typeof msg.content === "string" ? msg.content : "";
+      kept.push({
+        ...msg,
+        content: truncateToTokenBudget(systemContent, max_tokens - MESSAGE_OVERHEAD_TOKENS),
+      });
+    }
+    break;
+  }
+  return kept;
+}
+/** "last": keep the newest contiguous run of messages that fits the budget. */
+function truncateKeepingLast(messages: Message[], max_tokens: number): Message[] {
+  const kept: Message[] = [];
+  let used = 0;
+  for (const msg of [...messages].reverse()) {
+    const cost = messageCost(msg);
+    if (used + cost <= max_tokens) {
+      kept.unshift(msg);
+      used += cost;
+      continue;
+    }
+    if (msg.role === "user") {
+      // Try to keep a shortened copy of the first user message that does not fit.
+      const budget = max_tokens - used - MESSAGE_OVERHEAD_TOKENS;
+      if (budget > 0) {
+        const shortened = truncateToTokenBudget(messageText(msg), budget);
+        if (shortened.length > 20) {
+          kept.unshift({ ...msg, content: shortened });
+        }
+      }
+    }
+    // Stop at the first message that does not fit so the kept tail stays contiguous.
+    break;
+  }
+  return kept;
+}
+/** "smart": system messages first, then a summary of the middle, then the recent tail. */
+function truncateSmart(messages: Message[], max_tokens: number): Message[] {
+  const recentWindow = 3;
+  const systemMessages: Message[] = [];
+  const middleMessages: Message[] = [];
+  const recentMessages: Message[] = [];
+  // Classify by position; using the loop index (not indexOf) keeps repeated
+  // message objects in the right bucket and avoids a quadratic scan.
+  messages.forEach((msg, index) => {
+    if (msg.role === "system") {
+      systemMessages.push(msg);
+    } else if (index >= messages.length - recentWindow) {
+      recentMessages.push(msg);
+    } else {
+      middleMessages.push(msg);
+    }
+  });
+  let remaining = max_tokens;
+  // 1. System messages, in order; the first one that does not fit is shortened.
+  const keptSystem: Message[] = [];
+  for (const msg of systemMessages) {
+    const cost = messageCost(msg);
+    if (cost <= remaining) {
+      keptSystem.push(msg);
+      remaining -= cost;
+      continue;
+    }
+    const budget = remaining - MESSAGE_OVERHEAD_TOKENS;
+    if (budget > 0) {
+      const shortened = truncateToTokenBudget(messageText(msg), budget);
+      keptSystem.push({ ...msg, content: shortened });
+      remaining = Math.max(0, remaining - estimateTokens(shortened) - MESSAGE_OVERHEAD_TOKENS);
+    }
+    break;
+  }
+  // 2. Recent messages, newest first, so the latest turn is the last to be dropped.
+  const keptRecent: Message[] = [];
+  for (const msg of [...recentMessages].reverse()) {
+    const cost = messageCost(msg);
+    if (cost > remaining) break;
+    keptRecent.unshift(msg);
+    remaining -= cost;
+  }
+  // 3. Whatever is left (capped at a quarter of the budget) goes to a summary of the middle.
+  const result: Message[] = [...keptSystem];
+  if (middleMessages.length > 0) {
+    const summaryTokenBudget = Math.min(
+      Math.floor(max_tokens / 4),
+      remaining - MESSAGE_OVERHEAD_TOKENS
+    );
+    if (summaryTokenBudget > 0) {
+      const label = `[Previous ${middleMessages.length} messages]:`;
+      const middleContent = middleMessages.map(messageText).join("\n");
+      const summary = truncateToTokenBudget(`${label} ${middleContent}`, summaryTokenBudget);
+      // Skip a summary that holds nothing beyond the label and an ellipsis.
+      if (summary.length > label.length + 3) {
+        result.push({ role: "assistant", content: summary });
+      }
+    }
+  }
+  result.push(...keptRecent);
+  return result;
+}
+/**
+ * Truncate messages to fit within token budget.
+ *
+ * Token counts are estimates (word count based) plus a fixed per-message
+ * overhead. If the conversation already fits, the input array is returned
+ * unchanged for every strategy.
+ *
+ * @param messages - Conversation messages
+ * @param max_tokens - Maximum tokens allowed
+ * @param strategy - One of:
+ *   - "smart": keep system messages, then the last three positions of the
+ *     conversation (newest first), then a truncated summary of the middle
+ *     if budget remains
+ *   - "first": keep messages from the start until the budget is used up
+ *   - "last": keep the newest contiguous messages that fit
+ *   - "isentropy": ISON-encode string contents (lossy), then fall back to
+ *     "smart" if the encoded conversation is still over budget
+ *   Any other value is treated as "smart".
+ * @returns The messages that fit the budget; an empty array for empty input
+ */
+export function truncateMessages(
+  messages: Message[],
+  max_tokens: number,
+  strategy: CompressionStrategy = "smart"
+): Message[] {
+  if (!messages || messages.length === 0) return [];
+  const totalTokens = messages.reduce((sum, m) => sum + messageCost(m), 0);
+  if (totalTokens <= max_tokens) {
+    return messages; // Already fits
+  }
+  switch (strategy) {
+    case "first":
+      return truncateKeepingFirst(messages, max_tokens);
+    case "last":
+      return truncateKeepingLast(messages, max_tokens);
+    case "isentropy": {
+      const encoded = messages.map((m) =>
+        typeof m.content === "string" ? { ...m, content: isonEncode(m.content) } : m
+      );
+      // Re-enter with "smart": returns `encoded` as-is when it now fits.
+      return truncateMessages(encoded, max_tokens, "smart");
+    }
+    case "smart":
+    default:
+      return truncateSmart(messages, max_tokens);
+  }
+}
+/**
+ * Truncate a single string to fit within token budget.
+ *
+ * Words are taken from the start of `text` until the next word would exceed
+ * `max_tokens`; each word is costed individually with `estimateTokens`.
+ * The kept words are re-joined with single spaces, so the original
+ * whitespace is normalized. "..." is appended only when at least one word
+ * was actually dropped. Comparing string lengths instead would add a
+ * spurious ellipsis to untruncated text containing newlines or repeated
+ * spaces.
+ *
+ * @param text - Text to shorten
+ * @param max_tokens - Token budget for the kept words
+ * @returns The kept words, followed by "..." if anything was cut
+ */
+export function truncateToTokenBudget(text: string, max_tokens: number): string {
+  const words = text.split(/\s+/);
+  const targetWords: string[] = [];
+  let current = 0;
+  let truncated = false;
+  for (const word of words) {
+    const wordTokens = estimateTokens(word);
+    if (current + wordTokens > max_tokens) {
+      truncated = true;
+      break;
+    }
+    targetWords.push(word);
+    current += wordTokens;
+  }
+  const result = targetWords.join(" ");
+  return truncated ? `${result}...` : result;
+}
+/**
+ * Calculate the fraction of the conversation that fits within `max_tokens`.
+ *
+ * @param messages - Conversation messages
+ * @param max_tokens - Token budget
+ * @returns A value in [0, 1]: 1 when everything fits (or there are no
+ *   messages), otherwise max_tokens divided by the estimated total. A
+ *   negative budget is reported as 0.
+ */
+export function calculateCompressionRatio(messages: Message[], max_tokens: number): number {
+  // Nothing to compress; also avoids 0/0 = NaN when the budget is 0.
+  if (!messages || messages.length === 0) return 1;
+  const totalTokens = messages.reduce((sum, m) => {
+    const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
+    return sum + estimateTokens(content) + 4; // +4 for role/format overhead
+  }, 0);
+  // totalTokens is at least 4 here, so the division is always defined.
+  return Math.min(1, Math.max(0, max_tokens / totalTokens));
+}
+/** Default export: the public compression helpers bundled in one object. */
+const compressionUtils = {
+  isonEncode,
+  isonDecode,
+  compressText,
+  truncateMessages,
+  truncateToTokenBudget,
+  calculateCompressionRatio
+};
+export default compressionUtils;

package/src/utils/reliability.ts ADDED Viewed

@@ -0,0 +1,221 @@
+/**
+ * TMLPD Reliability Engine
+ *
+ * Circuit breaker, retry with jitter, and enhanced cooldown logic.
+ * Designed to handle flaky API calls gracefully.
+ */
+/**
+ * Settings for retry timing and for deciding which failures are retried.
+ */
+export interface RetryConfig {
+  /** Upper bound on the number of loop iterations `withRetry` performs. */
+  max_attempts: number;
+  /** Delay for the first retry; doubled for each subsequent attempt. */
+  base_delay_ms: number;
+  /** Cap applied to the exponential delay before jitter is added. */
+  max_delay_ms: number;
+  jitter: number; // 0-1, percentage of delay to randomize
+  /** Status codes for which a failed call is retried. */
+  retryable_status_codes: number[];
+}
+/**
+ * Thresholds that drive `CircuitBreaker` state transitions.
+ */
+export interface CircuitBreakerConfig {
+  /** Failure count at which a non-half-open circuit is switched to "open". */
+  failure_threshold: number;
+  /** Time since the last failure after which an open circuit becomes "half_open". */
+  recovery_timeout_ms: number;
+  /**
+   * Used twice by the breaker: as the limit `canExecute` compares its
+   * half-open call counter against, and as the number of consecutive
+   * successes needed to close the circuit again.
+   */
+  half_open_max_calls: number;
+}
+/**
+ * Snapshot of a circuit breaker's internal bookkeeping, as returned by `getState()`.
+ */
+export interface CircuitState {
+  /** Current position in the closed -> open -> half_open cycle. */
+  status: "closed" | "open" | "half_open";
+  /** Failures recorded since the counter was last reset to 0. */
+  failure_count: number;
+  /** `Date.now()` timestamp of the most recent recorded failure, or null if none. */
+  last_failure_time: number | null;
+  /** `Date.now()` timestamp of the most recent recorded success, or null if none. */
+  last_success_time: number | null;
+  /** Successes recorded while half-open; reset to 0 by any failure. */
+  consecutive_successes: number;
+}
+/**
+ * Retry settings used when the caller supplies none; `withRetry` merges
+ * caller overrides on top of these.
+ */
+export const DEFAULT_RETRY_CONFIG: RetryConfig = {
+  max_attempts: 3,
+  base_delay_ms: 500,
+  max_delay_ms: 30000,
+  jitter: 0.3, // randomize each delay by up to +/-30%
+  retryable_status_codes: [408, 429, 500, 502, 503, 504],
+};
+/**
+ * Circuit breaker settings used when the constructor receives no overrides.
+ */
+export const DEFAULT_CIRCUIT_BREAKER_CONFIG: CircuitBreakerConfig = {
+  failure_threshold: 5,
+  recovery_timeout_ms: 60000,
+  half_open_max_calls: 3,
+};
+/**
+ * Calculate delay with exponential backoff and jitter.
+ *
+ * The base delay doubles with each attempt and is capped at
+ * `max_delay_ms`. A random offset of up to +/-(delay * jitter) is then
+ * added. The final value is clamped to [0, max_delay_ms]: without the
+ * clamp, positive jitter pushes the delay past the configured maximum and
+ * a jitter factor above 1 can produce a negative delay.
+ *
+ * @param attempt - 1-based attempt number (1 yields roughly `base_delay_ms`)
+ * @param config - Retry settings; defaults to `DEFAULT_RETRY_CONFIG`
+ * @returns Delay in whole milliseconds
+ */
+export function calculateRetryDelay(
+  attempt: number,
+  config: RetryConfig = DEFAULT_RETRY_CONFIG
+): number {
+  // Exponential backoff, capped at max delay
+  const exponential_delay = config.base_delay_ms * Math.pow(2, attempt - 1);
+  const capped_delay = Math.min(exponential_delay, config.max_delay_ms);
+  // Symmetric jitter around the capped delay
+  const jitter_range = capped_delay * config.jitter;
+  const jittered_delay = capped_delay + (Math.random() * 2 - 1) * jitter_range;
+  // Keep the result inside the configured bounds
+  const bounded_delay = Math.min(config.max_delay_ms, Math.max(0, jittered_delay));
+  return Math.round(bounded_delay);
+}
+/**
+ * Decide whether a failure with the given status code should be retried.
+ * A null status (no code available, e.g. a network error) is always
+ * retryable; otherwise the code must be listed in
+ * `config.retryable_status_codes`.
+ */
+export function isRetryableStatus(statusCode: number | null, config: RetryConfig = DEFAULT_RETRY_CONFIG): boolean {
+  return statusCode === null || config.retryable_status_codes.includes(statusCode);
+}
+/**
+ * Circuit breaker with three states: "closed" (calls allowed), "open"
+ * (calls refused until the recovery timeout has elapsed since the last
+ * failure) and "half_open" (trial calls allowed until enough consecutive
+ * successes close the circuit again).
+ */
+export class CircuitBreaker {
+  private config: CircuitBreakerConfig;
+  private state: CircuitState;
+  private half_open_calls = 0;
+  constructor(config: Partial<CircuitBreakerConfig> = {}) {
+    this.config = { ...DEFAULT_CIRCUIT_BREAKER_CONFIG, ...config };
+    this.state = CircuitBreaker.pristineState();
+  }
+  /** Bookkeeping for a closed circuit with no recorded history. */
+  private static pristineState(): CircuitState {
+    return {
+      status: "closed",
+      failure_count: 0,
+      last_failure_time: null,
+      last_success_time: null,
+      consecutive_successes: 0,
+    };
+  }
+  /**
+   * Report whether a call may proceed. As a side effect, an open circuit
+   * whose recovery timeout has elapsed is moved to half-open.
+   */
+  canExecute(): boolean {
+    switch (this.state.status) {
+      case "closed":
+        return true;
+      case "half_open":
+        return this.half_open_calls < this.config.half_open_max_calls;
+      default: {
+        // open: stay closed to callers until the cooldown has passed
+        const failedAt = this.state.last_failure_time;
+        const cooledDown =
+          Boolean(failedAt) && Date.now() - (failedAt as number) >= this.config.recovery_timeout_ms;
+        if (!cooledDown) {
+          return false;
+        }
+        this.state.status = "half_open";
+        this.half_open_calls = 0;
+        return true;
+      }
+    }
+  }
+  /**
+   * Note a successful call. While half-open, enough consecutive successes
+   * close the circuit; in any other state the failure counter is cleared.
+   */
+  recordSuccess(): void {
+    const current = this.state;
+    current.last_success_time = Date.now();
+    this.half_open_calls += 1;
+    if (current.status !== "half_open") {
+      current.failure_count = 0;
+      return;
+    }
+    current.consecutive_successes += 1;
+    if (current.consecutive_successes >= this.config.half_open_max_calls) {
+      // Circuit recovered
+      current.status = "closed";
+      current.failure_count = 0;
+      current.consecutive_successes = 0;
+    }
+  }
+  /**
+   * Note a failed call. A failure while half-open re-opens the circuit at
+   * once; otherwise it opens when the failure count reaches the threshold.
+   */
+  recordFailure(): void {
+    const current = this.state;
+    current.last_failure_time = Date.now();
+    current.failure_count += 1;
+    current.consecutive_successes = 0;
+    const shouldOpen =
+      current.status === "half_open" || current.failure_count >= this.config.failure_threshold;
+    if (shouldOpen) {
+      current.status = "open";
+    }
+  }
+  /**
+   * Return a shallow copy of the current state, so callers cannot mutate
+   * the breaker's internals through it.
+   */
+  getState(): CircuitState {
+    return { ...this.state };
+  }
+  /**
+   * Discard all history and return to a closed circuit.
+   */
+  reset(): void {
+    this.state = CircuitBreaker.pristineState();
+    this.half_open_calls = 0;
+  }
+}
+/**
+ * Pull an HTTP-style status code off a thrown value, if it carries one.
+ * Checks `statusCode`, `response.statusCode`, `status` and
+ * `response.status`, in that order; returns null when none is a non-zero
+ * number.
+ */
+function extractStatusCode(error: unknown): number | null {
+  const err = error as
+    | {
+        statusCode?: unknown;
+        status?: unknown;
+        response?: { statusCode?: unknown; status?: unknown } | null;
+      }
+    | null
+    | undefined;
+  const candidates = [
+    err?.statusCode,
+    err?.response?.statusCode,
+    err?.status,
+    err?.response?.status,
+  ];
+  const found = candidates.find(
+    (value): value is number => typeof value === "number" && value !== 0
+  );
+  return found ?? null;
+}
+/**
+ * Enhanced retry wrapper with circuit breaker integration.
+ *
+ * Calls `fn` up to `max_attempts` times, sleeping with exponential backoff
+ * and jitter between attempts. Never rejects: the outcome is reported in
+ * the returned object.
+ *
+ * If a circuit breaker is supplied and refuses the call, the function
+ * returns immediately with `circuit_tripped: true`. The refusal is NOT
+ * recorded as a failure: doing so would refresh the breaker's
+ * last-failure time on every refused call and postpone recovery for as
+ * long as callers keep arriving. Retrying is also pointless while the
+ * breaker is open.
+ *
+ * Errors whose status code is not retryable end the loop at once and are
+ * not recorded on the breaker.
+ *
+ * @param fn - Operation to run
+ * @param config - Overrides merged on top of `DEFAULT_RETRY_CONFIG`
+ * @param circuitBreaker - Optional breaker consulted before each attempt
+ * @returns `result` on success, otherwise the last `error`; `attempts`
+ *   counts loop iterations, including one refused by the breaker
+ */
+export async function withRetry<T>(
+  fn: () => Promise<T>,
+  config: Partial<RetryConfig> = {},
+  circuitBreaker?: CircuitBreaker
+): Promise<{ result: T | null; error: Error | null; attempts: number; circuit_tripped: boolean }> {
+  const retryConfig = { ...DEFAULT_RETRY_CONFIG, ...config };
+  let lastError: Error | null = null;
+  let attempts = 0;
+  for (let i = 0; i < retryConfig.max_attempts; i++) {
+    attempts++;
+    // Fail fast when the breaker refuses the call; see the note above.
+    if (circuitBreaker && !circuitBreaker.canExecute()) {
+      return {
+        result: null,
+        error: new Error("Circuit breaker is open"),
+        attempts,
+        circuit_tripped: true,
+      };
+    }
+    try {
+      const result = await fn();
+      circuitBreaker?.recordSuccess();
+      return { result, error: null, attempts, circuit_tripped: false };
+    } catch (error: unknown) {
+      lastError = error instanceof Error ? error : new Error(String(error));
+      // Check if should retry
+      const statusCode = extractStatusCode(error);
+      if (!isRetryableStatus(statusCode, retryConfig)) {
+        return { result: null, error: lastError, attempts, circuit_tripped: false };
+      }
+      circuitBreaker?.recordFailure();
+      // Don't wait after last attempt
+      if (i < retryConfig.max_attempts - 1) {
+        const delay = calculateRetryDelay(i + 1, retryConfig);
+        await new Promise((resolve) => setTimeout(resolve, delay));
+      }
+    }
+  }
+  return { result: null, error: lastError, attempts, circuit_tripped: false };
+}