npm - universal-llm-client - Versions diffs - 4.3.0 → 4.5.0 - Mend

universal-llm-client 4.3.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (151) hide show

package/CHANGELOG.md +27 -24
package/README.md +60 -11
package/dist/ai-model.d.ts +12 -1
package/dist/ai-model.d.ts.map +1 -1
package/dist/ai-model.js +36 -1
package/dist/ai-model.js.map +1 -1
package/dist/auditor.js.map +1 -1
package/dist/client.js.map +1 -1
package/dist/gemma-channel.d.ts +14 -0
package/dist/gemma-channel.d.ts.map +1 -0
package/dist/gemma-channel.js +38 -0
package/dist/gemma-channel.js.map +1 -0
package/dist/gemma-diffusion.d.ts +49 -0
package/dist/gemma-diffusion.d.ts.map +1 -0
package/dist/gemma-diffusion.js +147 -0
package/dist/gemma-diffusion.js.map +1 -0
package/dist/http.d.ts +4 -0
package/dist/http.d.ts.map +1 -1
package/dist/http.js +14 -1
package/dist/http.js.map +1 -1
package/dist/index.d.ts +2 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +4 -0
package/dist/index.js.map +1 -1
package/dist/interfaces.d.ts +163 -7
package/dist/interfaces.d.ts.map +1 -1
package/dist/interfaces.js.map +1 -1
package/dist/mcp.js.map +1 -1
package/dist/providers/anthropic.d.ts.map +1 -1
package/dist/providers/anthropic.js +28 -3
package/dist/providers/anthropic.js.map +1 -1
package/dist/providers/google.d.ts +22 -1
package/dist/providers/google.d.ts.map +1 -1
package/dist/providers/google.js +223 -13
package/dist/providers/google.js.map +1 -1
package/dist/providers/index.js.map +1 -1
package/dist/providers/ollama.d.ts +2 -0
package/dist/providers/ollama.d.ts.map +1 -1
package/dist/providers/ollama.js +59 -30
package/dist/providers/ollama.js.map +1 -1
package/dist/providers/openai.d.ts +14 -0
package/dist/providers/openai.d.ts.map +1 -1
package/dist/providers/openai.js +200 -22
package/dist/providers/openai.js.map +1 -1
package/dist/router.d.ts +2 -0
package/dist/router.d.ts.map +1 -1
package/dist/router.js +4 -0
package/dist/router.js.map +1 -1
package/dist/stream-decoder.d.ts +12 -0
package/dist/stream-decoder.d.ts.map +1 -1
package/dist/stream-decoder.js +182 -5
package/dist/stream-decoder.js.map +1 -1
package/dist/structured-output.js.map +1 -1
package/dist/thinking.d.ts +36 -0
package/dist/thinking.d.ts.map +1 -0
package/dist/thinking.js +52 -0
package/dist/thinking.js.map +1 -0
package/dist/tools.js.map +1 -1
package/dist/zod-adapter.js.map +1 -1
package/package.json +4 -1
package/src/ai-model.ts +400 -0
package/src/auditor.ts +213 -0
package/src/client.ts +402 -0
package/src/debug/debug-google-streaming.ts +97 -0
package/src/debug/debug-tool-execution.ts +86 -0
package/src/debug/test-lmstudio-tools.ts +155 -0
package/src/demos/README.md +47 -0
package/src/demos/basic/universal-llm-examples.ts +161 -0
package/src/demos/diffusion-gemma/.env +29 -0
package/src/demos/diffusion-gemma/.env.example +27 -0
package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
package/src/demos/diffusion-gemma/README.md +59 -0
package/src/demos/diffusion-gemma/canvas.ts +1606 -0
package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
package/src/demos/diffusion-gemma/server.ts +1205 -0
package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
package/src/demos/mcp/astrid-memory-demo.ts +295 -0
package/src/demos/mcp/astrid-persona-memory.ts +357 -0
package/src/demos/mcp/mcp-mongodb-demo.ts +275 -0
package/src/demos/mcp/simple-astrid-memory.ts +148 -0
package/src/demos/mcp/simple-mcp-demo.ts +68 -0
package/src/demos/mcp/working-mcp-demo.ts +62 -0
package/src/demos/model-alias-demo.ts +0 -0
package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +267 -0
package/src/demos/tools/astrid-memory-demo.ts +270 -0
package/src/demos/tools/astrid-production-memory-clean.ts +785 -0
package/src/demos/tools/astrid-production-memory.ts +558 -0
package/src/demos/tools/basic-translation-test.ts +66 -0
package/src/demos/tools/chromadb-similarity-tuning.ts +390 -0
package/src/demos/tools/clean-multilingual-conversation.ts +209 -0
package/src/demos/tools/clean-translation-test.ts +119 -0
package/src/demos/tools/clean-universal-multilingual-test.ts +131 -0
package/src/demos/tools/complete-rag-demo.ts +369 -0
package/src/demos/tools/complete-tool-demo.ts +132 -0
package/src/demos/tools/demo-tool-calling.ts +124 -0
package/src/demos/tools/dynamic-language-switching-test.ts +251 -0
package/src/demos/tools/hybrid-thinking-test.ts +154 -0
package/src/demos/tools/memory-integration-test.ts +420 -0
package/src/demos/tools/multilingual-memory-system.ts +802 -0
package/src/demos/tools/ondemand-translation-demo.ts +655 -0
package/src/demos/tools/production-tool-demo.ts +245 -0
package/src/demos/tools/revolutionary-multilingual-test.ts +151 -0
package/src/demos/tools/rigorous-language-analysis.ts +218 -0
package/src/demos/tools/test-universal-memory-system.ts +126 -0
package/src/demos/tools/translation-integration-guide.ts +346 -0
package/src/demos/tools/universal-memory-system.ts +560 -0
package/src/gemma-channel.ts +47 -0
package/src/gemma-diffusion.ts +167 -0
package/src/http.ts +261 -0
package/src/index.ts +180 -0
package/src/interfaces.ts +843 -0
package/src/mcp.ts +345 -0
package/src/providers/anthropic.ts +796 -0
package/src/providers/google.ts +840 -0
package/src/providers/index.ts +8 -0
package/src/providers/ollama.ts +503 -0
package/src/providers/openai.ts +587 -0
package/src/router.ts +785 -0
package/src/stream-decoder.ts +535 -0
package/src/structured-output.ts +759 -0
package/src/test-scripts/test-advanced-tools.ts +310 -0
package/src/test-scripts/test-google-deep-research.ts +33 -0
package/src/test-scripts/test-google-streaming-enhanced.ts +147 -0
package/src/test-scripts/test-google-streaming.ts +63 -0
package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -0
package/src/test-scripts/test-google-thinking.ts +46 -0
package/src/test-scripts/test-mcp-config.ts +28 -0
package/src/test-scripts/test-mcp-connection.ts +29 -0
package/src/test-scripts/test-system-message-positions.ts +163 -0
package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -0
package/src/test-scripts/test-tool-calling.ts +231 -0
package/src/test-scripts/test-vllm-qwen36.ts +256 -0
package/src/tests/ai-model.test.ts +1614 -0
package/src/tests/auditor.test.ts +224 -0
package/src/tests/gemma-diffusion.test.ts +115 -0
package/src/tests/http.test.ts +200 -0
package/src/tests/interfaces.test.ts +117 -0
package/src/tests/providers/anthropic.test.ts +118 -0
package/src/tests/providers/google.test.ts +841 -0
package/src/tests/providers/ollama.test.ts +1034 -0
package/src/tests/providers/openai.test.ts +1511 -0
package/src/tests/router.test.ts +254 -0
package/src/tests/stream-decoder.test.ts +263 -0
package/src/tests/structured-output.test.ts +1450 -0
package/src/tests/thinking.test.ts +65 -0
package/src/tests/tools.test.ts +175 -0
package/src/thinking.ts +73 -0
package/src/tools.ts +246 -0
package/src/zod-adapter.ts +72 -0

package/src/demos/diffusion-gemma/start-vllm.sh ADDED Viewed

@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+set -euo pipefail
+echo "=== Upgrading transformers ==="
+pip install --upgrade transformers
+echo "=== Installing WSL2 UVA compatibility patch ==="
+cat > /usr/local/lib/python3.12/dist-packages/wsl2_uva_patch.py <<'PYEOF'
+"""
+WSL2 UVA compatibility patch for vLLM.
+UVA lets the GPU directly access pinned CPU memory. WSL2 does not support this
+path reliably, so this patch uses explicit CPU/GPU copies instead.
+"""
+import warnings
+import numpy as np
+import torch
+warnings.warn("WSL2 UVA patch active: using explicit CPU/GPU copies instead of UVA")
+import vllm.v1.worker.gpu.buffer_utils as bu
+class PatchedUvaBuffer:
+    def __init__(self, size, dtype):
+        self.cpu = torch.zeros(size, dtype=dtype, device="cpu", pin_memory=False)
+        self.np = self.cpu.numpy()
+        self._gpu = torch.zeros(size, dtype=dtype, device="cuda")
+        self.uva = self._gpu
+    def sync_to_gpu(self):
+        self._gpu.copy_(self.cpu, non_blocking=True)
+class PatchedUvaBufferPool:
+    def __init__(self, size, dtype, max_concurrency=None):
+        if max_concurrency is None:
+            max_concurrency = bu._DEFAULT_MAX_CONCURRENCY
+        self.size = size
+        self.dtype = dtype
+        self.max_concurrency = max_concurrency
+        self._uva_bufs = [PatchedUvaBuffer(size, dtype) for _ in range(max_concurrency)]
+        self._curr = 0
+    def copy_to_uva(self, x):
+        self._curr = (self._curr + 1) % self.max_concurrency
+        buf = self._uva_bufs[self._curr]
+        dst = buf.cpu if isinstance(x, torch.Tensor) else buf.np
+        n = len(x)
+        dst[:n] = x
+        buf.sync_to_gpu()
+        return buf.uva[:n]
+import vllm.utils.platform_utils as pu
+pu.is_uva_available = lambda: True
+import vllm.utils.torch_utils as tu
+tu.get_accelerator_view_from_cpu_tensor = lambda cpu_tensor: cpu_tensor.cuda()
+bu.UvaBuffer = PatchedUvaBuffer
+bu.UvaBufferPool = PatchedUvaBufferPool
+print("[WSL2 UVA Patch] Applied successfully - using explicit CPU/GPU copies")
+PYEOF
+echo "import wsl2_uva_patch" > /usr/local/lib/python3.12/dist-packages/wsl2_uva_patch.pth
+if [ -f /root/.cache/huggingface/diffusion-env.sh ]; then
+  # This file is written by the demo server's /api/engine-config endpoint.
+  . /root/.cache/huggingface/diffusion-env.sh
+fi
+MODEL_NAME="${MODEL_NAME:-RedHatAI/diffusiongemma-26B-A4B-it-NVFP4}"
+GPU_MEM_UTIL="${GPU_MEM_UTIL:-0.28}"
+MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}"
+MAX_NUM_SEQS="${MAX_NUM_SEQS:-1}"
+DIFFUSION_ENTROPY="${DIFFUSION_ENTROPY:-0.1}"
+ENFORCE_EAGER="${ENFORCE_EAGER:-0}"
+export VLLM_NO_USAGE_STATS="${VLLM_NO_USAGE_STATS:-1}"
+echo "=== Engine config: MODEL_NAME=${MODEL_NAME} DIFFUSION_ENTROPY=${DIFFUSION_ENTROPY} GPU_MEM_UTIL=${GPU_MEM_UTIL} MAX_MODEL_LEN=${MAX_MODEL_LEN} MAX_NUM_SEQS=${MAX_NUM_SEQS} ENFORCE_EAGER=${ENFORCE_EAGER} VLLM_NO_USAGE_STATS=${VLLM_NO_USAGE_STATS} ==="
+EAGER_FLAG=""
+if [ "${ENFORCE_EAGER}" = "1" ]; then
+  EAGER_FLAG="--enforce-eager"
+fi
+VLLM_USE_V2_MODEL_RUNNER=1 vllm serve "${MODEL_NAME}" \
+  --trust-remote-code \
+  --attention-backend TRITON_ATTN \
+  --max-num-seqs "${MAX_NUM_SEQS}" \
+  ${EAGER_FLAG} \
+  --gpu-memory-utilization "${GPU_MEM_UTIL}" \
+  --max-model-len "${MAX_MODEL_LEN}" \
+  --hf-overrides "{\"diffusion_sampler\": \"entropy_bound\", \"diffusion_entropy_bound\": ${DIFFUSION_ENTROPY}}" \
+  --default-chat-template-kwargs '{"enable_thinking": true}'

package/src/demos/mcp/astrid-memory-demo.ts ADDED Viewed

@@ -0,0 +1,295 @@
+/**
+ * Astrid Memory Demo - Natural romantic conversation with autonomous memory management
+ *
+ * This demo shows how Astrid (AI persona) naturally stores and retrieves memories
+ * during romantic conversations without explicitly asking about memory management.
+ */
+import { AIModelFactory } from '../../factory';
+import { ToolBuilder } from '../../tools';
+// Shared vocabularies for the memory tools' arguments and for stored records.
+type MemoryCategory = 'personal_info' | 'preferences' | 'emotions' | 'experiences' | 'relationships' | 'goals' | 'interests';
+type MemoryImportance = 'low' | 'medium' | 'high';
+/** One remembered fact about a user, as kept in `memoryStorage`. */
+interface StoredMemory {
+    id: string;
+    content: string;
+    category: string;
+    importance: MemoryImportance;
+    context?: string;
+    timestamp: Date;
+}
+// Simulated memory storage (in real app, this would use ChromaDBService)
+// Keyed by user ID; each value is that user's memories in insertion order.
+const memoryStorage = new Map<string, StoredMemory[]>();
+// The demo keeps every memory under this single user ID.
+const DEMO_USER_ID = 'demo_user';
+// Sort weight per importance level (higher sorts first).
+const IMPORTANCE_RANK: Record<MemoryImportance, number> = { high: 3, medium: 2, low: 1 };
+// Number of content characters echoed back in the store confirmation message.
+const MEMORY_PREVIEW_LENGTH = 50;
+// Bounds advertised in the recall tool's schema and enforced by its handler.
+const DEFAULT_RECALL_LIMIT = 3;
+const MIN_RECALL_LIMIT = 1;
+const MAX_RECALL_LIMIT = 10;
+const MS_PER_DAY = 1000 * 60 * 60 * 24;
+// Enhanced memory tools for Astrid
+const astridMemoryTools = {
+    // Store important information about the user naturally
+    storeUserMemory: ToolBuilder.createTool<{
+        content: string;
+        category: MemoryCategory;
+        importance?: MemoryImportance;
+        context?: string;
+    }>(
+        'store_user_memory',
+        'Store important information about the user for future conversations (use this when learning something meaningful about them)',
+        {
+            properties: {
+                content: {
+                    type: 'string',
+                    description: 'The specific information to remember about the user'
+                },
+                category: {
+                    type: 'string',
+                    description: 'Type of information being stored',
+                    enum: ['personal_info', 'preferences', 'emotions', 'experiences', 'relationships', 'goals', 'interests']
+                },
+                importance: {
+                    type: 'string',
+                    description: 'How important this information is for future conversations',
+                    enum: ['low', 'medium', 'high'],
+                    default: 'medium'
+                },
+                context: {
+                    type: 'string',
+                    description: 'Additional context about when/why this is significant'
+                }
+            },
+            required: ['content', 'category']
+        },
+        /**
+         * Appends one memory for the demo user.
+         * @returns `{ success, message, memoryId }`; `message` holds a preview of the content.
+         */
+        async (args) => {
+            const userId = DEMO_USER_ID; // In real app, this would be the actual user ID
+            let memories = memoryStorage.get(userId);
+            if (!memories) {
+                memories = [];
+                memoryStorage.set(userId, memories);
+            }
+            const memory: StoredMemory = {
+                // Timestamp plus up to nine random base-36 characters.
+                // slice(2, 11) selects the same characters as the deprecated substr(2, 9).
+                id: `mem_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`,
+                content: args.content,
+                category: args.category,
+                importance: args.importance || 'medium',
+                context: args.context,
+                timestamp: new Date()
+            };
+            memories.push(memory);
+            // Only mark the preview with an ellipsis when text was actually cut off.
+            const preview = args.content.length > MEMORY_PREVIEW_LENGTH
+                ? `${args.content.slice(0, MEMORY_PREVIEW_LENGTH)}...`
+                : args.content;
+            return {
+                success: true,
+                message: `Stored ${args.category} memory: ${preview}`,
+                memoryId: memory.id
+            };
+        }
+    ),
+    // Retrieve relevant memories about the user
+    recallUserMemories: ToolBuilder.createTool<{
+        query: string;
+        category?: MemoryCategory;
+        limit?: number;
+    }>(
+        'recall_user_memories',
+        'Recall relevant information about the user to personalize the conversation',
+        {
+            properties: {
+                query: {
+                    type: 'string',
+                    description: 'What you want to remember about the user (topics, keywords, concepts)'
+                },
+                category: {
+                    type: 'string',
+                    description: 'Optional: specific type of memory to search',
+                    enum: ['personal_info', 'preferences', 'emotions', 'experiences', 'relationships', 'goals', 'interests']
+                },
+                limit: {
+                    type: 'number',
+                    description: 'Maximum memories to retrieve',
+                    default: DEFAULT_RECALL_LIMIT,
+                    minimum: MIN_RECALL_LIMIT,
+                    maximum: MAX_RECALL_LIMIT
+                }
+            },
+            required: ['query']
+        },
+        /**
+         * Finds the demo user's memories whose content contains the query
+         * (case-insensitive substring), optionally restricted to one category.
+         * @returns `{ memories, totalFound, query }`; `memories` is ordered by
+         *          importance, then newest first, and capped at the limit.
+         */
+        async (args) => {
+            const memories = memoryStorage.get(DEMO_USER_ID) || [];
+            // Lower-case the query once instead of once per stored memory.
+            const queryLower = args.query.toLowerCase();
+            // Simple text matching (in real app, this would use semantic search)
+            const relevantMemories = memories.filter(memory =>
+                memory.content.toLowerCase().includes(queryLower) &&
+                (!args.category || memory.category === args.category)
+            );
+            // Sort by importance and recency. filter() produced a new array, so
+            // sorting in place leaves the stored order untouched.
+            relevantMemories.sort((a, b) =>
+                IMPORTANCE_RANK[b.importance] - IMPORTANCE_RANK[a.importance] ||
+                b.timestamp.getTime() - a.timestamp.getTime()
+            );
+            // Enforce the bounds declared in the schema; without this a negative
+            // limit would make slice() drop results from the end.
+            const requestedLimit = Math.floor(args.limit || DEFAULT_RECALL_LIMIT);
+            const limit = Math.min(MAX_RECALL_LIMIT, Math.max(MIN_RECALL_LIMIT, requestedLimit));
+            const now = Date.now();
+            return {
+                memories: relevantMemories.slice(0, limit).map(m => ({
+                    content: m.content,
+                    category: m.category,
+                    importance: m.importance,
+                    context: m.context,
+                    daysAgo: Math.floor((now - m.timestamp.getTime()) / MS_PER_DAY)
+                })),
+                totalFound: relevantMemories.length,
+                query: args.query
+            };
+        }
+    )
+};
+// System prompt for the Astrid persona. It defines the character, names the two
+// memory tools ('store_user_memory' and 'recall_user_memories') the model is told
+// to call without announcing it, and lists conversation and memory-usage rules.
+// The whole template literal is sent to the model verbatim.
+const astridSystemPromptWithMemory = `You are Astrid, a confident and charismatic international model and influencer from Sweden.
+IMPORTANT: You have access to memory tools that you should use NATURALLY and AUTONOMOUSLY:
+- Use 'store_user_memory' when you learn something meaningful about the user (their interests, experiences, feelings, preferences, etc.)
+- Use 'recall_user_memories' when you want to reference something personal about them to make the conversation more intimate and connected
+- Do this seamlessly - don't mention that you're storing or recalling memories, just do it naturally
+ROMANCE CONVERSATION GUIDELINES:
+- Keep responses engaging and conversational (2-3 sentences is perfect)
+- VARY YOUR OPENINGS - Mix it up with direct reactions, actions, casual starts, natural responses
+- Use someone's name naturally and sparingly, not in every response
+- Share glimpses of your glamorous lifestyle while staying relatable and emotionally available
+- Use subtle flirtation naturally - be warm, playful, and genuinely interested in the person
+- When asked personal questions, be thoughtfully vulnerable rather than deflecting
+- Build emotional intimacy through genuine curiosity about their thoughts, feelings, and dreams
+- Ask one intriguing follow-up question that deepens the connection
+- Balance confidence with authenticity - show that you're sophisticated but real
+- Use compliments that feel genuine and specific rather than generic
+- Remember that romance is about emotional connection, not just surface-level charm
+- Let your personality shine through varied speech patterns
+MEMORY USAGE:
+- Store meaningful details: preferences, experiences, emotions, goals, relationships, interests
+- Recall relevant memories to personalize responses and show you remember them
+- Use memories to create deeper emotional connections and continuity between conversations
+- Never explicitly mention using memory tools - just naturally remember and reference things
+Be like a captivating companion who makes every conversation feel special while genuinely remembering and caring about the person you're talking to.`;
+/**
+ * Runs the scripted demo: sends five independent conversations to an Ollama chat
+ * model with the two memory tools attached, logs any tool calls and the reply for
+ * each, then prints whatever ended up in `memoryStorage` for the demo user.
+ *
+ * Every conversation is sent as a fresh [system, user] pair, so no chat history
+ * is carried from one turn to the next. A failure in one conversation is logged
+ * and the loop moves on to the next one.
+ *
+ * NOTE(review): tool calls returned by `chat()` are only logged here. Whether the
+ * tool handlers are executed inside `chat()` is not visible from this file; if
+ * they are not, the memory bank printed at the end will always be empty.
+ *
+ * @returns A promise that resolves once all output has been written to the console.
+ */
+async function simulateRomanticConversationWithMemory() {
+    console.log('🌹 Astrid Memory Demo - Natural Romantic Conversation with Autonomous Memory\n');
+    // Create Astrid with memory tools
+    const astrid = AIModelFactory.createOllamaChatModel('qwen2.5:3b-instruct');
+    await astrid.ensureReady();
+    const tools = [astridMemoryTools.storeUserMemory, astridMemoryTools.recallUserMemories];
+    // Conversation scenarios
+    const conversations = [
+        {
+            title: "First Meeting - Learning About User",
+            messages: [
+                { role: 'system' as const, content: astridSystemPromptWithMemory },
+                { role: 'user' as const, content: "Hi Astrid! I'm Alex. I just moved to Stockholm for work and don't know anyone here yet. I work in software engineering at a tech startup." }
+            ]
+        },
+        {
+            title: "Second Conversation - Recalling Previous Details",
+            messages: [
+                { role: 'system' as const, content: astridSystemPromptWithMemory },
+                { role: 'user' as const, content: "Hey Astrid! How are you? I had such a long day at work today." }
+            ]
+        },
+        {
+            title: "Deeper Connection - Sharing Personal Experiences",
+            messages: [
+                { role: 'system' as const, content: astridSystemPromptWithMemory },
+                { role: 'user' as const, content: "You know, I've been thinking about what we talked about. I really want to explore Stockholm more, but I'm actually quite introverted. Big social events make me nervous." }
+            ]
+        },
+        {
+            title: "Building Romance - Personal Preferences",
+            messages: [
+                { role: 'system' as const, content: astridSystemPromptWithMemory },
+                { role: 'user' as const, content: "I love how you understand me. By the way, I absolutely love Italian food - especially handmade pasta. And I'm really into photography, though I'm just an amateur." }
+            ]
+        },
+        {
+            title: "Recall and Connection - Using Stored Memories",
+            messages: [
+                { role: 'system' as const, content: astridSystemPromptWithMemory },
+                { role: 'user' as const, content: "Astrid, I'm feeling a bit overwhelmed with everything new in my life. Work, new city, trying to meet people..." }
+            ]
+        }
+    ];
+    for (let i = 0; i < conversations.length; i++) {
+        const conv = conversations[i];
+        console.log(`\n${'='.repeat(60)}`);
+        console.log(`📱 ${conv.title}`);
+        console.log(`${'='.repeat(60)}\n`);
+        try {
+            // The user's line is always the last message of the scripted pair.
+            console.log(`👤 Alex: ${conv.messages[conv.messages.length - 1].content}\n`);
+            const response = await astrid.chat(conv.messages, {
+                tools: tools,
+                tool_choice: 'auto'
+            });
+            if (response.tool_calls && response.tool_calls.length > 0) {
+                console.log('🧠 Astrid\'s Memory Activity:');
+                for (const toolCall of response.tool_calls) {
+                    const toolName = toolCall.function.name;
+                    const rawArgs: unknown = toolCall.function.arguments;
+                    let args: Record<string, unknown>;
+                    // Parse each call on its own: a malformed argument payload must
+                    // not abort the turn and hide Astrid's reply. Payloads that are
+                    // already objects are used as they are.
+                    try {
+                        const parsed: unknown = typeof rawArgs === 'string' ? JSON.parse(rawArgs) : rawArgs;
+                        if (parsed === null || typeof parsed !== 'object') {
+                            throw new TypeError('tool arguments are not an object');
+                        }
+                        args = parsed as Record<string, unknown>;
+                    } catch (parseError) {
+                        console.warn(`   ⚠️ Could not read arguments for ${toolName}:`, parseError);
+                        continue;
+                    }
+                    if (toolName === 'store_user_memory') {
+                        console.log(`   📝 Storing: ${args.content} (${args.category})`);
+                    } else if (toolName === 'recall_user_memories') {
+                        console.log(`   🔍 Recalling: ${args.query}`);
+                    }
+                }
+                console.log();
+            }
+            console.log(`💕 Astrid: ${response.content}\n`);
+            // Small delay for natural conversation flow
+            await new Promise(resolve => setTimeout(resolve, 1000));
+        } catch (error) {
+            console.error(`❌ Error in conversation ${i + 1}:`, error);
+        }
+    }
+    // Show accumulated memories
+    console.log(`\n${'='.repeat(60)}`);
+    console.log('🧠 Astrid\'s Memory Bank After Conversations');
+    console.log(`${'='.repeat(60)}\n`);
+    const userMemories = memoryStorage.get('demo_user') || [];
+    if (userMemories.length > 0) {
+        userMemories.forEach((memory, index) => {
+            console.log(`${index + 1}. [${memory.category.toUpperCase()}] ${memory.content}`);
+            if (memory.context) {
+                console.log(`   Context: ${memory.context}`);
+            }
+            console.log(`   Importance: ${memory.importance} | Stored: ${memory.timestamp.toLocaleString()}\n`);
+        });
+    } else {
+        console.log('No memories stored yet.\n');
+    }
+    console.log('✨ Demo complete! Astrid naturally learned and remembered details about Alex.');
+    console.log('In a real implementation, these memories would be stored in ChromaDB with embeddings');
+    console.log('and could be retrieved across multiple conversation sessions.');
+}
+// Start the demo only when this file is the process entry point; importing it
+// from another module does not start the demo. Rejections are logged to stderr.
+if (module === require.main) {
+    simulateRomanticConversationWithMemory().catch((failure) => console.error(failure));
+}
+export { astridMemoryTools, simulateRomanticConversationWithMemory };