universal-llm-client 4.3.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/CHANGELOG.md +27 -24
  2. package/README.md +60 -11
  3. package/dist/ai-model.d.ts +12 -1
  4. package/dist/ai-model.d.ts.map +1 -1
  5. package/dist/ai-model.js +36 -1
  6. package/dist/ai-model.js.map +1 -1
  7. package/dist/auditor.js.map +1 -1
  8. package/dist/client.js.map +1 -1
  9. package/dist/gemma-channel.d.ts +14 -0
  10. package/dist/gemma-channel.d.ts.map +1 -0
  11. package/dist/gemma-channel.js +38 -0
  12. package/dist/gemma-channel.js.map +1 -0
  13. package/dist/gemma-diffusion.d.ts +49 -0
  14. package/dist/gemma-diffusion.d.ts.map +1 -0
  15. package/dist/gemma-diffusion.js +147 -0
  16. package/dist/gemma-diffusion.js.map +1 -0
  17. package/dist/http.d.ts +4 -0
  18. package/dist/http.d.ts.map +1 -1
  19. package/dist/http.js +14 -1
  20. package/dist/http.js.map +1 -1
  21. package/dist/index.d.ts +2 -1
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +4 -0
  24. package/dist/index.js.map +1 -1
  25. package/dist/interfaces.d.ts +163 -7
  26. package/dist/interfaces.d.ts.map +1 -1
  27. package/dist/interfaces.js.map +1 -1
  28. package/dist/mcp.js.map +1 -1
  29. package/dist/providers/anthropic.d.ts.map +1 -1
  30. package/dist/providers/anthropic.js +28 -3
  31. package/dist/providers/anthropic.js.map +1 -1
  32. package/dist/providers/google.d.ts +22 -1
  33. package/dist/providers/google.d.ts.map +1 -1
  34. package/dist/providers/google.js +223 -13
  35. package/dist/providers/google.js.map +1 -1
  36. package/dist/providers/index.js.map +1 -1
  37. package/dist/providers/ollama.d.ts +2 -0
  38. package/dist/providers/ollama.d.ts.map +1 -1
  39. package/dist/providers/ollama.js +59 -30
  40. package/dist/providers/ollama.js.map +1 -1
  41. package/dist/providers/openai.d.ts +14 -0
  42. package/dist/providers/openai.d.ts.map +1 -1
  43. package/dist/providers/openai.js +200 -22
  44. package/dist/providers/openai.js.map +1 -1
  45. package/dist/router.d.ts +2 -0
  46. package/dist/router.d.ts.map +1 -1
  47. package/dist/router.js +4 -0
  48. package/dist/router.js.map +1 -1
  49. package/dist/stream-decoder.d.ts +12 -0
  50. package/dist/stream-decoder.d.ts.map +1 -1
  51. package/dist/stream-decoder.js +182 -5
  52. package/dist/stream-decoder.js.map +1 -1
  53. package/dist/structured-output.js.map +1 -1
  54. package/dist/thinking.d.ts +36 -0
  55. package/dist/thinking.d.ts.map +1 -0
  56. package/dist/thinking.js +52 -0
  57. package/dist/thinking.js.map +1 -0
  58. package/dist/tools.js.map +1 -1
  59. package/dist/zod-adapter.js.map +1 -1
  60. package/package.json +4 -1
  61. package/src/ai-model.ts +400 -0
  62. package/src/auditor.ts +213 -0
  63. package/src/client.ts +402 -0
  64. package/src/debug/debug-google-streaming.ts +97 -0
  65. package/src/debug/debug-tool-execution.ts +86 -0
  66. package/src/debug/test-lmstudio-tools.ts +155 -0
  67. package/src/demos/README.md +47 -0
  68. package/src/demos/basic/universal-llm-examples.ts +161 -0
  69. package/src/demos/diffusion-gemma/.env +29 -0
  70. package/src/demos/diffusion-gemma/.env.example +27 -0
  71. package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
  72. package/src/demos/diffusion-gemma/README.md +59 -0
  73. package/src/demos/diffusion-gemma/canvas.ts +1606 -0
  74. package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
  75. package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
  76. package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
  77. package/src/demos/diffusion-gemma/server.ts +1205 -0
  78. package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
  79. package/src/demos/mcp/astrid-memory-demo.ts +295 -0
  80. package/src/demos/mcp/astrid-persona-memory.ts +357 -0
  81. package/src/demos/mcp/mcp-mongodb-demo.ts +275 -0
  82. package/src/demos/mcp/simple-astrid-memory.ts +148 -0
  83. package/src/demos/mcp/simple-mcp-demo.ts +68 -0
  84. package/src/demos/mcp/working-mcp-demo.ts +62 -0
  85. package/src/demos/model-alias-demo.ts +0 -0
  86. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +267 -0
  87. package/src/demos/tools/astrid-memory-demo.ts +270 -0
  88. package/src/demos/tools/astrid-production-memory-clean.ts +785 -0
  89. package/src/demos/tools/astrid-production-memory.ts +558 -0
  90. package/src/demos/tools/basic-translation-test.ts +66 -0
  91. package/src/demos/tools/chromadb-similarity-tuning.ts +390 -0
  92. package/src/demos/tools/clean-multilingual-conversation.ts +209 -0
  93. package/src/demos/tools/clean-translation-test.ts +119 -0
  94. package/src/demos/tools/clean-universal-multilingual-test.ts +131 -0
  95. package/src/demos/tools/complete-rag-demo.ts +369 -0
  96. package/src/demos/tools/complete-tool-demo.ts +132 -0
  97. package/src/demos/tools/demo-tool-calling.ts +124 -0
  98. package/src/demos/tools/dynamic-language-switching-test.ts +251 -0
  99. package/src/demos/tools/hybrid-thinking-test.ts +154 -0
  100. package/src/demos/tools/memory-integration-test.ts +420 -0
  101. package/src/demos/tools/multilingual-memory-system.ts +802 -0
  102. package/src/demos/tools/ondemand-translation-demo.ts +655 -0
  103. package/src/demos/tools/production-tool-demo.ts +245 -0
  104. package/src/demos/tools/revolutionary-multilingual-test.ts +151 -0
  105. package/src/demos/tools/rigorous-language-analysis.ts +218 -0
  106. package/src/demos/tools/test-universal-memory-system.ts +126 -0
  107. package/src/demos/tools/translation-integration-guide.ts +346 -0
  108. package/src/demos/tools/universal-memory-system.ts +560 -0
  109. package/src/gemma-channel.ts +47 -0
  110. package/src/gemma-diffusion.ts +167 -0
  111. package/src/http.ts +261 -0
  112. package/src/index.ts +180 -0
  113. package/src/interfaces.ts +843 -0
  114. package/src/mcp.ts +345 -0
  115. package/src/providers/anthropic.ts +796 -0
  116. package/src/providers/google.ts +840 -0
  117. package/src/providers/index.ts +8 -0
  118. package/src/providers/ollama.ts +503 -0
  119. package/src/providers/openai.ts +587 -0
  120. package/src/router.ts +785 -0
  121. package/src/stream-decoder.ts +535 -0
  122. package/src/structured-output.ts +759 -0
  123. package/src/test-scripts/test-advanced-tools.ts +310 -0
  124. package/src/test-scripts/test-google-deep-research.ts +33 -0
  125. package/src/test-scripts/test-google-streaming-enhanced.ts +147 -0
  126. package/src/test-scripts/test-google-streaming.ts +63 -0
  127. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -0
  128. package/src/test-scripts/test-google-thinking.ts +46 -0
  129. package/src/test-scripts/test-mcp-config.ts +28 -0
  130. package/src/test-scripts/test-mcp-connection.ts +29 -0
  131. package/src/test-scripts/test-system-message-positions.ts +163 -0
  132. package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -0
  133. package/src/test-scripts/test-tool-calling.ts +231 -0
  134. package/src/test-scripts/test-vllm-qwen36.ts +256 -0
  135. package/src/tests/ai-model.test.ts +1614 -0
  136. package/src/tests/auditor.test.ts +224 -0
  137. package/src/tests/gemma-diffusion.test.ts +115 -0
  138. package/src/tests/http.test.ts +200 -0
  139. package/src/tests/interfaces.test.ts +117 -0
  140. package/src/tests/providers/anthropic.test.ts +118 -0
  141. package/src/tests/providers/google.test.ts +841 -0
  142. package/src/tests/providers/ollama.test.ts +1034 -0
  143. package/src/tests/providers/openai.test.ts +1511 -0
  144. package/src/tests/router.test.ts +254 -0
  145. package/src/tests/stream-decoder.test.ts +263 -0
  146. package/src/tests/structured-output.test.ts +1450 -0
  147. package/src/tests/thinking.test.ts +65 -0
  148. package/src/tests/tools.test.ts +175 -0
  149. package/src/thinking.ts +73 -0
  150. package/src/tools.ts +246 -0
  151. package/src/zod-adapter.ts +72 -0
@@ -0,0 +1,29 @@
# Compose definition for a single GPU-backed container named "diffusiongemma".
# Every ${VAR:-default} below can be overridden from the shell or an .env file.
services:
  diffusiongemma:
    container_name: diffusiongemma
    image: ${VLLM_IMAGE:-vllm/vllm-openai:gemma}
    # Replace the image entrypoint with the start script mounted under `volumes`,
    # run through a login bash shell.
    entrypoint: ["bash", "-lc", "/start-vllm.sh"]
    ipc: host
    shm_size: 2gb
    ports:
      # Host port is configurable; the container side is fixed at 8000.
      - "${VLLM_PORT:-8000}:8000"
    # Settings handed to the container as environment variables.
    # NOTE(review): presumably consumed by start-vllm.sh - confirm against the script.
    environment:
      MODEL_NAME: ${MODEL_NAME:-RedHatAI/diffusiongemma-26B-A4B-it-NVFP4}
      GPU_MEM_UTIL: ${GPU_MEM_UTIL:-0.28}
      MAX_MODEL_LEN: ${MAX_MODEL_LEN:-32768}
      MAX_NUM_SEQS: ${MAX_NUM_SEQS:-1}
      DIFFUSION_ENTROPY: ${DIFFUSION_ENTROPY:-0.1}
      ENFORCE_EAGER: ${ENFORCE_EAGER:-0}
      VLLM_NO_USAGE_STATS: ${VLLM_NO_USAGE_STATS:-1}
      NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
    volumes:
      # Start script is mounted read-only at the path the entrypoint runs.
      - ./start-vllm.sh:/start-vllm.sh:ro
      # Hugging Face cache directory is kept on the host, next to this file.
      - ./.cache/huggingface:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
@@ -0,0 +1,51 @@
/**
 * Probe the raw vLLM SSE stream to understand chunk arrival patterns.
 * Logs: chunk index, ms since start, gap since last chunk, content length, field, preview.
 *
 * The script POSTs a fixed prompt to `/api/stream-raw` on localhost:3333, reads the
 * response body incrementally, and prints one row per streamed delta followed by a
 * summary line. It exits with code 1 if the request fails or has no body.
 */

/** Delta fields this probe reads; any other key is reported under `extra=`. */
interface StreamDelta {
    content?: string | null;
    reasoning_content?: string | null;
}

/** Delta keys that are expected on the wire and therefore not listed under `extra=`. */
const KNOWN_DELTA_KEYS = ['content', 'reasoning_content', 'role', 'tool_calls'];

const res = await fetch('http://localhost:3333/api/stream-raw', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
        prompt: 'Write a short poem about the stars at night.',
        maxTokens: 512,
    }),
});

if (!res.ok || !res.body) {
    console.error('HTTP', res.status, res.statusText);
    process.exit(1);
}

const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
const t0 = performance.now();
let last = t0;
let i = 0;
let total = 0;

/**
 * Handle one line of the event stream: ignore anything that is not a JSON `data:`
 * payload carrying `choices[0].delta`, otherwise log a timing row and update the
 * running counters (`i`, `total`, `last`).
 *
 * @param rawLine A single line of the stream, without its `\n` terminator.
 */
function logLine(rawLine: string): void {
    // Event streams may use CRLF line endings; drop the stray CR left by splitting on LF.
    const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
    if (!line.startsWith('data:')) return;
    // The space after "data:" is optional in the SSE format.
    const payload = line.slice('data:'.length).trimStart();
    if (payload === '[DONE]') return;

    let chunk: unknown;
    try {
        chunk = JSON.parse(payload);
    } catch {
        return; // not JSON: skip, same as any other non-data line
    }
    const delta = (chunk as { choices?: Array<{ delta?: StreamDelta }> } | null)?.choices?.[0]?.delta;
    if (!delta) return;

    const field = delta.reasoning_content != null ? 'reasoning' : delta.content != null ? 'content' : '?';
    const text = delta.reasoning_content ?? delta.content ?? '';
    const now = performance.now();
    total += text.length;
    const extraKeys = Object.keys(delta).filter((k) => !KNOWN_DELTA_KEYS.includes(k));
    console.log(
        `#${String(i++).padStart(3)} t=${(now - t0).toFixed(0).padStart(6)}ms gap=${(now - last).toFixed(1).padStart(7)}ms len=${String(text.length).padStart(4)} ${field.padEnd(9)} ${JSON.stringify(text.slice(0, 60))}${extraKeys.length ? ' extra=' + JSON.stringify(extraKeys) : ''}`,
    );
    last = now;
}

while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    // The last element is an incomplete line (or ''); keep it for the next read.
    buffer = lines.pop() ?? '';
    for (const line of lines) logLine(line);
}

// Flush bytes still held by the decoder and process a final line that arrived
// without a trailing newline, so the last chunk is not silently dropped.
buffer += decoder.decode();
if (buffer) logLine(buffer);

console.log(`\nTotal: ${i} chunks, ${total} chars, ${(performance.now() - t0).toFixed(0)}ms`);
@@ -0,0 +1,55 @@
/**
 * Probe the follow-up turn of the native tool-calling loop against vLLM,
 * printing raw wire content (skip_special_tokens: false throughout).
 *
 * A canned conversation (user question, assistant tool call, tool result) is sent
 * twice: once with the tool schema attached and `tool_choice: 'none'`, once with no
 * tools at all, so the two raw completions can be compared side by side.
 *
 * Environment: MODEL_NAME (model id) and VLLM_URL (server base URL) override the defaults.
 */

/** The only part of a chat-completion response this probe reads. */
interface ChatCompletion {
    choices?: Array<{ message?: { content?: string | null } }>;
}

const MODEL = process.env.MODEL_NAME ?? 'RedHatAI/diffusiongemma-26B-A4B-it-NVFP4';
// Strip trailing slashes so a VLLM_URL like "http://host:8000/" does not yield "//v1/...".
const VLLM = (process.env.VLLM_URL ?? 'http://localhost:8000').replace(/\/+$/, '');

/** Single function tool advertised to the model when `withTools` is set. */
const tools = [{
    type: 'function',
    function: {
        name: 'get_weather',
        description: 'Get current weather for a city',
        parameters: {
            type: 'object',
            properties: {
                city: { type: 'string', description: 'City name' },
                unit: { type: 'string', enum: ['celsius', 'fahrenheit'] },
            },
            required: ['city'],
        },
    },
}];

/**
 * Send one non-streaming chat-completion request and return what came back as text.
 *
 * @param messages  Conversation to send, forwarded as-is.
 * @param withTools When true, attach `tools` with `tool_choice: 'none'`.
 * @returns The first choice's message content; if that is missing, the first 300
 *          characters of the response (prefixed with the HTTP status on a non-2xx
 *          reply or a body that is not JSON).
 */
async function post(messages: unknown[], withTools: boolean): Promise<string> {
    const res = await fetch(`${VLLM}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            model: MODEL,
            messages,
            max_tokens: 1024,
            skip_special_tokens: false,
            ...(withTools ? { tools, tool_choice: 'none' } : {}),
        }),
    });

    // Read as text first: an error page or empty body would make res.json() throw
    // an opaque SyntaxError and hide what the server actually sent.
    const raw = await res.text();
    let parsed: unknown;
    try {
        parsed = JSON.parse(raw);
    } catch {
        return `HTTP ${res.status} (non-JSON body): ${raw.slice(0, 300)}`;
    }

    const content = (parsed as ChatCompletion | null)?.choices?.[0]?.message?.content;
    if (content != null) return content;

    const dump = JSON.stringify(parsed).slice(0, 300);
    return res.ok ? dump : `HTTP ${res.status}: ${dump}`;
}

/** Canned history: user question, assistant tool call, tool result awaiting a final answer. */
const followUp = [
    { role: 'user', content: 'What is the weather in Paris right now, in celsius?' },
    {
        role: 'assistant', content: '', tool_calls: [{
            id: 'call_x', type: 'function',
            function: { name: 'get_weather', arguments: JSON.stringify({ city: 'Paris', unit: 'celsius' }) },
        }],
    },
    { role: 'tool', tool_call_id: 'call_x', content: JSON.stringify({ temp_c: 18, condition: 'partly cloudy' }) },
];

console.log('A) follow-up WITH tools+choice none:');
console.log(' ', JSON.stringify(await post(followUp, true)).slice(0, 500));
console.log('B) follow-up WITHOUT tools:');
console.log(' ', JSON.stringify(await post(followUp, false)).slice(0, 500));