universal-llm-client 4.5.0 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +2 -0
  3. package/dist/ai-model.d.ts +0 -1
  4. package/dist/ai-model.js +0 -1
  5. package/dist/auditor.d.ts +0 -1
  6. package/dist/auditor.js +0 -1
  7. package/dist/client.d.ts +0 -1
  8. package/dist/client.js +0 -1
  9. package/dist/gemma-channel.d.ts +0 -1
  10. package/dist/gemma-channel.js +0 -1
  11. package/dist/gemma-diffusion.d.ts +0 -1
  12. package/dist/gemma-diffusion.js +0 -1
  13. package/dist/http.d.ts +0 -1
  14. package/dist/http.js +0 -1
  15. package/dist/index.d.ts +0 -1
  16. package/dist/index.js +0 -1
  17. package/dist/interfaces.d.ts +0 -1
  18. package/dist/interfaces.js +0 -1
  19. package/dist/mcp.d.ts +0 -1
  20. package/dist/mcp.js +0 -1
  21. package/dist/providers/anthropic.d.ts +0 -1
  22. package/dist/providers/anthropic.js +0 -1
  23. package/dist/providers/google.d.ts +0 -1
  24. package/dist/providers/google.js +0 -1
  25. package/dist/providers/index.d.ts +0 -1
  26. package/dist/providers/index.js +0 -1
  27. package/dist/providers/ollama.d.ts +0 -1
  28. package/dist/providers/ollama.js +0 -1
  29. package/dist/providers/openai.d.ts +2 -1
  30. package/dist/providers/openai.js +303 -74
  31. package/dist/router.d.ts +0 -1
  32. package/dist/router.js +0 -1
  33. package/dist/stream-decoder.d.ts +0 -1
  34. package/dist/stream-decoder.js +0 -1
  35. package/dist/structured-output.d.ts +0 -1
  36. package/dist/structured-output.js +0 -1
  37. package/dist/thinking.d.ts +0 -1
  38. package/dist/thinking.js +0 -1
  39. package/dist/tools.d.ts +0 -1
  40. package/dist/tools.js +0 -1
  41. package/dist/zod-adapter.d.ts +0 -1
  42. package/dist/zod-adapter.js +0 -1
  43. package/package.json +1 -2
  44. package/dist/ai-model.d.ts.map +0 -1
  45. package/dist/ai-model.js.map +0 -1
  46. package/dist/auditor.d.ts.map +0 -1
  47. package/dist/auditor.js.map +0 -1
  48. package/dist/client.d.ts.map +0 -1
  49. package/dist/client.js.map +0 -1
  50. package/dist/gemma-channel.d.ts.map +0 -1
  51. package/dist/gemma-channel.js.map +0 -1
  52. package/dist/gemma-diffusion.d.ts.map +0 -1
  53. package/dist/gemma-diffusion.js.map +0 -1
  54. package/dist/http.d.ts.map +0 -1
  55. package/dist/http.js.map +0 -1
  56. package/dist/index.d.ts.map +0 -1
  57. package/dist/index.js.map +0 -1
  58. package/dist/interfaces.d.ts.map +0 -1
  59. package/dist/interfaces.js.map +0 -1
  60. package/dist/mcp.d.ts.map +0 -1
  61. package/dist/mcp.js.map +0 -1
  62. package/dist/providers/anthropic.d.ts.map +0 -1
  63. package/dist/providers/anthropic.js.map +0 -1
  64. package/dist/providers/google.d.ts.map +0 -1
  65. package/dist/providers/google.js.map +0 -1
  66. package/dist/providers/index.d.ts.map +0 -1
  67. package/dist/providers/index.js.map +0 -1
  68. package/dist/providers/ollama.d.ts.map +0 -1
  69. package/dist/providers/ollama.js.map +0 -1
  70. package/dist/providers/openai.d.ts.map +0 -1
  71. package/dist/providers/openai.js.map +0 -1
  72. package/dist/router.d.ts.map +0 -1
  73. package/dist/router.js.map +0 -1
  74. package/dist/stream-decoder.d.ts.map +0 -1
  75. package/dist/stream-decoder.js.map +0 -1
  76. package/dist/structured-output.d.ts.map +0 -1
  77. package/dist/structured-output.js.map +0 -1
  78. package/dist/thinking.d.ts.map +0 -1
  79. package/dist/thinking.js.map +0 -1
  80. package/dist/tools.d.ts.map +0 -1
  81. package/dist/tools.js.map +0 -1
  82. package/dist/zod-adapter.d.ts.map +0 -1
  83. package/dist/zod-adapter.js.map +0 -1
  84. package/src/ai-model.ts +0 -400
  85. package/src/auditor.ts +0 -213
  86. package/src/client.ts +0 -402
  87. package/src/debug/debug-google-streaming.ts +0 -97
  88. package/src/debug/debug-tool-execution.ts +0 -86
  89. package/src/debug/test-lmstudio-tools.ts +0 -155
  90. package/src/demos/README.md +0 -47
  91. package/src/demos/basic/universal-llm-examples.ts +0 -161
  92. package/src/demos/diffusion-gemma/.env +0 -29
  93. package/src/demos/diffusion-gemma/.env.example +0 -27
  94. package/src/demos/diffusion-gemma/CLAUDE.md +0 -95
  95. package/src/demos/diffusion-gemma/README.md +0 -59
  96. package/src/demos/diffusion-gemma/canvas.ts +0 -1606
  97. package/src/demos/diffusion-gemma/docker-compose.yml +0 -29
  98. package/src/demos/diffusion-gemma/probe-stream.ts +0 -51
  99. package/src/demos/diffusion-gemma/probe-tools.ts +0 -55
  100. package/src/demos/diffusion-gemma/server.ts +0 -1205
  101. package/src/demos/diffusion-gemma/start-vllm.sh +0 -98
  102. package/src/demos/mcp/astrid-memory-demo.ts +0 -295
  103. package/src/demos/mcp/astrid-persona-memory.ts +0 -357
  104. package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
  105. package/src/demos/mcp/simple-astrid-memory.ts +0 -148
  106. package/src/demos/mcp/simple-mcp-demo.ts +0 -68
  107. package/src/demos/mcp/working-mcp-demo.ts +0 -62
  108. package/src/demos/model-alias-demo.ts +0 -0
  109. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
  110. package/src/demos/tools/astrid-memory-demo.ts +0 -270
  111. package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
  112. package/src/demos/tools/astrid-production-memory.ts +0 -558
  113. package/src/demos/tools/basic-translation-test.ts +0 -66
  114. package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
  115. package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
  116. package/src/demos/tools/clean-translation-test.ts +0 -119
  117. package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
  118. package/src/demos/tools/complete-rag-demo.ts +0 -369
  119. package/src/demos/tools/complete-tool-demo.ts +0 -132
  120. package/src/demos/tools/demo-tool-calling.ts +0 -124
  121. package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
  122. package/src/demos/tools/hybrid-thinking-test.ts +0 -154
  123. package/src/demos/tools/memory-integration-test.ts +0 -420
  124. package/src/demos/tools/multilingual-memory-system.ts +0 -802
  125. package/src/demos/tools/ondemand-translation-demo.ts +0 -655
  126. package/src/demos/tools/production-tool-demo.ts +0 -245
  127. package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
  128. package/src/demos/tools/rigorous-language-analysis.ts +0 -218
  129. package/src/demos/tools/test-universal-memory-system.ts +0 -126
  130. package/src/demos/tools/translation-integration-guide.ts +0 -346
  131. package/src/demos/tools/universal-memory-system.ts +0 -560
  132. package/src/gemma-channel.ts +0 -47
  133. package/src/gemma-diffusion.ts +0 -167
  134. package/src/http.ts +0 -261
  135. package/src/index.ts +0 -180
  136. package/src/interfaces.ts +0 -843
  137. package/src/mcp.ts +0 -345
  138. package/src/providers/anthropic.ts +0 -796
  139. package/src/providers/google.ts +0 -840
  140. package/src/providers/index.ts +0 -8
  141. package/src/providers/ollama.ts +0 -503
  142. package/src/providers/openai.ts +0 -587
  143. package/src/router.ts +0 -785
  144. package/src/stream-decoder.ts +0 -535
  145. package/src/structured-output.ts +0 -759
  146. package/src/test-scripts/test-advanced-tools.ts +0 -310
  147. package/src/test-scripts/test-google-deep-research.ts +0 -33
  148. package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
  149. package/src/test-scripts/test-google-streaming.ts +0 -63
  150. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
  151. package/src/test-scripts/test-google-thinking.ts +0 -46
  152. package/src/test-scripts/test-mcp-config.ts +0 -28
  153. package/src/test-scripts/test-mcp-connection.ts +0 -29
  154. package/src/test-scripts/test-system-message-positions.ts +0 -163
  155. package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
  156. package/src/test-scripts/test-tool-calling.ts +0 -231
  157. package/src/test-scripts/test-vllm-qwen36.ts +0 -256
  158. package/src/tests/ai-model.test.ts +0 -1614
  159. package/src/tests/auditor.test.ts +0 -224
  160. package/src/tests/gemma-diffusion.test.ts +0 -115
  161. package/src/tests/http.test.ts +0 -200
  162. package/src/tests/interfaces.test.ts +0 -117
  163. package/src/tests/providers/anthropic.test.ts +0 -118
  164. package/src/tests/providers/google.test.ts +0 -841
  165. package/src/tests/providers/ollama.test.ts +0 -1034
  166. package/src/tests/providers/openai.test.ts +0 -1511
  167. package/src/tests/router.test.ts +0 -254
  168. package/src/tests/stream-decoder.test.ts +0 -263
  169. package/src/tests/structured-output.test.ts +0 -1450
  170. package/src/tests/thinking.test.ts +0 -65
  171. package/src/tests/tools.test.ts +0 -175
  172. package/src/thinking.ts +0 -73
  173. package/src/tools.ts +0 -246
  174. package/src/zod-adapter.ts +0 -72
@@ -1,29 +0,0 @@
1
- services:
2
- diffusiongemma:
3
- container_name: diffusiongemma
4
- image: ${VLLM_IMAGE:-vllm/vllm-openai:gemma}
5
- entrypoint: ["bash", "-lc", "/start-vllm.sh"]
6
- ipc: host
7
- shm_size: 2gb
8
- ports:
9
- - "${VLLM_PORT:-8000}:8000"
10
- environment:
11
- MODEL_NAME: ${MODEL_NAME:-RedHatAI/diffusiongemma-26B-A4B-it-NVFP4}
12
- GPU_MEM_UTIL: ${GPU_MEM_UTIL:-0.28}
13
- MAX_MODEL_LEN: ${MAX_MODEL_LEN:-32768}
14
- MAX_NUM_SEQS: ${MAX_NUM_SEQS:-1}
15
- DIFFUSION_ENTROPY: ${DIFFUSION_ENTROPY:-0.1}
16
- ENFORCE_EAGER: ${ENFORCE_EAGER:-0}
17
- VLLM_NO_USAGE_STATS: ${VLLM_NO_USAGE_STATS:-1}
18
- NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
19
- NVIDIA_DRIVER_CAPABILITIES: compute,utility
20
- volumes:
21
- - ./start-vllm.sh:/start-vllm.sh:ro
22
- - ./.cache/huggingface:/root/.cache/huggingface
23
- deploy:
24
- resources:
25
- reservations:
26
- devices:
27
- - driver: nvidia
28
- count: all
29
- capabilities: [gpu]
@@ -1,51 +0,0 @@
1
- /**
2
- * Probe the raw vLLM SSE stream to understand chunk arrival patterns.
3
- * Logs: chunk index, ms since start, gap since last chunk, content length, field, preview.
4
- */
5
-
6
- const res = await fetch('http://localhost:3333/api/stream-raw', {
7
- method: 'POST',
8
- headers: { 'Content-Type': 'application/json' },
9
- body: JSON.stringify({
10
- prompt: 'Write a short poem about the stars at night.',
11
- maxTokens: 512,
12
- }),
13
- });
14
-
15
- if (!res.ok || !res.body) {
16
- console.error('HTTP', res.status, res.statusText);
17
- process.exit(1);
18
- }
19
-
20
- const reader = res.body.getReader();
21
- const decoder = new TextDecoder();
22
- let buffer = '';
23
- const t0 = performance.now();
24
- let last = t0;
25
- let i = 0;
26
- let total = 0;
27
-
28
- while (true) {
29
- const { done, value } = await reader.read();
30
- if (done) break;
31
- buffer += decoder.decode(value, { stream: true });
32
- const lines = buffer.split('\n');
33
- buffer = lines.pop() ?? '';
34
- for (const line of lines) {
35
- if (!line.startsWith('data: ') || line === 'data: [DONE]') continue;
36
- let chunk: any;
37
- try { chunk = JSON.parse(line.slice(6)); } catch { continue; }
38
- const delta = chunk.choices?.[0]?.delta;
39
- if (!delta) continue;
40
- const field = delta.reasoning_content != null ? 'reasoning' : delta.content != null ? 'content' : '?';
41
- const text: string = delta.reasoning_content ?? delta.content ?? '';
42
- const now = performance.now();
43
- total += text.length;
44
- const extraKeys = Object.keys(delta).filter(k => !['content', 'reasoning_content', 'role', 'tool_calls'].includes(k));
45
- console.log(
46
- `#${String(i++).padStart(3)} t=${(now - t0).toFixed(0).padStart(6)}ms gap=${(now - last).toFixed(1).padStart(7)}ms len=${String(text.length).padStart(4)} ${field.padEnd(9)} ${JSON.stringify(text.slice(0, 60))}${extraKeys.length ? ' extra=' + JSON.stringify(extraKeys) : ''}`,
47
- );
48
- last = now;
49
- }
50
- }
51
- console.log(`\nTotal: ${i} chunks, ${total} chars, ${(performance.now() - t0).toFixed(0)}ms`);
@@ -1,55 +0,0 @@
1
- /**
2
- * Probe the full native tool-calling loop against vLLM, step by step,
3
- * printing raw wire content (skip_special_tokens: false throughout).
4
- */
5
-
6
- const MODEL = process.env.MODEL_NAME ?? 'RedHatAI/diffusiongemma-26B-A4B-it-NVFP4';
7
- const VLLM = process.env.VLLM_URL ?? 'http://localhost:8000';
8
-
9
- const tools = [{
10
- type: 'function',
11
- function: {
12
- name: 'get_weather',
13
- description: 'Get current weather for a city',
14
- parameters: {
15
- type: 'object',
16
- properties: {
17
- city: { type: 'string', description: 'City name' },
18
- unit: { type: 'string', enum: ['celsius', 'fahrenheit'] },
19
- },
20
- required: ['city'],
21
- },
22
- },
23
- }];
24
-
25
- async function post(messages: unknown[], withTools: boolean): Promise<string> {
26
- const res = await fetch(`${VLLM}/v1/chat/completions`, {
27
- method: 'POST',
28
- headers: { 'Content-Type': 'application/json' },
29
- body: JSON.stringify({
30
- model: MODEL,
31
- messages,
32
- max_tokens: 1024,
33
- skip_special_tokens: false,
34
- ...(withTools ? { tools, tool_choice: 'none' } : {}),
35
- }),
36
- });
37
- const d = await res.json() as any;
38
- return d.choices?.[0]?.message?.content ?? JSON.stringify(d).slice(0, 300);
39
- }
40
-
41
- const followUp = [
42
- { role: 'user', content: 'What is the weather in Paris right now, in celsius?' },
43
- {
44
- role: 'assistant', content: '', tool_calls: [{
45
- id: 'call_x', type: 'function',
46
- function: { name: 'get_weather', arguments: JSON.stringify({ city: 'Paris', unit: 'celsius' }) },
47
- }],
48
- },
49
- { role: 'tool', tool_call_id: 'call_x', content: JSON.stringify({ temp_c: 18, condition: 'partly cloudy' }) },
50
- ];
51
-
52
- console.log('A) follow-up WITH tools+choice none:');
53
- console.log(' ', JSON.stringify(await post(followUp, true)).slice(0, 500));
54
- console.log('B) follow-up WITHOUT tools:');
55
- console.log(' ', JSON.stringify(await post(followUp, false)).slice(0, 500));