universal-llm-client 4.2.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/CHANGELOG.md +142 -103
  2. package/LICENSE +21 -21
  3. package/README.md +640 -591
  4. package/dist/ai-model.d.ts +12 -1
  5. package/dist/ai-model.d.ts.map +1 -1
  6. package/dist/ai-model.js +36 -1
  7. package/dist/ai-model.js.map +1 -1
  8. package/dist/gemma-channel.d.ts +14 -0
  9. package/dist/gemma-channel.d.ts.map +1 -0
  10. package/dist/gemma-channel.js +38 -0
  11. package/dist/gemma-channel.js.map +1 -0
  12. package/dist/gemma-diffusion.d.ts +49 -0
  13. package/dist/gemma-diffusion.d.ts.map +1 -0
  14. package/dist/gemma-diffusion.js +147 -0
  15. package/dist/gemma-diffusion.js.map +1 -0
  16. package/dist/http.d.ts +4 -0
  17. package/dist/http.d.ts.map +1 -1
  18. package/dist/http.js +14 -1
  19. package/dist/http.js.map +1 -1
  20. package/dist/index.d.ts +2 -1
  21. package/dist/index.d.ts.map +1 -1
  22. package/dist/index.js +4 -0
  23. package/dist/index.js.map +1 -1
  24. package/dist/interfaces.d.ts +183 -7
  25. package/dist/interfaces.d.ts.map +1 -1
  26. package/dist/interfaces.js.map +1 -1
  27. package/dist/providers/anthropic.d.ts.map +1 -1
  28. package/dist/providers/anthropic.js +28 -3
  29. package/dist/providers/anthropic.js.map +1 -1
  30. package/dist/providers/google.d.ts +22 -1
  31. package/dist/providers/google.d.ts.map +1 -1
  32. package/dist/providers/google.js +225 -13
  33. package/dist/providers/google.js.map +1 -1
  34. package/dist/providers/ollama.d.ts +2 -0
  35. package/dist/providers/ollama.d.ts.map +1 -1
  36. package/dist/providers/ollama.js +59 -30
  37. package/dist/providers/ollama.js.map +1 -1
  38. package/dist/providers/openai.d.ts +14 -0
  39. package/dist/providers/openai.d.ts.map +1 -1
  40. package/dist/providers/openai.js +200 -22
  41. package/dist/providers/openai.js.map +1 -1
  42. package/dist/router.d.ts +2 -0
  43. package/dist/router.d.ts.map +1 -1
  44. package/dist/router.js +4 -0
  45. package/dist/router.js.map +1 -1
  46. package/dist/stream-decoder.d.ts +12 -0
  47. package/dist/stream-decoder.d.ts.map +1 -1
  48. package/dist/stream-decoder.js +182 -5
  49. package/dist/stream-decoder.js.map +1 -1
  50. package/dist/thinking.d.ts +36 -0
  51. package/dist/thinking.d.ts.map +1 -0
  52. package/dist/thinking.js +52 -0
  53. package/dist/thinking.js.map +1 -0
  54. package/package.json +118 -116
  55. package/src/ai-model.ts +400 -350
  56. package/src/auditor.ts +213 -213
  57. package/src/client.ts +402 -402
  58. package/src/debug/debug-google-streaming.ts +1 -1
  59. package/src/demos/basic/universal-llm-examples.ts +3 -3
  60. package/src/demos/diffusion-gemma/.env +29 -0
  61. package/src/demos/diffusion-gemma/.env.example +27 -0
  62. package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
  63. package/src/demos/diffusion-gemma/README.md +59 -0
  64. package/src/demos/diffusion-gemma/canvas.ts +1606 -0
  65. package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
  66. package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
  67. package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
  68. package/src/demos/diffusion-gemma/server.ts +1205 -0
  69. package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
  70. package/src/gemma-channel.ts +47 -0
  71. package/src/gemma-diffusion.ts +167 -0
  72. package/src/http.ts +261 -247
  73. package/src/index.ts +180 -161
  74. package/src/interfaces.ts +843 -657
  75. package/src/mcp.ts +345 -345
  76. package/src/providers/anthropic.ts +796 -762
  77. package/src/providers/google.ts +840 -620
  78. package/src/providers/index.ts +8 -8
  79. package/src/providers/ollama.ts +503 -469
  80. package/src/providers/openai.ts +587 -392
  81. package/src/router.ts +785 -780
  82. package/src/stream-decoder.ts +535 -361
  83. package/src/structured-output.ts +759 -759
  84. package/src/test-scripts/test-google-deep-research.ts +33 -0
  85. package/src/test-scripts/test-google-streaming-enhanced.ts +147 -147
  86. package/src/test-scripts/test-google-streaming.ts +1 -1
  87. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -189
  88. package/src/test-scripts/test-google-thinking.ts +46 -0
  89. package/src/test-scripts/test-system-message-positions.ts +163 -163
  90. package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -83
  91. package/src/test-scripts/test-vllm-qwen36.ts +256 -0
  92. package/src/tests/ai-model.test.ts +1614 -1614
  93. package/src/tests/auditor.test.ts +224 -224
  94. package/src/tests/gemma-diffusion.test.ts +115 -0
  95. package/src/tests/http.test.ts +200 -200
  96. package/src/tests/interfaces.test.ts +117 -117
  97. package/src/tests/providers/anthropic.test.ts +118 -0
  98. package/src/tests/providers/google.test.ts +841 -660
  99. package/src/tests/providers/ollama.test.ts +1034 -954
  100. package/src/tests/providers/openai.test.ts +1511 -1122
  101. package/src/tests/router.test.ts +254 -254
  102. package/src/tests/stream-decoder.test.ts +263 -179
  103. package/src/tests/structured-output.test.ts +1450 -1450
  104. package/src/tests/thinking.test.ts +65 -0
  105. package/src/tests/tools.test.ts +175 -175
  106. package/src/thinking.ts +73 -0
  107. package/src/tools.ts +246 -246
  108. package/src/zod-adapter.ts +72 -72
@@ -0,0 +1,29 @@
1
+ services:
2
+ diffusiongemma: # the only service in this file; most settings below can be overridden from the environment
3
+ container_name: diffusiongemma
4
+ image: ${VLLM_IMAGE:-vllm/vllm-openai:gemma} # uses vllm/vllm-openai:gemma when VLLM_IMAGE is unset or empty
5
+ entrypoint: ["bash", "-lc", "/start-vllm.sh"] # replaces the image entrypoint with the script mounted under volumes
6
+ ipc: host # container shares the host IPC namespace
7
+ shm_size: 2gb
8
+ ports:
9
+ - "${VLLM_PORT:-8000}:8000" # host port (default 8000) -> container port 8000
10
+ environment: # passed into the container; presumably consumed by /start-vllm.sh - verify against that script
11
+ MODEL_NAME: ${MODEL_NAME:-RedHatAI/diffusiongemma-26B-A4B-it-NVFP4}
12
+ GPU_MEM_UTIL: ${GPU_MEM_UTIL:-0.28}
13
+ MAX_MODEL_LEN: ${MAX_MODEL_LEN:-32768}
14
+ MAX_NUM_SEQS: ${MAX_NUM_SEQS:-1}
15
+ DIFFUSION_ENTROPY: ${DIFFUSION_ENTROPY:-0.1}
16
+ ENFORCE_EAGER: ${ENFORCE_EAGER:-0}
17
+ VLLM_NO_USAGE_STATS: ${VLLM_NO_USAGE_STATS:-1}
18
+ NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
19
+ NVIDIA_DRIVER_CAPABILITIES: compute,utility
20
+ volumes:
21
+ - ./start-vllm.sh:/start-vllm.sh:ro # launch script, mounted read-only at the path the entrypoint runs
22
+ - ./.cache/huggingface:/root/.cache/huggingface # bind mount; presumably keeps downloaded model files on the host between runs
23
+ deploy:
24
+ resources:
25
+ reservations:
26
+ devices: # reserve every NVIDIA GPU for this container
27
+ - driver: nvidia
28
+ count: all
29
+ capabilities: [gpu]
@@ -0,0 +1,51 @@
/**
 * Probe the raw vLLM SSE stream to understand chunk arrival patterns.
 * Logs: chunk index, ms since start, gap since last chunk, content length, field, preview.
 *
 * The request goes to the local `/api/stream-raw` endpoint. Every `data: ` line is
 * parsed as JSON and its `choices[0].delta` is reported; lines that are not
 * `data: ` lines, the `[DONE]` sentinel, and unparsable payloads are skipped.
 */

/** Fields this probe reads from a streamed delta; any other key is reported as "extra". */
type StreamDelta = {
  content?: string | null;
  reasoning_content?: string | null;
  [key: string]: unknown;
};

/** Minimal shape of one streamed chunk. `null` is allowed because `data: null` is valid JSON. */
type StreamChunk = { choices?: Array<{ delta?: StreamDelta }> } | null;

const SSE_DATA_PREFIX = 'data: ';
const KNOWN_DELTA_KEYS = ['content', 'reasoning_content', 'role', 'tool_calls'];

const res = await fetch('http://localhost:3333/api/stream-raw', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    prompt: 'Write a short poem about the stars at night.',
    maxTokens: 512,
  }),
});

if (!res.ok || !res.body) {
  console.error('HTTP', res.status, res.statusText);
  process.exit(1);
}

const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
const t0 = performance.now();
let last = t0;
let i = 0;
let total = 0;

/**
 * Report one line of the stream, updating the running chunk index, character
 * total and "time of previous chunk" marker. Lines without a usable delta are ignored.
 */
function reportLine(rawLine: string): void {
  // Tolerate CRLF-delimited streams: splitting on '\n' leaves a trailing '\r'.
  const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
  if (!line.startsWith(SSE_DATA_PREFIX) || line === 'data: [DONE]') return;

  let chunk: StreamChunk;
  try {
    chunk = JSON.parse(line.slice(SSE_DATA_PREFIX.length)) as StreamChunk;
  } catch {
    return;
  }

  // `chunk?.` guards against a literal `null` payload, which would otherwise throw.
  const delta = chunk?.choices?.[0]?.delta;
  if (!delta) return;

  const field = delta.reasoning_content != null ? 'reasoning' : delta.content != null ? 'content' : '?';
  const text: string = delta.reasoning_content ?? delta.content ?? '';
  const now = performance.now();
  total += text.length;
  const extraKeys = Object.keys(delta).filter(k => !KNOWN_DELTA_KEYS.includes(k));
  console.log(
    `#${String(i++).padStart(3)} t=${(now - t0).toFixed(0).padStart(6)}ms gap=${(now - last).toFixed(1).padStart(7)}ms len=${String(text.length).padStart(4)} ${field.padEnd(9)} ${JSON.stringify(text.slice(0, 60))}${extraKeys.length ? ' extra=' + JSON.stringify(extraKeys) : ''}`,
  );
  last = now;
}

while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  // The last element is an incomplete line (or '' after a trailing newline); keep it for the next read.
  buffer = lines.pop() ?? '';
  for (const line of lines) reportLine(line);
}

// Flush any bytes the decoder is still holding and report a final line that
// arrived without a terminating newline; otherwise it would be dropped.
buffer += decoder.decode();
if (buffer) reportLine(buffer);

console.log(`\nTotal: ${i} chunks, ${total} chars, ${(performance.now() - t0).toFixed(0)}ms`);
@@ -0,0 +1,55 @@
/**
 * Probe the full native tool-calling loop against vLLM, step by step,
 * printing raw wire content (skip_special_tokens: false throughout).
 */

// `||` instead of `??`: an environment variable that is set but empty
// (e.g. `MODEL_NAME=` in a .env file) should fall back to the default rather
// than send an empty model name or build a relative URL.
const MODEL = process.env.MODEL_NAME || 'RedHatAI/diffusiongemma-26B-A4B-it-NVFP4';

// Trailing slashes are removed so that appending `/v1/...` never yields `//v1/...`.
const VLLM = (process.env.VLLM_URL || 'http://localhost:8000').replace(/\/+$/, '');

/** Tool definitions offered to the model: a single `get_weather` function taking a required `city` and an optional `unit`. */
const tools = [{
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Get current weather for a city',
    parameters: {
      type: 'object',
      properties: {
        city: { type: 'string', description: 'City name' },
        unit: { type: 'string', enum: ['celsius', 'fahrenheit'] },
      },
      required: ['city'],
    },
  },
}];
24
+
/**
 * Send one non-streaming chat-completions request to the vLLM server and
 * return what came back as a string.
 *
 * @param messages  Chat messages, forwarded unchanged as the request's `messages`.
 * @param withTools When true, `tools` is attached together with `tool_choice: 'none'`;
 *                  when false, neither key is sent.
 * @returns `choices[0].message.content` when the response has one. Otherwise the
 *          first 300 characters of the JSON-serialised response body. If the body
 *          is not JSON at all, `HTTP <status> <statusText>: <first 300 chars of body>`.
 */
async function post(messages: unknown[], withTools: boolean): Promise<string> {
  const res = await fetch(`${VLLM}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: MODEL,
      messages,
      max_tokens: 1024,
      skip_special_tokens: false,
      ...(withTools ? { tools, tool_choice: 'none' } : {}),
    }),
  });

  // Read the body as text first: an error response may be plain text or HTML,
  // and `res.json()` would then throw a SyntaxError that hides the HTTP status.
  const raw = await res.text();
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return `HTTP ${res.status} ${res.statusText}: ${raw.slice(0, 300)}`;
  }

  // Optional chaining from the root also covers a JSON `null` body.
  const reply = parsed as { choices?: Array<{ message?: { content?: string | null } }> } | null;
  return reply?.choices?.[0]?.message?.content ?? JSON.stringify(parsed).slice(0, 300);
}
40
+
// Identifier linking the assistant's tool call to the tool result that answers it.
const toolCallId = 'call_x';

// Turn 1: the user's question.
const userTurn = { role: 'user', content: 'What is the weather in Paris right now, in celsius?' };

// Turn 2: an assistant message with empty content that carries a single get_weather call.
const assistantTurn = {
  role: 'assistant',
  content: '',
  tool_calls: [{
    id: toolCallId,
    type: 'function',
    function: {
      name: 'get_weather',
      arguments: JSON.stringify({ city: 'Paris', unit: 'celsius' }),
    },
  }],
};

// Turn 3: the tool result for that call, serialised as a JSON string.
const toolTurn = {
  role: 'tool',
  tool_call_id: toolCallId,
  content: JSON.stringify({ temp_c: 18, condition: 'partly cloudy' }),
};

/** Conversation replayed for every probe: question, tool call, tool result. */
const followUp = [userTurn, assistantTurn, toolTurn];

// Send the same conversation with and without the tool list attached, one
// request after the other, printing at most 500 characters of each JSON-encoded reply.
const variants: Array<[string, boolean]> = [
  ['A) follow-up WITH tools+choice none:', true],
  ['B) follow-up WITHOUT tools:', false],
];

for (const [label, withTools] of variants) {
  console.log(label);
  const reply = await post(followUp, withTools);
  console.log(' ', JSON.stringify(reply).slice(0, 500));
}