universal-llm-client 4.2.0 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +142 -103
- package/LICENSE +21 -21
- package/README.md +640 -591
- package/dist/ai-model.d.ts +12 -1
- package/dist/ai-model.d.ts.map +1 -1
- package/dist/ai-model.js +36 -1
- package/dist/ai-model.js.map +1 -1
- package/dist/gemma-channel.d.ts +14 -0
- package/dist/gemma-channel.d.ts.map +1 -0
- package/dist/gemma-channel.js +38 -0
- package/dist/gemma-channel.js.map +1 -0
- package/dist/gemma-diffusion.d.ts +49 -0
- package/dist/gemma-diffusion.d.ts.map +1 -0
- package/dist/gemma-diffusion.js +147 -0
- package/dist/gemma-diffusion.js.map +1 -0
- package/dist/http.d.ts +4 -0
- package/dist/http.d.ts.map +1 -1
- package/dist/http.js +14 -1
- package/dist/http.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/interfaces.d.ts +183 -7
- package/dist/interfaces.d.ts.map +1 -1
- package/dist/interfaces.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +28 -3
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/google.d.ts +22 -1
- package/dist/providers/google.d.ts.map +1 -1
- package/dist/providers/google.js +225 -13
- package/dist/providers/google.js.map +1 -1
- package/dist/providers/ollama.d.ts +2 -0
- package/dist/providers/ollama.d.ts.map +1 -1
- package/dist/providers/ollama.js +59 -30
- package/dist/providers/ollama.js.map +1 -1
- package/dist/providers/openai.d.ts +14 -0
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +200 -22
- package/dist/providers/openai.js.map +1 -1
- package/dist/router.d.ts +2 -0
- package/dist/router.d.ts.map +1 -1
- package/dist/router.js +4 -0
- package/dist/router.js.map +1 -1
- package/dist/stream-decoder.d.ts +12 -0
- package/dist/stream-decoder.d.ts.map +1 -1
- package/dist/stream-decoder.js +182 -5
- package/dist/stream-decoder.js.map +1 -1
- package/dist/thinking.d.ts +36 -0
- package/dist/thinking.d.ts.map +1 -0
- package/dist/thinking.js +52 -0
- package/dist/thinking.js.map +1 -0
- package/package.json +118 -116
- package/src/ai-model.ts +400 -350
- package/src/auditor.ts +213 -213
- package/src/client.ts +402 -402
- package/src/debug/debug-google-streaming.ts +1 -1
- package/src/demos/basic/universal-llm-examples.ts +3 -3
- package/src/demos/diffusion-gemma/.env +29 -0
- package/src/demos/diffusion-gemma/.env.example +27 -0
- package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
- package/src/demos/diffusion-gemma/README.md +59 -0
- package/src/demos/diffusion-gemma/canvas.ts +1606 -0
- package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
- package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
- package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
- package/src/demos/diffusion-gemma/server.ts +1205 -0
- package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
- package/src/gemma-channel.ts +47 -0
- package/src/gemma-diffusion.ts +167 -0
- package/src/http.ts +261 -247
- package/src/index.ts +180 -161
- package/src/interfaces.ts +843 -657
- package/src/mcp.ts +345 -345
- package/src/providers/anthropic.ts +796 -762
- package/src/providers/google.ts +840 -620
- package/src/providers/index.ts +8 -8
- package/src/providers/ollama.ts +503 -469
- package/src/providers/openai.ts +587 -392
- package/src/router.ts +785 -780
- package/src/stream-decoder.ts +535 -361
- package/src/structured-output.ts +759 -759
- package/src/test-scripts/test-google-deep-research.ts +33 -0
- package/src/test-scripts/test-google-streaming-enhanced.ts +147 -147
- package/src/test-scripts/test-google-streaming.ts +1 -1
- package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -189
- package/src/test-scripts/test-google-thinking.ts +46 -0
- package/src/test-scripts/test-system-message-positions.ts +163 -163
- package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -83
- package/src/test-scripts/test-vllm-qwen36.ts +256 -0
- package/src/tests/ai-model.test.ts +1614 -1614
- package/src/tests/auditor.test.ts +224 -224
- package/src/tests/gemma-diffusion.test.ts +115 -0
- package/src/tests/http.test.ts +200 -200
- package/src/tests/interfaces.test.ts +117 -117
- package/src/tests/providers/anthropic.test.ts +118 -0
- package/src/tests/providers/google.test.ts +841 -660
- package/src/tests/providers/ollama.test.ts +1034 -954
- package/src/tests/providers/openai.test.ts +1511 -1122
- package/src/tests/router.test.ts +254 -254
- package/src/tests/stream-decoder.test.ts +263 -179
- package/src/tests/structured-output.test.ts +1450 -1450
- package/src/tests/thinking.test.ts +65 -0
- package/src/tests/tools.test.ts +175 -175
- package/src/thinking.ts +73 -0
- package/src/tools.ts +246 -246
- package/src/zod-adapter.ts +72 -72
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Compose definition for the vLLM container used by the diffusion-gemma demo.
# Every ${VAR:-default} below is interpolated by Compose, so each value can be
# overridden from the shell environment or an .env file without editing this file.
services:
  diffusiongemma:
    container_name: diffusiongemma
    image: ${VLLM_IMAGE:-vllm/vllm-openai:gemma}
    # Replace the image's default entrypoint with the launcher script mounted below.
    entrypoint: ["bash", "-lc", "/start-vllm.sh"]
    ipc: host
    # NOTE(review): with `ipc: host` the container shares the host's /dev/shm, so
    # shm_size likely has no effect here — confirm and drop one of the two.
    shm_size: 2gb
    ports:
      # Host port is configurable; the container side is fixed at 8000.
      - "${VLLM_PORT:-8000}:8000"
    environment:
      # Presumably read by start-vllm.sh to build the server command line — confirm there.
      MODEL_NAME: ${MODEL_NAME:-RedHatAI/diffusiongemma-26B-A4B-it-NVFP4}
      GPU_MEM_UTIL: ${GPU_MEM_UTIL:-0.28}
      MAX_MODEL_LEN: ${MAX_MODEL_LEN:-32768}
      MAX_NUM_SEQS: ${MAX_NUM_SEQS:-1}
      DIFFUSION_ENTROPY: ${DIFFUSION_ENTROPY:-0.1}
      ENFORCE_EAGER: ${ENFORCE_EAGER:-0}
      VLLM_NO_USAGE_STATS: ${VLLM_NO_USAGE_STATS:-1}
      NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
    volumes:
      # Launcher script, mounted read-only at the path the entrypoint runs.
      - ./start-vllm.sh:/start-vllm.sh:ro
      # Keep the Hugging Face cache on the host so it survives container re-creation.
      - ./.cache/huggingface:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
 * Probe the raw vLLM SSE stream to understand chunk arrival patterns.
 *
 * Posts a fixed prompt to the local `/api/stream-raw` endpoint and prints one row
 * per streamed delta.
 * Logs: chunk index, ms since start, gap since last chunk, content length, field, preview.
 * A summary line with the totals is printed once the stream ends.
 */

const res = await fetch('http://localhost:3333/api/stream-raw', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    prompt: 'Write a short poem about the stars at night.',
    maxTokens: 512,
  }),
});

if (!res.ok || !res.body) {
  console.error('HTTP', res.status, res.statusText);
  process.exit(1);
}

const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
const t0 = performance.now();
let last = t0;
let i = 0;
let total = 0;

/** Delta keys that are expected; any other key is reported in the `extra=` column. */
const KNOWN_DELTA_KEYS = ['content', 'reasoning_content', 'role', 'tool_calls'];

/**
 * Log a single SSE line if it is a `data:` event carrying a choice delta.
 * Non-data lines, the `[DONE]` sentinel, unparseable JSON and events without a
 * delta are skipped silently.
 */
function handleLine(line: string): void {
  if (!line.startsWith('data: ') || line === 'data: [DONE]') return;
  let chunk: any;
  try { chunk = JSON.parse(line.slice(6)); } catch { return; }
  const delta = chunk.choices?.[0]?.delta;
  if (!delta) return;
  // Reasoning text takes precedence over regular content when both are present.
  const field = delta.reasoning_content != null ? 'reasoning' : delta.content != null ? 'content' : '?';
  const text: string = delta.reasoning_content ?? delta.content ?? '';
  const now = performance.now();
  total += text.length;
  const extraKeys = Object.keys(delta).filter(k => !KNOWN_DELTA_KEYS.includes(k));
  console.log(
    `#${String(i++).padStart(3)} t=${(now - t0).toFixed(0).padStart(6)}ms gap=${(now - last).toFixed(1).padStart(7)}ms len=${String(text.length).padStart(4)} ${field.padEnd(9)} ${JSON.stringify(text.slice(0, 60))}${extraKeys.length ? ' extra=' + JSON.stringify(extraKeys) : ''}`,
  );
  last = now;
}

while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  // Network reads do not align with line boundaries: keep the trailing partial
  // line in the buffer until the rest of it arrives.
  const lines = buffer.split('\n');
  buffer = lines.pop() ?? '';
  for (const line of lines) handleLine(line);
}

// Flush bytes the decoder is still holding and handle a final line that was not
// newline-terminated; otherwise the last event would be dropped.
buffer += decoder.decode();
if (buffer) handleLine(buffer);

console.log(`\nTotal: ${i} chunks, ${total} chars, ${(performance.now() - t0).toFixed(0)}ms`);
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
 * Probe the follow-up turn of the native tool-calling loop against vLLM,
 * printing raw wire content (skip_special_tokens: false throughout).
 *
 * A fixed conversation (user question, assistant tool call, tool result) is sent
 * twice: once with the tool schema attached and `tool_choice: 'none'`, once
 * without any tools, so the two replies can be compared.
 */

/** Model identifier sent in each request; override with the MODEL_NAME env var. */
const MODEL = process.env.MODEL_NAME ?? 'RedHatAI/diffusiongemma-26B-A4B-it-NVFP4';

/**
 * Base URL of the vLLM server; override with the VLLM_URL env var.
 * Trailing slashes are removed so appending `/v1/...` never yields a `//` path.
 */
const VLLM = (process.env.VLLM_URL ?? 'http://localhost:8000').replace(/\/+$/, '');

/** Single function tool, in OpenAI chat-completions format, offered to the model. */
const tools = [{
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Get current weather for a city',
    parameters: {
      type: 'object',
      properties: {
        city: { type: 'string', description: 'City name' },
        unit: { type: 'string', enum: ['celsius', 'fahrenheit'] },
      },
      required: ['city'],
    },
  },
}];
|
|
24
|
+
|
|
25
|
+
/**
 * Send one non-streaming chat-completion request to vLLM and return the reply text.
 *
 * Special tokens are kept in the output (`skip_special_tokens: false`) so the raw
 * wire format of the model's answer stays visible.
 *
 * @param messages  Conversation in chat-completions format, forwarded unchanged.
 * @param withTools When true, the tool schema is attached with `tool_choice: 'none'`;
 *                  when false, no tool fields are sent at all.
 * @returns The first choice's message content. If the response has no such field,
 *          the first 300 characters of the JSON payload; if the body is not JSON,
 *          a diagnostic string containing the HTTP status and the start of the body.
 */
async function post(messages: unknown[], withTools: boolean): Promise<string> {
  const res = await fetch(`${VLLM}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: MODEL,
      messages,
      max_tokens: 1024,
      skip_special_tokens: false,
      ...(withTools ? { tools, tool_choice: 'none' } : {}),
    }),
  });

  // Read as text first: an error page from a proxy or a crashed server is not
  // JSON, and res.json() would abort the whole probe with an opaque SyntaxError.
  const raw = await res.text();
  let d: any;
  try {
    d = JSON.parse(raw);
  } catch {
    return `HTTP ${res.status} (non-JSON body): ${raw.slice(0, 300)}`;
  }

  // Error payloads have no choices; fall back to showing the start of the payload.
  return d.choices?.[0]?.message?.content ?? JSON.stringify(d).slice(0, 300);
}
|
|
40
|
+
|
|
41
|
+
// Arguments of the tool call the assistant is assumed to have made, and the
// result the tool is assumed to have returned for it.
const weatherCallArgs = JSON.stringify({ city: 'Paris', unit: 'celsius' });
const weatherResult = JSON.stringify({ temp_c: 18, condition: 'partly cloudy' });

/**
 * Conversation replayed against the server: the user's question, the assistant's
 * tool call, and the tool's answer. The model is expected to produce the final reply.
 */
const followUp = [
  { role: 'user', content: 'What is the weather in Paris right now, in celsius?' },
  {
    role: 'assistant',
    content: '',
    tool_calls: [
      {
        id: 'call_x',
        type: 'function',
        function: { name: 'get_weather', arguments: weatherCallArgs },
      },
    ],
  },
  { role: 'tool', tool_call_id: 'call_x', content: weatherResult },
];

// Same conversation, sent first with the tool schema attached and then without it.
const variants: Array<[label: string, withTools: boolean]> = [
  ['A) follow-up WITH tools+choice none:', true],
  ['B) follow-up WITHOUT tools:', false],
];

// Requests run one after another so each label is printed right before its reply.
for (const [label, withTools] of variants) {
  console.log(label);
  const reply = await post(followUp, withTools);
  console.log(' ', JSON.stringify(reply).slice(0, 500));
}
|