universal-llm-client 4.5.0 → 4.5.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +2 -0
- package/dist/ai-model.d.ts +0 -1
- package/dist/ai-model.js +0 -1
- package/dist/auditor.d.ts +0 -1
- package/dist/auditor.js +0 -1
- package/dist/client.d.ts +0 -1
- package/dist/client.js +0 -1
- package/dist/gemma-channel.d.ts +0 -1
- package/dist/gemma-channel.js +0 -1
- package/dist/gemma-diffusion.d.ts +0 -1
- package/dist/gemma-diffusion.js +0 -1
- package/dist/http.d.ts +0 -1
- package/dist/http.js +0 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/interfaces.d.ts +0 -1
- package/dist/interfaces.js +0 -1
- package/dist/mcp.d.ts +0 -1
- package/dist/mcp.js +0 -1
- package/dist/providers/anthropic.d.ts +0 -1
- package/dist/providers/anthropic.js +0 -1
- package/dist/providers/google.d.ts +0 -1
- package/dist/providers/google.js +0 -1
- package/dist/providers/index.d.ts +0 -1
- package/dist/providers/index.js +0 -1
- package/dist/providers/ollama.d.ts +0 -1
- package/dist/providers/ollama.js +0 -1
- package/dist/providers/openai.d.ts +2 -1
- package/dist/providers/openai.js +303 -74
- package/dist/router.d.ts +0 -1
- package/dist/router.js +0 -1
- package/dist/stream-decoder.d.ts +0 -1
- package/dist/stream-decoder.js +0 -1
- package/dist/structured-output.d.ts +0 -1
- package/dist/structured-output.js +0 -1
- package/dist/thinking.d.ts +0 -1
- package/dist/thinking.js +0 -1
- package/dist/tools.d.ts +0 -1
- package/dist/tools.js +0 -1
- package/dist/zod-adapter.d.ts +0 -1
- package/dist/zod-adapter.js +0 -1
- package/package.json +1 -2
- package/dist/ai-model.d.ts.map +0 -1
- package/dist/ai-model.js.map +0 -1
- package/dist/auditor.d.ts.map +0 -1
- package/dist/auditor.js.map +0 -1
- package/dist/client.d.ts.map +0 -1
- package/dist/client.js.map +0 -1
- package/dist/gemma-channel.d.ts.map +0 -1
- package/dist/gemma-channel.js.map +0 -1
- package/dist/gemma-diffusion.d.ts.map +0 -1
- package/dist/gemma-diffusion.js.map +0 -1
- package/dist/http.d.ts.map +0 -1
- package/dist/http.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/interfaces.d.ts.map +0 -1
- package/dist/interfaces.js.map +0 -1
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/providers/anthropic.d.ts.map +0 -1
- package/dist/providers/anthropic.js.map +0 -1
- package/dist/providers/google.d.ts.map +0 -1
- package/dist/providers/google.js.map +0 -1
- package/dist/providers/index.d.ts.map +0 -1
- package/dist/providers/index.js.map +0 -1
- package/dist/providers/ollama.d.ts.map +0 -1
- package/dist/providers/ollama.js.map +0 -1
- package/dist/providers/openai.d.ts.map +0 -1
- package/dist/providers/openai.js.map +0 -1
- package/dist/router.d.ts.map +0 -1
- package/dist/router.js.map +0 -1
- package/dist/stream-decoder.d.ts.map +0 -1
- package/dist/stream-decoder.js.map +0 -1
- package/dist/structured-output.d.ts.map +0 -1
- package/dist/structured-output.js.map +0 -1
- package/dist/thinking.d.ts.map +0 -1
- package/dist/thinking.js.map +0 -1
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/zod-adapter.d.ts.map +0 -1
- package/dist/zod-adapter.js.map +0 -1
- package/src/ai-model.ts +0 -400
- package/src/auditor.ts +0 -213
- package/src/client.ts +0 -402
- package/src/debug/debug-google-streaming.ts +0 -97
- package/src/debug/debug-tool-execution.ts +0 -86
- package/src/debug/test-lmstudio-tools.ts +0 -155
- package/src/demos/README.md +0 -47
- package/src/demos/basic/universal-llm-examples.ts +0 -161
- package/src/demos/diffusion-gemma/.env +0 -29
- package/src/demos/diffusion-gemma/.env.example +0 -27
- package/src/demos/diffusion-gemma/CLAUDE.md +0 -95
- package/src/demos/diffusion-gemma/README.md +0 -59
- package/src/demos/diffusion-gemma/canvas.ts +0 -1606
- package/src/demos/diffusion-gemma/docker-compose.yml +0 -29
- package/src/demos/diffusion-gemma/probe-stream.ts +0 -51
- package/src/demos/diffusion-gemma/probe-tools.ts +0 -55
- package/src/demos/diffusion-gemma/server.ts +0 -1205
- package/src/demos/diffusion-gemma/start-vllm.sh +0 -98
- package/src/demos/mcp/astrid-memory-demo.ts +0 -295
- package/src/demos/mcp/astrid-persona-memory.ts +0 -357
- package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
- package/src/demos/mcp/simple-astrid-memory.ts +0 -148
- package/src/demos/mcp/simple-mcp-demo.ts +0 -68
- package/src/demos/mcp/working-mcp-demo.ts +0 -62
- package/src/demos/model-alias-demo.ts +0 -0
- package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
- package/src/demos/tools/astrid-memory-demo.ts +0 -270
- package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
- package/src/demos/tools/astrid-production-memory.ts +0 -558
- package/src/demos/tools/basic-translation-test.ts +0 -66
- package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
- package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
- package/src/demos/tools/clean-translation-test.ts +0 -119
- package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
- package/src/demos/tools/complete-rag-demo.ts +0 -369
- package/src/demos/tools/complete-tool-demo.ts +0 -132
- package/src/demos/tools/demo-tool-calling.ts +0 -124
- package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
- package/src/demos/tools/hybrid-thinking-test.ts +0 -154
- package/src/demos/tools/memory-integration-test.ts +0 -420
- package/src/demos/tools/multilingual-memory-system.ts +0 -802
- package/src/demos/tools/ondemand-translation-demo.ts +0 -655
- package/src/demos/tools/production-tool-demo.ts +0 -245
- package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
- package/src/demos/tools/rigorous-language-analysis.ts +0 -218
- package/src/demos/tools/test-universal-memory-system.ts +0 -126
- package/src/demos/tools/translation-integration-guide.ts +0 -346
- package/src/demos/tools/universal-memory-system.ts +0 -560
- package/src/gemma-channel.ts +0 -47
- package/src/gemma-diffusion.ts +0 -167
- package/src/http.ts +0 -261
- package/src/index.ts +0 -180
- package/src/interfaces.ts +0 -843
- package/src/mcp.ts +0 -345
- package/src/providers/anthropic.ts +0 -796
- package/src/providers/google.ts +0 -840
- package/src/providers/index.ts +0 -8
- package/src/providers/ollama.ts +0 -503
- package/src/providers/openai.ts +0 -587
- package/src/router.ts +0 -785
- package/src/stream-decoder.ts +0 -535
- package/src/structured-output.ts +0 -759
- package/src/test-scripts/test-advanced-tools.ts +0 -310
- package/src/test-scripts/test-google-deep-research.ts +0 -33
- package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
- package/src/test-scripts/test-google-streaming.ts +0 -63
- package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
- package/src/test-scripts/test-google-thinking.ts +0 -46
- package/src/test-scripts/test-mcp-config.ts +0 -28
- package/src/test-scripts/test-mcp-connection.ts +0 -29
- package/src/test-scripts/test-system-message-positions.ts +0 -163
- package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
- package/src/test-scripts/test-tool-calling.ts +0 -231
- package/src/test-scripts/test-vllm-qwen36.ts +0 -256
- package/src/tests/ai-model.test.ts +0 -1614
- package/src/tests/auditor.test.ts +0 -224
- package/src/tests/gemma-diffusion.test.ts +0 -115
- package/src/tests/http.test.ts +0 -200
- package/src/tests/interfaces.test.ts +0 -117
- package/src/tests/providers/anthropic.test.ts +0 -118
- package/src/tests/providers/google.test.ts +0 -841
- package/src/tests/providers/ollama.test.ts +0 -1034
- package/src/tests/providers/openai.test.ts +0 -1511
- package/src/tests/router.test.ts +0 -254
- package/src/tests/stream-decoder.test.ts +0 -263
- package/src/tests/structured-output.test.ts +0 -1450
- package/src/tests/thinking.test.ts +0 -65
- package/src/tests/tools.test.ts +0 -175
- package/src/thinking.ts +0 -73
- package/src/tools.ts +0 -246
- package/src/zod-adapter.ts +0 -72
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
services:
|
|
2
|
-
diffusiongemma:
|
|
3
|
-
container_name: diffusiongemma
|
|
4
|
-
image: ${VLLM_IMAGE:-vllm/vllm-openai:gemma}
|
|
5
|
-
entrypoint: ["bash", "-lc", "/start-vllm.sh"]
|
|
6
|
-
ipc: host
|
|
7
|
-
shm_size: 2gb
|
|
8
|
-
ports:
|
|
9
|
-
- "${VLLM_PORT:-8000}:8000"
|
|
10
|
-
environment:
|
|
11
|
-
MODEL_NAME: ${MODEL_NAME:-RedHatAI/diffusiongemma-26B-A4B-it-NVFP4}
|
|
12
|
-
GPU_MEM_UTIL: ${GPU_MEM_UTIL:-0.28}
|
|
13
|
-
MAX_MODEL_LEN: ${MAX_MODEL_LEN:-32768}
|
|
14
|
-
MAX_NUM_SEQS: ${MAX_NUM_SEQS:-1}
|
|
15
|
-
DIFFUSION_ENTROPY: ${DIFFUSION_ENTROPY:-0.1}
|
|
16
|
-
ENFORCE_EAGER: ${ENFORCE_EAGER:-0}
|
|
17
|
-
VLLM_NO_USAGE_STATS: ${VLLM_NO_USAGE_STATS:-1}
|
|
18
|
-
NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
|
|
19
|
-
NVIDIA_DRIVER_CAPABILITIES: compute,utility
|
|
20
|
-
volumes:
|
|
21
|
-
- ./start-vllm.sh:/start-vllm.sh:ro
|
|
22
|
-
- ./.cache/huggingface:/root/.cache/huggingface
|
|
23
|
-
deploy:
|
|
24
|
-
resources:
|
|
25
|
-
reservations:
|
|
26
|
-
devices:
|
|
27
|
-
- driver: nvidia
|
|
28
|
-
count: all
|
|
29
|
-
capabilities: [gpu]
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Probe the raw vLLM SSE stream to understand chunk arrival patterns.
|
|
3
|
-
* Logs: chunk index, ms since start, gap since last chunk, content length, field, preview.
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
const res = await fetch('http://localhost:3333/api/stream-raw', {
|
|
7
|
-
method: 'POST',
|
|
8
|
-
headers: { 'Content-Type': 'application/json' },
|
|
9
|
-
body: JSON.stringify({
|
|
10
|
-
prompt: 'Write a short poem about the stars at night.',
|
|
11
|
-
maxTokens: 512,
|
|
12
|
-
}),
|
|
13
|
-
});
|
|
14
|
-
|
|
15
|
-
if (!res.ok || !res.body) {
|
|
16
|
-
console.error('HTTP', res.status, res.statusText);
|
|
17
|
-
process.exit(1);
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
const reader = res.body.getReader();
|
|
21
|
-
const decoder = new TextDecoder();
|
|
22
|
-
let buffer = '';
|
|
23
|
-
const t0 = performance.now();
|
|
24
|
-
let last = t0;
|
|
25
|
-
let i = 0;
|
|
26
|
-
let total = 0;
|
|
27
|
-
|
|
28
|
-
while (true) {
|
|
29
|
-
const { done, value } = await reader.read();
|
|
30
|
-
if (done) break;
|
|
31
|
-
buffer += decoder.decode(value, { stream: true });
|
|
32
|
-
const lines = buffer.split('\n');
|
|
33
|
-
buffer = lines.pop() ?? '';
|
|
34
|
-
for (const line of lines) {
|
|
35
|
-
if (!line.startsWith('data: ') || line === 'data: [DONE]') continue;
|
|
36
|
-
let chunk: any;
|
|
37
|
-
try { chunk = JSON.parse(line.slice(6)); } catch { continue; }
|
|
38
|
-
const delta = chunk.choices?.[0]?.delta;
|
|
39
|
-
if (!delta) continue;
|
|
40
|
-
const field = delta.reasoning_content != null ? 'reasoning' : delta.content != null ? 'content' : '?';
|
|
41
|
-
const text: string = delta.reasoning_content ?? delta.content ?? '';
|
|
42
|
-
const now = performance.now();
|
|
43
|
-
total += text.length;
|
|
44
|
-
const extraKeys = Object.keys(delta).filter(k => !['content', 'reasoning_content', 'role', 'tool_calls'].includes(k));
|
|
45
|
-
console.log(
|
|
46
|
-
`#${String(i++).padStart(3)} t=${(now - t0).toFixed(0).padStart(6)}ms gap=${(now - last).toFixed(1).padStart(7)}ms len=${String(text.length).padStart(4)} ${field.padEnd(9)} ${JSON.stringify(text.slice(0, 60))}${extraKeys.length ? ' extra=' + JSON.stringify(extraKeys) : ''}`,
|
|
47
|
-
);
|
|
48
|
-
last = now;
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
console.log(`\nTotal: ${i} chunks, ${total} chars, ${(performance.now() - t0).toFixed(0)}ms`);
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Probe the full native tool-calling loop against vLLM, step by step,
|
|
3
|
-
* printing raw wire content (skip_special_tokens: false throughout).
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
const MODEL = process.env.MODEL_NAME ?? 'RedHatAI/diffusiongemma-26B-A4B-it-NVFP4';
|
|
7
|
-
const VLLM = process.env.VLLM_URL ?? 'http://localhost:8000';
|
|
8
|
-
|
|
9
|
-
const tools = [{
|
|
10
|
-
type: 'function',
|
|
11
|
-
function: {
|
|
12
|
-
name: 'get_weather',
|
|
13
|
-
description: 'Get current weather for a city',
|
|
14
|
-
parameters: {
|
|
15
|
-
type: 'object',
|
|
16
|
-
properties: {
|
|
17
|
-
city: { type: 'string', description: 'City name' },
|
|
18
|
-
unit: { type: 'string', enum: ['celsius', 'fahrenheit'] },
|
|
19
|
-
},
|
|
20
|
-
required: ['city'],
|
|
21
|
-
},
|
|
22
|
-
},
|
|
23
|
-
}];
|
|
24
|
-
|
|
25
|
-
async function post(messages: unknown[], withTools: boolean): Promise<string> {
|
|
26
|
-
const res = await fetch(`${VLLM}/v1/chat/completions`, {
|
|
27
|
-
method: 'POST',
|
|
28
|
-
headers: { 'Content-Type': 'application/json' },
|
|
29
|
-
body: JSON.stringify({
|
|
30
|
-
model: MODEL,
|
|
31
|
-
messages,
|
|
32
|
-
max_tokens: 1024,
|
|
33
|
-
skip_special_tokens: false,
|
|
34
|
-
...(withTools ? { tools, tool_choice: 'none' } : {}),
|
|
35
|
-
}),
|
|
36
|
-
});
|
|
37
|
-
const d = await res.json() as any;
|
|
38
|
-
return d.choices?.[0]?.message?.content ?? JSON.stringify(d).slice(0, 300);
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
const followUp = [
|
|
42
|
-
{ role: 'user', content: 'What is the weather in Paris right now, in celsius?' },
|
|
43
|
-
{
|
|
44
|
-
role: 'assistant', content: '', tool_calls: [{
|
|
45
|
-
id: 'call_x', type: 'function',
|
|
46
|
-
function: { name: 'get_weather', arguments: JSON.stringify({ city: 'Paris', unit: 'celsius' }) },
|
|
47
|
-
}],
|
|
48
|
-
},
|
|
49
|
-
{ role: 'tool', tool_call_id: 'call_x', content: JSON.stringify({ temp_c: 18, condition: 'partly cloudy' }) },
|
|
50
|
-
];
|
|
51
|
-
|
|
52
|
-
console.log('A) follow-up WITH tools+choice none:');
|
|
53
|
-
console.log(' ', JSON.stringify(await post(followUp, true)).slice(0, 500));
|
|
54
|
-
console.log('B) follow-up WITHOUT tools:');
|
|
55
|
-
console.log(' ', JSON.stringify(await post(followUp, false)).slice(0, 500));
|