universal-llm-client 4.5.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +2 -0
- package/dist/ai-model.d.ts +0 -1
- package/dist/ai-model.js +0 -1
- package/dist/auditor.d.ts +0 -1
- package/dist/auditor.js +0 -1
- package/dist/client.d.ts +0 -1
- package/dist/client.js +0 -1
- package/dist/gemma-channel.d.ts +0 -1
- package/dist/gemma-channel.js +0 -1
- package/dist/gemma-diffusion.d.ts +0 -1
- package/dist/gemma-diffusion.js +0 -1
- package/dist/http.d.ts +0 -1
- package/dist/http.js +0 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/interfaces.d.ts +0 -1
- package/dist/interfaces.js +0 -1
- package/dist/mcp.d.ts +0 -1
- package/dist/mcp.js +0 -1
- package/dist/providers/anthropic.d.ts +0 -1
- package/dist/providers/anthropic.js +0 -1
- package/dist/providers/google.d.ts +0 -1
- package/dist/providers/google.js +0 -1
- package/dist/providers/index.d.ts +0 -1
- package/dist/providers/index.js +0 -1
- package/dist/providers/ollama.d.ts +0 -1
- package/dist/providers/ollama.js +0 -1
- package/dist/providers/openai.d.ts +2 -1
- package/dist/providers/openai.js +303 -74
- package/dist/router.d.ts +0 -1
- package/dist/router.js +0 -1
- package/dist/stream-decoder.d.ts +0 -1
- package/dist/stream-decoder.js +0 -1
- package/dist/structured-output.d.ts +0 -1
- package/dist/structured-output.js +0 -1
- package/dist/thinking.d.ts +0 -1
- package/dist/thinking.js +0 -1
- package/dist/tools.d.ts +0 -1
- package/dist/tools.js +0 -1
- package/dist/zod-adapter.d.ts +0 -1
- package/dist/zod-adapter.js +0 -1
- package/package.json +1 -2
- package/dist/ai-model.d.ts.map +0 -1
- package/dist/ai-model.js.map +0 -1
- package/dist/auditor.d.ts.map +0 -1
- package/dist/auditor.js.map +0 -1
- package/dist/client.d.ts.map +0 -1
- package/dist/client.js.map +0 -1
- package/dist/gemma-channel.d.ts.map +0 -1
- package/dist/gemma-channel.js.map +0 -1
- package/dist/gemma-diffusion.d.ts.map +0 -1
- package/dist/gemma-diffusion.js.map +0 -1
- package/dist/http.d.ts.map +0 -1
- package/dist/http.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/interfaces.d.ts.map +0 -1
- package/dist/interfaces.js.map +0 -1
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/providers/anthropic.d.ts.map +0 -1
- package/dist/providers/anthropic.js.map +0 -1
- package/dist/providers/google.d.ts.map +0 -1
- package/dist/providers/google.js.map +0 -1
- package/dist/providers/index.d.ts.map +0 -1
- package/dist/providers/index.js.map +0 -1
- package/dist/providers/ollama.d.ts.map +0 -1
- package/dist/providers/ollama.js.map +0 -1
- package/dist/providers/openai.d.ts.map +0 -1
- package/dist/providers/openai.js.map +0 -1
- package/dist/router.d.ts.map +0 -1
- package/dist/router.js.map +0 -1
- package/dist/stream-decoder.d.ts.map +0 -1
- package/dist/stream-decoder.js.map +0 -1
- package/dist/structured-output.d.ts.map +0 -1
- package/dist/structured-output.js.map +0 -1
- package/dist/thinking.d.ts.map +0 -1
- package/dist/thinking.js.map +0 -1
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/zod-adapter.d.ts.map +0 -1
- package/dist/zod-adapter.js.map +0 -1
- package/src/ai-model.ts +0 -400
- package/src/auditor.ts +0 -213
- package/src/client.ts +0 -402
- package/src/debug/debug-google-streaming.ts +0 -97
- package/src/debug/debug-tool-execution.ts +0 -86
- package/src/debug/test-lmstudio-tools.ts +0 -155
- package/src/demos/README.md +0 -47
- package/src/demos/basic/universal-llm-examples.ts +0 -161
- package/src/demos/diffusion-gemma/.env +0 -29
- package/src/demos/diffusion-gemma/.env.example +0 -27
- package/src/demos/diffusion-gemma/CLAUDE.md +0 -95
- package/src/demos/diffusion-gemma/README.md +0 -59
- package/src/demos/diffusion-gemma/canvas.ts +0 -1606
- package/src/demos/diffusion-gemma/docker-compose.yml +0 -29
- package/src/demos/diffusion-gemma/probe-stream.ts +0 -51
- package/src/demos/diffusion-gemma/probe-tools.ts +0 -55
- package/src/demos/diffusion-gemma/server.ts +0 -1205
- package/src/demos/diffusion-gemma/start-vllm.sh +0 -98
- package/src/demos/mcp/astrid-memory-demo.ts +0 -295
- package/src/demos/mcp/astrid-persona-memory.ts +0 -357
- package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
- package/src/demos/mcp/simple-astrid-memory.ts +0 -148
- package/src/demos/mcp/simple-mcp-demo.ts +0 -68
- package/src/demos/mcp/working-mcp-demo.ts +0 -62
- package/src/demos/model-alias-demo.ts +0 -0
- package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
- package/src/demos/tools/astrid-memory-demo.ts +0 -270
- package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
- package/src/demos/tools/astrid-production-memory.ts +0 -558
- package/src/demos/tools/basic-translation-test.ts +0 -66
- package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
- package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
- package/src/demos/tools/clean-translation-test.ts +0 -119
- package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
- package/src/demos/tools/complete-rag-demo.ts +0 -369
- package/src/demos/tools/complete-tool-demo.ts +0 -132
- package/src/demos/tools/demo-tool-calling.ts +0 -124
- package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
- package/src/demos/tools/hybrid-thinking-test.ts +0 -154
- package/src/demos/tools/memory-integration-test.ts +0 -420
- package/src/demos/tools/multilingual-memory-system.ts +0 -802
- package/src/demos/tools/ondemand-translation-demo.ts +0 -655
- package/src/demos/tools/production-tool-demo.ts +0 -245
- package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
- package/src/demos/tools/rigorous-language-analysis.ts +0 -218
- package/src/demos/tools/test-universal-memory-system.ts +0 -126
- package/src/demos/tools/translation-integration-guide.ts +0 -346
- package/src/demos/tools/universal-memory-system.ts +0 -560
- package/src/gemma-channel.ts +0 -47
- package/src/gemma-diffusion.ts +0 -167
- package/src/http.ts +0 -261
- package/src/index.ts +0 -180
- package/src/interfaces.ts +0 -843
- package/src/mcp.ts +0 -345
- package/src/providers/anthropic.ts +0 -796
- package/src/providers/google.ts +0 -840
- package/src/providers/index.ts +0 -8
- package/src/providers/ollama.ts +0 -503
- package/src/providers/openai.ts +0 -587
- package/src/router.ts +0 -785
- package/src/stream-decoder.ts +0 -535
- package/src/structured-output.ts +0 -759
- package/src/test-scripts/test-advanced-tools.ts +0 -310
- package/src/test-scripts/test-google-deep-research.ts +0 -33
- package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
- package/src/test-scripts/test-google-streaming.ts +0 -63
- package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
- package/src/test-scripts/test-google-thinking.ts +0 -46
- package/src/test-scripts/test-mcp-config.ts +0 -28
- package/src/test-scripts/test-mcp-connection.ts +0 -29
- package/src/test-scripts/test-system-message-positions.ts +0 -163
- package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
- package/src/test-scripts/test-tool-calling.ts +0 -231
- package/src/test-scripts/test-vllm-qwen36.ts +0 -256
- package/src/tests/ai-model.test.ts +0 -1614
- package/src/tests/auditor.test.ts +0 -224
- package/src/tests/gemma-diffusion.test.ts +0 -115
- package/src/tests/http.test.ts +0 -200
- package/src/tests/interfaces.test.ts +0 -117
- package/src/tests/providers/anthropic.test.ts +0 -118
- package/src/tests/providers/google.test.ts +0 -841
- package/src/tests/providers/ollama.test.ts +0 -1034
- package/src/tests/providers/openai.test.ts +0 -1511
- package/src/tests/router.test.ts +0 -254
- package/src/tests/stream-decoder.test.ts +0 -263
- package/src/tests/structured-output.test.ts +0 -1450
- package/src/tests/thinking.test.ts +0 -65
- package/src/tests/tools.test.ts +0 -175
- package/src/thinking.ts +0 -73
- package/src/tools.ts +0 -246
- package/src/zod-adapter.ts +0 -72
|
@@ -1,1205 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* DiffusionGemma Test Harness
|
|
3
|
-
*
|
|
4
|
-
* A standalone Bun server that tests universal-llm-client against
|
|
5
|
-
* DiffusionGemma (a discrete diffusion language model served via vLLM).
|
|
6
|
-
*
|
|
7
|
-
* This validates:
|
|
8
|
-
* 1. Basic chat completion via OpenAI-compatible API
|
|
9
|
-
* 2. Streaming responses (diffusion models emit token blocks, not single tokens)
|
|
10
|
-
* 3. Thinking/reasoning mode
|
|
11
|
-
* 4. Tool calling compatibility
|
|
12
|
-
*
|
|
13
|
-
* Usage: bun run src/demos/diffusion-gemma/server.ts
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
import { mkdir } from 'node:fs/promises';
|
|
17
|
-
import { dirname, join } from 'node:path';
|
|
18
|
-
import { fileURLToPath } from 'node:url';
|
|
19
|
-
import { AIModel, extractTextContent, type LLMChatResponse, type TokenUsageInfo } from '../../index.js';
|
|
20
|
-
import { CANVAS_HTML } from './canvas.js';
|
|
21
|
-
|
|
22
|
-
const PORT = 3333;
|
|
23
|
-
const VLLM_URL = process.env.VLLM_URL ?? 'http://localhost:8000';
|
|
24
|
-
const MODEL_NAME = process.env.MODEL_NAME ?? 'RedHatAI/diffusiongemma-26B-A4B-it-NVFP4';
|
|
25
|
-
|
|
26
|
-
// ============================================================================
|
|
27
|
-
// Create the AIModel instance pointing at our local vLLM server
|
|
28
|
-
// ============================================================================
|
|
29
|
-
|
|
30
|
-
/**
 * Builds a fresh AIModel client aimed at the vLLM server at VLLM_URL.
 *
 * @param debug - passed through as the AIModel `debug` option (default: false).
 * @returns a new AIModel for MODEL_NAME with a single 'openai'-type provider.
 */
function createModel(debug = false): AIModel {
    // vLLM doesn't require auth by default, so the key is only a placeholder.
    const vllmProvider = {
        type: 'openai' as const,
        url: VLLM_URL,
        apiKey: 'not-needed',
    };

    return new AIModel({
        model: MODEL_NAME,
        // Diffusion models can take longer, hence the generous timeout.
        timeout: 120_000,
        // No retries for testing — we want to see raw errors.
        retries: 0,
        debug,
        providers: [vllmProvider],
    });
}
|
|
45
|
-
|
|
46
|
-
/** Library token usage plus `promptTokens` / `completionTokens` aliases. */
type DemoTokenUsage = TokenUsageInfo & {
    readonly promptTokens: number;
    readonly completionTokens: number;
};

/** Library chat response plus a top-level `content` string and aliased usage. */
type DemoChatResponse = LLMChatResponse & {
    readonly content: string;
    readonly usage?: DemoTokenUsage;
};

/**
 * Reshapes a library chat response for the demo UI.
 *
 * Adds `content` (the result of `extractTextContent` on the message content)
 * and, when usage is present, mirrors `inputTokens` / `outputTokens` as
 * `promptTokens` / `completionTokens`. All original fields are kept.
 *
 * @param response - response returned by the AIModel.
 * @returns the same response with the extra demo fields; `usage` is
 *          `undefined` when the library reported none.
 */
function toDemoChatResponse(response: LLMChatResponse): DemoChatResponse {
    const content = extractTextContent(response.message.content);
    const { usage: libraryUsage } = response;

    if (!libraryUsage) {
        return { ...response, content, usage: undefined };
    }

    const usage: DemoTokenUsage = {
        ...libraryUsage,
        promptTokens: libraryUsage.inputTokens,
        completionTokens: libraryUsage.outputTokens,
    };
    return { ...response, content, usage };
}
|
|
72
|
-
|
|
73
|
-
// ============================================================================
|
|
74
|
-
// API Handlers
|
|
75
|
-
// ============================================================================
|
|
76
|
-
|
|
77
|
-
/**
 * POST /api/chat — runs one chat request through universal-llm-client.
 *
 * Request JSON: `messages` (role/content pairs), optional `stream`,
 * `maxTokens` (default 512) and `temperature` (default 0.7). `thinking` is
 * part of the accepted shape but is not read by this handler.
 *
 * - `stream: true`: responds with SSE. Every event yielded by
 *   `model.chatStream()` is JSON-encoded into a `data:` frame, followed by
 *   `data: [DONE]`. A failure is reported in-band as a
 *   `{ type: 'error', content }` frame (no `[DONE]` is sent in that case).
 * - otherwise: responds with `toDemoChatResponse()` as JSON, or `{ error }`
 *   with HTTP 500 on failure.
 *
 * The per-request model is disposed exactly once on every path, including
 * when the client has already cancelled the stream.
 *
 * @param req - incoming request whose body is the JSON described above.
 * @returns the SSE or JSON response.
 */
async function handleChat(req: Request): Promise<Response> {
    const body = await req.json() as {
        messages: Array<{ role: string; content: string }>;
        stream?: boolean;
        maxTokens?: number;
        temperature?: number;
        thinking?: boolean;
    };

    const model = createModel();
    const chatOptions = {
        maxTokens: body.maxTokens ?? 512,
        temperature: body.temperature ?? 0.7,
    };

    if (body.stream) {
        // Streaming response via SSE
        const encoder = new TextEncoder();
        const stream = new ReadableStream({
            async start(controller) {
                const sendFrame = (payload: string): void => {
                    controller.enqueue(encoder.encode(`data: ${payload}\n\n`));
                };

                try {
                    const gen = model.chatStream(body.messages as any, chatOptions);
                    for await (const event of gen) {
                        sendFrame(JSON.stringify(event));
                    }
                    sendFrame('[DONE]');
                } catch (err: any) {
                    try {
                        sendFrame(JSON.stringify({
                            type: 'error',
                            content: err?.message ?? String(err),
                        }));
                    } catch {
                        // enqueue() throws once the consumer has cancelled the
                        // stream; there is nobody left to notify.
                    }
                } finally {
                    try {
                        controller.close();
                    } catch {
                        // close() throws on an already cancelled/errored stream.
                        // Swallow it so the dispose below always runs.
                    }
                    await model.dispose();
                }
            },
        });

        return new Response(stream, {
            headers: {
                'Content-Type': 'text/event-stream',
                'Cache-Control': 'no-cache',
                'Connection': 'keep-alive',
                'Access-Control-Allow-Origin': '*',
            },
        });
    }

    // Non-streaming response
    try {
        const response = await model.chat(body.messages as any, chatOptions);
        return Response.json(toDemoChatResponse(response));
    } catch (err: any) {
        return Response.json({ error: err?.message ?? String(err) }, { status: 500 });
    } finally {
        // Single cleanup point: runs after success and after failure.
        await model.dispose();
    }
}
|
|
140
|
-
|
|
141
|
-
/**
 * GET /api/health — reports whether the vLLM backend answers a model listing.
 *
 * @returns 200 `{ status: 'ok', vllm, models, timestamp }` when at least one
 *          model is listed; 503 `{ status: 'error', vllm, error, timestamp }`
 *          when the list is empty or the lookup throws.
 */
async function handleHealth(): Promise<Response> {
    let model: AIModel | undefined;
    try {
        model = createModel();
        const models = await model.getModels();

        if (models.length === 0) {
            return Response.json({
                status: 'error',
                vllm: VLLM_URL,
                error: 'No models reported by vLLM',
                timestamp: new Date().toISOString(),
            }, { status: 503 });
        }

        return Response.json({
            status: 'ok',
            vllm: VLLM_URL,
            models,
            timestamp: new Date().toISOString(),
        });
    } catch (err: any) {
        return Response.json({
            status: 'error',
            vllm: VLLM_URL,
            error: err?.message ?? String(err),
            // Same field the other two payloads carry.
            timestamp: new Date().toISOString(),
        }, { status: 503 });
    } finally {
        // Release the client on every path, including when getModels() rejects.
        // A cleanup failure must not replace the health result computed above.
        try {
            await model?.dispose();
        } catch (disposeErr) {
            console.warn('[health] model.dispose() failed:', disposeErr);
        }
    }
}
|
|
169
|
-
|
|
170
|
-
// ============================================================================
|
|
171
|
-
// Raw Stream Proxy — bypasses universal-llm-client for canvas visualization
|
|
172
|
-
// ============================================================================
|
|
173
|
-
|
|
174
|
-
/**
 * POST /api/stream-raw — forwards a chat request straight to vLLM and pipes
 * the SSE response back unchanged (universal-llm-client is bypassed).
 *
 * Request JSON: either `messages`, or a single `prompt` that is wrapped into
 * one user message; optional `maxTokens` (default 512) and `thinking`.
 * `thinking === false` adds `chat_template_kwargs.enable_thinking = false`.
 *
 * @param req - incoming request with the JSON body described above.
 * @returns the upstream SSE stream, or 502 `{ error }` when vLLM cannot be
 *          reached, answers with a non-2xx status, or sends no body.
 */
async function handleStreamRaw(req: Request): Promise<Response> {
    const body = await req.json() as {
        prompt?: string;
        messages?: Array<{ role: string; content: string }>;
        maxTokens?: number;
        thinking?: boolean;
    };

    const messages = body.messages ?? [{ role: 'user', content: body.prompt ?? '' }];

    // Proxy directly to vLLM to preserve raw SSE chunk timing.
    // skip_special_tokens:false keeps the native channel markers
    // (<|channel>thought ... <channel|>) so the canvas can split
    // reasoning from the final answer deterministically.
    const vllmBody = JSON.stringify({
        model: MODEL_NAME,
        messages,
        max_tokens: body.maxTokens ?? 512,
        stream: true,
        skip_special_tokens: false,
        ...(body.thinking === false
            ? { chat_template_kwargs: { enable_thinking: false } }
            : {}),
    });

    let vllmRes: Response;
    try {
        vllmRes = await fetch(`${VLLM_URL}/v1/chat/completions`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: vllmBody,
        });
    } catch (err: any) {
        // fetch() rejects on network-level failures (e.g. vLLM not running).
        return Response.json(
            { error: `vLLM unreachable: ${err?.message ?? String(err)}` },
            { status: 502 },
        );
    }

    if (!vllmRes.ok || !vllmRes.body) {
        // The upstream body is not forwarded on this path, so release it.
        await vllmRes.body?.cancel().catch(() => { /* already closed */ });
        return Response.json(
            { error: `vLLM error: ${vllmRes.status} ${vllmRes.statusText}` },
            { status: 502 },
        );
    }

    // Pass through the SSE stream unchanged
    return new Response(vllmRes.body, {
        headers: {
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Access-Control-Allow-Origin': '*',
        },
    });
}
|
|
222
|
-
|
|
223
|
-
// ============================================================================
|
|
224
|
-
// Engine Config — entropy is engine-level in vLLM (hf_overrides at init),
|
|
225
|
-
// so changing it requires a container restart. The start script sources
|
|
226
|
-
// an env file from the bind-mounted HF cache dir; we write it here and
|
|
227
|
-
// `docker restart` the engine. The UI polls /api/health until it returns.
|
|
228
|
-
// ============================================================================
|
|
229
|
-
|
|
230
|
-
// Directory containing this module.
const DEMO_DIR = fileURLToPath(new URL('.', import.meta.url));
// Env-file location next to the demo sources.
const DEMO_ENGINE_ENV_FILE = join(DEMO_DIR, '.cache', 'huggingface', 'diffusion-env.sh');
// Env-file location under the user's home directory. Left undefined when no
// home directory is known, so a root-level "/.cache/..." path is never probed.
const USER_HOME_DIR = process.env.USERPROFILE || process.env.HOME;
const USER_ENGINE_ENV_FILE = USER_HOME_DIR
    ? join(USER_HOME_DIR, '.cache', 'huggingface', 'diffusion-env.sh')
    : undefined;
// Candidate env files in priority order; unset entries are dropped.
const ENGINE_ENV_FILES = [
    process.env.ENGINE_ENV_FILE,
    DEMO_ENGINE_ENV_FILE,
    USER_ENGINE_ENV_FILE,
].filter((path): path is string => Boolean(path));
// Name of the container restarted after a config change.
const ENGINE_CONTAINER = process.env.ENGINE_CONTAINER ?? 'diffusiongemma';
|
|
239
|
-
|
|
240
|
-
/**
 * Picks the engine env file to read from or write to.
 *
 * Candidates in ENGINE_ENV_FILES are probed one at a time, in order, and the
 * first one that exists wins. When none exists, the first candidate is
 * returned (or DEMO_ENGINE_ENV_FILE if the list is empty).
 *
 * @returns path of the env file to use.
 */
async function resolveEngineEnvFile(): Promise<string> {
    const [preferred = DEMO_ENGINE_ENV_FILE] = ENGINE_ENV_FILES;

    for (const candidate of ENGINE_ENV_FILES) {
        const present = await Bun.file(candidate).exists();
        if (present) {
            return candidate;
        }
    }

    return preferred;
}
|
|
246
|
-
|
|
247
|
-
/**
 * Reads the configured DIFFUSION_ENTROPY value from the engine env file.
 *
 * @returns the parsed value when the file contains a finite
 *          `DIFFUSION_ENTROPY=<number>` entry; otherwise 0.1 (file missing,
 *          unreadable, no entry, or an entry that does not parse to a
 *          finite number).
 */
async function readEngineEntropy(): Promise<number> {
    try {
        const text = await Bun.file(await resolveEngineEnvFile()).text();
        const captured = text.match(/DIFFUSION_ENTROPY=([0-9.]+)/)?.[1];
        if (captured !== undefined) {
            // The character class also matches text such as "." which parses
            // to NaN; never hand a non-finite value to callers.
            const parsed = Number.parseFloat(captured);
            if (Number.isFinite(parsed)) return parsed;
        }
    } catch { /* no env file yet — engine runs script defaults */ }
    return 0.1;
}
|
|
255
|
-
|
|
256
|
-
/**
 * /api/engine-config — reads or changes the engine-level entropy setting.
 *
 * - GET: returns `{ entropy, container }` from the current env file.
 * - POST `{ entropy }`: validates the value (finite, within [0.01, 1]),
 *   writes `export DIFFUSION_ENTROPY=<value>` to the env file, then runs
 *   `docker restart <ENGINE_CONTAINER>` and waits for it to exit.
 * - any other method: 405.
 *
 * NOTE(review): this endpoint writes a file and restarts a container without
 * any authentication — confirm it is only ever exposed on a trusted machine.
 *
 * @param req - incoming request.
 * @returns 200 `{ ok, entropy, reloading }` on success; 400 for an invalid
 *          body or value; 405 for unsupported methods; 500 when the file
 *          write or the docker restart fails.
 */
async function handleEngineConfig(req: Request): Promise<Response> {
    if (req.method === 'GET') {
        return Response.json({ entropy: await readEngineEntropy(), container: ENGINE_CONTAINER });
    }

    // Only POST may trigger the write + restart side effects.
    if (req.method !== 'POST') {
        return Response.json(
            { error: 'method not allowed' },
            { status: 405, headers: { Allow: 'GET, POST' } },
        );
    }

    let body: { entropy?: unknown } | null;
    try {
        body = await req.json() as { entropy?: unknown } | null;
    } catch {
        return Response.json({ error: 'request body must be valid JSON' }, { status: 400 });
    }

    // Numbers and numeric strings are accepted; anything else is rejected.
    const rawEntropy = body?.entropy;
    const entropy = typeof rawEntropy === 'number' || typeof rawEntropy === 'string'
        ? Number(rawEntropy)
        : Number.NaN;
    if (!Number.isFinite(entropy) || entropy < 0.01 || entropy > 1) {
        return Response.json({ error: 'entropy must be in [0.01, 1]' }, { status: 400 });
    }

    try {
        const engineEnvFile = await resolveEngineEnvFile();
        await mkdir(dirname(engineEnvFile), { recursive: true });
        await Bun.write(engineEnvFile, `export DIFFUSION_ENTROPY=${entropy}\n`);
    } catch (err: any) {
        return Response.json(
            { error: `failed to write engine env file: ${err?.message ?? String(err)}` },
            { status: 500 },
        );
    }

    try {
        const proc = Bun.spawn(['docker', 'restart', ENGINE_CONTAINER], {
            stdout: 'pipe', stderr: 'pipe',
        });
        const code = await proc.exited;
        if (code !== 0) {
            const err = await new Response(proc.stderr).text();
            return Response.json({ error: `docker restart failed: ${err.trim()}` }, { status: 500 });
        }
    } catch (err: any) {
        // Reached when the process cannot be started at all.
        return Response.json(
            { error: `docker restart failed: ${err?.message ?? String(err)}` },
            { status: 500 },
        );
    }

    console.log(`[engine-config] entropy=${entropy} → restarted ${ENGINE_CONTAINER}`);
    return Response.json({ ok: true, entropy, reloading: true });
}
|
|
283
|
-
|
|
284
|
-
// ============================================================================
|
|
285
|
-
// Static UI
|
|
286
|
-
// ============================================================================
|
|
287
|
-
|
|
288
|
-
/**
 * Serves the inline test-harness page.
 *
 * @returns the HTML constant as a UTF-8 `text/html` response.
 */
function serveUI(): Response {
    const headers = { 'Content-Type': 'text/html; charset=utf-8' };
    return new Response(HTML, { headers });
}
|
|
293
|
-
|
|
294
|
-
/**
 * Serves the canvas visualization page.
 *
 * @returns the CANVAS_HTML constant as a UTF-8 `text/html` response.
 */
function serveCanvas(): Response {
    const headers = { 'Content-Type': 'text/html; charset=utf-8' };
    return new Response(CANVAS_HTML, { headers });
}
|
|
299
|
-
|
|
300
|
-
// ============================================================================
|
|
301
|
-
// Bun Server
|
|
302
|
-
// ============================================================================
|
|
303
|
-
|
|
304
|
-
console.log(`
|
|
305
|
-
╔══════════════════════════════════════════════════════════════╗
|
|
306
|
-
║ 🧪 DiffusionGemma Test Harness ║
|
|
307
|
-
║ ────────────────────────────────────────────────────────── ║
|
|
308
|
-
║ UI: http://localhost:${PORT} ║
|
|
309
|
-
║ vLLM: ${VLLM_URL.padEnd(48)} ║
|
|
310
|
-
║ Model: ${MODEL_NAME.padEnd(48).slice(0, 48)} ║
|
|
311
|
-
╚══════════════════════════════════════════════════════════════╝
|
|
312
|
-
`);
|
|
313
|
-
|
|
314
|
-
// HTTP entry point: answers CORS preflights, then routes by exact pathname.
// /api/chat and /api/stream-raw are POST-only (other methods get the 404);
// the remaining routes accept any method and delegate to their handler.
Bun.serve({
    port: PORT,
    async fetch(req) {
        const { pathname } = new URL(req.url);
        const { method } = req;

        // CORS preflight
        if (method === 'OPTIONS') {
            return new Response(null, {
                headers: {
                    'Access-Control-Allow-Origin': '*',
                    'Access-Control-Allow-Methods': 'POST, GET, OPTIONS',
                    'Access-Control-Allow-Headers': 'Content-Type',
                },
            });
        }

        const isPost = method === 'POST';

        if (pathname === '/') {
            return serveUI();
        }
        if (pathname === '/canvas') {
            return serveCanvas();
        }
        if (pathname === '/api/chat' && isPost) {
            return handleChat(req);
        }
        if (pathname === '/api/stream-raw' && isPost) {
            return handleStreamRaw(req);
        }
        if (pathname === '/api/engine-config') {
            return handleEngineConfig(req);
        }
        if (pathname === '/api/health') {
            return handleHealth();
        }

        return new Response('Not Found', { status: 404 });
    },
});
|
|
350
|
-
|
|
351
|
-
// ============================================================================
|
|
352
|
-
// Inline HTML UI
|
|
353
|
-
// ============================================================================
|
|
354
|
-
|
|
355
|
-
const HTML = /*html*/ `<!DOCTYPE html>
|
|
356
|
-
<html lang="en">
|
|
357
|
-
<head>
|
|
358
|
-
<meta charset="UTF-8">
|
|
359
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
360
|
-
<title>DiffusionGemma Test Harness</title>
|
|
361
|
-
<link rel="preconnect" href="https://fonts.googleapis.com">
|
|
362
|
-
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
|
|
363
|
-
<style>
|
|
364
|
-
:root {
|
|
365
|
-
--bg-primary: #0a0a0f;
|
|
366
|
-
--bg-secondary: #12121a;
|
|
367
|
-
--bg-card: #1a1a26;
|
|
368
|
-
--bg-input: #0f0f18;
|
|
369
|
-
--border: #2a2a3a;
|
|
370
|
-
--border-active: #6366f1;
|
|
371
|
-
--text-primary: #e8e8f0;
|
|
372
|
-
--text-secondary: #8888a0;
|
|
373
|
-
--text-muted: #5a5a70;
|
|
374
|
-
--accent: #6366f1;
|
|
375
|
-
--accent-glow: rgba(99, 102, 241, 0.15);
|
|
376
|
-
--accent-hover: #818cf8;
|
|
377
|
-
--success: #22c55e;
|
|
378
|
-
--warning: #f59e0b;
|
|
379
|
-
--error: #ef4444;
|
|
380
|
-
--thinking: #a78bfa;
|
|
381
|
-
--thinking-bg: rgba(167, 139, 250, 0.08);
|
|
382
|
-
--diffusion: #f472b6;
|
|
383
|
-
--diffusion-glow: rgba(244, 114, 182, 0.12);
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
* { margin: 0; padding: 0; box-sizing: border-box; }
|
|
387
|
-
|
|
388
|
-
body {
|
|
389
|
-
font-family: 'Inter', -apple-system, sans-serif;
|
|
390
|
-
background: var(--bg-primary);
|
|
391
|
-
color: var(--text-primary);
|
|
392
|
-
min-height: 100vh;
|
|
393
|
-
overflow-x: hidden;
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
/* Ambient background effect */
|
|
397
|
-
body::before {
|
|
398
|
-
content: '';
|
|
399
|
-
position: fixed;
|
|
400
|
-
top: -50%;
|
|
401
|
-
left: -50%;
|
|
402
|
-
width: 200%;
|
|
403
|
-
height: 200%;
|
|
404
|
-
background: radial-gradient(ellipse at 30% 20%, rgba(99, 102, 241, 0.04) 0%, transparent 60%),
|
|
405
|
-
radial-gradient(ellipse at 70% 80%, rgba(244, 114, 182, 0.03) 0%, transparent 60%);
|
|
406
|
-
pointer-events: none;
|
|
407
|
-
z-index: 0;
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
.app {
|
|
411
|
-
max-width: 900px;
|
|
412
|
-
margin: 0 auto;
|
|
413
|
-
padding: 2rem 1.5rem;
|
|
414
|
-
position: relative;
|
|
415
|
-
z-index: 1;
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
/* Header */
|
|
419
|
-
.header {
|
|
420
|
-
text-align: center;
|
|
421
|
-
margin-bottom: 2rem;
|
|
422
|
-
}
|
|
423
|
-
|
|
424
|
-
.header h1 {
|
|
425
|
-
font-size: 1.75rem;
|
|
426
|
-
font-weight: 700;
|
|
427
|
-
letter-spacing: -0.02em;
|
|
428
|
-
background: linear-gradient(135deg, var(--accent), var(--diffusion));
|
|
429
|
-
-webkit-background-clip: text;
|
|
430
|
-
-webkit-text-fill-color: transparent;
|
|
431
|
-
margin-bottom: 0.25rem;
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
.header .subtitle {
|
|
435
|
-
font-size: 0.85rem;
|
|
436
|
-
color: var(--text-muted);
|
|
437
|
-
font-weight: 400;
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
.header .model-badge {
|
|
441
|
-
display: inline-flex;
|
|
442
|
-
align-items: center;
|
|
443
|
-
gap: 0.4rem;
|
|
444
|
-
margin-top: 0.75rem;
|
|
445
|
-
padding: 0.35rem 0.75rem;
|
|
446
|
-
background: var(--diffusion-glow);
|
|
447
|
-
border: 1px solid rgba(244, 114, 182, 0.2);
|
|
448
|
-
border-radius: 100px;
|
|
449
|
-
font-size: 0.75rem;
|
|
450
|
-
font-family: 'JetBrains Mono', monospace;
|
|
451
|
-
color: var(--diffusion);
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
.model-badge .dot {
|
|
455
|
-
width: 6px;
|
|
456
|
-
height: 6px;
|
|
457
|
-
border-radius: 50%;
|
|
458
|
-
background: var(--diffusion);
|
|
459
|
-
animation: pulse-dot 2s ease-in-out infinite;
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
@keyframes pulse-dot {
|
|
463
|
-
0%, 100% { opacity: 1; }
|
|
464
|
-
50% { opacity: 0.4; }
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
/* Status bar */
|
|
468
|
-
.status-bar {
|
|
469
|
-
display: flex;
|
|
470
|
-
gap: 0.75rem;
|
|
471
|
-
justify-content: center;
|
|
472
|
-
margin-bottom: 2rem;
|
|
473
|
-
flex-wrap: wrap;
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
.status-pill {
|
|
477
|
-
display: flex;
|
|
478
|
-
align-items: center;
|
|
479
|
-
gap: 0.35rem;
|
|
480
|
-
padding: 0.3rem 0.65rem;
|
|
481
|
-
background: var(--bg-card);
|
|
482
|
-
border: 1px solid var(--border);
|
|
483
|
-
border-radius: 6px;
|
|
484
|
-
font-size: 0.72rem;
|
|
485
|
-
color: var(--text-secondary);
|
|
486
|
-
font-family: 'JetBrains Mono', monospace;
|
|
487
|
-
}
|
|
488
|
-
|
|
489
|
-
.status-pill .indicator {
|
|
490
|
-
width: 5px;
|
|
491
|
-
height: 5px;
|
|
492
|
-
border-radius: 50%;
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
.status-pill .indicator.online { background: var(--success); box-shadow: 0 0 4px var(--success); }
|
|
496
|
-
.status-pill .indicator.offline { background: var(--error); }
|
|
497
|
-
.status-pill .indicator.checking { background: var(--warning); animation: pulse-dot 1s ease-in-out infinite; }
|
|
498
|
-
|
|
499
|
-
/* Chat area */
|
|
500
|
-
.chat-container {
|
|
501
|
-
background: var(--bg-secondary);
|
|
502
|
-
border: 1px solid var(--border);
|
|
503
|
-
border-radius: 12px;
|
|
504
|
-
overflow: hidden;
|
|
505
|
-
margin-bottom: 1rem;
|
|
506
|
-
}
|
|
507
|
-
|
|
508
|
-
.messages {
|
|
509
|
-
min-height: 300px;
|
|
510
|
-
max-height: 500px;
|
|
511
|
-
overflow-y: auto;
|
|
512
|
-
padding: 1.25rem;
|
|
513
|
-
scroll-behavior: smooth;
|
|
514
|
-
}
|
|
515
|
-
|
|
516
|
-
.messages::-webkit-scrollbar { width: 4px; }
|
|
517
|
-
.messages::-webkit-scrollbar-track { background: transparent; }
|
|
518
|
-
.messages::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
|
|
519
|
-
|
|
520
|
-
.message {
|
|
521
|
-
margin-bottom: 1rem;
|
|
522
|
-
animation: msg-in 0.3s ease-out;
|
|
523
|
-
}
|
|
524
|
-
|
|
525
|
-
@keyframes msg-in {
|
|
526
|
-
from { opacity: 0; transform: translateY(8px); }
|
|
527
|
-
to { opacity: 1; transform: translateY(0); }
|
|
528
|
-
}
|
|
529
|
-
|
|
530
|
-
.message .role {
|
|
531
|
-
font-size: 0.7rem;
|
|
532
|
-
font-weight: 600;
|
|
533
|
-
text-transform: uppercase;
|
|
534
|
-
letter-spacing: 0.06em;
|
|
535
|
-
margin-bottom: 0.3rem;
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
.message.user .role { color: var(--accent); }
|
|
539
|
-
.message.assistant .role { color: var(--diffusion); }
|
|
540
|
-
.message.system .role { color: var(--warning); }
|
|
541
|
-
|
|
542
|
-
.message .content {
|
|
543
|
-
font-size: 0.9rem;
|
|
544
|
-
line-height: 1.65;
|
|
545
|
-
color: var(--text-primary);
|
|
546
|
-
white-space: pre-wrap;
|
|
547
|
-
word-break: break-word;
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
.message.user .content {
|
|
551
|
-
background: var(--accent-glow);
|
|
552
|
-
border: 1px solid rgba(99, 102, 241, 0.15);
|
|
553
|
-
padding: 0.75rem 1rem;
|
|
554
|
-
border-radius: 10px;
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
.message.assistant .content {
|
|
558
|
-
padding: 0.75rem 0;
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
.thinking-block {
|
|
562
|
-
background: var(--thinking-bg);
|
|
563
|
-
border-left: 2px solid var(--thinking);
|
|
564
|
-
padding: 0.6rem 0.8rem;
|
|
565
|
-
margin-bottom: 0.5rem;
|
|
566
|
-
border-radius: 0 6px 6px 0;
|
|
567
|
-
font-size: 0.82rem;
|
|
568
|
-
color: var(--thinking);
|
|
569
|
-
opacity: 0.85;
|
|
570
|
-
}
|
|
571
|
-
|
|
572
|
-
.thinking-block .label {
|
|
573
|
-
font-size: 0.65rem;
|
|
574
|
-
font-weight: 600;
|
|
575
|
-
text-transform: uppercase;
|
|
576
|
-
letter-spacing: 0.06em;
|
|
577
|
-
margin-bottom: 0.25rem;
|
|
578
|
-
opacity: 0.7;
|
|
579
|
-
}
|
|
580
|
-
|
|
581
|
-
/* Metrics bar */
|
|
582
|
-
.metrics {
|
|
583
|
-
display: flex;
|
|
584
|
-
gap: 1rem;
|
|
585
|
-
padding: 0.5rem 0;
|
|
586
|
-
margin-top: 0.5rem;
|
|
587
|
-
border-top: 1px solid var(--border);
|
|
588
|
-
flex-wrap: wrap;
|
|
589
|
-
}
|
|
590
|
-
|
|
591
|
-
.metric {
|
|
592
|
-
font-size: 0.7rem;
|
|
593
|
-
font-family: 'JetBrains Mono', monospace;
|
|
594
|
-
color: var(--text-muted);
|
|
595
|
-
}
|
|
596
|
-
|
|
597
|
-
.metric span { color: var(--text-secondary); }
|
|
598
|
-
|
|
599
|
-
/* Input area */
|
|
600
|
-
.input-area {
|
|
601
|
-
display: flex;
|
|
602
|
-
gap: 0.5rem;
|
|
603
|
-
padding: 1rem 1.25rem;
|
|
604
|
-
border-top: 1px solid var(--border);
|
|
605
|
-
background: var(--bg-card);
|
|
606
|
-
}
|
|
607
|
-
|
|
608
|
-
.input-area textarea {
|
|
609
|
-
flex: 1;
|
|
610
|
-
background: var(--bg-input);
|
|
611
|
-
border: 1px solid var(--border);
|
|
612
|
-
border-radius: 8px;
|
|
613
|
-
padding: 0.65rem 0.85rem;
|
|
614
|
-
color: var(--text-primary);
|
|
615
|
-
font-family: 'Inter', sans-serif;
|
|
616
|
-
font-size: 0.875rem;
|
|
617
|
-
resize: none;
|
|
618
|
-
min-height: 42px;
|
|
619
|
-
max-height: 120px;
|
|
620
|
-
outline: none;
|
|
621
|
-
transition: border-color 0.2s;
|
|
622
|
-
}
|
|
623
|
-
|
|
624
|
-
.input-area textarea:focus {
|
|
625
|
-
border-color: var(--accent);
|
|
626
|
-
box-shadow: 0 0 0 2px var(--accent-glow);
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
.input-area textarea::placeholder { color: var(--text-muted); }
|
|
630
|
-
|
|
631
|
-
.send-btn {
|
|
632
|
-
align-self: flex-end;
|
|
633
|
-
padding: 0.65rem 1.2rem;
|
|
634
|
-
background: linear-gradient(135deg, var(--accent), #8b5cf6);
|
|
635
|
-
color: white;
|
|
636
|
-
border: none;
|
|
637
|
-
border-radius: 8px;
|
|
638
|
-
font-size: 0.8rem;
|
|
639
|
-
font-weight: 600;
|
|
640
|
-
cursor: pointer;
|
|
641
|
-
transition: all 0.2s;
|
|
642
|
-
white-space: nowrap;
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
.send-btn:hover:not(:disabled) {
|
|
646
|
-
transform: translateY(-1px);
|
|
647
|
-
box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3);
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
.send-btn:disabled {
|
|
651
|
-
opacity: 0.5;
|
|
652
|
-
cursor: not-allowed;
|
|
653
|
-
}
|
|
654
|
-
|
|
655
|
-
/* Controls */
|
|
656
|
-
.controls {
|
|
657
|
-
display: flex;
|
|
658
|
-
gap: 0.75rem;
|
|
659
|
-
align-items: center;
|
|
660
|
-
flex-wrap: wrap;
|
|
661
|
-
margin-bottom: 1rem;
|
|
662
|
-
}
|
|
663
|
-
|
|
664
|
-
.control-group {
|
|
665
|
-
display: flex;
|
|
666
|
-
align-items: center;
|
|
667
|
-
gap: 0.35rem;
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
.control-group label {
|
|
671
|
-
font-size: 0.72rem;
|
|
672
|
-
color: var(--text-muted);
|
|
673
|
-
font-weight: 500;
|
|
674
|
-
}
|
|
675
|
-
|
|
676
|
-
.control-group input[type="number"],
|
|
677
|
-
.control-group select {
|
|
678
|
-
background: var(--bg-card);
|
|
679
|
-
border: 1px solid var(--border);
|
|
680
|
-
border-radius: 5px;
|
|
681
|
-
padding: 0.3rem 0.5rem;
|
|
682
|
-
color: var(--text-primary);
|
|
683
|
-
font-family: 'JetBrains Mono', monospace;
|
|
684
|
-
font-size: 0.72rem;
|
|
685
|
-
width: 70px;
|
|
686
|
-
outline: none;
|
|
687
|
-
}
|
|
688
|
-
|
|
689
|
-
.control-group input[type="checkbox"] {
|
|
690
|
-
accent-color: var(--accent);
|
|
691
|
-
}
|
|
692
|
-
|
|
693
|
-
.preset-btn {
|
|
694
|
-
padding: 0.3rem 0.6rem;
|
|
695
|
-
background: var(--bg-card);
|
|
696
|
-
border: 1px solid var(--border);
|
|
697
|
-
border-radius: 5px;
|
|
698
|
-
color: var(--text-secondary);
|
|
699
|
-
font-size: 0.7rem;
|
|
700
|
-
cursor: pointer;
|
|
701
|
-
transition: all 0.2s;
|
|
702
|
-
}
|
|
703
|
-
|
|
704
|
-
.preset-btn:hover {
|
|
705
|
-
border-color: var(--accent);
|
|
706
|
-
color: var(--accent);
|
|
707
|
-
}
|
|
708
|
-
|
|
709
|
-
/* Empty state */
|
|
710
|
-
.empty-state {
|
|
711
|
-
text-align: center;
|
|
712
|
-
padding: 3rem 1rem;
|
|
713
|
-
color: var(--text-muted);
|
|
714
|
-
}
|
|
715
|
-
|
|
716
|
-
.empty-state .icon {
|
|
717
|
-
font-size: 2.5rem;
|
|
718
|
-
margin-bottom: 0.75rem;
|
|
719
|
-
opacity: 0.5;
|
|
720
|
-
}
|
|
721
|
-
|
|
722
|
-
.empty-state p {
|
|
723
|
-
font-size: 0.85rem;
|
|
724
|
-
max-width: 360px;
|
|
725
|
-
margin: 0 auto;
|
|
726
|
-
line-height: 1.5;
|
|
727
|
-
}
|
|
728
|
-
|
|
729
|
-
/* Streaming cursor */
|
|
730
|
-
.streaming-cursor::after {
|
|
731
|
-
content: '▊';
|
|
732
|
-
animation: blink 0.8s step-end infinite;
|
|
733
|
-
color: var(--diffusion);
|
|
734
|
-
}
|
|
735
|
-
|
|
736
|
-
@keyframes blink {
|
|
737
|
-
50% { opacity: 0; }
|
|
738
|
-
}
|
|
739
|
-
|
|
740
|
-
/* Test results panel */
|
|
741
|
-
.test-panel {
|
|
742
|
-
background: var(--bg-secondary);
|
|
743
|
-
border: 1px solid var(--border);
|
|
744
|
-
border-radius: 12px;
|
|
745
|
-
padding: 1.25rem;
|
|
746
|
-
margin-top: 1rem;
|
|
747
|
-
}
|
|
748
|
-
|
|
749
|
-
.test-panel h3 {
|
|
750
|
-
font-size: 0.85rem;
|
|
751
|
-
font-weight: 600;
|
|
752
|
-
margin-bottom: 0.75rem;
|
|
753
|
-
color: var(--text-secondary);
|
|
754
|
-
}
|
|
755
|
-
|
|
756
|
-
.test-row {
|
|
757
|
-
display: flex;
|
|
758
|
-
justify-content: space-between;
|
|
759
|
-
align-items: center;
|
|
760
|
-
padding: 0.5rem 0;
|
|
761
|
-
border-bottom: 1px solid rgba(255,255,255,0.04);
|
|
762
|
-
font-size: 0.8rem;
|
|
763
|
-
}
|
|
764
|
-
|
|
765
|
-
.test-row:last-child { border-bottom: none; }
|
|
766
|
-
|
|
767
|
-
.test-row .test-name { color: var(--text-secondary); }
|
|
768
|
-
|
|
769
|
-
.test-row .test-result {
|
|
770
|
-
font-family: 'JetBrains Mono', monospace;
|
|
771
|
-
font-size: 0.72rem;
|
|
772
|
-
padding: 0.15rem 0.5rem;
|
|
773
|
-
border-radius: 4px;
|
|
774
|
-
}
|
|
775
|
-
|
|
776
|
-
.test-result.pass { background: rgba(34,197,94,0.1); color: var(--success); }
|
|
777
|
-
.test-result.fail { background: rgba(239,68,68,0.1); color: var(--error); }
|
|
778
|
-
.test-result.running { background: rgba(245,158,11,0.1); color: var(--warning); animation: pulse-dot 1s ease-in-out infinite; }
|
|
779
|
-
.test-result.pending { color: var(--text-muted); }
|
|
780
|
-
</style>
|
|
781
|
-
</head>
|
|
782
|
-
<body>
|
|
783
|
-
<div class="app">
|
|
784
|
-
<div class="header">
|
|
785
|
-
<h1>🧪 DiffusionGemma Test Harness</h1>
|
|
786
|
-
<p class="subtitle">Testing universal-llm-client against a discrete diffusion language model</p>
|
|
787
|
-
<div class="model-badge">
|
|
788
|
-
<span class="dot"></span>
|
|
789
|
-
<span id="model-name">connecting...</span>
|
|
790
|
-
</div>
|
|
791
|
-
</div>
|
|
792
|
-
|
|
793
|
-
<div class="status-bar">
|
|
794
|
-
<div class="status-pill">
|
|
795
|
-
<span class="indicator checking" id="vllm-status"></span>
|
|
796
|
-
<span>vLLM</span>
|
|
797
|
-
<span id="vllm-url">localhost:8000</span>
|
|
798
|
-
</div>
|
|
799
|
-
<div class="status-pill">
|
|
800
|
-
<span class="indicator checking" id="client-status"></span>
|
|
801
|
-
<span>universal-llm-client</span>
|
|
802
|
-
<span>openai compat</span>
|
|
803
|
-
</div>
|
|
804
|
-
</div>
|
|
805
|
-
|
|
806
|
-
<div class="controls">
|
|
807
|
-
<div class="control-group">
|
|
808
|
-
<label>Max Tokens</label>
|
|
809
|
-
<input type="number" id="max-tokens" value="512" min="1" max="8192">
|
|
810
|
-
</div>
|
|
811
|
-
<div class="control-group">
|
|
812
|
-
<label>Temperature</label>
|
|
813
|
-
<input type="number" id="temperature" value="0.7" min="0" max="2" step="0.1">
|
|
814
|
-
</div>
|
|
815
|
-
<div class="control-group">
|
|
816
|
-
<label>Stream</label>
|
|
817
|
-
<input type="checkbox" id="stream-toggle" checked>
|
|
818
|
-
</div>
|
|
819
|
-
<div style="flex:1"></div>
|
|
820
|
-
<button class="preset-btn" onclick="sendPreset('poem')">🎭 Poem</button>
|
|
821
|
-
<button class="preset-btn" onclick="sendPreset('code')">💻 Code</button>
|
|
822
|
-
<button class="preset-btn" onclick="sendPreset('reason')">🧠 Reason</button>
|
|
823
|
-
<button class="preset-btn" onclick="sendPreset('speed')">⚡ Speed</button>
|
|
824
|
-
<button class="preset-btn" onclick="runAllTests()">🧪 Run Tests</button>
|
|
825
|
-
</div>
|
|
826
|
-
|
|
827
|
-
<div class="chat-container">
|
|
828
|
-
<div class="messages" id="messages">
|
|
829
|
-
<div class="empty-state">
|
|
830
|
-
<div class="icon">🔮</div>
|
|
831
|
-
<p>DiffusionGemma generates text via <strong>parallel block diffusion</strong> —
|
|
832
|
-
256 tokens at a time through iterative denoising.
|
|
833
|
-
Type a message to test it.</p>
|
|
834
|
-
</div>
|
|
835
|
-
</div>
|
|
836
|
-
<div class="input-area">
|
|
837
|
-
<textarea id="input" placeholder="Type a message..." rows="1"
|
|
838
|
-
onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();sendMessage()}"></textarea>
|
|
839
|
-
<button class="send-btn" id="send-btn" onclick="sendMessage()">Send</button>
|
|
840
|
-
</div>
|
|
841
|
-
</div>
|
|
842
|
-
|
|
843
|
-
<div class="test-panel" id="test-panel" style="display:none">
|
|
844
|
-
<h3>🧪 Compatibility Test Results</h3>
|
|
845
|
-
<div id="test-results"></div>
|
|
846
|
-
</div>
|
|
847
|
-
</div>
|
|
848
|
-
|
|
849
|
-
<script>
|
|
850
|
-
// Cached handles to the chat UI elements.
const messagesEl = document.getElementById('messages');
const inputEl = document.getElementById('input');
const sendBtn = document.getElementById('send-btn');

// Mutable chat state: the running transcript and an in-flight request flag.
let conversationHistory = [];
let isStreaming = false;

// Let the textarea grow with its content, capped at 120px.
inputEl.addEventListener('input', function () {
    const fieldStyle = inputEl.style;
    fieldStyle.height = 'auto';
    const cappedHeight = Math.min(inputEl.scrollHeight, 120);
    fieldStyle.height = cappedHeight + 'px';
});
|
|
861
|
-
|
|
862
|
-
// Health check on load
|
|
863
|
-
/**
 * Fetches /api/health and mirrors the result onto both status indicators
 * and the model-name badge.
 *
 * A payload whose status is 'ok' marks everything online and shows the first
 * reported model (or 'unknown'); any other payload shows 'offline'; a failed
 * request or unparsable body shows 'connection failed'.
 */
async function checkHealth() {
    // Applies one indicator state to both dots and sets the badge text.
    const render = (state, label) => {
        const indicatorClass = 'indicator ' + state;
        document.getElementById('vllm-status').className = indicatorClass;
        document.getElementById('client-status').className = indicatorClass;
        document.getElementById('model-name').textContent = label;
    };

    try {
        const response = await fetch('/api/health');
        const payload = await response.json();
        if (payload.status !== 'ok') {
            render('offline', 'offline');
            return;
        }
        render('online', payload.models?.[0] ?? 'unknown');
    } catch {
        render('offline', 'connection failed');
    }
}
|
|
882
|
-
checkHealth();
|
|
883
|
-
|
|
884
|
-
/** Removes the empty-state placeholder from the message list, if it is still there. */
function clearEmptyState() {
    messagesEl.querySelector('.empty-state')?.remove();
}
|
|
888
|
-
|
|
889
|
-
/**
 * Appends a chat bubble to the message list and scrolls the list to the bottom.
 *
 * @param {string} role - Speaker label; shown in the bubble and used as a CSS class.
 * @param {string} content - Message text. For the assistant, a leading
 *   "thought" section is split off and rendered as a separate thinking block.
 * @param {object} [metrics] - Optional promptTokens, completionTokens,
 *   duration and tokensPerSec values; falsy entries are not rendered.
 * @returns {HTMLElement} The message element that was appended.
 */
function addMessage(role, content, metrics) {
    clearEmptyState();
    const div = document.createElement('div');
    div.className = 'message ' + role;

    let thinkingHtml = '';
    let cleanContent = content;

    // Parse thinking content. Backslashes are doubled because this script is
    // embedded in a JavaScript template literal.
    if (role === 'assistant' && content) {
        const thinkMatch = content.match(/^thought\\n(.*?)(?=\\n\\n[A-Z"]|$)/s);
        if (thinkMatch) {
            thinkingHtml = '<div class="thinking-block"><div class="label">💭 Thinking</div>' +
                escapeHtml(thinkMatch[1]) + '</div>';
            cleanContent = content.slice(thinkMatch[0].length).trim();
        }
    }

    let metricsHtml = '';
    if (metrics) {
        // [label, value, suffix]; values come from the server response, so
        // they are escaped before being placed into markup.
        const rows = [
            ['prompt', metrics.promptTokens, ''],
            ['completion', metrics.completionTokens, ''],
            ['time', metrics.duration, 'ms'],
            ['speed', metrics.tokensPerSec, ' t/s'],
        ];
        metricsHtml = '<div class="metrics">' +
            rows
                .filter(([, value]) => value)
                .map(([label, value, suffix]) =>
                    '<div class="metric">' + label + ': <span>' +
                    escapeHtml(String(value)) + suffix + '</span></div>')
                .join('') +
            '</div>';
    }

    // Every dynamic fragment is escaped; only the fixed wrapper markup is raw HTML.
    div.innerHTML = '<div class="role">' + escapeHtml(role) + '</div>' + thinkingHtml +
        '<div class="content">' + escapeHtml(cleanContent || '') + '</div>' + metricsHtml;

    messagesEl.appendChild(div);
    messagesEl.scrollTop = messagesEl.scrollHeight;
    return div;
}
|
|
924
|
-
|
|
925
|
-
/**
 * Escapes text for use as HTML element content by round-tripping it through
 * a detached div: assigned as textContent, read back as innerHTML.
 */
function escapeHtml(text) {
    return Object.assign(document.createElement('div'), { textContent: text }).innerHTML;
}
|
|
930
|
-
|
|
931
|
-
/**
 * Sends one user turn to /api/chat and renders the assistant reply.
 *
 * Uses the streaming (server-sent "data: ..." lines) or the plain JSON path
 * depending on the Stream checkbox. Does nothing when there is no text or a
 * request is already in flight.
 *
 * @param {string} [overrideText] - Text to send instead of the textarea content.
 * @returns {Promise<void>}
 */
async function sendMessage(overrideText) {
    const text = overrideText || inputEl.value.trim();
    if (!text || isStreaming) return;

    inputEl.value = '';
    inputEl.style.height = 'auto';
    isStreaming = true;
    sendBtn.disabled = true;
    sendBtn.textContent = '...';

    // try/finally so the send button is re-enabled even if rendering throws.
    try {
        addMessage('user', text);
        conversationHistory.push({ role: 'user', content: text });

        const maxTokens = Number.parseInt(document.getElementById('max-tokens').value, 10) || 512;
        // A temperature of 0 is valid, so fall back only when the field is not a number.
        const parsedTemperature = Number.parseFloat(document.getElementById('temperature').value);
        const temperature = Number.isNaN(parsedTemperature) ? 0.7 : parsedTemperature;
        const useStream = document.getElementById('stream-toggle').checked;
        const startTime = Date.now();

        if (useStream) {
            // Streaming
            const assistantDiv = addMessage('assistant', '');
            const contentEl = assistantDiv.querySelector('.content');
            contentEl.classList.add('streaming-cursor');
            let fullContent = '';

            // Applies a single stream line to the bubble; other lines are ignored.
            const handleLine = (line) => {
                if (!line.startsWith('data: ')) return;
                const data = line.slice(6);
                if (data === '[DONE]') return;

                let event;
                try {
                    event = JSON.parse(data);
                } catch (err) {
                    // Best effort: one malformed event must not abort the stream.
                    console.warn('Skipping malformed stream event:', err);
                    return;
                }

                if (event?.type === 'text' || event?.type === 'thinking') {
                    fullContent += event.content;
                    contentEl.textContent = fullContent;
                } else if (event?.type === 'error') {
                    contentEl.textContent = '❌ ' + event.content;
                }
            };

            try {
                const res = await fetch('/api/chat', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        messages: conversationHistory,
                        stream: true,
                        maxTokens,
                        temperature,
                    }),
                });

                const reader = res.body.getReader();
                const decoder = new TextDecoder();
                let buffer = '';

                while (true) {
                    const { done, value } = await reader.read();
                    if (done) break;

                    buffer += decoder.decode(value, { stream: true });
                    // Doubled backslash: this script lives inside a template literal.
                    const lines = buffer.split('\\n');
                    // The last piece may be an incomplete line; keep it for the next chunk.
                    buffer = lines.pop();

                    for (const line of lines) {
                        handleLine(line);
                    }
                    messagesEl.scrollTop = messagesEl.scrollHeight;
                }

                // Flush the decoder and handle a final line that had no trailing newline.
                buffer += decoder.decode();
                if (buffer) {
                    handleLine(buffer);
                    messagesEl.scrollTop = messagesEl.scrollHeight;
                }
            } catch (err) {
                contentEl.textContent = '❌ Stream error: ' + err.message;
            }

            contentEl.classList.remove('streaming-cursor');
            const elapsed = Date.now() - startTime;

            // Add metrics
            const metricsDiv = document.createElement('div');
            metricsDiv.className = 'metrics';
            metricsDiv.innerHTML =
                '<div class="metric">time: <span>' + elapsed + 'ms</span></div>' +
                '<div class="metric">chars: <span>' + fullContent.length + '</span></div>';
            assistantDiv.appendChild(metricsDiv);

            conversationHistory.push({ role: 'assistant', content: fullContent });
        } else {
            // Non-streaming
            try {
                const res = await fetch('/api/chat', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        messages: conversationHistory,
                        stream: false,
                        maxTokens,
                        temperature,
                    }),
                });

                const data = await res.json();
                const elapsed = Date.now() - startTime;

                if (data.error) {
                    addMessage('assistant', '❌ ' + data.error);
                } else {
                    const tokens = data.usage;
                    addMessage('assistant', data.content || '(empty response)', {
                        promptTokens: tokens?.promptTokens,
                        completionTokens: tokens?.completionTokens,
                        duration: elapsed,
                        tokensPerSec: tokens?.completionTokens ?
                            Math.round(tokens.completionTokens / (elapsed / 1000)) : undefined,
                    });
                    // Keep the history entry well-formed even when the reply had no content.
                    conversationHistory.push({ role: 'assistant', content: data.content ?? '' });
                }
            } catch (err) {
                addMessage('assistant', '❌ Request error: ' + err.message);
            }
        }
    } finally {
        isStreaming = false;
        sendBtn.disabled = false;
        sendBtn.textContent = 'Send';
        inputEl.focus();
    }
}
|
|
1058
|
-
|
|
1059
|
-
// Preset prompts
|
|
1060
|
-
// Canned prompts, keyed by the name each preset button passes to sendPreset.
const PRESETS = {
    poem: 'Write a short haiku about quantum computing.',
    code: 'Write a TypeScript function that reverses a linked list. Include types.',
    reason: 'What is 47 * 23? Show your step-by-step reasoning.',
    speed: 'Hi',
};

/** Sends the canned prompt registered under the given preset name. */
function sendPreset(name) {
    const { [name]: prompt } = PRESETS;
    sendMessage(prompt);
}
|
|
1070
|
-
|
|
1071
|
-
// === Automated Tests ===
|
|
1072
|
-
/**
 * Reveals the test panel, lists every compatibility test as pending, then
 * runs them one after another, updating each row's badge to running and
 * finally to pass (with the test's summary) or fail (with the error message).
 */
async function runAllTests() {
    const panel = document.getElementById('test-panel');
    const results = document.getElementById('test-results');
    panel.style.display = 'block';

    const tests = [
        { name: 'Health Check (GET /v1/models)', fn: testHealth },
        { name: 'Basic Chat (non-streaming)', fn: testBasicChat },
        { name: 'Streaming Chat', fn: testStreamingChat },
        { name: 'Multi-turn Conversation', fn: testMultiTurn },
        { name: 'Empty Response Handling', fn: testEmptyResponse },
        { name: 'Long Output (1024 tokens)', fn: testLongOutput },
    ];

    // Badge element id: "test-" followed by only the letters of the test name.
    const badgeId = (test) => 'test-' + test.name.replace(/[^a-z]/gi, '');

    const rows = [];
    for (const test of tests) {
        rows.push(
            '<div class="test-row"><span class="test-name">' + test.name + '</span>' +
            '<span class="test-result pending" id="' + badgeId(test) + '">pending</span></div>'
        );
    }
    results.innerHTML = rows.join('');

    // Sequential on purpose: each test awaits before the next one starts.
    for (const test of tests) {
        const badge = document.getElementById(badgeId(test));
        badge.className = 'test-result running';
        badge.textContent = 'running...';

        try {
            const summary = await test.fn();
            badge.className = 'test-result pass';
            badge.textContent = '✓ ' + summary;
        } catch (err) {
            badge.className = 'test-result fail';
            badge.textContent = '✗ ' + err.message;
        }
    }
}
|
|
1107
|
-
|
|
1108
|
-
/**
 * Compatibility test: /api/health must report status 'ok'.
 *
 * @returns {Promise<string>} The first reported model name, shortened to 30
 *   characters plus an ellipsis when longer, or 'unknown' when none is listed.
 * @throws {Error} 'unhealthy' when the reported status is not 'ok'.
 */
async function testHealth() {
    const res = await fetch('/api/health');
    const data = await res.json();
    if (data.status !== 'ok') throw new Error('unhealthy');

    // The model list may be missing or empty; mirror checkHealth's 'unknown' fallback.
    const model = data.models?.[0];
    if (!model) return 'unknown';

    const name = String(model);
    // Only add the ellipsis when the name was actually cut.
    return name.length > 30 ? name.slice(0, 30) + '...' : name;
}
|
|
1114
|
-
|
|
1115
|
-
/**
 * Compatibility test: a single non-streaming chat request must come back
 * without an error and with a content field (an empty string is accepted).
 *
 * @returns {Promise<string>} Elapsed time and reply length, for display.
 */
async function testBasicChat() {
    const startedAt = Date.now();
    const request = {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            messages: [{ role: 'user', content: 'Say the word hello' }],
            maxTokens: 64,
            stream: false,
        }),
    };
    const response = await fetch('/api/chat', request);
    const payload = await response.json();
    const elapsedMs = Date.now() - startedAt;

    if (payload.error) throw new Error(payload.error);

    const isMissing = !payload.content && payload.content !== '';
    if (isMissing) throw new Error('no content');

    const charCount = payload.content?.length ?? 0;
    return elapsedMs + 'ms, ' + charCount + ' chars';
}
|
|
1132
|
-
|
|
1133
|
-
/**
 * Compatibility test: a streaming chat request must produce at least one
 * "data: ..." event and no error event.
 *
 * @returns {Promise<string>} Elapsed time and number of events, for display.
 * @throws {Error} On a non-OK HTTP status, an empty stream, or an event whose
 *   type is 'error' (the type the chat UI's stream handler renders as a failure).
 */
async function testStreamingChat() {
    const start = Date.now();
    const res = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            messages: [{ role: 'user', content: 'Count from 1 to 5' }],
            maxTokens: 128,
            stream: true,
        }),
    });
    if (!res.ok) throw new Error('HTTP ' + res.status);

    const text = await res.text();
    const elapsed = Date.now() - start;
    // Doubled backslash: this script lives inside a template literal.
    const events = text.split('\\n').filter(l => l.startsWith('data: ') && l !== 'data: [DONE]');
    if (events.length === 0) throw new Error('no stream events received');

    for (const line of events) {
        let event;
        try {
            event = JSON.parse(line.slice(6));
        } catch {
            // Unparsable events are still counted, as before; they just cannot be inspected.
            continue;
        }
        if (event?.type === 'error') throw new Error(String(event.content));
    }

    return elapsed + 'ms, ' + events.length + ' events';
}
|
|
1149
|
-
|
|
1150
|
-
/**
 * Compatibility test: sends a three-message conversation and reports whether
 * the reply mentions the name given in the first turn. A reply without the
 * name is reported, not treated as a failure.
 *
 * @returns {Promise<string>} Elapsed time and the recall flag, for display.
 */
async function testMultiTurn() {
    const startedAt = Date.now();
    const conversation = [
        { role: 'user', content: 'My name is Igor.' },
        { role: 'assistant', content: 'Hello Igor!' },
        { role: 'user', content: 'What is my name?' },
    ];
    const response = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ messages: conversation, maxTokens: 64, stream: false }),
    });
    const payload = await response.json();
    const elapsedMs = Date.now() - startedAt;

    if (payload.error) throw new Error(payload.error);

    const reply = payload.content || '';
    const recalled = reply.toLowerCase().includes('igor');
    return elapsedMs + 'ms, name recalled: ' + recalled;
}
|
|
1171
|
-
|
|
1172
|
-
/**
 * Compatibility test: a very short completion (16 tokens) must be handled,
 * including a reply with empty or missing content.
 *
 * @returns {Promise<string>} The reply length in characters, for display.
 * @throws {Error} When the response carries an error field, matching the
 *   other chat tests instead of reporting the failure as "0 chars".
 */
async function testEmptyResponse() {
    const res = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            messages: [{ role: 'user', content: 'Respond with just OK' }],
            maxTokens: 16,
            stream: false,
        }),
    });
    const data = await res.json();
    if (data.error) throw new Error(data.error);
    return (data.content?.length ?? 0) + ' chars';
}
|
|
1185
|
-
|
|
1186
|
-
/**
 * Compatibility test: requests a long completion (up to 1024 tokens) and
 * reports how many completion tokens the response's usage block lists.
 *
 * @returns {Promise<string>} Elapsed time and token count ('?' when the
 *   response has no usage figure), for display.
 */
async function testLongOutput() {
    const startedAt = Date.now();
    const prompt = 'Write a detailed essay about the history of computing. Be thorough.';
    const response = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            messages: [{ role: 'user', content: prompt }],
            maxTokens: 1024,
            stream: false,
        }),
    });
    const payload = await response.json();
    const elapsedMs = Date.now() - startedAt;

    if (payload.error) throw new Error(payload.error);

    let tokenCount = '?';
    const usage = payload.usage;
    if (usage != null && usage.completionTokens != null) {
        tokenCount = usage.completionTokens;
    }
    return [elapsedMs + 'ms', tokenCount + ' tokens'].join(', ');
}
|
|
1203
|
-
</script>
|
|
1204
|
-
</body>
|
|
1205
|
-
</html>`;
|