vllm-i64 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +115 -1
- package/dist/index.d.ts +115 -1
- package/dist/index.js +112 -1
- package/dist/index.mjs +110 -0
- package/package.json +5 -2
package/dist/index.d.mts
CHANGED
|
@@ -217,6 +217,55 @@ interface RAGStatsResult {
|
|
|
217
217
|
total_chunks: number;
|
|
218
218
|
dimension: number;
|
|
219
219
|
}
|
|
220
|
+
interface SearchCompletionRequest {
|
|
221
|
+
query: string;
|
|
222
|
+
max_tokens?: number;
|
|
223
|
+
temperature?: number;
|
|
224
|
+
search_count?: number;
|
|
225
|
+
user?: string;
|
|
226
|
+
stream?: boolean;
|
|
227
|
+
}
|
|
228
|
+
interface SearchSource {
|
|
229
|
+
index: number;
|
|
230
|
+
title: string;
|
|
231
|
+
url: string;
|
|
232
|
+
domain: string;
|
|
233
|
+
favicon: string;
|
|
234
|
+
}
|
|
235
|
+
interface SearchCompletionResponse {
|
|
236
|
+
id: string;
|
|
237
|
+
object: "search.completion";
|
|
238
|
+
model: string;
|
|
239
|
+
query: string;
|
|
240
|
+
choices: {
|
|
241
|
+
index: number;
|
|
242
|
+
message: {
|
|
243
|
+
role: "assistant";
|
|
244
|
+
content: string;
|
|
245
|
+
};
|
|
246
|
+
finish_reason: "stop" | "length";
|
|
247
|
+
}[];
|
|
248
|
+
sources: SearchSource[];
|
|
249
|
+
usage?: UsageInfo;
|
|
250
|
+
}
|
|
251
|
+
interface SearchHistoryEntry {
|
|
252
|
+
query: string;
|
|
253
|
+
sources: SearchSource[];
|
|
254
|
+
answer: string;
|
|
255
|
+
timestamp: number;
|
|
256
|
+
}
|
|
257
|
+
interface SearchHistoryResponse {
|
|
258
|
+
history: SearchHistoryEntry[];
|
|
259
|
+
count: number;
|
|
260
|
+
}
|
|
261
|
+
interface SearchStatsResponse {
|
|
262
|
+
enabled: boolean;
|
|
263
|
+
num_partitions: number;
|
|
264
|
+
total_keys: number;
|
|
265
|
+
total_entries: number;
|
|
266
|
+
max_per_key: number;
|
|
267
|
+
persist_dir: string | null;
|
|
268
|
+
}
|
|
220
269
|
|
|
221
270
|
/**
|
|
222
271
|
* vllm-i64 SDK — HTTP Client core
|
|
@@ -383,6 +432,65 @@ declare class RAGEndpoint {
|
|
|
383
432
|
stats(): Promise<RAGStatsResult>;
|
|
384
433
|
}
|
|
385
434
|
|
|
435
|
+
/**
|
|
436
|
+
* Search endpoints — Perplexity-style search-augmented generation.
|
|
437
|
+
*
|
|
438
|
+
* Token-routed isolation: partition = sha256(api_key ∥ user_id) mod N
|
|
439
|
+
* No data leak possible. No shared cache. No session tokens.
|
|
440
|
+
*
|
|
441
|
+
* INL - 2025
|
|
442
|
+
*/
|
|
443
|
+
|
|
444
|
+
declare class SearchEndpoint {
|
|
445
|
+
private http;
|
|
446
|
+
constructor(http: HttpClient);
|
|
447
|
+
/**
|
|
448
|
+
* Search-augmented generation: query → web search → cited answer.
|
|
449
|
+
*
|
|
450
|
+
* @example
|
|
451
|
+
* ```ts
|
|
452
|
+
* const res = await client.search.create({ query: "What is MoE?" });
|
|
453
|
+
* console.log(res.choices[0].message.content);
|
|
454
|
+
* for (const src of res.sources) {
|
|
455
|
+
* console.log(`[${src.index}] ${src.title} — ${src.url}`);
|
|
456
|
+
* }
|
|
457
|
+
* ```
|
|
458
|
+
*/
|
|
459
|
+
create(params: SearchCompletionRequest): Promise<SearchCompletionResponse>;
|
|
460
|
+
/**
|
|
461
|
+
* Stream search completion — yields text chunks.
|
|
462
|
+
* Sources are sent as the final SSE event.
|
|
463
|
+
*
|
|
464
|
+
* @example
|
|
465
|
+
* ```ts
|
|
466
|
+
* const { stream, sources } = await client.search.stream({ query: "token routing" });
|
|
467
|
+
* for await (const chunk of stream) {
|
|
468
|
+
* process.stdout.write(chunk);
|
|
469
|
+
* }
|
|
470
|
+
* console.log("\nSources:", await sources);
|
|
471
|
+
* ```
|
|
472
|
+
*/
|
|
473
|
+
stream(params: SearchCompletionRequest, signal?: AbortSignal): Promise<{
|
|
474
|
+
stream: AsyncGenerator<string>;
|
|
475
|
+
sources: Promise<SearchSource[]>;
|
|
476
|
+
}>;
|
|
477
|
+
/**
|
|
478
|
+
* Get search history for the authenticated user.
|
|
479
|
+
* History is partitioned by api_key + user — no cross-user access.
|
|
480
|
+
*/
|
|
481
|
+
history(user?: string, limit?: number): Promise<SearchHistoryResponse>;
|
|
482
|
+
/**
|
|
483
|
+
* Clear search history for the authenticated user.
|
|
484
|
+
* Only clears the caller's own partition.
|
|
485
|
+
*/
|
|
486
|
+
clearHistory(user?: string): Promise<{
|
|
487
|
+
status: string;
|
|
488
|
+
removed: number;
|
|
489
|
+
}>;
|
|
490
|
+
/** Search history statistics (admin). */
|
|
491
|
+
stats(): Promise<SearchStatsResponse>;
|
|
492
|
+
}
|
|
493
|
+
|
|
386
494
|
/**
|
|
387
495
|
* vllm-i64 — TypeScript SDK
|
|
388
496
|
*
|
|
@@ -402,6 +510,10 @@ declare class RAGEndpoint {
|
|
|
402
510
|
* process.stdout.write(chunk);
|
|
403
511
|
* }
|
|
404
512
|
*
|
|
513
|
+
* // Search (Perplexity-style, token-routed isolation)
|
|
514
|
+
* const search = await client.search.create({ query: "What is MoE?" });
|
|
515
|
+
* console.log(search.sources);
|
|
516
|
+
*
|
|
405
517
|
* // Admin
|
|
406
518
|
* await client.monitor.snapshot();
|
|
407
519
|
* await client.cache.purge();
|
|
@@ -425,6 +537,8 @@ declare class I64Client {
|
|
|
425
537
|
readonly monitor: MonitorEndpoint;
|
|
426
538
|
/** RAG — index, search, stats. */
|
|
427
539
|
readonly rag: RAGEndpoint;
|
|
540
|
+
/** Web search — Perplexity-style with token-routed isolation. */
|
|
541
|
+
readonly search: SearchEndpoint;
|
|
428
542
|
/**
|
|
429
543
|
* Create a vllm-i64 client.
|
|
430
544
|
*
|
|
@@ -436,4 +550,4 @@ declare class I64Client {
|
|
|
436
550
|
get baseUrl(): string;
|
|
437
551
|
}
|
|
438
552
|
|
|
439
|
-
export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
|
|
553
|
+
export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -217,6 +217,55 @@ interface RAGStatsResult {
|
|
|
217
217
|
total_chunks: number;
|
|
218
218
|
dimension: number;
|
|
219
219
|
}
|
|
220
|
+
interface SearchCompletionRequest {
|
|
221
|
+
query: string;
|
|
222
|
+
max_tokens?: number;
|
|
223
|
+
temperature?: number;
|
|
224
|
+
search_count?: number;
|
|
225
|
+
user?: string;
|
|
226
|
+
stream?: boolean;
|
|
227
|
+
}
|
|
228
|
+
interface SearchSource {
|
|
229
|
+
index: number;
|
|
230
|
+
title: string;
|
|
231
|
+
url: string;
|
|
232
|
+
domain: string;
|
|
233
|
+
favicon: string;
|
|
234
|
+
}
|
|
235
|
+
interface SearchCompletionResponse {
|
|
236
|
+
id: string;
|
|
237
|
+
object: "search.completion";
|
|
238
|
+
model: string;
|
|
239
|
+
query: string;
|
|
240
|
+
choices: {
|
|
241
|
+
index: number;
|
|
242
|
+
message: {
|
|
243
|
+
role: "assistant";
|
|
244
|
+
content: string;
|
|
245
|
+
};
|
|
246
|
+
finish_reason: "stop" | "length";
|
|
247
|
+
}[];
|
|
248
|
+
sources: SearchSource[];
|
|
249
|
+
usage?: UsageInfo;
|
|
250
|
+
}
|
|
251
|
+
interface SearchHistoryEntry {
|
|
252
|
+
query: string;
|
|
253
|
+
sources: SearchSource[];
|
|
254
|
+
answer: string;
|
|
255
|
+
timestamp: number;
|
|
256
|
+
}
|
|
257
|
+
interface SearchHistoryResponse {
|
|
258
|
+
history: SearchHistoryEntry[];
|
|
259
|
+
count: number;
|
|
260
|
+
}
|
|
261
|
+
interface SearchStatsResponse {
|
|
262
|
+
enabled: boolean;
|
|
263
|
+
num_partitions: number;
|
|
264
|
+
total_keys: number;
|
|
265
|
+
total_entries: number;
|
|
266
|
+
max_per_key: number;
|
|
267
|
+
persist_dir: string | null;
|
|
268
|
+
}
|
|
220
269
|
|
|
221
270
|
/**
|
|
222
271
|
* vllm-i64 SDK — HTTP Client core
|
|
@@ -383,6 +432,65 @@ declare class RAGEndpoint {
|
|
|
383
432
|
stats(): Promise<RAGStatsResult>;
|
|
384
433
|
}
|
|
385
434
|
|
|
435
|
+
/**
|
|
436
|
+
* Search endpoints — Perplexity-style search-augmented generation.
|
|
437
|
+
*
|
|
438
|
+
* Token-routed isolation: partition = sha256(api_key ∥ user_id) mod N
|
|
439
|
+
* No data leak possible. No shared cache. No session tokens.
|
|
440
|
+
*
|
|
441
|
+
* INL - 2025
|
|
442
|
+
*/
|
|
443
|
+
|
|
444
|
+
declare class SearchEndpoint {
|
|
445
|
+
private http;
|
|
446
|
+
constructor(http: HttpClient);
|
|
447
|
+
/**
|
|
448
|
+
* Search-augmented generation: query → web search → cited answer.
|
|
449
|
+
*
|
|
450
|
+
* @example
|
|
451
|
+
* ```ts
|
|
452
|
+
* const res = await client.search.create({ query: "What is MoE?" });
|
|
453
|
+
* console.log(res.choices[0].message.content);
|
|
454
|
+
* for (const src of res.sources) {
|
|
455
|
+
* console.log(`[${src.index}] ${src.title} — ${src.url}`);
|
|
456
|
+
* }
|
|
457
|
+
* ```
|
|
458
|
+
*/
|
|
459
|
+
create(params: SearchCompletionRequest): Promise<SearchCompletionResponse>;
|
|
460
|
+
/**
|
|
461
|
+
* Stream search completion — yields text chunks.
|
|
462
|
+
* Sources are sent as the final SSE event.
|
|
463
|
+
*
|
|
464
|
+
* @example
|
|
465
|
+
* ```ts
|
|
466
|
+
* const { stream, sources } = await client.search.stream({ query: "token routing" });
|
|
467
|
+
* for await (const chunk of stream) {
|
|
468
|
+
* process.stdout.write(chunk);
|
|
469
|
+
* }
|
|
470
|
+
* console.log("\nSources:", await sources);
|
|
471
|
+
* ```
|
|
472
|
+
*/
|
|
473
|
+
stream(params: SearchCompletionRequest, signal?: AbortSignal): Promise<{
|
|
474
|
+
stream: AsyncGenerator<string>;
|
|
475
|
+
sources: Promise<SearchSource[]>;
|
|
476
|
+
}>;
|
|
477
|
+
/**
|
|
478
|
+
* Get search history for the authenticated user.
|
|
479
|
+
* History is partitioned by api_key + user — no cross-user access.
|
|
480
|
+
*/
|
|
481
|
+
history(user?: string, limit?: number): Promise<SearchHistoryResponse>;
|
|
482
|
+
/**
|
|
483
|
+
* Clear search history for the authenticated user.
|
|
484
|
+
* Only clears the caller's own partition.
|
|
485
|
+
*/
|
|
486
|
+
clearHistory(user?: string): Promise<{
|
|
487
|
+
status: string;
|
|
488
|
+
removed: number;
|
|
489
|
+
}>;
|
|
490
|
+
/** Search history statistics (admin). */
|
|
491
|
+
stats(): Promise<SearchStatsResponse>;
|
|
492
|
+
}
|
|
493
|
+
|
|
386
494
|
/**
|
|
387
495
|
* vllm-i64 — TypeScript SDK
|
|
388
496
|
*
|
|
@@ -402,6 +510,10 @@ declare class RAGEndpoint {
|
|
|
402
510
|
* process.stdout.write(chunk);
|
|
403
511
|
* }
|
|
404
512
|
*
|
|
513
|
+
* // Search (Perplexity-style, token-routed isolation)
|
|
514
|
+
* const search = await client.search.create({ query: "What is MoE?" });
|
|
515
|
+
* console.log(search.sources);
|
|
516
|
+
*
|
|
405
517
|
* // Admin
|
|
406
518
|
* await client.monitor.snapshot();
|
|
407
519
|
* await client.cache.purge();
|
|
@@ -425,6 +537,8 @@ declare class I64Client {
|
|
|
425
537
|
readonly monitor: MonitorEndpoint;
|
|
426
538
|
/** RAG — index, search, stats. */
|
|
427
539
|
readonly rag: RAGEndpoint;
|
|
540
|
+
/** Web search — Perplexity-style with token-routed isolation. */
|
|
541
|
+
readonly search: SearchEndpoint;
|
|
428
542
|
/**
|
|
429
543
|
* Create a vllm-i64 client.
|
|
430
544
|
*
|
|
@@ -436,4 +550,4 @@ declare class I64Client {
|
|
|
436
550
|
get baseUrl(): string;
|
|
437
551
|
}
|
|
438
552
|
|
|
439
|
-
export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
|
|
553
|
+
export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
|
package/dist/index.js
CHANGED
|
@@ -28,6 +28,7 @@ __export(index_exports, {
|
|
|
28
28
|
LoRAEndpoint: () => LoRAEndpoint,
|
|
29
29
|
MonitorEndpoint: () => MonitorEndpoint,
|
|
30
30
|
RAGEndpoint: () => RAGEndpoint,
|
|
31
|
+
SearchEndpoint: () => SearchEndpoint,
|
|
31
32
|
default: () => index_default
|
|
32
33
|
});
|
|
33
34
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -341,6 +342,112 @@ var RAGEndpoint = class {
|
|
|
341
342
|
}
|
|
342
343
|
};
|
|
343
344
|
|
|
345
|
+
// src/endpoints/search.ts
|
|
346
|
+
var SearchEndpoint = class {
|
|
347
|
+
constructor(http) {
|
|
348
|
+
this.http = http;
|
|
349
|
+
}
|
|
350
|
+
/**
|
|
351
|
+
* Search-augmented generation: query → web search → cited answer.
|
|
352
|
+
*
|
|
353
|
+
* @example
|
|
354
|
+
* ```ts
|
|
355
|
+
* const res = await client.search.create({ query: "What is MoE?" });
|
|
356
|
+
* console.log(res.choices[0].message.content);
|
|
357
|
+
* for (const src of res.sources) {
|
|
358
|
+
* console.log(`[${src.index}] ${src.title} — ${src.url}`);
|
|
359
|
+
* }
|
|
360
|
+
* ```
|
|
361
|
+
*/
|
|
362
|
+
async create(params) {
|
|
363
|
+
return this.http.post("/v1/search/completions", { ...params, stream: false });
|
|
364
|
+
}
|
|
365
|
+
/**
|
|
366
|
+
* Stream search completion — yields text chunks.
|
|
367
|
+
* Sources are sent as the final SSE event.
|
|
368
|
+
*
|
|
369
|
+
* @example
|
|
370
|
+
* ```ts
|
|
371
|
+
* const { stream, sources } = await client.search.stream({ query: "token routing" });
|
|
372
|
+
* for await (const chunk of stream) {
|
|
373
|
+
* process.stdout.write(chunk);
|
|
374
|
+
* }
|
|
375
|
+
* console.log("\nSources:", await sources);
|
|
376
|
+
* ```
|
|
377
|
+
*/
|
|
378
|
+
async stream(params, signal) {
|
|
379
|
+
const res = await this.http.fetch(
|
|
380
|
+
"/v1/search/completions",
|
|
381
|
+
{ method: "POST", body: JSON.stringify({ ...params, stream: true }) },
|
|
382
|
+
signal
|
|
383
|
+
);
|
|
384
|
+
let resolveSourcesFn;
|
|
385
|
+
const sourcesPromise = new Promise((resolve) => {
|
|
386
|
+
resolveSourcesFn = resolve;
|
|
387
|
+
});
|
|
388
|
+
const self = this;
|
|
389
|
+
async function* readStream() {
|
|
390
|
+
if (!res.body) return;
|
|
391
|
+
const reader = res.body.getReader();
|
|
392
|
+
const decoder = new TextDecoder();
|
|
393
|
+
let buffer = "";
|
|
394
|
+
let foundSources = false;
|
|
395
|
+
try {
|
|
396
|
+
while (true) {
|
|
397
|
+
const { done, value } = await reader.read();
|
|
398
|
+
if (done) break;
|
|
399
|
+
buffer += decoder.decode(value, { stream: true });
|
|
400
|
+
const lines = buffer.split("\n");
|
|
401
|
+
buffer = lines.pop() ?? "";
|
|
402
|
+
for (const line of lines) {
|
|
403
|
+
const trimmed = line.trim();
|
|
404
|
+
if (!trimmed.startsWith("data: ")) continue;
|
|
405
|
+
const payload = trimmed.slice(6);
|
|
406
|
+
if (payload === "[DONE]") return;
|
|
407
|
+
try {
|
|
408
|
+
const data = JSON.parse(payload);
|
|
409
|
+
if (data.sources) {
|
|
410
|
+
foundSources = true;
|
|
411
|
+
resolveSourcesFn(data.sources);
|
|
412
|
+
continue;
|
|
413
|
+
}
|
|
414
|
+
const content = data.choices?.[0]?.delta?.content ?? "";
|
|
415
|
+
if (content) yield content;
|
|
416
|
+
} catch {
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
} finally {
|
|
421
|
+
reader.releaseLock();
|
|
422
|
+
if (!foundSources) resolveSourcesFn([]);
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
return { stream: readStream(), sources: sourcesPromise };
|
|
426
|
+
}
|
|
427
|
+
/**
|
|
428
|
+
* Get search history for the authenticated user.
|
|
429
|
+
* History is partitioned by api_key + user — no cross-user access.
|
|
430
|
+
*/
|
|
431
|
+
async history(user, limit = 50) {
|
|
432
|
+
const params = new URLSearchParams({ limit: String(limit) });
|
|
433
|
+
if (user) params.set("user", user);
|
|
434
|
+
return this.http.get(`/v1/search/history?${params}`);
|
|
435
|
+
}
|
|
436
|
+
/**
|
|
437
|
+
* Clear search history for the authenticated user.
|
|
438
|
+
* Only clears the caller's own partition.
|
|
439
|
+
*/
|
|
440
|
+
async clearHistory(user) {
|
|
441
|
+
const params = user ? `?user=${encodeURIComponent(user)}` : "";
|
|
442
|
+
const res = await this.http.fetch(`/v1/search/history${params}`, { method: "DELETE" });
|
|
443
|
+
return res.json();
|
|
444
|
+
}
|
|
445
|
+
/** Search history statistics (admin). */
|
|
446
|
+
async stats() {
|
|
447
|
+
return this.http.get("/v1/search/stats");
|
|
448
|
+
}
|
|
449
|
+
};
|
|
450
|
+
|
|
344
451
|
// src/index.ts
|
|
345
452
|
var I64Client = class {
|
|
346
453
|
http;
|
|
@@ -356,6 +463,8 @@ var I64Client = class {
|
|
|
356
463
|
monitor;
|
|
357
464
|
/** RAG — index, search, stats. */
|
|
358
465
|
rag;
|
|
466
|
+
/** Web search — Perplexity-style with token-routed isolation. */
|
|
467
|
+
search;
|
|
359
468
|
/**
|
|
360
469
|
* Create a vllm-i64 client.
|
|
361
470
|
*
|
|
@@ -370,6 +479,7 @@ var I64Client = class {
|
|
|
370
479
|
this.lora = new LoRAEndpoint(this.http);
|
|
371
480
|
this.monitor = new MonitorEndpoint(this.http);
|
|
372
481
|
this.rag = new RAGEndpoint(this.http);
|
|
482
|
+
this.search = new SearchEndpoint(this.http);
|
|
373
483
|
}
|
|
374
484
|
/** Server base URL. */
|
|
375
485
|
get baseUrl() {
|
|
@@ -386,5 +496,6 @@ var index_default = I64Client;
|
|
|
386
496
|
I64Client,
|
|
387
497
|
LoRAEndpoint,
|
|
388
498
|
MonitorEndpoint,
|
|
389
|
-
RAGEndpoint
|
|
499
|
+
RAGEndpoint,
|
|
500
|
+
SearchEndpoint
|
|
390
501
|
});
|
package/dist/index.mjs
CHANGED
|
@@ -307,6 +307,112 @@ var RAGEndpoint = class {
|
|
|
307
307
|
}
|
|
308
308
|
};
|
|
309
309
|
|
|
310
|
+
// src/endpoints/search.ts
|
|
311
|
+
var SearchEndpoint = class {
|
|
312
|
+
constructor(http) {
|
|
313
|
+
this.http = http;
|
|
314
|
+
}
|
|
315
|
+
/**
|
|
316
|
+
* Search-augmented generation: query → web search → cited answer.
|
|
317
|
+
*
|
|
318
|
+
* @example
|
|
319
|
+
* ```ts
|
|
320
|
+
* const res = await client.search.create({ query: "What is MoE?" });
|
|
321
|
+
* console.log(res.choices[0].message.content);
|
|
322
|
+
* for (const src of res.sources) {
|
|
323
|
+
* console.log(`[${src.index}] ${src.title} — ${src.url}`);
|
|
324
|
+
* }
|
|
325
|
+
* ```
|
|
326
|
+
*/
|
|
327
|
+
async create(params) {
|
|
328
|
+
return this.http.post("/v1/search/completions", { ...params, stream: false });
|
|
329
|
+
}
|
|
330
|
+
/**
|
|
331
|
+
* Stream search completion — yields text chunks.
|
|
332
|
+
* Sources are sent as the final SSE event.
|
|
333
|
+
*
|
|
334
|
+
* @example
|
|
335
|
+
* ```ts
|
|
336
|
+
* const { stream, sources } = await client.search.stream({ query: "token routing" });
|
|
337
|
+
* for await (const chunk of stream) {
|
|
338
|
+
* process.stdout.write(chunk);
|
|
339
|
+
* }
|
|
340
|
+
* console.log("\nSources:", await sources);
|
|
341
|
+
* ```
|
|
342
|
+
*/
|
|
343
|
+
async stream(params, signal) {
|
|
344
|
+
const res = await this.http.fetch(
|
|
345
|
+
"/v1/search/completions",
|
|
346
|
+
{ method: "POST", body: JSON.stringify({ ...params, stream: true }) },
|
|
347
|
+
signal
|
|
348
|
+
);
|
|
349
|
+
let resolveSourcesFn;
|
|
350
|
+
const sourcesPromise = new Promise((resolve) => {
|
|
351
|
+
resolveSourcesFn = resolve;
|
|
352
|
+
});
|
|
353
|
+
const self = this;
|
|
354
|
+
async function* readStream() {
|
|
355
|
+
if (!res.body) return;
|
|
356
|
+
const reader = res.body.getReader();
|
|
357
|
+
const decoder = new TextDecoder();
|
|
358
|
+
let buffer = "";
|
|
359
|
+
let foundSources = false;
|
|
360
|
+
try {
|
|
361
|
+
while (true) {
|
|
362
|
+
const { done, value } = await reader.read();
|
|
363
|
+
if (done) break;
|
|
364
|
+
buffer += decoder.decode(value, { stream: true });
|
|
365
|
+
const lines = buffer.split("\n");
|
|
366
|
+
buffer = lines.pop() ?? "";
|
|
367
|
+
for (const line of lines) {
|
|
368
|
+
const trimmed = line.trim();
|
|
369
|
+
if (!trimmed.startsWith("data: ")) continue;
|
|
370
|
+
const payload = trimmed.slice(6);
|
|
371
|
+
if (payload === "[DONE]") return;
|
|
372
|
+
try {
|
|
373
|
+
const data = JSON.parse(payload);
|
|
374
|
+
if (data.sources) {
|
|
375
|
+
foundSources = true;
|
|
376
|
+
resolveSourcesFn(data.sources);
|
|
377
|
+
continue;
|
|
378
|
+
}
|
|
379
|
+
const content = data.choices?.[0]?.delta?.content ?? "";
|
|
380
|
+
if (content) yield content;
|
|
381
|
+
} catch {
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
} finally {
|
|
386
|
+
reader.releaseLock();
|
|
387
|
+
if (!foundSources) resolveSourcesFn([]);
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
return { stream: readStream(), sources: sourcesPromise };
|
|
391
|
+
}
|
|
392
|
+
/**
|
|
393
|
+
* Get search history for the authenticated user.
|
|
394
|
+
* History is partitioned by api_key + user — no cross-user access.
|
|
395
|
+
*/
|
|
396
|
+
async history(user, limit = 50) {
|
|
397
|
+
const params = new URLSearchParams({ limit: String(limit) });
|
|
398
|
+
if (user) params.set("user", user);
|
|
399
|
+
return this.http.get(`/v1/search/history?${params}`);
|
|
400
|
+
}
|
|
401
|
+
/**
|
|
402
|
+
* Clear search history for the authenticated user.
|
|
403
|
+
* Only clears the caller's own partition.
|
|
404
|
+
*/
|
|
405
|
+
async clearHistory(user) {
|
|
406
|
+
const params = user ? `?user=${encodeURIComponent(user)}` : "";
|
|
407
|
+
const res = await this.http.fetch(`/v1/search/history${params}`, { method: "DELETE" });
|
|
408
|
+
return res.json();
|
|
409
|
+
}
|
|
410
|
+
/** Search history statistics (admin). */
|
|
411
|
+
async stats() {
|
|
412
|
+
return this.http.get("/v1/search/stats");
|
|
413
|
+
}
|
|
414
|
+
};
|
|
415
|
+
|
|
310
416
|
// src/index.ts
|
|
311
417
|
var I64Client = class {
|
|
312
418
|
http;
|
|
@@ -322,6 +428,8 @@ var I64Client = class {
|
|
|
322
428
|
monitor;
|
|
323
429
|
/** RAG — index, search, stats. */
|
|
324
430
|
rag;
|
|
431
|
+
/** Web search — Perplexity-style with token-routed isolation. */
|
|
432
|
+
search;
|
|
325
433
|
/**
|
|
326
434
|
* Create a vllm-i64 client.
|
|
327
435
|
*
|
|
@@ -336,6 +444,7 @@ var I64Client = class {
|
|
|
336
444
|
this.lora = new LoRAEndpoint(this.http);
|
|
337
445
|
this.monitor = new MonitorEndpoint(this.http);
|
|
338
446
|
this.rag = new RAGEndpoint(this.http);
|
|
447
|
+
this.search = new SearchEndpoint(this.http);
|
|
339
448
|
}
|
|
340
449
|
/** Server base URL. */
|
|
341
450
|
get baseUrl() {
|
|
@@ -352,5 +461,6 @@ export {
|
|
|
352
461
|
LoRAEndpoint,
|
|
353
462
|
MonitorEndpoint,
|
|
354
463
|
RAGEndpoint,
|
|
464
|
+
SearchEndpoint,
|
|
355
465
|
index_default as default
|
|
356
466
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vllm-i64",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "TypeScript SDK for vllm-i64 — integer-first inference engine",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -28,7 +28,10 @@
|
|
|
28
28
|
"moe",
|
|
29
29
|
"openai",
|
|
30
30
|
"ai",
|
|
31
|
-
"complexity"
|
|
31
|
+
"complexity",
|
|
32
|
+
"search",
|
|
33
|
+
"perplexity",
|
|
34
|
+
"security"
|
|
32
35
|
],
|
|
33
36
|
"author": "Complexity-ML / INL",
|
|
34
37
|
"license": "Apache-2.0",
|