vllm-i64 0.3.1 → 0.4.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- package/dist/index.d.mts +75 -1
- package/dist/index.d.ts +75 -1
- package/dist/index.js +42 -0
- package/dist/index.mjs +41 -0
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -267,6 +267,40 @@ interface SearchStatsResponse {
|
|
|
267
267
|
max_per_key: number;
|
|
268
268
|
persist_dir: string | null;
|
|
269
269
|
}
|
|
270
|
+
interface AgentRequest {
|
|
271
|
+
model: string;
|
|
272
|
+
messages: ChatMessage[];
|
|
273
|
+
provider?: string;
|
|
274
|
+
temperature?: number;
|
|
275
|
+
top_p?: number;
|
|
276
|
+
max_tokens?: number;
|
|
277
|
+
}
|
|
278
|
+
interface AgentToolCall {
|
|
279
|
+
id: string;
|
|
280
|
+
name: string;
|
|
281
|
+
arguments: Record<string, unknown>;
|
|
282
|
+
}
|
|
283
|
+
interface AgentToolResult {
|
|
284
|
+
tool_call_id: string;
|
|
285
|
+
name: string;
|
|
286
|
+
result: string;
|
|
287
|
+
}
|
|
288
|
+
interface AgentStep {
|
|
289
|
+
step: number;
|
|
290
|
+
tool_calls: AgentToolCall[];
|
|
291
|
+
tool_results: AgentToolResult[];
|
|
292
|
+
}
|
|
293
|
+
interface AgentResponse {
|
|
294
|
+
response: string | null;
|
|
295
|
+
model: string;
|
|
296
|
+
provider: string;
|
|
297
|
+
steps: AgentStep[];
|
|
298
|
+
finish_reason: string | null;
|
|
299
|
+
error?: {
|
|
300
|
+
message: string;
|
|
301
|
+
type: string;
|
|
302
|
+
};
|
|
303
|
+
}
|
|
270
304
|
|
|
271
305
|
/**
|
|
272
306
|
* vllm-i64 SDK — HTTP Client core
|
|
@@ -293,6 +327,44 @@ declare class HttpClient {
|
|
|
293
327
|
readSSERaw(res: Response): AsyncGenerator<StreamDelta, void, undefined>;
|
|
294
328
|
}
|
|
295
329
|
|
|
330
|
+
/**
|
|
331
|
+
* Agent endpoint — orchestrated tool-use loop via proxy.
|
|
332
|
+
*
|
|
333
|
+
* Sends messages to an external LLM (Claude, GPT, etc.) through
|
|
334
|
+
* the complexity proxy. The server executes tools (sandbox, RAG)
|
|
335
|
+
* and loops until the LLM produces a final text answer.
|
|
336
|
+
*
|
|
337
|
+
* INL - 2025
|
|
338
|
+
*/
|
|
339
|
+
|
|
340
|
+
interface AgentRunOptions {
|
|
341
|
+
model: string;
|
|
342
|
+
provider?: string;
|
|
343
|
+
temperature?: number;
|
|
344
|
+
top_p?: number;
|
|
345
|
+
max_tokens?: number;
|
|
346
|
+
/** Called after each tool-use step completes. */
|
|
347
|
+
onStep?: (step: AgentStep) => void;
|
|
348
|
+
}
|
|
349
|
+
declare class AgentEndpoint {
|
|
350
|
+
private http;
|
|
351
|
+
constructor(http: HttpClient);
|
|
352
|
+
/**
|
|
353
|
+
* Run the agent loop — the server handles tool execution.
|
|
354
|
+
*
|
|
355
|
+
* @example
|
|
356
|
+
* ```ts
|
|
357
|
+
* const result = await client.agent.run(
|
|
358
|
+
* [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
|
|
359
|
+
* { model: "claude-sonnet-4-20250514" },
|
|
360
|
+
* );
|
|
361
|
+
* console.log(result.response);
|
|
362
|
+
* console.log(`Steps: ${result.steps.length}`);
|
|
363
|
+
* ```
|
|
364
|
+
*/
|
|
365
|
+
run(messages: ChatMessage[], options: AgentRunOptions): Promise<AgentResponse>;
|
|
366
|
+
}
|
|
367
|
+
|
|
296
368
|
/**
|
|
297
369
|
* Chat completions endpoint.
|
|
298
370
|
*
|
|
@@ -526,6 +598,8 @@ declare class SearchEndpoint {
|
|
|
526
598
|
|
|
527
599
|
declare class I64Client {
|
|
528
600
|
private http;
|
|
601
|
+
/** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
|
|
602
|
+
readonly agent: AgentEndpoint;
|
|
529
603
|
/** Chat completions (streaming + non-streaming, tool_calls). */
|
|
530
604
|
readonly chat: ChatEndpoint;
|
|
531
605
|
/** Text completions (streaming + batch). */
|
|
@@ -551,4 +625,4 @@ declare class I64Client {
|
|
|
551
625
|
get baseUrl(): string;
|
|
552
626
|
}
|
|
553
627
|
|
|
554
|
-
export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
|
|
628
|
+
export { AgentEndpoint, type AgentRequest, type AgentResponse, type AgentStep, type AgentToolCall, type AgentToolResult, CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -267,6 +267,40 @@ interface SearchStatsResponse {
|
|
|
267
267
|
max_per_key: number;
|
|
268
268
|
persist_dir: string | null;
|
|
269
269
|
}
|
|
270
|
+
interface AgentRequest {
|
|
271
|
+
model: string;
|
|
272
|
+
messages: ChatMessage[];
|
|
273
|
+
provider?: string;
|
|
274
|
+
temperature?: number;
|
|
275
|
+
top_p?: number;
|
|
276
|
+
max_tokens?: number;
|
|
277
|
+
}
|
|
278
|
+
interface AgentToolCall {
|
|
279
|
+
id: string;
|
|
280
|
+
name: string;
|
|
281
|
+
arguments: Record<string, unknown>;
|
|
282
|
+
}
|
|
283
|
+
interface AgentToolResult {
|
|
284
|
+
tool_call_id: string;
|
|
285
|
+
name: string;
|
|
286
|
+
result: string;
|
|
287
|
+
}
|
|
288
|
+
interface AgentStep {
|
|
289
|
+
step: number;
|
|
290
|
+
tool_calls: AgentToolCall[];
|
|
291
|
+
tool_results: AgentToolResult[];
|
|
292
|
+
}
|
|
293
|
+
interface AgentResponse {
|
|
294
|
+
response: string | null;
|
|
295
|
+
model: string;
|
|
296
|
+
provider: string;
|
|
297
|
+
steps: AgentStep[];
|
|
298
|
+
finish_reason: string | null;
|
|
299
|
+
error?: {
|
|
300
|
+
message: string;
|
|
301
|
+
type: string;
|
|
302
|
+
};
|
|
303
|
+
}
|
|
270
304
|
|
|
271
305
|
/**
|
|
272
306
|
* vllm-i64 SDK — HTTP Client core
|
|
@@ -293,6 +327,44 @@ declare class HttpClient {
|
|
|
293
327
|
readSSERaw(res: Response): AsyncGenerator<StreamDelta, void, undefined>;
|
|
294
328
|
}
|
|
295
329
|
|
|
330
|
+
/**
|
|
331
|
+
* Agent endpoint — orchestrated tool-use loop via proxy.
|
|
332
|
+
*
|
|
333
|
+
* Sends messages to an external LLM (Claude, GPT, etc.) through
|
|
334
|
+
* the complexity proxy. The server executes tools (sandbox, RAG)
|
|
335
|
+
* and loops until the LLM produces a final text answer.
|
|
336
|
+
*
|
|
337
|
+
* INL - 2025
|
|
338
|
+
*/
|
|
339
|
+
|
|
340
|
+
interface AgentRunOptions {
|
|
341
|
+
model: string;
|
|
342
|
+
provider?: string;
|
|
343
|
+
temperature?: number;
|
|
344
|
+
top_p?: number;
|
|
345
|
+
max_tokens?: number;
|
|
346
|
+
/** Called after each tool-use step completes. */
|
|
347
|
+
onStep?: (step: AgentStep) => void;
|
|
348
|
+
}
|
|
349
|
+
declare class AgentEndpoint {
|
|
350
|
+
private http;
|
|
351
|
+
constructor(http: HttpClient);
|
|
352
|
+
/**
|
|
353
|
+
* Run the agent loop — the server handles tool execution.
|
|
354
|
+
*
|
|
355
|
+
* @example
|
|
356
|
+
* ```ts
|
|
357
|
+
* const result = await client.agent.run(
|
|
358
|
+
* [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
|
|
359
|
+
* { model: "claude-sonnet-4-20250514" },
|
|
360
|
+
* );
|
|
361
|
+
* console.log(result.response);
|
|
362
|
+
* console.log(`Steps: ${result.steps.length}`);
|
|
363
|
+
* ```
|
|
364
|
+
*/
|
|
365
|
+
run(messages: ChatMessage[], options: AgentRunOptions): Promise<AgentResponse>;
|
|
366
|
+
}
|
|
367
|
+
|
|
296
368
|
/**
|
|
297
369
|
* Chat completions endpoint.
|
|
298
370
|
*
|
|
@@ -526,6 +598,8 @@ declare class SearchEndpoint {
|
|
|
526
598
|
|
|
527
599
|
declare class I64Client {
|
|
528
600
|
private http;
|
|
601
|
+
/** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
|
|
602
|
+
readonly agent: AgentEndpoint;
|
|
529
603
|
/** Chat completions (streaming + non-streaming, tool_calls). */
|
|
530
604
|
readonly chat: ChatEndpoint;
|
|
531
605
|
/** Text completions (streaming + batch). */
|
|
@@ -551,4 +625,4 @@ declare class I64Client {
|
|
|
551
625
|
get baseUrl(): string;
|
|
552
626
|
}
|
|
553
627
|
|
|
554
|
-
export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
|
|
628
|
+
export { AgentEndpoint, type AgentRequest, type AgentResponse, type AgentStep, type AgentToolCall, type AgentToolResult, CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
|
package/dist/index.js
CHANGED
|
@@ -20,6 +20,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
+
AgentEndpoint: () => AgentEndpoint,
|
|
23
24
|
CacheEndpoint: () => CacheEndpoint,
|
|
24
25
|
ChatEndpoint: () => ChatEndpoint,
|
|
25
26
|
CompletionsEndpoint: () => CompletionsEndpoint,
|
|
@@ -153,6 +154,43 @@ var HttpClient = class {
|
|
|
153
154
|
}
|
|
154
155
|
};
|
|
155
156
|
|
|
157
|
+
// src/endpoints/agent.ts
|
|
158
|
+
var AgentEndpoint = class {
|
|
159
|
+
constructor(http) {
|
|
160
|
+
this.http = http;
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Run the agent loop — the server handles tool execution.
|
|
164
|
+
*
|
|
165
|
+
* @example
|
|
166
|
+
* ```ts
|
|
167
|
+
* const result = await client.agent.run(
|
|
168
|
+
* [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
|
|
169
|
+
* { model: "claude-sonnet-4-20250514" },
|
|
170
|
+
* );
|
|
171
|
+
* console.log(result.response);
|
|
172
|
+
* console.log(`Steps: ${result.steps.length}`);
|
|
173
|
+
* ```
|
|
174
|
+
*/
|
|
175
|
+
async run(messages, options) {
|
|
176
|
+
const body = {
|
|
177
|
+
model: options.model,
|
|
178
|
+
messages,
|
|
179
|
+
provider: options.provider,
|
|
180
|
+
temperature: options.temperature,
|
|
181
|
+
top_p: options.top_p,
|
|
182
|
+
max_tokens: options.max_tokens
|
|
183
|
+
};
|
|
184
|
+
const res = await this.http.post("/api/proxy/agent", body);
|
|
185
|
+
if (options.onStep) {
|
|
186
|
+
for (const step of res.steps) {
|
|
187
|
+
options.onStep(step);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
return res;
|
|
191
|
+
}
|
|
192
|
+
};
|
|
193
|
+
|
|
156
194
|
// src/endpoints/chat.ts
|
|
157
195
|
var ChatEndpoint = class {
|
|
158
196
|
constructor(http) {
|
|
@@ -451,6 +489,8 @@ var SearchEndpoint = class {
|
|
|
451
489
|
// src/index.ts
|
|
452
490
|
var I64Client = class {
|
|
453
491
|
http;
|
|
492
|
+
/** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
|
|
493
|
+
agent;
|
|
454
494
|
/** Chat completions (streaming + non-streaming, tool_calls). */
|
|
455
495
|
chat;
|
|
456
496
|
/** Text completions (streaming + batch). */
|
|
@@ -473,6 +513,7 @@ var I64Client = class {
|
|
|
473
513
|
*/
|
|
474
514
|
constructor(baseUrl = "http://localhost:8000", options = {}) {
|
|
475
515
|
this.http = new HttpClient(baseUrl, options);
|
|
516
|
+
this.agent = new AgentEndpoint(this.http);
|
|
476
517
|
this.chat = new ChatEndpoint(this.http);
|
|
477
518
|
this.completions = new CompletionsEndpoint(this.http);
|
|
478
519
|
this.cache = new CacheEndpoint(this.http);
|
|
@@ -489,6 +530,7 @@ var I64Client = class {
|
|
|
489
530
|
var index_default = I64Client;
|
|
490
531
|
// Annotate the CommonJS export names for ESM import in node:
|
|
491
532
|
0 && (module.exports = {
|
|
533
|
+
AgentEndpoint,
|
|
492
534
|
CacheEndpoint,
|
|
493
535
|
ChatEndpoint,
|
|
494
536
|
CompletionsEndpoint,
|
package/dist/index.mjs
CHANGED
|
@@ -118,6 +118,43 @@ var HttpClient = class {
|
|
|
118
118
|
}
|
|
119
119
|
};
|
|
120
120
|
|
|
121
|
+
// src/endpoints/agent.ts
|
|
122
|
+
var AgentEndpoint = class {
|
|
123
|
+
constructor(http) {
|
|
124
|
+
this.http = http;
|
|
125
|
+
}
|
|
126
|
+
/**
|
|
127
|
+
* Run the agent loop — the server handles tool execution.
|
|
128
|
+
*
|
|
129
|
+
* @example
|
|
130
|
+
* ```ts
|
|
131
|
+
* const result = await client.agent.run(
|
|
132
|
+
* [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
|
|
133
|
+
* { model: "claude-sonnet-4-20250514" },
|
|
134
|
+
* );
|
|
135
|
+
* console.log(result.response);
|
|
136
|
+
* console.log(`Steps: ${result.steps.length}`);
|
|
137
|
+
* ```
|
|
138
|
+
*/
|
|
139
|
+
async run(messages, options) {
|
|
140
|
+
const body = {
|
|
141
|
+
model: options.model,
|
|
142
|
+
messages,
|
|
143
|
+
provider: options.provider,
|
|
144
|
+
temperature: options.temperature,
|
|
145
|
+
top_p: options.top_p,
|
|
146
|
+
max_tokens: options.max_tokens
|
|
147
|
+
};
|
|
148
|
+
const res = await this.http.post("/api/proxy/agent", body);
|
|
149
|
+
if (options.onStep) {
|
|
150
|
+
for (const step of res.steps) {
|
|
151
|
+
options.onStep(step);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
return res;
|
|
155
|
+
}
|
|
156
|
+
};
|
|
157
|
+
|
|
121
158
|
// src/endpoints/chat.ts
|
|
122
159
|
var ChatEndpoint = class {
|
|
123
160
|
constructor(http) {
|
|
@@ -416,6 +453,8 @@ var SearchEndpoint = class {
|
|
|
416
453
|
// src/index.ts
|
|
417
454
|
var I64Client = class {
|
|
418
455
|
http;
|
|
456
|
+
/** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
|
|
457
|
+
agent;
|
|
419
458
|
/** Chat completions (streaming + non-streaming, tool_calls). */
|
|
420
459
|
chat;
|
|
421
460
|
/** Text completions (streaming + batch). */
|
|
@@ -438,6 +477,7 @@ var I64Client = class {
|
|
|
438
477
|
*/
|
|
439
478
|
constructor(baseUrl = "http://localhost:8000", options = {}) {
|
|
440
479
|
this.http = new HttpClient(baseUrl, options);
|
|
480
|
+
this.agent = new AgentEndpoint(this.http);
|
|
441
481
|
this.chat = new ChatEndpoint(this.http);
|
|
442
482
|
this.completions = new CompletionsEndpoint(this.http);
|
|
443
483
|
this.cache = new CacheEndpoint(this.http);
|
|
@@ -453,6 +493,7 @@ var I64Client = class {
|
|
|
453
493
|
};
|
|
454
494
|
var index_default = I64Client;
|
|
455
495
|
export {
|
|
496
|
+
AgentEndpoint,
|
|
456
497
|
CacheEndpoint,
|
|
457
498
|
ChatEndpoint,
|
|
458
499
|
CompletionsEndpoint,
|