vllm-i64 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -267,6 +267,40 @@ interface SearchStatsResponse {
267
267
  max_per_key: number;
268
268
  persist_dir: string | null;
269
269
  }
270
/**
 * Request payload for the agent route.
 *
 * These are the fields `AgentEndpoint.run` places in the body it POSTs to
 * `/api/proxy/agent`.
 */
interface AgentRequest {
    /** Model identifier, e.g. `"claude-sonnet-4-20250514"`. */
    model: string;
    /** Conversation messages sent to the model. */
    messages: ChatMessage[];
    /** Optional provider selector — NOTE(review): accepted values are not visible here; confirm against the server. */
    provider?: string;
    /** Optional sampling parameter, forwarded unchanged. */
    temperature?: number;
    /** Optional sampling parameter, forwarded unchanged. */
    top_p?: number;
    /** Optional token limit, forwarded unchanged. */
    max_tokens?: number;
}
278
/** One tool invocation recorded inside an {@link AgentStep}. */
interface AgentToolCall {
    /** Identifier of this call. */
    id: string;
    /** Name of the tool that was invoked. */
    name: string;
    /** Arguments passed to the tool, keyed by argument name; values are untyped. */
    arguments: Record<string, unknown>;
}
283
/** Output of one tool invocation recorded inside an {@link AgentStep}. */
interface AgentToolResult {
    /** Presumably the `id` of the matching {@link AgentToolCall} — confirm against the server. */
    tool_call_id: string;
    /** Name of the tool that produced this result. */
    name: string;
    /** Tool output as a string. */
    result: string;
}
288
/** One iteration of the agent loop: the tool calls issued and their results. */
interface AgentStep {
    /** Step number — NOTE(review): whether this is 0- or 1-based is not visible here. */
    step: number;
    /** Tool calls issued in this step. */
    tool_calls: AgentToolCall[];
    /** Results recorded for this step's tool calls. */
    tool_results: AgentToolResult[];
}
293
/** Result of an agent run, as resolved by `AgentEndpoint.run`. */
interface AgentResponse {
    /** Final text answer; may be `null` — presumably when no answer was produced (confirm). */
    response: string | null;
    /** Model identifier reported for the run. */
    model: string;
    /** Provider reported for the run. */
    provider: string;
    /** Recorded tool-use steps, in the order the server returned them. */
    steps: AgentStep[];
    /** Reason the run ended; may be `null`. */
    finish_reason: string | null;
    /** Error details, present only when the payload carries an error. */
    error?: {
        message: string;
        type: string;
    };
}
270
304
 
271
305
  /**
272
306
  * vllm-i64 SDK — HTTP Client core
@@ -293,6 +327,44 @@ declare class HttpClient {
293
327
  readSSERaw(res: Response): AsyncGenerator<StreamDelta, void, undefined>;
294
328
  }
295
329
 
330
+ /**
331
+ * Agent endpoint — orchestrated tool-use loop via proxy.
332
+ *
333
+ * Sends messages to an external LLM (Claude, GPT, etc.) through
334
+ * the complexity proxy. The server executes tools (sandbox, RAG)
335
+ * and loops until the LLM produces a final text answer.
336
+ *
337
+ * INL - 2025
338
+ */
339
+
340
/** Options accepted by `AgentEndpoint.run`. */
interface AgentRunOptions {
    /** Model identifier, e.g. `"claude-sonnet-4-20250514"`. */
    model: string;
    /** Optional provider selector, forwarded unchanged in the request body. */
    provider?: string;
    /** Optional sampling parameter, forwarded unchanged. */
    temperature?: number;
    /** Optional sampling parameter, forwarded unchanged. */
    top_p?: number;
    /** Optional token limit, forwarded unchanged. */
    max_tokens?: number;
    /**
     * Invoked once per entry of the response's `steps` array, in order,
     * after the full response has been received. Steps are not streamed,
     * so this does not fire while the server-side loop is still running.
     */
    onStep?: (step: AgentStep) => void;
}
349
/** Client for the agent route (`POST /api/proxy/agent`). */
declare class AgentEndpoint {
    private http;
    constructor(http: HttpClient);
    /**
     * Run the agent loop — the server handles tool execution.
     *
     * Sends a single POST and resolves with the returned payload. When
     * `options.onStep` is provided it is called once per entry of the
     * returned `steps` array, after the response has arrived.
     *
     * @param messages Conversation messages forwarded in the request body.
     * @param options Model selection, optional sampling parameters and the optional `onStep` callback.
     * @returns The payload returned by the agent route.
     *
     * @example
     * ```ts
     * const result = await client.agent.run(
     *   [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
     *   { model: "claude-sonnet-4-20250514" },
     * );
     * console.log(result.response);
     * console.log(`Steps: ${result.steps.length}`);
     * ```
     */
    run(messages: ChatMessage[], options: AgentRunOptions): Promise<AgentResponse>;
}
367
+
296
368
  /**
297
369
  * Chat completions endpoint.
298
370
  *
@@ -526,6 +598,8 @@ declare class SearchEndpoint {
526
598
 
527
599
  declare class I64Client {
528
600
  private http;
601
+ /** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
602
+ readonly agent: AgentEndpoint;
529
603
  /** Chat completions (streaming + non-streaming, tool_calls). */
530
604
  readonly chat: ChatEndpoint;
531
605
  /** Text completions (streaming + batch). */
@@ -551,4 +625,4 @@ declare class I64Client {
551
625
  get baseUrl(): string;
552
626
  }
553
627
 
554
- export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
628
+ export { AgentEndpoint, type AgentRequest, type AgentResponse, type AgentStep, type AgentToolCall, type AgentToolResult, CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
package/dist/index.d.ts CHANGED
@@ -267,6 +267,40 @@ interface SearchStatsResponse {
267
267
  max_per_key: number;
268
268
  persist_dir: string | null;
269
269
  }
270
/**
 * Request payload for the agent route.
 *
 * These are the fields `AgentEndpoint.run` places in the body it POSTs to
 * `/api/proxy/agent`.
 */
interface AgentRequest {
    /** Model identifier, e.g. `"claude-sonnet-4-20250514"`. */
    model: string;
    /** Conversation messages sent to the model. */
    messages: ChatMessage[];
    /** Optional provider selector — NOTE(review): accepted values are not visible here; confirm against the server. */
    provider?: string;
    /** Optional sampling parameter, forwarded unchanged. */
    temperature?: number;
    /** Optional sampling parameter, forwarded unchanged. */
    top_p?: number;
    /** Optional token limit, forwarded unchanged. */
    max_tokens?: number;
}
278
/** One tool invocation recorded inside an {@link AgentStep}. */
interface AgentToolCall {
    /** Identifier of this call. */
    id: string;
    /** Name of the tool that was invoked. */
    name: string;
    /** Arguments passed to the tool, keyed by argument name; values are untyped. */
    arguments: Record<string, unknown>;
}
283
/** Output of one tool invocation recorded inside an {@link AgentStep}. */
interface AgentToolResult {
    /** Presumably the `id` of the matching {@link AgentToolCall} — confirm against the server. */
    tool_call_id: string;
    /** Name of the tool that produced this result. */
    name: string;
    /** Tool output as a string. */
    result: string;
}
288
/** One iteration of the agent loop: the tool calls issued and their results. */
interface AgentStep {
    /** Step number — NOTE(review): whether this is 0- or 1-based is not visible here. */
    step: number;
    /** Tool calls issued in this step. */
    tool_calls: AgentToolCall[];
    /** Results recorded for this step's tool calls. */
    tool_results: AgentToolResult[];
}
293
/** Result of an agent run, as resolved by `AgentEndpoint.run`. */
interface AgentResponse {
    /** Final text answer; may be `null` — presumably when no answer was produced (confirm). */
    response: string | null;
    /** Model identifier reported for the run. */
    model: string;
    /** Provider reported for the run. */
    provider: string;
    /** Recorded tool-use steps, in the order the server returned them. */
    steps: AgentStep[];
    /** Reason the run ended; may be `null`. */
    finish_reason: string | null;
    /** Error details, present only when the payload carries an error. */
    error?: {
        message: string;
        type: string;
    };
}
270
304
 
271
305
  /**
272
306
  * vllm-i64 SDK — HTTP Client core
@@ -293,6 +327,44 @@ declare class HttpClient {
293
327
  readSSERaw(res: Response): AsyncGenerator<StreamDelta, void, undefined>;
294
328
  }
295
329
 
330
+ /**
331
+ * Agent endpoint — orchestrated tool-use loop via proxy.
332
+ *
333
+ * Sends messages to an external LLM (Claude, GPT, etc.) through
334
+ * the complexity proxy. The server executes tools (sandbox, RAG)
335
+ * and loops until the LLM produces a final text answer.
336
+ *
337
+ * INL - 2025
338
+ */
339
+
340
/** Options accepted by `AgentEndpoint.run`. */
interface AgentRunOptions {
    /** Model identifier, e.g. `"claude-sonnet-4-20250514"`. */
    model: string;
    /** Optional provider selector, forwarded unchanged in the request body. */
    provider?: string;
    /** Optional sampling parameter, forwarded unchanged. */
    temperature?: number;
    /** Optional sampling parameter, forwarded unchanged. */
    top_p?: number;
    /** Optional token limit, forwarded unchanged. */
    max_tokens?: number;
    /**
     * Invoked once per entry of the response's `steps` array, in order,
     * after the full response has been received. Steps are not streamed,
     * so this does not fire while the server-side loop is still running.
     */
    onStep?: (step: AgentStep) => void;
}
349
/** Client for the agent route (`POST /api/proxy/agent`). */
declare class AgentEndpoint {
    private http;
    constructor(http: HttpClient);
    /**
     * Run the agent loop — the server handles tool execution.
     *
     * Sends a single POST and resolves with the returned payload. When
     * `options.onStep` is provided it is called once per entry of the
     * returned `steps` array, after the response has arrived.
     *
     * @param messages Conversation messages forwarded in the request body.
     * @param options Model selection, optional sampling parameters and the optional `onStep` callback.
     * @returns The payload returned by the agent route.
     *
     * @example
     * ```ts
     * const result = await client.agent.run(
     *   [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
     *   { model: "claude-sonnet-4-20250514" },
     * );
     * console.log(result.response);
     * console.log(`Steps: ${result.steps.length}`);
     * ```
     */
    run(messages: ChatMessage[], options: AgentRunOptions): Promise<AgentResponse>;
}
367
+
296
368
  /**
297
369
  * Chat completions endpoint.
298
370
  *
@@ -526,6 +598,8 @@ declare class SearchEndpoint {
526
598
 
527
599
  declare class I64Client {
528
600
  private http;
601
+ /** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
602
+ readonly agent: AgentEndpoint;
529
603
  /** Chat completions (streaming + non-streaming, tool_calls). */
530
604
  readonly chat: ChatEndpoint;
531
605
  /** Text completions (streaming + batch). */
@@ -551,4 +625,4 @@ declare class I64Client {
551
625
  get baseUrl(): string;
552
626
  }
553
627
 
554
- export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
628
+ export { AgentEndpoint, type AgentRequest, type AgentResponse, type AgentStep, type AgentToolCall, type AgentToolResult, CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
package/dist/index.js CHANGED
@@ -20,6 +20,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
+ AgentEndpoint: () => AgentEndpoint,
23
24
  CacheEndpoint: () => CacheEndpoint,
24
25
  ChatEndpoint: () => ChatEndpoint,
25
26
  CompletionsEndpoint: () => CompletionsEndpoint,
@@ -153,6 +154,43 @@ var HttpClient = class {
153
154
  }
154
155
  };
155
156
 
157
+ // src/endpoints/agent.ts
158
+ var AgentEndpoint = class {
159
+ constructor(http) {
160
+ this.http = http;
161
+ }
162
+ /**
163
+ * Run the agent loop — the server handles tool execution.
164
+ *
165
+ * @example
166
+ * ```ts
167
+ * const result = await client.agent.run(
168
+ * [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
169
+ * { model: "claude-sonnet-4-20250514" },
170
+ * );
171
+ * console.log(result.response);
172
+ * console.log(`Steps: ${result.steps.length}`);
173
+ * ```
174
+ */
175
+ async run(messages, options) {
176
+ const body = {
177
+ model: options.model,
178
+ messages,
179
+ provider: options.provider,
180
+ temperature: options.temperature,
181
+ top_p: options.top_p,
182
+ max_tokens: options.max_tokens
183
+ };
184
+ const res = await this.http.post("/api/proxy/agent", body);
185
+ if (options.onStep) {
186
+ for (const step of res.steps) {
187
+ options.onStep(step);
188
+ }
189
+ }
190
+ return res;
191
+ }
192
+ };
193
+
156
194
  // src/endpoints/chat.ts
157
195
  var ChatEndpoint = class {
158
196
  constructor(http) {
@@ -451,6 +489,8 @@ var SearchEndpoint = class {
451
489
  // src/index.ts
452
490
  var I64Client = class {
453
491
  http;
492
+ /** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
493
+ agent;
454
494
  /** Chat completions (streaming + non-streaming, tool_calls). */
455
495
  chat;
456
496
  /** Text completions (streaming + batch). */
@@ -473,6 +513,7 @@ var I64Client = class {
473
513
  */
474
514
  constructor(baseUrl = "http://localhost:8000", options = {}) {
475
515
  this.http = new HttpClient(baseUrl, options);
516
+ this.agent = new AgentEndpoint(this.http);
476
517
  this.chat = new ChatEndpoint(this.http);
477
518
  this.completions = new CompletionsEndpoint(this.http);
478
519
  this.cache = new CacheEndpoint(this.http);
@@ -489,6 +530,7 @@ var I64Client = class {
489
530
  var index_default = I64Client;
490
531
  // Annotate the CommonJS export names for ESM import in node:
491
532
  0 && (module.exports = {
533
+ AgentEndpoint,
492
534
  CacheEndpoint,
493
535
  ChatEndpoint,
494
536
  CompletionsEndpoint,
package/dist/index.mjs CHANGED
@@ -118,6 +118,43 @@ var HttpClient = class {
118
118
  }
119
119
  };
120
120
 
121
+ // src/endpoints/agent.ts
122
+ var AgentEndpoint = class {
123
+ constructor(http) {
124
+ this.http = http;
125
+ }
126
+ /**
127
+ * Run the agent loop — the server handles tool execution.
128
+ *
129
+ * @example
130
+ * ```ts
131
+ * const result = await client.agent.run(
132
+ * [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
133
+ * { model: "claude-sonnet-4-20250514" },
134
+ * );
135
+ * console.log(result.response);
136
+ * console.log(`Steps: ${result.steps.length}`);
137
+ * ```
138
+ */
139
+ async run(messages, options) {
140
+ const body = {
141
+ model: options.model,
142
+ messages,
143
+ provider: options.provider,
144
+ temperature: options.temperature,
145
+ top_p: options.top_p,
146
+ max_tokens: options.max_tokens
147
+ };
148
+ const res = await this.http.post("/api/proxy/agent", body);
149
+ if (options.onStep) {
150
+ for (const step of res.steps) {
151
+ options.onStep(step);
152
+ }
153
+ }
154
+ return res;
155
+ }
156
+ };
157
+
121
158
  // src/endpoints/chat.ts
122
159
  var ChatEndpoint = class {
123
160
  constructor(http) {
@@ -416,6 +453,8 @@ var SearchEndpoint = class {
416
453
  // src/index.ts
417
454
  var I64Client = class {
418
455
  http;
456
+ /** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
457
+ agent;
419
458
  /** Chat completions (streaming + non-streaming, tool_calls). */
420
459
  chat;
421
460
  /** Text completions (streaming + batch). */
@@ -438,6 +477,7 @@ var I64Client = class {
438
477
  */
439
478
  constructor(baseUrl = "http://localhost:8000", options = {}) {
440
479
  this.http = new HttpClient(baseUrl, options);
480
+ this.agent = new AgentEndpoint(this.http);
441
481
  this.chat = new ChatEndpoint(this.http);
442
482
  this.completions = new CompletionsEndpoint(this.http);
443
483
  this.cache = new CacheEndpoint(this.http);
@@ -453,6 +493,7 @@ var I64Client = class {
453
493
  };
454
494
  var index_default = I64Client;
455
495
  export {
496
+ AgentEndpoint,
456
497
  CacheEndpoint,
457
498
  ChatEndpoint,
458
499
  CompletionsEndpoint,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vllm-i64",
3
- "version": "0.3.1",
3
+ "version": "0.4.0",
4
4
  "description": "TypeScript SDK for vllm-i64 — integer-first inference engine",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",