npm - vllm-i64 - Versions diffs - 0.3.1 → 0.4.0 - Mend

vllm-i64 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.d.mts CHANGED

@@ -267,6 +267,40 @@ interface SearchStatsResponse {
     max_per_key: number;
     persist_dir: string | null;
 }
+interface AgentRequest {
+    model: string;
+    messages: ChatMessage[];
+    provider?: string;
+    temperature?: number;
+    top_p?: number;
+    max_tokens?: number;
+}
+interface AgentToolCall {
+    id: string;
+    name: string;
+    arguments: Record<string, unknown>;
+}
+interface AgentToolResult {
+    tool_call_id: string;
+    name: string;
+    result: string;
+}
+interface AgentStep {
+    step: number;
+    tool_calls: AgentToolCall[];
+    tool_results: AgentToolResult[];
+}
+interface AgentResponse {
+    response: string | null;
+    model: string;
+    provider: string;
+    steps: AgentStep[];
+    finish_reason: string | null;
+    error?: {
+        message: string;
+        type: string;
+    };
+}
 /**
  * vllm-i64 SDK — HTTP Client core
@@ -293,6 +327,44 @@ declare class HttpClient {
     readSSERaw(res: Response): AsyncGenerator<StreamDelta, void, undefined>;
 }
+/**
+ * Agent endpoint — orchestrated tool-use loop via proxy.
+ *
+ * Sends messages to an external LLM (Claude, GPT, etc.) through
+ * the complexity proxy. The server executes tools (sandbox, RAG)
+ * and loops until the LLM produces a final text answer.
+ *
+ * INL - 2025
+ */
+interface AgentRunOptions {
+    model: string;
+    provider?: string;
+    temperature?: number;
+    top_p?: number;
+    max_tokens?: number;
+    /** Called after each tool-use step completes. */
+    onStep?: (step: AgentStep) => void;
+}
+declare class AgentEndpoint {
+    private http;
+    constructor(http: HttpClient);
+    /**
+     * Run the agent loop — the server handles tool execution.
+     *
+     * @example
+     * ```ts
+     * const result = await client.agent.run(
+     *   [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
+     *   { model: "claude-sonnet-4-20250514" },
+     * );
+     * console.log(result.response);
+     * console.log(`Steps: ${result.steps.length}`);
+     * ```
+     */
+    run(messages: ChatMessage[], options: AgentRunOptions): Promise<AgentResponse>;
+}
 /**
  * Chat completions endpoint.
  *
@@ -526,6 +598,8 @@ declare class SearchEndpoint {
 declare class I64Client {
     private http;
+    /** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
+    readonly agent: AgentEndpoint;
     /** Chat completions (streaming + non-streaming, tool_calls). */
     readonly chat: ChatEndpoint;
     /** Text completions (streaming + batch). */
@@ -551,4 +625,4 @@ declare class I64Client {
     get baseUrl(): string;
 }
-export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
+export { AgentEndpoint, type AgentRequest, type AgentResponse, type AgentStep, type AgentToolCall, type AgentToolResult, CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };

package/dist/index.d.ts CHANGED

@@ -267,6 +267,40 @@ interface SearchStatsResponse {
     max_per_key: number;
     persist_dir: string | null;
 }
+interface AgentRequest {
+    model: string;
+    messages: ChatMessage[];
+    provider?: string;
+    temperature?: number;
+    top_p?: number;
+    max_tokens?: number;
+}
+interface AgentToolCall {
+    id: string;
+    name: string;
+    arguments: Record<string, unknown>;
+}
+interface AgentToolResult {
+    tool_call_id: string;
+    name: string;
+    result: string;
+}
+interface AgentStep {
+    step: number;
+    tool_calls: AgentToolCall[];
+    tool_results: AgentToolResult[];
+}
+interface AgentResponse {
+    response: string | null;
+    model: string;
+    provider: string;
+    steps: AgentStep[];
+    finish_reason: string | null;
+    error?: {
+        message: string;
+        type: string;
+    };
+}
 /**
  * vllm-i64 SDK — HTTP Client core
@@ -293,6 +327,44 @@ declare class HttpClient {
     readSSERaw(res: Response): AsyncGenerator<StreamDelta, void, undefined>;
 }
+/**
+ * Agent endpoint — orchestrated tool-use loop via proxy.
+ *
+ * Sends messages to an external LLM (Claude, GPT, etc.) through
+ * the complexity proxy. The server executes tools (sandbox, RAG)
+ * and loops until the LLM produces a final text answer.
+ *
+ * INL - 2025
+ */
+interface AgentRunOptions {
+    model: string;
+    provider?: string;
+    temperature?: number;
+    top_p?: number;
+    max_tokens?: number;
+    /** Called after each tool-use step completes. */
+    onStep?: (step: AgentStep) => void;
+}
+declare class AgentEndpoint {
+    private http;
+    constructor(http: HttpClient);
+    /**
+     * Run the agent loop — the server handles tool execution.
+     *
+     * @example
+     * ```ts
+     * const result = await client.agent.run(
+     *   [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
+     *   { model: "claude-sonnet-4-20250514" },
+     * );
+     * console.log(result.response);
+     * console.log(`Steps: ${result.steps.length}`);
+     * ```
+     */
+    run(messages: ChatMessage[], options: AgentRunOptions): Promise<AgentResponse>;
+}
 /**
  * Chat completions endpoint.
  *
@@ -526,6 +598,8 @@ declare class SearchEndpoint {
 declare class I64Client {
     private http;
+    /** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
+    readonly agent: AgentEndpoint;
     /** Chat completions (streaming + non-streaming, tool_calls). */
     readonly chat: ChatEndpoint;
     /** Text completions (streaming + batch). */
@@ -551,4 +625,4 @@ declare class I64Client {
     get baseUrl(): string;
 }
-export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
+export { AgentEndpoint, type AgentRequest, type AgentResponse, type AgentStep, type AgentToolCall, type AgentToolResult, CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };

package/dist/index.js CHANGED

@@ -20,6 +20,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
+  AgentEndpoint: () => AgentEndpoint,
   CacheEndpoint: () => CacheEndpoint,
   ChatEndpoint: () => ChatEndpoint,
   CompletionsEndpoint: () => CompletionsEndpoint,
@@ -153,6 +154,43 @@ var HttpClient = class {
   }
 };
+// src/endpoints/agent.ts
+var AgentEndpoint = class {
+  constructor(http) {
+    this.http = http;
+  }
+  /**
+   * Run the agent loop — the server handles tool execution.
+   *
+   * @example
+   * ```ts
+   * const result = await client.agent.run(
+   *   [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
+   *   { model: "claude-sonnet-4-20250514" },
+   * );
+   * console.log(result.response);
+   * console.log(`Steps: ${result.steps.length}`);
+   * ```
+   */
+  async run(messages, options) {
+    const body = {
+      model: options.model,
+      messages,
+      provider: options.provider,
+      temperature: options.temperature,
+      top_p: options.top_p,
+      max_tokens: options.max_tokens
+    };
+    const res = await this.http.post("/api/proxy/agent", body);
+    if (options.onStep) {
+      for (const step of res.steps) {
+        options.onStep(step);
+      }
+    }
+    return res;
+  }
+};
 // src/endpoints/chat.ts
 var ChatEndpoint = class {
   constructor(http) {
@@ -451,6 +489,8 @@ var SearchEndpoint = class {
 // src/index.ts
 var I64Client = class {
   http;
+  /** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
+  agent;
   /** Chat completions (streaming + non-streaming, tool_calls). */
   chat;
   /** Text completions (streaming + batch). */
@@ -473,6 +513,7 @@ var I64Client = class {
    */
   constructor(baseUrl = "http://localhost:8000", options = {}) {
     this.http = new HttpClient(baseUrl, options);
+    this.agent = new AgentEndpoint(this.http);
     this.chat = new ChatEndpoint(this.http);
     this.completions = new CompletionsEndpoint(this.http);
     this.cache = new CacheEndpoint(this.http);
@@ -489,6 +530,7 @@ var I64Client = class {
 var index_default = I64Client;
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
+  AgentEndpoint,
   CacheEndpoint,
   ChatEndpoint,
   CompletionsEndpoint,

package/dist/index.mjs CHANGED

@@ -118,6 +118,43 @@ var HttpClient = class {
   }
 };
+// src/endpoints/agent.ts
+var AgentEndpoint = class {
+  constructor(http) {
+    this.http = http;
+  }
+  /**
+   * Run the agent loop — the server handles tool execution.
+   *
+   * @example
+   * ```ts
+   * const result = await client.agent.run(
+   *   [{ role: "user", content: "Write a Python script that computes fibonacci(30)" }],
+   *   { model: "claude-sonnet-4-20250514" },
+   * );
+   * console.log(result.response);
+   * console.log(`Steps: ${result.steps.length}`);
+   * ```
+   */
+  async run(messages, options) {
+    const body = {
+      model: options.model,
+      messages,
+      provider: options.provider,
+      temperature: options.temperature,
+      top_p: options.top_p,
+      max_tokens: options.max_tokens
+    };
+    const res = await this.http.post("/api/proxy/agent", body);
+    if (options.onStep) {
+      for (const step of res.steps) {
+        options.onStep(step);
+      }
+    }
+    return res;
+  }
+};
 // src/endpoints/chat.ts
 var ChatEndpoint = class {
   constructor(http) {
@@ -416,6 +453,8 @@ var SearchEndpoint = class {
 // src/index.ts
 var I64Client = class {
   http;
+  /** Agent — orchestrated tool-use loop (sandbox + RAG) via external LLM. */
+  agent;
   /** Chat completions (streaming + non-streaming, tool_calls). */
   chat;
   /** Text completions (streaming + batch). */
@@ -438,6 +477,7 @@ var I64Client = class {
    */
   constructor(baseUrl = "http://localhost:8000", options = {}) {
     this.http = new HttpClient(baseUrl, options);
+    this.agent = new AgentEndpoint(this.http);
     this.chat = new ChatEndpoint(this.http);
     this.completions = new CompletionsEndpoint(this.http);
     this.cache = new CacheEndpoint(this.http);
@@ -453,6 +493,7 @@ var I64Client = class {
 };
 var index_default = I64Client;
 export {
+  AgentEndpoint,
   CacheEndpoint,
   ChatEndpoint,
   CompletionsEndpoint,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "vllm-i64",
-  "version": "0.3.1",
+  "version": "0.4.0",
   "description": "TypeScript SDK for vllm-i64 — integer-first inference engine",
   "main": "dist/index.js",
   "module": "dist/index.mjs",