npm - vllm-i64 - Versions diffs - 0.2.0 → 0.3.0 - Mend

vllm-i64 0.2.0 → 0.3.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (5)

package/dist/index.d.mts CHANGED Viewed

@@ -217,6 +217,55 @@ interface RAGStatsResult {
     total_chunks: number;
     dimension: number;
 }
+interface SearchCompletionRequest {
+    query: string;
+    max_tokens?: number;
+    temperature?: number;
+    search_count?: number;
+    user?: string;
+    stream?: boolean;
+}
+interface SearchSource {
+    index: number;
+    title: string;
+    url: string;
+    domain: string;
+    favicon: string;
+}
+interface SearchCompletionResponse {
+    id: string;
+    object: "search.completion";
+    model: string;
+    query: string;
+    choices: {
+        index: number;
+        message: {
+            role: "assistant";
+            content: string;
+        };
+        finish_reason: "stop" | "length";
+    }[];
+    sources: SearchSource[];
+    usage?: UsageInfo;
+}
+interface SearchHistoryEntry {
+    query: string;
+    sources: SearchSource[];
+    answer: string;
+    timestamp: number;
+}
+interface SearchHistoryResponse {
+    history: SearchHistoryEntry[];
+    count: number;
+}
+interface SearchStatsResponse {
+    enabled: boolean;
+    num_partitions: number;
+    total_keys: number;
+    total_entries: number;
+    max_per_key: number;
+    persist_dir: string | null;
+}
 /**
  * vllm-i64 SDK — HTTP Client core
@@ -383,6 +432,65 @@ declare class RAGEndpoint {
     stats(): Promise<RAGStatsResult>;
 }
+/**
+ * Search endpoints — Perplexity-style search-augmented generation.
+ *
+ * Token-routed isolation: partition = sha256(api_key ∥ user_id) mod N
+ * No data leak possible. No shared cache. No session tokens.
+ *
+ * INL - 2025
+ */
+declare class SearchEndpoint {
+    private http;
+    constructor(http: HttpClient);
+    /**
+     * Search-augmented generation: query → web search → cited answer.
+     *
+     * @example
+     * ```ts
+     * const res = await client.search.create({ query: "What is MoE?" });
+     * console.log(res.choices[0].message.content);
+     * for (const src of res.sources) {
+     *   console.log(`[${src.index}] ${src.title} — ${src.url}`);
+     * }
+     * ```
+     */
+    create(params: SearchCompletionRequest): Promise<SearchCompletionResponse>;
+    /**
+     * Stream search completion — yields text chunks.
+     * Sources are sent as the final SSE event.
+     *
+     * @example
+     * ```ts
+     * const { stream, sources } = await client.search.stream({ query: "token routing" });
+     * for await (const chunk of stream) {
+     *   process.stdout.write(chunk);
+     * }
+     * console.log("\nSources:", await sources);
+     * ```
+     */
+    stream(params: SearchCompletionRequest, signal?: AbortSignal): Promise<{
+        stream: AsyncGenerator<string>;
+        sources: Promise<SearchSource[]>;
+    }>;
+    /**
+     * Get search history for the authenticated user.
+     * History is partitioned by api_key + user — no cross-user access.
+     */
+    history(user?: string, limit?: number): Promise<SearchHistoryResponse>;
+    /**
+     * Clear search history for the authenticated user.
+     * Only clears the caller's own partition.
+     */
+    clearHistory(user?: string): Promise<{
+        status: string;
+        removed: number;
+    }>;
+    /** Search history statistics (admin). */
+    stats(): Promise<SearchStatsResponse>;
+}
 /**
  * vllm-i64 — TypeScript SDK
  *
@@ -402,6 +510,10 @@ declare class RAGEndpoint {
  *   process.stdout.write(chunk);
  * }
  *
+ * // Search (Perplexity-style, token-routed isolation)
+ * const search = await client.search.create({ query: "What is MoE?" });
+ * console.log(search.sources);
+ *
  * // Admin
  * await client.monitor.snapshot();
  * await client.cache.purge();
@@ -425,6 +537,8 @@ declare class I64Client {
     readonly monitor: MonitorEndpoint;
     /** RAG — index, search, stats. */
     readonly rag: RAGEndpoint;
+    /** Web search — Perplexity-style with token-routed isolation. */
+    readonly search: SearchEndpoint;
     /**
      * Create a vllm-i64 client.
      *
@@ -436,4 +550,4 @@ declare class I64Client {
     get baseUrl(): string;
 }
-export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
+export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };

package/dist/index.d.ts CHANGED Viewed

@@ -217,6 +217,55 @@ interface RAGStatsResult {
     total_chunks: number;
     dimension: number;
 }
+interface SearchCompletionRequest {
+    query: string;
+    max_tokens?: number;
+    temperature?: number;
+    search_count?: number;
+    user?: string;
+    stream?: boolean;
+}
+interface SearchSource {
+    index: number;
+    title: string;
+    url: string;
+    domain: string;
+    favicon: string;
+}
+interface SearchCompletionResponse {
+    id: string;
+    object: "search.completion";
+    model: string;
+    query: string;
+    choices: {
+        index: number;
+        message: {
+            role: "assistant";
+            content: string;
+        };
+        finish_reason: "stop" | "length";
+    }[];
+    sources: SearchSource[];
+    usage?: UsageInfo;
+}
+interface SearchHistoryEntry {
+    query: string;
+    sources: SearchSource[];
+    answer: string;
+    timestamp: number;
+}
+interface SearchHistoryResponse {
+    history: SearchHistoryEntry[];
+    count: number;
+}
+interface SearchStatsResponse {
+    enabled: boolean;
+    num_partitions: number;
+    total_keys: number;
+    total_entries: number;
+    max_per_key: number;
+    persist_dir: string | null;
+}
 /**
  * vllm-i64 SDK — HTTP Client core
@@ -383,6 +432,65 @@ declare class RAGEndpoint {
     stats(): Promise<RAGStatsResult>;
 }
+/**
+ * Search endpoints — Perplexity-style search-augmented generation.
+ *
+ * Token-routed isolation: partition = sha256(api_key ∥ user_id) mod N
+ * No data leak possible. No shared cache. No session tokens.
+ *
+ * INL - 2025
+ */
+declare class SearchEndpoint {
+    private http;
+    constructor(http: HttpClient);
+    /**
+     * Search-augmented generation: query → web search → cited answer.
+     *
+     * @example
+     * ```ts
+     * const res = await client.search.create({ query: "What is MoE?" });
+     * console.log(res.choices[0].message.content);
+     * for (const src of res.sources) {
+     *   console.log(`[${src.index}] ${src.title} — ${src.url}`);
+     * }
+     * ```
+     */
+    create(params: SearchCompletionRequest): Promise<SearchCompletionResponse>;
+    /**
+     * Stream search completion — yields text chunks.
+     * Sources are sent as the final SSE event.
+     *
+     * @example
+     * ```ts
+     * const { stream, sources } = await client.search.stream({ query: "token routing" });
+     * for await (const chunk of stream) {
+     *   process.stdout.write(chunk);
+     * }
+     * console.log("\nSources:", await sources);
+     * ```
+     */
+    stream(params: SearchCompletionRequest, signal?: AbortSignal): Promise<{
+        stream: AsyncGenerator<string>;
+        sources: Promise<SearchSource[]>;
+    }>;
+    /**
+     * Get search history for the authenticated user.
+     * History is partitioned by api_key + user — no cross-user access.
+     */
+    history(user?: string, limit?: number): Promise<SearchHistoryResponse>;
+    /**
+     * Clear search history for the authenticated user.
+     * Only clears the caller's own partition.
+     */
+    clearHistory(user?: string): Promise<{
+        status: string;
+        removed: number;
+    }>;
+    /** Search history statistics (admin). */
+    stats(): Promise<SearchStatsResponse>;
+}
 /**
  * vllm-i64 — TypeScript SDK
  *
@@ -402,6 +510,10 @@ declare class RAGEndpoint {
  *   process.stdout.write(chunk);
  * }
  *
+ * // Search (Perplexity-style, token-routed isolation)
+ * const search = await client.search.create({ query: "What is MoE?" });
+ * console.log(search.sources);
+ *
  * // Admin
  * await client.monitor.snapshot();
  * await client.cache.purge();
@@ -425,6 +537,8 @@ declare class I64Client {
     readonly monitor: MonitorEndpoint;
     /** RAG — index, search, stats. */
     readonly rag: RAGEndpoint;
+    /** Web search — Perplexity-style with token-routed isolation. */
+    readonly search: SearchEndpoint;
     /**
      * Create a vllm-i64 client.
      *
@@ -436,4 +550,4 @@ declare class I64Client {
     get baseUrl(): string;
 }
-export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };
+export { CacheEndpoint, type CachePurgeResult, type CacheStats, type ChatCompletionChoice, type ChatCompletionRequest, type ChatCompletionResponse, ChatEndpoint, type ChatMessage, type ClientOptions, type CompletionRequest, type CompletionResponse, CompletionsEndpoint, type ExpertStats, type GpuInfo, type HealthResponse, HttpClient, I64Client, type LoRAAdapter, LoRAEndpoint, type LoRAListResult, type LoRALoadParams, type LoRALoadResult, type LoRAUnloadResult, type ModelInfo, MonitorEndpoint, type MonitorSnapshot, RAGEndpoint, type RAGIndexParams, type RAGIndexResult, type RAGResult, type RAGSearchResult, type RAGStatsResult, type SearchCompletionRequest, type SearchCompletionResponse, SearchEndpoint, type SearchHistoryEntry, type SearchHistoryResponse, type SearchSource, type SearchStatsResponse, type StreamDelta, type ToolCall, type ToolDefinition, type UsageInfo, I64Client as default };

package/dist/index.js CHANGED Viewed

@@ -28,6 +28,7 @@ __export(index_exports, {
   LoRAEndpoint: () => LoRAEndpoint,
   MonitorEndpoint: () => MonitorEndpoint,
   RAGEndpoint: () => RAGEndpoint,
+  SearchEndpoint: () => SearchEndpoint,
   default: () => index_default
 });
 module.exports = __toCommonJS(index_exports);
@@ -341,6 +342,112 @@ var RAGEndpoint = class {
   }
 };
+// src/endpoints/search.ts
+var SearchEndpoint = class {
+  constructor(http) {
+    this.http = http;
+  }
+  /**
+   * Search-augmented generation: query → web search → cited answer.
+   *
+   * @example
+   * ```ts
+   * const res = await client.search.create({ query: "What is MoE?" });
+   * console.log(res.choices[0].message.content);
+   * for (const src of res.sources) {
+   *   console.log(`[${src.index}] ${src.title} — ${src.url}`);
+   * }
+   * ```
+   */
+  async create(params) {
+    return this.http.post("/v1/search/completions", { ...params, stream: false });
+  }
+  /**
+   * Stream search completion — yields text chunks.
+   * Sources are sent as the final SSE event.
+   *
+   * @example
+   * ```ts
+   * const { stream, sources } = await client.search.stream({ query: "token routing" });
+   * for await (const chunk of stream) {
+   *   process.stdout.write(chunk);
+   * }
+   * console.log("\nSources:", await sources);
+   * ```
+   */
+  async stream(params, signal) {
+    const res = await this.http.fetch(
+      "/v1/search/completions",
+      { method: "POST", body: JSON.stringify({ ...params, stream: true }) },
+      signal
+    );
+    let resolveSourcesFn;
+    const sourcesPromise = new Promise((resolve) => {
+      resolveSourcesFn = resolve;
+    });
+    const self = this;
+    async function* readStream() {
+      if (!res.body) return;
+      const reader = res.body.getReader();
+      const decoder = new TextDecoder();
+      let buffer = "";
+      let foundSources = false;
+      try {
+        while (true) {
+          const { done, value } = await reader.read();
+          if (done) break;
+          buffer += decoder.decode(value, { stream: true });
+          const lines = buffer.split("\n");
+          buffer = lines.pop() ?? "";
+          for (const line of lines) {
+            const trimmed = line.trim();
+            if (!trimmed.startsWith("data: ")) continue;
+            const payload = trimmed.slice(6);
+            if (payload === "[DONE]") return;
+            try {
+              const data = JSON.parse(payload);
+              if (data.sources) {
+                foundSources = true;
+                resolveSourcesFn(data.sources);
+                continue;
+              }
+              const content = data.choices?.[0]?.delta?.content ?? "";
+              if (content) yield content;
+            } catch {
+            }
+          }
+        }
+      } finally {
+        reader.releaseLock();
+        if (!foundSources) resolveSourcesFn([]);
+      }
+    }
+    return { stream: readStream(), sources: sourcesPromise };
+  }
+  /**
+   * Get search history for the authenticated user.
+   * History is partitioned by api_key + user — no cross-user access.
+   */
+  async history(user, limit = 50) {
+    const params = new URLSearchParams({ limit: String(limit) });
+    if (user) params.set("user", user);
+    return this.http.get(`/v1/search/history?${params}`);
+  }
+  /**
+   * Clear search history for the authenticated user.
+   * Only clears the caller's own partition.
+   */
+  async clearHistory(user) {
+    const params = user ? `?user=${encodeURIComponent(user)}` : "";
+    const res = await this.http.fetch(`/v1/search/history${params}`, { method: "DELETE" });
+    return res.json();
+  }
+  /** Search history statistics (admin). */
+  async stats() {
+    return this.http.get("/v1/search/stats");
+  }
+};
 // src/index.ts
 var I64Client = class {
   http;
@@ -356,6 +463,8 @@ var I64Client = class {
   monitor;
   /** RAG — index, search, stats. */
   rag;
+  /** Web search — Perplexity-style with token-routed isolation. */
+  search;
   /**
    * Create a vllm-i64 client.
    *
@@ -370,6 +479,7 @@ var I64Client = class {
     this.lora = new LoRAEndpoint(this.http);
     this.monitor = new MonitorEndpoint(this.http);
     this.rag = new RAGEndpoint(this.http);
+    this.search = new SearchEndpoint(this.http);
   }
   /** Server base URL. */
   get baseUrl() {
@@ -386,5 +496,6 @@ var index_default = I64Client;
   I64Client,
   LoRAEndpoint,
   MonitorEndpoint,
-  RAGEndpoint
+  RAGEndpoint,
+  SearchEndpoint
 });

package/dist/index.mjs CHANGED Viewed

@@ -307,6 +307,112 @@ var RAGEndpoint = class {
   }
 };
+// src/endpoints/search.ts
+var SearchEndpoint = class {
+  constructor(http) {
+    this.http = http;
+  }
+  /**
+   * Search-augmented generation: query → web search → cited answer.
+   *
+   * @example
+   * ```ts
+   * const res = await client.search.create({ query: "What is MoE?" });
+   * console.log(res.choices[0].message.content);
+   * for (const src of res.sources) {
+   *   console.log(`[${src.index}] ${src.title} — ${src.url}`);
+   * }
+   * ```
+   */
+  async create(params) {
+    return this.http.post("/v1/search/completions", { ...params, stream: false });
+  }
+  /**
+   * Stream search completion — yields text chunks.
+   * Sources are sent as the final SSE event.
+   *
+   * @example
+   * ```ts
+   * const { stream, sources } = await client.search.stream({ query: "token routing" });
+   * for await (const chunk of stream) {
+   *   process.stdout.write(chunk);
+   * }
+   * console.log("\nSources:", await sources);
+   * ```
+   */
+  async stream(params, signal) {
+    const res = await this.http.fetch(
+      "/v1/search/completions",
+      { method: "POST", body: JSON.stringify({ ...params, stream: true }) },
+      signal
+    );
+    let resolveSourcesFn;
+    const sourcesPromise = new Promise((resolve) => {
+      resolveSourcesFn = resolve;
+    });
+    const self = this;
+    async function* readStream() {
+      if (!res.body) return;
+      const reader = res.body.getReader();
+      const decoder = new TextDecoder();
+      let buffer = "";
+      let foundSources = false;
+      try {
+        while (true) {
+          const { done, value } = await reader.read();
+          if (done) break;
+          buffer += decoder.decode(value, { stream: true });
+          const lines = buffer.split("\n");
+          buffer = lines.pop() ?? "";
+          for (const line of lines) {
+            const trimmed = line.trim();
+            if (!trimmed.startsWith("data: ")) continue;
+            const payload = trimmed.slice(6);
+            if (payload === "[DONE]") return;
+            try {
+              const data = JSON.parse(payload);
+              if (data.sources) {
+                foundSources = true;
+                resolveSourcesFn(data.sources);
+                continue;
+              }
+              const content = data.choices?.[0]?.delta?.content ?? "";
+              if (content) yield content;
+            } catch {
+            }
+          }
+        }
+      } finally {
+        reader.releaseLock();
+        if (!foundSources) resolveSourcesFn([]);
+      }
+    }
+    return { stream: readStream(), sources: sourcesPromise };
+  }
+  /**
+   * Get search history for the authenticated user.
+   * History is partitioned by api_key + user — no cross-user access.
+   */
+  async history(user, limit = 50) {
+    const params = new URLSearchParams({ limit: String(limit) });
+    if (user) params.set("user", user);
+    return this.http.get(`/v1/search/history?${params}`);
+  }
+  /**
+   * Clear search history for the authenticated user.
+   * Only clears the caller's own partition.
+   */
+  async clearHistory(user) {
+    const params = user ? `?user=${encodeURIComponent(user)}` : "";
+    const res = await this.http.fetch(`/v1/search/history${params}`, { method: "DELETE" });
+    return res.json();
+  }
+  /** Search history statistics (admin). */
+  async stats() {
+    return this.http.get("/v1/search/stats");
+  }
+};
 // src/index.ts
 var I64Client = class {
   http;
@@ -322,6 +428,8 @@ var I64Client = class {
   monitor;
   /** RAG — index, search, stats. */
   rag;
+  /** Web search — Perplexity-style with token-routed isolation. */
+  search;
   /**
    * Create a vllm-i64 client.
    *
@@ -336,6 +444,7 @@ var I64Client = class {
     this.lora = new LoRAEndpoint(this.http);
     this.monitor = new MonitorEndpoint(this.http);
     this.rag = new RAGEndpoint(this.http);
+    this.search = new SearchEndpoint(this.http);
   }
   /** Server base URL. */
   get baseUrl() {
@@ -352,5 +461,6 @@ export {
   LoRAEndpoint,
   MonitorEndpoint,
   RAGEndpoint,
+  SearchEndpoint,
   index_default as default
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "vllm-i64",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "TypeScript SDK for vllm-i64 — integer-first inference engine",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
@@ -28,7 +28,10 @@
     "moe",
     "openai",
     "ai",
-    "complexity"
+    "complexity",
+    "search",
+    "perplexity",
+    "security"
   ],
   "author": "Complexity-ML / INL",
   "license": "Apache-2.0",