npm - @probeo/anymodel - Versions diffs - 0.4.0 → 0.5.1 - Mend

@probeo/anymodel 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/cli.js CHANGED Viewed

@@ -485,6 +485,25 @@ var Router = class {
   }
 };
+// src/utils/fetch-with-timeout.ts
+var _defaultTimeout = 12e4;
+var _flexTimeout = 6e5;
+function setDefaultTimeout(ms) {
+  _defaultTimeout = ms;
+}
+function getFlexTimeout() {
+  return _flexTimeout;
+}
+function fetchWithTimeout(url, init, timeoutMs) {
+  const ms = timeoutMs ?? _defaultTimeout;
+  const signal = AbortSignal.timeout(ms);
+  if (init?.signal) {
+    const combined = AbortSignal.any([signal, init.signal]);
+    return fetch(url, { ...init, signal: combined });
+  }
+  return fetch(url, { ...init, signal });
+}
 // src/providers/openai.ts
 var OPENAI_API_BASE = "https://api.openai.com/v1";
 var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
@@ -502,19 +521,20 @@ var SUPPORTED_PARAMS = /* @__PURE__ */ new Set([
   "tools",
   "tool_choice",
   "user",
-  "logit_bias"
+  "logit_bias",
+  "service_tier"
 ]);
 function createOpenAIAdapter(apiKey, baseURL) {
   const base = baseURL || OPENAI_API_BASE;
-  async function makeRequest(path2, body, method = "POST") {
-    const res = await fetch(`${base}${path2}`, {
+  async function makeRequest(path2, body, method = "POST", timeoutMs) {
+    const res = await fetchWithTimeout(`${base}${path2}`, {
       method,
       headers: {
         "Content-Type": "application/json",
         "Authorization": `Bearer ${apiKey}`
       },
       body: body ? JSON.stringify(body) : void 0
-    });
+    }, timeoutMs);
     if (!res.ok) {
       let errorBody;
       try {
@@ -562,6 +582,7 @@ function createOpenAIAdapter(apiKey, baseURL) {
     if (request.tools !== void 0) body.tools = request.tools;
     if (request.tool_choice !== void 0) body.tool_choice = request.tool_choice;
     if (request.user !== void 0) body.user = request.user;
+    if (request.service_tier !== void 0) body.service_tier = request.service_tier;
     return body;
   }
   const adapter = {
@@ -663,13 +684,15 @@ function createOpenAIAdapter(apiKey, baseURL) {
     },
     async sendRequest(request) {
       const body = buildRequestBody(request);
-      const res = await makeRequest("/chat/completions", body);
+      const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
+      const res = await makeRequest("/chat/completions", body, "POST", timeout);
       const json = await res.json();
       return adapter.translateResponse(json);
     },
     async sendStreamingRequest(request) {
       const body = buildRequestBody({ ...request, stream: true });
-      const res = await makeRequest("/chat/completions", body);
+      const timeout = request.service_tier === "flex" ? getFlexTimeout() : void 0;
+      const res = await makeRequest("/chat/completions", body, "POST", timeout);
       if (!res.body) {
         throw new AnyModelError(502, "No response body for streaming request", {
           provider_name: "openai"
@@ -716,7 +739,7 @@ var FALLBACK_MODELS = [
 ];
 function createAnthropicAdapter(apiKey) {
   async function makeRequest(path2, body, stream = false) {
-    const res = await fetch(`${ANTHROPIC_API_BASE}${path2}`, {
+    const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}${path2}`, {
       method: "POST",
       headers: {
         "Content-Type": "application/json",
@@ -973,7 +996,7 @@ ${body.system}` : jsonInstruction;
     },
     async listModels() {
       try {
-        const res = await fetch(`${ANTHROPIC_API_BASE}/models`, {
+        const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE}/models`, {
           method: "GET",
           headers: {
             "x-api-key": apiKey,
@@ -1258,7 +1281,7 @@ function createGoogleAdapter(apiKey) {
     },
     async listModels() {
       try {
-        const res = await fetch(`${GEMINI_API_BASE}/models?key=${apiKey}`);
+        const res = await fetchWithTimeout(`${GEMINI_API_BASE}/models?key=${apiKey}`);
         if (!res.ok) return FALLBACK_MODELS2;
         const data = await res.json();
         const models = data.models || [];
@@ -1293,12 +1316,12 @@ function createGoogleAdapter(apiKey) {
       return SUPPORTED_PARAMS3.has(param);
     },
     supportsBatch() {
-      return false;
+      return true;
     },
     async sendRequest(request) {
       const body = translateRequest(request);
       const url = getModelEndpoint(request.model, false);
-      const res = await fetch(url, {
+      const res = await fetchWithTimeout(url, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body: JSON.stringify(body)
@@ -1321,7 +1344,7 @@ function createGoogleAdapter(apiKey) {
     async sendStreamingRequest(request) {
       const body = translateRequest(request);
       const url = getModelEndpoint(request.model, true);
-      const res = await fetch(url, {
+      const res = await fetchWithTimeout(url, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body: JSON.stringify(body)
@@ -1371,7 +1394,7 @@ var MODELS = [
 ];
 function createPerplexityAdapter(apiKey) {
   async function makeRequest(path2, body, method = "POST") {
-    const res = await fetch(`${PERPLEXITY_API_BASE}${path2}`, {
+    const res = await fetchWithTimeout(`${PERPLEXITY_API_BASE}${path2}`, {
       method,
       headers: {
         "Content-Type": "application/json",
@@ -1926,6 +1949,17 @@ var BatchStore = class {
     const entries = await readDirQueued(this.dir);
     return entries.filter((d) => d.isDirectory()).map((d) => d.name).sort();
   }
+  /**
+   * Stream requests from JSONL one line at a time (memory-efficient).
+   */
+  async *streamRequests(id) {
+    const p = joinPath(this.batchDir(id), "requests.jsonl");
+    if (!await fileExistsQueued(p)) return;
+    const raw = await readFileQueued(p, "utf8");
+    for (const line of raw.split("\n")) {
+      if (line.trim()) yield JSON.parse(line);
+    }
+  }
   /**
    * Check if a batch exists.
    */
@@ -1990,7 +2024,7 @@ var BatchManager = class {
       this.processNativeBatch(id, request, native.adapter).catch(() => {
       });
     } else {
-      this.processConcurrentBatch(id, request).catch(() => {
+      this.processConcurrentBatch(id, request.model, request.options).catch(() => {
       });
     }
     return batch;
@@ -2170,28 +2204,28 @@ var BatchManager = class {
   }
   /**
    * Process batch requests concurrently (fallback path).
+   * Streams requests from disk to avoid holding them all in memory.
    */
-  async processConcurrentBatch(batchId, request) {
+  async processConcurrentBatch(batchId, model, options) {
     const batch = await this.store.getMeta(batchId);
     if (!batch) return;
     batch.status = "processing";
     await this.store.updateMeta(batch);
-    const items = request.requests;
     const active = /* @__PURE__ */ new Set();
     const processItem = async (item) => {
       const current = await this.store.getMeta(batchId);
       if (current?.status === "cancelled") return;
       const chatRequest = {
-        model: request.model,
+        model,
         messages: item.messages,
-        max_tokens: item.max_tokens ?? request.options?.max_tokens,
-        temperature: item.temperature ?? request.options?.temperature,
-        top_p: item.top_p ?? request.options?.top_p,
-        top_k: item.top_k ?? request.options?.top_k,
-        stop: item.stop ?? request.options?.stop,
-        response_format: item.response_format ?? request.options?.response_format,
-        tools: item.tools ?? request.options?.tools,
-        tool_choice: item.tool_choice ?? request.options?.tool_choice
+        max_tokens: item.max_tokens ?? options?.max_tokens,
+        temperature: item.temperature ?? options?.temperature,
+        top_p: item.top_p ?? options?.top_p,
+        top_k: item.top_k ?? options?.top_k,
+        stop: item.stop ?? options?.stop,
+        response_format: item.response_format ?? options?.response_format,
+        tools: item.tools ?? options?.tools,
+        tool_choice: item.tool_choice ?? options?.tool_choice
       };
       let result;
       try {
@@ -2222,7 +2256,7 @@ var BatchManager = class {
         await this.store.updateMeta(meta);
       }
     };
-    for (const item of items) {
+    for await (const item of this.store.streamRequests(batchId)) {
       const current = await this.store.getMeta(batchId);
       if (current?.status === "cancelled") break;
       if (active.size >= this.concurrencyLimit) {
@@ -2243,6 +2277,51 @@ var BatchManager = class {
   }
 };
+// src/utils/token-estimate.ts
+var CHARS_PER_TOKEN2 = 4;
+var MODEL_LIMITS = [
+  // OpenAI
+  { pattern: "gpt-4o-mini", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
+  { pattern: "gpt-4o", limit: { contextLength: 128e3, maxCompletionTokens: 16384 } },
+  { pattern: "gpt-4-turbo", limit: { contextLength: 128e3, maxCompletionTokens: 4096 } },
+  { pattern: "gpt-3.5-turbo", limit: { contextLength: 16385, maxCompletionTokens: 4096 } },
+  { pattern: "o1", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
+  { pattern: "o3", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
+  { pattern: "o4-mini", limit: { contextLength: 2e5, maxCompletionTokens: 1e5 } },
+  // Anthropic
+  { pattern: "claude-opus-4", limit: { contextLength: 2e5, maxCompletionTokens: 32768 } },
+  { pattern: "claude-sonnet-4", limit: { contextLength: 2e5, maxCompletionTokens: 16384 } },
+  { pattern: "claude-haiku-4", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
+  { pattern: "claude-3.5-sonnet", limit: { contextLength: 2e5, maxCompletionTokens: 8192 } },
+  { pattern: "claude-3-opus", limit: { contextLength: 2e5, maxCompletionTokens: 4096 } },
+  // Google
+  { pattern: "gemini-2.5-pro", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
+  { pattern: "gemini-2.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
+  { pattern: "gemini-2.0-flash", limit: { contextLength: 1048576, maxCompletionTokens: 65536 } },
+  { pattern: "gemini-1.5-pro", limit: { contextLength: 2097152, maxCompletionTokens: 8192 } },
+  { pattern: "gemini-1.5-flash", limit: { contextLength: 1048576, maxCompletionTokens: 8192 } }
+];
+var DEFAULT_LIMIT = { contextLength: 128e3, maxCompletionTokens: 4096 };
+function getModelLimits(model) {
+  const bare = model.includes("/") ? model.slice(model.indexOf("/") + 1) : model;
+  for (const entry of MODEL_LIMITS) {
+    if (bare.startsWith(entry.pattern) || bare.includes(entry.pattern)) {
+      return entry.limit;
+    }
+  }
+  return DEFAULT_LIMIT;
+}
+function resolveMaxTokens(model, messages, userMaxTokens) {
+  if (userMaxTokens !== void 0) return userMaxTokens;
+  const inputChars = JSON.stringify(messages).length;
+  const estimatedInput = Math.ceil(inputChars / CHARS_PER_TOKEN2);
+  const estimatedWithMargin = Math.ceil(estimatedInput * 1.05);
+  const limits = getModelLimits(model);
+  const available = limits.contextLength - estimatedWithMargin;
+  const result = Math.min(limits.maxCompletionTokens, available);
+  return Math.max(1, result);
+}
 // src/providers/openai-batch.ts
 var OPENAI_API_BASE2 = "https://api.openai.com/v1";
 function createOpenAIBatchAdapter(apiKey) {
@@ -2257,7 +2336,7 @@ function createOpenAIBatchAdapter(apiKey) {
       headers["Content-Type"] = "application/json";
       fetchBody = JSON.stringify(options.body);
     }
-    const res = await fetch(`${OPENAI_API_BASE2}${path2}`, {
+    const res = await fetchWithTimeout(`${OPENAI_API_BASE2}${path2}`, {
       method: options.method || "GET",
       headers,
       body: fetchBody
@@ -2283,7 +2362,7 @@ function createOpenAIBatchAdapter(apiKey) {
         model,
         messages: req.messages
       };
-      if (req.max_tokens !== void 0) body.max_tokens = req.max_tokens;
+      body.max_tokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
       if (req.temperature !== void 0) body.temperature = req.temperature;
       if (req.top_p !== void 0) body.top_p = req.top_p;
       if (req.stop !== void 0) body.stop = req.stop;
@@ -2442,7 +2521,7 @@ function createAnthropicBatchAdapter(apiKey) {
       "anthropic-version": ANTHROPIC_VERSION2,
       "Content-Type": "application/json"
     };
-    const res = await fetch(`${ANTHROPIC_API_BASE2}${path2}`, {
+    const res = await fetchWithTimeout(`${ANTHROPIC_API_BASE2}${path2}`, {
       method: options.method || "GET",
       headers,
       body: options.body ? JSON.stringify(options.body) : void 0
@@ -2465,7 +2544,7 @@ function createAnthropicBatchAdapter(apiKey) {
   function translateToAnthropicParams(model, req) {
     const params = {
       model,
-      max_tokens: req.max_tokens || DEFAULT_MAX_TOKENS2
+      max_tokens: resolveMaxTokens(model, req.messages, req.max_tokens || DEFAULT_MAX_TOKENS2)
     };
     const systemMessages = req.messages.filter((m) => m.role === "system");
     const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
@@ -2639,6 +2718,284 @@ ${params.system}` : jsonInstruction;
   };
 }
+// src/providers/google-batch.ts
+var GEMINI_API_BASE2 = "https://generativelanguage.googleapis.com/v1beta";
+function createGoogleBatchAdapter(apiKey) {
+  async function apiRequest(path2, options = {}) {
+    const headers = {
+      "Content-Type": "application/json",
+      "x-goog-api-key": apiKey
+    };
+    const res = await fetchWithTimeout(`${GEMINI_API_BASE2}${path2}`, {
+      method: options.method || "GET",
+      headers,
+      body: options.body ? JSON.stringify(options.body) : void 0
+    });
+    if (!res.ok) {
+      let errorBody;
+      try {
+        errorBody = await res.json();
+      } catch {
+        errorBody = { message: res.statusText };
+      }
+      const msg = errorBody?.error?.message || errorBody?.message || res.statusText;
+      throw new AnyModelError(res.status >= 500 ? 502 : res.status, msg, {
+        provider_name: "google",
+        raw: errorBody
+      });
+    }
+    return res;
+  }
+  function translateRequestToGemini(model, req) {
+    const body = {};
+    const systemMessages = req.messages.filter((m) => m.role === "system");
+    const nonSystemMessages = req.messages.filter((m) => m.role !== "system");
+    if (systemMessages.length > 0) {
+      body.systemInstruction = {
+        parts: [{ text: systemMessages.map((m) => typeof m.content === "string" ? m.content : "").join("\n") }]
+      };
+    }
+    body.contents = nonSystemMessages.map((m) => ({
+      role: m.role === "assistant" ? "model" : "user",
+      parts: typeof m.content === "string" ? [{ text: m.content }] : Array.isArray(m.content) ? m.content.map((p) => p.type === "text" ? { text: p.text } : { text: "" }) : [{ text: "" }]
+    }));
+    const generationConfig = {};
+    if (req.temperature !== void 0) generationConfig.temperature = req.temperature;
+    generationConfig.maxOutputTokens = req.max_tokens !== void 0 ? req.max_tokens : resolveMaxTokens(model, req.messages);
+    if (req.top_p !== void 0) generationConfig.topP = req.top_p;
+    if (req.top_k !== void 0) generationConfig.topK = req.top_k;
+    if (req.stop !== void 0) {
+      generationConfig.stopSequences = Array.isArray(req.stop) ? req.stop : [req.stop];
+    }
+    if (req.response_format) {
+      if (req.response_format.type === "json_object") {
+        generationConfig.responseMimeType = "application/json";
+      } else if (req.response_format.type === "json_schema") {
+        generationConfig.responseMimeType = "application/json";
+        generationConfig.responseSchema = req.response_format.json_schema?.schema;
+      }
+    }
+    if (Object.keys(generationConfig).length > 0) {
+      body.generationConfig = generationConfig;
+    }
+    if (req.tools && req.tools.length > 0) {
+      body.tools = [{
+        functionDeclarations: req.tools.map((t) => ({
+          name: t.function.name,
+          description: t.function.description || "",
+          parameters: t.function.parameters || {}
+        }))
+      }];
+      if (req.tool_choice) {
+        if (req.tool_choice === "auto") {
+          body.toolConfig = { functionCallingConfig: { mode: "AUTO" } };
+        } else if (req.tool_choice === "required") {
+          body.toolConfig = { functionCallingConfig: { mode: "ANY" } };
+        } else if (req.tool_choice === "none") {
+          body.toolConfig = { functionCallingConfig: { mode: "NONE" } };
+        } else if (typeof req.tool_choice === "object") {
+          body.toolConfig = {
+            functionCallingConfig: {
+              mode: "ANY",
+              allowedFunctionNames: [req.tool_choice.function.name]
+            }
+          };
+        }
+      }
+    }
+    return body;
+  }
+  function mapFinishReason(reason) {
+    switch (reason) {
+      case "STOP":
+        return "stop";
+      case "MAX_TOKENS":
+        return "length";
+      case "SAFETY":
+        return "content_filter";
+      case "RECITATION":
+        return "content_filter";
+      default:
+        return "stop";
+    }
+  }
+  function translateGeminiResponse(response, model) {
+    const candidate = response.candidates?.[0];
+    let content = "";
+    const toolCalls = [];
+    for (const part of candidate?.content?.parts || []) {
+      if (part.text) {
+        content += part.text;
+      } else if (part.functionCall) {
+        toolCalls.push({
+          id: generateId("call"),
+          type: "function",
+          function: {
+            name: part.functionCall.name,
+            arguments: JSON.stringify(part.functionCall.args || {})
+          }
+        });
+      }
+    }
+    const message = { role: "assistant", content };
+    if (toolCalls.length > 0) {
+      message.tool_calls = toolCalls;
+    }
+    const finishReason = toolCalls.length > 0 ? "tool_calls" : mapFinishReason(candidate?.finishReason || "STOP");
+    return {
+      id: generateId(),
+      object: "chat.completion",
+      created: Math.floor(Date.now() / 1e3),
+      model: `google/${model}`,
+      choices: [{ index: 0, message, finish_reason: finishReason }],
+      usage: {
+        prompt_tokens: response.usageMetadata?.promptTokenCount || 0,
+        completion_tokens: response.usageMetadata?.candidatesTokenCount || 0,
+        total_tokens: response.usageMetadata?.totalTokenCount || 0
+      }
+    };
+  }
+  function mapBatchState(state) {
+    switch (state) {
+      case "JOB_STATE_PENDING":
+        return "pending";
+      case "JOB_STATE_RUNNING":
+        return "processing";
+      case "JOB_STATE_SUCCEEDED":
+        return "completed";
+      case "JOB_STATE_FAILED":
+        return "failed";
+      case "JOB_STATE_CANCELLED":
+        return "cancelled";
+      case "JOB_STATE_EXPIRED":
+        return "failed";
+      default:
+        return "pending";
+    }
+  }
+  return {
+    async createBatch(model, requests, _options) {
+      const batchRequests = requests.map((req) => ({
+        request: translateRequestToGemini(model, req),
+        metadata: { key: req.custom_id }
+      }));
+      const res = await apiRequest(`/models/${model}:batchGenerateContent`, {
+        method: "POST",
+        body: {
+          batch: {
+            display_name: `anymodel-batch-${Date.now()}`,
+            input_config: {
+              requests: {
+                requests: batchRequests
+              }
+            }
+          }
+        }
+      });
+      const data = await res.json();
+      const batchName = data.name || data.batch?.name;
+      if (!batchName) {
+        throw new AnyModelError(502, "No batch name in Google response", {
+          provider_name: "google",
+          raw: data
+        });
+      }
+      return {
+        providerBatchId: batchName,
+        metadata: {
+          model,
+          total_requests: requests.length
+        }
+      };
+    },
+    async pollBatch(providerBatchId) {
+      const res = await apiRequest(`/${providerBatchId}`);
+      const data = await res.json();
+      const state = data.state || "JOB_STATE_PENDING";
+      const status = mapBatchState(state);
+      const totalCount = data.totalCount || data.metadata?.total_requests || 0;
+      const successCount = data.succeededCount || 0;
+      const failedCount = data.failedCount || 0;
+      return {
+        status,
+        total: totalCount || successCount + failedCount,
+        completed: successCount,
+        failed: failedCount
+      };
+    },
+    async getBatchResults(providerBatchId) {
+      const batchRes = await apiRequest(`/${providerBatchId}`);
+      const batchData = await batchRes.json();
+      const results = [];
+      const model = batchData.metadata?.model || "unknown";
+      if (batchData.response?.inlinedResponses) {
+        for (const item of batchData.response.inlinedResponses) {
+          const customId = item.metadata?.key || `request-${results.length}`;
+          if (item.response) {
+            results.push({
+              custom_id: customId,
+              status: "success",
+              response: translateGeminiResponse(item.response, model),
+              error: null
+            });
+          } else if (item.error) {
+            results.push({
+              custom_id: customId,
+              status: "error",
+              response: null,
+              error: {
+                code: item.error.code || 500,
+                message: item.error.message || "Batch item failed"
+              }
+            });
+          }
+        }
+        return results;
+      }
+      const responsesFile = batchData.response?.responsesFileName || batchData.outputConfig?.file_name;
+      if (responsesFile) {
+        const downloadUrl = `${GEMINI_API_BASE2}/${responsesFile}:download?alt=media`;
+        const fileRes = await fetchWithTimeout(downloadUrl, {
+          headers: { "x-goog-api-key": apiKey }
+        });
+        if (!fileRes.ok) {
+          throw new AnyModelError(502, "Failed to download batch results file", {
+            provider_name: "google"
+          });
+        }
+        const text = await fileRes.text();
+        for (const line of text.trim().split("\n")) {
+          if (!line) continue;
+          const item = JSON.parse(line);
+          const customId = item.key || item.metadata?.key || `request-${results.length}`;
+          if (item.response) {
+            results.push({
+              custom_id: customId,
+              status: "success",
+              response: translateGeminiResponse(item.response, model),
+              error: null
+            });
+          } else if (item.error) {
+            results.push({
+              custom_id: customId,
+              status: "error",
+              response: null,
+              error: {
+                code: item.error.code || 500,
+                message: item.error.message || "Batch item failed"
+              }
+            });
+          }
+        }
+      }
+      return results;
+    },
+    async cancelBatch(providerBatchId) {
+      await apiRequest(`/${providerBatchId}:cancel`, { method: "POST" });
+    }
+  };
+}
 // src/client.ts
 var AnyModel = class {
   registry;
@@ -2654,6 +3011,7 @@ var AnyModel = class {
   constructor(config = {}) {
     this.config = resolveConfig(config);
     this.registry = new ProviderRegistry();
+    setDefaultTimeout((this.config.defaults?.timeout ?? 120) * 1e3);
     if (this.config.io) {
       configureFsIO(this.config.io);
     }
@@ -2774,6 +3132,10 @@ var AnyModel = class {
     if (anthropicKey) {
       this.batchManager.registerBatchAdapter("anthropic", createAnthropicBatchAdapter(anthropicKey));
     }
+    const googleKey = config.google?.apiKey || process.env.GOOGLE_API_KEY;
+    if (googleKey) {
+      this.batchManager.registerBatchAdapter("google", createGoogleBatchAdapter(googleKey));
+    }
   }
   applyDefaults(request) {
     const defaults = this.config.defaults;