@earendil-works/pi-ai 0.75.4 → 0.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/cli.d.ts.map +1 -1
  2. package/dist/cli.js +14 -0
  3. package/dist/cli.js.map +1 -1
  4. package/dist/index.d.ts +1 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js.map +1 -1
  7. package/dist/models.generated.d.ts +364 -687
  8. package/dist/models.generated.d.ts.map +1 -1
  9. package/dist/models.generated.js +407 -749
  10. package/dist/models.generated.js.map +1 -1
  11. package/dist/providers/amazon-bedrock.d.ts.map +1 -1
  12. package/dist/providers/amazon-bedrock.js +2 -1
  13. package/dist/providers/amazon-bedrock.js.map +1 -1
  14. package/dist/providers/anthropic.d.ts +22 -5
  15. package/dist/providers/anthropic.d.ts.map +1 -1
  16. package/dist/providers/anthropic.js +8 -22
  17. package/dist/providers/anthropic.js.map +1 -1
  18. package/dist/providers/azure-openai-responses.d.ts.map +1 -1
  19. package/dist/providers/azure-openai-responses.js +1 -1
  20. package/dist/providers/azure-openai-responses.js.map +1 -1
  21. package/dist/providers/images/openrouter.d.ts.map +1 -1
  22. package/dist/providers/images/openrouter.js +1 -1
  23. package/dist/providers/images/openrouter.js.map +1 -1
  24. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  25. package/dist/providers/openai-codex-responses.js +148 -76
  26. package/dist/providers/openai-codex-responses.js.map +1 -1
  27. package/dist/providers/openai-completions.d.ts.map +1 -1
  28. package/dist/providers/openai-completions.js +1 -1
  29. package/dist/providers/openai-completions.js.map +1 -1
  30. package/dist/providers/openai-responses.d.ts.map +1 -1
  31. package/dist/providers/openai-responses.js +1 -1
  32. package/dist/providers/openai-responses.js.map +1 -1
  33. package/dist/providers/simple-options.d.ts.map +1 -1
  34. package/dist/providers/simple-options.js +1 -0
  35. package/dist/providers/simple-options.js.map +1 -1
  36. package/dist/types.d.ts +16 -0
  37. package/dist/types.d.ts.map +1 -1
  38. package/dist/types.js.map +1 -1
  39. package/dist/utils/abort-signals.d.ts +6 -0
  40. package/dist/utils/abort-signals.d.ts.map +1 -0
  41. package/dist/utils/abort-signals.js +34 -0
  42. package/dist/utils/abort-signals.js.map +1 -0
  43. package/dist/utils/oauth/device-code.d.ts +19 -0
  44. package/dist/utils/oauth/device-code.d.ts.map +1 -0
  45. package/dist/utils/oauth/device-code.js +55 -0
  46. package/dist/utils/oauth/device-code.js.map +1 -0
  47. package/dist/utils/oauth/github-copilot.d.ts +3 -3
  48. package/dist/utils/oauth/github-copilot.d.ts.map +1 -1
  49. package/dist/utils/oauth/github-copilot.js +45 -69
  50. package/dist/utils/oauth/github-copilot.js.map +1 -1
  51. package/dist/utils/oauth/index.d.ts +1 -0
  52. package/dist/utils/oauth/index.d.ts.map +1 -1
  53. package/dist/utils/oauth/index.js +1 -0
  54. package/dist/utils/oauth/index.js.map +1 -1
  55. package/dist/utils/oauth/types.d.ts +8 -1
  56. package/dist/utils/oauth/types.d.ts.map +1 -1
  57. package/dist/utils/oauth/types.js.map +1 -1
  58. package/dist/utils/overflow.d.ts +2 -1
  59. package/dist/utils/overflow.d.ts.map +1 -1
  60. package/dist/utils/overflow.js +5 -2
  61. package/dist/utils/overflow.js.map +1 -1
  62. package/package.json +2 -1
@@ -1 +1 @@
1
- {"version":3,"file":"overflow.js","sourceRoot":"","sources":["../../src/utils/overflow.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,MAAM,iBAAiB,GAAG;IACzB,qBAAqB,EAAE,2BAA2B;IAClD,oBAAoB,EAAE,kDAAkD;IACxE,wCAAwC,EAAE,iBAAiB;IAC3D,6BAA6B,EAAE,uCAAuC;IACtE,0EAA0E,EAAE,sCAAsC;IAClH,yCAAyC,EAAE,kBAAkB;IAC7D,+BAA+B,EAAE,aAAa;IAC9C,oCAAoC,EAAE,OAAO;IAC7C,uCAAuC,EAAE,4BAA4B;IACrE,iFAAiF,EAAE,cAAc;IACjG,2BAA2B,EAAE,iBAAiB;IAC9C,qCAAqC,EAAE,mBAAmB;IAC1D,kCAAkC,EAAE,YAAY;IAChD,+BAA+B,EAAE,UAAU;IAC3C,6BAA6B,EAAE,kBAAkB;IACjD,sDAAsD,EAAE,UAAU;IAClE,gCAAgC,EAAE,yDAAyD;IAC3F,oDAAoD,EAAE,iCAAiC;IACvF,gCAAgC,EAAE,mBAAmB;IACrD,kBAAkB,EAAE,mBAAmB;IACvC,uBAAuB,EAAE,mBAAmB;IAC5C,+CAA+C,EAAE,iCAAiC;CAClF,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,qBAAqB,GAAG;IAC7B,2CAA2C,EAAE,oFAAoF;IACjI,aAAa,EAAE,wBAAwB;IACvC,oBAAoB,EAAE,yBAAyB;CAC/C,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgDG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAyB,EAAE,aAAsB,EAAW;IAC7F,uCAAuC;IACvC,IAAI,OAAO,CAAC,UAAU,KAAK,OAAO,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC5D,oFAAoF;QACpF,MAAM,aAAa,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,CAAC;QACvF,IAAI,CAAC,aAAa,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,EAAE,CAAC;YACpF,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,8EAA8E;IAC9E,IAAI,aAAa,IAAI,OAAO,CAAC,UAAU,KAAK,MAAM,EAAE,CAAC;QACpD,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAClE,IAAI,WAAW,GAAG,aAAa,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,sFAAsF;IACtF,qFAAqF;IACrF,gEAAgE;IAChE,IAAI,aAAa,IAAI,OAAO,CAAC,UAAU,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpF,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAClE,IAAI,WAAW,IAAI,aAAa,GAAG,IAAI,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,GAAa;IAC/C,OAAO,CAAC,GAAG,iBAAiB,CAAC,CAAC;AAAA,CAC9B","sourcesContent":["import type { AssistantMessage } from \"../types.ts\";\n\n/**\n * Regex patterns to detect context overflow errors from different providers.\n *\n * These patterns match error messages returned when the input exceeds\n * the model's context window.\n *\n * Provider-specific patterns (with example error messages):\n *\n * - Anthropic: \"prompt is too long: 213462 tokens > 200000 maximum\"\n * - Anthropic: \"413 {\\\"error\\\":{\\\"type\\\":\\\"request_too_large\\\",\\\"message\\\":\\\"Request exceeds the maximum size\\\"}}\"\n * - OpenAI: \"Your input exceeds the context window of this model\"\n * - OpenAI/LiteLLM: \"Requested token count exceeds the model's maximum context length of 131072 tokens\"\n * - Google: \"The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)\"\n * - xAI: \"This model's maximum prompt length is 131072 but the request contains 537812 tokens\"\n * - Groq: \"Please reduce the length of the messages or completion\"\n * - OpenRouter: \"This endpoint's maximum context length is X tokens. However, you requested about Y tokens\"\n * - Together AI: \"The input (X tokens) is longer than the model's context length (Y tokens).\"\n * - llama.cpp: \"the request exceeds the available context size, try increasing it\"\n * - LM Studio: \"tokens to keep from the initial prompt is greater than the context length\"\n * - GitHub Copilot: \"prompt token count of X exceeds the limit of Y\"\n * - MiniMax: \"invalid params, context window exceeds limit\"\n * - Kimi For Coding: \"Your request exceeded model token limit: X (requested: Y)\"\n * - Cerebras: \"400/413 status code (no body)\"\n * - Mistral: \"Prompt contains X tokens ... too large for model with Y maximum context length\"\n * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow\n * - Xiaomi MiMo: Truncates input to fill contextWindow exactly, then returns finish_reason \"length\"\n * with output=0 (no room left to generate). Detected via stopReason \"length\" + zero output +\n * input filling the context window.\n * - Ollama: Some deployments truncate silently, others return errors like \"prompt too long; exceeded max context length by X tokens\"\n */\nconst OVERFLOW_PATTERNS = [\n\t/prompt is too long/i, // Anthropic token overflow\n\t/request_too_large/i, // Anthropic request byte-size overflow (HTTP 413)\n\t/input is too long for requested model/i, // Amazon Bedrock\n\t/exceeds the context window/i, // OpenAI (Completions & Responses API)\n\t/exceeds (?:the )?(?:model'?s )?maximum context length of [\\d,]+ tokens?/i, // OpenAI-compatible proxies (LiteLLM)\n\t/input token count.*exceeds the maximum/i, // Google (Gemini)\n\t/maximum prompt length is \\d+/i, // xAI (Grok)\n\t/reduce the length of the messages/i, // Groq\n\t/maximum context length is \\d+ tokens/i, // OpenRouter (all backends)\n\t/input \\(\\d+ tokens\\) is longer than the model'?s context length \\(\\d+ tokens\\)/i, // Together AI\n\t/exceeds the limit of \\d+/i, // GitHub Copilot\n\t/exceeds the available context size/i, // llama.cpp server\n\t/greater than the context length/i, // LM Studio\n\t/context window exceeds limit/i, // MiniMax\n\t/exceeded model token limit/i, // Kimi For Coding\n\t/too large for model with \\d+ maximum context length/i, // Mistral\n\t/model_context_window_exceeded/i, // z.ai non-standard finish_reason surfaced as error text\n\t/prompt too long; exceeded (?:max )?context length/i, // Ollama explicit overflow error\n\t/context[_ ]length[_ ]exceeded/i, // Generic fallback\n\t/too many tokens/i, // Generic fallback\n\t/token limit exceeded/i, // Generic fallback\n\t/^4(?:00|13)\\s*(?:status code)?\\s*\\(no body\\)/i, // Cerebras: 400/413 with no body\n];\n\n/**\n * Patterns that indicate non-overflow errors (e.g. rate limiting, server errors).\n * Error messages matching any of these are excluded from overflow detection\n * even if they also match an OVERFLOW_PATTERN.\n *\n * Example: Bedrock formats throttling errors as \"ThrottlingException: Too many tokens,\n * please wait before trying again.\" which would match the /too many tokens/i overflow\n * pattern without this exclusion.\n */\nconst NON_OVERFLOW_PATTERNS = [\n\t/^(Throttling error|Service unavailable):/i, // AWS Bedrock non-overflow errors (human-readable prefixes from formatBedrockError)\n\t/rate limit/i, // Generic rate limiting\n\t/too many requests/i, // Generic HTTP 429 style\n];\n\n/**\n * Check if an assistant message represents a context overflow error.\n *\n * This handles two cases:\n * 1. Error-based overflow: Most providers return stopReason \"error\" with a\n * specific error message pattern.\n * 2. Silent overflow: Some providers accept overflow requests and return\n * successfully. For these, we check if usage.input exceeds the context window.\n *\n * ## Reliability by Provider\n *\n * **Reliable detection (returns error with detectable message):**\n * - Anthropic: \"prompt is too long: X tokens > Y maximum\" or \"request_too_large\"\n * - OpenAI (Completions & Responses): \"exceeds the context window\" or \"exceeds the model's maximum context length of X tokens\"\n * - Google Gemini: \"input token count exceeds the maximum\"\n * - xAI (Grok): \"maximum prompt length is X but request contains Y\"\n * - Groq: \"reduce the length of the messages\"\n * - Cerebras: 400/413 status code (no body)\n * - Mistral: \"Prompt contains X tokens ... too large for model with Y maximum context length\"\n * - OpenRouter (all backends): \"maximum context length is X tokens\"\n * - Together AI: \"The input (X tokens) is longer than the model's context length (Y tokens).\"\n * - llama.cpp: \"exceeds the available context size\"\n * - LM Studio: \"greater than the context length\"\n * - Kimi For Coding: \"exceeded model token limit: X (requested: Y)\"\n *\n * **Unreliable detection:**\n * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),\n * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.\n * - Xiaomi MiMo: Truncates input to fit contextWindow then returns stopReason \"length\" with\n * output=0. Pass contextWindow param to detect via the \"filled context + zero output\" signal.\n * - Ollama: May truncate input silently for some setups, but may also return explicit\n * overflow errors that match the patterns above. Silent truncation still cannot be\n * detected here because we do not know the expected token count.\n *\n * ## Custom Providers\n *\n * If you've added custom models via settings.json, this function may not detect\n * overflow errors from those providers. To add support:\n *\n * 1. Send a request that exceeds the model's context window\n * 2. Check the errorMessage in the response\n * 3. Create a regex pattern that matches the error\n * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or\n * check the errorMessage yourself before calling this function\n *\n * @param message - The assistant message to check\n * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)\n * @returns true if the message indicates a context overflow\n */\nexport function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {\n\t// Case 1: Check error message patterns\n\tif (message.stopReason === \"error\" && message.errorMessage) {\n\t\t// Skip messages matching known non-overflow patterns (e.g. throttling / rate-limit)\n\t\tconst isNonOverflow = NON_OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!));\n\t\tif (!isNonOverflow && OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context\n\tif (contextWindow && message.stopReason === \"stop\") {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens > contextWindow) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 3: Length-stop overflow (Xiaomi MiMo style) - server truncates oversized input\n\t// to fit the context window, leaving no room for output. Returns stopReason \"length\"\n\t// with output=0 and input+cacheRead filling the context window.\n\tif (contextWindow && message.stopReason === \"length\" && message.usage.output === 0) {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens >= contextWindow * 0.99) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/**\n * Get the overflow patterns for testing purposes.\n */\nexport function getOverflowPatterns(): RegExp[] {\n\treturn [...OVERFLOW_PATTERNS];\n}\n"]}
1
+ {"version":3,"file":"overflow.js","sourceRoot":"","sources":["../../src/utils/overflow.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,MAAM,iBAAiB,GAAG;IACzB,qBAAqB,EAAE,2BAA2B;IAClD,oBAAoB,EAAE,kDAAkD;IACxE,wCAAwC,EAAE,iBAAiB;IAC3D,6BAA6B,EAAE,uCAAuC;IACtE,0EAA0E,EAAE,sCAAsC;IAClH,yCAAyC,EAAE,kBAAkB;IAC7D,+BAA+B,EAAE,aAAa;IAC9C,oCAAoC,EAAE,OAAO;IAC7C,uCAAuC,EAAE,6BAA6B;IACtE,kEAAkE,EAAE,sBAAsB;IAC1F,iFAAiF,EAAE,cAAc;IACjG,2BAA2B,EAAE,iBAAiB;IAC9C,qCAAqC,EAAE,mBAAmB;IAC1D,kCAAkC,EAAE,YAAY;IAChD,+BAA+B,EAAE,UAAU;IAC3C,6BAA6B,EAAE,kBAAkB;IACjD,sDAAsD,EAAE,UAAU;IAClE,gCAAgC,EAAE,yDAAyD;IAC3F,oDAAoD,EAAE,iCAAiC;IACvF,gCAAgC,EAAE,mBAAmB;IACrD,kBAAkB,EAAE,mBAAmB;IACvC,uBAAuB,EAAE,mBAAmB;IAC5C,+CAA+C,EAAE,iCAAiC;CAClF,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,qBAAqB,GAAG;IAC7B,2CAA2C,EAAE,oFAAoF;IACjI,aAAa,EAAE,wBAAwB;IACvC,oBAAoB,EAAE,yBAAyB;CAC/C,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiDG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAyB,EAAE,aAAsB,EAAW;IAC7F,uCAAuC;IACvC,IAAI,OAAO,CAAC,UAAU,KAAK,OAAO,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC5D,oFAAoF;QACpF,MAAM,aAAa,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,CAAC;QACvF,IAAI,CAAC,aAAa,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,EAAE,CAAC;YACpF,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,8EAA8E;IAC9E,IAAI,aAAa,IAAI,OAAO,CAAC,UAAU,KAAK,MAAM,EAAE,CAAC;QACpD,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAClE,IAAI,WAAW,GAAG,aAAa,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,sFAAsF;IACtF,qFAAqF;IACrF,gEAAgE;IAChE,IAAI,aAAa,IAAI,OAAO,CAAC,UAAU,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpF,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAClE,IAAI,WAAW,IAAI,aAAa,GAAG,IAAI,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,GAAa;IAC/C,OAAO,CAAC,GAAG,iBAAiB,CAAC,CAAC;AAAA,CAC9B","sourcesContent":["import type { AssistantMessage } from \"../types.ts\";\n\n/**\n * Regex patterns to detect context overflow errors from different providers.\n *\n * These patterns match error messages returned when the input exceeds\n * the model's context window.\n *\n * Provider-specific patterns (with example error messages):\n *\n * - Anthropic: \"prompt is too long: 213462 tokens > 200000 maximum\"\n * - Anthropic: \"413 {\\\"error\\\":{\\\"type\\\":\\\"request_too_large\\\",\\\"message\\\":\\\"Request exceeds the maximum size\\\"}}\"\n * - OpenAI: \"Your input exceeds the context window of this model\"\n * - OpenAI/LiteLLM: \"Requested token count exceeds the model's maximum context length of 131072 tokens\"\n * - Google: \"The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)\"\n * - xAI: \"This model's maximum prompt length is 131072 but the request contains 537812 tokens\"\n * - Groq: \"Please reduce the length of the messages or completion\"\n * - OpenRouter: \"This endpoint's maximum context length is X tokens. However, you requested about Y tokens\"\n * - OpenRouter/Poolside: \"Input length X exceeds the maximum allowed input length of Y tokens.\"\n * - Together AI: \"The input (X tokens) is longer than the model's context length (Y tokens).\"\n * - llama.cpp: \"the request exceeds the available context size, try increasing it\"\n * - LM Studio: \"tokens to keep from the initial prompt is greater than the context length\"\n * - GitHub Copilot: \"prompt token count of X exceeds the limit of Y\"\n * - MiniMax: \"invalid params, context window exceeds limit\"\n * - Kimi For Coding: \"Your request exceeded model token limit: X (requested: Y)\"\n * - Cerebras: \"400/413 status code (no body)\"\n * - Mistral: \"Prompt contains X tokens ... too large for model with Y maximum context length\"\n * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow\n * - Xiaomi MiMo: Truncates input to fill contextWindow exactly, then returns finish_reason \"length\"\n * with output=0 (no room left to generate). Detected via stopReason \"length\" + zero output +\n * input filling the context window.\n * - Ollama: Some deployments truncate silently, others return errors like \"prompt too long; exceeded max context length by X tokens\"\n */\nconst OVERFLOW_PATTERNS = [\n\t/prompt is too long/i, // Anthropic token overflow\n\t/request_too_large/i, // Anthropic request byte-size overflow (HTTP 413)\n\t/input is too long for requested model/i, // Amazon Bedrock\n\t/exceeds the context window/i, // OpenAI (Completions & Responses API)\n\t/exceeds (?:the )?(?:model'?s )?maximum context length of [\\d,]+ tokens?/i, // OpenAI-compatible proxies (LiteLLM)\n\t/input token count.*exceeds the maximum/i, // Google (Gemini)\n\t/maximum prompt length is \\d+/i, // xAI (Grok)\n\t/reduce the length of the messages/i, // Groq\n\t/maximum context length is \\d+ tokens/i, // OpenRouter (most backends)\n\t/exceeds (?:the )?maximum allowed input length of [\\d,]+ tokens?/i, // OpenRouter/Poolside\n\t/input \\(\\d+ tokens\\) is longer than the model'?s context length \\(\\d+ tokens\\)/i, // Together AI\n\t/exceeds the limit of \\d+/i, // GitHub Copilot\n\t/exceeds the available context size/i, // llama.cpp server\n\t/greater than the context length/i, // LM Studio\n\t/context window exceeds limit/i, // MiniMax\n\t/exceeded model token limit/i, // Kimi For Coding\n\t/too large for model with \\d+ maximum context length/i, // Mistral\n\t/model_context_window_exceeded/i, // z.ai non-standard finish_reason surfaced as error text\n\t/prompt too long; exceeded (?:max )?context length/i, // Ollama explicit overflow error\n\t/context[_ ]length[_ ]exceeded/i, // Generic fallback\n\t/too many tokens/i, // Generic fallback\n\t/token limit exceeded/i, // Generic fallback\n\t/^4(?:00|13)\\s*(?:status code)?\\s*\\(no body\\)/i, // Cerebras: 400/413 with no body\n];\n\n/**\n * Patterns that indicate non-overflow errors (e.g. rate limiting, server errors).\n * Error messages matching any of these are excluded from overflow detection\n * even if they also match an OVERFLOW_PATTERN.\n *\n * Example: Bedrock formats throttling errors as \"ThrottlingException: Too many tokens,\n * please wait before trying again.\" which would match the /too many tokens/i overflow\n * pattern without this exclusion.\n */\nconst NON_OVERFLOW_PATTERNS = [\n\t/^(Throttling error|Service unavailable):/i, // AWS Bedrock non-overflow errors (human-readable prefixes from formatBedrockError)\n\t/rate limit/i, // Generic rate limiting\n\t/too many requests/i, // Generic HTTP 429 style\n];\n\n/**\n * Check if an assistant message represents a context overflow error.\n *\n * This handles two cases:\n * 1. Error-based overflow: Most providers return stopReason \"error\" with a\n * specific error message pattern.\n * 2. Silent overflow: Some providers accept overflow requests and return\n * successfully. For these, we check if usage.input exceeds the context window.\n *\n * ## Reliability by Provider\n *\n * **Reliable detection (returns error with detectable message):**\n * - Anthropic: \"prompt is too long: X tokens > Y maximum\" or \"request_too_large\"\n * - OpenAI (Completions & Responses): \"exceeds the context window\" or \"exceeds the model's maximum context length of X tokens\"\n * - Google Gemini: \"input token count exceeds the maximum\"\n * - xAI (Grok): \"maximum prompt length is X but request contains Y\"\n * - Groq: \"reduce the length of the messages\"\n * - Cerebras: 400/413 status code (no body)\n * - Mistral: \"Prompt contains X tokens ... too large for model with Y maximum context length\"\n * - OpenRouter (most backends): \"maximum context length is X tokens\"\n * - OpenRouter/Poolside: \"Input length X exceeds the maximum allowed input length of Y tokens.\"\n * - Together AI: \"The input (X tokens) is longer than the model's context length (Y tokens).\"\n * - llama.cpp: \"exceeds the available context size\"\n * - LM Studio: \"greater than the context length\"\n * - Kimi For Coding: \"exceeded model token limit: X (requested: Y)\"\n *\n * **Unreliable detection:**\n * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),\n * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.\n * - Xiaomi MiMo: Truncates input to fit contextWindow then returns stopReason \"length\" with\n * output=0. Pass contextWindow param to detect via the \"filled context + zero output\" signal.\n * - Ollama: May truncate input silently for some setups, but may also return explicit\n * overflow errors that match the patterns above. Silent truncation still cannot be\n * detected here because we do not know the expected token count.\n *\n * ## Custom Providers\n *\n * If you've added custom models via settings.json, this function may not detect\n * overflow errors from those providers. To add support:\n *\n * 1. Send a request that exceeds the model's context window\n * 2. Check the errorMessage in the response\n * 3. Create a regex pattern that matches the error\n * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or\n * check the errorMessage yourself before calling this function\n *\n * @param message - The assistant message to check\n * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)\n * @returns true if the message indicates a context overflow\n */\nexport function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {\n\t// Case 1: Check error message patterns\n\tif (message.stopReason === \"error\" && message.errorMessage) {\n\t\t// Skip messages matching known non-overflow patterns (e.g. throttling / rate-limit)\n\t\tconst isNonOverflow = NON_OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!));\n\t\tif (!isNonOverflow && OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context\n\tif (contextWindow && message.stopReason === \"stop\") {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens > contextWindow) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 3: Length-stop overflow (Xiaomi MiMo style) - server truncates oversized input\n\t// to fit the context window, leaving no room for output. Returns stopReason \"length\"\n\t// with output=0 and input+cacheRead filling the context window.\n\tif (contextWindow && message.stopReason === \"length\" && message.usage.output === 0) {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens >= contextWindow * 0.99) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/**\n * Get the overflow patterns for testing purposes.\n */\nexport function getOverflowPatterns(): RegExp[] {\n\treturn [...OVERFLOW_PATTERNS];\n}\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@earendil-works/pi-ai",
3
- "version": "0.75.4",
3
+ "version": "0.76.0",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -69,6 +69,7 @@
69
69
  "dependencies": {
70
70
  "@anthropic-ai/sdk": "0.91.1",
71
71
  "@aws-sdk/client-bedrock-runtime": "3.1048.0",
72
+ "@smithy/node-http-handler": "4.7.3",
72
73
  "@google/genai": "1.52.0",
73
74
  "@mistralai/mistralai": "2.2.1",
74
75
  "http-proxy-agent": "7.0.2",