@mariozechner/pi-ai 0.12.13 → 0.12.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/utils/overflow.d.ts ADDED
@@ -0,0 +1,50 @@
+ import type { AssistantMessage } from "../types.js";
+ /**
+ * Check if an assistant message represents a context overflow error.
+ *
+ * This handles two cases:
+ * 1. Error-based overflow: Most providers return stopReason "error" with a
+ * specific error message pattern.
+ * 2. Silent overflow: Some providers accept overflow requests and return
+ * successfully. For these, we check if usage.input exceeds the context window.
+ *
+ * ## Reliability by Provider
+ *
+ * **Reliable detection (returns error with detectable message):**
+ * - Anthropic: "prompt is too long: X tokens > Y maximum"
+ * - OpenAI (Completions & Responses): "exceeds the context window"
+ * - Google Gemini: "input token count exceeds the maximum"
+ * - xAI (Grok): "maximum prompt length is X but request contains Y"
+ * - Groq: "reduce the length of the messages"
+ * - Cerebras: 400/413 status code (no body)
+ * - OpenRouter (all backends): "maximum context length is X tokens"
+ * - llama.cpp: "exceeds the available context size"
+ * - LM Studio: "greater than the context length"
+ *
+ * **Unreliable detection:**
+ * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
+ * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
+ * - Ollama: Silently truncates input without error. Cannot be detected via this function.
+ * The response will have usage.input < expected, but we don't know the expected value.
+ *
+ * ## Custom Providers
+ *
+ * If you've added custom models via settings.json, this function may not detect
+ * overflow errors from those providers. To add support:
+ *
+ * 1. Send a request that exceeds the model's context window
+ * 2. Check the errorMessage in the response
+ * 3. Create a regex pattern that matches the error
+ * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or
+ * check the errorMessage yourself before calling this function
+ *
+ * @param message - The assistant message to check
+ * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)
+ * @returns true if the message indicates a context overflow
+ */
+ export declare function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean;
+ /**
+ * Get the overflow patterns for testing purposes.
+ */
+ export declare function getOverflowPatterns(): RegExp[];
+ //# sourceMappingURL=overflow.d.ts.map
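
The declaration above is the new public surface of this release. Below is a minimal usage sketch; the root import path and the concrete `AssistantMessage` values are assumptions (only the `isContextOverflow` and `getOverflowPatterns` signatures appear in this diff):

```ts
// Sketch only: assumes the helper is re-exported from the package root;
// this diff only shows dist/utils/overflow.d.ts.
import { isContextOverflow } from "@mariozechner/pi-ai";
import type { AssistantMessage } from "@mariozechner/pi-ai";

// Hypothetical response obtained elsewhere from the library.
declare const reply: AssistantMessage;

// Error-based overflow: stopReason "error" with a known provider message.
if (isContextOverflow(reply)) {
  // compact or truncate history, then retry
}

// Silent overflow (z.ai style): pass the model's context window so the helper
// can compare it against usage.input + usage.cacheRead on a "stop" response.
if (isContextOverflow(reply, 200_000)) {
  // the request "succeeded" but the input exceeded the window
}
```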
package/dist/utils/overflow.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"overflow.d.ts","sourceRoot":"","sources":["../../src/utils/overflow.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAoCpD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,gBAAgB,EAAE,aAAa,CAAC,EAAE,MAAM,GAAG,OAAO,CAuB5F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,EAAE,CAE9C","sourcesContent":["import type { AssistantMessage } from \"../types.js\";\n\n/**\n * Regex patterns to detect context overflow errors from different providers.\n *\n * These patterns match error messages returned when the input exceeds\n * the model's context window.\n *\n * Provider-specific patterns (with example error messages):\n *\n * - Anthropic: \"prompt is too long: 213462 tokens > 200000 maximum\"\n * - OpenAI: \"Your input exceeds the context window of this model\"\n * - Google: \"The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)\"\n * - xAI: \"This model's maximum prompt length is 131072 but the request contains 537812 tokens\"\n * - Groq: \"Please reduce the length of the messages or completion\"\n * - OpenRouter: \"This endpoint's maximum context length is X tokens. However, you requested about Y tokens\"\n * - llama.cpp: \"the request exceeds the available context size, try increasing it\"\n * - LM Studio: \"tokens to keep from the initial prompt is greater than the context length\"\n * - Cerebras: Returns \"400 status code (no body)\" - handled separately below\n * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow\n * - Ollama: Silently truncates input - not detectable via error message\n */\nconst OVERFLOW_PATTERNS = [\n\t/prompt is too long/i, // Anthropic\n\t/exceeds the context window/i, // OpenAI (Completions & Responses API)\n\t/input token count.*exceeds the maximum/i, // Google (Gemini)\n\t/maximum prompt length is \\d+/i, // xAI (Grok)\n\t/reduce the length of the messages/i, // Groq\n\t/maximum context length is \\d+ tokens/i, // OpenRouter (all backends)\n\t/exceeds the available context size/i, // llama.cpp server\n\t/greater than the context length/i, // LM Studio\n\t/context length exceeded/i, // Generic fallback\n\t/too many tokens/i, // Generic fallback\n\t/token limit exceeded/i, // Generic fallback\n];\n\n/**\n * Check if an assistant message represents a context overflow error.\n *\n * This handles two cases:\n * 1. Error-based overflow: Most providers return stopReason \"error\" with a\n * specific error message pattern.\n * 2. Silent overflow: Some providers accept overflow requests and return\n * successfully. For these, we check if usage.input exceeds the context window.\n *\n * ## Reliability by Provider\n *\n * **Reliable detection (returns error with detectable message):**\n * - Anthropic: \"prompt is too long: X tokens > Y maximum\"\n * - OpenAI (Completions & Responses): \"exceeds the context window\"\n * - Google Gemini: \"input token count exceeds the maximum\"\n * - xAI (Grok): \"maximum prompt length is X but request contains Y\"\n * - Groq: \"reduce the length of the messages\"\n * - Cerebras: 400/413 status code (no body)\n * - OpenRouter (all backends): \"maximum context length is X tokens\"\n * - llama.cpp: \"exceeds the available context size\"\n * - LM Studio: \"greater than the context length\"\n *\n * **Unreliable detection:**\n * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),\n * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.\n * - Ollama: Silently truncates input without error. Cannot be detected via this function.\n * The response will have usage.input < expected, but we don't know the expected value.\n *\n * ## Custom Providers\n *\n * If you've added custom models via settings.json, this function may not detect\n * overflow errors from those providers. To add support:\n *\n * 1. Send a request that exceeds the model's context window\n * 2. Check the errorMessage in the response\n * 3. Create a regex pattern that matches the error\n * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or\n * check the errorMessage yourself before calling this function\n *\n * @param message - The assistant message to check\n * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)\n * @returns true if the message indicates a context overflow\n */\nexport function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {\n\t// Case 1: Check error message patterns\n\tif (message.stopReason === \"error\" && message.errorMessage) {\n\t\t// Check known patterns\n\t\tif (OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!))) {\n\t\t\treturn true;\n\t\t}\n\n\t\t// Cerebras returns 400/413 with no body - check for status code pattern\n\t\tif (/^4(00|13)\\s*(status code)?\\s*\\(no body\\)/i.test(message.errorMessage)) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context\n\tif (contextWindow && message.stopReason === \"stop\") {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens > contextWindow) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/**\n * Get the overflow patterns for testing purposes.\n */\nexport function getOverflowPatterns(): RegExp[] {\n\treturn [...OVERFLOW_PATTERNS];\n}\n"]}
package/dist/utils/overflow.js ADDED
@@ -0,0 +1,104 @@
+ /**
+ * Regex patterns to detect context overflow errors from different providers.
+ *
+ * These patterns match error messages returned when the input exceeds
+ * the model's context window.
+ *
+ * Provider-specific patterns (with example error messages):
+ *
+ * - Anthropic: "prompt is too long: 213462 tokens > 200000 maximum"
+ * - OpenAI: "Your input exceeds the context window of this model"
+ * - Google: "The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)"
+ * - xAI: "This model's maximum prompt length is 131072 but the request contains 537812 tokens"
+ * - Groq: "Please reduce the length of the messages or completion"
+ * - OpenRouter: "This endpoint's maximum context length is X tokens. However, you requested about Y tokens"
+ * - llama.cpp: "the request exceeds the available context size, try increasing it"
+ * - LM Studio: "tokens to keep from the initial prompt is greater than the context length"
+ * - Cerebras: Returns "400 status code (no body)" - handled separately below
+ * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
+ * - Ollama: Silently truncates input - not detectable via error message
+ */
+ const OVERFLOW_PATTERNS = [
+ /prompt is too long/i, // Anthropic
+ /exceeds the context window/i, // OpenAI (Completions & Responses API)
+ /input token count.*exceeds the maximum/i, // Google (Gemini)
+ /maximum prompt length is \d+/i, // xAI (Grok)
+ /reduce the length of the messages/i, // Groq
+ /maximum context length is \d+ tokens/i, // OpenRouter (all backends)
+ /exceeds the available context size/i, // llama.cpp server
+ /greater than the context length/i, // LM Studio
+ /context length exceeded/i, // Generic fallback
+ /too many tokens/i, // Generic fallback
+ /token limit exceeded/i, // Generic fallback
+ ];
+ /**
+ * Check if an assistant message represents a context overflow error.
+ *
+ * This handles two cases:
+ * 1. Error-based overflow: Most providers return stopReason "error" with a
+ * specific error message pattern.
+ * 2. Silent overflow: Some providers accept overflow requests and return
+ * successfully. For these, we check if usage.input exceeds the context window.
+ *
+ * ## Reliability by Provider
+ *
+ * **Reliable detection (returns error with detectable message):**
+ * - Anthropic: "prompt is too long: X tokens > Y maximum"
+ * - OpenAI (Completions & Responses): "exceeds the context window"
+ * - Google Gemini: "input token count exceeds the maximum"
+ * - xAI (Grok): "maximum prompt length is X but request contains Y"
+ * - Groq: "reduce the length of the messages"
+ * - Cerebras: 400/413 status code (no body)
+ * - OpenRouter (all backends): "maximum context length is X tokens"
+ * - llama.cpp: "exceeds the available context size"
+ * - LM Studio: "greater than the context length"
+ *
+ * **Unreliable detection:**
+ * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
+ * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
+ * - Ollama: Silently truncates input without error. Cannot be detected via this function.
+ * The response will have usage.input < expected, but we don't know the expected value.
+ *
+ * ## Custom Providers
+ *
+ * If you've added custom models via settings.json, this function may not detect
+ * overflow errors from those providers. To add support:
+ *
+ * 1. Send a request that exceeds the model's context window
+ * 2. Check the errorMessage in the response
+ * 3. Create a regex pattern that matches the error
+ * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or
+ * check the errorMessage yourself before calling this function
+ *
+ * @param message - The assistant message to check
+ * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)
+ * @returns true if the message indicates a context overflow
+ */
+ export function isContextOverflow(message, contextWindow) {
+ // Case 1: Check error message patterns
+ if (message.stopReason === "error" && message.errorMessage) {
+ // Check known patterns
+ if (OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage))) {
+ return true;
+ }
+ // Cerebras returns 400/413 with no body - check for status code pattern
+ if (/^4(00|13)\s*(status code)?\s*\(no body\)/i.test(message.errorMessage)) {
+ return true;
+ }
+ }
+ // Case 2: Silent overflow (z.ai style) - successful but usage exceeds context
+ if (contextWindow && message.stopReason === "stop") {
+ const inputTokens = message.usage.input + message.usage.cacheRead;
+ if (inputTokens > contextWindow) {
+ return true;
+ }
+ }
+ return false;
+ }
+ /**
+ * Get the overflow patterns for testing purposes.
+ */
+ export function getOverflowPatterns() {
+ return [...OVERFLOW_PATTERNS];
+ }
+ //# sourceMappingURL=overflow.js.map
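
For custom providers, the doc comment above recommends checking `errorMessage` yourself before calling the helper. A hedged sketch of that wrapper pattern follows; the provider regex is hypothetical and the root import path is again an assumption:

```ts
// Hypothetical wrapper for a provider whose overflow message is not covered by
// OVERFLOW_PATTERNS; only isContextOverflow/getOverflowPatterns come from this diff.
import { isContextOverflow, getOverflowPatterns } from "@mariozechner/pi-ai";
import type { AssistantMessage } from "@mariozechner/pi-ai";

const MY_PROVIDER_OVERFLOW = /request exceeds model context/i; // made-up pattern

export function isOverflowWithCustomProvider(message: AssistantMessage, contextWindow?: number): boolean {
  // Check the custom provider's error text first...
  if (message.stopReason === "error" && message.errorMessage && MY_PROVIDER_OVERFLOW.test(message.errorMessage)) {
    return true;
  }
  // ...then fall back to the built-in patterns and the silent-overflow check.
  return isContextOverflow(message, contextWindow);
}

// getOverflowPatterns() is exposed for tests, e.g. to assert that a captured
// provider error is already matched by the built-in patterns:
const alreadyCovered = getOverflowPatterns().some((p) =>
  p.test("prompt is too long: 213462 tokens > 200000 maximum"),
);
console.log(alreadyCovered);
```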
package/dist/utils/overflow.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"overflow.js","sourceRoot":"","sources":["../../src/utils/overflow.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,iBAAiB,GAAG;IACzB,qBAAqB,EAAE,YAAY;IACnC,6BAA6B,EAAE,uCAAuC;IACtE,yCAAyC,EAAE,kBAAkB;IAC7D,+BAA+B,EAAE,aAAa;IAC9C,oCAAoC,EAAE,OAAO;IAC7C,uCAAuC,EAAE,4BAA4B;IACrE,qCAAqC,EAAE,mBAAmB;IAC1D,kCAAkC,EAAE,YAAY;IAChD,0BAA0B,EAAE,mBAAmB;IAC/C,kBAAkB,EAAE,mBAAmB;IACvC,uBAAuB,EAAE,mBAAmB;CAC5C,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAyB,EAAE,aAAsB,EAAW;IAC7F,uCAAuC;IACvC,IAAI,OAAO,CAAC,UAAU,KAAK,OAAO,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC5D,uBAAuB;QACvB,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,EAAE,CAAC;YAClE,OAAO,IAAI,CAAC;QACb,CAAC;QAED,wEAAwE;QACxE,IAAI,2CAA2C,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;YAC5E,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,8EAA8E;IAC9E,IAAI,aAAa,IAAI,OAAO,CAAC,UAAU,KAAK,MAAM,EAAE,CAAC;QACpD,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAClE,IAAI,WAAW,GAAG,aAAa,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,GAAa;IAC/C,OAAO,CAAC,GAAG,iBAAiB,CAAC,CAAC;AAAA,CAC9B","sourcesContent":["import type { AssistantMessage } from \"../types.js\";\n\n/**\n * Regex patterns to detect context overflow errors from different providers.\n *\n * These patterns match error messages returned when the input exceeds\n * the model's context window.\n *\n * Provider-specific patterns (with example error messages):\n *\n * - Anthropic: \"prompt is too long: 213462 tokens > 200000 maximum\"\n * - OpenAI: \"Your input exceeds the context window of this model\"\n * - Google: \"The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)\"\n * - xAI: \"This model's maximum prompt length is 131072 but the request contains 537812 tokens\"\n * - Groq: \"Please reduce the length of the messages or completion\"\n * - OpenRouter: \"This endpoint's maximum context length is X tokens. However, you requested about Y tokens\"\n * - llama.cpp: \"the request exceeds the available context size, try increasing it\"\n * - LM Studio: \"tokens to keep from the initial prompt is greater than the context length\"\n * - Cerebras: Returns \"400 status code (no body)\" - handled separately below\n * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow\n * - Ollama: Silently truncates input - not detectable via error message\n */\nconst OVERFLOW_PATTERNS = [\n\t/prompt is too long/i, // Anthropic\n\t/exceeds the context window/i, // OpenAI (Completions & Responses API)\n\t/input token count.*exceeds the maximum/i, // Google (Gemini)\n\t/maximum prompt length is \\d+/i, // xAI (Grok)\n\t/reduce the length of the messages/i, // Groq\n\t/maximum context length is \\d+ tokens/i, // OpenRouter (all backends)\n\t/exceeds the available context size/i, // llama.cpp server\n\t/greater than the context length/i, // LM Studio\n\t/context length exceeded/i, // Generic fallback\n\t/too many tokens/i, // Generic fallback\n\t/token limit exceeded/i, // Generic fallback\n];\n\n/**\n * Check if an assistant message represents a context overflow error.\n *\n * This handles two cases:\n * 1. Error-based overflow: Most providers return stopReason \"error\" with a\n * specific error message pattern.\n * 2. Silent overflow: Some providers accept overflow requests and return\n * successfully. For these, we check if usage.input exceeds the context window.\n *\n * ## Reliability by Provider\n *\n * **Reliable detection (returns error with detectable message):**\n * - Anthropic: \"prompt is too long: X tokens > Y maximum\"\n * - OpenAI (Completions & Responses): \"exceeds the context window\"\n * - Google Gemini: \"input token count exceeds the maximum\"\n * - xAI (Grok): \"maximum prompt length is X but request contains Y\"\n * - Groq: \"reduce the length of the messages\"\n * - Cerebras: 400/413 status code (no body)\n * - OpenRouter (all backends): \"maximum context length is X tokens\"\n * - llama.cpp: \"exceeds the available context size\"\n * - LM Studio: \"greater than the context length\"\n *\n * **Unreliable detection:**\n * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),\n * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.\n * - Ollama: Silently truncates input without error. Cannot be detected via this function.\n * The response will have usage.input < expected, but we don't know the expected value.\n *\n * ## Custom Providers\n *\n * If you've added custom models via settings.json, this function may not detect\n * overflow errors from those providers. To add support:\n *\n * 1. Send a request that exceeds the model's context window\n * 2. Check the errorMessage in the response\n * 3. Create a regex pattern that matches the error\n * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or\n * check the errorMessage yourself before calling this function\n *\n * @param message - The assistant message to check\n * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)\n * @returns true if the message indicates a context overflow\n */\nexport function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {\n\t// Case 1: Check error message patterns\n\tif (message.stopReason === \"error\" && message.errorMessage) {\n\t\t// Check known patterns\n\t\tif (OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!))) {\n\t\t\treturn true;\n\t\t}\n\n\t\t// Cerebras returns 400/413 with no body - check for status code pattern\n\t\tif (/^4(00|13)\\s*(status code)?\\s*\\(no body\\)/i.test(message.errorMessage)) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context\n\tif (contextWindow && message.stopReason === \"stop\") {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens > contextWindow) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/**\n * Get the overflow patterns for testing purposes.\n */\nexport function getOverflowPatterns(): RegExp[] {\n\treturn [...OVERFLOW_PATTERNS];\n}\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@mariozechner/pi-ai",
- "version": "0.12.13",
+ "version": "0.12.15",
  "description": "Unified LLM API with automatic model discovery and provider configuration",
  "type": "module",
  "main": "./dist/index.js",