ai-microcore 5.0.0.dev5__tar.gz → 5.0.0.dev7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/PKG-INFO +16 -13
  2. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/README.md +12 -11
  3. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/__init__.py +3 -2
  4. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/_env.py +6 -18
  5. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/_llm_functions.py +162 -53
  6. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/configuration.py +168 -88
  7. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/interactive_setup.py +92 -12
  8. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/google_genai.py +9 -4
  9. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/openai.py +8 -8
  10. ai_microcore-5.0.0.dev7/microcore/llm_backends.py +301 -0
  11. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/logging.py +2 -2
  12. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/message_types.py +19 -5
  13. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/presets.py +2 -1
  14. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/types.py +21 -0
  15. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ui.py +29 -5
  16. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/utils.py +6 -4
  17. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/wrappers/llm_response_wrapper.py +1 -2
  18. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/pyproject.toml +16 -1
  19. ai_microcore-5.0.0.dev5/microcore/llm/google_vertex_ai.py +0 -149
  20. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/LICENSE +0 -0
  21. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/_prepare_llm_args.py +0 -0
  22. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_func/__init__.py +0 -0
  23. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_func/ai-func.json.j2 +0 -0
  24. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
  25. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_func/ai-func.tag.j2 +0 -0
  26. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_modules.py +0 -0
  27. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/embedding_db/__init__.py +0 -0
  28. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/embedding_db/chromadb.py +0 -0
  29. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/embedding_db/qdrant.py +0 -0
  30. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/file_cache.py +0 -0
  31. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/file_storage.py +0 -0
  32. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/images.py +0 -0
  33. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/json_parsing.py +0 -0
  34. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/__init__.py +0 -0
  35. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/anthropic.py +0 -0
  36. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/local_llm.py +0 -0
  37. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/local_transformers.py +0 -0
  38. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/shared.py +0 -0
  39. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/lm_client.py +0 -0
  40. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/mcp.py +0 -0
  41. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/metrics.py +0 -0
  42. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/python.py +0 -0
  43. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/templating/__init__.py +0 -0
  44. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/templating/jinja2.py +0 -0
  45. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/text2speech/elevenlabs.py +0 -0
  46. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/tokenizing.py +0 -0
  47. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/wrappers/__init__.py +0 -0
  48. {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/wrappers/prompt_wrapper.py +0 -0
@@ -1,13 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-microcore
3
- Version: 5.0.0.dev5
3
+ Version: 5.0.0.dev7
4
4
  Summary: # Minimalistic Foundation for AI Applications
5
- Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter
5
+ Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter,anthropic,google gemini,google vertex ai
6
6
  Author-email: Vitalii Stepanenko <mail@vitaliy.in>
7
7
  Maintainer-email: Vitalii Stepanenko <mail@vitaliy.in>
8
8
  Requires-Python: >=3.10
9
9
  Description-Content-Type: text/markdown
10
10
  Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
11
12
  Classifier: Programming Language :: Python :: 3.11
12
13
  Classifier: Programming Language :: Python :: 3.12
13
14
  Classifier: Programming Language :: Python :: 3.13
@@ -29,6 +30,7 @@ Requires-Dist: mcp>=1.10.1,<2.0
29
30
  Requires-Dist: fastmcp>=2.10.2,<3.0
30
31
  Requires-Dist: docstring_parser~=0.16.0
31
32
  Requires-Dist: httpx~=0.28.1
33
+ Project-URL: Bug Tracker, https://github.com/Nayjest/ai-microcore/issues
32
34
  Project-URL: Source Code, https://github.com/Nayjest/ai-microcore
33
35
 
34
36
  # AI MicroCore: A Minimalistic Foundation for AI Applications
@@ -53,7 +55,7 @@ It defines interfaces for features typically used in AI applications,
53
55
  which allows you to keep your application as simple as possible and try various models & services
54
56
  without the need to change your application code.
55
57
 
56
- You even can switch between text completion and chat completion models only using configuration.
58
+ You can even switch between text completion and chat completion models only using configuration.
57
59
 
58
60
  Thanks to LLM-agnostic MCP integration,
59
61
  **MicroCore** connects MCP tools to any language model easily,
@@ -105,7 +107,7 @@ Similarity search features will work out of the box if you have the `chromadb` p
105
107
  There are a few options available for configuring microcore:
106
108
 
107
109
  - Use `microcore.configure(**params)`
108
- <br>💡 <small>All configuration options should be available in IDE autocompletion tooltips</small>
110
+ <br>💡 <small>All configuration options appear in IDE autocompletion tooltips</small>
109
111
  - Create a `.env` file in your project root; examples: [basic.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.example), [Mistral Large.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.mistral.example), [Anthropic Claude 3 Opus.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.anthropic.example), [Gemini on Vertex AI.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.google-vertex-gemini.example), [Gemini on AI Studio.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.gemini.example)
110
112
  - Use a custom configuration file: `mc.configure(DOT_ENV_FILE='dev-config.ini')`
111
113
  - Define OS environment variables
@@ -113,7 +115,7 @@ There are a few options available for configuring microcore:
113
115
  For the full list of available configuration options, you may also check [`microcore/configuration.py`](https://github.com/Nayjest/ai-microcore/blob/main/microcore/configuration.py#L175).
114
116
 
115
117
  ### Installing vendor-specific packages
116
- For the models working not via OpenAI API, you may need to install additional packages:
118
+ For models that do not work via the OpenAI API, you may need to install additional packages:
117
119
  #### Anthropic Claude 3
118
120
  ```bash
119
121
  pip install anthropic
@@ -132,7 +134,8 @@ and [configure the authorization](https://cloud.google.com/sdk/docs/authorizing)
132
134
 
133
135
  #### Local language models via Hugging Face Transformers
134
136
 
135
- You will need to install transformers and deep learning library of your choice (PyTorch, TensorFlow, Flax, etc).
137
+ You will need to install transformers and a deep learning library of your choice
138
+ (PyTorch, TensorFlow, Flax, etc).
136
139
 
137
140
  See [transformers installation](https://huggingface.co/docs/transformers/installation).
138
141
 
@@ -148,13 +151,13 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
148
151
  Vector database functions are available via `microcore.texts`.
149
152
 
150
153
  #### ChromaDB
151
- Default vector database is [Chroma](https://www.trychroma.com/).
154
+ The default vector database is [Chroma](https://www.trychroma.com/).
152
155
  In order to use vector database functions with ChromaDB, you need to install the `chromadb` package:
153
156
  ```bash
154
157
  pip install chromadb
155
158
  ```
156
- By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
157
- Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
159
+ By default, MicroCore will use ChromaDB PersistentClient (if the corresponding package is installed).
160
+ Alternatively, you can run Chroma as a separate service and configure MicroCore to use HttpClient:
158
161
 
159
162
  ```python
160
163
  from microcore import configure
@@ -177,7 +180,7 @@ configure(
177
180
  EMBEDDING_DB_TYPE=EmbeddingDbType.QDRANT,
178
181
  EMBEDDING_DB_HOST="localhost",
179
182
  EMBEDDING_DB_PORT="6333",
180
- EMBEDDING_DB_SIZE=384, # dimensions quantity in used SentenceTransformer model
183
+ EMBEDDING_DB_SIZE=384, # number of dimensions in the SentenceTransformer model
181
184
  EMBEDDING_DB_FUNCTION=SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2"),
182
185
  )
183
186
  ```
@@ -200,7 +203,7 @@ use_logging()
200
203
  # Basic usage
201
204
  ai_response = llm('What is your model name?')
202
205
 
203
- # You also may pass a list of strings as prompt
206
+ # You may also pass a list of strings as prompt
204
207
  # - For chat completion models elements are treated as separate messages
205
208
  # - For completion LLMs elements are treated as text lines
206
209
  llm(['1+2', '='])
@@ -293,7 +296,7 @@ LLM Microcore supports all models & API providers having OpenAI API.
293
296
  ## 🖼️ Examples
294
297
 
295
298
  #### [Code review tool](https://github.com/llm-microcore/microcore/blob/main/examples/code-review-tool)
296
- Performs code review by LLM for changes in git .patch files in any programming languages.
299
+ Performs a code review by LLM for changes in git .patch files in any programming language.
297
300
 
298
301
  #### [Image analysis](https://colab.research.google.com/drive/1qTJ51wxCv3VlyqLt3M8OZ7183YXPFpic) (Google Colab)
299
302
  Determine the number of petals and the color of the flower from a photo (gpt-4-turbo)
@@ -315,7 +318,7 @@ Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
315
318
  @TODO
316
319
 
317
320
  ## 🤖 AI Modules
318
- **This is experimental feature.**
321
+ **This is an experimental feature.**
319
322
 
320
323
  Tweaks the Python import system to provide automatic setup of MicroCore environment
321
324
  based on metadata in module docstrings.
@@ -20,7 +20,7 @@ It defines interfaces for features typically used in AI applications,
20
20
  which allows you to keep your application as simple as possible and try various models & services
21
21
  without the need to change your application code.
22
22
 
23
- You even can switch between text completion and chat completion models only using configuration.
23
+ You can even switch between text completion and chat completion models only using configuration.
24
24
 
25
25
  Thanks to LLM-agnostic MCP integration,
26
26
  **MicroCore** connects MCP tools to any language model easily,
@@ -72,7 +72,7 @@ Similarity search features will work out of the box if you have the `chromadb` p
72
72
  There are a few options available for configuring microcore:
73
73
 
74
74
  - Use `microcore.configure(**params)`
75
- <br>💡 <small>All configuration options should be available in IDE autocompletion tooltips</small>
75
+ <br>💡 <small>All configuration options appear in IDE autocompletion tooltips</small>
76
76
  - Create a `.env` file in your project root; examples: [basic.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.example), [Mistral Large.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.mistral.example), [Anthropic Claude 3 Opus.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.anthropic.example), [Gemini on Vertex AI.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.google-vertex-gemini.example), [Gemini on AI Studio.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.gemini.example)
77
77
  - Use a custom configuration file: `mc.configure(DOT_ENV_FILE='dev-config.ini')`
78
78
  - Define OS environment variables
@@ -80,7 +80,7 @@ There are a few options available for configuring microcore:
80
80
  For the full list of available configuration options, you may also check [`microcore/configuration.py`](https://github.com/Nayjest/ai-microcore/blob/main/microcore/configuration.py#L175).
81
81
 
82
82
  ### Installing vendor-specific packages
83
- For the models working not via OpenAI API, you may need to install additional packages:
83
+ For models that do not work via the OpenAI API, you may need to install additional packages:
84
84
  #### Anthropic Claude 3
85
85
  ```bash
86
86
  pip install anthropic
@@ -99,7 +99,8 @@ and [configure the authorization](https://cloud.google.com/sdk/docs/authorizing)
99
99
 
100
100
  #### Local language models via Hugging Face Transformers
101
101
 
102
- You will need to install transformers and deep learning library of your choice (PyTorch, TensorFlow, Flax, etc).
102
+ You will need to install transformers and a deep learning library of your choice
103
+ (PyTorch, TensorFlow, Flax, etc).
103
104
 
104
105
  See [transformers installation](https://huggingface.co/docs/transformers/installation).
105
106
 
@@ -115,13 +116,13 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
115
116
  Vector database functions are available via `microcore.texts`.
116
117
 
117
118
  #### ChromaDB
118
- Default vector database is [Chroma](https://www.trychroma.com/).
119
+ The default vector database is [Chroma](https://www.trychroma.com/).
119
120
  In order to use vector database functions with ChromaDB, you need to install the `chromadb` package:
120
121
  ```bash
121
122
  pip install chromadb
122
123
  ```
123
- By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
124
- Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
124
+ By default, MicroCore will use ChromaDB PersistentClient (if the corresponding package is installed).
125
+ Alternatively, you can run Chroma as a separate service and configure MicroCore to use HttpClient:
125
126
 
126
127
  ```python
127
128
  from microcore import configure
@@ -144,7 +145,7 @@ configure(
144
145
  EMBEDDING_DB_TYPE=EmbeddingDbType.QDRANT,
145
146
  EMBEDDING_DB_HOST="localhost",
146
147
  EMBEDDING_DB_PORT="6333",
147
- EMBEDDING_DB_SIZE=384, # dimensions quantity in used SentenceTransformer model
148
+ EMBEDDING_DB_SIZE=384, # number of dimensions in the SentenceTransformer model
148
149
  EMBEDDING_DB_FUNCTION=SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2"),
149
150
  )
150
151
  ```
@@ -167,7 +168,7 @@ use_logging()
167
168
  # Basic usage
168
169
  ai_response = llm('What is your model name?')
169
170
 
170
- # You also may pass a list of strings as prompt
171
+ # You may also pass a list of strings as prompt
171
172
  # - For chat completion models elements are treated as separate messages
172
173
  # - For completion LLMs elements are treated as text lines
173
174
  llm(['1+2', '='])
@@ -260,7 +261,7 @@ LLM Microcore supports all models & API providers having OpenAI API.
260
261
  ## 🖼️ Examples
261
262
 
262
263
  #### [Code review tool](https://github.com/llm-microcore/microcore/blob/main/examples/code-review-tool)
263
- Performs code review by LLM for changes in git .patch files in any programming languages.
264
+ Performs a code review by LLM for changes in git .patch files in any programming language.
264
265
 
265
266
  #### [Image analysis](https://colab.research.google.com/drive/1qTJ51wxCv3VlyqLt3M8OZ7183YXPFpic) (Google Colab)
266
267
  Determine the number of petals and the color of the flower from a photo (gpt-4-turbo)
@@ -282,7 +283,7 @@ Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
282
283
  @TODO
283
284
 
284
285
  ## 🤖 AI Modules
285
- **This is experimental feature.**
286
+ **This is an experimental feature.**
286
287
 
287
288
  Tweaks the Python import system to provide automatic setup of MicroCore environment
288
289
  based on metadata in module docstrings.
@@ -19,7 +19,6 @@ from ._env import configure, env, config, min_setup
19
19
  from .logging import use_logging
20
20
  from .message_types import UserMsg, AssistantMsg, SysMsg, Msg, PartialMsg
21
21
  from .configuration import (
22
- ApiType,
23
22
  LLMApiBaseError,
24
23
  LLMApiDeploymentIdError,
25
24
  LLMApiKeyError,
@@ -29,6 +28,7 @@ from .configuration import (
29
28
  EmbeddingDbType,
30
29
  PRINT_STREAM,
31
30
  )
31
+ from .llm_backends import ApiPlatform, ApiType
32
32
  from .types import BadAIJsonAnswer, BadAIAnswer, LLMContextLengthExceededError
33
33
  from .wrappers.prompt_wrapper import PromptWrapper
34
34
  from .wrappers.llm_response_wrapper import LLMResponse
@@ -194,6 +194,7 @@ __all__ = [
194
194
  "AssistantMsg",
195
195
  "PartialMsg",
196
196
  "ApiType",
197
+ "ApiPlatform",
197
198
  "EmbeddingDbType",
198
199
  "BadAIJsonAnswer",
199
200
  "PRINT_STREAM",
@@ -230,4 +231,4 @@ __all__ = [
230
231
  # "wrappers",
231
232
  ]
232
233
 
233
- __version__ = "5.0.0dev5"
234
+ __version__ = "5.0.0.dev7"
@@ -11,11 +11,11 @@ import jinja2
11
11
  from .embedding_db import AbstractEmbeddingDB
12
12
  from .configuration import (
13
13
  Config,
14
- ApiType,
15
14
  LLMConfigError,
16
15
  EmbeddingDbType,
17
16
  PRINT_STREAM,
18
17
  )
18
+ from .llm_backends import ApiType
19
19
  from .presets import MIN_SETUP
20
20
  from .lm_client import BaseAIClient
21
21
  from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
@@ -126,23 +126,11 @@ class Env:
126
126
  self.llm_function, self.llm_async_function = make_anthropic_llm_functions(
127
127
  self.config
128
128
  )
129
- elif self.config.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI:
130
- try:
131
- from .llm.google_vertex_ai import (
132
- make_llm_functions as make_google_vertex_llm_functions,
133
- )
134
- except ModuleNotFoundError as e:
135
- raise ModuleNotFoundError(
136
- "To use the Google Vertex language models, "
137
- "you need to install the `vertexai` package "
138
- "and authenticate with Google Cloud cli."
139
- "Run `pip install vertexai`."
140
- ) from e
141
- (
142
- self.llm_function,
143
- self.llm_async_function,
144
- ) = make_google_vertex_llm_functions(self.config)
145
- elif self.config.LLM_API_TYPE in (ApiType.GOOGLE, ApiType.GOOGLE_AI_STUDIO):
129
+ elif self.config.LLM_API_TYPE in (
130
+ ApiType.GOOGLE,
131
+ ApiType.GOOGLE_AI_STUDIO, # @deprecated
132
+ ApiType.GOOGLE_VERTEX_AI # @deprecated
133
+ ):
146
134
  try:
147
135
  from .llm.google_genai import GoogleClient
148
136
  except ModuleNotFoundError as e:
@@ -3,20 +3,29 @@ import logging
3
3
  from datetime import datetime
4
4
  from typing import Any
5
5
 
6
-
7
6
  from .utils import run_parallel, RETURN_EXCEPTION
8
- from .wrappers.llm_response_wrapper import LLMResponse, DictFromLLMResponse, ImageGenerationResponse
9
- from .types import TPrompt, LLMContextLengthExceededError
7
+ from .wrappers.llm_response_wrapper import (
8
+ LLMResponse,
9
+ DictFromLLMResponse,
10
+ ImageGenerationResponse,
11
+ )
12
+ from .types import (
13
+ TPrompt,
14
+ LLMContextLengthExceededError,
15
+ LLMQuotaExceededError,
16
+ LLMAuthError,
17
+ )
10
18
  from .file_cache import (
11
19
  cache_hit,
12
20
  load_cache,
13
21
  save_cache,
14
22
  build_cache_name,
15
- delete_cache
23
+ delete_cache,
16
24
  )
17
25
  from ._env import env
18
26
 
19
27
 
28
+ # pylint: disable=too-many-return-statements,too-many-branches
20
29
  def convert_exception(e: Exception, model: str = None) -> Exception | None:
21
30
  """
22
31
  Convert LLM exceptions to microcore-specific exceptions if possible.
@@ -26,46 +35,142 @@ def convert_exception(e: Exception, model: str = None) -> Exception | None:
26
35
  Returns:
27
36
  Converted exception or None if no conversion is possible
28
37
  """
38
+
39
+ def with_cause(new_exception: Exception) -> Exception:
40
+ """
41
+ Attach a cause to an exception without raising it.
42
+
43
+ Equivalent to `raise new_exception from e` but returns the exception
44
+ instead of raising, preserving the exception chain for later use.
45
+ """
46
+ new_exception.__cause__ = e
47
+ return new_exception
48
+
29
49
  if not isinstance(e, Exception):
30
50
  return None
31
51
  t, msg = f"{type(e).__module__}.{type(e).__name__}", str(e)
32
52
  max_tokens, actual_tokens = None, None
33
- if t == "openai.BadRequestError" and "context_length_exceeded" in msg:
34
- match = re.search(
35
- r"maximum context length is (\d+) tokens.*?resulted in (\d+) tokens",
36
- msg
37
- )
38
- if match:
39
- max_tokens = int(match.group(1))
40
- actual_tokens = int(match.group(2))
41
- return LLMContextLengthExceededError(
42
- actual_tokens=actual_tokens,
43
- max_tokens=max_tokens,
44
- model=model
45
- )
53
+ if t == "openai.BadRequestError":
54
+ if "context_length_exceeded" in msg:
55
+ match = re.search(
56
+ r"maximum context length is (\d+) tokens.*?resulted in (\d+) tokens",
57
+ msg,
58
+ )
59
+ if match:
60
+ max_tokens = int(match.group(1))
61
+ actual_tokens = int(match.group(2))
62
+ return with_cause(
63
+ LLMContextLengthExceededError(
64
+ actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
65
+ )
66
+ )
67
+ if (
68
+ "Please reduce the length of the messages or completion." in msg
69
+ ): # Groq, no details
70
+ return with_cause(LLMContextLengthExceededError(model=model))
71
+
72
+ # x.ai grok-fast
73
+ if (
74
+ "This model's maximum prompt length is" in msg
75
+ and "but the request contains" in msg
76
+ and "tokens" in msg
77
+ ):
78
+ match = re.search(
79
+ r"maximum prompt length is (\d+) but the request contains (\d+) tokens",
80
+ msg,
81
+ )
82
+ if match:
83
+ max_tokens = int(match.group(1))
84
+ actual_tokens = int(match.group(2))
85
+ return with_cause(
86
+ LLMContextLengthExceededError(
87
+ actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
88
+ )
89
+ )
90
+
91
+ if "maximum context length" in msg: # Mistral, # DeepSeek
92
+ if match := re.search(
93
+ r"Prompt contains (\d+) tokens.*?model with (\d+) maximum context length",
94
+ msg,
95
+ ): # Mistral
96
+ max_tokens = int(match.group(2))
97
+ actual_tokens = int(match.group(1))
98
+ elif match := re.search(
99
+ r"maximum context length is (\d+) tokens.*? you requested (\d+) tokens",
100
+ msg,
101
+ ): # DeepSeek
102
+ max_tokens = int(match.group(1))
103
+ actual_tokens = int(match.group(2))
104
+ return with_cause(
105
+ LLMContextLengthExceededError(
106
+ actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
107
+ )
108
+ )
109
+ if "too_many_prompt_tokens" in msg: # Perplexity
110
+ if match := re.search(r"User input tokens exceeds (\d+) tokens", msg):
111
+ max_tokens = int(match.group(1))
112
+ return with_cause(
113
+ LLMContextLengthExceededError(
114
+ actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
115
+ )
116
+ )
117
+
118
+ if (
119
+ t == "openai.APIStatusError" and "413 Request Entity Too Large" in msg
120
+ ): # Cerebras
121
+ return with_cause(LLMContextLengthExceededError(model=model))
122
+
123
+ if t == "openai.APIStatusError" and "Payload Too Large" in msg: # Fireworks
124
+ return with_cause(LLMContextLengthExceededError(model=model))
125
+
46
126
  if t == "anthropic.BadRequestError" and "prompt is too long:" in msg:
47
127
  if match := re.search(r"(\d+)\s+tokens\s+>\s+(\d+)\s+maximum", msg):
48
128
  max_tokens = int(match.group(2))
49
129
  actual_tokens = int(match.group(1))
50
- return LLMContextLengthExceededError(
51
- actual_tokens=actual_tokens,
52
- max_tokens=max_tokens,
53
- model=model
130
+ return with_cause(
131
+ LLMContextLengthExceededError(
132
+ actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
133
+ )
54
134
  )
55
- if (
56
- t == "google.api_core.exceptions.InvalidArgument"
57
- and "The input token count exceeds the maximum number of tokens allowed" in msg
58
- ):
59
- if match := re.search(
60
- r"The input token count exceeds the maximum number of tokens allowed (\d+)",
61
- msg
135
+ if t == "google.genai.errors.ClientError":
136
+
137
+ if "429" in msg and "RESOURCE_EXHAUSTED" in msg:
138
+ return with_cause(LLMQuotaExceededError(details=msg))
139
+
140
+ if (
141
+ "input token count" in msg
142
+ and "exceeds the maximum number of tokens allowed" in msg
62
143
  ):
63
- max_tokens = int(match.group(1))
64
- return LLMContextLengthExceededError(
65
- actual_tokens=actual_tokens,
66
- max_tokens=max_tokens,
67
- model=model
68
- )
144
+ # ai studio
145
+ if match := re.search(
146
+ r"input token count exceeds the maximum number of tokens allowed (\d+)",
147
+ msg,
148
+ ):
149
+ max_tokens = int(match.group(1))
150
+ # vertex
151
+ elif match := re.search(
152
+ r"input token count \((\d+)\) "
153
+ r"exceeds the maximum number of tokens allowed \((\d+)\)",
154
+ msg,
155
+ ):
156
+ actual_tokens = int(match.group(1))
157
+ max_tokens = int(match.group(2))
158
+ return with_cause(
159
+ LLMContextLengthExceededError(
160
+ actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
161
+ )
162
+ )
163
+ if t in (
164
+ "openai.AuthenticationError",
165
+ "anthropic.AuthenticationError",
166
+ "google.auth.exceptions.MalformedError", # Vertex AI, wrong service acc. json
167
+ ):
168
+ return with_cause(LLMAuthError(msg))
169
+ if t == "google.genai.errors.ClientError":
170
+ if "API_KEY_INVALID" in msg:
171
+ return with_cause(LLMAuthError(msg))
172
+ if "PERMISSION_DENIED" in msg: # invalid project in service account json
173
+ return with_cause(LLMAuthError(msg))
69
174
  return None
70
175
 
71
176
 
@@ -74,7 +179,7 @@ def llm(
74
179
  retries: int = 0,
75
180
  parse_json: bool | dict = False,
76
181
  file_cache: bool | str = False,
77
- **kwargs
182
+ **kwargs,
78
183
  ) -> str | LLMResponse | ImageGenerationResponse:
79
184
  """
80
185
  Request Large Language Model synchronously
@@ -123,12 +228,13 @@ def llm(
123
228
  [h(prompt, **kwargs) for h in env().llm_before_handlers]
124
229
  start = datetime.now()
125
230
 
126
- if (file_cache and cache_hit(
231
+ if file_cache and cache_hit(
127
232
  cache_name := build_cache_name(
128
- prompt, kwargs,
129
- prefix=file_cache if isinstance(file_cache, str) else "llm_requests"
233
+ prompt,
234
+ kwargs,
235
+ prefix=file_cache if isinstance(file_cache, str) else "llm_requests",
130
236
  )
131
- )):
237
+ ):
132
238
  response: LLMResponse = load_cache(cache_name)
133
239
  response.from_file_cache = True
134
240
  tries = 0
@@ -142,7 +248,9 @@ def llm(
142
248
  except Exception as e: # pylint: disable=W0718
143
249
  converted_exception = convert_exception(e)
144
250
  # If context length exceeded, authentication failed, or no tries left --> do not retry
145
- if tries == 0 or isinstance(converted_exception, LLMContextLengthExceededError):
251
+ if tries == 0 or isinstance(
252
+ converted_exception, (LLMContextLengthExceededError, LLMAuthError)
253
+ ):
146
254
  if converted_exception:
147
255
  raise converted_exception from e
148
256
  raise e
@@ -161,11 +269,7 @@ def llm(
161
269
  if tries > 0:
162
270
  retry_params = dict(**kwargs)
163
271
  retry_params["retries"] = tries - 1
164
- setattr(
165
- response,
166
- "_retry_callback",
167
- lambda: llm(prompt, **retry_params)
168
- )
272
+ setattr(response, "_retry_callback", lambda: llm(prompt, **retry_params))
169
273
  if parse_json:
170
274
  parsing_params = parse_json if isinstance(parse_json, dict) else {}
171
275
  return response.parse_json(**parsing_params)
@@ -177,7 +281,7 @@ async def allm(
177
281
  retries: int = 0,
178
282
  parse_json: bool | dict = False,
179
283
  file_cache: bool | str = False,
180
- **kwargs
284
+ **kwargs,
181
285
  ) -> str | LLMResponse | DictFromLLMResponse | ImageGenerationResponse:
182
286
  """
183
287
  Request Large Language Model asynchronously
@@ -221,12 +325,13 @@ async def allm(
221
325
  [h(prompt, **kwargs) for h in env().llm_before_handlers]
222
326
  start = datetime.now()
223
327
 
224
- if (file_cache and cache_hit(
328
+ if file_cache and cache_hit(
225
329
  cache_name := build_cache_name(
226
- prompt, kwargs,
227
- prefix=file_cache if isinstance(file_cache, str) else "llm_requests"
330
+ prompt,
331
+ kwargs,
332
+ prefix=file_cache if isinstance(file_cache, str) else "llm_requests",
228
333
  )
229
- )):
334
+ ):
230
335
  response: LLMResponse = load_cache(cache_name)
231
336
  response.from_file_cache = True
232
337
  tries = 0
@@ -240,7 +345,9 @@ async def allm(
240
345
  except Exception as e: # pylint: disable=W0718
241
346
  converted_exception = convert_exception(e)
242
347
  # If context length exceeded, authentication failed, or no tries left --> do not retry
243
- if tries == 0 or isinstance(converted_exception, LLMContextLengthExceededError):
348
+ if tries == 0 or isinstance(
349
+ converted_exception, (LLMContextLengthExceededError, LLMAuthError)
350
+ ):
244
351
  if converted_exception:
245
352
  raise converted_exception from e
246
353
  raise e
@@ -266,7 +373,9 @@ async def allm(
266
373
  logging.info(f"Retrying... {tries} retries left")
267
374
  if file_cache:
268
375
  delete_cache(cache_name)
269
- return await allm(prompt, retries=tries - 1, parse_json=parse_json, **kwargs)
376
+ return await allm(
377
+ prompt, retries=tries - 1, parse_json=parse_json, **kwargs
378
+ )
270
379
  return response
271
380
 
272
381
 
@@ -276,7 +385,7 @@ async def llm_parallel(
276
385
  allow_failures: bool = False,
277
386
  return_on_failure: Any = RETURN_EXCEPTION,
278
387
  log_errors: bool = True,
279
- **kwargs
388
+ **kwargs,
280
389
  ) -> list[str | LLMResponse]:
281
390
  """
282
391
  Execute multiple LLM requests in parallel