ai-microcore 5.0.0.dev5__tar.gz → 5.0.0.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/PKG-INFO +13 -12
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/README.md +12 -11
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/__init__.py +3 -2
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/_env.py +6 -18
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/_llm_functions.py +162 -53
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/configuration.py +168 -88
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/interactive_setup.py +92 -12
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/llm/google_genai.py +9 -4
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/llm/openai.py +8 -8
- ai_microcore-5.0.0.dev6/microcore/llm_backends.py +301 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/logging.py +2 -2
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/presets.py +2 -1
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/types.py +21 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/ui.py +29 -5
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/utils.py +6 -4
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/wrappers/llm_response_wrapper.py +1 -2
- ai_microcore-5.0.0.dev5/microcore/llm/google_vertex_ai.py +0 -149
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/LICENSE +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/_prepare_llm_args.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/ai_func/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/ai_func/ai-func.json.j2 +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/ai_func/ai-func.tag.j2 +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/ai_modules.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/embedding_db/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/embedding_db/chromadb.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/embedding_db/qdrant.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/file_cache.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/file_storage.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/images.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/json_parsing.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/llm/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/llm/anthropic.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/llm/local_llm.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/llm/local_transformers.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/llm/shared.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/lm_client.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/mcp.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/message_types.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/metrics.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/python.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/templating/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/templating/jinja2.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/text2speech/elevenlabs.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/tokenizing.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/wrappers/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/microcore/wrappers/prompt_wrapper.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev6}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-microcore
|
|
3
|
-
Version: 5.0.0.dev5
|
|
3
|
+
Version: 5.0.0.dev6
|
|
4
4
|
Summary: # Minimalistic Foundation for AI Applications
|
|
5
5
|
Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter
|
|
6
6
|
Author-email: Vitalii Stepanenko <mail@vitaliy.in>
|
|
@@ -53,7 +53,7 @@ It defines interfaces for features typically used in AI applications,
|
|
|
53
53
|
which allows you to keep your application as simple as possible and try various models & services
|
|
54
54
|
without need to change your application code.
|
|
55
55
|
|
|
56
|
-
You even
|
|
56
|
+
You can even switch between text completion and chat completion models only using configuration.
|
|
57
57
|
|
|
58
58
|
Thanks to LLM-agnostic MCP integration,
|
|
59
59
|
**MicroCore** connects MCP tools to any language models easily,
|
|
@@ -105,7 +105,7 @@ Similarity search features will work out of the box if you have the `chromadb` p
|
|
|
105
105
|
There are a few options available for configuring microcore:
|
|
106
106
|
|
|
107
107
|
- Use `microcore.configure(**params)`
|
|
108
|
-
<br>💡 <small>All configuration options
|
|
108
|
+
<br>💡 <small>All configuration options appear in IDE autocompletion tooltips</small>
|
|
109
109
|
- Create a `.env` file in your project root; examples: [basic.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.example), [Mistral Large.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.mistral.example), [Anthropic Claude 3 Opus.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.anthropic.example), [Gemini on Vertex AI.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.google-vertex-gemini.example), [Gemini on AI Studio.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.gemini.example)
|
|
110
110
|
- Use a custom configuration file: `mc.configure(DOT_ENV_FILE='dev-config.ini')`
|
|
111
111
|
- Define OS environment variables
|
|
@@ -113,7 +113,7 @@ There are a few options available for configuring microcore:
|
|
|
113
113
|
For the full list of available configuration options, you may also check [`microcore/config.py`](https://github.com/Nayjest/ai-microcore/blob/main/microcore/configuration.py#L175).
|
|
114
114
|
|
|
115
115
|
### Installing vendor-specific packages
|
|
116
|
-
For
|
|
116
|
+
For models working not via OpenAI API, you may need to install additional packages:
|
|
117
117
|
#### Anthropic Claude 3
|
|
118
118
|
```bash
|
|
119
119
|
pip install anthropic
|
|
@@ -132,7 +132,8 @@ and [configure the authorization](https://cloud.google.com/sdk/docs/authorizing)
|
|
|
132
132
|
|
|
133
133
|
#### Local language models via Hugging Face Transformers
|
|
134
134
|
|
|
135
|
-
You will need to install transformers and deep learning library of your choice
|
|
135
|
+
You will need to install transformers and a deep learning library of your choice
|
|
136
|
+
(PyTorch, TensorFlow, Flax, etc).
|
|
136
137
|
|
|
137
138
|
See [transformers installation](https://huggingface.co/docs/transformers/installation).
|
|
138
139
|
|
|
@@ -148,13 +149,13 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
|
|
|
148
149
|
Vector database functions are available via `microcore.texts`.
|
|
149
150
|
|
|
150
151
|
#### ChromaDB
|
|
151
|
-
|
|
152
|
+
The default vector database is [Chroma](https://www.trychroma.com/).
|
|
152
153
|
In order to use vector database functions with ChromaDB, you need to install the `chromadb` package:
|
|
153
154
|
```bash
|
|
154
155
|
pip install chromadb
|
|
155
156
|
```
|
|
156
|
-
By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
|
|
157
|
-
Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
|
|
157
|
+
By default, MicroCore will use ChromaDB PersistentClient (if the corresponding package is installed).
|
|
158
|
+
Alternatively, you can run Chroma as a separate service and configure MicroCore to use HttpClient:
|
|
158
159
|
|
|
159
160
|
```python
|
|
160
161
|
from microcore import configure
|
|
@@ -177,7 +178,7 @@ configure(
|
|
|
177
178
|
EMBEDDING_DB_TYPE=EmbeddingDbType.QDRANT,
|
|
178
179
|
EMBEDDING_DB_HOST="localhost",
|
|
179
180
|
EMBEDDING_DB_PORT="6333",
|
|
180
|
-
EMBEDDING_DB_SIZE=384, # dimensions
|
|
181
|
+
EMBEDDING_DB_SIZE=384, # number of dimensions in the SentenceTransformer model
|
|
181
182
|
EMBEDDING_DB_FUNCTION=SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2"),
|
|
182
183
|
)
|
|
183
184
|
```
|
|
@@ -200,7 +201,7 @@ use_logging()
|
|
|
200
201
|
# Basic usage
|
|
201
202
|
ai_response = llm('What is your model name?')
|
|
202
203
|
|
|
203
|
-
# You also
|
|
204
|
+
# You may also pass a list of strings as prompt
|
|
204
205
|
# - For chat completion models elements are treated as separate messages
|
|
205
206
|
# - For completion LLMs elements are treated as text lines
|
|
206
207
|
llm(['1+2', '='])
|
|
@@ -293,7 +294,7 @@ LLM Microcore supports all models & API providers having OpenAI API.
|
|
|
293
294
|
## 🖼️ Examples
|
|
294
295
|
|
|
295
296
|
#### [Code review tool](https://github.com/llm-microcore/microcore/blob/main/examples/code-review-tool)
|
|
296
|
-
Performs code review by LLM for changes in git .patch files in any programming languages.
|
|
297
|
+
Performs a code review by LLM for changes in git .patch files in any programming languages.
|
|
297
298
|
|
|
298
299
|
#### [Image analysis](https://colab.research.google.com/drive/1qTJ51wxCv3VlyqLt3M8OZ7183YXPFpic) (Google Colab)
|
|
299
300
|
Determine the number of petals and the color of the flower from a photo (gpt-4-turbo)
|
|
@@ -315,7 +316,7 @@ Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
|
|
|
315
316
|
@TODO
|
|
316
317
|
|
|
317
318
|
## 🤖 AI Modules
|
|
318
|
-
**This is experimental feature.**
|
|
319
|
+
**This is an experimental feature.**
|
|
319
320
|
|
|
320
321
|
Tweaks the Python import system to provide automatic setup of MicroCore environment
|
|
321
322
|
based on metadata in module docstrings.
|
|
@@ -20,7 +20,7 @@ It defines interfaces for features typically used in AI applications,
|
|
|
20
20
|
which allows you to keep your application as simple as possible and try various models & services
|
|
21
21
|
without need to change your application code.
|
|
22
22
|
|
|
23
|
-
You even
|
|
23
|
+
You can even switch between text completion and chat completion models only using configuration.
|
|
24
24
|
|
|
25
25
|
Thanks to LLM-agnostic MCP integration,
|
|
26
26
|
**MicroCore** connects MCP tools to any language models easily,
|
|
@@ -72,7 +72,7 @@ Similarity search features will work out of the box if you have the `chromadb` p
|
|
|
72
72
|
There are a few options available for configuring microcore:
|
|
73
73
|
|
|
74
74
|
- Use `microcore.configure(**params)`
|
|
75
|
-
<br>💡 <small>All configuration options
|
|
75
|
+
<br>💡 <small>All configuration options appear in IDE autocompletion tooltips</small>
|
|
76
76
|
- Create a `.env` file in your project root; examples: [basic.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.example), [Mistral Large.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.mistral.example), [Anthropic Claude 3 Opus.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.anthropic.example), [Gemini on Vertex AI.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.google-vertex-gemini.example), [Gemini on AI Studio.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.gemini.example)
|
|
77
77
|
- Use a custom configuration file: `mc.configure(DOT_ENV_FILE='dev-config.ini')`
|
|
78
78
|
- Define OS environment variables
|
|
@@ -80,7 +80,7 @@ There are a few options available for configuring microcore:
|
|
|
80
80
|
For the full list of available configuration options, you may also check [`microcore/config.py`](https://github.com/Nayjest/ai-microcore/blob/main/microcore/configuration.py#L175).
|
|
81
81
|
|
|
82
82
|
### Installing vendor-specific packages
|
|
83
|
-
For
|
|
83
|
+
For models working not via OpenAI API, you may need to install additional packages:
|
|
84
84
|
#### Anthropic Claude 3
|
|
85
85
|
```bash
|
|
86
86
|
pip install anthropic
|
|
@@ -99,7 +99,8 @@ and [configure the authorization](https://cloud.google.com/sdk/docs/authorizing)
|
|
|
99
99
|
|
|
100
100
|
#### Local language models via Hugging Face Transformers
|
|
101
101
|
|
|
102
|
-
You will need to install transformers and deep learning library of your choice
|
|
102
|
+
You will need to install transformers and a deep learning library of your choice
|
|
103
|
+
(PyTorch, TensorFlow, Flax, etc).
|
|
103
104
|
|
|
104
105
|
See [transformers installation](https://huggingface.co/docs/transformers/installation).
|
|
105
106
|
|
|
@@ -115,13 +116,13 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
|
|
|
115
116
|
Vector database functions are available via `microcore.texts`.
|
|
116
117
|
|
|
117
118
|
#### ChromaDB
|
|
118
|
-
|
|
119
|
+
The default vector database is [Chroma](https://www.trychroma.com/).
|
|
119
120
|
In order to use vector database functions with ChromaDB, you need to install the `chromadb` package:
|
|
120
121
|
```bash
|
|
121
122
|
pip install chromadb
|
|
122
123
|
```
|
|
123
|
-
By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
|
|
124
|
-
Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
|
|
124
|
+
By default, MicroCore will use ChromaDB PersistentClient (if the corresponding package is installed).
|
|
125
|
+
Alternatively, you can run Chroma as a separate service and configure MicroCore to use HttpClient:
|
|
125
126
|
|
|
126
127
|
```python
|
|
127
128
|
from microcore import configure
|
|
@@ -144,7 +145,7 @@ configure(
|
|
|
144
145
|
EMBEDDING_DB_TYPE=EmbeddingDbType.QDRANT,
|
|
145
146
|
EMBEDDING_DB_HOST="localhost",
|
|
146
147
|
EMBEDDING_DB_PORT="6333",
|
|
147
|
-
EMBEDDING_DB_SIZE=384, # dimensions
|
|
148
|
+
EMBEDDING_DB_SIZE=384, # number of dimensions in the SentenceTransformer model
|
|
148
149
|
EMBEDDING_DB_FUNCTION=SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2"),
|
|
149
150
|
)
|
|
150
151
|
```
|
|
@@ -167,7 +168,7 @@ use_logging()
|
|
|
167
168
|
# Basic usage
|
|
168
169
|
ai_response = llm('What is your model name?')
|
|
169
170
|
|
|
170
|
-
# You also
|
|
171
|
+
# You may also pass a list of strings as prompt
|
|
171
172
|
# - For chat completion models elements are treated as separate messages
|
|
172
173
|
# - For completion LLMs elements are treated as text lines
|
|
173
174
|
llm(['1+2', '='])
|
|
@@ -260,7 +261,7 @@ LLM Microcore supports all models & API providers having OpenAI API.
|
|
|
260
261
|
## 🖼️ Examples
|
|
261
262
|
|
|
262
263
|
#### [Code review tool](https://github.com/llm-microcore/microcore/blob/main/examples/code-review-tool)
|
|
263
|
-
Performs code review by LLM for changes in git .patch files in any programming languages.
|
|
264
|
+
Performs a code review by LLM for changes in git .patch files in any programming languages.
|
|
264
265
|
|
|
265
266
|
#### [Image analysis](https://colab.research.google.com/drive/1qTJ51wxCv3VlyqLt3M8OZ7183YXPFpic) (Google Colab)
|
|
266
267
|
Determine the number of petals and the color of the flower from a photo (gpt-4-turbo)
|
|
@@ -282,7 +283,7 @@ Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
|
|
|
282
283
|
@TODO
|
|
283
284
|
|
|
284
285
|
## 🤖 AI Modules
|
|
285
|
-
**This is experimental feature.**
|
|
286
|
+
**This is an experimental feature.**
|
|
286
287
|
|
|
287
288
|
Tweaks the Python import system to provide automatic setup of MicroCore environment
|
|
288
289
|
based on metadata in module docstrings.
|
|
@@ -19,7 +19,6 @@ from ._env import configure, env, config, min_setup
|
|
|
19
19
|
from .logging import use_logging
|
|
20
20
|
from .message_types import UserMsg, AssistantMsg, SysMsg, Msg, PartialMsg
|
|
21
21
|
from .configuration import (
|
|
22
|
-
ApiType,
|
|
23
22
|
LLMApiBaseError,
|
|
24
23
|
LLMApiDeploymentIdError,
|
|
25
24
|
LLMApiKeyError,
|
|
@@ -29,6 +28,7 @@ from .configuration import (
|
|
|
29
28
|
EmbeddingDbType,
|
|
30
29
|
PRINT_STREAM,
|
|
31
30
|
)
|
|
31
|
+
from .llm_backends import ApiPlatform, ApiType
|
|
32
32
|
from .types import BadAIJsonAnswer, BadAIAnswer, LLMContextLengthExceededError
|
|
33
33
|
from .wrappers.prompt_wrapper import PromptWrapper
|
|
34
34
|
from .wrappers.llm_response_wrapper import LLMResponse
|
|
@@ -194,6 +194,7 @@ __all__ = [
|
|
|
194
194
|
"AssistantMsg",
|
|
195
195
|
"PartialMsg",
|
|
196
196
|
"ApiType",
|
|
197
|
+
"ApiPlatform",
|
|
197
198
|
"EmbeddingDbType",
|
|
198
199
|
"BadAIJsonAnswer",
|
|
199
200
|
"PRINT_STREAM",
|
|
@@ -230,4 +231,4 @@ __all__ = [
|
|
|
230
231
|
# "wrappers",
|
|
231
232
|
]
|
|
232
233
|
|
|
233
|
-
__version__ = "5.0.0dev5"
|
|
234
|
+
__version__ = "5.0.0dev6"
|
|
@@ -11,11 +11,11 @@ import jinja2
|
|
|
11
11
|
from .embedding_db import AbstractEmbeddingDB
|
|
12
12
|
from .configuration import (
|
|
13
13
|
Config,
|
|
14
|
-
ApiType,
|
|
15
14
|
LLMConfigError,
|
|
16
15
|
EmbeddingDbType,
|
|
17
16
|
PRINT_STREAM,
|
|
18
17
|
)
|
|
18
|
+
from .llm_backends import ApiType
|
|
19
19
|
from .presets import MIN_SETUP
|
|
20
20
|
from .lm_client import BaseAIClient
|
|
21
21
|
from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
|
|
@@ -126,23 +126,11 @@ class Env:
|
|
|
126
126
|
self.llm_function, self.llm_async_function = make_anthropic_llm_functions(
|
|
127
127
|
self.config
|
|
128
128
|
)
|
|
129
|
-
elif self.config.LLM_API_TYPE
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
except ModuleNotFoundError as e:
|
|
135
|
-
raise ModuleNotFoundError(
|
|
136
|
-
"To use the Google Vertex language models, "
|
|
137
|
-
"you need to install the `vertexai` package "
|
|
138
|
-
"and authenticate with Google Cloud cli."
|
|
139
|
-
"Run `pip install vertexai`."
|
|
140
|
-
) from e
|
|
141
|
-
(
|
|
142
|
-
self.llm_function,
|
|
143
|
-
self.llm_async_function,
|
|
144
|
-
) = make_google_vertex_llm_functions(self.config)
|
|
145
|
-
elif self.config.LLM_API_TYPE in (ApiType.GOOGLE, ApiType.GOOGLE_AI_STUDIO):
|
|
129
|
+
elif self.config.LLM_API_TYPE in (
|
|
130
|
+
ApiType.GOOGLE,
|
|
131
|
+
ApiType.GOOGLE_AI_STUDIO, # @deprecated
|
|
132
|
+
ApiType.GOOGLE_VERTEX_AI # @deprecated
|
|
133
|
+
):
|
|
146
134
|
try:
|
|
147
135
|
from .llm.google_genai import GoogleClient
|
|
148
136
|
except ModuleNotFoundError as e:
|
|
@@ -3,20 +3,29 @@ import logging
|
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
|
|
7
6
|
from .utils import run_parallel, RETURN_EXCEPTION
|
|
8
|
-
from .wrappers.llm_response_wrapper import
|
|
9
|
-
|
|
7
|
+
from .wrappers.llm_response_wrapper import (
|
|
8
|
+
LLMResponse,
|
|
9
|
+
DictFromLLMResponse,
|
|
10
|
+
ImageGenerationResponse,
|
|
11
|
+
)
|
|
12
|
+
from .types import (
|
|
13
|
+
TPrompt,
|
|
14
|
+
LLMContextLengthExceededError,
|
|
15
|
+
LLMQuotaExceededError,
|
|
16
|
+
LLMAuthError,
|
|
17
|
+
)
|
|
10
18
|
from .file_cache import (
|
|
11
19
|
cache_hit,
|
|
12
20
|
load_cache,
|
|
13
21
|
save_cache,
|
|
14
22
|
build_cache_name,
|
|
15
|
-
delete_cache
|
|
23
|
+
delete_cache,
|
|
16
24
|
)
|
|
17
25
|
from ._env import env
|
|
18
26
|
|
|
19
27
|
|
|
28
|
+
# pylint: disable=too-many-return-statements,too-many-branches
|
|
20
29
|
def convert_exception(e: Exception, model: str = None) -> Exception | None:
|
|
21
30
|
"""
|
|
22
31
|
Convert LLM exceptions microcore-specific exceptions if possible.
|
|
@@ -26,46 +35,142 @@ def convert_exception(e: Exception, model: str = None) -> Exception | None:
|
|
|
26
35
|
Returns:
|
|
27
36
|
Converted exception or None if no conversion is possible
|
|
28
37
|
"""
|
|
38
|
+
|
|
39
|
+
def with_cause(new_exception: Exception) -> Exception:
|
|
40
|
+
"""
|
|
41
|
+
Attach a cause to an exception without raising it.
|
|
42
|
+
|
|
43
|
+
Equivalent to `raise new_exc from cause` but returns the exception
|
|
44
|
+
instead of raising, preserving the exception chain for later use.
|
|
45
|
+
"""
|
|
46
|
+
new_exception.__cause__ = e
|
|
47
|
+
return new_exception
|
|
48
|
+
|
|
29
49
|
if not isinstance(e, Exception):
|
|
30
50
|
return None
|
|
31
51
|
t, msg = f"{type(e).__module__}.{type(e).__name__}", str(e)
|
|
32
52
|
max_tokens, actual_tokens = None, None
|
|
33
|
-
if t == "openai.BadRequestError"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
53
|
+
if t == "openai.BadRequestError":
|
|
54
|
+
if "context_length_exceeded" in msg:
|
|
55
|
+
match = re.search(
|
|
56
|
+
r"maximum context length is (\d+) tokens.*?resulted in (\d+) tokens",
|
|
57
|
+
msg,
|
|
58
|
+
)
|
|
59
|
+
if match:
|
|
60
|
+
max_tokens = int(match.group(1))
|
|
61
|
+
actual_tokens = int(match.group(2))
|
|
62
|
+
return with_cause(
|
|
63
|
+
LLMContextLengthExceededError(
|
|
64
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
65
|
+
)
|
|
66
|
+
)
|
|
67
|
+
if (
|
|
68
|
+
"Please reduce the length of the messages or completion." in msg
|
|
69
|
+
): # Groq, no details
|
|
70
|
+
return with_cause(LLMContextLengthExceededError(model=model))
|
|
71
|
+
|
|
72
|
+
# x.ai grok-fast
|
|
73
|
+
if (
|
|
74
|
+
"This model's maximum prompt length is" in msg
|
|
75
|
+
and "but the request contains" in msg
|
|
76
|
+
and "tokens" in msg
|
|
77
|
+
):
|
|
78
|
+
match = re.search(
|
|
79
|
+
r"maximum prompt length is (\d+) but the request contains (\d+) tokens",
|
|
80
|
+
msg,
|
|
81
|
+
)
|
|
82
|
+
if match:
|
|
83
|
+
max_tokens = int(match.group(1))
|
|
84
|
+
actual_tokens = int(match.group(2))
|
|
85
|
+
return with_cause(
|
|
86
|
+
LLMContextLengthExceededError(
|
|
87
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
88
|
+
)
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
if "maximum context length" in msg: # Mistral, # DeepSeek
|
|
92
|
+
if match := re.search(
|
|
93
|
+
r"Prompt contains (\d+) tokens.*?model with (\d+) maximum context length",
|
|
94
|
+
msg,
|
|
95
|
+
): # Mistral
|
|
96
|
+
max_tokens = int(match.group(2))
|
|
97
|
+
actual_tokens = int(match.group(1))
|
|
98
|
+
elif match := re.search(
|
|
99
|
+
r"maximum context length is (\d+) tokens.*? you requested (\d+) tokens",
|
|
100
|
+
msg,
|
|
101
|
+
): # DeepSeek
|
|
102
|
+
max_tokens = int(match.group(1))
|
|
103
|
+
actual_tokens = int(match.group(2))
|
|
104
|
+
return with_cause(
|
|
105
|
+
LLMContextLengthExceededError(
|
|
106
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
107
|
+
)
|
|
108
|
+
)
|
|
109
|
+
if "too_many_prompt_tokens" in msg: # Perplexity
|
|
110
|
+
if match := re.search(r"User input tokens exceeds (\d+) tokens", msg):
|
|
111
|
+
max_tokens = int(match.group(1))
|
|
112
|
+
return with_cause(
|
|
113
|
+
LLMContextLengthExceededError(
|
|
114
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
115
|
+
)
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
if (
|
|
119
|
+
t == "openai.APIStatusError" and "413 Request Entity Too Large" in msg
|
|
120
|
+
): # Cerebras
|
|
121
|
+
return with_cause(LLMContextLengthExceededError(model=model))
|
|
122
|
+
|
|
123
|
+
if t == "openai.APIStatusError" and "Payload Too Large" in msg: # Fireworks
|
|
124
|
+
return with_cause(LLMContextLengthExceededError(model=model))
|
|
125
|
+
|
|
46
126
|
if t == "anthropic.BadRequestError" and "prompt is too long:" in msg:
|
|
47
127
|
if match := re.search(r"(\d+)\s+tokens\s+>\s+(\d+)\s+maximum", msg):
|
|
48
128
|
max_tokens = int(match.group(2))
|
|
49
129
|
actual_tokens = int(match.group(1))
|
|
50
|
-
return
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
130
|
+
return with_cause(
|
|
131
|
+
LLMContextLengthExceededError(
|
|
132
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
133
|
+
)
|
|
54
134
|
)
|
|
55
|
-
if
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
msg
|
|
135
|
+
if t == "google.genai.errors.ClientError":
|
|
136
|
+
|
|
137
|
+
if "429" in msg and "RESOURCE_EXHAUSTED" in msg:
|
|
138
|
+
return with_cause(LLMQuotaExceededError(details=msg))
|
|
139
|
+
|
|
140
|
+
if (
|
|
141
|
+
"input token count" in msg
|
|
142
|
+
and "exceeds the maximum number of tokens allowed" in msg
|
|
62
143
|
):
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
144
|
+
# ai studio
|
|
145
|
+
if match := re.search(
|
|
146
|
+
r"input token count exceeds the maximum number of tokens allowed (\d+)",
|
|
147
|
+
msg,
|
|
148
|
+
):
|
|
149
|
+
max_tokens = int(match.group(1))
|
|
150
|
+
# vertex
|
|
151
|
+
elif match := re.search(
|
|
152
|
+
r"input token count \((\d+)\) "
|
|
153
|
+
r"exceeds the maximum number of tokens allowed \((\d+)\)",
|
|
154
|
+
msg,
|
|
155
|
+
):
|
|
156
|
+
actual_tokens = int(match.group(1))
|
|
157
|
+
max_tokens = int(match.group(2))
|
|
158
|
+
return with_cause(
|
|
159
|
+
LLMContextLengthExceededError(
|
|
160
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
161
|
+
)
|
|
162
|
+
)
|
|
163
|
+
if t in (
|
|
164
|
+
"openai.AuthenticationError",
|
|
165
|
+
"anthropic.AuthenticationError",
|
|
166
|
+
"google.auth.exceptions.MalformedError", # Vertex AI, wrong service acc. json
|
|
167
|
+
):
|
|
168
|
+
return with_cause(LLMAuthError(msg))
|
|
169
|
+
if t == "google.genai.errors.ClientError":
|
|
170
|
+
if "API_KEY_INVALID" in msg:
|
|
171
|
+
return with_cause(LLMAuthError(msg))
|
|
172
|
+
if "PERMISSION_DENIED" in msg: # invalid project in service account json
|
|
173
|
+
return with_cause(LLMAuthError(msg))
|
|
69
174
|
return None
|
|
70
175
|
|
|
71
176
|
|
|
@@ -74,7 +179,7 @@ def llm(
|
|
|
74
179
|
retries: int = 0,
|
|
75
180
|
parse_json: bool | dict = False,
|
|
76
181
|
file_cache: bool | str = False,
|
|
77
|
-
**kwargs
|
|
182
|
+
**kwargs,
|
|
78
183
|
) -> str | LLMResponse | ImageGenerationResponse:
|
|
79
184
|
"""
|
|
80
185
|
Request Large Language Model synchronously
|
|
@@ -123,12 +228,13 @@ def llm(
|
|
|
123
228
|
[h(prompt, **kwargs) for h in env().llm_before_handlers]
|
|
124
229
|
start = datetime.now()
|
|
125
230
|
|
|
126
|
-
if
|
|
231
|
+
if file_cache and cache_hit(
|
|
127
232
|
cache_name := build_cache_name(
|
|
128
|
-
prompt,
|
|
129
|
-
|
|
233
|
+
prompt,
|
|
234
|
+
kwargs,
|
|
235
|
+
prefix=file_cache if isinstance(file_cache, str) else "llm_requests",
|
|
130
236
|
)
|
|
131
|
-
)
|
|
237
|
+
):
|
|
132
238
|
response: LLMResponse = load_cache(cache_name)
|
|
133
239
|
response.from_file_cache = True
|
|
134
240
|
tries = 0
|
|
@@ -142,7 +248,9 @@ def llm(
|
|
|
142
248
|
except Exception as e: # pylint: disable=W0718
|
|
143
249
|
converted_exception = convert_exception(e)
|
|
144
250
|
# If context length exceeded, or no tries left --> do not retry
|
|
145
|
-
if tries == 0 or isinstance(
|
|
251
|
+
if tries == 0 or isinstance(
|
|
252
|
+
converted_exception, (LLMContextLengthExceededError, LLMAuthError)
|
|
253
|
+
):
|
|
146
254
|
if converted_exception:
|
|
147
255
|
raise converted_exception from e
|
|
148
256
|
raise e
|
|
@@ -161,11 +269,7 @@ def llm(
|
|
|
161
269
|
if tries > 0:
|
|
162
270
|
retry_params = dict(**kwargs)
|
|
163
271
|
retry_params["retries"] = tries - 1
|
|
164
|
-
setattr(
|
|
165
|
-
response,
|
|
166
|
-
"_retry_callback",
|
|
167
|
-
lambda: llm(prompt, **retry_params)
|
|
168
|
-
)
|
|
272
|
+
setattr(response, "_retry_callback", lambda: llm(prompt, **retry_params))
|
|
169
273
|
if parse_json:
|
|
170
274
|
parsing_params = parse_json if isinstance(parse_json, dict) else {}
|
|
171
275
|
return response.parse_json(**parsing_params)
|
|
@@ -177,7 +281,7 @@ async def allm(
|
|
|
177
281
|
retries: int = 0,
|
|
178
282
|
parse_json: bool | dict = False,
|
|
179
283
|
file_cache: bool | str = False,
|
|
180
|
-
**kwargs
|
|
284
|
+
**kwargs,
|
|
181
285
|
) -> str | LLMResponse | DictFromLLMResponse | ImageGenerationResponse:
|
|
182
286
|
"""
|
|
183
287
|
Request Large Language Model asynchronously
|
|
@@ -221,12 +325,13 @@ async def allm(
|
|
|
221
325
|
[h(prompt, **kwargs) for h in env().llm_before_handlers]
|
|
222
326
|
start = datetime.now()
|
|
223
327
|
|
|
224
|
-
if
|
|
328
|
+
if file_cache and cache_hit(
|
|
225
329
|
cache_name := build_cache_name(
|
|
226
|
-
prompt,
|
|
227
|
-
|
|
330
|
+
prompt,
|
|
331
|
+
kwargs,
|
|
332
|
+
prefix=file_cache if isinstance(file_cache, str) else "llm_requests",
|
|
228
333
|
)
|
|
229
|
-
)
|
|
334
|
+
):
|
|
230
335
|
response: LLMResponse = load_cache(cache_name)
|
|
231
336
|
response.from_file_cache = True
|
|
232
337
|
tries = 0
|
|
@@ -240,7 +345,9 @@ async def allm(
|
|
|
240
345
|
except Exception as e: # pylint: disable=W0718
|
|
241
346
|
converted_exception = convert_exception(e)
|
|
242
347
|
# If context length exceeded, or no tries left --> do not retry
|
|
243
|
-
if tries == 0 or isinstance(
|
|
348
|
+
if tries == 0 or isinstance(
|
|
349
|
+
converted_exception, (LLMContextLengthExceededError, LLMAuthError)
|
|
350
|
+
):
|
|
244
351
|
if converted_exception:
|
|
245
352
|
raise converted_exception from e
|
|
246
353
|
raise e
|
|
@@ -266,7 +373,9 @@ async def allm(
|
|
|
266
373
|
logging.info(f"Retrying... {tries} retries left")
|
|
267
374
|
if file_cache:
|
|
268
375
|
delete_cache(cache_name)
|
|
269
|
-
return await allm(
|
|
376
|
+
return await allm(
|
|
377
|
+
prompt, retries=tries - 1, parse_json=parse_json, **kwargs
|
|
378
|
+
)
|
|
270
379
|
return response
|
|
271
380
|
|
|
272
381
|
|
|
@@ -276,7 +385,7 @@ async def llm_parallel(
|
|
|
276
385
|
allow_failures: bool = False,
|
|
277
386
|
return_on_failure: Any = RETURN_EXCEPTION,
|
|
278
387
|
log_errors: bool = True,
|
|
279
|
-
**kwargs
|
|
388
|
+
**kwargs,
|
|
280
389
|
) -> list[str | LLMResponse]:
|
|
281
390
|
"""
|
|
282
391
|
Execute multiple LLM requests in parallel
|