ai-microcore 5.0.0.dev5__tar.gz → 5.0.0.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/PKG-INFO +16 -13
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/README.md +12 -11
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/__init__.py +3 -2
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/_env.py +6 -18
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/_llm_functions.py +162 -53
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/configuration.py +168 -88
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/interactive_setup.py +92 -12
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/google_genai.py +9 -4
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/openai.py +8 -8
- ai_microcore-5.0.0.dev7/microcore/llm_backends.py +301 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/logging.py +2 -2
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/message_types.py +19 -5
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/presets.py +2 -1
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/types.py +21 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ui.py +29 -5
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/utils.py +6 -4
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/wrappers/llm_response_wrapper.py +1 -2
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/pyproject.toml +16 -1
- ai_microcore-5.0.0.dev5/microcore/llm/google_vertex_ai.py +0 -149
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/LICENSE +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/_prepare_llm_args.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_func/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_func/ai-func.json.j2 +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_func/ai-func.tag.j2 +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/ai_modules.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/embedding_db/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/embedding_db/chromadb.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/embedding_db/qdrant.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/file_cache.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/file_storage.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/images.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/json_parsing.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/anthropic.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/local_llm.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/local_transformers.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/llm/shared.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/lm_client.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/mcp.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/metrics.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/python.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/templating/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/templating/jinja2.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/text2speech/elevenlabs.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/tokenizing.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/wrappers/__init__.py +0 -0
- {ai_microcore-5.0.0.dev5 → ai_microcore-5.0.0.dev7}/microcore/wrappers/prompt_wrapper.py +0 -0
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-microcore
|
|
3
|
-
Version: 5.0.0.dev5
|
|
3
|
+
Version: 5.0.0.dev7
|
|
4
4
|
Summary: # Minimalistic Foundation for AI Applications
|
|
5
|
-
Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter
|
|
5
|
+
Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter,anthropic,google gemini,google vertex ai
|
|
6
6
|
Author-email: Vitalii Stepanenko <mail@vitaliy.in>
|
|
7
7
|
Maintainer-email: Vitalii Stepanenko <mail@vitaliy.in>
|
|
8
8
|
Requires-Python: >=3.10
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
@@ -29,6 +30,7 @@ Requires-Dist: mcp>=1.10.1,<2.0
|
|
|
29
30
|
Requires-Dist: fastmcp>=2.10.2,<3.0
|
|
30
31
|
Requires-Dist: docstring_parser~=0.16.0
|
|
31
32
|
Requires-Dist: httpx~=0.28.1
|
|
33
|
+
Project-URL: Bug Tracker, https://github.com/Nayjest/ai-microcore/issues
|
|
32
34
|
Project-URL: Source Code, https://github.com/Nayjest/ai-microcore
|
|
33
35
|
|
|
34
36
|
# AI MicroCore: A Minimalistic Foundation for AI Applications
|
|
@@ -53,7 +55,7 @@ It defines interfaces for features typically used in AI applications,
|
|
|
53
55
|
which allows you to keep your application as simple as possible and try various models & services
|
|
54
56
|
without need to change your application code.
|
|
55
57
|
|
|
56
|
-
You even
|
|
58
|
+
You can even switch between text completion and chat completion models only using configuration.
|
|
57
59
|
|
|
58
60
|
Thanks to LLM-agnostic MCP integration,
|
|
59
61
|
**MicroCore** connects MCP tools to any language models easily,
|
|
@@ -105,7 +107,7 @@ Similarity search features will work out of the box if you have the `chromadb` p
|
|
|
105
107
|
There are a few options available for configuring microcore:
|
|
106
108
|
|
|
107
109
|
- Use `microcore.configure(**params)`
|
|
108
|
-
<br>💡 <small>All configuration options
|
|
110
|
+
<br>💡 <small>All configuration options appear in IDE autocompletion tooltips</small>
|
|
109
111
|
- Create a `.env` file in your project root; examples: [basic.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.example), [Mistral Large.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.mistral.example), [Anthropic Claude 3 Opus.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.anthropic.example), [Gemini on Vertex AI.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.google-vertex-gemini.example), [Gemini on AI Studio.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.gemini.example)
|
|
110
112
|
- Use a custom configuration file: `mc.configure(DOT_ENV_FILE='dev-config.ini')`
|
|
111
113
|
- Define OS environment variables
|
|
@@ -113,7 +115,7 @@ There are a few options available for configuring microcore:
|
|
|
113
115
|
For the full list of available configuration options, you may also check [`microcore/config.py`](https://github.com/Nayjest/ai-microcore/blob/main/microcore/configuration.py#L175).
|
|
114
116
|
|
|
115
117
|
### Installing vendor-specific packages
|
|
116
|
-
For
|
|
118
|
+
For models working not via OpenAI API, you may need to install additional packages:
|
|
117
119
|
#### Anthropic Claude 3
|
|
118
120
|
```bash
|
|
119
121
|
pip install anthropic
|
|
@@ -132,7 +134,8 @@ and [configure the authorization](https://cloud.google.com/sdk/docs/authorizing)
|
|
|
132
134
|
|
|
133
135
|
#### Local language models via Hugging Face Transformers
|
|
134
136
|
|
|
135
|
-
You will need to install transformers and deep learning library of your choice
|
|
137
|
+
You will need to install transformers and a deep learning library of your choice
|
|
138
|
+
(PyTorch, TensorFlow, Flax, etc).
|
|
136
139
|
|
|
137
140
|
See [transformers installation](https://huggingface.co/docs/transformers/installation).
|
|
138
141
|
|
|
@@ -148,13 +151,13 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
|
|
|
148
151
|
Vector database functions are available via `microcore.texts`.
|
|
149
152
|
|
|
150
153
|
#### ChromaDB
|
|
151
|
-
|
|
154
|
+
The default vector database is [Chroma](https://www.trychroma.com/).
|
|
152
155
|
In order to use vector database functions with ChromaDB, you need to install the `chromadb` package:
|
|
153
156
|
```bash
|
|
154
157
|
pip install chromadb
|
|
155
158
|
```
|
|
156
|
-
By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
|
|
157
|
-
Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
|
|
159
|
+
By default, MicroCore will use ChromaDB PersistentClient (if the corresponding package is installed).
|
|
160
|
+
Alternatively, you can run Chroma as a separate service and configure MicroCore to use HttpClient:
|
|
158
161
|
|
|
159
162
|
```python
|
|
160
163
|
from microcore import configure
|
|
@@ -177,7 +180,7 @@ configure(
|
|
|
177
180
|
EMBEDDING_DB_TYPE=EmbeddingDbType.QDRANT,
|
|
178
181
|
EMBEDDING_DB_HOST="localhost",
|
|
179
182
|
EMBEDDING_DB_PORT="6333",
|
|
180
|
-
EMBEDDING_DB_SIZE=384, # dimensions
|
|
183
|
+
EMBEDDING_DB_SIZE=384, # number of dimensions in the SentenceTransformer model
|
|
181
184
|
EMBEDDING_DB_FUNCTION=SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2"),
|
|
182
185
|
)
|
|
183
186
|
```
|
|
@@ -200,7 +203,7 @@ use_logging()
|
|
|
200
203
|
# Basic usage
|
|
201
204
|
ai_response = llm('What is your model name?')
|
|
202
205
|
|
|
203
|
-
# You also
|
|
206
|
+
# You may also pass a list of strings as prompt
|
|
204
207
|
# - For chat completion models elements are treated as separate messages
|
|
205
208
|
# - For completion LLMs elements are treated as text lines
|
|
206
209
|
llm(['1+2', '='])
|
|
@@ -293,7 +296,7 @@ LLM Microcore supports all models & API providers having OpenAI API.
|
|
|
293
296
|
## 🖼️ Examples
|
|
294
297
|
|
|
295
298
|
#### [Code review tool](https://github.com/llm-microcore/microcore/blob/main/examples/code-review-tool)
|
|
296
|
-
Performs code review by LLM for changes in git .patch files in any programming languages.
|
|
299
|
+
Performs a code review by LLM for changes in git .patch files in any programming languages.
|
|
297
300
|
|
|
298
301
|
#### [Image analysis](https://colab.research.google.com/drive/1qTJ51wxCv3VlyqLt3M8OZ7183YXPFpic) (Google Colab)
|
|
299
302
|
Determine the number of petals and the color of the flower from a photo (gpt-4-turbo)
|
|
@@ -315,7 +318,7 @@ Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
|
|
|
315
318
|
@TODO
|
|
316
319
|
|
|
317
320
|
## 🤖 AI Modules
|
|
318
|
-
**This is experimental feature.**
|
|
321
|
+
**This is an experimental feature.**
|
|
319
322
|
|
|
320
323
|
Tweaks the Python import system to provide automatic setup of MicroCore environment
|
|
321
324
|
based on metadata in module docstrings.
|
|
@@ -20,7 +20,7 @@ It defines interfaces for features typically used in AI applications,
|
|
|
20
20
|
which allows you to keep your application as simple as possible and try various models & services
|
|
21
21
|
without need to change your application code.
|
|
22
22
|
|
|
23
|
-
You even
|
|
23
|
+
You can even switch between text completion and chat completion models only using configuration.
|
|
24
24
|
|
|
25
25
|
Thanks to LLM-agnostic MCP integration,
|
|
26
26
|
**MicroCore** connects MCP tools to any language models easily,
|
|
@@ -72,7 +72,7 @@ Similarity search features will work out of the box if you have the `chromadb` p
|
|
|
72
72
|
There are a few options available for configuring microcore:
|
|
73
73
|
|
|
74
74
|
- Use `microcore.configure(**params)`
|
|
75
|
-
<br>💡 <small>All configuration options
|
|
75
|
+
<br>💡 <small>All configuration options appear in IDE autocompletion tooltips</small>
|
|
76
76
|
- Create a `.env` file in your project root; examples: [basic.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.example), [Mistral Large.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.mistral.example), [Anthropic Claude 3 Opus.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.anthropic.example), [Gemini on Vertex AI.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.google-vertex-gemini.example), [Gemini on AI Studio.env](https://github.com/Nayjest/ai-microcore/blob/main/.env.gemini.example)
|
|
77
77
|
- Use a custom configuration file: `mc.configure(DOT_ENV_FILE='dev-config.ini')`
|
|
78
78
|
- Define OS environment variables
|
|
@@ -80,7 +80,7 @@ There are a few options available for configuring microcore:
|
|
|
80
80
|
For the full list of available configuration options, you may also check [`microcore/config.py`](https://github.com/Nayjest/ai-microcore/blob/main/microcore/configuration.py#L175).
|
|
81
81
|
|
|
82
82
|
### Installing vendor-specific packages
|
|
83
|
-
For
|
|
83
|
+
For models working not via OpenAI API, you may need to install additional packages:
|
|
84
84
|
#### Anthropic Claude 3
|
|
85
85
|
```bash
|
|
86
86
|
pip install anthropic
|
|
@@ -99,7 +99,8 @@ and [configure the authorization](https://cloud.google.com/sdk/docs/authorizing)
|
|
|
99
99
|
|
|
100
100
|
#### Local language models via Hugging Face Transformers
|
|
101
101
|
|
|
102
|
-
You will need to install transformers and deep learning library of your choice
|
|
102
|
+
You will need to install transformers and a deep learning library of your choice
|
|
103
|
+
(PyTorch, TensorFlow, Flax, etc).
|
|
103
104
|
|
|
104
105
|
See [transformers installation](https://huggingface.co/docs/transformers/installation).
|
|
105
106
|
|
|
@@ -115,13 +116,13 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
|
|
|
115
116
|
Vector database functions are available via `microcore.texts`.
|
|
116
117
|
|
|
117
118
|
#### ChromaDB
|
|
118
|
-
|
|
119
|
+
The default vector database is [Chroma](https://www.trychroma.com/).
|
|
119
120
|
In order to use vector database functions with ChromaDB, you need to install the `chromadb` package:
|
|
120
121
|
```bash
|
|
121
122
|
pip install chromadb
|
|
122
123
|
```
|
|
123
|
-
By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
|
|
124
|
-
Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
|
|
124
|
+
By default, MicroCore will use ChromaDB PersistentClient (if the corresponding package is installed).
|
|
125
|
+
Alternatively, you can run Chroma as a separate service and configure MicroCore to use HttpClient:
|
|
125
126
|
|
|
126
127
|
```python
|
|
127
128
|
from microcore import configure
|
|
@@ -144,7 +145,7 @@ configure(
|
|
|
144
145
|
EMBEDDING_DB_TYPE=EmbeddingDbType.QDRANT,
|
|
145
146
|
EMBEDDING_DB_HOST="localhost",
|
|
146
147
|
EMBEDDING_DB_PORT="6333",
|
|
147
|
-
EMBEDDING_DB_SIZE=384, # dimensions
|
|
148
|
+
EMBEDDING_DB_SIZE=384, # number of dimensions in the SentenceTransformer model
|
|
148
149
|
EMBEDDING_DB_FUNCTION=SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2"),
|
|
149
150
|
)
|
|
150
151
|
```
|
|
@@ -167,7 +168,7 @@ use_logging()
|
|
|
167
168
|
# Basic usage
|
|
168
169
|
ai_response = llm('What is your model name?')
|
|
169
170
|
|
|
170
|
-
# You also
|
|
171
|
+
# You may also pass a list of strings as prompt
|
|
171
172
|
# - For chat completion models elements are treated as separate messages
|
|
172
173
|
# - For completion LLMs elements are treated as text lines
|
|
173
174
|
llm(['1+2', '='])
|
|
@@ -260,7 +261,7 @@ LLM Microcore supports all models & API providers having OpenAI API.
|
|
|
260
261
|
## 🖼️ Examples
|
|
261
262
|
|
|
262
263
|
#### [Code review tool](https://github.com/llm-microcore/microcore/blob/main/examples/code-review-tool)
|
|
263
|
-
Performs code review by LLM for changes in git .patch files in any programming languages.
|
|
264
|
+
Performs a code review by LLM for changes in git .patch files in any programming languages.
|
|
264
265
|
|
|
265
266
|
#### [Image analysis](https://colab.research.google.com/drive/1qTJ51wxCv3VlyqLt3M8OZ7183YXPFpic) (Google Colab)
|
|
266
267
|
Determine the number of petals and the color of the flower from a photo (gpt-4-turbo)
|
|
@@ -282,7 +283,7 @@ Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
|
|
|
282
283
|
@TODO
|
|
283
284
|
|
|
284
285
|
## 🤖 AI Modules
|
|
285
|
-
**This is experimental feature.**
|
|
286
|
+
**This is an experimental feature.**
|
|
286
287
|
|
|
287
288
|
Tweaks the Python import system to provide automatic setup of MicroCore environment
|
|
288
289
|
based on metadata in module docstrings.
|
|
@@ -19,7 +19,6 @@ from ._env import configure, env, config, min_setup
|
|
|
19
19
|
from .logging import use_logging
|
|
20
20
|
from .message_types import UserMsg, AssistantMsg, SysMsg, Msg, PartialMsg
|
|
21
21
|
from .configuration import (
|
|
22
|
-
ApiType,
|
|
23
22
|
LLMApiBaseError,
|
|
24
23
|
LLMApiDeploymentIdError,
|
|
25
24
|
LLMApiKeyError,
|
|
@@ -29,6 +28,7 @@ from .configuration import (
|
|
|
29
28
|
EmbeddingDbType,
|
|
30
29
|
PRINT_STREAM,
|
|
31
30
|
)
|
|
31
|
+
from .llm_backends import ApiPlatform, ApiType
|
|
32
32
|
from .types import BadAIJsonAnswer, BadAIAnswer, LLMContextLengthExceededError
|
|
33
33
|
from .wrappers.prompt_wrapper import PromptWrapper
|
|
34
34
|
from .wrappers.llm_response_wrapper import LLMResponse
|
|
@@ -194,6 +194,7 @@ __all__ = [
|
|
|
194
194
|
"AssistantMsg",
|
|
195
195
|
"PartialMsg",
|
|
196
196
|
"ApiType",
|
|
197
|
+
"ApiPlatform",
|
|
197
198
|
"EmbeddingDbType",
|
|
198
199
|
"BadAIJsonAnswer",
|
|
199
200
|
"PRINT_STREAM",
|
|
@@ -230,4 +231,4 @@ __all__ = [
|
|
|
230
231
|
# "wrappers",
|
|
231
232
|
]
|
|
232
233
|
|
|
233
|
-
__version__ = "5.0.0.dev5"
|
|
234
|
+
__version__ = "5.0.0.dev7"
|
|
@@ -11,11 +11,11 @@ import jinja2
|
|
|
11
11
|
from .embedding_db import AbstractEmbeddingDB
|
|
12
12
|
from .configuration import (
|
|
13
13
|
Config,
|
|
14
|
-
ApiType,
|
|
15
14
|
LLMConfigError,
|
|
16
15
|
EmbeddingDbType,
|
|
17
16
|
PRINT_STREAM,
|
|
18
17
|
)
|
|
18
|
+
from .llm_backends import ApiType
|
|
19
19
|
from .presets import MIN_SETUP
|
|
20
20
|
from .lm_client import BaseAIClient
|
|
21
21
|
from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
|
|
@@ -126,23 +126,11 @@ class Env:
|
|
|
126
126
|
self.llm_function, self.llm_async_function = make_anthropic_llm_functions(
|
|
127
127
|
self.config
|
|
128
128
|
)
|
|
129
|
-
elif self.config.LLM_API_TYPE
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
except ModuleNotFoundError as e:
|
|
135
|
-
raise ModuleNotFoundError(
|
|
136
|
-
"To use the Google Vertex language models, "
|
|
137
|
-
"you need to install the `vertexai` package "
|
|
138
|
-
"and authenticate with Google Cloud cli."
|
|
139
|
-
"Run `pip install vertexai`."
|
|
140
|
-
) from e
|
|
141
|
-
(
|
|
142
|
-
self.llm_function,
|
|
143
|
-
self.llm_async_function,
|
|
144
|
-
) = make_google_vertex_llm_functions(self.config)
|
|
145
|
-
elif self.config.LLM_API_TYPE in (ApiType.GOOGLE, ApiType.GOOGLE_AI_STUDIO):
|
|
129
|
+
elif self.config.LLM_API_TYPE in (
|
|
130
|
+
ApiType.GOOGLE,
|
|
131
|
+
ApiType.GOOGLE_AI_STUDIO, # @deprecated
|
|
132
|
+
ApiType.GOOGLE_VERTEX_AI # @deprecated
|
|
133
|
+
):
|
|
146
134
|
try:
|
|
147
135
|
from .llm.google_genai import GoogleClient
|
|
148
136
|
except ModuleNotFoundError as e:
|
|
@@ -3,20 +3,29 @@ import logging
|
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
|
|
7
6
|
from .utils import run_parallel, RETURN_EXCEPTION
|
|
8
|
-
from .wrappers.llm_response_wrapper import
|
|
9
|
-
|
|
7
|
+
from .wrappers.llm_response_wrapper import (
|
|
8
|
+
LLMResponse,
|
|
9
|
+
DictFromLLMResponse,
|
|
10
|
+
ImageGenerationResponse,
|
|
11
|
+
)
|
|
12
|
+
from .types import (
|
|
13
|
+
TPrompt,
|
|
14
|
+
LLMContextLengthExceededError,
|
|
15
|
+
LLMQuotaExceededError,
|
|
16
|
+
LLMAuthError,
|
|
17
|
+
)
|
|
10
18
|
from .file_cache import (
|
|
11
19
|
cache_hit,
|
|
12
20
|
load_cache,
|
|
13
21
|
save_cache,
|
|
14
22
|
build_cache_name,
|
|
15
|
-
delete_cache
|
|
23
|
+
delete_cache,
|
|
16
24
|
)
|
|
17
25
|
from ._env import env
|
|
18
26
|
|
|
19
27
|
|
|
28
|
+
# pylint: disable=too-many-return-statements,too-many-branches
|
|
20
29
|
def convert_exception(e: Exception, model: str = None) -> Exception | None:
|
|
21
30
|
"""
|
|
22
31
|
Convert LLM exceptions microcore-specific exceptions if possible.
|
|
@@ -26,46 +35,142 @@ def convert_exception(e: Exception, model: str = None) -> Exception | None:
|
|
|
26
35
|
Returns:
|
|
27
36
|
Converted exception or None if no conversion is possible
|
|
28
37
|
"""
|
|
38
|
+
|
|
39
|
+
def with_cause(new_exception: Exception) -> Exception:
|
|
40
|
+
"""
|
|
41
|
+
Attach a cause to an exception without raising it.
|
|
42
|
+
|
|
43
|
+
Equivalent to `raise new_exc from cause` but returns the exception
|
|
44
|
+
instead of raising, preserving the exception chain for later use.
|
|
45
|
+
"""
|
|
46
|
+
new_exception.__cause__ = e
|
|
47
|
+
return new_exception
|
|
48
|
+
|
|
29
49
|
if not isinstance(e, Exception):
|
|
30
50
|
return None
|
|
31
51
|
t, msg = f"{type(e).__module__}.{type(e).__name__}", str(e)
|
|
32
52
|
max_tokens, actual_tokens = None, None
|
|
33
|
-
if t == "openai.BadRequestError"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
53
|
+
if t == "openai.BadRequestError":
|
|
54
|
+
if "context_length_exceeded" in msg:
|
|
55
|
+
match = re.search(
|
|
56
|
+
r"maximum context length is (\d+) tokens.*?resulted in (\d+) tokens",
|
|
57
|
+
msg,
|
|
58
|
+
)
|
|
59
|
+
if match:
|
|
60
|
+
max_tokens = int(match.group(1))
|
|
61
|
+
actual_tokens = int(match.group(2))
|
|
62
|
+
return with_cause(
|
|
63
|
+
LLMContextLengthExceededError(
|
|
64
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
65
|
+
)
|
|
66
|
+
)
|
|
67
|
+
if (
|
|
68
|
+
"Please reduce the length of the messages or completion." in msg
|
|
69
|
+
): # Groq, no details
|
|
70
|
+
return with_cause(LLMContextLengthExceededError(model=model))
|
|
71
|
+
|
|
72
|
+
# x.ai grok-fast
|
|
73
|
+
if (
|
|
74
|
+
"This model's maximum prompt length is" in msg
|
|
75
|
+
and "but the request contains" in msg
|
|
76
|
+
and "tokens" in msg
|
|
77
|
+
):
|
|
78
|
+
match = re.search(
|
|
79
|
+
r"maximum prompt length is (\d+) but the request contains (\d+) tokens",
|
|
80
|
+
msg,
|
|
81
|
+
)
|
|
82
|
+
if match:
|
|
83
|
+
max_tokens = int(match.group(1))
|
|
84
|
+
actual_tokens = int(match.group(2))
|
|
85
|
+
return with_cause(
|
|
86
|
+
LLMContextLengthExceededError(
|
|
87
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
88
|
+
)
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
if "maximum context length" in msg: # Mistral, # DeepSeek
|
|
92
|
+
if match := re.search(
|
|
93
|
+
r"Prompt contains (\d+) tokens.*?model with (\d+) maximum context length",
|
|
94
|
+
msg,
|
|
95
|
+
): # Mistral
|
|
96
|
+
max_tokens = int(match.group(2))
|
|
97
|
+
actual_tokens = int(match.group(1))
|
|
98
|
+
elif match := re.search(
|
|
99
|
+
r"maximum context length is (\d+) tokens.*? you requested (\d+) tokens",
|
|
100
|
+
msg,
|
|
101
|
+
): # DeepSeek
|
|
102
|
+
max_tokens = int(match.group(1))
|
|
103
|
+
actual_tokens = int(match.group(2))
|
|
104
|
+
return with_cause(
|
|
105
|
+
LLMContextLengthExceededError(
|
|
106
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
107
|
+
)
|
|
108
|
+
)
|
|
109
|
+
if "too_many_prompt_tokens" in msg: # Perplexity
|
|
110
|
+
if match := re.search(r"User input tokens exceeds (\d+) tokens", msg):
|
|
111
|
+
max_tokens = int(match.group(1))
|
|
112
|
+
return with_cause(
|
|
113
|
+
LLMContextLengthExceededError(
|
|
114
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
115
|
+
)
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
if (
|
|
119
|
+
t == "openai.APIStatusError" and "413 Request Entity Too Large" in msg
|
|
120
|
+
): # Cerebras
|
|
121
|
+
return with_cause(LLMContextLengthExceededError(model=model))
|
|
122
|
+
|
|
123
|
+
if t == "openai.APIStatusError" and "Payload Too Large" in msg: # Fireworks
|
|
124
|
+
return with_cause(LLMContextLengthExceededError(model=model))
|
|
125
|
+
|
|
46
126
|
if t == "anthropic.BadRequestError" and "prompt is too long:" in msg:
|
|
47
127
|
if match := re.search(r"(\d+)\s+tokens\s+>\s+(\d+)\s+maximum", msg):
|
|
48
128
|
max_tokens = int(match.group(2))
|
|
49
129
|
actual_tokens = int(match.group(1))
|
|
50
|
-
return
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
130
|
+
return with_cause(
|
|
131
|
+
LLMContextLengthExceededError(
|
|
132
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
133
|
+
)
|
|
54
134
|
)
|
|
55
|
-
if
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
msg
|
|
135
|
+
if t == "google.genai.errors.ClientError":
|
|
136
|
+
|
|
137
|
+
if "429" in msg and "RESOURCE_EXHAUSTED" in msg:
|
|
138
|
+
return with_cause(LLMQuotaExceededError(details=msg))
|
|
139
|
+
|
|
140
|
+
if (
|
|
141
|
+
"input token count" in msg
|
|
142
|
+
and "exceeds the maximum number of tokens allowed" in msg
|
|
62
143
|
):
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
144
|
+
# ai studio
|
|
145
|
+
if match := re.search(
|
|
146
|
+
r"input token count exceeds the maximum number of tokens allowed (\d+)",
|
|
147
|
+
msg,
|
|
148
|
+
):
|
|
149
|
+
max_tokens = int(match.group(1))
|
|
150
|
+
# vertex
|
|
151
|
+
elif match := re.search(
|
|
152
|
+
r"input token count \((\d+)\) "
|
|
153
|
+
r"exceeds the maximum number of tokens allowed \((\d+)\)",
|
|
154
|
+
msg,
|
|
155
|
+
):
|
|
156
|
+
actual_tokens = int(match.group(1))
|
|
157
|
+
max_tokens = int(match.group(2))
|
|
158
|
+
return with_cause(
|
|
159
|
+
LLMContextLengthExceededError(
|
|
160
|
+
actual_tokens=actual_tokens, max_tokens=max_tokens, model=model
|
|
161
|
+
)
|
|
162
|
+
)
|
|
163
|
+
if t in (
|
|
164
|
+
"openai.AuthenticationError",
|
|
165
|
+
"anthropic.AuthenticationError",
|
|
166
|
+
"google.auth.exceptions.MalformedError", # Vertex AI, wrong service acc. json
|
|
167
|
+
):
|
|
168
|
+
return with_cause(LLMAuthError(msg))
|
|
169
|
+
if t == "google.genai.errors.ClientError":
|
|
170
|
+
if "API_KEY_INVALID" in msg:
|
|
171
|
+
return with_cause(LLMAuthError(msg))
|
|
172
|
+
if "PERMISSION_DENIED" in msg: # invalid project in service account json
|
|
173
|
+
return with_cause(LLMAuthError(msg))
|
|
69
174
|
return None
|
|
70
175
|
|
|
71
176
|
|
|
@@ -74,7 +179,7 @@ def llm(
|
|
|
74
179
|
retries: int = 0,
|
|
75
180
|
parse_json: bool | dict = False,
|
|
76
181
|
file_cache: bool | str = False,
|
|
77
|
-
**kwargs
|
|
182
|
+
**kwargs,
|
|
78
183
|
) -> str | LLMResponse | ImageGenerationResponse:
|
|
79
184
|
"""
|
|
80
185
|
Request Large Language Model synchronously
|
|
@@ -123,12 +228,13 @@ def llm(
|
|
|
123
228
|
[h(prompt, **kwargs) for h in env().llm_before_handlers]
|
|
124
229
|
start = datetime.now()
|
|
125
230
|
|
|
126
|
-
if
|
|
231
|
+
if file_cache and cache_hit(
|
|
127
232
|
cache_name := build_cache_name(
|
|
128
|
-
prompt,
|
|
129
|
-
|
|
233
|
+
prompt,
|
|
234
|
+
kwargs,
|
|
235
|
+
prefix=file_cache if isinstance(file_cache, str) else "llm_requests",
|
|
130
236
|
)
|
|
131
|
-
)
|
|
237
|
+
):
|
|
132
238
|
response: LLMResponse = load_cache(cache_name)
|
|
133
239
|
response.from_file_cache = True
|
|
134
240
|
tries = 0
|
|
@@ -142,7 +248,9 @@ def llm(
|
|
|
142
248
|
except Exception as e: # pylint: disable=W0718
|
|
143
249
|
converted_exception = convert_exception(e)
|
|
144
250
|
# If context length exceeded, or no tries left --> do not retry
|
|
145
|
-
if tries == 0 or isinstance(
|
|
251
|
+
if tries == 0 or isinstance(
|
|
252
|
+
converted_exception, (LLMContextLengthExceededError, LLMAuthError)
|
|
253
|
+
):
|
|
146
254
|
if converted_exception:
|
|
147
255
|
raise converted_exception from e
|
|
148
256
|
raise e
|
|
@@ -161,11 +269,7 @@ def llm(
|
|
|
161
269
|
if tries > 0:
|
|
162
270
|
retry_params = dict(**kwargs)
|
|
163
271
|
retry_params["retries"] = tries - 1
|
|
164
|
-
setattr(
|
|
165
|
-
response,
|
|
166
|
-
"_retry_callback",
|
|
167
|
-
lambda: llm(prompt, **retry_params)
|
|
168
|
-
)
|
|
272
|
+
setattr(response, "_retry_callback", lambda: llm(prompt, **retry_params))
|
|
169
273
|
if parse_json:
|
|
170
274
|
parsing_params = parse_json if isinstance(parse_json, dict) else {}
|
|
171
275
|
return response.parse_json(**parsing_params)
|
|
@@ -177,7 +281,7 @@ async def allm(
|
|
|
177
281
|
retries: int = 0,
|
|
178
282
|
parse_json: bool | dict = False,
|
|
179
283
|
file_cache: bool | str = False,
|
|
180
|
-
**kwargs
|
|
284
|
+
**kwargs,
|
|
181
285
|
) -> str | LLMResponse | DictFromLLMResponse | ImageGenerationResponse:
|
|
182
286
|
"""
|
|
183
287
|
Request Large Language Model asynchronously
|
|
@@ -221,12 +325,13 @@ async def allm(
|
|
|
221
325
|
[h(prompt, **kwargs) for h in env().llm_before_handlers]
|
|
222
326
|
start = datetime.now()
|
|
223
327
|
|
|
224
|
-
if
|
|
328
|
+
if file_cache and cache_hit(
|
|
225
329
|
cache_name := build_cache_name(
|
|
226
|
-
prompt,
|
|
227
|
-
|
|
330
|
+
prompt,
|
|
331
|
+
kwargs,
|
|
332
|
+
prefix=file_cache if isinstance(file_cache, str) else "llm_requests",
|
|
228
333
|
)
|
|
229
|
-
)
|
|
334
|
+
):
|
|
230
335
|
response: LLMResponse = load_cache(cache_name)
|
|
231
336
|
response.from_file_cache = True
|
|
232
337
|
tries = 0
|
|
@@ -240,7 +345,9 @@ async def allm(
|
|
|
240
345
|
except Exception as e: # pylint: disable=W0718
|
|
241
346
|
converted_exception = convert_exception(e)
|
|
242
347
|
# If context length exceeded, or no tries left --> do not retry
|
|
243
|
-
if tries == 0 or isinstance(
|
|
348
|
+
if tries == 0 or isinstance(
|
|
349
|
+
converted_exception, (LLMContextLengthExceededError, LLMAuthError)
|
|
350
|
+
):
|
|
244
351
|
if converted_exception:
|
|
245
352
|
raise converted_exception from e
|
|
246
353
|
raise e
|
|
@@ -266,7 +373,9 @@ async def allm(
|
|
|
266
373
|
logging.info(f"Retrying... {tries} retries left")
|
|
267
374
|
if file_cache:
|
|
268
375
|
delete_cache(cache_name)
|
|
269
|
-
return await allm(
|
|
376
|
+
return await allm(
|
|
377
|
+
prompt, retries=tries - 1, parse_json=parse_json, **kwargs
|
|
378
|
+
)
|
|
270
379
|
return response
|
|
271
380
|
|
|
272
381
|
|
|
@@ -276,7 +385,7 @@ async def llm_parallel(
|
|
|
276
385
|
allow_failures: bool = False,
|
|
277
386
|
return_on_failure: Any = RETURN_EXCEPTION,
|
|
278
387
|
log_errors: bool = True,
|
|
279
|
-
**kwargs
|
|
388
|
+
**kwargs,
|
|
280
389
|
) -> list[str | LLMResponse]:
|
|
281
390
|
"""
|
|
282
391
|
Execute multiple LLM requests in parallel
|