ai-microcore 5.0.0a1__tar.gz → 5.0.0.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/PKG-INFO +5 -1
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/README.md +4 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/__init__.py +23 -2
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/_env.py +33 -27
- ai_microcore-5.0.0.dev2/microcore/_llm_functions.py +299 -0
- ai_microcore-5.0.0.dev2/microcore/_prepare_llm_args.py +47 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_func/__init__.py +2 -2
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_func/ai-func.json.j2 +1 -1
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_func/ai-func.pythonic.j2 +1 -1
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_func/ai-func.tag.j2 +1 -1
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/configuration.py +47 -21
- ai_microcore-5.0.0.dev2/microcore/file_cache.py +103 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/file_storage.py +126 -23
- ai_microcore-5.0.0.dev2/microcore/images.py +111 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/anthropic.py +3 -3
- ai_microcore-5.0.0.dev2/microcore/llm/google_genai.py +205 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/google_vertex_ai.py +2 -2
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/local_llm.py +3 -3
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/local_transformers.py +2 -2
- ai_microcore-5.0.0.dev2/microcore/llm/openai.py +344 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/shared.py +14 -0
- ai_microcore-5.0.0.dev2/microcore/lm_client.py +113 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/logging.py +35 -2
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/mcp.py +3 -1
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/message_types.py +23 -3
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/types.py +20 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/utils.py +131 -25
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/wrappers/llm_response_wrapper.py +95 -4
- ai_microcore-5.0.0a1/microcore/_llm_functions.py +0 -177
- ai_microcore-5.0.0a1/microcore/_prepare_llm_args.py +0 -31
- ai_microcore-5.0.0a1/microcore/llm/_openai_llm_v0.py +0 -130
- ai_microcore-5.0.0a1/microcore/llm/_openai_llm_v1.py +0 -231
- ai_microcore-5.0.0a1/microcore/llm/google_genai.py +0 -108
- ai_microcore-5.0.0a1/microcore/llm/openai_llm.py +0 -10
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/LICENSE +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_modules.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/embedding_db/__init__.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/embedding_db/chromadb.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/embedding_db/qdrant.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/interactive_setup.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/json_parsing.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/__init__.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/metrics.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/presets.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/python.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/templating/__init__.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/templating/jinja2.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/text2speech/elevenlabs.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/tokenizing.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ui.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/wrappers/__init__.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/wrappers/prompt_wrapper.py +0 -0
- {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-microcore
|
|
3
|
-
Version: 5.0.0a1
|
|
3
|
+
Version: 5.0.0.dev2
|
|
4
4
|
Summary: # Minimalistic Foundation for AI Applications
|
|
5
5
|
Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter
|
|
6
6
|
Author-email: Vitalii Stepanenko <mail@vitaliy.in>
|
|
@@ -300,6 +300,10 @@ Benchmark accuracy of 20+ state of the art models on solving olympiad math probl
|
|
|
300
300
|
|
|
301
301
|
#### [Generate meme image](https://github.com/Nayjest/ai-microcore/blob/main/examples/generate_meme_image.py)
|
|
302
302
|
Simple example demonstrating image generation using [OpenAI GPT Image](https://platform.openai.com/docs/guides/image-generation?image-generation-model=gpt-image-1) model.
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
#### [Local inference with PyTorch / Transformers](https://github.com/Nayjest/ai-microcore/blob/main/examples/pytorch_transformers.py)
|
|
306
|
+
Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
|
|
303
307
|
|
|
304
308
|
#### [Other examples](https://github.com/llm-microcore/microcore/tree/main/examples)
|
|
305
309
|
|
|
@@ -269,6 +269,10 @@ Benchmark accuracy of 20+ state of the art models on solving olympiad math probl
|
|
|
269
269
|
|
|
270
270
|
#### [Generate meme image](https://github.com/Nayjest/ai-microcore/blob/main/examples/generate_meme_image.py)
|
|
271
271
|
Simple example demonstrating image generation using [OpenAI GPT Image](https://platform.openai.com/docs/guides/image-generation?image-generation-model=gpt-image-1) model.
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
#### [Local inference with PyTorch / Transformers](https://github.com/Nayjest/ai-microcore/blob/main/examples/pytorch_transformers.py)
|
|
275
|
+
Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
|
|
272
276
|
|
|
273
277
|
#### [Other examples](https://github.com/llm-microcore/microcore/tree/main/examples)
|
|
274
278
|
|
|
@@ -12,6 +12,7 @@ from . import mcp
|
|
|
12
12
|
from . import ui
|
|
13
13
|
from . import tokenizing
|
|
14
14
|
from . import presets
|
|
15
|
+
from . import file_cache
|
|
15
16
|
from .embedding_db import SearchResult, AbstractEmbeddingDB, SearchResults
|
|
16
17
|
from .file_storage import storage
|
|
17
18
|
from ._env import configure, env, config, min_setup
|
|
@@ -19,12 +20,16 @@ from .logging import use_logging
|
|
|
19
20
|
from .message_types import UserMsg, AssistantMsg, SysMsg, Msg, PartialMsg
|
|
20
21
|
from .configuration import (
|
|
21
22
|
ApiType,
|
|
23
|
+
LLMApiBaseError,
|
|
24
|
+
LLMApiDeploymentIdError,
|
|
25
|
+
LLMApiKeyError,
|
|
26
|
+
LLMApiVersionError,
|
|
22
27
|
LLMConfigError,
|
|
23
28
|
Config,
|
|
24
29
|
EmbeddingDbType,
|
|
25
30
|
PRINT_STREAM,
|
|
26
31
|
)
|
|
27
|
-
from .types import BadAIJsonAnswer, BadAIAnswer
|
|
32
|
+
from .types import BadAIJsonAnswer, BadAIAnswer, LLMContextLengthExceededError
|
|
28
33
|
from .wrappers.prompt_wrapper import PromptWrapper
|
|
29
34
|
from .wrappers.llm_response_wrapper import LLMResponse
|
|
30
35
|
from ._llm_functions import llm, allm, llm_parallel
|
|
@@ -69,6 +74,15 @@ def use_model(name: str):
|
|
|
69
74
|
config().LLM_DEFAULT_ARGS["model"] = name
|
|
70
75
|
|
|
71
76
|
|
|
77
|
+
def model_names() -> list[str]:
|
|
78
|
+
"""
|
|
79
|
+
Return a list of available model names from the default LLM client.
|
|
80
|
+
"""
|
|
81
|
+
if env().default_client is None:
|
|
82
|
+
raise ValueError("No default LLM client supporting models list configured.")
|
|
83
|
+
return env().default_client.model_names()
|
|
84
|
+
|
|
85
|
+
|
|
72
86
|
def validate_config():
|
|
73
87
|
"""
|
|
74
88
|
Validates current MicroCore configuration
|
|
@@ -185,7 +199,12 @@ __all__ = [
|
|
|
185
199
|
"PRINT_STREAM",
|
|
186
200
|
"presets",
|
|
187
201
|
"BadAIAnswer",
|
|
202
|
+
"LLMApiBaseError",
|
|
203
|
+
"LLMApiDeploymentIdError",
|
|
204
|
+
"LLMApiKeyError",
|
|
205
|
+
"LLMApiVersionError",
|
|
188
206
|
"LLMConfigError",
|
|
207
|
+
"LLMContextLengthExceededError",
|
|
189
208
|
"LLMResponse",
|
|
190
209
|
"PromptWrapper",
|
|
191
210
|
"parse",
|
|
@@ -194,6 +213,7 @@ __all__ = [
|
|
|
194
213
|
"dedent",
|
|
195
214
|
# submodules
|
|
196
215
|
"embedding_db",
|
|
216
|
+
"file_cache",
|
|
197
217
|
"file_storage",
|
|
198
218
|
"message_types",
|
|
199
219
|
"utils",
|
|
@@ -206,7 +226,8 @@ __all__ = [
|
|
|
206
226
|
"tokenizing",
|
|
207
227
|
"Metrics",
|
|
208
228
|
"interactive_setup",
|
|
229
|
+
"model_names",
|
|
209
230
|
# "wrappers",
|
|
210
231
|
]
|
|
211
232
|
|
|
212
|
-
__version__ = "5.0.
|
|
233
|
+
__version__ = "5.0.0dev2"
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MicroCore environment object / initialization.
|
|
3
|
+
"""
|
|
1
4
|
import os.path
|
|
2
5
|
from dataclasses import dataclass, field, asdict, fields
|
|
3
6
|
from importlib.util import find_spec
|
|
@@ -14,9 +17,9 @@ from .configuration import (
|
|
|
14
17
|
PRINT_STREAM,
|
|
15
18
|
)
|
|
16
19
|
from .presets import MIN_SETUP
|
|
20
|
+
from .lm_client import BaseAIClient
|
|
17
21
|
from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
|
|
18
22
|
from .templating.jinja2 import make_jinja2_env, make_tpl_function
|
|
19
|
-
from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
|
|
20
23
|
from .llm.local_llm import make_llm_functions as make_local_llm_functions
|
|
21
24
|
|
|
22
25
|
if TYPE_CHECKING:
|
|
@@ -41,6 +44,7 @@ class Env:
|
|
|
41
44
|
tokenizer: "PreTrainedTokenizer" = field( # noqa
|
|
42
45
|
default=None, init=False, repr=False
|
|
43
46
|
)
|
|
47
|
+
default_client: BaseAIClient | None = None
|
|
44
48
|
_mcp_registry: "MCPRegistry" = field(init=False, default=None)
|
|
45
49
|
|
|
46
50
|
def __post_init__(self):
|
|
@@ -58,30 +62,38 @@ class Env:
|
|
|
58
62
|
raise NotImplementedError
|
|
59
63
|
|
|
60
64
|
def init_templating(self):
|
|
65
|
+
"""Initialize Jinja2 environment and template function for templates rendering."""
|
|
61
66
|
self.jinja_env = make_jinja2_env(self)
|
|
62
67
|
self.tpl_function = make_tpl_function(self)
|
|
63
68
|
|
|
64
69
|
@property
|
|
65
|
-
def mcp_registry(self):
|
|
70
|
+
def mcp_registry(self) -> "MCPRegistry":
|
|
71
|
+
"""Lazily initialize and return the registry of preconfigured MCP servers."""
|
|
66
72
|
if self._mcp_registry is None:
|
|
67
73
|
from .mcp import MCPRegistry
|
|
68
74
|
self._mcp_registry = MCPRegistry(self.config.MCP_SERVERS)
|
|
69
75
|
return self._mcp_registry
|
|
70
76
|
|
|
71
77
|
def init_llm(self):
|
|
72
|
-
|
|
78
|
+
"""Initialize language model functions based on configuration."""
|
|
73
79
|
|
|
74
|
-
|
|
75
|
-
|
|
80
|
+
def default_llm(*args, **kwargs) -> "LLMResponse":
|
|
81
|
+
if self.default_client:
|
|
82
|
+
return self.default_client.generate(*args, **kwargs)
|
|
83
|
+
raise LLMConfigError("Language model is not configured")
|
|
76
84
|
|
|
77
|
-
|
|
78
|
-
|
|
85
|
+
async def aio_default_llm(*args, **kwargs) -> "LLMResponse":
|
|
86
|
+
if self.default_client:
|
|
87
|
+
return await self.default_client.aio.generate(*args, **kwargs)
|
|
88
|
+
raise LLMConfigError("Language model is not configured")
|
|
79
89
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
90
|
+
self.llm_function, self.llm_async_function = (
|
|
91
|
+
default_llm,
|
|
92
|
+
aio_default_llm,
|
|
93
|
+
)
|
|
84
94
|
|
|
95
|
+
if self.config.LLM_API_TYPE == ApiType.NONE:
|
|
96
|
+
pass
|
|
85
97
|
elif self.config.LLM_API_TYPE == ApiType.FUNCTION:
|
|
86
98
|
self.llm_function, self.llm_async_function = make_local_llm_functions(
|
|
87
99
|
self.config
|
|
@@ -130,25 +142,19 @@ class Env:
|
|
|
130
142
|
self.llm_function,
|
|
131
143
|
self.llm_async_function,
|
|
132
144
|
) = make_google_vertex_llm_functions(self.config)
|
|
133
|
-
elif self.config.LLM_API_TYPE
|
|
145
|
+
elif self.config.LLM_API_TYPE in (ApiType.GOOGLE, ApiType.GOOGLE_AI_STUDIO):
|
|
134
146
|
try:
|
|
135
|
-
from .llm.google_genai import
|
|
136
|
-
make_llm_functions as make_google_genai_llm_functions,
|
|
137
|
-
)
|
|
147
|
+
from .llm.google_genai import GoogleClient
|
|
138
148
|
except ModuleNotFoundError as e:
|
|
139
149
|
raise ModuleNotFoundError(
|
|
140
|
-
"To use the Google Gemini language models via
|
|
141
|
-
"you need to install the `google-
|
|
142
|
-
"Run `pip install google-
|
|
150
|
+
"To use the Google Gemini language models via Google GenAI SDK, "
|
|
151
|
+
"you need to install the `google-genai` package. "
|
|
152
|
+
"Run `pip install google-genai`."
|
|
143
153
|
) from e
|
|
144
|
-
(
|
|
145
|
-
self.llm_function,
|
|
146
|
-
self.llm_async_function,
|
|
147
|
-
) = make_google_genai_llm_functions(self.config)
|
|
154
|
+
self.default_client = GoogleClient(self.config)
|
|
148
155
|
else:
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
)
|
|
156
|
+
from .llm.openai import OpenAIClient
|
|
157
|
+
self.default_client = OpenAIClient(self.config)
|
|
152
158
|
|
|
153
159
|
def init_similarity_search(self):
|
|
154
160
|
if (
|
|
@@ -225,10 +231,10 @@ _env: Env | None = None
|
|
|
225
231
|
|
|
226
232
|
|
|
227
233
|
def env() -> Env:
|
|
228
|
-
"""
|
|
234
|
+
"""Return current MicroCore environment object."""
|
|
229
235
|
return _env or Env()
|
|
230
236
|
|
|
231
237
|
|
|
232
238
|
def config() -> Config:
|
|
233
|
-
"""Resolve current configuration"""
|
|
239
|
+
"""Resolve current configuration."""
|
|
234
240
|
return env().config
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import logging
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
from .utils import run_parallel, RETURN_EXCEPTION
|
|
8
|
+
from .wrappers.llm_response_wrapper import LLMResponse, DictFromLLMResponse, ImageGenerationResponse
|
|
9
|
+
from .types import TPrompt, LLMContextLengthExceededError
|
|
10
|
+
from .file_cache import (
|
|
11
|
+
cache_hit,
|
|
12
|
+
load_cache,
|
|
13
|
+
save_cache,
|
|
14
|
+
build_cache_name,
|
|
15
|
+
delete_cache
|
|
16
|
+
)
|
|
17
|
+
from ._env import env
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def convert_exception(e: Exception, model: str = None) -> Exception | None:
|
|
21
|
+
"""
|
|
22
|
+
Convert LLM exceptions to microcore-specific exceptions if possible.
|
|
23
|
+
Args:
|
|
24
|
+
e (Exception): Original exception
|
|
25
|
+
model (str): LLM model name, used for better error messages
|
|
26
|
+
Returns:
|
|
27
|
+
Converted exception or None if no conversion is possible
|
|
28
|
+
"""
|
|
29
|
+
if not isinstance(e, Exception):
|
|
30
|
+
return None
|
|
31
|
+
t, msg = f"{type(e).__module__}.{type(e).__name__}", str(e)
|
|
32
|
+
max_tokens, actual_tokens = None, None
|
|
33
|
+
if t == "openai.BadRequestError" and "context_length_exceeded" in msg:
|
|
34
|
+
match = re.search(
|
|
35
|
+
r"maximum context length is (\d+) tokens.*?resulted in (\d+) tokens",
|
|
36
|
+
msg
|
|
37
|
+
)
|
|
38
|
+
if match:
|
|
39
|
+
max_tokens = int(match.group(1))
|
|
40
|
+
actual_tokens = int(match.group(2))
|
|
41
|
+
return LLMContextLengthExceededError(
|
|
42
|
+
actual_tokens=actual_tokens,
|
|
43
|
+
max_tokens=max_tokens,
|
|
44
|
+
model=model
|
|
45
|
+
)
|
|
46
|
+
if t == "anthropic.BadRequestError" and "prompt is too long:" in msg:
|
|
47
|
+
if match := re.search(r"(\d+)\s+tokens\s+>\s+(\d+)\s+maximum", msg):
|
|
48
|
+
max_tokens = int(match.group(2))
|
|
49
|
+
actual_tokens = int(match.group(1))
|
|
50
|
+
return LLMContextLengthExceededError(
|
|
51
|
+
actual_tokens=actual_tokens,
|
|
52
|
+
max_tokens=max_tokens,
|
|
53
|
+
model=model
|
|
54
|
+
)
|
|
55
|
+
if (
|
|
56
|
+
t == "google.api_core.exceptions.InvalidArgument"
|
|
57
|
+
and "The input token count exceeds the maximum number of tokens allowed" in msg
|
|
58
|
+
):
|
|
59
|
+
if match := re.search(
|
|
60
|
+
r"The input token count exceeds the maximum number of tokens allowed (\d+)",
|
|
61
|
+
msg
|
|
62
|
+
):
|
|
63
|
+
max_tokens = int(match.group(1))
|
|
64
|
+
return LLMContextLengthExceededError(
|
|
65
|
+
actual_tokens=actual_tokens,
|
|
66
|
+
max_tokens=max_tokens,
|
|
67
|
+
model=model
|
|
68
|
+
)
|
|
69
|
+
return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def llm(
|
|
73
|
+
prompt: TPrompt,
|
|
74
|
+
retries: int = 0,
|
|
75
|
+
parse_json: bool | dict = False,
|
|
76
|
+
file_cache: bool | str = False,
|
|
77
|
+
**kwargs
|
|
78
|
+
) -> str | LLMResponse | ImageGenerationResponse:
|
|
79
|
+
"""
|
|
80
|
+
Request Large Language Model synchronously
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
prompt (str | Msg | dict | list[str | Msg | dict]): Text to send to LLM.
|
|
84
|
+
retries (int):
|
|
85
|
+
Number of retries in case of error.
|
|
86
|
+
Default is 0 (no retries).
|
|
87
|
+
parse_json (bool|dict):
|
|
88
|
+
If True, parses response as JSON,
|
|
89
|
+
alternatively non-empty dict can be used as parse_json arguments
|
|
90
|
+
Default is False (no parsing).
|
|
91
|
+
file_cache (bool | str):
|
|
92
|
+
If True or non-empty string, enables file caching of LLM responses.
|
|
93
|
+
If string, it will be used as cache prefix.
|
|
94
|
+
When enabled, identical requests with identical parameters
|
|
95
|
+
will return cached responses instead of making new API calls.
|
|
96
|
+
Default is False (no caching).
|
|
97
|
+
**kwargs: Parameters supported by the LLM API.
|
|
98
|
+
|
|
99
|
+
See parameters supported by the OpenAI:
|
|
100
|
+
|
|
101
|
+
- https://platform.openai.com/docs/api-reference/completions/create
|
|
102
|
+
- https://platform.openai.com/docs/api-reference/chat/create
|
|
103
|
+
|
|
104
|
+
**Additional parameters:**
|
|
105
|
+
|
|
106
|
+
- callback: callable - callback function
|
|
107
|
+
to be called on each chunk of text,
|
|
108
|
+
enables response streaming if supported by the LLM API
|
|
109
|
+
- callbacks: list[callable] - collection of callbacks
|
|
110
|
+
to be called on each chunk of text,
|
|
111
|
+
enables response streaming if supported by the LLM API
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
|
|
115
|
+
Text generated by the LLM as string
|
|
116
|
+
with all fields returned by API accessible as an attributes.
|
|
117
|
+
|
|
118
|
+
See fields returned by the OpenAI:
|
|
119
|
+
|
|
120
|
+
- https://platform.openai.com/docs/api-reference/completions/object
|
|
121
|
+
- https://platform.openai.com/docs/api-reference/chat/object
|
|
122
|
+
"""
|
|
123
|
+
[h(prompt, **kwargs) for h in env().llm_before_handlers]
|
|
124
|
+
start = datetime.now()
|
|
125
|
+
|
|
126
|
+
if (file_cache and cache_hit(
|
|
127
|
+
cache_name := build_cache_name(
|
|
128
|
+
prompt, kwargs,
|
|
129
|
+
prefix=file_cache if isinstance(file_cache, str) else "llm_requests"
|
|
130
|
+
)
|
|
131
|
+
)):
|
|
132
|
+
response: LLMResponse = load_cache(cache_name)
|
|
133
|
+
response.from_file_cache = True
|
|
134
|
+
tries = 0
|
|
135
|
+
else:
|
|
136
|
+
tries = retries + 1
|
|
137
|
+
while tries > 0:
|
|
138
|
+
try:
|
|
139
|
+
tries -= 1
|
|
140
|
+
response = env().llm_function(prompt, **kwargs)
|
|
141
|
+
break
|
|
142
|
+
except Exception as e: # pylint: disable=W0718
|
|
143
|
+
converted_exception = convert_exception(e)
|
|
144
|
+
# If context length exceeded, or no tries left --> do not retry
|
|
145
|
+
if tries == 0 or isinstance(converted_exception, LLMContextLengthExceededError):
|
|
146
|
+
if converted_exception:
|
|
147
|
+
raise converted_exception from e
|
|
148
|
+
raise e
|
|
149
|
+
logging.error(f"LLM error: {e}")
|
|
150
|
+
logging.info(f"Retrying... {tries} retries left")
|
|
151
|
+
continue
|
|
152
|
+
try:
|
|
153
|
+
response.gen_duration = (datetime.now() - start).total_seconds()
|
|
154
|
+
if not env().config.SAVE_MEMORY:
|
|
155
|
+
response.prompt = prompt
|
|
156
|
+
except AttributeError:
|
|
157
|
+
...
|
|
158
|
+
if file_cache:
|
|
159
|
+
save_cache(cache_name, response)
|
|
160
|
+
[h(response) for h in env().llm_after_handlers]
|
|
161
|
+
if tries > 0:
|
|
162
|
+
retry_params = dict(**kwargs)
|
|
163
|
+
retry_params["retries"] = tries - 1
|
|
164
|
+
setattr(
|
|
165
|
+
response,
|
|
166
|
+
"_retry_callback",
|
|
167
|
+
lambda: llm(prompt, **retry_params)
|
|
168
|
+
)
|
|
169
|
+
if parse_json:
|
|
170
|
+
parsing_params = parse_json if isinstance(parse_json, dict) else {}
|
|
171
|
+
return response.parse_json(**parsing_params)
|
|
172
|
+
return response
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
async def allm(
|
|
176
|
+
prompt: TPrompt,
|
|
177
|
+
retries: int = 0,
|
|
178
|
+
parse_json: bool | dict = False,
|
|
179
|
+
file_cache: bool | str = False,
|
|
180
|
+
**kwargs
|
|
181
|
+
) -> str | LLMResponse | DictFromLLMResponse | ImageGenerationResponse:
|
|
182
|
+
"""
|
|
183
|
+
Request Large Language Model asynchronously
|
|
184
|
+
|
|
185
|
+
Args:
|
|
186
|
+
prompt (str | Msg | dict | list[str | Msg | dict]): Text to send to LLM.
|
|
187
|
+
retries (int):
|
|
188
|
+
Number of retries in case of error.
|
|
189
|
+
Default is 0 (no retries).
|
|
190
|
+
parse_json (bool|dict):
|
|
191
|
+
If True, parses response as JSON,
|
|
192
|
+
alternatively non-empty dict can be used as parse_json arguments.
|
|
193
|
+
**kwargs: Parameters supported by the LLM API.
|
|
194
|
+
|
|
195
|
+
See parameters supported by the OpenAI:
|
|
196
|
+
|
|
197
|
+
- https://platform.openai.com/docs/api-reference/completions/create
|
|
198
|
+
- https://platform.openai.com/docs/api-reference/chat/create
|
|
199
|
+
|
|
200
|
+
**Additional parameters:**
|
|
201
|
+
|
|
202
|
+
- callback: callable - callback function
|
|
203
|
+
to be called on each chunk of text,
|
|
204
|
+
enables response streaming if supported by the LLM API
|
|
205
|
+
- callbacks: list[callable] - collection of callbacks
|
|
206
|
+
to be called on each chunk of text,
|
|
207
|
+
enables response streaming if supported by the LLM API
|
|
208
|
+
|
|
209
|
+
Note: async callbacks are supported only for async LLM API calls
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
|
|
213
|
+
Text generated by the LLM as string
|
|
214
|
+
with all fields returned by API accessible as an attributes.
|
|
215
|
+
|
|
216
|
+
See fields returned by the OpenAI:
|
|
217
|
+
|
|
218
|
+
- https://platform.openai.com/docs/api-reference/completions/object
|
|
219
|
+
- https://platform.openai.com/docs/api-reference/chat/object
|
|
220
|
+
"""
|
|
221
|
+
[h(prompt, **kwargs) for h in env().llm_before_handlers]
|
|
222
|
+
start = datetime.now()
|
|
223
|
+
|
|
224
|
+
if (file_cache and cache_hit(
|
|
225
|
+
cache_name := build_cache_name(
|
|
226
|
+
prompt, kwargs,
|
|
227
|
+
prefix=file_cache if isinstance(file_cache, str) else "llm_requests"
|
|
228
|
+
)
|
|
229
|
+
)):
|
|
230
|
+
response: LLMResponse = load_cache(cache_name)
|
|
231
|
+
response.from_file_cache = True
|
|
232
|
+
tries = 0
|
|
233
|
+
else:
|
|
234
|
+
tries = retries + 1
|
|
235
|
+
while tries > 0:
|
|
236
|
+
try:
|
|
237
|
+
tries -= 1
|
|
238
|
+
response = await env().llm_async_function(prompt, **kwargs)
|
|
239
|
+
break
|
|
240
|
+
except Exception as e: # pylint: disable=W0718
|
|
241
|
+
converted_exception = convert_exception(e)
|
|
242
|
+
# If context length exceeded, or no tries left --> do not retry
|
|
243
|
+
if tries == 0 or isinstance(converted_exception, LLMContextLengthExceededError):
|
|
244
|
+
if converted_exception:
|
|
245
|
+
raise converted_exception from e
|
|
246
|
+
raise e
|
|
247
|
+
logging.error(f"LLM error: {e}")
|
|
248
|
+
logging.info(f"Retrying... {tries} retries left")
|
|
249
|
+
continue
|
|
250
|
+
try:
|
|
251
|
+
response.gen_duration = (datetime.now() - start).total_seconds()
|
|
252
|
+
if not env().config.SAVE_MEMORY:
|
|
253
|
+
response.prompt = prompt
|
|
254
|
+
except AttributeError:
|
|
255
|
+
...
|
|
256
|
+
if file_cache:
|
|
257
|
+
save_cache(cache_name, response)
|
|
258
|
+
[h(response) for h in env().llm_after_handlers]
|
|
259
|
+
if parse_json:
|
|
260
|
+
try:
|
|
261
|
+
parsing_params = parse_json if isinstance(parse_json, dict) else {}
|
|
262
|
+
return response.parse_json(**parsing_params)
|
|
263
|
+
except Exception as e: # pylint: disable=W0718
|
|
264
|
+
if tries > 0:
|
|
265
|
+
logging.error(f"LLM error: {e}")
|
|
266
|
+
logging.info(f"Retrying... {tries} retries left")
|
|
267
|
+
if file_cache:
|
|
268
|
+
delete_cache(cache_name)
|
|
269
|
+
return await allm(prompt, retries=tries - 1, parse_json=parse_json, **kwargs)
|
|
270
|
+
return response
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
async def llm_parallel(
|
|
274
|
+
prompts: list[TPrompt],
|
|
275
|
+
max_concurrent_tasks: int = None,
|
|
276
|
+
allow_failures: bool = False,
|
|
277
|
+
return_on_failure: Any = RETURN_EXCEPTION,
|
|
278
|
+
log_errors: bool = True,
|
|
279
|
+
**kwargs
|
|
280
|
+
) -> list[str | LLMResponse]:
|
|
281
|
+
"""
|
|
282
|
+
Execute multiple LLM requests in parallel
|
|
283
|
+
|
|
284
|
+
Returns (list[LLMResponse | str]): a list of responses in the same order as the prompts
|
|
285
|
+
"""
|
|
286
|
+
tasks = [allm(prompt, **kwargs) for prompt in prompts]
|
|
287
|
+
|
|
288
|
+
if max_concurrent_tasks is None:
|
|
289
|
+
max_concurrent_tasks = int(env().config.MAX_CONCURRENT_TASKS or 0)
|
|
290
|
+
if not max_concurrent_tasks:
|
|
291
|
+
max_concurrent_tasks = len(tasks)
|
|
292
|
+
|
|
293
|
+
return await run_parallel(
|
|
294
|
+
tasks,
|
|
295
|
+
max_concurrent_tasks=max_concurrent_tasks,
|
|
296
|
+
allow_failures=allow_failures,
|
|
297
|
+
return_on_failure=return_on_failure,
|
|
298
|
+
log_errors=log_errors,
|
|
299
|
+
)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from dataclasses import asdict
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from .message_types import DEFAULT_MESSAGE_ROLE, Msg, MsgContent
|
|
5
|
+
from .types import TPrompt
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def prepare_prompt(prompt) -> str:
|
|
9
|
+
"""Converts prompt to string for LLM completion API"""
|
|
10
|
+
return "\n".join(
|
|
11
|
+
[
|
|
12
|
+
str(p["content"]) if isinstance(p, dict) and "content" in p else str(p)
|
|
13
|
+
for p in (prompt if isinstance(prompt, list) else [prompt])
|
|
14
|
+
]
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def prompt_item_to_message_dict(item: Any, strict=False) -> dict | Any:
|
|
19
|
+
"""
|
|
20
|
+
Convert a single prompt item to message dict for LLM inference chat API (OpenAI-like).
|
|
21
|
+
Args:
|
|
22
|
+
item: The prompt item to convert. Can be a string, Msg instance, or dict.
|
|
23
|
+
strict: If True, raises TypeError for unsupported types. If False, returns the item as is.
|
|
24
|
+
Returns:
|
|
25
|
+
A dict representing the message,
|
|
26
|
+
or the original item if not convertible and strict is False.
|
|
27
|
+
"""
|
|
28
|
+
if isinstance(item, Msg):
|
|
29
|
+
message_dict = asdict(item, dict_factory=item.DICT_FACTORY)
|
|
30
|
+
elif isinstance(item, dict):
|
|
31
|
+
message_dict = item
|
|
32
|
+
else:
|
|
33
|
+
if strict and not isinstance(item, str | MsgContent):
|
|
34
|
+
raise TypeError(f"Unsupported message type: {type(item)}")
|
|
35
|
+
message_dict = dict(role=DEFAULT_MESSAGE_ROLE, content=item)
|
|
36
|
+
return message_dict
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def prompt_to_message_dicts(prompt: TPrompt, strict=False) -> list[dict | Any]:
|
|
40
|
+
"""
|
|
41
|
+
Convert prompt to messages for LLM inference chat API (OpenAI-like).
|
|
42
|
+
Args:
|
|
43
|
+
prompt: The prompt to convert. Can be a string, Msg instance, dict, or list of these.
|
|
44
|
+
strict: If True, raises TypeError for unsupported types. If False, returns the item as is.
|
|
45
|
+
"""
|
|
46
|
+
message_like_items: list[Any] = prompt if isinstance(prompt, list) else [prompt]
|
|
47
|
+
return [prompt_item_to_message_dict(item, strict=strict) for item in message_like_items]
|
|
@@ -72,7 +72,7 @@ def func_metadata(func, name=None) -> Dict[str, Any]:
|
|
|
72
72
|
metadata["args"][key]["type"] = param_type
|
|
73
73
|
|
|
74
74
|
arg_comments = func_arg_comments(func)
|
|
75
|
-
for key, val in metadata
|
|
75
|
+
for key, val in metadata.get("args", {}).items():
|
|
76
76
|
val["comment"] = arg_comments[key]
|
|
77
77
|
|
|
78
78
|
# Parse docstring
|
|
@@ -80,7 +80,7 @@ def func_metadata(func, name=None) -> Dict[str, Any]:
|
|
|
80
80
|
|
|
81
81
|
# Add descriptions from parsed docstring to parameters
|
|
82
82
|
for param in parsed_docstring.params:
|
|
83
|
-
if param.arg_name in metadata
|
|
83
|
+
if param.arg_name in metadata.get("args", []):
|
|
84
84
|
metadata["args"][param.arg_name]["docstr"] = param.description
|
|
85
85
|
|
|
86
86
|
return metadata
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
{%- for k,v in args.items() %}
|
|
5
5
|
"{{ k }}":
|
|
6
6
|
{%- if v.type %} <{{ v.type }}>{% endif -%}
|
|
7
|
-
{%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){%endif-%}
|
|
7
|
+
{%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){% endif -%}
|
|
8
8
|
{%- if not loop.last -%},{%- endif -%}
|
|
9
9
|
{%- if v.comment %} {{ v.comment }}{% endif -%}
|
|
10
10
|
{%- endfor -%}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
{%- for name,v in args.items() -%}
|
|
4
4
|
{{ "\n\t" }}{{ name }}
|
|
5
5
|
{%- if v.type %}: {{ v.type }}{% endif -%}
|
|
6
|
-
{%- if v.default != 'NOT_SET' %} = {{ v.default }}{%endif-%}
|
|
6
|
+
{%- if v.default != 'NOT_SET' %} = {{ v.default }}{% endif -%}
|
|
7
7
|
{%- if not loop.last -%},{%- endif -%}
|
|
8
8
|
{%- if v.comment %} # {{ v.comment }}{% endif -%}
|
|
9
9
|
{%- endfor -%}
|
|
@@ -26,7 +26,7 @@ Tool:<{{ name }}> {{ description or name.replace('_', ' ').capitalize() }}
|
|
|
26
26
|
{%- for k,v in args.items() %}
|
|
27
27
|
"{{ k }}":
|
|
28
28
|
{%- if v.type %} <{{ v.type }}>{% endif -%}
|
|
29
|
-
{%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){%endif-%}
|
|
29
|
+
{%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){% endif -%}
|
|
30
30
|
{%- if not loop.last -%},{%- endif -%}
|
|
31
31
|
{%- if v.comment %} {{ v.comment }}{% endif -%}
|
|
32
32
|
{%- endfor -%}
|