ai-microcore 5.0.0a1__tar.gz → 5.0.0.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/PKG-INFO +5 -1
  2. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/README.md +4 -0
  3. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/__init__.py +23 -2
  4. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/_env.py +33 -27
  5. ai_microcore-5.0.0.dev2/microcore/_llm_functions.py +299 -0
  6. ai_microcore-5.0.0.dev2/microcore/_prepare_llm_args.py +47 -0
  7. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_func/__init__.py +2 -2
  8. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_func/ai-func.json.j2 +1 -1
  9. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_func/ai-func.pythonic.j2 +1 -1
  10. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_func/ai-func.tag.j2 +1 -1
  11. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/configuration.py +47 -21
  12. ai_microcore-5.0.0.dev2/microcore/file_cache.py +103 -0
  13. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/file_storage.py +126 -23
  14. ai_microcore-5.0.0.dev2/microcore/images.py +111 -0
  15. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/anthropic.py +3 -3
  16. ai_microcore-5.0.0.dev2/microcore/llm/google_genai.py +205 -0
  17. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/google_vertex_ai.py +2 -2
  18. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/local_llm.py +3 -3
  19. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/local_transformers.py +2 -2
  20. ai_microcore-5.0.0.dev2/microcore/llm/openai.py +344 -0
  21. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/shared.py +14 -0
  22. ai_microcore-5.0.0.dev2/microcore/lm_client.py +113 -0
  23. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/logging.py +35 -2
  24. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/mcp.py +3 -1
  25. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/message_types.py +23 -3
  26. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/types.py +20 -0
  27. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/utils.py +131 -25
  28. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/wrappers/llm_response_wrapper.py +95 -4
  29. ai_microcore-5.0.0a1/microcore/_llm_functions.py +0 -177
  30. ai_microcore-5.0.0a1/microcore/_prepare_llm_args.py +0 -31
  31. ai_microcore-5.0.0a1/microcore/llm/_openai_llm_v0.py +0 -130
  32. ai_microcore-5.0.0a1/microcore/llm/_openai_llm_v1.py +0 -231
  33. ai_microcore-5.0.0a1/microcore/llm/google_genai.py +0 -108
  34. ai_microcore-5.0.0a1/microcore/llm/openai_llm.py +0 -10
  35. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/LICENSE +0 -0
  36. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ai_modules.py +0 -0
  37. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/embedding_db/__init__.py +0 -0
  38. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/embedding_db/chromadb.py +0 -0
  39. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/embedding_db/qdrant.py +0 -0
  40. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/interactive_setup.py +0 -0
  41. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/json_parsing.py +0 -0
  42. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/llm/__init__.py +0 -0
  43. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/metrics.py +0 -0
  44. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/presets.py +0 -0
  45. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/python.py +0 -0
  46. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/templating/__init__.py +0 -0
  47. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/templating/jinja2.py +0 -0
  48. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/text2speech/elevenlabs.py +0 -0
  49. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/tokenizing.py +0 -0
  50. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/ui.py +0 -0
  51. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/wrappers/__init__.py +0 -0
  52. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/microcore/wrappers/prompt_wrapper.py +0 -0
  53. {ai_microcore-5.0.0a1 → ai_microcore-5.0.0.dev2}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-microcore
3
- Version: 5.0.0a1
3
+ Version: 5.0.0.dev2
4
4
  Summary: # Minimalistic Foundation for AI Applications
5
5
  Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter
6
6
  Author-email: Vitalii Stepanenko <mail@vitaliy.in>
@@ -300,6 +300,10 @@ Benchmark accuracy of 20+ state of the art models on solving olympiad math probl
300
300
 
301
301
  #### [Generate meme image](https://github.com/Nayjest/ai-microcore/blob/main/examples/generate_meme_image.py)
302
302
  Simple example demonstrating image generation using [OpenAI GPT Image](https://platform.openai.com/docs/guides/image-generation?image-generation-model=gpt-image-1) model.
303
+
304
+
305
+ #### [Local inference with PyTorch / Transformers](https://github.com/Nayjest/ai-microcore/blob/main/examples/pytorch_transformers.py)
306
+ Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
303
307
 
304
308
  #### [Other examples](https://github.com/llm-microcore/microcore/tree/main/examples)
305
309
 
@@ -269,6 +269,10 @@ Benchmark accuracy of 20+ state of the art models on solving olympiad math probl
269
269
 
270
270
  #### [Generate meme image](https://github.com/Nayjest/ai-microcore/blob/main/examples/generate_meme_image.py)
271
271
  Simple example demonstrating image generation using [OpenAI GPT Image](https://platform.openai.com/docs/guides/image-generation?image-generation-model=gpt-image-1) model.
272
+
273
+
274
+ #### [Local inference with PyTorch / Transformers](https://github.com/Nayjest/ai-microcore/blob/main/examples/pytorch_transformers.py)
275
+ Text generation using HF/Transformers model locally (example with Qwen 3 0.6B).
272
276
 
273
277
  #### [Other examples](https://github.com/llm-microcore/microcore/tree/main/examples)
274
278
 
@@ -12,6 +12,7 @@ from . import mcp
12
12
  from . import ui
13
13
  from . import tokenizing
14
14
  from . import presets
15
+ from . import file_cache
15
16
  from .embedding_db import SearchResult, AbstractEmbeddingDB, SearchResults
16
17
  from .file_storage import storage
17
18
  from ._env import configure, env, config, min_setup
@@ -19,12 +20,16 @@ from .logging import use_logging
19
20
  from .message_types import UserMsg, AssistantMsg, SysMsg, Msg, PartialMsg
20
21
  from .configuration import (
21
22
  ApiType,
23
+ LLMApiBaseError,
24
+ LLMApiDeploymentIdError,
25
+ LLMApiKeyError,
26
+ LLMApiVersionError,
22
27
  LLMConfigError,
23
28
  Config,
24
29
  EmbeddingDbType,
25
30
  PRINT_STREAM,
26
31
  )
27
- from .types import BadAIJsonAnswer, BadAIAnswer
32
+ from .types import BadAIJsonAnswer, BadAIAnswer, LLMContextLengthExceededError
28
33
  from .wrappers.prompt_wrapper import PromptWrapper
29
34
  from .wrappers.llm_response_wrapper import LLMResponse
30
35
  from ._llm_functions import llm, allm, llm_parallel
@@ -69,6 +74,15 @@ def use_model(name: str):
69
74
  config().LLM_DEFAULT_ARGS["model"] = name
70
75
 
71
76
 
77
+ def model_names() -> list[str]:
78
+ """
79
+ Return a list of available model names from the default LLM client.
80
+ """
81
+ if env().default_client is None:
82
+ raise ValueError("No default LLM client supporting models list configured.")
83
+ return env().default_client.model_names()
84
+
85
+
72
86
  def validate_config():
73
87
  """
74
88
  Validates current MicroCore configuration
@@ -185,7 +199,12 @@ __all__ = [
185
199
  "PRINT_STREAM",
186
200
  "presets",
187
201
  "BadAIAnswer",
202
+ "LLMApiBaseError",
203
+ "LLMApiDeploymentIdError",
204
+ "LLMApiKeyError",
205
+ "LLMApiVersionError",
188
206
  "LLMConfigError",
207
+ "LLMContextLengthExceededError",
189
208
  "LLMResponse",
190
209
  "PromptWrapper",
191
210
  "parse",
@@ -194,6 +213,7 @@ __all__ = [
194
213
  "dedent",
195
214
  # submodules
196
215
  "embedding_db",
216
+ "file_cache",
197
217
  "file_storage",
198
218
  "message_types",
199
219
  "utils",
@@ -206,7 +226,8 @@ __all__ = [
206
226
  "tokenizing",
207
227
  "Metrics",
208
228
  "interactive_setup",
229
+ "model_names",
209
230
  # "wrappers",
210
231
  ]
211
232
 
212
- __version__ = "5.0.0a1"
233
+ __version__ = "5.0.0dev2"
@@ -1,3 +1,6 @@
1
+ """
2
+ MicroCore environment object / initialization.
3
+ """
1
4
  import os.path
2
5
  from dataclasses import dataclass, field, asdict, fields
3
6
  from importlib.util import find_spec
@@ -14,9 +17,9 @@ from .configuration import (
14
17
  PRINT_STREAM,
15
18
  )
16
19
  from .presets import MIN_SETUP
20
+ from .lm_client import BaseAIClient
17
21
  from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
18
22
  from .templating.jinja2 import make_jinja2_env, make_tpl_function
19
- from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
20
23
  from .llm.local_llm import make_llm_functions as make_local_llm_functions
21
24
 
22
25
  if TYPE_CHECKING:
@@ -41,6 +44,7 @@ class Env:
41
44
  tokenizer: "PreTrainedTokenizer" = field( # noqa
42
45
  default=None, init=False, repr=False
43
46
  )
47
+ default_client: BaseAIClient | None = None
44
48
  _mcp_registry: "MCPRegistry" = field(init=False, default=None)
45
49
 
46
50
  def __post_init__(self):
@@ -58,30 +62,38 @@ class Env:
58
62
  raise NotImplementedError
59
63
 
60
64
  def init_templating(self):
65
+ """Initialize Jinja2 environment and template function for templates rendering."""
61
66
  self.jinja_env = make_jinja2_env(self)
62
67
  self.tpl_function = make_tpl_function(self)
63
68
 
64
69
  @property
65
- def mcp_registry(self):
70
+ def mcp_registry(self) -> "MCPRegistry":
71
+ """Lazily initialize and return the registry of preconfigured MCP servers."""
66
72
  if self._mcp_registry is None:
67
73
  from .mcp import MCPRegistry
68
74
  self._mcp_registry = MCPRegistry(self.config.MCP_SERVERS)
69
75
  return self._mcp_registry
70
76
 
71
77
  def init_llm(self):
72
- if self.config.LLM_API_TYPE == ApiType.NONE:
78
+ """Initialize language model functions based on configuration."""
73
79
 
74
- def not_configured(*args, **kwargs) -> "LLMResponse":
75
- raise LLMConfigError("Language model is not configured")
80
+ def default_llm(*args, **kwargs) -> "LLMResponse":
81
+ if self.default_client:
82
+ return self.default_client.generate(*args, **kwargs)
83
+ raise LLMConfigError("Language model is not configured")
76
84
 
77
- async def a_not_configured(*args, **kwargs) -> "LLMResponse":
78
- raise LLMConfigError("Language model is not configured")
85
+ async def aio_default_llm(*args, **kwargs) -> "LLMResponse":
86
+ if self.default_client:
87
+ return await self.default_client.aio.generate(*args, **kwargs)
88
+ raise LLMConfigError("Language model is not configured")
79
89
 
80
- self.llm_function, self.llm_async_function = (
81
- not_configured,
82
- a_not_configured,
83
- )
90
+ self.llm_function, self.llm_async_function = (
91
+ default_llm,
92
+ aio_default_llm,
93
+ )
84
94
 
95
+ if self.config.LLM_API_TYPE == ApiType.NONE:
96
+ pass
85
97
  elif self.config.LLM_API_TYPE == ApiType.FUNCTION:
86
98
  self.llm_function, self.llm_async_function = make_local_llm_functions(
87
99
  self.config
@@ -130,25 +142,19 @@ class Env:
130
142
  self.llm_function,
131
143
  self.llm_async_function,
132
144
  ) = make_google_vertex_llm_functions(self.config)
133
- elif self.config.LLM_API_TYPE == ApiType.GOOGLE_AI_STUDIO:
145
+ elif self.config.LLM_API_TYPE in (ApiType.GOOGLE, ApiType.GOOGLE_AI_STUDIO):
134
146
  try:
135
- from .llm.google_genai import (
136
- make_llm_functions as make_google_genai_llm_functions,
137
- )
147
+ from .llm.google_genai import GoogleClient
138
148
  except ModuleNotFoundError as e:
139
149
  raise ModuleNotFoundError(
140
- "To use the Google Gemini language models via AI Studio, "
141
- "you need to install the `google-generativeai` package. "
142
- "Run `pip install google-generativeai`."
150
+ "To use the Google Gemini language models via Google GenAI SDK, "
151
+ "you need to install the `google-genai` package. "
152
+ "Run `pip install google-genai`."
143
153
  ) from e
144
- (
145
- self.llm_function,
146
- self.llm_async_function,
147
- ) = make_google_genai_llm_functions(self.config)
154
+ self.default_client = GoogleClient(self.config)
148
155
  else:
149
- self.llm_function, self.llm_async_function = make_openai_llm_functions(
150
- self.config
151
- )
156
+ from .llm.openai import OpenAIClient
157
+ self.default_client = OpenAIClient(self.config)
152
158
 
153
159
  def init_similarity_search(self):
154
160
  if (
@@ -225,10 +231,10 @@ _env: Env | None = None
225
231
 
226
232
 
227
233
  def env() -> Env:
228
- """Returns the current MicroCore environment"""
234
+ """Return current MicroCore environment object."""
229
235
  return _env or Env()
230
236
 
231
237
 
232
238
  def config() -> Config:
233
- """Resolve current configuration"""
239
+ """Resolve current configuration."""
234
240
  return env().config
@@ -0,0 +1,299 @@
1
+ import re
2
+ import logging
3
+ from datetime import datetime
4
+ from typing import Any
5
+
6
+
7
+ from .utils import run_parallel, RETURN_EXCEPTION
8
+ from .wrappers.llm_response_wrapper import LLMResponse, DictFromLLMResponse, ImageGenerationResponse
9
+ from .types import TPrompt, LLMContextLengthExceededError
10
+ from .file_cache import (
11
+ cache_hit,
12
+ load_cache,
13
+ save_cache,
14
+ build_cache_name,
15
+ delete_cache
16
+ )
17
+ from ._env import env
18
+
19
+
20
+ def convert_exception(e: Exception, model: str = None) -> Exception | None:
21
+ """
22
+ Convert LLM exceptions to microcore-specific exceptions if possible.
23
+ Args:
24
+ e (Exception): Original exception
25
+ model (str): LLM model name, used for better error messages
26
+ Returns:
27
+ Converted exception or None if no conversion is possible
28
+ """
29
+ if not isinstance(e, Exception):
30
+ return None
31
+ t, msg = f"{type(e).__module__}.{type(e).__name__}", str(e)
32
+ max_tokens, actual_tokens = None, None
33
+ if t == "openai.BadRequestError" and "context_length_exceeded" in msg:
34
+ match = re.search(
35
+ r"maximum context length is (\d+) tokens.*?resulted in (\d+) tokens",
36
+ msg
37
+ )
38
+ if match:
39
+ max_tokens = int(match.group(1))
40
+ actual_tokens = int(match.group(2))
41
+ return LLMContextLengthExceededError(
42
+ actual_tokens=actual_tokens,
43
+ max_tokens=max_tokens,
44
+ model=model
45
+ )
46
+ if t == "anthropic.BadRequestError" and "prompt is too long:" in msg:
47
+ if match := re.search(r"(\d+)\s+tokens\s+>\s+(\d+)\s+maximum", msg):
48
+ max_tokens = int(match.group(2))
49
+ actual_tokens = int(match.group(1))
50
+ return LLMContextLengthExceededError(
51
+ actual_tokens=actual_tokens,
52
+ max_tokens=max_tokens,
53
+ model=model
54
+ )
55
+ if (
56
+ t == "google.api_core.exceptions.InvalidArgument"
57
+ and "The input token count exceeds the maximum number of tokens allowed" in msg
58
+ ):
59
+ if match := re.search(
60
+ r"The input token count exceeds the maximum number of tokens allowed (\d+)",
61
+ msg
62
+ ):
63
+ max_tokens = int(match.group(1))
64
+ return LLMContextLengthExceededError(
65
+ actual_tokens=actual_tokens,
66
+ max_tokens=max_tokens,
67
+ model=model
68
+ )
69
+ return None
70
+
71
+
72
+ def llm(
73
+ prompt: TPrompt,
74
+ retries: int = 0,
75
+ parse_json: bool | dict = False,
76
+ file_cache: bool | str = False,
77
+ **kwargs
78
+ ) -> str | LLMResponse | ImageGenerationResponse:
79
+ """
80
+ Request Large Language Model synchronously
81
+
82
+ Args:
83
+ prompt (str | Msg | dict | list[str | Msg | dict]): Text to send to LLM.
84
+ retries (int):
85
+ Number of retries in case of error.
86
+ Default is 0 (no retries).
87
+ parse_json (bool|dict):
88
+ If True, parses response as JSON,
89
+ alternatively non-empty dict can be used as parse_json arguments
90
+ Default is False (no parsing).
91
+ file_cache (bool | str):
92
+ If True or non-empty string, enables file caching of LLM responses.
93
+ If string, it will be used as cache prefix.
94
+ When enabled, identical requests with identical parameters
95
+ will return cached responses instead of making new API calls.
96
+ Default is False (no caching).
97
+ **kwargs: Parameters supported by the LLM API.
98
+
99
+ See parameters supported by the OpenAI:
100
+
101
+ - https://platform.openai.com/docs/api-reference/completions/create
102
+ - https://platform.openai.com/docs/api-reference/chat/create
103
+
104
+ **Additional parameters:**
105
+
106
+ - callback: callable - callback function
107
+ to be called on each chunk of text,
108
+ enables response streaming if supported by the LLM API
109
+ - callbacks: list[callable] - collection of callbacks
110
+ to be called on each chunk of text,
111
+ enables response streaming if supported by the LLM API
112
+
113
+ Returns:
114
+
115
+ Text generated by the LLM as string
116
+ with all fields returned by the API accessible as attributes.
117
+
118
+ See fields returned by the OpenAI:
119
+
120
+ - https://platform.openai.com/docs/api-reference/completions/object
121
+ - https://platform.openai.com/docs/api-reference/chat/object
122
+ """
123
+ [h(prompt, **kwargs) for h in env().llm_before_handlers]
124
+ start = datetime.now()
125
+
126
+ if (file_cache and cache_hit(
127
+ cache_name := build_cache_name(
128
+ prompt, kwargs,
129
+ prefix=file_cache if isinstance(file_cache, str) else "llm_requests"
130
+ )
131
+ )):
132
+ response: LLMResponse = load_cache(cache_name)
133
+ response.from_file_cache = True
134
+ tries = 0
135
+ else:
136
+ tries = retries + 1
137
+ while tries > 0:
138
+ try:
139
+ tries -= 1
140
+ response = env().llm_function(prompt, **kwargs)
141
+ break
142
+ except Exception as e: # pylint: disable=W0718
143
+ converted_exception = convert_exception(e)
144
+ # If context length exceeded, or no tries left --> do not retry
145
+ if tries == 0 or isinstance(converted_exception, LLMContextLengthExceededError):
146
+ if converted_exception:
147
+ raise converted_exception from e
148
+ raise e
149
+ logging.error(f"LLM error: {e}")
150
+ logging.info(f"Retrying... {tries} retries left")
151
+ continue
152
+ try:
153
+ response.gen_duration = (datetime.now() - start).total_seconds()
154
+ if not env().config.SAVE_MEMORY:
155
+ response.prompt = prompt
156
+ except AttributeError:
157
+ ...
158
+ if file_cache:
159
+ save_cache(cache_name, response)
160
+ [h(response) for h in env().llm_after_handlers]
161
+ if tries > 0:
162
+ retry_params = dict(**kwargs)
163
+ retry_params["retries"] = tries - 1
164
+ setattr(
165
+ response,
166
+ "_retry_callback",
167
+ lambda: llm(prompt, **retry_params)
168
+ )
169
+ if parse_json:
170
+ parsing_params = parse_json if isinstance(parse_json, dict) else {}
171
+ return response.parse_json(**parsing_params)
172
+ return response
173
+
174
+
175
+ async def allm(
176
+ prompt: TPrompt,
177
+ retries: int = 0,
178
+ parse_json: bool | dict = False,
179
+ file_cache: bool | str = False,
180
+ **kwargs
181
+ ) -> str | LLMResponse | DictFromLLMResponse | ImageGenerationResponse:
182
+ """
183
+ Request Large Language Model asynchronously
184
+
185
+ Args:
186
+ prompt (str | Msg | dict | list[str | Msg | dict]): Text to send to LLM.
187
+ retries (int):
188
+ Number of retries in case of error.
189
+ Default is 0 (no retries).
190
+ parse_json (bool|dict):
191
+ If True, parses response as JSON,
192
+ alternatively non-empty dict can be used as parse_json arguments.
193
+ **kwargs: Parameters supported by the LLM API.
194
+
195
+ See parameters supported by the OpenAI:
196
+
197
+ - https://platform.openai.com/docs/api-reference/completions/create
198
+ - https://platform.openai.com/docs/api-reference/chat/create
199
+
200
+ **Additional parameters:**
201
+
202
+ - callback: callable - callback function
203
+ to be called on each chunk of text,
204
+ enables response streaming if supported by the LLM API
205
+ - callbacks: list[callable] - collection of callbacks
206
+ to be called on each chunk of text,
207
+ enables response streaming if supported by the LLM API
208
+
209
+ Note: async callbacks are supported only for async LLM API calls
210
+
211
+ Returns:
212
+
213
+ Text generated by the LLM as string
214
+ with all fields returned by the API accessible as attributes.
215
+
216
+ See fields returned by the OpenAI:
217
+
218
+ - https://platform.openai.com/docs/api-reference/completions/object
219
+ - https://platform.openai.com/docs/api-reference/chat/object
220
+ """
221
+ [h(prompt, **kwargs) for h in env().llm_before_handlers]
222
+ start = datetime.now()
223
+
224
+ if (file_cache and cache_hit(
225
+ cache_name := build_cache_name(
226
+ prompt, kwargs,
227
+ prefix=file_cache if isinstance(file_cache, str) else "llm_requests"
228
+ )
229
+ )):
230
+ response: LLMResponse = load_cache(cache_name)
231
+ response.from_file_cache = True
232
+ tries = 0
233
+ else:
234
+ tries = retries + 1
235
+ while tries > 0:
236
+ try:
237
+ tries -= 1
238
+ response = await env().llm_async_function(prompt, **kwargs)
239
+ break
240
+ except Exception as e: # pylint: disable=W0718
241
+ converted_exception = convert_exception(e)
242
+ # If context length exceeded, or no tries left --> do not retry
243
+ if tries == 0 or isinstance(converted_exception, LLMContextLengthExceededError):
244
+ if converted_exception:
245
+ raise converted_exception from e
246
+ raise e
247
+ logging.error(f"LLM error: {e}")
248
+ logging.info(f"Retrying... {tries} retries left")
249
+ continue
250
+ try:
251
+ response.gen_duration = (datetime.now() - start).total_seconds()
252
+ if not env().config.SAVE_MEMORY:
253
+ response.prompt = prompt
254
+ except AttributeError:
255
+ ...
256
+ if file_cache:
257
+ save_cache(cache_name, response)
258
+ [h(response) for h in env().llm_after_handlers]
259
+ if parse_json:
260
+ try:
261
+ parsing_params = parse_json if isinstance(parse_json, dict) else {}
262
+ return response.parse_json(**parsing_params)
263
+ except Exception as e: # pylint: disable=W0718
264
+ if tries > 0:
265
+ logging.error(f"LLM error: {e}")
266
+ logging.info(f"Retrying... {tries} retries left")
267
+ if file_cache:
268
+ delete_cache(cache_name)
269
+ return await allm(prompt, retries=tries - 1, parse_json=parse_json, **kwargs)
270
+ return response
271
+
272
+
273
+ async def llm_parallel(
274
+ prompts: list[TPrompt],
275
+ max_concurrent_tasks: int = None,
276
+ allow_failures: bool = False,
277
+ return_on_failure: Any = RETURN_EXCEPTION,
278
+ log_errors: bool = True,
279
+ **kwargs
280
+ ) -> list[str | LLMResponse]:
281
+ """
282
+ Execute multiple LLM requests in parallel
283
+
284
+ Returns (list[LLMResponse | str]): a list of responses in the same order as the prompts
285
+ """
286
+ tasks = [allm(prompt, **kwargs) for prompt in prompts]
287
+
288
+ if max_concurrent_tasks is None:
289
+ max_concurrent_tasks = int(env().config.MAX_CONCURRENT_TASKS or 0)
290
+ if not max_concurrent_tasks:
291
+ max_concurrent_tasks = len(tasks)
292
+
293
+ return await run_parallel(
294
+ tasks,
295
+ max_concurrent_tasks=max_concurrent_tasks,
296
+ allow_failures=allow_failures,
297
+ return_on_failure=return_on_failure,
298
+ log_errors=log_errors,
299
+ )
@@ -0,0 +1,47 @@
1
+ from dataclasses import asdict
2
+ from typing import Any
3
+
4
+ from .message_types import DEFAULT_MESSAGE_ROLE, Msg, MsgContent
5
+ from .types import TPrompt
6
+
7
+
8
+ def prepare_prompt(prompt) -> str:
9
+ """Converts prompt to string for LLM completion API"""
10
+ return "\n".join(
11
+ [
12
+ str(p["content"]) if isinstance(p, dict) and "content" in p else str(p)
13
+ for p in (prompt if isinstance(prompt, list) else [prompt])
14
+ ]
15
+ )
16
+
17
+
18
+ def prompt_item_to_message_dict(item: Any, strict=False) -> dict | Any:
19
+ """
20
+ Convert a single prompt item to message dict for LLM inference chat API (OpenAI-like).
21
+ Args:
22
+ item: The prompt item to convert. Can be a string, Msg instance, or dict.
23
+ strict: If True, raises TypeError for unsupported types. If False, returns the item as is.
24
+ Returns:
25
+ A dict representing the message,
26
+ or the original item if not convertible and strict is False.
27
+ """
28
+ if isinstance(item, Msg):
29
+ message_dict = asdict(item, dict_factory=item.DICT_FACTORY)
30
+ elif isinstance(item, dict):
31
+ message_dict = item
32
+ else:
33
+ if strict and not isinstance(item, str | MsgContent):
34
+ raise TypeError(f"Unsupported message type: {type(item)}")
35
+ message_dict = dict(role=DEFAULT_MESSAGE_ROLE, content=item)
36
+ return message_dict
37
+
38
+
39
+ def prompt_to_message_dicts(prompt: TPrompt, strict=False) -> list[dict | Any]:
40
+ """
41
+ Convert prompt to messages for LLM inference chat API (OpenAI-like).
42
+ Args:
43
+ prompt: The prompt to convert. Can be a string, Msg instance, dict, or list of these.
44
+ strict: If True, raises TypeError for unsupported types. If False, returns the item as is.
45
+ """
46
+ message_like_items: list[Any] = prompt if isinstance(prompt, list) else [prompt]
47
+ return [prompt_item_to_message_dict(item, strict=strict) for item in message_like_items]
@@ -72,7 +72,7 @@ def func_metadata(func, name=None) -> Dict[str, Any]:
72
72
  metadata["args"][key]["type"] = param_type
73
73
 
74
74
  arg_comments = func_arg_comments(func)
75
- for key, val in metadata["args"].items():
75
+ for key, val in metadata.get("args", {}).items():
76
76
  val["comment"] = arg_comments[key]
77
77
 
78
78
  # Parse docstring
@@ -80,7 +80,7 @@ def func_metadata(func, name=None) -> Dict[str, Any]:
80
80
 
81
81
  # Add descriptions from parsed docstring to parameters
82
82
  for param in parsed_docstring.params:
83
- if param.arg_name in metadata["args"]:
83
+ if param.arg_name in metadata.get("args", []):
84
84
  metadata["args"][param.arg_name]["docstr"] = param.description
85
85
 
86
86
  return metadata
@@ -4,7 +4,7 @@
4
4
  {%- for k,v in args.items() %}
5
5
  "{{ k }}":
6
6
  {%- if v.type %} <{{ v.type }}>{% endif -%}
7
- {%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){%endif-%}
7
+ {%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){% endif -%}
8
8
  {%- if not loop.last -%},{%- endif -%}
9
9
  {%- if v.comment %} {{ v.comment }}{% endif -%}
10
10
  {%- endfor -%}
@@ -3,7 +3,7 @@
3
3
  {%- for name,v in args.items() -%}
4
4
  {{ "\n\t" }}{{ name }}
5
5
  {%- if v.type %}: {{ v.type }}{% endif -%}
6
- {%- if v.default != 'NOT_SET' %} = {{ v.default }}{%endif-%}
6
+ {%- if v.default != 'NOT_SET' %} = {{ v.default }}{% endif -%}
7
7
  {%- if not loop.last -%},{%- endif -%}
8
8
  {%- if v.comment %} # {{ v.comment }}{% endif -%}
9
9
  {%- endfor -%}
@@ -26,7 +26,7 @@ Tool:<{{ name }}> {{ description or name.replace('_', ' ').capitalize() }}
26
26
  {%- for k,v in args.items() %}
27
27
  "{{ k }}":
28
28
  {%- if v.type %} <{{ v.type }}>{% endif -%}
29
- {%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){%endif-%}
29
+ {%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){% endif -%}
30
30
  {%- if not loop.last -%},{%- endif -%}
31
31
  {%- if v.comment %} {{ v.comment }}{% endif -%}
32
32
  {%- endfor -%}