langroid 0.58.2__py3-none-any.whl → 0.59.0b1__py3-none-any.whl
- langroid/agent/base.py +39 -17
- langroid/agent/base.py-e +2216 -0
- langroid/agent/callbacks/chainlit.py +2 -1
- langroid/agent/chat_agent.py +73 -55
- langroid/agent/chat_agent.py-e +2086 -0
- langroid/agent/chat_document.py +7 -7
- langroid/agent/chat_document.py-e +513 -0
- langroid/agent/openai_assistant.py +9 -9
- langroid/agent/openai_assistant.py-e +882 -0
- langroid/agent/special/arangodb/arangodb_agent.py +10 -18
- langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
- langroid/agent/special/arangodb/tools.py +3 -3
- langroid/agent/special/doc_chat_agent.py +16 -14
- langroid/agent/special/lance_rag/critic_agent.py +2 -2
- langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
- langroid/agent/special/lance_tools.py +6 -5
- langroid/agent/special/lance_tools.py-e +61 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
- langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
- langroid/agent/special/relevance_extractor_agent.py +1 -1
- langroid/agent/special/sql/sql_chat_agent.py +11 -3
- langroid/agent/task.py +9 -87
- langroid/agent/task.py-e +2418 -0
- langroid/agent/tool_message.py +33 -17
- langroid/agent/tool_message.py-e +400 -0
- langroid/agent/tools/file_tools.py +4 -2
- langroid/agent/tools/file_tools.py-e +234 -0
- langroid/agent/tools/mcp/fastmcp_client.py +19 -6
- langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
- langroid/agent/tools/orchestration.py +22 -17
- langroid/agent/tools/orchestration.py-e +301 -0
- langroid/agent/tools/recipient_tool.py +3 -3
- langroid/agent/tools/task_tool.py +22 -16
- langroid/agent/tools/task_tool.py-e +249 -0
- langroid/agent/xml_tool_message.py +90 -35
- langroid/agent/xml_tool_message.py-e +392 -0
- langroid/cachedb/base.py +1 -1
- langroid/embedding_models/base.py +2 -2
- langroid/embedding_models/models.py +3 -7
- langroid/embedding_models/models.py-e +563 -0
- langroid/exceptions.py +4 -1
- langroid/language_models/azure_openai.py +2 -2
- langroid/language_models/azure_openai.py-e +134 -0
- langroid/language_models/base.py +6 -4
- langroid/language_models/base.py-e +812 -0
- langroid/language_models/client_cache.py +64 -0
- langroid/language_models/config.py +2 -4
- langroid/language_models/config.py-e +18 -0
- langroid/language_models/model_info.py +9 -1
- langroid/language_models/model_info.py-e +483 -0
- langroid/language_models/openai_gpt.py +119 -20
- langroid/language_models/openai_gpt.py-e +2280 -0
- langroid/language_models/provider_params.py +3 -22
- langroid/language_models/provider_params.py-e +153 -0
- langroid/mytypes.py +11 -4
- langroid/mytypes.py-e +132 -0
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/file_attachment.py +1 -1
- langroid/parsing/file_attachment.py-e +246 -0
- langroid/parsing/md_parser.py +14 -4
- langroid/parsing/md_parser.py-e +574 -0
- langroid/parsing/parser.py +22 -7
- langroid/parsing/parser.py-e +410 -0
- langroid/parsing/repo_loader.py +3 -1
- langroid/parsing/repo_loader.py-e +812 -0
- langroid/parsing/search.py +1 -1
- langroid/parsing/url_loader.py +17 -51
- langroid/parsing/url_loader.py-e +683 -0
- langroid/parsing/urls.py +5 -4
- langroid/parsing/urls.py-e +279 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/pydantic_v1/__init__.py +45 -6
- langroid/pydantic_v1/__init__.py-e +36 -0
- langroid/pydantic_v1/main.py +11 -4
- langroid/pydantic_v1/main.py-e +11 -0
- langroid/utils/configuration.py +13 -11
- langroid/utils/configuration.py-e +141 -0
- langroid/utils/constants.py +1 -1
- langroid/utils/constants.py-e +32 -0
- langroid/utils/globals.py +21 -5
- langroid/utils/globals.py-e +49 -0
- langroid/utils/html_logger.py +2 -1
- langroid/utils/html_logger.py-e +825 -0
- langroid/utils/object_registry.py +1 -1
- langroid/utils/object_registry.py-e +66 -0
- langroid/utils/pydantic_utils.py +55 -28
- langroid/utils/pydantic_utils.py-e +602 -0
- langroid/utils/types.py +2 -2
- langroid/utils/types.py-e +113 -0
- langroid/vector_store/base.py +3 -3
- langroid/vector_store/lancedb.py +5 -5
- langroid/vector_store/lancedb.py-e +404 -0
- langroid/vector_store/meilisearch.py +2 -2
- langroid/vector_store/pineconedb.py +4 -4
- langroid/vector_store/pineconedb.py-e +427 -0
- langroid/vector_store/postgres.py +1 -1
- langroid/vector_store/qdrantdb.py +3 -3
- langroid/vector_store/weaviatedb.py +1 -1
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/METADATA +3 -2
- langroid-0.59.0b1.dist-info/RECORD +181 -0
- langroid/agent/special/doc_chat_task.py +0 -0
- langroid/mcp/__init__.py +0 -1
- langroid/mcp/server/__init__.py +0 -1
- langroid-0.58.2.dist-info/RECORD +0 -145
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/WHEEL +0 -0
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,812 @@

```python
import json
import logging
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum
from typing import (
    Any,
    Awaitable,
    Callable,
    Dict,
    List,
    Literal,
    Optional,
    Tuple,
    Type,
    Union,
    cast,
)

from pydantic import Field
from pydantic_settings import BaseSettings

from langroid.cachedb.base import CacheDBConfig
from langroid.cachedb.redis_cachedb import RedisCacheConfig
from langroid.language_models.model_info import ModelInfo, get_model_info
from langroid.parsing.agent_chats import parse_message
from langroid.parsing.file_attachment import FileAttachment
from langroid.parsing.parse_json import parse_imperfect_json, top_level_json_field
from langroid.prompts.dialog import collate_chat_history
from langroid.pydantic_v1 import BaseModel
from langroid.utils.configuration import settings
from langroid.utils.output.printing import show_if_debug

logger = logging.getLogger(__name__)


def noop_fn(*args: List[Any], **kwargs: Dict[str, Any]) -> None:
    pass


async def async_noop_fn(*args: List[Any], **kwargs: Dict[str, Any]) -> None:
    pass


FunctionCallTypes = Literal["none", "auto"]
ToolChoiceTypes = Literal["none", "auto", "required"]
ToolTypes = Literal["function"]

DEFAULT_CONTEXT_LENGTH = 16_000


class StreamEventType(Enum):
    TEXT = 1
    FUNC_NAME = 2
    FUNC_ARGS = 3
    TOOL_NAME = 4
    TOOL_ARGS = 5


class RetryParams(BaseSettings):
    max_retries: int = 5
    initial_delay: float = 1.0
    exponential_base: float = 1.3
    jitter: bool = True


class LLMConfig(BaseSettings):
    """
    Common configuration for all language models.
    """

    type: str = "openai"
    streamer: Optional[Callable[[Any], None]] = noop_fn
    streamer_async: Optional[Callable[..., Awaitable[None]]] = async_noop_fn
    api_base: str | None = None
    formatter: None | str = None
    # specify None if you want to use the full max output tokens of the model
    max_output_tokens: int | None = 8192
    timeout: int = 20  # timeout for API requests
    chat_model: str = ""
    completion_model: str = ""
    temperature: float = 0.0
    chat_context_length: int | None = None
    async_stream_quiet: bool = False  # suppress streaming output in async mode?
    completion_context_length: int | None = None
    # if input length + max_output_tokens > context length of model,
    # we will try shortening requested output
    min_output_tokens: int = 64
    use_completion_for_chat: bool = False  # use completion model for chat?
    # use chat model for completion? For OpenAI models, this MUST be set to True!
    use_chat_for_completion: bool = True
    stream: bool = True  # stream output from API?
    # TODO: we could have a `stream_reasoning` flag here to control whether to show
    # reasoning output from reasoning models
    cache_config: None | CacheDBConfig = RedisCacheConfig()
    thought_delimiters: Tuple[str, str] = ("<think>", "</think>")
    retry_params: RetryParams = RetryParams()

    @property
    def model_max_output_tokens(self) -> int:
        return (
            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens
        )


class LLMFunctionCall(BaseModel):
    """
    Structure of LLM response indicating it "wants" to call a function.
    Modeled after OpenAI spec for `function_call` field in ChatCompletion API.
    """

    name: str  # name of function to call
    arguments: Optional[Dict[str, Any]] = None

    @staticmethod
    def from_dict(message: Dict[str, Any]) -> "LLMFunctionCall":
        """
        Initialize from dictionary.
        Args:
            d: dictionary containing fields to initialize
        """
        fun_call = LLMFunctionCall(name=message["name"])
        fun_args_str = message["arguments"]
        # sometimes may be malformed with invalid indents,
        # so we try to be safe by removing newlines.
        if fun_args_str is not None:
            fun_args_str = fun_args_str.replace("\n", "").strip()
            dict_or_list = parse_imperfect_json(fun_args_str)

            if not isinstance(dict_or_list, dict):
                raise ValueError(
                    f"""
                    Invalid function args: {fun_args_str}
                    parsed as {dict_or_list},
                    which is not a valid dict.
                    """
                )
            fun_args = dict_or_list
        else:
            fun_args = None
        fun_call.arguments = fun_args

        return fun_call

    def __str__(self) -> str:
        return "FUNC: " + json.dumps(self.model_dump(), indent=2)


class LLMFunctionSpec(BaseModel):
    """
    Description of a function available for the LLM to use.
    To be used when calling the LLM `chat()` method with the `functions` parameter.
    Modeled after OpenAI spec for `functions` fields in ChatCompletion API.
    """

    name: str
    description: str
    parameters: Dict[str, Any]


class OpenAIToolCall(BaseModel):
    """
    Represents a single tool call in a list of tool calls generated by OpenAI LLM API.
    See https://platform.openai.com/docs/api-reference/chat/create

    Attributes:
        id: The id of the tool call.
        type: The type of the tool call;
            only "function" is currently possible (7/26/24).
        function: The function call.
    """

    id: str | None = None
    type: ToolTypes = "function"
    function: LLMFunctionCall | None = None

    @staticmethod
    def from_dict(message: Dict[str, Any]) -> "OpenAIToolCall":
        """
        Initialize from dictionary.
        Args:
            d: dictionary containing fields to initialize
        """
        id = message["id"]
        type = message["type"]
        function = LLMFunctionCall.from_dict(message["function"])
        return OpenAIToolCall(id=id, type=type, function=function)

    def __str__(self) -> str:
        if self.function is None:
            return ""
        return "OAI-TOOL: " + json.dumps(self.function.model_dump(), indent=2)


class OpenAIToolSpec(BaseModel):
    type: ToolTypes
    strict: Optional[bool] = None
    function: LLMFunctionSpec


class OpenAIJsonSchemaSpec(BaseModel):
    strict: Optional[bool] = None
    function: LLMFunctionSpec

    def to_dict(self) -> Dict[str, Any]:
        json_schema: Dict[str, Any] = {
            "name": self.function.name,
            "description": self.function.description,
            "schema": self.function.parameters,
        }
        if self.strict is not None:
            json_schema["strict"] = self.strict

        return {
            "type": "json_schema",
            "json_schema": json_schema,
        }


class LLMTokenUsage(BaseModel):
    """
    Usage of tokens by an LLM.
    """

    prompt_tokens: int = 0
    cached_tokens: int = 0
    completion_tokens: int = 0
    cost: float = 0.0
    calls: int = 0  # how many API calls - not used as of 2025-04-04

    def reset(self) -> None:
        self.prompt_tokens = 0
        self.cached_tokens = 0
        self.completion_tokens = 0
        self.cost = 0.0
        self.calls = 0

    def __str__(self) -> str:
        return (
            f"Tokens = "
            f"(prompt {self.prompt_tokens}, cached {self.cached_tokens}, "
            f"completion {self.completion_tokens}), "
            f"Cost={self.cost}, Calls={self.calls}"
        )

    @property
    def total_tokens(self) -> int:
        return self.prompt_tokens + self.completion_tokens


class Role(str, Enum):
    """
    Possible roles for a message in a chat.
    """

    USER = "user"
    SYSTEM = "system"
    ASSISTANT = "assistant"
    FUNCTION = "function"
    TOOL = "tool"


class LLMMessage(BaseModel):
    """
    Class representing an entry in the msg-history sent to the LLM API.
    It could be one of these:
    - a user message
    - an LLM ("Assistant") response
    - a fn-call or tool-call-list from an OpenAI-compatible LLM API response
    - a result or results from executing a fn or tool-call(s)
    """

    role: Role
    name: Optional[str] = None
    tool_call_id: Optional[str] = None  # which OpenAI LLM tool this is a response to
    tool_id: str = ""  # used by OpenAIAssistant
    content: str
    files: List[FileAttachment] = []
    function_call: Optional[LLMFunctionCall] = None
    tool_calls: Optional[List[OpenAIToolCall]] = None
    timestamp: datetime = Field(default_factory=datetime.utcnow)
    # link to corresponding chat document, for provenance/rewind purposes
    chat_document_id: str = ""

    def api_dict(self, model: str, has_system_role: bool = True) -> Dict[str, Any]:
        """
        Convert to dictionary for API request, keeping ONLY
        the fields that are expected in an API call!
        E.g., DROP the tool_id, since it is only for use in the Assistant API,
        not the completion API.

        Args:
            has_system_role: whether the message has a system role (if not,
                set to "user" role)
        Returns:
            dict: dictionary representation of LLM message
        """
        d = self.model_dump()
        files: List[FileAttachment] = d.pop("files")
        if len(files) > 0 and self.role == Role.USER:
            # In there are files, then content is an array of
            # different content-parts
            d["content"] = [
                dict(
                    type="text",
                    text=self.content,
                )
            ] + [f.to_dict(model) for f in self.files]

        # if there is a key k = "role" with value "system", change to "user"
        # in case has_system_role is False
        if not has_system_role and "role" in d and d["role"] == "system":
            d["role"] = "user"
            if "content" in d:
                d["content"] = "[ADDITIONAL SYSTEM MESSAGE:]\n\n" + d["content"]
        # drop None values since API doesn't accept them
        dict_no_none = {k: v for k, v in d.items() if v is not None}
        if "name" in dict_no_none and dict_no_none["name"] == "":
            # OpenAI API does not like empty name
            del dict_no_none["name"]
        if "function_call" in dict_no_none:
            # arguments must be a string
            if "arguments" in dict_no_none["function_call"]:
                dict_no_none["function_call"]["arguments"] = json.dumps(
                    dict_no_none["function_call"]["arguments"]
                )
        if "tool_calls" in dict_no_none:
            # convert tool calls to API format
            for tc in dict_no_none["tool_calls"]:
                if "arguments" in tc["function"]:
                    # arguments must be a string
                    tc["function"]["arguments"] = json.dumps(
                        tc["function"]["arguments"]
                    )
        # IMPORTANT! drop fields that are not expected in API call
        dict_no_none.pop("tool_id", None)
        dict_no_none.pop("timestamp", None)
        dict_no_none.pop("chat_document_id", None)
        return dict_no_none

    def __str__(self) -> str:
        if self.function_call is not None:
            content = "FUNC: " + json.dumps(self.function_call)
        else:
            content = self.content
        name_str = f" ({self.name})" if self.name else ""
        return f"{self.role} {name_str}: {content}"


class LLMResponse(BaseModel):
    """
    Class representing response from LLM.
    """

    message: str
    reasoning: str = ""  # optional reasoning text from reasoning models
    # TODO tool_id needs to generalize to multi-tool calls
    tool_id: str = ""  # used by OpenAIAssistant
    oai_tool_calls: Optional[List[OpenAIToolCall]] = None
    function_call: Optional[LLMFunctionCall] = None
    usage: Optional[LLMTokenUsage] = None
    cached: bool = False

    def __str__(self) -> str:
        if self.function_call is not None:
            return str(self.function_call)
        elif self.oai_tool_calls:
            return "\n".join(str(tc) for tc in self.oai_tool_calls)
        else:
            return self.message

    def to_LLMMessage(self) -> LLMMessage:
        """Convert LLM response to an LLMMessage, to be included in the
        message-list sent to the API.
        This is currently NOT used in any significant way in the library, and is only
        provided as a utility to construct a message list for the API when directly
        working with an LLM object.

        In a `ChatAgent`, an LLM response is first converted to a ChatDocument,
        which is in turn converted to an LLMMessage via `ChatDocument.to_LLMMessage()`
        See `ChatAgent._prep_llm_messages()` and `ChatAgent.llm_response_messages`
        """
        return LLMMessage(
            role=Role.ASSISTANT,
            content=self.message,
            name=None if self.function_call is None else self.function_call.name,
            function_call=self.function_call,
            tool_calls=self.oai_tool_calls,
        )

    def get_recipient_and_message(
        self,
    ) -> Tuple[str, str]:
        """
        If `message` or `function_call` of an LLM response contains an explicit
        recipient name, return this recipient name and `message` stripped
        of the recipient name if specified.

        Two cases:
        (a) `message` contains addressing string "TO: <name> <content>", or
        (b) `message` is empty and function_call/tool_call with explicit `recipient`


        Returns:
            (str): name of recipient, which may be empty string if no recipient
            (str): content of message

        """

        if self.function_call is not None:
            # in this case we ignore message, since all information is in function_call
            msg = ""
            args = self.function_call.arguments
            recipient = ""
            if isinstance(args, dict):
                recipient = args.get("recipient", "")
            return recipient, msg
        else:
            msg = self.message
            if self.oai_tool_calls is not None:
                # get the first tool that has a recipient field, if any
                for tc in self.oai_tool_calls:
                    if tc.function is not None and tc.function.arguments is not None:
                        recipient = tc.function.arguments.get(
                            "recipient"
                        )  # type: ignore
                        if recipient is not None and recipient != "":
                            return recipient, ""

        # It's not a function or tool call, so continue looking to see
        # if a recipient is specified in the message.

        # First check if message contains "TO: <recipient> <content>"
        recipient_name, content = parse_message(msg) if msg is not None else ("", "")
        # check if there is a top level json that specifies 'recipient',
        # and retain the entire message as content.
        if recipient_name == "":
            recipient_name = top_level_json_field(msg, "recipient") if msg else ""
            content = msg
        return recipient_name, content


# Define an abstract base class for language models
class LanguageModel(ABC):
    """
    Abstract base class for language models.
    """

    # usage cost by model, accumulates here
    usage_cost_dict: Dict[str, LLMTokenUsage] = {}

    def __init__(self, config: LLMConfig = LLMConfig()):
        self.config = config
        self.chat_model_orig = config.chat_model

    @staticmethod
    def create(config: Optional[LLMConfig]) -> Optional["LanguageModel"]:
        """
        Create a language model.
        Args:
            config: configuration for language model
        Returns: instance of language model
        """
        if type(config) is LLMConfig:
            raise ValueError(
                """
                Cannot create a Language Model object from LLMConfig.
                Please specify a specific subclass of LLMConfig e.g.,
                OpenAIGPTConfig. If you are creating a ChatAgent from
                a ChatAgentConfig, please specify the `llm` field of this config
                as a specific subclass of LLMConfig, e.g., OpenAIGPTConfig.
                """
            )
        from langroid.language_models.azure_openai import AzureGPT
        from langroid.language_models.mock_lm import MockLM, MockLMConfig
        from langroid.language_models.openai_gpt import OpenAIGPT

        if config is None or config.type is None:
            return None

        if config.type == "mock":
            return MockLM(cast(MockLMConfig, config))

        openai: Union[Type[AzureGPT], Type[OpenAIGPT]]

        if config.type == "azure":
            openai = AzureGPT
        else:
            openai = OpenAIGPT
        cls = dict(
            openai=openai,
        ).get(config.type, openai)
        return cls(config)  # type: ignore

    @staticmethod
    def user_assistant_pairs(lst: List[str]) -> List[Tuple[str, str]]:
        """
        Given an even-length sequence of strings, split into a sequence of pairs

        Args:
            lst (List[str]): sequence of strings

        Returns:
            List[Tuple[str,str]]: sequence of pairs of strings
        """
        evens = lst[::2]
        odds = lst[1::2]
        return list(zip(evens, odds))

    @staticmethod
    def get_chat_history_components(
        messages: List[LLMMessage],
    ) -> Tuple[str, List[Tuple[str, str]], str]:
        """
        From the chat history, extract system prompt, user-assistant turns, and
        final user msg.

        Args:
            messages (List[LLMMessage]): List of messages in the chat history

        Returns:
            Tuple[str, List[Tuple[str,str]], str]:
                system prompt, user-assistant turns, final user msg

        """
        # Handle various degenerate cases
        messages = [m for m in messages]  # copy
        DUMMY_SYS_PROMPT = "You are a helpful assistant."
        DUMMY_USER_PROMPT = "Follow the instructions above."
        if len(messages) == 0 or messages[0].role != Role.SYSTEM:
            logger.warning("No system msg, creating dummy system prompt")
            messages.insert(0, LLMMessage(content=DUMMY_SYS_PROMPT, role=Role.SYSTEM))
        system_prompt = messages[0].content

        # now we have messages = [Sys,...]
        if len(messages) == 1:
            logger.warning(
                "Got only system message in chat history, creating dummy user prompt"
            )
            messages.append(LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))

        # now we have messages = [Sys, msg, ...]

        if messages[1].role != Role.USER:
            messages.insert(1, LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))

        # now we have messages = [Sys, user, ...]
        if messages[-1].role != Role.USER:
            logger.warning(
                "Last message in chat history is not a user message,"
                " creating dummy user prompt"
            )
            messages.append(LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))

        # now we have messages = [Sys, user, ..., user]
        # so we omit the first and last elements and make pairs of user-asst messages
        conversation = [m.content for m in messages[1:-1]]
        user_prompt = messages[-1].content
        pairs = LanguageModel.user_assistant_pairs(conversation)
        return system_prompt, pairs, user_prompt

    @abstractmethod
    def set_stream(self, stream: bool) -> bool:
        """Enable or disable streaming output from API.
        Return previous value of stream."""
        pass

    @abstractmethod
    def get_stream(self) -> bool:
        """Get streaming status"""
        pass

    @abstractmethod
    def generate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
        pass

    @abstractmethod
    async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
        pass

    @abstractmethod
    def chat(
        self,
        messages: Union[str, List[LLMMessage]],
        max_tokens: int = 200,
        tools: Optional[List[OpenAIToolSpec]] = None,
        tool_choice: ToolChoiceTypes | Dict[str, str | Dict[str, str]] = "auto",
        functions: Optional[List[LLMFunctionSpec]] = None,
        function_call: str | Dict[str, str] = "auto",
        response_format: Optional[OpenAIJsonSchemaSpec] = None,
    ) -> LLMResponse:
        """
        Get chat-completion response from LLM.

        Args:
            messages: message-history to send to the LLM
            max_tokens: max tokens to generate
            tools: tools available for the LLM to use in its response
            tool_choice: tool call mode, one of "none", "auto", "required",
                or a dict specifying a specific tool.
            functions: functions available for LLM to call (deprecated)
            function_call: function calling mode, "auto", "none", or a specific fn
                (deprecated)
        """

        pass

    @abstractmethod
    async def achat(
        self,
        messages: Union[str, List[LLMMessage]],
        max_tokens: int = 200,
        tools: Optional[List[OpenAIToolSpec]] = None,
        tool_choice: ToolChoiceTypes | Dict[str, str | Dict[str, str]] = "auto",
        functions: Optional[List[LLMFunctionSpec]] = None,
        function_call: str | Dict[str, str] = "auto",
        response_format: Optional[OpenAIJsonSchemaSpec] = None,
    ) -> LLMResponse:
        """Async version of `chat`. See `chat` for details."""
        pass

    def __call__(self, prompt: str, max_tokens: int) -> LLMResponse:
        return self.generate(prompt, max_tokens)

    @staticmethod
    def _fallback_model_names(model: str) -> List[str]:
        parts = model.split("/")
        fallbacks = []
        for i in range(1, len(parts)):
            fallbacks.append("/".join(parts[i:]))
        return fallbacks

    def info(self) -> ModelInfo:
        """Info of relevant chat model"""
        orig_model = (
            self.config.completion_model
            if self.config.use_completion_for_chat
            else self.chat_model_orig
        )
        return get_model_info(orig_model, self._fallback_model_names(orig_model))

    def completion_info(self) -> ModelInfo:
        """Info of relevant completion model"""
        orig_model = (
            self.chat_model_orig
            if self.config.use_chat_for_completion
            else self.config.completion_model
        )
        return get_model_info(orig_model, self._fallback_model_names(orig_model))

    def supports_functions_or_tools(self) -> bool:
        """
        Does this Model's API support "native" tool-calling, i.e.
        can we call the API with arguments that contain a list of available tools,
        and their schemas?
        Note that, given the plethora of LLM provider APIs this determination is
        imperfect at best, and leans towards returning True.
        When the API calls fails with an error indicating tools are not supported,
        then users are encouraged to use the Langroid-based prompt-based
        ToolMessage mechanism, which works with ANY LLM. To enable this,
        in your ChatAgentConfig, set `use_functions_api=False`, and `use_tools=True`.
        """
        return self.info().has_tools

    def chat_context_length(self) -> int:
        return self.config.chat_context_length or DEFAULT_CONTEXT_LENGTH

    def completion_context_length(self) -> int:
        return self.config.completion_context_length or DEFAULT_CONTEXT_LENGTH

    def chat_cost(self) -> Tuple[float, float, float]:
        """
        Return the cost per 1000 tokens for chat completions.

        Returns:
            Tuple[float, float, float]: (input_cost, cached_cost, output_cost)
                per 1000 tokens
        """
        return (0.0, 0.0, 0.0)

    def reset_usage_cost(self) -> None:
        for mdl in [self.config.chat_model, self.config.completion_model]:
            if mdl is None:
                return
            if mdl not in self.usage_cost_dict:
                self.usage_cost_dict[mdl] = LLMTokenUsage()
            counter = self.usage_cost_dict[mdl]
            counter.reset()

    def update_usage_cost(
        self, chat: bool, prompts: int, completions: int, cost: float
    ) -> None:
        """
        Update usage cost for this LLM.
        Args:
            chat (bool): whether to update for chat or completion model
            prompts (int): number of tokens used for prompts
            completions (int): number of tokens used for completions
            cost (float): total token cost in USD
        """
        mdl = self.config.chat_model if chat else self.config.completion_model
        if mdl is None:
            return
        if mdl not in self.usage_cost_dict:
            self.usage_cost_dict[mdl] = LLMTokenUsage()
        counter = self.usage_cost_dict[mdl]
        counter.prompt_tokens += prompts
        counter.completion_tokens += completions
        counter.cost += cost
        counter.calls += 1

    @classmethod
    def usage_cost_summary(cls) -> str:
        s = ""
        for model, counter in cls.usage_cost_dict.items():
            s += f"{model}: {counter}\n"
        return s

    @classmethod
    def tot_tokens_cost(cls) -> Tuple[int, float]:
        """
        Return total tokens used and total cost across all models.
        """
        total_tokens = 0
        total_cost = 0.0
        for counter in cls.usage_cost_dict.values():
            total_tokens += counter.total_tokens
            total_cost += counter.cost
        return total_tokens, total_cost

    def get_reasoning_final(self, message: str) -> Tuple[str, str]:
        """Extract "reasoning" and "final answer" from an LLM response, if the
        reasoning is found within configured delimiters, like <think>, </think>.
        E.g.,
        '<think> Okay, let's see, the user wants... </think> 2 + 3 = 5'

        Args:
            message (str): message from LLM

        Returns:
            Tuple[str, str]: reasoning, final answer
        """
        start, end = self.config.thought_delimiters
        if start in message and end in message:
            parts = message.split(start)
            if len(parts) > 1:
                reasoning, final = parts[1].split(end)
                return reasoning, final
        return "", message

    def followup_to_standalone(
        self, chat_history: List[Tuple[str, str]], question: str
    ) -> str:
        """
        Given a chat history and a question, convert it to a standalone question.
        Args:
            chat_history: list of tuples of (question, answer)
            query: follow-up question

        Returns: standalone version of the question
        """
        history = collate_chat_history(chat_history)

        prompt = f"""
        You are an expert at understanding a CHAT HISTORY between an AI Assistant
        and a User, and you are highly skilled in rephrasing the User's FOLLOW-UP
        QUESTION/REQUEST as a STANDALONE QUESTION/REQUEST that can be understood
        WITHOUT the context of the chat history.

        Below is the CHAT HISTORY. When the User asks you to rephrase a
        FOLLOW-UP QUESTION/REQUEST, your ONLY task is to simply return the
        question REPHRASED as a STANDALONE QUESTION/REQUEST, without any additional
        text or context.

        <CHAT_HISTORY>
        {history}
        </CHAT_HISTORY>
        """.strip()

        follow_up_question = f"""
        Please rephrase this as a stand-alone question or request:
        <FOLLOW-UP-QUESTION-OR-REQUEST>
        {question}
        </FOLLOW-UP-QUESTION-OR-REQUEST>
        """.strip()

        show_if_debug(prompt, "FOLLOWUP->STANDALONE-PROMPT= ")
        standalone = self.chat(
            messages=[
                LLMMessage(role=Role.SYSTEM, content=prompt),
                LLMMessage(role=Role.USER, content=follow_up_question),
            ],
            max_tokens=1024,
        ).message.strip()

        show_if_debug(prompt, "FOLLOWUP->STANDALONE-RESPONSE= ")
        return standalone


class StreamingIfAllowed:
    """Context to temporarily enable or disable streaming, if allowed globally via
    `settings.stream`"""

    def __init__(self, llm: LanguageModel, stream: bool = True):
        self.llm = llm
        self.stream = stream

    def __enter__(self) -> None:
        self.old_stream = self.llm.set_stream(settings.stream and self.stream)

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        self.llm.set_stream(self.old_stream)
```
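For orientation, here is a minimal usage sketch of the API defined in the hunk above. It is not part of the package diff: the `OpenAIGPTConfig` import, the model name, and the prompt strings are illustrative assumptions (`OpenAIGPTConfig` is the concrete `LLMConfig` subclass that `LanguageModel.create()` itself loads lazily).

```python
# Minimal sketch (not part of the diff) of how the classes above fit together.
# Assumes OpenAIGPTConfig from langroid.language_models.openai_gpt; the model
# name "gpt-4o" and the prompts are placeholders.
from langroid.language_models.openai_gpt import OpenAIGPTConfig

config = OpenAIGPTConfig(chat_model="gpt-4o")  # a concrete LLMConfig subclass
llm = LanguageModel.create(config)  # passing a bare LLMConfig raises ValueError

if llm is not None:
    with StreamingIfAllowed(llm, stream=False):  # temporarily disable streaming
        response = llm.chat(
            messages=[
                LLMMessage(role=Role.SYSTEM, content="You are a terse assistant."),
                LLMMessage(role=Role.USER, content="What is 2 + 3?"),
            ],
            max_tokens=50,
        )
    print(response.message)                     # final answer text
    print(LanguageModel.usage_cost_summary())   # accumulated per-model token/cost stats
```

Any other concrete `LLMConfig` subclass accepted by `LanguageModel.create()` would be used the same way; `StreamingIfAllowed` simply restores the previous streaming setting on exit.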