langroid 0.58.2__py3-none-any.whl → 0.59.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +39 -17
- langroid/agent/base.py-e +2216 -0
- langroid/agent/callbacks/chainlit.py +2 -1
- langroid/agent/chat_agent.py +73 -55
- langroid/agent/chat_agent.py-e +2086 -0
- langroid/agent/chat_document.py +7 -7
- langroid/agent/chat_document.py-e +513 -0
- langroid/agent/openai_assistant.py +9 -9
- langroid/agent/openai_assistant.py-e +882 -0
- langroid/agent/special/arangodb/arangodb_agent.py +10 -18
- langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
- langroid/agent/special/arangodb/tools.py +3 -3
- langroid/agent/special/doc_chat_agent.py +16 -14
- langroid/agent/special/lance_rag/critic_agent.py +2 -2
- langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
- langroid/agent/special/lance_tools.py +6 -5
- langroid/agent/special/lance_tools.py-e +61 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
- langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
- langroid/agent/special/relevance_extractor_agent.py +1 -1
- langroid/agent/special/sql/sql_chat_agent.py +11 -3
- langroid/agent/task.py +9 -87
- langroid/agent/task.py-e +2418 -0
- langroid/agent/tool_message.py +33 -17
- langroid/agent/tool_message.py-e +400 -0
- langroid/agent/tools/file_tools.py +4 -2
- langroid/agent/tools/file_tools.py-e +234 -0
- langroid/agent/tools/mcp/fastmcp_client.py +19 -6
- langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
- langroid/agent/tools/orchestration.py +22 -17
- langroid/agent/tools/orchestration.py-e +301 -0
- langroid/agent/tools/recipient_tool.py +3 -3
- langroid/agent/tools/task_tool.py +22 -16
- langroid/agent/tools/task_tool.py-e +249 -0
- langroid/agent/xml_tool_message.py +90 -35
- langroid/agent/xml_tool_message.py-e +392 -0
- langroid/cachedb/base.py +1 -1
- langroid/embedding_models/base.py +2 -2
- langroid/embedding_models/models.py +3 -7
- langroid/embedding_models/models.py-e +563 -0
- langroid/exceptions.py +4 -1
- langroid/language_models/azure_openai.py +2 -2
- langroid/language_models/azure_openai.py-e +134 -0
- langroid/language_models/base.py +6 -4
- langroid/language_models/base.py-e +812 -0
- langroid/language_models/client_cache.py +64 -0
- langroid/language_models/config.py +2 -4
- langroid/language_models/config.py-e +18 -0
- langroid/language_models/model_info.py +9 -1
- langroid/language_models/model_info.py-e +483 -0
- langroid/language_models/openai_gpt.py +119 -20
- langroid/language_models/openai_gpt.py-e +2280 -0
- langroid/language_models/provider_params.py +3 -22
- langroid/language_models/provider_params.py-e +153 -0
- langroid/mytypes.py +11 -4
- langroid/mytypes.py-e +132 -0
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/file_attachment.py +1 -1
- langroid/parsing/file_attachment.py-e +246 -0
- langroid/parsing/md_parser.py +14 -4
- langroid/parsing/md_parser.py-e +574 -0
- langroid/parsing/parser.py +22 -7
- langroid/parsing/parser.py-e +410 -0
- langroid/parsing/repo_loader.py +3 -1
- langroid/parsing/repo_loader.py-e +812 -0
- langroid/parsing/search.py +1 -1
- langroid/parsing/url_loader.py +17 -51
- langroid/parsing/url_loader.py-e +683 -0
- langroid/parsing/urls.py +5 -4
- langroid/parsing/urls.py-e +279 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/pydantic_v1/__init__.py +45 -6
- langroid/pydantic_v1/__init__.py-e +36 -0
- langroid/pydantic_v1/main.py +11 -4
- langroid/pydantic_v1/main.py-e +11 -0
- langroid/utils/configuration.py +13 -11
- langroid/utils/configuration.py-e +141 -0
- langroid/utils/constants.py +1 -1
- langroid/utils/constants.py-e +32 -0
- langroid/utils/globals.py +21 -5
- langroid/utils/globals.py-e +49 -0
- langroid/utils/html_logger.py +2 -1
- langroid/utils/html_logger.py-e +825 -0
- langroid/utils/object_registry.py +1 -1
- langroid/utils/object_registry.py-e +66 -0
- langroid/utils/pydantic_utils.py +55 -28
- langroid/utils/pydantic_utils.py-e +602 -0
- langroid/utils/types.py +2 -2
- langroid/utils/types.py-e +113 -0
- langroid/vector_store/base.py +3 -3
- langroid/vector_store/lancedb.py +5 -5
- langroid/vector_store/lancedb.py-e +404 -0
- langroid/vector_store/meilisearch.py +2 -2
- langroid/vector_store/pineconedb.py +4 -4
- langroid/vector_store/pineconedb.py-e +427 -0
- langroid/vector_store/postgres.py +1 -1
- langroid/vector_store/qdrantdb.py +3 -3
- langroid/vector_store/weaviatedb.py +1 -1
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/METADATA +3 -2
- langroid-0.59.0b1.dist-info/RECORD +181 -0
- langroid/agent/special/doc_chat_task.py +0 -0
- langroid/mcp/__init__.py +0 -1
- langroid/mcp/server/__init__.py +0 -1
- langroid-0.58.2.dist-info/RECORD +0 -145
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/WHEEL +0 -0
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/licenses/LICENSE +0 -0
@@ -1,11 +1,20 @@
|
|
1
1
|
import re
|
2
2
|
from collections.abc import Mapping
|
3
|
-
from typing import Any, Dict, List, Optional, get_args, get_origin
|
3
|
+
from typing import Any, Dict, List, Optional, Union, get_args, get_origin
|
4
4
|
|
5
5
|
from lxml import etree
|
6
|
+
from pydantic import BaseModel, ConfigDict
|
6
7
|
|
7
8
|
from langroid.agent.tool_message import ToolMessage
|
8
|
-
|
9
|
+
|
10
|
+
# For Union type handling - check if we have Python 3.10+ UnionType
|
11
|
+
HAS_UNION_TYPE = False
|
12
|
+
try:
|
13
|
+
from types import UnionType # noqa: F401 # Used conditionally
|
14
|
+
|
15
|
+
HAS_UNION_TYPE = True
|
16
|
+
except ImportError:
|
17
|
+
pass
|
9
18
|
|
10
19
|
|
11
20
|
class XMLToolMessage(ToolMessage):
|
@@ -27,10 +36,27 @@ class XMLToolMessage(ToolMessage):
|
|
27
36
|
request: str
|
28
37
|
purpose: str
|
29
38
|
|
30
|
-
_allow_llm_use = True
|
39
|
+
_allow_llm_use: bool = True
|
40
|
+
|
41
|
+
model_config = ConfigDict(
|
42
|
+
# Inherit settings from ToolMessage
|
43
|
+
extra="allow",
|
44
|
+
arbitrary_types_allowed=False,
|
45
|
+
validate_default=True,
|
46
|
+
validate_assignment=True,
|
47
|
+
json_schema_extra={"exclude": ["purpose", "id"]},
|
48
|
+
)
|
31
49
|
|
32
|
-
class
|
33
|
-
|
50
|
+
# XMLToolMessage-specific settings as class methods to avoid Pydantic
|
51
|
+
# treating them as model fields
|
52
|
+
@classmethod
|
53
|
+
def _get_excluded_fields(cls) -> set[str]:
|
54
|
+
return {"purpose", "id"}
|
55
|
+
|
56
|
+
# Root element for XML formatting
|
57
|
+
@classmethod
|
58
|
+
def _get_root_element(cls) -> str:
|
59
|
+
return "tool"
|
34
60
|
|
35
61
|
@classmethod
|
36
62
|
def extract_field_values(cls, formatted_string: str) -> Optional[Dict[str, Any]]:
|
@@ -67,9 +93,13 @@ class XMLToolMessage(ToolMessage):
|
|
67
93
|
if element.tag.startswith("_"):
|
68
94
|
return {}
|
69
95
|
|
70
|
-
field_info = cls.
|
71
|
-
is_verbatim =
|
72
|
-
|
96
|
+
field_info = cls.model_fields.get(element.tag)
|
97
|
+
is_verbatim = (
|
98
|
+
field_info
|
99
|
+
and hasattr(field_info, "json_schema_extra")
|
100
|
+
and field_info.json_schema_extra is not None
|
101
|
+
and isinstance(field_info.json_schema_extra, dict)
|
102
|
+
and field_info.json_schema_extra.get("verbatim", False)
|
73
103
|
)
|
74
104
|
|
75
105
|
if is_verbatim:
|
@@ -96,8 +126,12 @@ class XMLToolMessage(ToolMessage):
|
|
96
126
|
# Otherwise, treat as a dictionary
|
97
127
|
result = {child.tag: parse_element(child) for child in element}
|
98
128
|
# Check if this corresponds to a nested Pydantic model
|
99
|
-
if
|
100
|
-
|
129
|
+
if (
|
130
|
+
field_info
|
131
|
+
and isinstance(field_info.annotation, type)
|
132
|
+
and issubclass(field_info.annotation, BaseModel)
|
133
|
+
):
|
134
|
+
return field_info.annotation(**result)
|
101
135
|
return result
|
102
136
|
|
103
137
|
result = parse_element(root)
|
@@ -124,7 +158,7 @@ class XMLToolMessage(ToolMessage):
|
|
124
158
|
return None
|
125
159
|
|
126
160
|
# Use Pydantic's parse_obj to create and validate the instance
|
127
|
-
return cls.
|
161
|
+
return cls.model_validate(parsed_data)
|
128
162
|
except Exception as e:
|
129
163
|
from langroid.exceptions import XMLException
|
130
164
|
|
@@ -132,28 +166,30 @@ class XMLToolMessage(ToolMessage):
|
|
132
166
|
|
133
167
|
@classmethod
|
134
168
|
def find_verbatim_fields(
|
135
|
-
cls, prefix: str = "", parent_cls: Optional[
|
169
|
+
cls, prefix: str = "", parent_cls: Optional[type[BaseModel]] = None
|
136
170
|
) -> List[str]:
|
137
171
|
verbatim_fields = []
|
138
|
-
for field_name, field_info in (parent_cls or cls).
|
172
|
+
for field_name, field_info in (parent_cls or cls).model_fields.items():
|
139
173
|
full_name = f"{prefix}.{field_name}" if prefix else field_name
|
140
174
|
if (
|
141
|
-
field_info
|
142
|
-
|
143
|
-
|
175
|
+
hasattr(field_info, "json_schema_extra")
|
176
|
+
and field_info.json_schema_extra is not None
|
177
|
+
and isinstance(field_info.json_schema_extra, dict)
|
178
|
+
and field_info.json_schema_extra.get("verbatim", False)
|
179
|
+
) or field_name == "code":
|
144
180
|
verbatim_fields.append(full_name)
|
145
|
-
if
|
181
|
+
if isinstance(field_info.annotation, type) and issubclass(
|
182
|
+
field_info.annotation, BaseModel
|
183
|
+
):
|
146
184
|
verbatim_fields.extend(
|
147
|
-
cls.find_verbatim_fields(full_name, field_info.
|
185
|
+
cls.find_verbatim_fields(full_name, field_info.annotation)
|
148
186
|
)
|
149
187
|
return verbatim_fields
|
150
188
|
|
151
189
|
@classmethod
|
152
190
|
def format_instructions(cls, tool: bool = False) -> str:
|
153
191
|
fields = [
|
154
|
-
f
|
155
|
-
for f in cls.__fields__.keys()
|
156
|
-
if f not in cls.Config.schema_extra.get("exclude", set())
|
192
|
+
f for f in cls.model_fields.keys() if f not in cls._get_excluded_fields()
|
157
193
|
]
|
158
194
|
|
159
195
|
instructions = """
|
@@ -162,11 +198,11 @@ class XMLToolMessage(ToolMessage):
|
|
162
198
|
"""
|
163
199
|
|
164
200
|
preamble = "Placeholders:\n"
|
165
|
-
xml_format = f"Formatting example:\n\n<{cls.
|
201
|
+
xml_format = f"Formatting example:\n\n<{cls._get_root_element()}>\n"
|
166
202
|
|
167
203
|
def format_field(
|
168
204
|
field_name: str,
|
169
|
-
field_type:
|
205
|
+
field_type: Any,
|
170
206
|
indent: str = "",
|
171
207
|
path: str = "",
|
172
208
|
) -> None:
|
@@ -176,6 +212,24 @@ class XMLToolMessage(ToolMessage):
|
|
176
212
|
origin = get_origin(field_type)
|
177
213
|
args = get_args(field_type)
|
178
214
|
|
215
|
+
# Handle Union types (including Optional types like List[Person] | None)
|
216
|
+
# Support both typing.Union and types.UnionType (Python 3.10+ | syntax)
|
217
|
+
is_union = origin is Union
|
218
|
+
if HAS_UNION_TYPE:
|
219
|
+
from types import UnionType as _UnionType
|
220
|
+
|
221
|
+
is_union = is_union or origin is _UnionType
|
222
|
+
|
223
|
+
if is_union:
|
224
|
+
# Filter out None type for Optional types
|
225
|
+
non_none_args = [arg for arg in args if arg is not type(None)]
|
226
|
+
if len(non_none_args) == 1:
|
227
|
+
# This is an Optional type, process the non-None type
|
228
|
+
field_type = non_none_args[0]
|
229
|
+
origin = get_origin(field_type)
|
230
|
+
args = get_args(field_type)
|
231
|
+
# If there are multiple non-None types, fall through to default handling
|
232
|
+
|
179
233
|
if (
|
180
234
|
origin is None
|
181
235
|
and isinstance(field_type, type)
|
@@ -185,10 +239,10 @@ class XMLToolMessage(ToolMessage):
|
|
185
239
|
f"{field_name.upper()} = [nested structure for {field_name}]\n"
|
186
240
|
)
|
187
241
|
xml_format += f"{indent}<{field_name}>\n"
|
188
|
-
for sub_field, sub_field_info in field_type.
|
242
|
+
for sub_field, sub_field_info in field_type.model_fields.items():
|
189
243
|
format_field(
|
190
244
|
sub_field,
|
191
|
-
sub_field_info.
|
245
|
+
sub_field_info.annotation,
|
192
246
|
indent + " ",
|
193
247
|
current_path,
|
194
248
|
)
|
@@ -248,13 +302,14 @@ class XMLToolMessage(ToolMessage):
|
|
248
302
|
verbatim_fields = cls.find_verbatim_fields()
|
249
303
|
|
250
304
|
for field in fields:
|
251
|
-
field_info = cls.
|
252
|
-
field_type =
|
253
|
-
|
254
|
-
|
305
|
+
field_info = cls.model_fields[field]
|
306
|
+
field_type = field_info.annotation
|
307
|
+
# Ensure we have a valid type
|
308
|
+
if field_type is None:
|
309
|
+
continue
|
255
310
|
format_field(field, field_type)
|
256
311
|
|
257
|
-
xml_format += f"</{cls.
|
312
|
+
xml_format += f"</{cls._get_root_element()}>"
|
258
313
|
|
259
314
|
verbatim_alert = ""
|
260
315
|
if len(verbatim_fields) > 0:
|
@@ -312,7 +367,7 @@ class XMLToolMessage(ToolMessage):
|
|
312
367
|
create_element(elem, k, v, current_path)
|
313
368
|
elif isinstance(value, BaseModel):
|
314
369
|
# Handle nested Pydantic models
|
315
|
-
for field_name, field_value in value.
|
370
|
+
for field_name, field_value in value.model_dump().items():
|
316
371
|
create_element(elem, field_name, field_value, current_path)
|
317
372
|
else:
|
318
373
|
if current_path in self.__class__.find_verbatim_fields():
|
@@ -320,9 +375,9 @@ class XMLToolMessage(ToolMessage):
|
|
320
375
|
else:
|
321
376
|
elem.text = str(value)
|
322
377
|
|
323
|
-
root = etree.Element(self.
|
324
|
-
exclude_fields = self.
|
325
|
-
for name, value in self.
|
378
|
+
root = etree.Element(self._get_root_element())
|
379
|
+
exclude_fields: set[str] = self._get_excluded_fields()
|
380
|
+
for name, value in self.model_dump().items():
|
326
381
|
if name not in exclude_fields:
|
327
382
|
create_element(root, name, value)
|
328
383
|
|
@@ -349,7 +404,7 @@ class XMLToolMessage(ToolMessage):
|
|
349
404
|
Returns: ["<tool><field1>data</field1></tool>"]
|
350
405
|
"""
|
351
406
|
|
352
|
-
root_tag = cls.
|
407
|
+
root_tag = cls._get_root_element()
|
353
408
|
opening_tag = f"<{root_tag}>"
|
354
409
|
closing_tag = f"</{root_tag}>"
|
355
410
|
|
@@ -0,0 +1,392 @@
|
|
1
|
+
import re
|
2
|
+
from collections.abc import Mapping
|
3
|
+
from typing import Any, Dict, List, Optional, get_args, get_origin
|
4
|
+
|
5
|
+
from lxml import etree
|
6
|
+
|
7
|
+
from langroid.agent.tool_message import ToolMessage
|
8
|
+
from pydantic import BaseModel
|
9
|
+
|
10
|
+
|
11
|
+
class XMLToolMessage(ToolMessage):
|
12
|
+
"""
|
13
|
+
Abstract class for tools formatted using XML instead of JSON.
|
14
|
+
|
15
|
+
When a subclass defines a field with the attribute `verbatim=True`,
|
16
|
+
instructions are sent to the LLM to ensure the field's content is:
|
17
|
+
- preserved as is, including whitespace, indents, quotes, newlines, etc
|
18
|
+
with no escaping, and
|
19
|
+
- enclosed in a CDATA section in the XML output.
|
20
|
+
This is useful for LLMs sending code as part of a tool;
|
21
|
+
results can be far superior compared to sending code in JSON-formatted tools,
|
22
|
+
where code needs to confirm to JSON's strict rules and escaping requirements.
|
23
|
+
(see test_xml_tool_message.py for an example).
|
24
|
+
|
25
|
+
"""
|
26
|
+
|
27
|
+
request: str
|
28
|
+
purpose: str
|
29
|
+
|
30
|
+
_allow_llm_use = True
|
31
|
+
|
32
|
+
class Config(ToolMessage.Config):
|
33
|
+
root_element = "tool"
|
34
|
+
|
35
|
+
@classmethod
|
36
|
+
def extract_field_values(cls, formatted_string: str) -> Optional[Dict[str, Any]]:
|
37
|
+
"""
|
38
|
+
Extracts field values from an XML-formatted string.
|
39
|
+
|
40
|
+
Args:
|
41
|
+
formatted_string (str): The XML-formatted string to parse.
|
42
|
+
|
43
|
+
Returns:
|
44
|
+
Optional[Dict[str, Any]]: A dictionary containing the extracted field
|
45
|
+
values, where keys are the XML element names and values are their
|
46
|
+
corresponding contents.
|
47
|
+
Returns None if parsing fails or the root element is not a dictionary.
|
48
|
+
|
49
|
+
Raises:
|
50
|
+
etree.XMLSyntaxError: If the input string is not valid XML.
|
51
|
+
"""
|
52
|
+
# SECURITY: Initialize XMLParser with flags to prevent
|
53
|
+
# XML External Entity (XXE), billion laughs, and external DTD attacks by
|
54
|
+
# disabling entity resolution, DTD loading, and network access;
|
55
|
+
# `strip_cdata=False` is needed to preserve
|
56
|
+
# content within CDATA sections (e.g., for code).
|
57
|
+
parser = etree.XMLParser(
|
58
|
+
strip_cdata=False,
|
59
|
+
resolve_entities=False,
|
60
|
+
load_dtd=False,
|
61
|
+
no_network=True,
|
62
|
+
)
|
63
|
+
root = etree.fromstring(formatted_string.encode("utf-8"), parser=parser)
|
64
|
+
|
65
|
+
def parse_element(element: etree._Element) -> Any:
|
66
|
+
# Skip elements starting with underscore
|
67
|
+
if element.tag.startswith("_"):
|
68
|
+
return {}
|
69
|
+
|
70
|
+
field_info = cls.__fields__.get(element.tag)
|
71
|
+
is_verbatim = field_info and field_info.field_info.extra.get(
|
72
|
+
"verbatim", False
|
73
|
+
)
|
74
|
+
|
75
|
+
if is_verbatim:
|
76
|
+
# For code elements, preserve the content as is, including whitespace
|
77
|
+
content = element.text if element.text else ""
|
78
|
+
# Strip leading and trailing triple backticks if present,
|
79
|
+
# accounting for whitespace
|
80
|
+
return (
|
81
|
+
content.strip().removeprefix("```").removesuffix("```").strip()
|
82
|
+
if content.strip().startswith("```")
|
83
|
+
and content.strip().endswith("```")
|
84
|
+
else content
|
85
|
+
)
|
86
|
+
elif len(element) == 0:
|
87
|
+
# For non-code leaf elements, strip whitespace
|
88
|
+
return element.text.strip() if element.text else ""
|
89
|
+
else:
|
90
|
+
# For branch elements, handle potential lists or nested structures
|
91
|
+
children = [parse_element(child) for child in element]
|
92
|
+
if all(child.tag == element[0].tag for child in element):
|
93
|
+
# If all children have the same tag, treat as a list
|
94
|
+
return children
|
95
|
+
else:
|
96
|
+
# Otherwise, treat as a dictionary
|
97
|
+
result = {child.tag: parse_element(child) for child in element}
|
98
|
+
# Check if this corresponds to a nested Pydantic model
|
99
|
+
if field_info and issubclass(field_info.type_, BaseModel):
|
100
|
+
return field_info.type_(**result)
|
101
|
+
return result
|
102
|
+
|
103
|
+
result = parse_element(root)
|
104
|
+
if not isinstance(result, dict):
|
105
|
+
return None
|
106
|
+
# Filter out empty dictionaries from skipped underscore fields
|
107
|
+
return {k: v for k, v in result.items() if v != {}}
|
108
|
+
|
109
|
+
@classmethod
|
110
|
+
def parse(cls, formatted_string: str) -> Optional["XMLToolMessage"]:
|
111
|
+
"""
|
112
|
+
Parses the XML-formatted string and returns an instance of the class.
|
113
|
+
|
114
|
+
Args:
|
115
|
+
formatted_string (str): The XML-formatted string to parse.
|
116
|
+
|
117
|
+
Returns:
|
118
|
+
Optional["XMLToolMessage"]: An instance of the class if parsing succeeds,
|
119
|
+
None otherwise.
|
120
|
+
"""
|
121
|
+
try:
|
122
|
+
parsed_data = cls.extract_field_values(formatted_string)
|
123
|
+
if parsed_data is None:
|
124
|
+
return None
|
125
|
+
|
126
|
+
# Use Pydantic's parse_obj to create and validate the instance
|
127
|
+
return cls.model_validate(parsed_data)
|
128
|
+
except Exception as e:
|
129
|
+
from langroid.exceptions import XMLException
|
130
|
+
|
131
|
+
raise XMLException(f"Error parsing XML: {str(e)}")
|
132
|
+
|
133
|
+
@classmethod
|
134
|
+
def find_verbatim_fields(
|
135
|
+
cls, prefix: str = "", parent_cls: Optional["BaseModel"] = None
|
136
|
+
) -> List[str]:
|
137
|
+
verbatim_fields = []
|
138
|
+
for field_name, field_info in (parent_cls or cls).__fields__.items():
|
139
|
+
full_name = f"{prefix}.{field_name}" if prefix else field_name
|
140
|
+
if (
|
141
|
+
field_info.field_info.extra.get("verbatim", False)
|
142
|
+
or field_name == "code"
|
143
|
+
):
|
144
|
+
verbatim_fields.append(full_name)
|
145
|
+
if issubclass(field_info.type_, BaseModel):
|
146
|
+
verbatim_fields.extend(
|
147
|
+
cls.find_verbatim_fields(full_name, field_info.type_)
|
148
|
+
)
|
149
|
+
return verbatim_fields
|
150
|
+
|
151
|
+
@classmethod
|
152
|
+
def format_instructions(cls, tool: bool = False) -> str:
|
153
|
+
fields = [
|
154
|
+
f
|
155
|
+
for f in cls.__fields__.keys()
|
156
|
+
if f not in cls.Config.schema_extra.get("exclude", set())
|
157
|
+
]
|
158
|
+
|
159
|
+
instructions = """
|
160
|
+
To use this tool, please provide the required information in an XML-like
|
161
|
+
format. Here's how to structure your input:\n\n
|
162
|
+
"""
|
163
|
+
|
164
|
+
preamble = "Placeholders:\n"
|
165
|
+
xml_format = f"Formatting example:\n\n<{cls.Config.root_element}>\n"
|
166
|
+
|
167
|
+
def format_field(
|
168
|
+
field_name: str,
|
169
|
+
field_type: type,
|
170
|
+
indent: str = "",
|
171
|
+
path: str = "",
|
172
|
+
) -> None:
|
173
|
+
nonlocal preamble, xml_format
|
174
|
+
current_path = f"{path}.{field_name}" if path else field_name
|
175
|
+
|
176
|
+
origin = get_origin(field_type)
|
177
|
+
args = get_args(field_type)
|
178
|
+
|
179
|
+
if (
|
180
|
+
origin is None
|
181
|
+
and isinstance(field_type, type)
|
182
|
+
and issubclass(field_type, BaseModel)
|
183
|
+
):
|
184
|
+
preamble += (
|
185
|
+
f"{field_name.upper()} = [nested structure for {field_name}]\n"
|
186
|
+
)
|
187
|
+
xml_format += f"{indent}<{field_name}>\n"
|
188
|
+
for sub_field, sub_field_info in field_type.__fields__.items():
|
189
|
+
format_field(
|
190
|
+
sub_field,
|
191
|
+
sub_field_info.outer_type_,
|
192
|
+
indent + " ",
|
193
|
+
current_path,
|
194
|
+
)
|
195
|
+
xml_format += f"{indent}</{field_name}>\n"
|
196
|
+
elif origin in (list, List) or (field_type is list):
|
197
|
+
item_type = args[0] if args else Any
|
198
|
+
if isinstance(item_type, type) and issubclass(item_type, BaseModel):
|
199
|
+
preamble += (
|
200
|
+
f"{field_name.upper()} = "
|
201
|
+
f"[list of nested structures for {field_name}]\n"
|
202
|
+
)
|
203
|
+
else:
|
204
|
+
preamble += (
|
205
|
+
f"{field_name.upper()} = "
|
206
|
+
f"[list of {getattr(item_type, '__name__', str(item_type))} "
|
207
|
+
f"for {field_name}]\n"
|
208
|
+
)
|
209
|
+
xml_format += f"{indent}<{field_name}>\n"
|
210
|
+
xml_format += (
|
211
|
+
f"{indent} <item>"
|
212
|
+
f"[{getattr(item_type, '__name__', str(item_type))} value]"
|
213
|
+
f"</item>\n"
|
214
|
+
)
|
215
|
+
xml_format += f"{indent} ...\n"
|
216
|
+
xml_format += f"{indent}</{field_name}>\n"
|
217
|
+
elif origin in (dict, Dict) or (
|
218
|
+
isinstance(field_type, type) and issubclass(field_type, Mapping)
|
219
|
+
):
|
220
|
+
key_type, value_type = args if len(args) == 2 else (Any, Any)
|
221
|
+
preamble += (
|
222
|
+
f"{field_name.upper()} = "
|
223
|
+
f"[dictionary with "
|
224
|
+
f"{getattr(key_type, '__name__', str(key_type))} keys and "
|
225
|
+
f"{getattr(value_type, '__name__', str(value_type))} values]\n"
|
226
|
+
)
|
227
|
+
xml_format += f"{indent}<{field_name}>\n"
|
228
|
+
xml_format += (
|
229
|
+
f"{indent} <{getattr(key_type, '__name__', str(key_type))}>"
|
230
|
+
f"[{getattr(value_type, '__name__', str(value_type))} value]"
|
231
|
+
f"</{getattr(key_type, '__name__', str(key_type))}>\n"
|
232
|
+
)
|
233
|
+
xml_format += f"{indent} ...\n"
|
234
|
+
xml_format += f"{indent}</{field_name}>\n"
|
235
|
+
else:
|
236
|
+
preamble += f"{field_name.upper()} = [value for {field_name}]\n"
|
237
|
+
if current_path in verbatim_fields:
|
238
|
+
xml_format += (
|
239
|
+
f"{indent}<{field_name}>"
|
240
|
+
f"<![CDATA[{{{field_name.upper()}}}]]></{field_name}>\n"
|
241
|
+
)
|
242
|
+
else:
|
243
|
+
xml_format += (
|
244
|
+
f"{indent}<{field_name}>"
|
245
|
+
f"{{{field_name.upper()}}}</{field_name}>\n"
|
246
|
+
)
|
247
|
+
|
248
|
+
verbatim_fields = cls.find_verbatim_fields()
|
249
|
+
|
250
|
+
for field in fields:
|
251
|
+
field_info = cls.__fields__[field]
|
252
|
+
field_type = (
|
253
|
+
field_info.outer_type_
|
254
|
+
) # Use outer_type_ to get the actual type including List, etc.
|
255
|
+
format_field(field, field_type)
|
256
|
+
|
257
|
+
xml_format += f"</{cls.Config.root_element}>"
|
258
|
+
|
259
|
+
verbatim_alert = ""
|
260
|
+
if len(verbatim_fields) > 0:
|
261
|
+
verbatim_alert = f"""
|
262
|
+
EXTREMELY IMPORTANT: For these fields:
|
263
|
+
{', '.join(verbatim_fields)},
|
264
|
+
the contents MUST be wrapped in a CDATA section, and the content
|
265
|
+
must be written verbatim WITHOUT any modifications or escaping,
|
266
|
+
such as spaces, tabs, indents, newlines, quotes, etc.
|
267
|
+
"""
|
268
|
+
|
269
|
+
examples_str = ""
|
270
|
+
if cls.examples():
|
271
|
+
examples_str = "EXAMPLES:\n" + cls.usage_examples()
|
272
|
+
|
273
|
+
return f"""
|
274
|
+
TOOL: {cls.default_value("request")}
|
275
|
+
PURPOSE: {cls.default_value("purpose")}
|
276
|
+
|
277
|
+
{instructions}
|
278
|
+
{preamble}
|
279
|
+
{xml_format}
|
280
|
+
|
281
|
+
Make sure to replace the placeholders with actual values
|
282
|
+
when using the tool.
|
283
|
+
{verbatim_alert}
|
284
|
+
{examples_str}
|
285
|
+
""".lstrip()
|
286
|
+
|
287
|
+
def format_example(self) -> str:
|
288
|
+
"""
|
289
|
+
Format the current instance as an XML example.
|
290
|
+
|
291
|
+
Returns:
|
292
|
+
str: A string representation of the current instance in XML format.
|
293
|
+
|
294
|
+
Raises:
|
295
|
+
ValueError: If the result from etree.tostring is not a string.
|
296
|
+
"""
|
297
|
+
|
298
|
+
def create_element(
|
299
|
+
parent: etree._Element, name: str, value: Any, path: str = ""
|
300
|
+
) -> None:
|
301
|
+
if value is None:
|
302
|
+
return
|
303
|
+
|
304
|
+
elem = etree.SubElement(parent, name)
|
305
|
+
current_path = f"{path}.{name}" if path else name
|
306
|
+
|
307
|
+
if isinstance(value, list):
|
308
|
+
for item in value:
|
309
|
+
create_element(elem, "item", item, current_path)
|
310
|
+
elif isinstance(value, dict):
|
311
|
+
for k, v in value.items():
|
312
|
+
create_element(elem, k, v, current_path)
|
313
|
+
elif isinstance(value, BaseModel):
|
314
|
+
# Handle nested Pydantic models
|
315
|
+
for field_name, field_value in value.model_dump().items():
|
316
|
+
create_element(elem, field_name, field_value, current_path)
|
317
|
+
else:
|
318
|
+
if current_path in self.__class__.find_verbatim_fields():
|
319
|
+
elem.text = etree.CDATA(str(value))
|
320
|
+
else:
|
321
|
+
elem.text = str(value)
|
322
|
+
|
323
|
+
root = etree.Element(self.Config.root_element)
|
324
|
+
exclude_fields = self.Config.schema_extra.get("exclude", set())
|
325
|
+
for name, value in self.model_dump().items():
|
326
|
+
if name not in exclude_fields:
|
327
|
+
create_element(root, name, value)
|
328
|
+
|
329
|
+
result = etree.tostring(root, encoding="unicode", pretty_print=True)
|
330
|
+
if not isinstance(result, str):
|
331
|
+
raise ValueError("Unexpected non-string result from etree.tostring")
|
332
|
+
return result
|
333
|
+
|
334
|
+
@classmethod
|
335
|
+
def find_candidates(cls, text: str) -> List[str]:
|
336
|
+
"""
|
337
|
+
Finds XML-like tool message candidates in text, with relaxed opening tag rules.
|
338
|
+
|
339
|
+
Args:
|
340
|
+
text: Input text to search for XML structures.
|
341
|
+
|
342
|
+
Returns:
|
343
|
+
List of XML strings. For fragments missing the root opening tag but having
|
344
|
+
valid XML structure and root closing tag, prepends the root opening tag.
|
345
|
+
|
346
|
+
Example:
|
347
|
+
With root_tag="tool", given:
|
348
|
+
"Hello <field1>data</field1> </tool>"
|
349
|
+
Returns: ["<tool><field1>data</field1></tool>"]
|
350
|
+
"""
|
351
|
+
|
352
|
+
root_tag = cls.Config.root_element
|
353
|
+
opening_tag = f"<{root_tag}>"
|
354
|
+
closing_tag = f"</{root_tag}>"
|
355
|
+
|
356
|
+
candidates = []
|
357
|
+
pos = 0
|
358
|
+
while True:
|
359
|
+
# Look for either proper opening tag or closing tag
|
360
|
+
start_normal = text.find(opening_tag, pos)
|
361
|
+
end = text.find(closing_tag, pos)
|
362
|
+
|
363
|
+
if start_normal == -1 and end == -1:
|
364
|
+
break
|
365
|
+
|
366
|
+
if start_normal != -1:
|
367
|
+
# Handle normal case (has opening tag)
|
368
|
+
end = text.find(closing_tag, start_normal)
|
369
|
+
if end != -1:
|
370
|
+
candidates.append(text[start_normal : end + len(closing_tag)])
|
371
|
+
pos = max(end + len(closing_tag), start_normal + 1)
|
372
|
+
continue
|
373
|
+
elif start_normal == text.rfind(opening_tag):
|
374
|
+
# last fragment - ok to miss closing tag
|
375
|
+
candidates.append(text[start_normal:] + closing_tag)
|
376
|
+
return candidates
|
377
|
+
else:
|
378
|
+
pos = start_normal + 1
|
379
|
+
continue
|
380
|
+
|
381
|
+
if end != -1:
|
382
|
+
# Look backwards for first XML tag
|
383
|
+
text_before = text[pos:end]
|
384
|
+
first_tag_match = re.search(r"<\w+>", text_before)
|
385
|
+
if first_tag_match:
|
386
|
+
start = pos + first_tag_match.start()
|
387
|
+
candidates.append(
|
388
|
+
opening_tag + text[start : end + len(closing_tag)]
|
389
|
+
)
|
390
|
+
pos = end + len(closing_tag)
|
391
|
+
|
392
|
+
return candidates
|
langroid/cachedb/base.py
CHANGED
@@ -2,9 +2,9 @@ import logging
|
|
2
2
|
from abc import ABC, abstractmethod
|
3
3
|
|
4
4
|
import numpy as np
|
5
|
+
from pydantic_settings import BaseSettings
|
5
6
|
|
6
7
|
from langroid.mytypes import EmbeddingFunction
|
7
|
-
from langroid.pydantic_v1 import BaseSettings
|
8
8
|
|
9
9
|
logging.getLogger("openai").setLevel(logging.ERROR)
|
10
10
|
|
@@ -57,7 +57,7 @@ class EmbeddingModel(ABC):
|
|
57
57
|
elif isinstance(config, GeminiEmbeddingsConfig):
|
58
58
|
return GeminiEmbeddings(config)
|
59
59
|
else:
|
60
|
-
raise ValueError(f"Unknown embedding config: {config.
|
60
|
+
raise ValueError(f"Unknown embedding config: {config.__class__.__name__}")
|
61
61
|
|
62
62
|
@abstractmethod
|
63
63
|
def embedding_fn(self) -> EmbeddingFunction:
|
@@ -7,6 +7,7 @@ import requests
|
|
7
7
|
import tiktoken
|
8
8
|
from dotenv import load_dotenv
|
9
9
|
from openai import AzureOpenAI, OpenAI
|
10
|
+
from pydantic_settings import SettingsConfigDict
|
10
11
|
|
11
12
|
from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
|
12
13
|
from langroid.exceptions import LangroidImportError
|
@@ -27,10 +28,7 @@ class OpenAIEmbeddingsConfig(EmbeddingModelsConfig):
|
|
27
28
|
context_length: int = 8192
|
28
29
|
langdb_params: LangDBParams = LangDBParams()
|
29
30
|
|
30
|
-
|
31
|
-
# enable auto-loading of env vars with OPENAI_ prefix, e.g.
|
32
|
-
# api_base is set from OPENAI_API_BASE env var, in .env or system env
|
33
|
-
env_prefix = "OPENAI_"
|
31
|
+
model_config = SettingsConfigDict(env_prefix="OPENAI_")
|
34
32
|
|
35
33
|
|
36
34
|
class AzureOpenAIEmbeddingsConfig(EmbeddingModelsConfig):
|
@@ -48,9 +46,7 @@ class AzureOpenAIEmbeddingsConfig(EmbeddingModelsConfig):
|
|
48
46
|
dims: int = 1536
|
49
47
|
context_length: int = 8192
|
50
48
|
|
51
|
-
|
52
|
-
# enable auto-loading of env vars with AZURE_OPENAI_ prefix
|
53
|
-
env_prefix = "AZURE_OPENAI_"
|
49
|
+
model_config = SettingsConfigDict(env_prefix="AZURE_OPENAI_")
|
54
50
|
|
55
51
|
|
56
52
|
class SentenceTransformerEmbeddingsConfig(EmbeddingModelsConfig):
|