langroid 0.58.2__py3-none-any.whl → 0.59.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. langroid/agent/base.py +39 -17
  2. langroid/agent/base.py-e +2216 -0
  3. langroid/agent/callbacks/chainlit.py +2 -1
  4. langroid/agent/chat_agent.py +73 -55
  5. langroid/agent/chat_agent.py-e +2086 -0
  6. langroid/agent/chat_document.py +7 -7
  7. langroid/agent/chat_document.py-e +513 -0
  8. langroid/agent/openai_assistant.py +9 -9
  9. langroid/agent/openai_assistant.py-e +882 -0
  10. langroid/agent/special/arangodb/arangodb_agent.py +10 -18
  11. langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
  12. langroid/agent/special/arangodb/tools.py +3 -3
  13. langroid/agent/special/doc_chat_agent.py +16 -14
  14. langroid/agent/special/lance_rag/critic_agent.py +2 -2
  15. langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
  16. langroid/agent/special/lance_tools.py +6 -5
  17. langroid/agent/special/lance_tools.py-e +61 -0
  18. langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
  19. langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
  20. langroid/agent/special/relevance_extractor_agent.py +1 -1
  21. langroid/agent/special/sql/sql_chat_agent.py +11 -3
  22. langroid/agent/task.py +9 -87
  23. langroid/agent/task.py-e +2418 -0
  24. langroid/agent/tool_message.py +33 -17
  25. langroid/agent/tool_message.py-e +400 -0
  26. langroid/agent/tools/file_tools.py +4 -2
  27. langroid/agent/tools/file_tools.py-e +234 -0
  28. langroid/agent/tools/mcp/fastmcp_client.py +19 -6
  29. langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
  30. langroid/agent/tools/orchestration.py +22 -17
  31. langroid/agent/tools/orchestration.py-e +301 -0
  32. langroid/agent/tools/recipient_tool.py +3 -3
  33. langroid/agent/tools/task_tool.py +22 -16
  34. langroid/agent/tools/task_tool.py-e +249 -0
  35. langroid/agent/xml_tool_message.py +90 -35
  36. langroid/agent/xml_tool_message.py-e +392 -0
  37. langroid/cachedb/base.py +1 -1
  38. langroid/embedding_models/base.py +2 -2
  39. langroid/embedding_models/models.py +3 -7
  40. langroid/embedding_models/models.py-e +563 -0
  41. langroid/exceptions.py +4 -1
  42. langroid/language_models/azure_openai.py +2 -2
  43. langroid/language_models/azure_openai.py-e +134 -0
  44. langroid/language_models/base.py +6 -4
  45. langroid/language_models/base.py-e +812 -0
  46. langroid/language_models/client_cache.py +64 -0
  47. langroid/language_models/config.py +2 -4
  48. langroid/language_models/config.py-e +18 -0
  49. langroid/language_models/model_info.py +9 -1
  50. langroid/language_models/model_info.py-e +483 -0
  51. langroid/language_models/openai_gpt.py +119 -20
  52. langroid/language_models/openai_gpt.py-e +2280 -0
  53. langroid/language_models/provider_params.py +3 -22
  54. langroid/language_models/provider_params.py-e +153 -0
  55. langroid/mytypes.py +11 -4
  56. langroid/mytypes.py-e +132 -0
  57. langroid/parsing/code_parser.py +1 -1
  58. langroid/parsing/file_attachment.py +1 -1
  59. langroid/parsing/file_attachment.py-e +246 -0
  60. langroid/parsing/md_parser.py +14 -4
  61. langroid/parsing/md_parser.py-e +574 -0
  62. langroid/parsing/parser.py +22 -7
  63. langroid/parsing/parser.py-e +410 -0
  64. langroid/parsing/repo_loader.py +3 -1
  65. langroid/parsing/repo_loader.py-e +812 -0
  66. langroid/parsing/search.py +1 -1
  67. langroid/parsing/url_loader.py +17 -51
  68. langroid/parsing/url_loader.py-e +683 -0
  69. langroid/parsing/urls.py +5 -4
  70. langroid/parsing/urls.py-e +279 -0
  71. langroid/prompts/prompts_config.py +1 -1
  72. langroid/pydantic_v1/__init__.py +45 -6
  73. langroid/pydantic_v1/__init__.py-e +36 -0
  74. langroid/pydantic_v1/main.py +11 -4
  75. langroid/pydantic_v1/main.py-e +11 -0
  76. langroid/utils/configuration.py +13 -11
  77. langroid/utils/configuration.py-e +141 -0
  78. langroid/utils/constants.py +1 -1
  79. langroid/utils/constants.py-e +32 -0
  80. langroid/utils/globals.py +21 -5
  81. langroid/utils/globals.py-e +49 -0
  82. langroid/utils/html_logger.py +2 -1
  83. langroid/utils/html_logger.py-e +825 -0
  84. langroid/utils/object_registry.py +1 -1
  85. langroid/utils/object_registry.py-e +66 -0
  86. langroid/utils/pydantic_utils.py +55 -28
  87. langroid/utils/pydantic_utils.py-e +602 -0
  88. langroid/utils/types.py +2 -2
  89. langroid/utils/types.py-e +113 -0
  90. langroid/vector_store/base.py +3 -3
  91. langroid/vector_store/lancedb.py +5 -5
  92. langroid/vector_store/lancedb.py-e +404 -0
  93. langroid/vector_store/meilisearch.py +2 -2
  94. langroid/vector_store/pineconedb.py +4 -4
  95. langroid/vector_store/pineconedb.py-e +427 -0
  96. langroid/vector_store/postgres.py +1 -1
  97. langroid/vector_store/qdrantdb.py +3 -3
  98. langroid/vector_store/weaviatedb.py +1 -1
  99. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/METADATA +3 -2
  100. langroid-0.59.0b1.dist-info/RECORD +181 -0
  101. langroid/agent/special/doc_chat_task.py +0 -0
  102. langroid/mcp/__init__.py +0 -1
  103. langroid/mcp/server/__init__.py +0 -1
  104. langroid-0.58.2.dist-info/RECORD +0 -145
  105. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/WHEEL +0 -0
  106. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/licenses/LICENSE +0 -0
@@ -1,11 +1,20 @@
1
1
  import re
2
2
  from collections.abc import Mapping
3
- from typing import Any, Dict, List, Optional, get_args, get_origin
3
+ from typing import Any, Dict, List, Optional, Union, get_args, get_origin
4
4
 
5
5
  from lxml import etree
6
+ from pydantic import BaseModel, ConfigDict
6
7
 
7
8
  from langroid.agent.tool_message import ToolMessage
8
- from langroid.pydantic_v1 import BaseModel
9
+
10
+ # For Union type handling - check if we have Python 3.10+ UnionType
11
+ HAS_UNION_TYPE = False
12
+ try:
13
+ from types import UnionType # noqa: F401 # Used conditionally
14
+
15
+ HAS_UNION_TYPE = True
16
+ except ImportError:
17
+ pass
9
18
 
10
19
 
11
20
  class XMLToolMessage(ToolMessage):
@@ -27,10 +36,27 @@ class XMLToolMessage(ToolMessage):
27
36
  request: str
28
37
  purpose: str
29
38
 
30
- _allow_llm_use = True
39
+ _allow_llm_use: bool = True
40
+
41
+ model_config = ConfigDict(
42
+ # Inherit settings from ToolMessage
43
+ extra="allow",
44
+ arbitrary_types_allowed=False,
45
+ validate_default=True,
46
+ validate_assignment=True,
47
+ json_schema_extra={"exclude": ["purpose", "id"]},
48
+ )
31
49
 
32
- class Config(ToolMessage.Config):
33
- root_element = "tool"
50
+ # XMLToolMessage-specific settings as class methods to avoid Pydantic
51
+ # treating them as model fields
52
+ @classmethod
53
+ def _get_excluded_fields(cls) -> set[str]:
54
+ return {"purpose", "id"}
55
+
56
+ # Root element for XML formatting
57
+ @classmethod
58
+ def _get_root_element(cls) -> str:
59
+ return "tool"
34
60
 
35
61
  @classmethod
36
62
  def extract_field_values(cls, formatted_string: str) -> Optional[Dict[str, Any]]:
@@ -67,9 +93,13 @@ class XMLToolMessage(ToolMessage):
67
93
  if element.tag.startswith("_"):
68
94
  return {}
69
95
 
70
- field_info = cls.__fields__.get(element.tag)
71
- is_verbatim = field_info and field_info.field_info.extra.get(
72
- "verbatim", False
96
+ field_info = cls.model_fields.get(element.tag)
97
+ is_verbatim = (
98
+ field_info
99
+ and hasattr(field_info, "json_schema_extra")
100
+ and field_info.json_schema_extra is not None
101
+ and isinstance(field_info.json_schema_extra, dict)
102
+ and field_info.json_schema_extra.get("verbatim", False)
73
103
  )
74
104
 
75
105
  if is_verbatim:
@@ -96,8 +126,12 @@ class XMLToolMessage(ToolMessage):
96
126
  # Otherwise, treat as a dictionary
97
127
  result = {child.tag: parse_element(child) for child in element}
98
128
  # Check if this corresponds to a nested Pydantic model
99
- if field_info and issubclass(field_info.type_, BaseModel):
100
- return field_info.type_(**result)
129
+ if (
130
+ field_info
131
+ and isinstance(field_info.annotation, type)
132
+ and issubclass(field_info.annotation, BaseModel)
133
+ ):
134
+ return field_info.annotation(**result)
101
135
  return result
102
136
 
103
137
  result = parse_element(root)
@@ -124,7 +158,7 @@ class XMLToolMessage(ToolMessage):
124
158
  return None
125
159
 
126
160
  # Use Pydantic's parse_obj to create and validate the instance
127
- return cls.parse_obj(parsed_data)
161
+ return cls.model_validate(parsed_data)
128
162
  except Exception as e:
129
163
  from langroid.exceptions import XMLException
130
164
 
@@ -132,28 +166,30 @@ class XMLToolMessage(ToolMessage):
132
166
 
133
167
  @classmethod
134
168
  def find_verbatim_fields(
135
- cls, prefix: str = "", parent_cls: Optional["BaseModel"] = None
169
+ cls, prefix: str = "", parent_cls: Optional[type[BaseModel]] = None
136
170
  ) -> List[str]:
137
171
  verbatim_fields = []
138
- for field_name, field_info in (parent_cls or cls).__fields__.items():
172
+ for field_name, field_info in (parent_cls or cls).model_fields.items():
139
173
  full_name = f"{prefix}.{field_name}" if prefix else field_name
140
174
  if (
141
- field_info.field_info.extra.get("verbatim", False)
142
- or field_name == "code"
143
- ):
175
+ hasattr(field_info, "json_schema_extra")
176
+ and field_info.json_schema_extra is not None
177
+ and isinstance(field_info.json_schema_extra, dict)
178
+ and field_info.json_schema_extra.get("verbatim", False)
179
+ ) or field_name == "code":
144
180
  verbatim_fields.append(full_name)
145
- if issubclass(field_info.type_, BaseModel):
181
+ if isinstance(field_info.annotation, type) and issubclass(
182
+ field_info.annotation, BaseModel
183
+ ):
146
184
  verbatim_fields.extend(
147
- cls.find_verbatim_fields(full_name, field_info.type_)
185
+ cls.find_verbatim_fields(full_name, field_info.annotation)
148
186
  )
149
187
  return verbatim_fields
150
188
 
151
189
  @classmethod
152
190
  def format_instructions(cls, tool: bool = False) -> str:
153
191
  fields = [
154
- f
155
- for f in cls.__fields__.keys()
156
- if f not in cls.Config.schema_extra.get("exclude", set())
192
+ f for f in cls.model_fields.keys() if f not in cls._get_excluded_fields()
157
193
  ]
158
194
 
159
195
  instructions = """
@@ -162,11 +198,11 @@ class XMLToolMessage(ToolMessage):
162
198
  """
163
199
 
164
200
  preamble = "Placeholders:\n"
165
- xml_format = f"Formatting example:\n\n<{cls.Config.root_element}>\n"
201
+ xml_format = f"Formatting example:\n\n<{cls._get_root_element()}>\n"
166
202
 
167
203
  def format_field(
168
204
  field_name: str,
169
- field_type: type,
205
+ field_type: Any,
170
206
  indent: str = "",
171
207
  path: str = "",
172
208
  ) -> None:
@@ -176,6 +212,24 @@ class XMLToolMessage(ToolMessage):
176
212
  origin = get_origin(field_type)
177
213
  args = get_args(field_type)
178
214
 
215
+ # Handle Union types (including Optional types like List[Person] | None)
216
+ # Support both typing.Union and types.UnionType (Python 3.10+ | syntax)
217
+ is_union = origin is Union
218
+ if HAS_UNION_TYPE:
219
+ from types import UnionType as _UnionType
220
+
221
+ is_union = is_union or origin is _UnionType
222
+
223
+ if is_union:
224
+ # Filter out None type for Optional types
225
+ non_none_args = [arg for arg in args if arg is not type(None)]
226
+ if len(non_none_args) == 1:
227
+ # This is an Optional type, process the non-None type
228
+ field_type = non_none_args[0]
229
+ origin = get_origin(field_type)
230
+ args = get_args(field_type)
231
+ # If there are multiple non-None types, fall through to default handling
232
+
179
233
  if (
180
234
  origin is None
181
235
  and isinstance(field_type, type)
@@ -185,10 +239,10 @@ class XMLToolMessage(ToolMessage):
185
239
  f"{field_name.upper()} = [nested structure for {field_name}]\n"
186
240
  )
187
241
  xml_format += f"{indent}<{field_name}>\n"
188
- for sub_field, sub_field_info in field_type.__fields__.items():
242
+ for sub_field, sub_field_info in field_type.model_fields.items():
189
243
  format_field(
190
244
  sub_field,
191
- sub_field_info.outer_type_,
245
+ sub_field_info.annotation,
192
246
  indent + " ",
193
247
  current_path,
194
248
  )
@@ -248,13 +302,14 @@ class XMLToolMessage(ToolMessage):
248
302
  verbatim_fields = cls.find_verbatim_fields()
249
303
 
250
304
  for field in fields:
251
- field_info = cls.__fields__[field]
252
- field_type = (
253
- field_info.outer_type_
254
- ) # Use outer_type_ to get the actual type including List, etc.
305
+ field_info = cls.model_fields[field]
306
+ field_type = field_info.annotation
307
+ # Ensure we have a valid type
308
+ if field_type is None:
309
+ continue
255
310
  format_field(field, field_type)
256
311
 
257
- xml_format += f"</{cls.Config.root_element}>"
312
+ xml_format += f"</{cls._get_root_element()}>"
258
313
 
259
314
  verbatim_alert = ""
260
315
  if len(verbatim_fields) > 0:
@@ -312,7 +367,7 @@ class XMLToolMessage(ToolMessage):
312
367
  create_element(elem, k, v, current_path)
313
368
  elif isinstance(value, BaseModel):
314
369
  # Handle nested Pydantic models
315
- for field_name, field_value in value.dict().items():
370
+ for field_name, field_value in value.model_dump().items():
316
371
  create_element(elem, field_name, field_value, current_path)
317
372
  else:
318
373
  if current_path in self.__class__.find_verbatim_fields():
@@ -320,9 +375,9 @@ class XMLToolMessage(ToolMessage):
320
375
  else:
321
376
  elem.text = str(value)
322
377
 
323
- root = etree.Element(self.Config.root_element)
324
- exclude_fields = self.Config.schema_extra.get("exclude", set())
325
- for name, value in self.dict().items():
378
+ root = etree.Element(self._get_root_element())
379
+ exclude_fields: set[str] = self._get_excluded_fields()
380
+ for name, value in self.model_dump().items():
326
381
  if name not in exclude_fields:
327
382
  create_element(root, name, value)
328
383
 
@@ -349,7 +404,7 @@ class XMLToolMessage(ToolMessage):
349
404
  Returns: ["<tool><field1>data</field1></tool>"]
350
405
  """
351
406
 
352
- root_tag = cls.Config.root_element
407
+ root_tag = cls._get_root_element()
353
408
  opening_tag = f"<{root_tag}>"
354
409
  closing_tag = f"</{root_tag}>"
355
410
 
@@ -0,0 +1,392 @@
1
+ import re
2
+ from collections.abc import Mapping
3
+ from typing import Any, Dict, List, Optional, get_args, get_origin
4
+
5
+ from lxml import etree
6
+
7
+ from langroid.agent.tool_message import ToolMessage
8
+ from pydantic import BaseModel
9
+
10
+
11
+ class XMLToolMessage(ToolMessage):
12
+ """
13
+ Abstract class for tools formatted using XML instead of JSON.
14
+
15
+ When a subclass defines a field with the attribute `verbatim=True`,
16
+ instructions are sent to the LLM to ensure the field's content is:
17
+ - preserved as is, including whitespace, indents, quotes, newlines, etc
18
+ with no escaping, and
19
+ - enclosed in a CDATA section in the XML output.
20
+ This is useful for LLMs sending code as part of a tool;
21
+ results can be far superior compared to sending code in JSON-formatted tools,
22
+ where code needs to conform to JSON's strict rules and escaping requirements.
23
+ (see test_xml_tool_message.py for an example).
24
+
25
+ """
26
+
27
+ request: str
28
+ purpose: str
29
+
30
+ _allow_llm_use = True
31
+
32
+ class Config(ToolMessage.Config):
33
+ root_element = "tool"
34
+
35
+ @classmethod
36
+ def extract_field_values(cls, formatted_string: str) -> Optional[Dict[str, Any]]:
37
+ """
38
+ Extracts field values from an XML-formatted string.
39
+
40
+ Args:
41
+ formatted_string (str): The XML-formatted string to parse.
42
+
43
+ Returns:
44
+ Optional[Dict[str, Any]]: A dictionary containing the extracted field
45
+ values, where keys are the XML element names and values are their
46
+ corresponding contents.
47
+ Returns None if parsing fails or the root element is not a dictionary.
48
+
49
+ Raises:
50
+ etree.XMLSyntaxError: If the input string is not valid XML.
51
+ """
52
+ # SECURITY: Initialize XMLParser with flags to prevent
53
+ # XML External Entity (XXE), billion laughs, and external DTD attacks by
54
+ # disabling entity resolution, DTD loading, and network access;
55
+ # `strip_cdata=False` is needed to preserve
56
+ # content within CDATA sections (e.g., for code).
57
+ parser = etree.XMLParser(
58
+ strip_cdata=False,
59
+ resolve_entities=False,
60
+ load_dtd=False,
61
+ no_network=True,
62
+ )
63
+ root = etree.fromstring(formatted_string.encode("utf-8"), parser=parser)
64
+
65
+ def parse_element(element: etree._Element) -> Any:
66
+ # Skip elements starting with underscore
67
+ if element.tag.startswith("_"):
68
+ return {}
69
+
70
+ field_info = cls.__fields__.get(element.tag)
71
+ is_verbatim = field_info and field_info.field_info.extra.get(
72
+ "verbatim", False
73
+ )
74
+
75
+ if is_verbatim:
76
+ # For code elements, preserve the content as is, including whitespace
77
+ content = element.text if element.text else ""
78
+ # Strip leading and trailing triple backticks if present,
79
+ # accounting for whitespace
80
+ return (
81
+ content.strip().removeprefix("```").removesuffix("```").strip()
82
+ if content.strip().startswith("```")
83
+ and content.strip().endswith("```")
84
+ else content
85
+ )
86
+ elif len(element) == 0:
87
+ # For non-code leaf elements, strip whitespace
88
+ return element.text.strip() if element.text else ""
89
+ else:
90
+ # For branch elements, handle potential lists or nested structures
91
+ children = [parse_element(child) for child in element]
92
+ if all(child.tag == element[0].tag for child in element):
93
+ # If all children have the same tag, treat as a list
94
+ return children
95
+ else:
96
+ # Otherwise, treat as a dictionary
97
+ result = {child.tag: parse_element(child) for child in element}
98
+ # Check if this corresponds to a nested Pydantic model
99
+ if field_info and issubclass(field_info.type_, BaseModel):
100
+ return field_info.type_(**result)
101
+ return result
102
+
103
+ result = parse_element(root)
104
+ if not isinstance(result, dict):
105
+ return None
106
+ # Filter out empty dictionaries from skipped underscore fields
107
+ return {k: v for k, v in result.items() if v != {}}
108
+
109
+ @classmethod
110
+ def parse(cls, formatted_string: str) -> Optional["XMLToolMessage"]:
111
+ """
112
+ Parses the XML-formatted string and returns an instance of the class.
113
+
114
+ Args:
115
+ formatted_string (str): The XML-formatted string to parse.
116
+
117
+ Returns:
118
+ Optional["XMLToolMessage"]: An instance of the class if parsing succeeds,
119
+ None otherwise.
120
+ """
121
+ try:
122
+ parsed_data = cls.extract_field_values(formatted_string)
123
+ if parsed_data is None:
124
+ return None
125
+
126
+ # Use Pydantic's model_validate to create and validate the instance
127
+ return cls.model_validate(parsed_data)
128
+ except Exception as e:
129
+ from langroid.exceptions import XMLException
130
+
131
+ raise XMLException(f"Error parsing XML: {str(e)}")
132
+
133
+ @classmethod
134
+ def find_verbatim_fields(
135
+ cls, prefix: str = "", parent_cls: Optional["BaseModel"] = None
136
+ ) -> List[str]:
137
+ verbatim_fields = []
138
+ for field_name, field_info in (parent_cls or cls).__fields__.items():
139
+ full_name = f"{prefix}.{field_name}" if prefix else field_name
140
+ if (
141
+ field_info.field_info.extra.get("verbatim", False)
142
+ or field_name == "code"
143
+ ):
144
+ verbatim_fields.append(full_name)
145
+ if issubclass(field_info.type_, BaseModel):
146
+ verbatim_fields.extend(
147
+ cls.find_verbatim_fields(full_name, field_info.type_)
148
+ )
149
+ return verbatim_fields
150
+
151
+ @classmethod
152
+ def format_instructions(cls, tool: bool = False) -> str:
153
+ fields = [
154
+ f
155
+ for f in cls.__fields__.keys()
156
+ if f not in cls.Config.schema_extra.get("exclude", set())
157
+ ]
158
+
159
+ instructions = """
160
+ To use this tool, please provide the required information in an XML-like
161
+ format. Here's how to structure your input:\n\n
162
+ """
163
+
164
+ preamble = "Placeholders:\n"
165
+ xml_format = f"Formatting example:\n\n<{cls.Config.root_element}>\n"
166
+
167
+ def format_field(
168
+ field_name: str,
169
+ field_type: type,
170
+ indent: str = "",
171
+ path: str = "",
172
+ ) -> None:
173
+ nonlocal preamble, xml_format
174
+ current_path = f"{path}.{field_name}" if path else field_name
175
+
176
+ origin = get_origin(field_type)
177
+ args = get_args(field_type)
178
+
179
+ if (
180
+ origin is None
181
+ and isinstance(field_type, type)
182
+ and issubclass(field_type, BaseModel)
183
+ ):
184
+ preamble += (
185
+ f"{field_name.upper()} = [nested structure for {field_name}]\n"
186
+ )
187
+ xml_format += f"{indent}<{field_name}>\n"
188
+ for sub_field, sub_field_info in field_type.__fields__.items():
189
+ format_field(
190
+ sub_field,
191
+ sub_field_info.outer_type_,
192
+ indent + " ",
193
+ current_path,
194
+ )
195
+ xml_format += f"{indent}</{field_name}>\n"
196
+ elif origin in (list, List) or (field_type is list):
197
+ item_type = args[0] if args else Any
198
+ if isinstance(item_type, type) and issubclass(item_type, BaseModel):
199
+ preamble += (
200
+ f"{field_name.upper()} = "
201
+ f"[list of nested structures for {field_name}]\n"
202
+ )
203
+ else:
204
+ preamble += (
205
+ f"{field_name.upper()} = "
206
+ f"[list of {getattr(item_type, '__name__', str(item_type))} "
207
+ f"for {field_name}]\n"
208
+ )
209
+ xml_format += f"{indent}<{field_name}>\n"
210
+ xml_format += (
211
+ f"{indent} <item>"
212
+ f"[{getattr(item_type, '__name__', str(item_type))} value]"
213
+ f"</item>\n"
214
+ )
215
+ xml_format += f"{indent} ...\n"
216
+ xml_format += f"{indent}</{field_name}>\n"
217
+ elif origin in (dict, Dict) or (
218
+ isinstance(field_type, type) and issubclass(field_type, Mapping)
219
+ ):
220
+ key_type, value_type = args if len(args) == 2 else (Any, Any)
221
+ preamble += (
222
+ f"{field_name.upper()} = "
223
+ f"[dictionary with "
224
+ f"{getattr(key_type, '__name__', str(key_type))} keys and "
225
+ f"{getattr(value_type, '__name__', str(value_type))} values]\n"
226
+ )
227
+ xml_format += f"{indent}<{field_name}>\n"
228
+ xml_format += (
229
+ f"{indent} <{getattr(key_type, '__name__', str(key_type))}>"
230
+ f"[{getattr(value_type, '__name__', str(value_type))} value]"
231
+ f"</{getattr(key_type, '__name__', str(key_type))}>\n"
232
+ )
233
+ xml_format += f"{indent} ...\n"
234
+ xml_format += f"{indent}</{field_name}>\n"
235
+ else:
236
+ preamble += f"{field_name.upper()} = [value for {field_name}]\n"
237
+ if current_path in verbatim_fields:
238
+ xml_format += (
239
+ f"{indent}<{field_name}>"
240
+ f"<![CDATA[{{{field_name.upper()}}}]]></{field_name}>\n"
241
+ )
242
+ else:
243
+ xml_format += (
244
+ f"{indent}<{field_name}>"
245
+ f"{{{field_name.upper()}}}</{field_name}>\n"
246
+ )
247
+
248
+ verbatim_fields = cls.find_verbatim_fields()
249
+
250
+ for field in fields:
251
+ field_info = cls.__fields__[field]
252
+ field_type = (
253
+ field_info.outer_type_
254
+ ) # Use outer_type_ to get the actual type including List, etc.
255
+ format_field(field, field_type)
256
+
257
+ xml_format += f"</{cls.Config.root_element}>"
258
+
259
+ verbatim_alert = ""
260
+ if len(verbatim_fields) > 0:
261
+ verbatim_alert = f"""
262
+ EXTREMELY IMPORTANT: For these fields:
263
+ {', '.join(verbatim_fields)},
264
+ the contents MUST be wrapped in a CDATA section, and the content
265
+ must be written verbatim WITHOUT any modifications or escaping,
266
+ such as spaces, tabs, indents, newlines, quotes, etc.
267
+ """
268
+
269
+ examples_str = ""
270
+ if cls.examples():
271
+ examples_str = "EXAMPLES:\n" + cls.usage_examples()
272
+
273
+ return f"""
274
+ TOOL: {cls.default_value("request")}
275
+ PURPOSE: {cls.default_value("purpose")}
276
+
277
+ {instructions}
278
+ {preamble}
279
+ {xml_format}
280
+
281
+ Make sure to replace the placeholders with actual values
282
+ when using the tool.
283
+ {verbatim_alert}
284
+ {examples_str}
285
+ """.lstrip()
286
+
287
+ def format_example(self) -> str:
288
+ """
289
+ Format the current instance as an XML example.
290
+
291
+ Returns:
292
+ str: A string representation of the current instance in XML format.
293
+
294
+ Raises:
295
+ ValueError: If the result from etree.tostring is not a string.
296
+ """
297
+
298
+ def create_element(
299
+ parent: etree._Element, name: str, value: Any, path: str = ""
300
+ ) -> None:
301
+ if value is None:
302
+ return
303
+
304
+ elem = etree.SubElement(parent, name)
305
+ current_path = f"{path}.{name}" if path else name
306
+
307
+ if isinstance(value, list):
308
+ for item in value:
309
+ create_element(elem, "item", item, current_path)
310
+ elif isinstance(value, dict):
311
+ for k, v in value.items():
312
+ create_element(elem, k, v, current_path)
313
+ elif isinstance(value, BaseModel):
314
+ # Handle nested Pydantic models
315
+ for field_name, field_value in value.model_dump().items():
316
+ create_element(elem, field_name, field_value, current_path)
317
+ else:
318
+ if current_path in self.__class__.find_verbatim_fields():
319
+ elem.text = etree.CDATA(str(value))
320
+ else:
321
+ elem.text = str(value)
322
+
323
+ root = etree.Element(self.Config.root_element)
324
+ exclude_fields = self.Config.schema_extra.get("exclude", set())
325
+ for name, value in self.model_dump().items():
326
+ if name not in exclude_fields:
327
+ create_element(root, name, value)
328
+
329
+ result = etree.tostring(root, encoding="unicode", pretty_print=True)
330
+ if not isinstance(result, str):
331
+ raise ValueError("Unexpected non-string result from etree.tostring")
332
+ return result
333
+
334
+ @classmethod
335
+ def find_candidates(cls, text: str) -> List[str]:
336
+ """
337
+ Finds XML-like tool message candidates in text, with relaxed opening tag rules.
338
+
339
+ Args:
340
+ text: Input text to search for XML structures.
341
+
342
+ Returns:
343
+ List of XML strings. For fragments missing the root opening tag but having
344
+ valid XML structure and root closing tag, prepends the root opening tag.
345
+
346
+ Example:
347
+ With root_tag="tool", given:
348
+ "Hello <field1>data</field1> </tool>"
349
+ Returns: ["<tool><field1>data</field1></tool>"]
350
+ """
351
+
352
+ root_tag = cls.Config.root_element
353
+ opening_tag = f"<{root_tag}>"
354
+ closing_tag = f"</{root_tag}>"
355
+
356
+ candidates = []
357
+ pos = 0
358
+ while True:
359
+ # Look for either proper opening tag or closing tag
360
+ start_normal = text.find(opening_tag, pos)
361
+ end = text.find(closing_tag, pos)
362
+
363
+ if start_normal == -1 and end == -1:
364
+ break
365
+
366
+ if start_normal != -1:
367
+ # Handle normal case (has opening tag)
368
+ end = text.find(closing_tag, start_normal)
369
+ if end != -1:
370
+ candidates.append(text[start_normal : end + len(closing_tag)])
371
+ pos = max(end + len(closing_tag), start_normal + 1)
372
+ continue
373
+ elif start_normal == text.rfind(opening_tag):
374
+ # last fragment - ok to miss closing tag
375
+ candidates.append(text[start_normal:] + closing_tag)
376
+ return candidates
377
+ else:
378
+ pos = start_normal + 1
379
+ continue
380
+
381
+ if end != -1:
382
+ # Look backwards for first XML tag
383
+ text_before = text[pos:end]
384
+ first_tag_match = re.search(r"<\w+>", text_before)
385
+ if first_tag_match:
386
+ start = pos + first_tag_match.start()
387
+ candidates.append(
388
+ opening_tag + text[start : end + len(closing_tag)]
389
+ )
390
+ pos = end + len(closing_tag)
391
+
392
+ return candidates
langroid/cachedb/base.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import Any, Dict, List
3
3
 
4
- from langroid.pydantic_v1 import BaseSettings
4
+ from pydantic_settings import BaseSettings
5
5
 
6
6
 
7
7
  class CacheDBConfig(BaseSettings):
@@ -2,9 +2,9 @@ import logging
2
2
  from abc import ABC, abstractmethod
3
3
 
4
4
  import numpy as np
5
+ from pydantic_settings import BaseSettings
5
6
 
6
7
  from langroid.mytypes import EmbeddingFunction
7
- from langroid.pydantic_v1 import BaseSettings
8
8
 
9
9
  logging.getLogger("openai").setLevel(logging.ERROR)
10
10
 
@@ -57,7 +57,7 @@ class EmbeddingModel(ABC):
57
57
  elif isinstance(config, GeminiEmbeddingsConfig):
58
58
  return GeminiEmbeddings(config)
59
59
  else:
60
- raise ValueError(f"Unknown embedding config: {config.__repr_name__}")
60
+ raise ValueError(f"Unknown embedding config: {config.__class__.__name__}")
61
61
 
62
62
  @abstractmethod
63
63
  def embedding_fn(self) -> EmbeddingFunction:
@@ -7,6 +7,7 @@ import requests
7
7
  import tiktoken
8
8
  from dotenv import load_dotenv
9
9
  from openai import AzureOpenAI, OpenAI
10
+ from pydantic_settings import SettingsConfigDict
10
11
 
11
12
  from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
12
13
  from langroid.exceptions import LangroidImportError
@@ -27,10 +28,7 @@ class OpenAIEmbeddingsConfig(EmbeddingModelsConfig):
27
28
  context_length: int = 8192
28
29
  langdb_params: LangDBParams = LangDBParams()
29
30
 
30
- class Config:
31
- # enable auto-loading of env vars with OPENAI_ prefix, e.g.
32
- # api_base is set from OPENAI_API_BASE env var, in .env or system env
33
- env_prefix = "OPENAI_"
31
+ model_config = SettingsConfigDict(env_prefix="OPENAI_")
34
32
 
35
33
 
36
34
  class AzureOpenAIEmbeddingsConfig(EmbeddingModelsConfig):
@@ -48,9 +46,7 @@ class AzureOpenAIEmbeddingsConfig(EmbeddingModelsConfig):
48
46
  dims: int = 1536
49
47
  context_length: int = 8192
50
48
 
51
- class Config:
52
- # enable auto-loading of env vars with AZURE_OPENAI_ prefix
53
- env_prefix = "AZURE_OPENAI_"
49
+ model_config = SettingsConfigDict(env_prefix="AZURE_OPENAI_")
54
50
 
55
51
 
56
52
  class SentenceTransformerEmbeddingsConfig(EmbeddingModelsConfig):