langroid 0.59.0b3__py3-none-any.whl → 0.59.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. langroid/agent/done_sequence_parser.py +46 -11
  2. langroid/agent/special/doc_chat_task.py +0 -0
  3. langroid/agent/task.py +44 -7
  4. langroid/language_models/model_info.py +51 -0
  5. langroid/mcp/__init__.py +1 -0
  6. langroid/mcp/server/__init__.py +1 -0
  7. langroid/pydantic_v1/__init__.py +1 -1
  8. {langroid-0.59.0b3.dist-info → langroid-0.59.1.dist-info}/METADATA +4 -1
  9. {langroid-0.59.0b3.dist-info → langroid-0.59.1.dist-info}/RECORD +11 -47
  10. langroid/agent/base.py-e +0 -2216
  11. langroid/agent/chat_agent.py-e +0 -2086
  12. langroid/agent/chat_document.py-e +0 -513
  13. langroid/agent/openai_assistant.py-e +0 -882
  14. langroid/agent/special/arangodb/arangodb_agent.py-e +0 -648
  15. langroid/agent/special/lance_tools.py-e +0 -61
  16. langroid/agent/special/neo4j/neo4j_chat_agent.py-e +0 -430
  17. langroid/agent/task.py-e +0 -2418
  18. langroid/agent/tool_message.py-e +0 -400
  19. langroid/agent/tools/file_tools.py-e +0 -234
  20. langroid/agent/tools/mcp/fastmcp_client.py-e +0 -584
  21. langroid/agent/tools/orchestration.py-e +0 -301
  22. langroid/agent/tools/task_tool.py-e +0 -249
  23. langroid/agent/xml_tool_message.py-e +0 -392
  24. langroid/embedding_models/models.py-e +0 -563
  25. langroid/language_models/azure_openai.py-e +0 -134
  26. langroid/language_models/base.py-e +0 -812
  27. langroid/language_models/config.py-e +0 -18
  28. langroid/language_models/model_info.py-e +0 -483
  29. langroid/language_models/openai_gpt.py-e +0 -2280
  30. langroid/language_models/provider_params.py-e +0 -153
  31. langroid/mytypes.py-e +0 -132
  32. langroid/parsing/file_attachment.py-e +0 -246
  33. langroid/parsing/md_parser.py-e +0 -574
  34. langroid/parsing/parser.py-e +0 -410
  35. langroid/parsing/repo_loader.py-e +0 -812
  36. langroid/parsing/url_loader.py-e +0 -683
  37. langroid/parsing/urls.py-e +0 -279
  38. langroid/pydantic_v1/__init__.py-e +0 -36
  39. langroid/pydantic_v1/main.py-e +0 -11
  40. langroid/utils/configuration.py-e +0 -141
  41. langroid/utils/constants.py-e +0 -32
  42. langroid/utils/globals.py-e +0 -49
  43. langroid/utils/html_logger.py-e +0 -825
  44. langroid/utils/object_registry.py-e +0 -66
  45. langroid/utils/pydantic_utils.py-e +0 -602
  46. langroid/utils/types.py-e +0 -113
  47. langroid/vector_store/lancedb.py-e +0 -404
  48. langroid/vector_store/pineconedb.py-e +0 -427
  49. {langroid-0.59.0b3.dist-info → langroid-0.59.1.dist-info}/WHEEL +0 -0
  50. {langroid-0.59.0b3.dist-info → langroid-0.59.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,392 +0,0 @@
1
- import re
2
- from collections.abc import Mapping
3
- from typing import Any, Dict, List, Optional, get_args, get_origin
4
-
5
- from lxml import etree
6
-
7
- from langroid.agent.tool_message import ToolMessage
8
- from pydantic import BaseModel
9
-
10
-
11
class XMLToolMessage(ToolMessage):
    """
    Abstract class for tools formatted using XML instead of JSON.

    When a subclass defines a field with the attribute `verbatim=True`,
    instructions are sent to the LLM to ensure the field's content is:
        - preserved as is, including whitespace, indents, quotes, newlines, etc
            with no escaping, and
        - enclosed in a CDATA section in the XML output.
    This is useful for LLMs sending code as part of a tool;
    results can be far superior compared to sending code in JSON-formatted tools,
    where code needs to conform to JSON's strict rules and escaping requirements.
    (see test_xml_tool_message.py for an example).

    NOTE(review): this class uses pydantic v1-style introspection
    (`__fields__`, `field_info.extra`, `type_`, `outer_type_`,
    `Config.schema_extra`) together with v2-style methods (`model_validate`,
    `model_dump`) -- confirm which pydantic version is targeted.
    """

    request: str
    purpose: str

    _allow_llm_use = True

    class Config(ToolMessage.Config):
        # Name of the XML root element that wraps every tool message.
        root_element = "tool"

    @classmethod
    def extract_field_values(cls, formatted_string: str) -> Optional[Dict[str, Any]]:
        """
        Extracts field values from an XML-formatted string.

        Parsing rules, applied recursively to each element:
            - an element whose tag starts with "_" is skipped;
            - a field declared with `verbatim=True` keeps its text as is, except
                that one surrounding pair of triple backticks is removed;
            - an element without child elements yields its stripped text;
            - an element whose children all share one tag (this includes the
                single-child case) yields a list;
            - any other element yields a dict keyed by child tag.
        Field metadata is looked up by tag name among this class's own fields,
        regardless of how deeply the element is nested.

        Args:
            formatted_string (str): The XML-formatted string to parse.

        Returns:
            Optional[Dict[str, Any]]: A dictionary containing the extracted field
                values, where keys are the XML element names and values are their
                corresponding contents.
                Returns None if the root element does not parse to a dictionary,
                e.g. when all of its children share one tag.

        Raises:
            etree.XMLSyntaxError: If the input string is not valid XML.
        """
        # SECURITY: Initialize XMLParser with flags to prevent
        # XML External Entity (XXE), billion laughs, and external DTD attacks by
        # disabling entity resolution, DTD loading, and network access;
        # `strip_cdata=False` is needed to preserve
        # content within CDATA sections (e.g., for code).
        parser = etree.XMLParser(
            strip_cdata=False,
            resolve_entities=False,
            load_dtd=False,
            no_network=True,
        )
        root = etree.fromstring(formatted_string.encode("utf-8"), parser=parser)

        def parse_element(element: etree._Element) -> Any:
            # Skip elements starting with underscore
            if element.tag.startswith("_"):
                return {}

            field_info = cls.__fields__.get(element.tag)
            is_verbatim = field_info and field_info.field_info.extra.get(
                "verbatim", False
            )

            if is_verbatim:
                # For code elements, preserve the content as is, including
                # whitespace; only a surrounding pair of triple backticks
                # (ignoring outer whitespace) is stripped.
                content = element.text if element.text else ""
                stripped = content.strip()
                if stripped.startswith("```") and stripped.endswith("```"):
                    return stripped.removeprefix("```").removesuffix("```").strip()
                return content

            # Comments, processing instructions and unresolved entity
            # references are child nodes too, but their `tag` is not a string;
            # ignore them so that `tag.startswith` cannot fail on them.
            child_elements = [
                child for child in element if isinstance(child.tag, str)
            ]
            if not child_elements:
                # For non-code leaf elements, strip whitespace
                return element.text.strip() if element.text else ""

            # Parse each child exactly once and reuse the result below.
            parsed_children = [parse_element(child) for child in child_elements]
            first_tag = child_elements[0].tag
            if all(child.tag == first_tag for child in child_elements):
                # If all children have the same tag, treat as a list
                return parsed_children

            # Otherwise, treat as a dictionary (a repeated tag keeps its last
            # occurrence).
            result = {
                child.tag: value
                for child, value in zip(child_elements, parsed_children)
            }
            # Check if this corresponds to a nested Pydantic model; the
            # isinstance guard avoids a TypeError for non-class annotations.
            if field_info:
                nested_type = field_info.type_
                if isinstance(nested_type, type) and issubclass(
                    nested_type, BaseModel
                ):
                    return nested_type(**result)
            return result

        result = parse_element(root)
        if not isinstance(result, dict):
            return None
        # Filter out empty dictionaries from skipped underscore fields
        return {k: v for k, v in result.items() if v != {}}

    @classmethod
    def parse(cls, formatted_string: str) -> Optional["XMLToolMessage"]:
        """
        Parses the XML-formatted string and returns an instance of the class.

        Args:
            formatted_string (str): The XML-formatted string to parse.

        Returns:
            Optional["XMLToolMessage"]: An instance of the class, or None when
                `extract_field_values` returns None.

        Raises:
            XMLException: If XML parsing or model validation raises; the
                original exception is attached as the cause.
        """
        try:
            parsed_data = cls.extract_field_values(formatted_string)
            if parsed_data is None:
                return None

            # Create and validate the instance from the extracted values
            return cls.model_validate(parsed_data)
        except Exception as e:
            from langroid.exceptions import XMLException

            raise XMLException(f"Error parsing XML: {str(e)}") from e

    @classmethod
    def find_verbatim_fields(
        cls, prefix: str = "", parent_cls: Optional["BaseModel"] = None
    ) -> List[str]:
        """
        Collects dotted paths of fields whose content is to be sent verbatim.

        A field counts as verbatim when it is declared with `verbatim=True` or
        when it is named "code". Fields whose type is a `BaseModel` subclass are
        searched recursively, with the field name added to the path.

        Args:
            prefix (str): Dotted path of the enclosing field; empty at top level.
            parent_cls: Model class whose fields are scanned; defaults to `cls`.

        Returns:
            List[str]: Dotted field paths, e.g. "outer.code".
        """
        verbatim_fields = []
        for field_name, field_info in (parent_cls or cls).__fields__.items():
            full_name = f"{prefix}.{field_name}" if prefix else field_name
            if (
                field_info.field_info.extra.get("verbatim", False)
                or field_name == "code"
            ):
                verbatim_fields.append(full_name)
            # Guard with isinstance: issubclass raises TypeError when the
            # annotation is not a class.
            field_type = field_info.type_
            if isinstance(field_type, type) and issubclass(field_type, BaseModel):
                verbatim_fields.extend(
                    cls.find_verbatim_fields(full_name, field_type)
                )
        return verbatim_fields

    @classmethod
    def format_instructions(cls, tool: bool = False) -> str:
        """
        Builds the instructions that tell the LLM how to write this tool in XML.

        The text lists one placeholder per field, an XML formatting example
        wrapped in the configured root element, an alert about CDATA for
        verbatim fields (when there are any) and the usage examples (when
        `cls.examples()` is non-empty). Fields named in
        `Config.schema_extra["exclude"]` are left out.

        Args:
            tool (bool): Not used by this implementation.

        Returns:
            str: The instruction text, with leading whitespace stripped.
        """
        excluded = cls.Config.schema_extra.get("exclude", set())
        fields = [f for f in cls.__fields__.keys() if f not in excluded]

        # Multi-line literals are kept flush-left: any indentation written here
        # would become part of the text.
        instructions = """
To use this tool, please provide the required information in an XML-like
format. Here's how to structure your input:\n\n
"""

        preamble = "Placeholders:\n"
        xml_format = f"Formatting example:\n\n<{cls.Config.root_element}>\n"

        verbatim_fields = cls.find_verbatim_fields()

        def format_field(
            field_name: str,
            field_type: type,
            indent: str = "",
            path: str = "",
        ) -> None:
            # Appends the placeholder line and the XML example for one field,
            # recursing into nested models.
            nonlocal preamble, xml_format
            current_path = f"{path}.{field_name}" if path else field_name

            origin = get_origin(field_type)
            args = get_args(field_type)

            if (
                origin is None
                and isinstance(field_type, type)
                and issubclass(field_type, BaseModel)
            ):
                preamble += (
                    f"{field_name.upper()} = [nested structure for {field_name}]\n"
                )
                xml_format += f"{indent}<{field_name}>\n"
                for sub_field, sub_field_info in field_type.__fields__.items():
                    format_field(
                        sub_field,
                        sub_field_info.outer_type_,
                        indent + " ",
                        current_path,
                    )
                xml_format += f"{indent}</{field_name}>\n"
            elif origin in (list, List) or (field_type is list):
                item_type = args[0] if args else Any
                item_name = getattr(item_type, '__name__', str(item_type))
                if isinstance(item_type, type) and issubclass(item_type, BaseModel):
                    preamble += (
                        f"{field_name.upper()} = "
                        f"[list of nested structures for {field_name}]\n"
                    )
                else:
                    preamble += (
                        f"{field_name.upper()} = "
                        f"[list of {item_name} "
                        f"for {field_name}]\n"
                    )
                xml_format += f"{indent}<{field_name}>\n"
                xml_format += f"{indent} <item>[{item_name} value]</item>\n"
                xml_format += f"{indent} ...\n"
                xml_format += f"{indent}</{field_name}>\n"
            elif origin in (dict, Dict) or (
                isinstance(field_type, type) and issubclass(field_type, Mapping)
            ):
                key_type, value_type = args if len(args) == 2 else (Any, Any)
                key_name = getattr(key_type, '__name__', str(key_type))
                value_name = getattr(value_type, '__name__', str(value_type))
                preamble += (
                    f"{field_name.upper()} = "
                    f"[dictionary with "
                    f"{key_name} keys and "
                    f"{value_name} values]\n"
                )
                xml_format += f"{indent}<{field_name}>\n"
                xml_format += (
                    f"{indent} <{key_name}>[{value_name} value]</{key_name}>\n"
                )
                xml_format += f"{indent} ...\n"
                xml_format += f"{indent}</{field_name}>\n"
            else:
                preamble += f"{field_name.upper()} = [value for {field_name}]\n"
                if current_path in verbatim_fields:
                    xml_format += (
                        f"{indent}<{field_name}>"
                        f"<![CDATA[{{{field_name.upper()}}}]]></{field_name}>\n"
                    )
                else:
                    xml_format += (
                        f"{indent}<{field_name}>"
                        f"{{{field_name.upper()}}}</{field_name}>\n"
                    )

        for field in fields:
            # Use outer_type_ to get the actual type including List, etc.
            format_field(field, cls.__fields__[field].outer_type_)

        xml_format += f"</{cls.Config.root_element}>"

        verbatim_alert = ""
        if len(verbatim_fields) > 0:
            verbatim_alert = f"""
EXTREMELY IMPORTANT: For these fields:
{', '.join(verbatim_fields)},
the contents MUST be wrapped in a CDATA section, and the content
must be written verbatim WITHOUT any modifications or escaping,
such as spaces, tabs, indents, newlines, quotes, etc.
"""

        examples_str = ""
        if cls.examples():
            examples_str = "EXAMPLES:\n" + cls.usage_examples()

        return f"""
TOOL: {cls.default_value("request")}
PURPOSE: {cls.default_value("purpose")}

{instructions}
{preamble}
{xml_format}

Make sure to replace the placeholders with actual values
when using the tool.
{verbatim_alert}
{examples_str}
""".lstrip()

    def format_example(self) -> str:
        """
        Format the current instance as an XML example.

        Values that are None are omitted. List items are written as `<item>`
        elements, dict entries and nested model fields as child elements named
        after their key, and verbatim fields are wrapped in CDATA. Fields named
        in `Config.schema_extra["exclude"]` are left out.

        Returns:
            str: A string representation of the current instance in XML format.

        Raises:
            ValueError: If the result from etree.tostring is not a string.
        """
        # Computed once instead of once per leaf value.
        verbatim_paths = set(self.__class__.find_verbatim_fields())

        def create_element(
            parent: etree._Element, name: str, value: Any, path: str = ""
        ) -> None:
            if value is None:
                return

            elem = etree.SubElement(parent, name)
            current_path = f"{path}.{name}" if path else name

            if isinstance(value, list):
                for item in value:
                    create_element(elem, "item", item, current_path)
            elif isinstance(value, dict):
                for k, v in value.items():
                    create_element(elem, k, v, current_path)
            elif isinstance(value, BaseModel):
                # Handle nested Pydantic models
                for field_name, field_value in value.model_dump().items():
                    create_element(elem, field_name, field_value, current_path)
            elif current_path in verbatim_paths:
                elem.text = etree.CDATA(str(value))
            else:
                elem.text = str(value)

        root = etree.Element(self.Config.root_element)
        exclude_fields = self.Config.schema_extra.get("exclude", set())
        for name, value in self.model_dump().items():
            if name not in exclude_fields:
                create_element(root, name, value)

        result = etree.tostring(root, encoding="unicode", pretty_print=True)
        if not isinstance(result, str):
            raise ValueError("Unexpected non-string result from etree.tostring")
        return result

    @classmethod
    def find_candidates(cls, text: str) -> List[str]:
        """
        Finds XML-like tool message candidates in text, with relaxed opening tag rules.

        The text is scanned left to right:
            - a root opening tag followed by a root closing tag yields the span
                between them, tags included;
            - a root closing tag that comes before the next root opening tag
                yields a fragment starting at the first `<word>` tag found since
                the previous candidate, with the root opening tag prepended;
            - the last root opening tag may lack a closing tag; the rest of the
                text is then returned with the root closing tag appended.

        Args:
            text: Input text to search for XML structures.

        Returns:
            List of XML strings, in order of appearance.

        Example:
            With root_tag="tool", given:
            "Hello <field1>data</field1> </tool>"
            Returns: ["<tool><field1>data</field1> </tool>"]
        """

        root_tag = cls.Config.root_element
        opening_tag = f"<{root_tag}>"
        closing_tag = f"</{root_tag}>"

        candidates = []
        pos = 0
        while True:
            # Look for the next opening tag and the next closing tag
            start_normal = text.find(opening_tag, pos)
            end = text.find(closing_tag, pos)

            if start_normal == -1 and end == -1:
                break

            if start_normal != -1 and (end == -1 or start_normal < end):
                # Normal case: the opening tag comes first, so `end` (if any)
                # is the first closing tag after it.
                if end != -1:
                    candidates.append(text[start_normal : end + len(closing_tag)])
                    pos = end + len(closing_tag)
                elif start_normal == text.rfind(opening_tag):
                    # last fragment - ok to miss closing tag
                    candidates.append(text[start_normal:] + closing_tag)
                    return candidates
                else:
                    pos = start_normal + 1
                continue

            # A closing tag comes first: the fragment lacks its opening tag.
            # Start it at the first XML tag since the previous candidate.
            text_before = text[pos:end]
            first_tag_match = re.search(r"<\w+>", text_before)
            if first_tag_match:
                start = pos + first_tag_match.start()
                candidates.append(
                    opening_tag + text[start : end + len(closing_tag)]
                )
            pos = end + len(closing_tag)

        return candidates