langroid 0.1.249__tar.gz → 0.1.251__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. {langroid-0.1.249 → langroid-0.1.251}/PKG-INFO +3 -1
  2. {langroid-0.1.249 → langroid-0.1.251}/README.md +2 -0
  3. {langroid-0.1.249 → langroid-0.1.251}/langroid/__init__.py +6 -1
  4. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/base.py +42 -13
  5. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/chat_agent.py +9 -11
  6. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/doc_chat_agent.py +49 -15
  7. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/lance_rag/critic_agent.py +7 -1
  8. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/lance_rag/query_planner_agent.py +1 -1
  9. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/task.py +187 -76
  10. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tool_message.py +4 -1
  11. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/recipient_tool.py +17 -9
  12. langroid-0.1.251/langroid/agent/tools/retrieval_tool.py +29 -0
  13. langroid-0.1.251/langroid/exceptions.py +3 -0
  14. {langroid-0.1.249 → langroid-0.1.251}/langroid/mytypes.py +12 -0
  15. langroid-0.1.251/langroid/parsing/routing.py +27 -0
  16. {langroid-0.1.249 → langroid-0.1.251}/langroid/prompts/templates.py +6 -5
  17. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/configuration.py +1 -0
  18. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/system.py +20 -0
  19. {langroid-0.1.249 → langroid-0.1.251}/pyproject.toml +1 -1
  20. {langroid-0.1.249 → langroid-0.1.251}/LICENSE +0 -0
  21. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/__init__.py +0 -0
  22. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/batch.py +0 -0
  23. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/callbacks/__init__.py +0 -0
  24. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/callbacks/chainlit.py +0 -0
  25. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/chat_document.py +0 -0
  26. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/helpers.py +0 -0
  27. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/junk +0 -0
  28. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/openai_assistant.py +0 -0
  29. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/__init__.py +0 -0
  30. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  31. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/lance_rag/__init__.py +0 -0
  32. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  33. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/lance_tools.py +0 -0
  34. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/neo4j/__init__.py +0 -0
  35. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  36. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  37. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/neo4j/utils/__init__.py +0 -0
  38. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/neo4j/utils/system_message.py +0 -0
  39. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  40. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/retriever_agent.py +0 -0
  41. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/sql/__init__.py +0 -0
  42. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  43. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/sql/utils/__init__.py +0 -0
  44. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  45. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  46. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/sql/utils/system_message.py +0 -0
  47. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/sql/utils/tools.py +0 -0
  48. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/special/table_chat_agent.py +0 -0
  49. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/__init__.py +0 -0
  50. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  51. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/extract_tool.py +0 -0
  52. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/generator_tool.py +0 -0
  53. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/google_search_tool.py +0 -0
  54. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  55. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/run_python_code.py +0 -0
  56. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent/tools/segment_extract_tool.py +0 -0
  57. {langroid-0.1.249 → langroid-0.1.251}/langroid/agent_config.py +0 -0
  58. {langroid-0.1.249 → langroid-0.1.251}/langroid/cachedb/__init__.py +0 -0
  59. {langroid-0.1.249 → langroid-0.1.251}/langroid/cachedb/base.py +0 -0
  60. {langroid-0.1.249 → langroid-0.1.251}/langroid/cachedb/momento_cachedb.py +0 -0
  61. {langroid-0.1.249 → langroid-0.1.251}/langroid/cachedb/redis_cachedb.py +0 -0
  62. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/__init__.py +0 -0
  63. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/base.py +0 -0
  64. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/clustering.py +0 -0
  65. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/models.py +0 -0
  66. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/protoc/__init__.py +0 -0
  67. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  68. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  69. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  70. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  71. {langroid-0.1.249 → langroid-0.1.251}/langroid/embedding_models/remote_embeds.py +0 -0
  72. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/__init__.py +0 -0
  73. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/azure_openai.py +0 -0
  74. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/base.py +0 -0
  75. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/config.py +0 -0
  76. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/openai_assistants.py +0 -0
  77. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/openai_gpt.py +0 -0
  78. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  79. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/prompt_formatter/base.py +0 -0
  80. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  81. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  82. {langroid-0.1.249 → langroid-0.1.251}/langroid/language_models/utils.py +0 -0
  83. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/__init__.py +0 -0
  84. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/agent_chats.py +0 -0
  85. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/code-parsing.md +0 -0
  86. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/code_parser.py +0 -0
  87. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/config.py +0 -0
  88. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/document_parser.py +0 -0
  89. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/image_text.py +0 -0
  90. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/para_sentence_split.py +0 -0
  91. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/parse_json.py +0 -0
  92. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/parser.py +0 -0
  93. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/repo_loader.py +0 -0
  94. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/search.py +0 -0
  95. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/spider.py +0 -0
  96. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/table_loader.py +0 -0
  97. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/url_loader.py +0 -0
  98. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/url_loader_cookies.py +0 -0
  99. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/urls.py +0 -0
  100. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/utils.py +0 -0
  101. {langroid-0.1.249 → langroid-0.1.251}/langroid/parsing/web_search.py +0 -0
  102. {langroid-0.1.249 → langroid-0.1.251}/langroid/prompts/__init__.py +0 -0
  103. {langroid-0.1.249 → langroid-0.1.251}/langroid/prompts/chat-gpt4-system-prompt.md +0 -0
  104. {langroid-0.1.249 → langroid-0.1.251}/langroid/prompts/dialog.py +0 -0
  105. {langroid-0.1.249 → langroid-0.1.251}/langroid/prompts/prompts_config.py +0 -0
  106. {langroid-0.1.249 → langroid-0.1.251}/langroid/prompts/transforms.py +0 -0
  107. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/__init__.py +0 -0
  108. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/algorithms/__init__.py +0 -0
  109. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/algorithms/graph.py +0 -0
  110. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/constants.py +0 -0
  111. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/docker.py +0 -0
  112. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/globals.py +0 -0
  113. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/llms/__init__.py +0 -0
  114. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/llms/strings.py +0 -0
  115. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/logging.py +0 -0
  116. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/output/__init__.py +0 -0
  117. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/output/printing.py +0 -0
  118. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/output/status.py +0 -0
  119. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/pandas_utils.py +0 -0
  120. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/pydantic_utils.py +0 -0
  121. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/web/__init__.py +0 -0
  122. {langroid-0.1.249 → langroid-0.1.251}/langroid/utils/web/login.py +0 -0
  123. {langroid-0.1.249 → langroid-0.1.251}/langroid/vector_store/__init__.py +0 -0
  124. {langroid-0.1.249 → langroid-0.1.251}/langroid/vector_store/base.py +0 -0
  125. {langroid-0.1.249 → langroid-0.1.251}/langroid/vector_store/chromadb.py +0 -0
  126. {langroid-0.1.249 → langroid-0.1.251}/langroid/vector_store/lancedb.py +0 -0
  127. {langroid-0.1.249 → langroid-0.1.251}/langroid/vector_store/meilisearch.py +0 -0
  128. {langroid-0.1.249 → langroid-0.1.251}/langroid/vector_store/momento.py +0 -0
  129. {langroid-0.1.249 → langroid-0.1.251}/langroid/vector_store/qdrant_cloud.py +0 -0
  130. {langroid-0.1.249 → langroid-0.1.251}/langroid/vector_store/qdrantdb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langroid
3
- Version: 0.1.249
3
+ Version: 0.1.251
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  License: MIT
6
6
  Author: Prasad Chalasani
@@ -237,6 +237,8 @@ teacher_task.run()
237
237
  <summary> <b>Click to expand</b></summary>
238
238
 
239
239
  - **May 2024:**
240
+ - [Much-Improved Citation](https://github.com/langroid/langroid/issues/477)
241
+ generation and display when using `DocChatAgent`.
240
242
  - `gpt-4o` is now the default LLM throughout; Update tests and examples to work
241
243
  with this LLM; use tokenizer corresponding to the LLM.
242
244
  - `gemini 1.5 pro` support via `litellm`
@@ -123,6 +123,8 @@ teacher_task.run()
123
123
  <summary> <b>Click to expand</b></summary>
124
124
 
125
125
  - **May 2024:**
126
+ - [Much-Improved Citation](https://github.com/langroid/langroid/issues/477)
127
+ generation and display when using `DocChatAgent`.
126
128
  - `gpt-4o` is now the default LLM throughout; Update tests and examples to work
127
129
  with this LLM; use tokenizer corresponding to the LLM.
128
130
  - `gemini 1.5 pro` support via `litellm`
@@ -41,7 +41,7 @@ from .agent.chat_agent import (
41
41
  ChatAgentConfig,
42
42
  )
43
43
 
44
- from .agent.task import Task
44
+ from .agent.task import Task, TaskConfig
45
45
 
46
46
  try:
47
47
  from .agent.callbacks.chainlit import (
@@ -64,8 +64,11 @@ from .mytypes import (
64
64
  Entity,
65
65
  )
66
66
 
67
+ from .exceptions import InfiniteLoopException
68
+
67
69
  __all__ = [
68
70
  "mytypes",
71
+ "exceptions",
69
72
  "utils",
70
73
  "parsing",
71
74
  "prompts",
@@ -82,6 +85,7 @@ __all__ = [
82
85
  "ChatDocument",
83
86
  "ChatDocMetaData",
84
87
  "Task",
88
+ "TaskConfig",
85
89
  "DocMetaData",
86
90
  "Document",
87
91
  "Entity",
@@ -89,6 +93,7 @@ __all__ = [
89
93
  "run_batch_tasks",
90
94
  "llm_response_batch",
91
95
  "agent_response_batch",
96
+ "InfiniteLoopException",
92
97
  ]
93
98
  if chainlit_available:
94
99
  __all__.extend(
@@ -87,6 +87,7 @@ class Agent(ABC):
87
87
  self.llm_tools_map: Dict[str, Type[ToolMessage]] = {}
88
88
  self.llm_tools_handled: Set[str] = set()
89
89
  self.llm_tools_usable: Set[str] = set()
90
+ self.interactive: bool | None = None
90
91
  self.total_llm_token_cost = 0.0
91
92
  self.total_llm_token_usage = 0
92
93
  self.token_stats_str = ""
@@ -223,8 +224,8 @@ class Agent(ABC):
223
224
  ):
224
225
  setattr(self, tool, lambda obj: obj.response(self))
225
226
 
226
- if hasattr(message_class, "handle_message_fallback") and inspect.isfunction(
227
- message_class.handle_message_fallback
227
+ if hasattr(message_class, "handle_message_fallback") and (
228
+ inspect.isfunction(message_class.handle_message_fallback)
228
229
  ):
229
230
  setattr(
230
231
  self,
@@ -279,9 +280,9 @@ class Agent(ABC):
279
280
  ]
280
281
  return "\n\n".join(sample_convo)
281
282
 
282
- def agent_response_template(self) -> ChatDocument:
283
+ def create_agent_response(self, content: str | None = None) -> ChatDocument:
283
284
  """Template for agent_response."""
284
- return self._response_template(Entity.AGENT)
285
+ return self._response_template(Entity.AGENT, content)
285
286
 
286
287
  async def agent_response_async(
287
288
  self,
@@ -342,19 +343,19 @@ class Agent(ABC):
342
343
  ),
343
344
  )
344
345
 
345
- def _response_template(self, e: Entity) -> ChatDocument:
346
+ def _response_template(self, e: Entity, content: str | None = None) -> ChatDocument:
346
347
  """Template for response from entity `e`."""
347
348
  return ChatDocument(
348
- content="",
349
+ content=content or "",
349
350
  tool_messages=[],
350
351
  metadata=ChatDocMetaData(
351
352
  source=e, sender=e, sender_name=self.config.name, tool_ids=[]
352
353
  ),
353
354
  )
354
355
 
355
- def user_response_template(self) -> ChatDocument:
356
+ def create_user_response(self, content: str | None = None) -> ChatDocument:
356
357
  """Template for user_response."""
357
- return self._response_template(Entity.USER)
358
+ return self._response_template(Entity.USER, content)
358
359
 
359
360
  async def user_response_async(
360
361
  self,
@@ -377,11 +378,21 @@ class Agent(ABC):
377
378
  (str) User response, packaged as a ChatDocument
378
379
 
379
380
  """
380
- if self.default_human_response is not None:
381
+
382
+ # When msg explicitly addressed to user, this means an actual human response
383
+ # is being sought.
384
+ need_human_response = (
385
+ isinstance(msg, ChatDocument) and msg.metadata.recipient == Entity.USER
386
+ )
387
+
388
+ interactive = (
389
+ self.interactive if self.interactive is not None else settings.interactive
390
+ )
391
+ if self.default_human_response is not None and not need_human_response:
381
392
  # useful for automated testing
382
393
  user_msg = self.default_human_response
383
- elif not settings.interactive:
384
- user_msg = ""
394
+ elif not interactive and not need_human_response:
395
+ return None
385
396
  else:
386
397
  if self.callbacks.get_user_response is not None:
387
398
  # ask user with empty prompt: no need for prompt
@@ -440,9 +451,9 @@ class Agent(ABC):
440
451
 
441
452
  return True
442
453
 
443
- def llm_response_template(self) -> ChatDocument:
454
+ def create_llm_response(self, content: str | None = None) -> ChatDocument:
444
455
  """Template for llm_response."""
445
- return self._response_template(Entity.LLM)
456
+ return self._response_template(Entity.LLM, content)
446
457
 
447
458
  @no_type_check
448
459
  async def llm_response_async(
@@ -736,6 +747,24 @@ class Agent(ABC):
736
747
 
737
748
  def _get_one_tool_message(self, json_str: str) -> Optional[ToolMessage]:
738
749
  json_data = json.loads(json_str)
750
+ # check if the json_data contains a "properties" field
751
+ # which further contains the actual tool-call
752
+ # (some weak LLMs do this). E.g. gpt-4o sometimes generates this:
753
+ # TOOL: {
754
+ # "type": "object",
755
+ # "properties": {
756
+ # "request": "square",
757
+ # "number": 9
758
+ # },
759
+ # "required": [
760
+ # "number",
761
+ # "request"
762
+ # ]
763
+ # }
764
+
765
+ properties = json_data.get("properties")
766
+ if properties is not None:
767
+ json_data = properties
739
768
  request = json_data.get("request")
740
769
  if (
741
770
  request is None
@@ -273,10 +273,11 @@ class ChatAgent(Agent):
273
273
  example = "" if self.config.use_tools else (msg_cls.usage_example())
274
274
  if example != "":
275
275
  example = "EXAMPLE: " + example
276
+ class_instructions = msg_cls.instructions()
276
277
  guidance = (
277
278
  ""
278
- if msg_cls.instructions() == ""
279
- else ("GUIDANCE: " + msg_cls.instructions())
279
+ if class_instructions == ""
280
+ else ("GUIDANCE: " + class_instructions)
280
281
  )
281
282
  if guidance == "" and example == "":
282
283
  continue
@@ -783,23 +784,20 @@ class ChatAgent(Agent):
783
784
  if self.llm is None:
784
785
  return
785
786
  if not citation_only and (not self.llm.get_stream() or is_cached):
786
- # We expect response to be LLMResponse in this context
787
- if not isinstance(response, LLMResponse):
788
- raise ValueError(
789
- "Expected response to be LLMResponse, but got "
790
- f"{type(response)} instead."
791
- )
792
787
  # We would have already displayed the msg "live" ONLY if
793
788
  # streaming was enabled, AND we did not find a cached response.
794
789
  # If we are here, it means the response has not yet been displayed.
795
790
  cached = f"[red]{self.indent}(cached)[/red]" if is_cached else ""
796
791
  if not settings.quiet:
792
+ chat_doc = (
793
+ response
794
+ if isinstance(response, ChatDocument)
795
+ else ChatDocument.from_LLMResponse(response, displayed=True)
796
+ )
797
797
  print(cached + "[green]" + escape(str(response)))
798
798
  self.callbacks.show_llm_response(
799
799
  content=str(response),
800
- is_tool=self.has_tool_message_attempt(
801
- ChatDocument.from_LLMResponse(response, displayed=True),
802
- ),
800
+ is_tool=self.has_tool_message_attempt(chat_doc),
803
801
  cached=is_cached,
804
802
  )
805
803
  if isinstance(response, LLMResponse):
@@ -14,7 +14,6 @@ pip install "langroid[hf-embeddings]"
14
14
  """
15
15
 
16
16
  import logging
17
- import re
18
17
  from functools import cache
19
18
  from typing import Any, Dict, List, Optional, Set, Tuple, no_type_check
20
19
 
@@ -31,6 +30,7 @@ from langroid.agent.special.relevance_extractor_agent import (
31
30
  RelevanceExtractorAgentConfig,
32
31
  )
33
32
  from langroid.agent.task import Task
33
+ from langroid.agent.tools.retrieval_tool import RetrievalTool
34
34
  from langroid.embedding_models.models import OpenAIEmbeddingsConfig
35
35
  from langroid.language_models.base import StreamingIfAllowed
36
36
  from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
@@ -82,21 +82,47 @@ except ImportError:
82
82
  pass
83
83
 
84
84
 
85
- def extract_citations(text: str) -> List[int]:
86
- # Find all patterns that match [[<numbers>]]
87
- matches = re.findall(r"\[\[([\d,]+)\]\]", text)
85
+ def extract_markdown_references(md_string: str) -> list[int]:
86
+ """
87
+ Extracts markdown references (e.g., [^1], [^2]) from a string and returns
88
+ them as a sorted list of integers.
89
+
90
+ Args:
91
+ md_string (str): The markdown string containing references.
92
+
93
+ Returns:
94
+ list[int]: A sorted list of unique integers from the markdown references.
95
+ """
96
+ import re
97
+
98
+ # Regex to find all occurrences of [^<number>]
99
+ matches = re.findall(r"\[\^(\d+)\]", md_string)
100
+ # Convert matches to integers, remove duplicates with set, and sort
101
+ return sorted(set(int(match) for match in matches))
88
102
 
89
- # Initialize a set to hold distinct citation numbers
90
- citations: Set[int] = set()
91
103
 
92
- # Process each match
93
- for match in matches:
94
- # Split numbers by comma and convert to integers
95
- numbers = match.split(",")
96
- citations.update(int(number) for number in numbers)
104
+ def format_footnote_text(content: str, width: int = 80) -> str:
105
+ """
106
+ Formats the content part of a footnote (i.e. not the first line that
107
+ appears right after the reference [^4])
108
+ It wraps the text so that no line is longer than the specified width and indents
109
+ lines as necessary for markdown footnotes.
110
+
111
+ Args:
112
+ content (str): The text of the footnote to be formatted.
113
+ width (int): Maximum width of the text lines.
97
114
 
98
- # Return a sorted list of unique citations
99
- return sorted(citations)
115
+ Returns:
116
+ str: Properly formatted markdown footnote text.
117
+ """
118
+ import textwrap
119
+
120
+ # Wrap the text to the specified width
121
+ wrapped_lines = textwrap.wrap(content, width)
122
+ if len(wrapped_lines) == 0:
123
+ return ""
124
+ indent = " " # Indentation for markdown footnotes
125
+ return indent + ("\n" + indent).join(wrapped_lines)
100
126
 
101
127
 
102
128
  class DocChatAgentConfig(ChatAgentConfig):
@@ -438,6 +464,13 @@ class DocChatAgent(ChatAgent):
438
464
  self.setup_documents(docs, filter=self.config.filter)
439
465
  return len(docs)
440
466
 
467
+ def retrieval_tool(self, msg: RetrievalTool) -> str:
468
+ """Handle the RetrievalTool message"""
469
+ self.config.retrieve_only = True
470
+ self.config.parsing.n_similar_docs = msg.num_results
471
+ content_doc = self.answer_from_docs(msg.query)
472
+ return content_doc.content
473
+
441
474
  @staticmethod
442
475
  def document_compatible_dataframe(
443
476
  df: pd.DataFrame,
@@ -808,14 +841,15 @@ class DocChatAgent(ChatAgent):
808
841
  final_answer = answer_doc.content.strip()
809
842
  show_if_debug(final_answer, "SUMMARIZE_RESPONSE= ")
810
843
 
811
- citations = extract_citations(final_answer)
844
+ citations = extract_markdown_references(final_answer)
812
845
 
813
846
  citations_str = ""
814
847
  if len(citations) > 0:
815
848
  # append [i] source, content for each citation
816
849
  citations_str = "\n".join(
817
850
  [
818
- f"[{c}] {passages[c-1].metadata.source}\n{passages[c-1].content}"
851
+ f"[^{c}] {passages[c-1].metadata.source}"
852
+ f"\n{format_footnote_text(passages[c-1].content)}"
819
853
  for c in citations
820
854
  ]
821
855
  )
@@ -70,13 +70,19 @@ class QueryPlanCriticConfig(LanceQueryPlanAgentConfig):
70
70
  plan execution FAILED, and your feedback should say INVALID along
71
71
  with the ERROR message, `suggested_fix` that aims to help the assistant
72
72
  fix the problem (or simply equals "address the the error shown in feedback")
73
+ - Ask yourself, is the ANSWER in the expected form, e.g.
74
+ if the question is asking for the name of an ENTITY with max SIZE,
75
+ then the answer should be the ENTITY name, NOT the SIZE!!
73
76
  - If the ANSWER is in the expected form, then the QUERY PLAN is likely VALID,
74
77
  and your feedback should say VALID, with empty `suggested_fix`.
78
+ ===> HOWEVER!!! Watch out for a spurious correct-looking answer, for EXAMPLE:
79
+ the query was to find the ENTITY with a maximum SIZE,
80
+ but the dataframe calculation is find the SIZE, NOT the ENTITY!!
75
81
  - If the ANSWER is {NO_ANSWER} or of the wrong form,
76
82
  then try to DIAGNOSE the problem IN THE FOLLOWING ORDER:
77
83
  - DATAFRAME CALCULATION -- is it doing the right thing?
78
84
  Is it finding the Index of a row instead of the value in a column?
79
- Or another example: mmaybe it is finding the maximum population
85
+ Or another example: maybe it is finding the maximum population
80
86
  rather than the CITY with the maximum population?
81
87
  If you notice a problem with the DATAFRAME CALCULATION, then
82
88
  ONLY SUBMIT FEEDBACK ON THE DATAFRAME CALCULATION, and DO NOT
@@ -195,7 +195,7 @@ class LanceQueryPlanAgent(ChatAgent):
195
195
  plan=self.curr_query_plan,
196
196
  answer=self.result,
197
197
  )
198
- response_tmpl = self.agent_response_template()
198
+ response_tmpl = self.create_agent_response()
199
199
  # ... add the QueryPlanAnswerTool to the response
200
200
  # (Notice how the Agent is directly sending a tool, not the LLM)
201
201
  response_tmpl.tool_messages = [query_plan_answer_tool]