langroid-0.56.11.tar.gz → langroid-0.56.13.tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. {langroid-0.56.11 → langroid-0.56.13}/PKG-INFO +1 -1
  2. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/base.py +9 -3
  3. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/task.py +27 -11
  4. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/task_tool.py +71 -13
  5. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/base.py +25 -19
  6. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/model_info.py +57 -3
  7. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/openai_gpt.py +45 -14
  8. {langroid-0.56.11 → langroid-0.56.13}/pyproject.toml +1 -1
  9. {langroid-0.56.11 → langroid-0.56.13}/.gitignore +0 -0
  10. {langroid-0.56.11 → langroid-0.56.13}/LICENSE +0 -0
  11. {langroid-0.56.11 → langroid-0.56.13}/README.md +0 -0
  12. {langroid-0.56.11 → langroid-0.56.13}/langroid/__init__.py +0 -0
  13. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/__init__.py +0 -0
  14. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/batch.py +0 -0
  15. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/callbacks/__init__.py +0 -0
  16. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/callbacks/chainlit.py +0 -0
  17. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/chat_agent.py +0 -0
  18. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/chat_document.py +0 -0
  19. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/done_sequence_parser.py +0 -0
  20. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/openai_assistant.py +0 -0
  21. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/__init__.py +0 -0
  22. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/arangodb/__init__.py +0 -0
  23. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  24. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/arangodb/system_messages.py +0 -0
  25. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/arangodb/tools.py +0 -0
  26. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/arangodb/utils.py +0 -0
  27. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/doc_chat_agent.py +0 -0
  28. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/doc_chat_task.py +0 -0
  29. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  30. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/lance_rag/__init__.py +0 -0
  31. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  32. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  33. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  34. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/lance_tools.py +0 -0
  35. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/neo4j/__init__.py +0 -0
  36. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  37. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  38. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/neo4j/system_messages.py +0 -0
  39. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/neo4j/tools.py +0 -0
  40. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  41. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/retriever_agent.py +0 -0
  42. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/sql/__init__.py +0 -0
  43. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  44. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/sql/utils/__init__.py +0 -0
  45. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  46. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  47. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/sql/utils/system_message.py +0 -0
  48. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/sql/utils/tools.py +0 -0
  49. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/special/table_chat_agent.py +0 -0
  50. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tool_message.py +0 -0
  51. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/__init__.py +0 -0
  52. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  53. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/exa_search_tool.py +0 -0
  54. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/file_tools.py +0 -0
  55. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/google_search_tool.py +0 -0
  56. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/mcp/__init__.py +0 -0
  57. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/mcp/decorators.py +0 -0
  58. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/mcp/fastmcp_client.py +0 -0
  59. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  60. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/orchestration.py +0 -0
  61. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/recipient_tool.py +0 -0
  62. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/retrieval_tool.py +0 -0
  63. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/rewind_tool.py +0 -0
  64. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/segment_extract_tool.py +0 -0
  65. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/tools/tavily_search_tool.py +0 -0
  66. {langroid-0.56.11 → langroid-0.56.13}/langroid/agent/xml_tool_message.py +0 -0
  67. {langroid-0.56.11 → langroid-0.56.13}/langroid/cachedb/__init__.py +0 -0
  68. {langroid-0.56.11 → langroid-0.56.13}/langroid/cachedb/base.py +0 -0
  69. {langroid-0.56.11 → langroid-0.56.13}/langroid/cachedb/redis_cachedb.py +0 -0
  70. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/__init__.py +0 -0
  71. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/base.py +0 -0
  72. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/models.py +0 -0
  73. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/protoc/__init__.py +0 -0
  74. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  75. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  76. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  77. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  78. {langroid-0.56.11 → langroid-0.56.13}/langroid/embedding_models/remote_embeds.py +0 -0
  79. {langroid-0.56.11 → langroid-0.56.13}/langroid/exceptions.py +0 -0
  80. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/__init__.py +0 -0
  81. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/azure_openai.py +0 -0
  82. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/client_cache.py +0 -0
  83. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/config.py +0 -0
  84. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/mock_lm.py +0 -0
  85. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  86. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/prompt_formatter/base.py +0 -0
  87. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  88. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  89. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/provider_params.py +0 -0
  90. {langroid-0.56.11 → langroid-0.56.13}/langroid/language_models/utils.py +0 -0
  91. {langroid-0.56.11 → langroid-0.56.13}/langroid/mcp/__init__.py +0 -0
  92. {langroid-0.56.11 → langroid-0.56.13}/langroid/mcp/server/__init__.py +0 -0
  93. {langroid-0.56.11 → langroid-0.56.13}/langroid/mytypes.py +0 -0
  94. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/__init__.py +0 -0
  95. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/agent_chats.py +0 -0
  96. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/code_parser.py +0 -0
  97. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/document_parser.py +0 -0
  98. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/file_attachment.py +0 -0
  99. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/md_parser.py +0 -0
  100. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/para_sentence_split.py +0 -0
  101. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/parse_json.py +0 -0
  102. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/parser.py +0 -0
  103. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/pdf_utils.py +0 -0
  104. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/repo_loader.py +0 -0
  105. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/routing.py +0 -0
  106. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/search.py +0 -0
  107. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/spider.py +0 -0
  108. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/table_loader.py +0 -0
  109. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/url_loader.py +0 -0
  110. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/urls.py +0 -0
  111. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/utils.py +0 -0
  112. {langroid-0.56.11 → langroid-0.56.13}/langroid/parsing/web_search.py +0 -0
  113. {langroid-0.56.11 → langroid-0.56.13}/langroid/prompts/__init__.py +0 -0
  114. {langroid-0.56.11 → langroid-0.56.13}/langroid/prompts/dialog.py +0 -0
  115. {langroid-0.56.11 → langroid-0.56.13}/langroid/prompts/prompts_config.py +0 -0
  116. {langroid-0.56.11 → langroid-0.56.13}/langroid/prompts/templates.py +0 -0
  117. {langroid-0.56.11 → langroid-0.56.13}/langroid/py.typed +0 -0
  118. {langroid-0.56.11 → langroid-0.56.13}/langroid/pydantic_v1/__init__.py +0 -0
  119. {langroid-0.56.11 → langroid-0.56.13}/langroid/pydantic_v1/main.py +0 -0
  120. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/__init__.py +0 -0
  121. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/algorithms/__init__.py +0 -0
  122. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/algorithms/graph.py +0 -0
  123. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/configuration.py +0 -0
  124. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/constants.py +0 -0
  125. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/git_utils.py +0 -0
  126. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/globals.py +0 -0
  127. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/logging.py +0 -0
  128. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/object_registry.py +0 -0
  129. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/output/__init__.py +0 -0
  130. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/output/citations.py +0 -0
  131. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/output/printing.py +0 -0
  132. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/output/status.py +0 -0
  133. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/pandas_utils.py +0 -0
  134. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/pydantic_utils.py +0 -0
  135. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/system.py +0 -0
  136. {langroid-0.56.11 → langroid-0.56.13}/langroid/utils/types.py +0 -0
  137. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/__init__.py +0 -0
  138. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/base.py +0 -0
  139. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/chromadb.py +0 -0
  140. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/lancedb.py +0 -0
  141. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/meilisearch.py +0 -0
  142. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/pineconedb.py +0 -0
  143. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/postgres.py +0 -0
  144. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/qdrantdb.py +0 -0
  145. {langroid-0.56.11 → langroid-0.56.13}/langroid/vector_store/weaviatedb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.56.11
3
+ Version: 0.56.13
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -137,6 +137,7 @@ class Agent(ABC):
137
137
 
138
138
  def __init__(self, config: AgentConfig = AgentConfig()):
139
139
  self.config = config
140
+ self.id = ObjectRegistry.new_id() # Initialize agent ID
140
141
  self.lock = asyncio.Lock() # for async access to update self.llm.usage_cost
141
142
  self.dialog: List[Tuple[str, str]] = [] # seq of LLM (prompt, response) tuples
142
143
  self.llm_tools_map: Dict[str, Type[ToolMessage]] = {}
@@ -685,6 +686,7 @@ class Agent(ABC):
685
686
  results.metadata.tool_ids = (
686
687
  [] if msg is None or isinstance(msg, str) else msg.metadata.tool_ids
687
688
  )
689
+ results.metadata.agent_id = self.id
688
690
  return results
689
691
  sender_name = self.config.name
690
692
  if isinstance(msg, ChatDocument) and msg.function_call is not None:
@@ -703,6 +705,7 @@ class Agent(ABC):
703
705
  metadata=ChatDocMetaData(
704
706
  source=Entity.AGENT,
705
707
  sender=Entity.AGENT,
708
+ agent_id=self.id,
706
709
  sender_name=sender_name,
707
710
  oai_tool_id=oai_tool_id,
708
711
  # preserve trail of tool_ids for OpenAI Assistant fn-calls
@@ -967,6 +970,7 @@ class Agent(ABC):
967
970
  return ChatDocument(
968
971
  content=user_msg,
969
972
  metadata=ChatDocMetaData(
973
+ agent_id=self.id,
970
974
  source=source,
971
975
  sender=sender,
972
976
  # preserve trail of tool_ids for OpenAI Assistant fn-calls
@@ -2142,7 +2146,7 @@ class Agent(ABC):
2142
2146
  completion_tokens = self.num_tokens(response.message)
2143
2147
  if response.function_call is not None:
2144
2148
  completion_tokens += self.num_tokens(str(response.function_call))
2145
- cost = self.compute_token_cost(prompt_tokens, completion_tokens)
2149
+ cost = self.compute_token_cost(prompt_tokens, 0, completion_tokens)
2146
2150
  response.usage = LLMTokenUsage(
2147
2151
  prompt_tokens=prompt_tokens,
2148
2152
  completion_tokens=completion_tokens,
@@ -2166,9 +2170,11 @@ class Agent(ABC):
2166
2170
  if print_response_stats:
2167
2171
  print(self.indent + self.token_stats_str)
2168
2172
 
2169
- def compute_token_cost(self, prompt: int, completion: int) -> float:
2173
+ def compute_token_cost(self, prompt: int, cached: int, completion: int) -> float:
2170
2174
  price = cast(LanguageModel, self.llm).chat_cost()
2171
- return (price[0] * prompt + price[1] * completion) / 1000
2175
+ return (
2176
+ price[0] * (prompt - cached) + price[1] * cached + price[2] * completion
2177
+ ) / 1000
2172
2178
 
2173
2179
  def ask_agent(
2174
2180
  self,
@@ -615,7 +615,10 @@ class Task:
615
615
  if isinstance(msg, ChatDocument):
616
616
  # carefully deep-copy: fresh metadata.id, register
617
617
  # as new obj in registry
618
+ original_parent_id = msg.metadata.parent_id
618
619
  self.pending_message = ChatDocument.deepcopy(msg)
620
+ # Preserve the parent pointer from the original message
621
+ self.pending_message.metadata.parent_id = original_parent_id
619
622
  if self.pending_message is not None and self.caller is not None:
620
623
  # msg may have come from `caller`, so we pretend this is from
621
624
  # the CURRENT task's USER entity
@@ -623,7 +626,11 @@ class Task:
623
626
  # update parent, child, agent pointers
624
627
  if msg is not None:
625
628
  msg.metadata.child_id = self.pending_message.metadata.id
626
- self.pending_message.metadata.parent_id = msg.metadata.id
629
+ # Only override parent_id if it wasn't already set in the
630
+ # original message. This preserves parent chains from TaskTool
631
+ if not msg.metadata.parent_id:
632
+ self.pending_message.metadata.parent_id = msg.metadata.id
633
+ if self.pending_message is not None:
627
634
  self.pending_message.metadata.agent_id = self.agent.id
628
635
 
629
636
  self._show_pending_message_if_debug()
@@ -2250,24 +2257,33 @@ class Task:
2250
2257
  def _get_message_chain(
2251
2258
  self, msg: ChatDocument | None, max_depth: Optional[int] = None
2252
2259
  ) -> List[ChatDocument]:
2253
- """Get the chain of messages by following parent pointers."""
2260
+ """Get the chain of messages using agent's message history."""
2254
2261
  if max_depth is None:
2255
2262
  # Get max depth needed from all sequences
2256
2263
  max_depth = 50 # default fallback
2257
2264
  if self._parsed_done_sequences:
2258
2265
  max_depth = max(len(seq.events) for seq in self._parsed_done_sequences)
2259
2266
 
2260
- chain = []
2261
- current = msg
2262
- depth = 0
2267
+ # Get chat document IDs from message history
2268
+ doc_ids = [
2269
+ m.chat_document_id for m in self.agent.message_history if m.chat_document_id
2270
+ ]
2271
+
2272
+ # Add current message ID if it exists and is not already the last one
2273
+ if msg:
2274
+ msg_id = msg.id()
2275
+ if not doc_ids or doc_ids[-1] != msg_id:
2276
+ doc_ids.append(msg_id)
2263
2277
 
2264
- while current is not None and depth < max_depth:
2265
- chain.append(current)
2266
- current = current.parent
2267
- depth += 1
2278
+ # Take only the last max_depth elements
2279
+ relevant_ids = doc_ids[-max_depth:]
2268
2280
 
2269
- # Reverse to get chronological order (oldest first)
2270
- return list(reversed(chain))
2281
+ # Convert IDs to ChatDocuments and filter out None values
2282
+ return [
2283
+ doc
2284
+ for doc_id in relevant_ids
2285
+ if (doc := ChatDocument.from_id(doc_id)) is not None
2286
+ ]
2271
2287
 
2272
2288
  def _matches_event(self, actual: AgentEvent, expected: AgentEvent) -> bool:
2273
2289
  """Check if an actual event matches an expected event pattern."""
@@ -38,10 +38,13 @@ class TaskTool(ToolMessage):
38
38
  system_message: Optional[str] = Field(
39
39
  ...,
40
40
  description="""
41
- Optional system message to configure the sub-agent's general behavior.
41
+ Optional system message to configure the sub-agent's general behavior and
42
+ to specify the task and its context.
42
43
  A good system message will have these components:
43
44
  - Inform the sub-agent of its role, e.g. "You are a financial analyst."
44
- - Clear spec of the task
45
+ - Clear spec of the task, with sufficient context for the sub-agent to
46
+ understand what it needs to do, since the sub-agent does
47
+ NOT have access to your conversation history!
45
48
  - Any additional general context needed for the task, such as a
46
49
  (part of a) document, or data items, etc.
47
50
  - Specify when to use certain tools, e.g.
@@ -73,9 +76,10 @@ class TaskTool(ToolMessage):
73
76
  A list of tool names to enable for the sub-agent.
74
77
  This must be a list of strings referring to the names of tools
75
78
  that are known to you.
76
- If you want to enable all tools, you can set this field
77
- to a singleton list containing 'ALL'
78
- To disable all tools, set it to a singleton list containing 'NONE'
79
+ If you want to enable all tools, or you do not have any preference
80
+ on what tools are enabled for the sub-agent, you can set
81
+ this field to a singleton list ['ALL']
82
+ To disable all tools, set it to a singleton list ['NONE']
79
83
  """,
80
84
  )
81
85
  # TODO: ensure valid model name
@@ -113,11 +117,20 @@ class TaskTool(ToolMessage):
113
117
  # TODO: Maybe we just copy the parent agent's config and override chat_model?
114
118
  # -- but what if parent agent has a MockLMConfig?
115
119
  llm_config = lm.OpenAIGPTConfig(
116
- chat_model=self.model or "gpt-4.1-mini", # Default model if not specified
120
+ chat_model=self.model or lm.OpenAIChatModel.GPT4_1_MINI,
117
121
  )
118
122
  config = ChatAgentConfig(
119
123
  name=agent_name,
120
124
  llm=llm_config,
125
+ handle_llm_no_tool=f"""
126
+ You forgot to use one of your TOOLs! Remember that you must either:
127
+ - use a tool, or a sequence of tools, to complete your task, OR
128
+ - if you are done with your task, use the `{DoneTool.name()}` tool
129
+ to return the result.
130
+
131
+ As a reminder, this was your task:
132
+ {self.prompt}
133
+ """,
121
134
  system_message=f"""
122
135
  {self.system_message}
123
136
 
@@ -138,7 +151,9 @@ class TaskTool(ToolMessage):
138
151
  tool_classes = [
139
152
  agent.llm_tools_map[t]
140
153
  for t in agent.llm_tools_known
141
- if t in agent.llm_tools_map and t != self.request
154
+ if t in agent.llm_tools_map
155
+ and t != self.request
156
+ and agent.llm_tools_map[t]._allow_llm_use
142
157
  # Exclude the TaskTool itself!
143
158
  ]
144
159
  elif self.tools == ["NONE"]:
@@ -150,6 +165,7 @@ class TaskTool(ToolMessage):
150
165
  agent.llm_tools_map[tool_name]
151
166
  for tool_name in self.tools
152
167
  if tool_name in agent.llm_tools_map
168
+ and agent.llm_tools_map[tool_name]._allow_llm_use
153
169
  ]
154
170
 
155
171
  # always enable the DoneTool to signal task completion
@@ -160,7 +176,9 @@ class TaskTool(ToolMessage):
160
176
 
161
177
  return task
162
178
 
163
- def handle(self, agent: ChatAgent) -> Optional[ChatDocument]:
179
+ def handle(
180
+ self, agent: ChatAgent, chat_doc: Optional[ChatDocument] = None
181
+ ) -> Optional[ChatDocument]:
164
182
  """
165
183
 
166
184
  Handle the TaskTool by creating a sub-agent with specified tools
@@ -168,24 +186,64 @@ class TaskTool(ToolMessage):
168
186
 
169
187
  Args:
170
188
  agent: The parent ChatAgent that is handling this tool
189
+ chat_doc: The ChatDocument containing this tool message
171
190
  """
172
191
 
173
192
  task = self._set_up_task(agent)
174
- # Run the task on the prompt, and return the result
175
- result = task.run(self.prompt, turns=self.max_iterations or 10)
193
+
194
+ # Create a ChatDocument for the prompt with parent pointer
195
+ prompt_doc = None
196
+ if chat_doc is not None:
197
+ from langroid.agent.chat_document import ChatDocMetaData
198
+
199
+ prompt_doc = ChatDocument(
200
+ content=self.prompt,
201
+ metadata=ChatDocMetaData(
202
+ parent_id=chat_doc.id(),
203
+ agent_id=agent.id,
204
+ sender=chat_doc.metadata.sender,
205
+ ),
206
+ )
207
+ # Set bidirectional parent-child relationship
208
+ chat_doc.metadata.child_id = prompt_doc.id()
209
+
210
+ # Run the task with the ChatDocument or string prompt
211
+ result = task.run(prompt_doc or self.prompt, turns=self.max_iterations or 10)
176
212
  return result
177
213
 
178
- async def handle_async(self, agent: ChatAgent) -> Optional[ChatDocument]:
214
+ async def handle_async(
215
+ self, agent: ChatAgent, chat_doc: Optional[ChatDocument] = None
216
+ ) -> Optional[ChatDocument]:
179
217
  """
180
218
  Async method to handle the TaskTool by creating a sub-agent with specified tools
181
219
  and running the task non-interactively.
182
220
 
183
221
  Args:
184
222
  agent: The parent ChatAgent that is handling this tool
223
+ chat_doc: The ChatDocument containing this tool message
185
224
  """
186
225
  task = self._set_up_task(agent)
187
- # Run the task on the prompt, and return the result
226
+
227
+ # Create a ChatDocument for the prompt with parent pointer
228
+ prompt_doc = None
229
+ if chat_doc is not None:
230
+ from langroid.agent.chat_document import ChatDocMetaData
231
+
232
+ prompt_doc = ChatDocument(
233
+ content=self.prompt,
234
+ metadata=ChatDocMetaData(
235
+ parent_id=chat_doc.id(),
236
+ agent_id=agent.id,
237
+ sender=chat_doc.metadata.sender,
238
+ ),
239
+ )
240
+ # Set bidirectional parent-child relationship
241
+ chat_doc.metadata.child_id = prompt_doc.id()
242
+
243
+ # Run the task with the ChatDocument or string prompt
188
244
  # TODO eventually allow the various task setup configs,
189
245
  # including termination conditions
190
- result = await task.run_async(self.prompt, turns=self.max_iterations or 10)
246
+ result = await task.run_async(
247
+ prompt_doc or self.prompt, turns=self.max_iterations or 10
248
+ )
191
249
  return result
@@ -91,10 +91,6 @@ class LLMConfig(BaseSettings):
91
91
  # reasoning output from reasoning models
92
92
  cache_config: None | CacheDBConfig = RedisCacheConfig()
93
93
  thought_delimiters: Tuple[str, str] = ("<think>", "</think>")
94
-
95
- # Dict of model -> (input/prompt cost, output/completion cost)
96
- chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
97
- completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
98
94
  retry_params: RetryParams = RetryParams()
99
95
 
100
96
  @property
@@ -131,7 +127,7 @@ class LLMFunctionCall(BaseModel):
131
127
  if not isinstance(dict_or_list, dict):
132
128
  raise ValueError(
133
129
  f"""
134
- Invalid function args: {fun_args_str}
130
+ Invalid function args: {fun_args_str}
135
131
  parsed as {dict_or_list},
136
132
  which is not a valid dict.
137
133
  """
@@ -224,12 +220,14 @@ class LLMTokenUsage(BaseModel):
224
220
  """
225
221
 
226
222
  prompt_tokens: int = 0
223
+ cached_tokens: int = 0
227
224
  completion_tokens: int = 0
228
225
  cost: float = 0.0
229
226
  calls: int = 0 # how many API calls - not used as of 2025-04-04
230
227
 
231
228
  def reset(self) -> None:
232
229
  self.prompt_tokens = 0
230
+ self.cached_tokens = 0
233
231
  self.completion_tokens = 0
234
232
  self.cost = 0.0
235
233
  self.calls = 0
@@ -237,7 +235,8 @@ class LLMTokenUsage(BaseModel):
237
235
  def __str__(self) -> str:
238
236
  return (
239
237
  f"Tokens = "
240
- f"(prompt {self.prompt_tokens}, completion {self.completion_tokens}), "
238
+ f"(prompt {self.prompt_tokens}, cached {self.cached_tokens}, "
239
+ f"completion {self.completion_tokens}), "
241
240
  f"Cost={self.cost}, Calls={self.calls}"
242
241
  )
243
242
 
@@ -462,9 +461,9 @@ class LanguageModel(ABC):
462
461
  if type(config) is LLMConfig:
463
462
  raise ValueError(
464
463
  """
465
- Cannot create a Language Model object from LLMConfig.
466
- Please specify a specific subclass of LLMConfig e.g.,
467
- OpenAIGPTConfig. If you are creating a ChatAgent from
464
+ Cannot create a Language Model object from LLMConfig.
465
+ Please specify a specific subclass of LLMConfig e.g.,
466
+ OpenAIGPTConfig. If you are creating a ChatAgent from
468
467
  a ChatAgentConfig, please specify the `llm` field of this config
469
468
  as a specific subclass of LLMConfig, e.g., OpenAIGPTConfig.
470
469
  """
@@ -666,8 +665,15 @@ class LanguageModel(ABC):
666
665
  def completion_context_length(self) -> int:
667
666
  return self.config.completion_context_length or DEFAULT_CONTEXT_LENGTH
668
667
 
669
- def chat_cost(self) -> Tuple[float, float]:
670
- return self.config.chat_cost_per_1k_tokens
668
+ def chat_cost(self) -> Tuple[float, float, float]:
669
+ """
670
+ Return the cost per 1000 tokens for chat completions.
671
+
672
+ Returns:
673
+ Tuple[float, float, float]: (input_cost, cached_cost, output_cost)
674
+ per 1000 tokens
675
+ """
676
+ return (0.0, 0.0, 0.0)
671
677
 
672
678
  def reset_usage_cost(self) -> None:
673
679
  for mdl in [self.config.chat_model, self.config.completion_model]:
@@ -754,18 +760,18 @@ class LanguageModel(ABC):
754
760
 
755
761
  prompt = f"""
756
762
  You are an expert at understanding a CHAT HISTORY between an AI Assistant
757
- and a User, and you are highly skilled in rephrasing the User's FOLLOW-UP
758
- QUESTION/REQUEST as a STANDALONE QUESTION/REQUEST that can be understood
763
+ and a User, and you are highly skilled in rephrasing the User's FOLLOW-UP
764
+ QUESTION/REQUEST as a STANDALONE QUESTION/REQUEST that can be understood
759
765
  WITHOUT the context of the chat history.
760
-
761
- Below is the CHAT HISTORY. When the User asks you to rephrase a
762
- FOLLOW-UP QUESTION/REQUEST, your ONLY task is to simply return the
763
- question REPHRASED as a STANDALONE QUESTION/REQUEST, without any additional
766
+
767
+ Below is the CHAT HISTORY. When the User asks you to rephrase a
768
+ FOLLOW-UP QUESTION/REQUEST, your ONLY task is to simply return the
769
+ question REPHRASED as a STANDALONE QUESTION/REQUEST, without any additional
764
770
  text or context.
765
-
771
+
766
772
  <CHAT_HISTORY>
767
773
  {history}
768
- </CHAT_HISTORY>
774
+ </CHAT_HISTORY>
769
775
  """.strip()
770
776
 
771
777
  follow_up_question = f"""
@@ -69,7 +69,9 @@ class GeminiModel(ModelName):
69
69
  GEMINI_1_5_FLASH = "gemini-1.5-flash"
70
70
  GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
71
71
  GEMINI_1_5_PRO = "gemini-1.5-pro"
72
- GEMINI_2_5_PRO = "gemini-2.5-pro-exp-02-05"
72
+ GEMINI_2_5_PRO = "gemini-2.5-pro"
73
+ GEMINI_2_5_FLASH = "gemini-2.5-flash"
74
+ GEMINI_2_5_FLASH_LITE_PREVIEW = "gemini-2.5-flash-lite-preview-06-17"
73
75
  GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
74
76
  GEMINI_2_FLASH = "gemini-2.0-flash"
75
77
  GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
@@ -108,6 +110,7 @@ class ModelInfo(BaseModel):
108
110
  max_cot_tokens: int = 0 # max chain of thought (thinking) tokens where applicable
109
111
  max_output_tokens: int = 8192 # Maximum number of output tokens - model dependent
110
112
  input_cost_per_million: float = 0.0 # Cost in USD per million input tokens
113
+ cached_cost_per_million: float = 0.0 # Cost in USD per million cached tokens
111
114
  output_cost_per_million: float = 0.0 # Cost in USD per million output tokens
112
115
  allows_streaming: bool = True # Whether model supports streaming output
113
116
  allows_system_message: bool = True # Whether model supports system messages
@@ -173,6 +176,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
173
176
  context_length=1_047_576,
174
177
  max_output_tokens=32_768,
175
178
  input_cost_per_million=0.10,
179
+ cached_cost_per_million=0.025,
176
180
  output_cost_per_million=0.40,
177
181
  description="GPT-4.1",
178
182
  ),
@@ -182,6 +186,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
182
186
  context_length=1_047_576,
183
187
  max_output_tokens=32_768,
184
188
  input_cost_per_million=0.40,
189
+ cached_cost_per_million=0.10,
185
190
  output_cost_per_million=1.60,
186
191
  description="GPT-4.1 Mini",
187
192
  ),
@@ -191,6 +196,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
191
196
  context_length=1_047_576,
192
197
  max_output_tokens=32_768,
193
198
  input_cost_per_million=2.00,
199
+ cached_cost_per_million=0.50,
194
200
  output_cost_per_million=8.00,
195
201
  description="GPT-4.1",
196
202
  ),
@@ -200,6 +206,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
200
206
  context_length=128_000,
201
207
  max_output_tokens=16_384,
202
208
  input_cost_per_million=2.5,
209
+ cached_cost_per_million=1.25,
203
210
  output_cost_per_million=10.0,
204
211
  has_structured_output=True,
205
212
  description="GPT-4o (128K context)",
@@ -210,6 +217,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
210
217
  context_length=128_000,
211
218
  max_output_tokens=16_384,
212
219
  input_cost_per_million=0.15,
220
+ cached_cost_per_million=0.075,
213
221
  output_cost_per_million=0.60,
214
222
  has_structured_output=True,
215
223
  description="GPT-4o Mini",
@@ -220,6 +228,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
220
228
  context_length=200_000,
221
229
  max_output_tokens=100_000,
222
230
  input_cost_per_million=15.0,
231
+ cached_cost_per_million=7.50,
223
232
  output_cost_per_million=60.0,
224
233
  allows_streaming=True,
225
234
  allows_system_message=False,
@@ -233,8 +242,9 @@ MODEL_INFO: Dict[str, ModelInfo] = {
233
242
  provider=ModelProvider.OPENAI,
234
243
  context_length=200_000,
235
244
  max_output_tokens=100_000,
236
- input_cost_per_million=10.0,
237
- output_cost_per_million=40.0,
245
+ input_cost_per_million=2.0,
246
+ cached_cost_per_million=0.50,
247
+ output_cost_per_million=8.0,
238
248
  allows_streaming=True,
239
249
  allows_system_message=False,
240
250
  unsupported_params=["temperature"],
@@ -248,6 +258,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
248
258
  context_length=128_000,
249
259
  max_output_tokens=65_536,
250
260
  input_cost_per_million=1.1,
261
+ cached_cost_per_million=0.55,
251
262
  output_cost_per_million=4.4,
252
263
  allows_streaming=False,
253
264
  allows_system_message=False,
@@ -262,6 +273,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
262
273
  context_length=200_000,
263
274
  max_output_tokens=100_000,
264
275
  input_cost_per_million=1.1,
276
+ cached_cost_per_million=0.55,
265
277
  output_cost_per_million=4.4,
266
278
  allows_streaming=False,
267
279
  allows_system_message=False,
@@ -276,6 +288,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
276
288
  context_length=200_000,
277
289
  max_output_tokens=100_000,
278
290
  input_cost_per_million=1.10,
291
+ cached_cost_per_million=0.275,
279
292
  output_cost_per_million=4.40,
280
293
  allows_streaming=False,
281
294
  allows_system_message=False,
@@ -291,6 +304,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
291
304
  context_length=200_000,
292
305
  max_output_tokens=8192,
293
306
  input_cost_per_million=3.0,
307
+ cached_cost_per_million=0.30,
294
308
  output_cost_per_million=15.0,
295
309
  description="Claude 3.5 Sonnet",
296
310
  ),
@@ -300,6 +314,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
300
314
  context_length=200_000,
301
315
  max_output_tokens=4096,
302
316
  input_cost_per_million=15.0,
317
+ cached_cost_per_million=1.50,
303
318
  output_cost_per_million=75.0,
304
319
  description="Claude 3 Opus",
305
320
  ),
@@ -309,6 +324,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
309
324
  context_length=200_000,
310
325
  max_output_tokens=4096,
311
326
  input_cost_per_million=3.0,
327
+ cached_cost_per_million=0.30,
312
328
  output_cost_per_million=15.0,
313
329
  description="Claude 3 Sonnet",
314
330
  ),
@@ -318,6 +334,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
318
334
  context_length=200_000,
319
335
  max_output_tokens=4096,
320
336
  input_cost_per_million=0.25,
337
+ cached_cost_per_million=0.03,
321
338
  output_cost_per_million=1.25,
322
339
  description="Claude 3 Haiku",
323
340
  ),
@@ -328,6 +345,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
328
345
  context_length=64_000,
329
346
  max_output_tokens=8_000,
330
347
  input_cost_per_million=0.27,
348
+ cached_cost_per_million=0.07,
331
349
  output_cost_per_million=1.10,
332
350
  description="DeepSeek Chat",
333
351
  ),
@@ -337,6 +355,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
337
355
  context_length=64_000,
338
356
  max_output_tokens=8_000,
339
357
  input_cost_per_million=0.55,
358
+ cached_cost_per_million=0.14,
340
359
  output_cost_per_million=2.19,
341
360
  description="DeepSeek-R1 Reasoning LM",
342
361
  ),
@@ -347,6 +366,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
347
366
  context_length=1_056_768,
348
367
  max_output_tokens=8192,
349
368
  input_cost_per_million=0.10,
369
+ cached_cost_per_million=0.025,
350
370
  output_cost_per_million=0.40,
351
371
  rename_params={"max_tokens": "max_completion_tokens"},
352
372
  description="Gemini 2.0 Flash",
@@ -401,6 +421,40 @@ MODEL_INFO: Dict[str, ModelInfo] = {
401
421
  rename_params={"max_tokens": "max_completion_tokens"},
402
422
  description="Gemini 2.0 Flash Thinking",
403
423
  ),
424
+ # Gemini 2.5 Models
425
+ GeminiModel.GEMINI_2_5_PRO.value: ModelInfo(
426
+ name=GeminiModel.GEMINI_2_5_PRO.value,
427
+ provider=ModelProvider.GOOGLE,
428
+ context_length=1_048_576,
429
+ max_output_tokens=65_536,
430
+ input_cost_per_million=1.25,
431
+ cached_cost_per_million=0.31,
432
+ output_cost_per_million=10.0,
433
+ rename_params={"max_tokens": "max_completion_tokens"},
434
+ description="Gemini 2.5 Pro",
435
+ ),
436
+ GeminiModel.GEMINI_2_5_FLASH.value: ModelInfo(
437
+ name=GeminiModel.GEMINI_2_5_FLASH.value,
438
+ provider=ModelProvider.GOOGLE,
439
+ context_length=1_048_576,
440
+ max_output_tokens=65_536,
441
+ input_cost_per_million=0.30,
442
+ cached_cost_per_million=0.075,
443
+ output_cost_per_million=2.50,
444
+ rename_params={"max_tokens": "max_completion_tokens"},
445
+ description="Gemini 2.5 Flash",
446
+ ),
447
+ GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value: ModelInfo(
448
+ name=GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value,
449
+ provider=ModelProvider.GOOGLE,
450
+ context_length=65_536,
451
+ max_output_tokens=65_536,
452
+ input_cost_per_million=0.10,
453
+ cached_cost_per_million=0.025,
454
+ output_cost_per_million=0.40,
455
+ rename_params={"max_tokens": "max_completion_tokens"},
456
+ description="Gemini 2.5 Flash Lite Preview",
457
+ ),
404
458
  }
405
459
 
406
460