PyPI - langroid - Versions diffs - 0.20.0__tar.gz → 0.20.1__tar.gz - Mend

langroid 0.20.0__tar.gz → 0.20.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (149) hide show

{langroid-0.20.0 → langroid-0.20.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.20.0
+Version: 0.20.1
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani

{langroid-0.20.0 → langroid-0.20.1}/langroid/agent/base.py RENAMED Viewed

@@ -880,7 +880,13 @@ class Agent(ABC):
         return cdoc
     def has_tool_message_attempt(self, msg: str | ChatDocument | None) -> bool:
-        """Check whether msg contains a Tool/fn-call attempt (by the LLM)"""
+        """
+        Check whether msg contains a Tool/fn-call attempt (by the LLM).
+        CAUTION: This uses self.get_tool_messages(msg) which as a side-effect
+        may update msg.tool_messages when msg is a ChatDocument, if there are
+        any tools in msg.
+        """
         if msg is None:
             return False
         try:
@@ -921,6 +927,9 @@ class Agent(ABC):
     ) -> List[ToolMessage]:
         """
         Get ToolMessages recognized in msg, handle-able by this agent.
+        NOTE: as a side-effect, this will update msg.tool_messages
+        when msg is a ChatDocument and msg contains tool messages.
         If all_tools is True:
         - return all tools, i.e. any tool in self.llm_tools_known,
             whether it is handled by this agent or not;

{langroid-0.20.0 → langroid-0.20.1}/langroid/agent/special/arangodb/arangodb_agent.py RENAMED Viewed

@@ -27,6 +27,7 @@ from langroid.agent.special.arangodb.tools import (
     aql_retrieval_tool_name,
     arango_schema_tool_name,
 )
+from langroid.agent.special.arangodb.utils import count_fields, trim_schema
 from langroid.agent.tools.orchestration import DoneTool, ForwardTool
 from langroid.exceptions import LangroidImportError
 from langroid.mytypes import Entity
@@ -88,11 +89,14 @@ class QueryResult(BaseModel):
 class ArangoChatAgentConfig(ChatAgentConfig):
     arango_settings: ArangoSettings = ArangoSettings()
     system_message: str = DEFAULT_ARANGO_CHAT_SYSTEM_MESSAGE
-    kg_schema: Optional[Dict[str, List[Dict[str, Any]]]] = None
+    kg_schema: str | Dict[str, List[Dict[str, Any]]] | None = None
     database_created: bool = False
-    use_schema_tools: bool = True
+    prepopulate_schema: bool = True
     use_functions_api: bool = True
+    max_num_results: int = 10  # how many results to return from AQL query
     max_result_tokens: int = 1000  # truncate long results to this many tokens
+    max_schema_fields: int = 500  # max fields to show in schema
+    max_tries: int = 10  # how many attempts to answer user question
     use_tools: bool = False
     schema_sample_pct: float = 0
     # whether the agent is used in a continuous chat with user,
@@ -103,16 +107,65 @@ class ArangoChatAgentConfig(ChatAgentConfig):
 class ArangoChatAgent(ChatAgent):
     def __init__(self, config: ArangoChatAgentConfig):
+        super().__init__(config)
         self.config: ArangoChatAgentConfig = config
+        self.init_state()
         self._validate_config()
         self._import_arango()
         self._initialize_db()
         self._init_tools_sys_message()
-        self.init_state()
     def init_state(self) -> None:
         super().init_state()
         self.current_retrieval_aql_query: str = ""
+        self.num_tries = 0  # how many attempts to answer user question
+    def user_response(
+        self,
+        msg: Optional[str | ChatDocument] = None,
+    ) -> Optional[ChatDocument]:
+        response = super().user_response(msg)
+        response_str = response.content if response is not None else ""
+        if response_str != "":
+            self.num_tries = 0  # reset number of tries if user responds
+        return response
+    def llm_response(
+        self, message: Optional[str | ChatDocument] = None
+    ) -> Optional[ChatDocument]:
+        if self.num_tries > self.config.max_tries:
+            if self.config.chat_mode:
+                return self.create_llm_response(
+                    content=f"""
+                    {self.config.addressing_prefix}User
+                    I give up, since I have exceeded the
+                    maximum number of tries ({self.config.max_tries}).
+                    Feel free to give me some hints!
+                    """
+                )
+            else:
+                return self.create_llm_response(
+                    tool_messages=[
+                        DoneTool(
+                            content=f"""
+                            Exceeded maximum number of tries ({self.config.max_tries}).
+                            """
+                        )
+                    ]
+                )
+        if isinstance(message, ChatDocument) and message.metadata.sender == Entity.USER:
+            message.content = (
+                message.content
+                + "\n"
+                + """
+                (REMEMBER, Do NOT use more than ONE TOOL/FUNCTION at a time!
+                you must WAIT for a helper to send you the RESULT(S) before
+                making another TOOL/FUNCTION call)
+                """
+            )
+        return super().llm_response(message)
     def _validate_config(self) -> None:
         assert isinstance(self.config, ArangoChatAgentConfig)
@@ -230,6 +283,7 @@ class ArangoChatAgent(ChatAgent):
             try:
                 cursor = self.db.aql.execute(query, bind_vars=bind_vars)
                 records = [doc for doc in cursor]  # type: ignore
+                records = records[: self.config.max_num_results]
                 logger.warning(f"Records retrieved: {records}")
                 return QueryResult(success=True, data=records if records else [])
             except Exception as e:
@@ -273,6 +327,28 @@ class ArangoChatAgent(ChatAgent):
                 success=False, data=f"Failed after max retries: {str(e)}"
             )
+    def _limit_tokens(self, text: str) -> str:
+        result = text
+        n_toks = self.num_tokens(result)
+        if n_toks > self.config.max_result_tokens:
+            logger.warning(
+                f"""
+                Your query resulted in a large result of
+                {n_toks} tokens,
+                which will be truncated to {self.config.max_result_tokens} tokens.
+                If this does not give satisfactory results,
+                please retry with a more focused query.
+                """
+            )
+            if self.parser is not None:
+                result = self.parser.truncate_tokens(
+                    result,
+                    self.config.max_result_tokens,
+                )
+            else:
+                result = result[: self.config.max_result_tokens * 4]  # truncate roughly
+        return result
     def aql_retrieval_tool(self, msg: AQLRetrievalTool) -> str:
         """Handle AQL query for data retrieval"""
         if not self.tried_schema:
@@ -285,6 +361,7 @@ class ArangoChatAgent(ChatAgent):
             return """
             You need to create the database first using `{aql_creation_tool_name}`.
             """
+        self.num_tries += 1
         query = msg.aql_query
         self.current_retrieval_aql_query = query
         logger.info(f"Executing AQL query: {query}")
@@ -299,28 +376,11 @@ class ArangoChatAgent(ChatAgent):
             """
         # truncate long results
         result = str(response.data)
-        n_toks = self.num_tokens(result)
-        if n_toks > self.config.max_result_tokens:
-            logger.warning(
-                f"""
-                Your query resulted in a large result of
-                {n_toks} tokens,
-                which will be truncated to {self.config.max_result_tokens} tokens.
-                If this does not give satisfactory results,
-                please retry with a more focused query.
-                """
-            )
-            if self.parser is not None:
-                result = self.parser.truncate_tokens(
-                    result,
-                    self.config.max_result_tokens,
-                )
-            else:
-                result = result[: self.config.max_result_tokens * 4]  # truncate roughly
-        return result
+        return self._limit_tokens(result)
     def aql_creation_tool(self, msg: AQLCreationTool) -> str:
         """Handle AQL query for creating data"""
+        self.num_tries += 1
         query = msg.aql_query
         logger.info(f"Executing AQL query: {query}")
         response = self.write_query(query)
@@ -334,12 +394,34 @@ class ArangoChatAgent(ChatAgent):
         self,
         msg: ArangoSchemaTool | None,
     ) -> Dict[str, List[Dict[str, Any]]] | str:
-        """Get database schema including collections, properties, and relationships"""
+        """Get database schema. If collections=None, include all collections.
+        If properties=False, show only connection info,
+        else show all properties and example-docs.
+        """
+        if msg is not None:
+            collections = msg.collections
+            properties = msg.properties
+        else:
+            collections = None
+            properties = True
         self.tried_schema = True
-        if self.config.kg_schema is not None and len(self.config.kg_schema) > 0:
+        if (
+            self.config.kg_schema is not None
+            and len(self.config.kg_schema) > 0
+            and msg is None
+        ):
+            # we are trying to pre-populate full schema before the agent runs,
+            # so get it if it's already available
+            # (Note of course that this "full schema" may actually be incomplete)
             return self.config.kg_schema
+        # increment tries only if the LLM is asking for the schema,
+        # in which case msg will not be None
+        self.num_tries += msg is not None
         try:
-            # Get graph schemas
+            # Get graph schemas (keeping full graph info)
             graph_schema = [
                 {"graph_name": g["name"], "edge_definitions": g["edge_definitions"]}
                 for g in self.db.graphs()  # type: ignore
@@ -348,57 +430,78 @@ class ArangoChatAgent(ChatAgent):
             # Get collection schemas
             collection_schema = []
             for collection in self.db.collections():  # type: ignore
-                if collection["name"].startswith("_"):  # Skip system collections
+                if collection["name"].startswith("_"):
                     continue
                 col_name = collection["name"]
+                if collections and col_name not in collections:
+                    continue
                 col_type = collection["type"]
                 col_size = self.db.collection(col_name).count()
-                if col_size == 0:  # Skip empty collections
+                if col_size == 0:
                     continue
-                # Calculate sample size
-                limit_amount = (
-                    ceil(
-                        self.config.schema_sample_pct * col_size / 100.0  # type: ignore
-                    )
-                    or 1
-                )
-                # Query to get sample documents and their properties
-                sample_query = f"""
-                    FOR doc in {col_name}
-                    LIMIT {limit_amount}
-                    RETURN doc
-                """
+                if properties:
+                    # Full property collection with sampling
+                    lim = self.config.schema_sample_pct * col_size  # type: ignore
+                    limit_amount = ceil(lim / 100.0) or 1
+                    sample_query = f"""
+                        FOR doc in {col_name}
+                        LIMIT {limit_amount}
+                        RETURN doc
+                    """
-                properties = []
-                example_doc = None
-                def simplify_doc(doc: Any) -> Any:
-                    if isinstance(doc, list) and len(doc) > 0:
-                        return [simplify_doc(doc[0])]
-                    if isinstance(doc, dict):
-                        return {k: simplify_doc(v) for k, v in doc.items()}
-                    return doc
-                for doc in self.db.aql.execute(sample_query):  # type: ignore
-                    if example_doc is None:
-                        example_doc = simplify_doc(doc)
-                    for key, value in doc.items():
-                        prop = {"name": key, "type": type(value).__name__}
-                        if prop not in properties:
-                            properties.append(prop)
-                collection_schema.append(
-                    {
+                    properties_list = []
+                    example_doc = None
+                    def simplify_doc(doc: Any) -> Any:
+                        if isinstance(doc, list) and len(doc) > 0:
+                            return [simplify_doc(doc[0])]
+                        if isinstance(doc, dict):
+                            return {k: simplify_doc(v) for k, v in doc.items()}
+                        return doc
+                    for doc in self.db.aql.execute(sample_query):  # type: ignore
+                        if example_doc is None:
+                            example_doc = simplify_doc(doc)
+                        for key, value in doc.items():
+                            prop = {"name": key, "type": type(value).__name__}
+                            if prop not in properties_list:
+                                properties_list.append(prop)
+                    collection_schema.append(
+                        {
+                            "collection_name": col_name,
+                            "collection_type": col_type,
+                            f"{col_type}_properties": properties_list,
+                            f"example_{col_type}": example_doc,
+                        }
+                    )
+                else:
+                    # Basic info + from/to for edges only
+                    collection_info = {
                         "collection_name": col_name,
                         "collection_type": col_type,
-                        f"{col_type}_properties": properties,
-                        f"example_{col_type}": example_doc,
                     }
-                )
+                    if col_type == "edge":
+                        # Get a sample edge to extract from/to fields
+                        sample_edge = next(
+                            self.db.aql.execute(  # type: ignore
+                                f"FOR e IN {col_name} LIMIT 1 RETURN e"
+                            ),
+                            None,
+                        )
+                        if sample_edge:
+                            collection_info["from_collection"] = sample_edge[
+                                "_from"
+                            ].split("/")[0]
+                            collection_info["to_collection"] = sample_edge["_to"].split(
+                                "/"
+                            )[0]
+                    collection_schema.append(collection_info)
             schema = {
                 "Graph Schema": graph_schema,
@@ -406,10 +509,41 @@ class ArangoChatAgent(ChatAgent):
             }
             schema_str = json.dumps(schema, indent=2)
             logger.warning(f"Schema retrieved:\n{schema_str}")
-            # save schema to file "logs/arangoo-schema.json"
             with open("logs/arango-schema.json", "w") as f:
                 f.write(schema_str)
-            self.config.kg_schema = schema  # type: ignore
+            if (n_fields := count_fields(schema)) > self.config.max_schema_fields:
+                logger.warning(
+                    f"""
+                    Schema has {n_fields} fields, which exceeds the maximum of
+                    {self.config.max_schema_fields}. Showing a trimmed version
+                    that only includes edge info and no other properties.
+                    """
+                )
+                schema = trim_schema(schema)
+                n_fields = count_fields(schema)
+                logger.warning(f"Schema trimmed down to {n_fields} fields.")
+                schema_str = (
+                    json.dumps(schema)
+                    + "\n"
+                    + f"""
+                    CAUTION: The requested schema was too large, so
+                    the schema has been trimmed down to show only all collection names,
+                    their types,
+                    and edge relationships (from/to collections) without any properties.
+                    To find out more about the schema, you can EITHER:
+                    - Use the `{arango_schema_tool_name}` tool again with the
+                      `properties` arg set to True, and `collections` arg set to
+                        specific collections you want to know more about, OR
+                    - Use the `{aql_retrieval_tool_name}` tool to learn more about
+                      the schema by querying the database.
+                    """
+                )
+                if msg is None:
+                    self.config.kg_schema = schema_str
+                return schema_str
+            self.config.kg_schema = schema
             return schema
         except Exception as e:
@@ -432,9 +566,10 @@ class ArangoChatAgent(ChatAgent):
         super().__init__(self.config)
         # Note we are enabling GraphSchemaTool regardless of whether
-        # self.config.use_schema_tools is True or False, because
+        # self.config.prepopulate_schema is True or False, because
         # even when schema provided, the agent may later want to get the schema,
-        # e.g. if the db evolves, or if it needs to bring in the schema
+        # e.g. if the db evolves, or schema was trimmed due to size, or
+        # if it needs to bring in the schema into recent context.
         self.enable_message(
             [
@@ -454,7 +589,7 @@ class ArangoChatAgent(ChatAgent):
         assert isinstance(self.config, ArangoChatAgentConfig)
         return (
             SCHEMA_TOOLS_SYS_MSG
-            if self.config.use_schema_tools
+            if not self.config.prepopulate_schema
             else SCHEMA_PROVIDED_SYS_MSG.format(schema=self.arango_schema_tool(None))
         )

{langroid-0.20.0 → langroid-0.20.1}/langroid/agent/special/arangodb/system_messages.py RENAMED Viewed

@@ -9,7 +9,7 @@ done_tool_name = DoneTool.default_value("request")
 arango_schema_tool_description = f"""
 `{arango_schema_tool_name}` tool/function-call to find the schema
-of the graph database, i.e. get all the collections
+of the graph database, or for some SPECIFIC collections, i.e. get information on
 (document and edge), their attributes, and graph definitions available in your
 ArangoDB database. You MUST use this tool BEFORE attempting to use the
 `{aql_retrieval_tool_name}` tool/function-call, to ensure that you are using the
@@ -18,7 +18,8 @@ correct collection names and attributes in your `{aql_retrieval_tool_name}` tool
 aql_retrieval_tool_description = f"""
 `{aql_retrieval_tool_name}` tool/function-call to retrieve information from
-  the database using AQL (ArangoDB Query Language) queries.
+  the database using AQL (ArangoDB Query Language) queries, to answer
+  the user's questions, OR for you to learn more about the SCHEMA of the database.
 """
 aql_creation_tool_description = f"""
@@ -26,6 +27,29 @@ aql_creation_tool_description = f"""
 documents/edges in the database.
 """
+aql_retrieval_query_example = """
+EXAMPLE:
+Suppose you are asked this question "Does Bob have a father?".
+Then you will go through the following steps, where YOU indicates
+the message YOU will be sending, and RESULTS indicates the RESULTS
+you will receive from the helper executing the query:
+1. YOU:
+    {{ "request": "aql_retrieval_tool",
+      "aql_query": "FOR v, e, p in ... [query truncated for brevity]..."}}
+    2. RESULTS:
+    [.. results from the query...]
+    3. YOU: [ since results were not satisfactory, you try ANOTHER query]
+    {{ "request": "aql_retrieval_tool",
+    "aql_query": "blah blah ... [query truncated for brevity]..."}}
+    }}
+    4. RESULTS:
+    [.. results from the query...]
+    5. YOU: [ now you have the answer, you can generate your response ]
+    The answer is YES, Bob has a father, and his name is John.
+"""
 aql_query_instructions = """
 When writing AQL queries:
 1. Use the exact property names shown in the schema
@@ -63,6 +87,7 @@ REMEMBER:
     with your response. DO NOT MAKE UP RESULTS FROM A TOOL!
 [3] YOU MUST NOT ANSWER queries from your OWN KNOWLEDGE; ALWAYS RELY ON
     the result of a TOOL/FUNCTION to compose your response.
+[4] Use ONLY ONE TOOL/FUNCTION at a TIME!
 """
 # sys msg to use when schema already provided initially,
 # so agent should not use schema tool
@@ -77,6 +102,7 @@ and their attribute keys available in your ArangoDB database.
 {{schema}}
 === END SCHEMA ===
 To help with the user's question or database update/creation request,
 you have access to these tools:
@@ -84,10 +110,6 @@ you have access to these tools:
 - {aql_creation_tool_description}
-Since the schema has been provided, you may not need to use the tool below,
-but you may use it if you need to remind yourself about the schema:
-- {arango_schema_tool_description}
 {tool_result_instruction}
 """
@@ -113,7 +135,9 @@ DEFAULT_ARANGO_CHAT_SYSTEM_MESSAGE = f"""
 {{mode}}
 You do not need to be able to answer a question with just one query.
-You could make a sequence of AQL queries to find the answer to the question.
+You can make a query, WAIT for the result,
+THEN make ANOTHER query, WAIT for result,
+THEN make ANOTHER query, and so on, until you have the answer.
 {aql_query_instructions}
@@ -134,6 +158,8 @@ If you receive a null or other unexpected result,
 Start by asking what the user needs help with.
 {tool_result_instruction}
+{aql_retrieval_query_example}
 """
 ADDRESSING_INSTRUCTION = """

langroid-0.20.1/langroid/agent/special/arangodb/tools.py ADDED Viewed

@@ -0,0 +1,102 @@
+from typing import List, Tuple
+from langroid.agent.tool_message import ToolMessage
+class AQLRetrievalTool(ToolMessage):
+    request: str = "aql_retrieval_tool"
+    purpose: str = """
+        To send an <aql_query> in response to a user's request/question,
+        OR to find SCHEMA information,
+        and WAIT for results of the <aql_query> BEFORE continuing with response.
+        You will receive RESULTS from this tool, and ONLY THEN you can continue.
+    """
+    aql_query: str
+    @classmethod
+    def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
+        """Few-shot examples to include in tool instructions."""
+        return [
+            (
+                "I want to see who Bob's Father is",
+                cls(
+                    aql_query="""
+                    FOR v, e, p IN 1..1 OUTBOUND 'users/Bob' GRAPH 'family_tree'
+                    FILTER p.edges[0].type == 'father'
+                    RETURN v
+                    """
+                ),
+            ),
+            (
+                "I want to know the properties of the Actor node",
+                cls(
+                    aql_query="""
+                    FOR doc IN Actor
+                    LIMIT 1
+                    RETURN ATTRIBUTES(doc)
+                    """
+                ),
+            ),
+        ]
+    @classmethod
+    def instructions(cls) -> str:
+        return """
+        When using this TOOL/Function-call, you must WAIT to receive the RESULTS
+        of the AQL query, before continuing your response!
+        DO NOT ASSUME YOU KNOW THE RESULTs BEFORE RECEIVING THEM.
+        """
+aql_retrieval_tool_name = AQLRetrievalTool.default_value("request")
+class AQLCreationTool(ToolMessage):
+    request: str = "aql_creation_tool"
+    purpose: str = """
+        To send the <aql_query> to create documents/edges in the graph database.
+        IMPORTANT: YOU MUST WAIT FOR THE RESULT OF THE TOOL BEFORE CONTINUING.
+        You will receive RESULTS from this tool, and ONLY THEN you can continue.
+    """
+    aql_query: str
+    @classmethod
+    def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
+        """Few-shot examples to include in tool instructions."""
+        return [
+            (
+                "Create a new document in the collection 'users'",
+                cls(
+                    aql_query="""
+                    INSERT {
+                      "name": "Alice",
+                      "age": 30
+                    } INTO users
+                    """
+                ),
+            ),
+        ]
+aql_creation_tool_name = AQLCreationTool.default_value("request")
+class ArangoSchemaTool(ToolMessage):
+    request: str = "arango_schema_tool"
+    purpose: str = """
+        To get the schema of the Arango graph database,
+        or some part of it. Follow these instructions:
+        1. Set <properties> to True to get the properties of the collections,
+        and False if you only want to see the graph structure and get only the
+        from/to relations of the edges.
+        2. Set <collections> to a list of collection names if you want to see,
+        or leave it as None to see all ALL collections.
+        IMPORTANT: YOU MUST WAIT FOR THE RESULT OF THE TOOL BEFORE CONTINUING.
+        You will receive RESULTS from this tool, and ONLY THEN you can continue.
+    """
+    properties: bool = True
+    collections: List[str] | None = None
+arango_schema_tool_name = ArangoSchemaTool.default_value("request")

langroid-0.20.1/langroid/agent/special/arangodb/utils.py ADDED Viewed

@@ -0,0 +1,36 @@
+from typing import Any, Dict, List
+def count_fields(schema: Dict[str, List[Dict[str, Any]]]) -> int:
+    total = 0
+    for coll in schema["Collection Schema"]:
+        # Count all keys in each collection's dict
+        total += len(coll)
+        # Also count properties if they exist
+        props = coll.get(f"{coll['collection_type']}_properties", [])
+        total += len(props)
+    return total
+def trim_schema(
+    schema: Dict[str, List[Dict[str, Any]]]
+) -> Dict[str, List[Dict[str, Any]]]:
+    """Keep only edge connection info, remove properties and examples"""
+    trimmed: Dict[str, List[Dict[str, Any]]] = {
+        "Graph Schema": schema["Graph Schema"],
+        "Collection Schema": [],
+    }
+    for coll in schema["Collection Schema"]:
+        col_info: Dict[str, Any] = {
+            "collection_name": coll["collection_name"],
+            "collection_type": coll["collection_type"],
+        }
+        if coll["collection_type"] == "edge":
+            # preserve from/to info if present
+            if f"example_{coll['collection_type']}" in coll:
+                example = coll[f"example_{coll['collection_type']}"]
+                if example and "_from" in example:
+                    col_info["from_collection"] = example["_from"].split("/")[0]
+                    col_info["to_collection"] = example["_to"].split("/")[0]
+        trimmed["Collection Schema"].append(col_info)
+    return trimmed

langroid 0.20.0__tar.gz → 0.20.1__tar.gz

langroid 0.20.0__tar.gz → 0.20.1__tar.gz