PyPI - langroid - Versions diffs - 0.20.0__tar.gz → 0.21.0__tar.gz - Mend

langroid 0.20.0__tar.gz → 0.21.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (149) hide show

{langroid-0.20.0 → langroid-0.21.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.20.0
+Version: 0.21.0
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani
@@ -248,6 +248,9 @@ teacher_task.run()
 <details>
 <summary> <b>Click to expand</b></summary>
+- **Nov 2024:**
+  - **[0.20.0](https://github.com/langroid/langroid/releases/tag/0.20.0)** Support for
+    ArangoDB Knowledge Graphs.
 - **Oct 2024:**
   - **[0.18.0]** [LLMConfig.async_stream_quiet](https://langroid.github.io/langroid/notes/async-streaming/) flag to
     turn off LLM output in async + stream mode.

{langroid-0.20.0 → langroid-0.21.0}/README.md RENAMED Viewed

@@ -135,6 +135,9 @@ teacher_task.run()
 <details>
 <summary> <b>Click to expand</b></summary>
+- **Nov 2024:**
+  - **[0.20.0](https://github.com/langroid/langroid/releases/tag/0.20.0)** Support for
+    ArangoDB Knowledge Graphs.
 - **Oct 2024:**
   - **[0.18.0]** [LLMConfig.async_stream_quiet](https://langroid.github.io/langroid/notes/async-streaming/) flag to
     turn off LLM output in async + stream mode.

{langroid-0.20.0 → langroid-0.21.0}/langroid/agent/base.py RENAMED Viewed

@@ -880,7 +880,13 @@ class Agent(ABC):
         return cdoc
     def has_tool_message_attempt(self, msg: str | ChatDocument | None) -> bool:
-        """Check whether msg contains a Tool/fn-call attempt (by the LLM)"""
+        """
+        Check whether msg contains a Tool/fn-call attempt (by the LLM).
+        CAUTION: This uses self.get_tool_messages(msg) which as a side-effect
+        may update msg.tool_messages when msg is a ChatDocument, if there are
+        any tools in msg.
+        """
         if msg is None:
             return False
         try:
@@ -921,6 +927,9 @@ class Agent(ABC):
     ) -> List[ToolMessage]:
         """
         Get ToolMessages recognized in msg, handle-able by this agent.
+        NOTE: as a side-effect, this will update msg.tool_messages
+        when msg is a ChatDocument and msg contains tool messages.
         If all_tools is True:
         - return all tools, i.e. any tool in self.llm_tools_known,
             whether it is handled by this agent or not;

{langroid-0.20.0 → langroid-0.21.0}/langroid/agent/special/arangodb/arangodb_agent.py RENAMED Viewed

@@ -27,6 +27,7 @@ from langroid.agent.special.arangodb.tools import (
     aql_retrieval_tool_name,
     arango_schema_tool_name,
 )
+from langroid.agent.special.arangodb.utils import count_fields, trim_schema
 from langroid.agent.tools.orchestration import DoneTool, ForwardTool
 from langroid.exceptions import LangroidImportError
 from langroid.mytypes import Entity
@@ -88,11 +89,14 @@ class QueryResult(BaseModel):
 class ArangoChatAgentConfig(ChatAgentConfig):
     arango_settings: ArangoSettings = ArangoSettings()
     system_message: str = DEFAULT_ARANGO_CHAT_SYSTEM_MESSAGE
-    kg_schema: Optional[Dict[str, List[Dict[str, Any]]]] = None
+    kg_schema: str | Dict[str, List[Dict[str, Any]]] | None = None
     database_created: bool = False
-    use_schema_tools: bool = True
+    prepopulate_schema: bool = True
     use_functions_api: bool = True
+    max_num_results: int = 10  # how many results to return from AQL query
     max_result_tokens: int = 1000  # truncate long results to this many tokens
+    max_schema_fields: int = 500  # max fields to show in schema
+    max_tries: int = 10  # how many attempts to answer user question
     use_tools: bool = False
     schema_sample_pct: float = 0
     # whether the agent is used in a continuous chat with user,
@@ -103,16 +107,81 @@ class ArangoChatAgentConfig(ChatAgentConfig):
 class ArangoChatAgent(ChatAgent):
     def __init__(self, config: ArangoChatAgentConfig):
+        super().__init__(config)
         self.config: ArangoChatAgentConfig = config
+        self.init_state()
         self._validate_config()
         self._import_arango()
         self._initialize_db()
         self._init_tools_sys_message()
-        self.init_state()
     def init_state(self) -> None:
         super().init_state()
         self.current_retrieval_aql_query: str = ""
+        self.current_schema_params: ArangoSchemaTool = ArangoSchemaTool()
+        self.num_tries = 0  # how many attempts to answer user question
+    def user_response(
+        self,
+        msg: Optional[str | ChatDocument] = None,
+    ) -> Optional[ChatDocument]:
+        response = super().user_response(msg)
+        if response is None:
+            return None
+        response_str = response.content if response is not None else ""
+        if response_str != "":
+            self.num_tries = 0  # reset number of tries if user responds
+        return response
+    def llm_response(
+        self, message: Optional[str | ChatDocument] = None
+    ) -> Optional[ChatDocument]:
+        if self.num_tries > self.config.max_tries:
+            if self.config.chat_mode:
+                return self.create_llm_response(
+                    content=f"""
+                    {self.config.addressing_prefix}User
+                    I give up, since I have exceeded the
+                    maximum number of tries ({self.config.max_tries}).
+                    Feel free to give me some hints!
+                    """
+                )
+            else:
+                return self.create_llm_response(
+                    tool_messages=[
+                        DoneTool(
+                            content=f"""
+                            Exceeded maximum number of tries ({self.config.max_tries}).
+                            """
+                        )
+                    ]
+                )
+        if isinstance(message, ChatDocument) and message.metadata.sender == Entity.USER:
+            message.content = (
+                message.content
+                + "\n"
+                + """
+                (REMEMBER, Do NOT use more than ONE TOOL/FUNCTION at a time!
+                you must WAIT for a helper to send you the RESULT(S) before
+                making another TOOL/FUNCTION call)
+                """
+            )
+        response = super().llm_response(message)
+        if (
+            response is not None
+            and self.config.chat_mode
+            and self.config.addressing_prefix in response.content
+            and self.has_tool_message_attempt(response)
+        ):
+            # response contains both a user-addressing and a tool, which
+            # is not allowed, so remove the user-addressing prefix
+            response.content = response.content.replace(
+                self.config.addressing_prefix, ""
+            )
+        return response
     def _validate_config(self) -> None:
         assert isinstance(self.config, ArangoChatAgentConfig)
@@ -230,6 +299,7 @@ class ArangoChatAgent(ChatAgent):
             try:
                 cursor = self.db.aql.execute(query, bind_vars=bind_vars)
                 records = [doc for doc in cursor]  # type: ignore
+                records = records[: self.config.max_num_results]
                 logger.warning(f"Records retrieved: {records}")
                 return QueryResult(success=True, data=records if records else [])
             except Exception as e:
@@ -273,6 +343,28 @@ class ArangoChatAgent(ChatAgent):
                 success=False, data=f"Failed after max retries: {str(e)}"
             )
+    def _limit_tokens(self, text: str) -> str:
+        result = text
+        n_toks = self.num_tokens(result)
+        if n_toks > self.config.max_result_tokens:
+            logger.warning(
+                f"""
+                Your query resulted in a large result of
+                {n_toks} tokens,
+                which will be truncated to {self.config.max_result_tokens} tokens.
+                If this does not give satisfactory results,
+                please retry with a more focused query.
+                """
+            )
+            if self.parser is not None:
+                result = self.parser.truncate_tokens(
+                    result,
+                    self.config.max_result_tokens,
+                )
+            else:
+                result = result[: self.config.max_result_tokens * 4]  # truncate roughly
+        return result
     def aql_retrieval_tool(self, msg: AQLRetrievalTool) -> str:
         """Handle AQL query for data retrieval"""
         if not self.tried_schema:
@@ -285,7 +377,13 @@ class ArangoChatAgent(ChatAgent):
             return """
             You need to create the database first using `{aql_creation_tool_name}`.
             """
+        self.num_tries += 1
         query = msg.aql_query
+        if query == self.current_retrieval_aql_query:
+            return """
+            You have already tried this query, so you will get the same results again!
+            If you need to retry, please MODIFY the query to get different results.
+            """
         self.current_retrieval_aql_query = query
         logger.info(f"Executing AQL query: {query}")
         response = self.read_query(query)
@@ -299,28 +397,11 @@ class ArangoChatAgent(ChatAgent):
             """
         # truncate long results
         result = str(response.data)
-        n_toks = self.num_tokens(result)
-        if n_toks > self.config.max_result_tokens:
-            logger.warning(
-                f"""
-                Your query resulted in a large result of
-                {n_toks} tokens,
-                which will be truncated to {self.config.max_result_tokens} tokens.
-                If this does not give satisfactory results,
-                please retry with a more focused query.
-                """
-            )
-            if self.parser is not None:
-                result = self.parser.truncate_tokens(
-                    result,
-                    self.config.max_result_tokens,
-                )
-            else:
-                result = result[: self.config.max_result_tokens * 4]  # truncate roughly
-        return result
+        return self._limit_tokens(result)
     def aql_creation_tool(self, msg: AQLCreationTool) -> str:
         """Handle AQL query for creating data"""
+        self.num_tries += 1
         query = msg.aql_query
         logger.info(f"Executing AQL query: {query}")
         response = self.write_query(query)
@@ -334,12 +415,45 @@ class ArangoChatAgent(ChatAgent):
         self,
         msg: ArangoSchemaTool | None,
     ) -> Dict[str, List[Dict[str, Any]]] | str:
-        """Get database schema including collections, properties, and relationships"""
+        """Get database schema. If collections=None, include all collections.
+        If properties=False, show only connection info,
+        else show all properties and example-docs.
+        """
+        if (
+            msg is not None
+            and msg.collections == self.current_schema_params.collections
+            and msg.properties == self.current_schema_params.properties
+        ):
+            return """
+            You have already tried this schema TOOL, so you will get the same results
+            again! Please MODIFY the tool params `collections` or `properties` to get
+            different results.
+            """
+        if msg is not None:
+            collections = msg.collections
+            properties = msg.properties
+        else:
+            collections = None
+            properties = True
         self.tried_schema = True
-        if self.config.kg_schema is not None and len(self.config.kg_schema) > 0:
+        if (
+            self.config.kg_schema is not None
+            and len(self.config.kg_schema) > 0
+            and msg is None
+        ):
+            # we are trying to pre-populate full schema before the agent runs,
+            # so get it if it's already available
+            # (Note of course that this "full schema" may actually be incomplete)
             return self.config.kg_schema
+        # increment tries only if the LLM is asking for the schema,
+        # in which case msg will not be None
+        self.num_tries += msg is not None
         try:
-            # Get graph schemas
+            # Get graph schemas (keeping full graph info)
             graph_schema = [
                 {"graph_name": g["name"], "edge_definitions": g["edge_definitions"]}
                 for g in self.db.graphs()  # type: ignore
@@ -348,57 +462,78 @@ class ArangoChatAgent(ChatAgent):
             # Get collection schemas
             collection_schema = []
             for collection in self.db.collections():  # type: ignore
-                if collection["name"].startswith("_"):  # Skip system collections
+                if collection["name"].startswith("_"):
                     continue
                 col_name = collection["name"]
+                if collections and col_name not in collections:
+                    continue
                 col_type = collection["type"]
                 col_size = self.db.collection(col_name).count()
-                if col_size == 0:  # Skip empty collections
+                if col_size == 0:
                     continue
-                # Calculate sample size
-                limit_amount = (
-                    ceil(
-                        self.config.schema_sample_pct * col_size / 100.0  # type: ignore
-                    )
-                    or 1
-                )
-                # Query to get sample documents and their properties
-                sample_query = f"""
-                    FOR doc in {col_name}
-                    LIMIT {limit_amount}
-                    RETURN doc
-                """
+                if properties:
+                    # Full property collection with sampling
+                    lim = self.config.schema_sample_pct * col_size  # type: ignore
+                    limit_amount = ceil(lim / 100.0) or 1
+                    sample_query = f"""
+                        FOR doc in {col_name}
+                        LIMIT {limit_amount}
+                        RETURN doc
+                    """
-                properties = []
-                example_doc = None
-                def simplify_doc(doc: Any) -> Any:
-                    if isinstance(doc, list) and len(doc) > 0:
-                        return [simplify_doc(doc[0])]
-                    if isinstance(doc, dict):
-                        return {k: simplify_doc(v) for k, v in doc.items()}
-                    return doc
-                for doc in self.db.aql.execute(sample_query):  # type: ignore
-                    if example_doc is None:
-                        example_doc = simplify_doc(doc)
-                    for key, value in doc.items():
-                        prop = {"name": key, "type": type(value).__name__}
-                        if prop not in properties:
-                            properties.append(prop)
-                collection_schema.append(
-                    {
+                    properties_list = []
+                    example_doc = None
+                    def simplify_doc(doc: Any) -> Any:
+                        if isinstance(doc, list) and len(doc) > 0:
+                            return [simplify_doc(doc[0])]
+                        if isinstance(doc, dict):
+                            return {k: simplify_doc(v) for k, v in doc.items()}
+                        return doc
+                    for doc in self.db.aql.execute(sample_query):  # type: ignore
+                        if example_doc is None:
+                            example_doc = simplify_doc(doc)
+                        for key, value in doc.items():
+                            prop = {"name": key, "type": type(value).__name__}
+                            if prop not in properties_list:
+                                properties_list.append(prop)
+                    collection_schema.append(
+                        {
+                            "collection_name": col_name,
+                            "collection_type": col_type,
+                            f"{col_type}_properties": properties_list,
+                            f"example_{col_type}": example_doc,
+                        }
+                    )
+                else:
+                    # Basic info + from/to for edges only
+                    collection_info = {
                         "collection_name": col_name,
                         "collection_type": col_type,
-                        f"{col_type}_properties": properties,
-                        f"example_{col_type}": example_doc,
                     }
-                )
+                    if col_type == "edge":
+                        # Get a sample edge to extract from/to fields
+                        sample_edge = next(
+                            self.db.aql.execute(  # type: ignore
+                                f"FOR e IN {col_name} LIMIT 1 RETURN e"
+                            ),
+                            None,
+                        )
+                        if sample_edge:
+                            collection_info["from_collection"] = sample_edge[
+                                "_from"
+                            ].split("/")[0]
+                            collection_info["to_collection"] = sample_edge["_to"].split(
+                                "/"
+                            )[0]
+                    collection_schema.append(collection_info)
             schema = {
                 "Graph Schema": graph_schema,
@@ -406,10 +541,41 @@ class ArangoChatAgent(ChatAgent):
             }
             schema_str = json.dumps(schema, indent=2)
             logger.warning(f"Schema retrieved:\n{schema_str}")
-            # save schema to file "logs/arangoo-schema.json"
             with open("logs/arango-schema.json", "w") as f:
                 f.write(schema_str)
-            self.config.kg_schema = schema  # type: ignore
+            if (n_fields := count_fields(schema)) > self.config.max_schema_fields:
+                logger.warning(
+                    f"""
+                    Schema has {n_fields} fields, which exceeds the maximum of
+                    {self.config.max_schema_fields}. Showing a trimmed version
+                    that only includes edge info and no other properties.
+                    """
+                )
+                schema = trim_schema(schema)
+                n_fields = count_fields(schema)
+                logger.warning(f"Schema trimmed down to {n_fields} fields.")
+                schema_str = (
+                    json.dumps(schema)
+                    + "\n"
+                    + f"""
+                    CAUTION: The requested schema was too large, so
+                    the schema has been trimmed down to show only all collection names,
+                    their types,
+                    and edge relationships (from/to collections) without any properties.
+                    To find out more about the schema, you can EITHER:
+                    - Use the `{arango_schema_tool_name}` tool again with the
+                      `properties` arg set to True, and `collections` arg set to
+                        specific collections you want to know more about, OR
+                    - Use the `{aql_retrieval_tool_name}` tool to learn more about
+                      the schema by querying the database.
+                    """
+                )
+                if msg is None:
+                    self.config.kg_schema = schema_str
+                return schema_str
+            self.config.kg_schema = schema
             return schema
         except Exception as e:
@@ -432,9 +598,10 @@ class ArangoChatAgent(ChatAgent):
         super().__init__(self.config)
         # Note we are enabling GraphSchemaTool regardless of whether
-        # self.config.use_schema_tools is True or False, because
+        # self.config.prepopulate_schema is True or False, because
         # even when schema provided, the agent may later want to get the schema,
-        # e.g. if the db evolves, or if it needs to bring in the schema
+        # e.g. if the db evolves, or schema was trimmed due to size, or
+        # if it needs to bring in the schema into recent context.
         self.enable_message(
             [
@@ -454,7 +621,7 @@ class ArangoChatAgent(ChatAgent):
         assert isinstance(self.config, ArangoChatAgentConfig)
         return (
             SCHEMA_TOOLS_SYS_MSG
-            if self.config.use_schema_tools
+            if not self.config.prepopulate_schema
             else SCHEMA_PROVIDED_SYS_MSG.format(schema=self.arango_schema_tool(None))
         )

{langroid-0.20.0 → langroid-0.21.0}/langroid/agent/special/arangodb/system_messages.py RENAMED Viewed

@@ -9,7 +9,7 @@ done_tool_name = DoneTool.default_value("request")
 arango_schema_tool_description = f"""
 `{arango_schema_tool_name}` tool/function-call to find the schema
-of the graph database, i.e. get all the collections
+of the graph database, or for some SPECIFIC collections, i.e. get information on
 (document and edge), their attributes, and graph definitions available in your
 ArangoDB database. You MUST use this tool BEFORE attempting to use the
 `{aql_retrieval_tool_name}` tool/function-call, to ensure that you are using the
@@ -18,7 +18,8 @@ correct collection names and attributes in your `{aql_retrieval_tool_name}` tool
 aql_retrieval_tool_description = f"""
 `{aql_retrieval_tool_name}` tool/function-call to retrieve information from
-  the database using AQL (ArangoDB Query Language) queries.
+  the database using AQL (ArangoDB Query Language) queries, to answer
+  the user's questions, OR for you to learn more about the SCHEMA of the database.
 """
 aql_creation_tool_description = f"""
@@ -26,6 +27,29 @@ aql_creation_tool_description = f"""
 documents/edges in the database.
 """
+aql_retrieval_query_example = """
+EXAMPLE:
+Suppose you are asked this question "Does Bob have a father?".
+Then you will go through the following steps, where YOU indicates
+the message YOU will be sending, and RESULTS indicates the RESULTS
+you will receive from the helper executing the query:
+1. YOU:
+    {{ "request": "aql_retrieval_tool",
+      "aql_query": "FOR v, e, p in ... [query truncated for brevity]..."}}
+    2. RESULTS:
+    [.. results from the query...]
+    3. YOU: [ since results were not satisfactory, you try ANOTHER query]
+    {{ "request": "aql_retrieval_tool",
+    "aql_query": "blah blah ... [query truncated for brevity]..."}}
+    }}
+    4. RESULTS:
+    [.. results from the query...]
+    5. YOU: [ now you have the answer, you can generate your response ]
+    The answer is YES, Bob has a father, and his name is John.
+"""
 aql_query_instructions = """
 When writing AQL queries:
 1. Use the exact property names shown in the schema
@@ -63,6 +87,7 @@ REMEMBER:
     with your response. DO NOT MAKE UP RESULTS FROM A TOOL!
 [3] YOU MUST NOT ANSWER queries from your OWN KNOWLEDGE; ALWAYS RELY ON
     the result of a TOOL/FUNCTION to compose your response.
+[4] Use ONLY ONE TOOL/FUNCTION at a TIME!
 """
 # sys msg to use when schema already provided initially,
 # so agent should not use schema tool
@@ -77,6 +102,7 @@ and their attribute keys available in your ArangoDB database.
 {{schema}}
 === END SCHEMA ===
 To help with the user's question or database update/creation request,
 you have access to these tools:
@@ -84,10 +110,6 @@ you have access to these tools:
 - {aql_creation_tool_description}
-Since the schema has been provided, you may not need to use the tool below,
-but you may use it if you need to remind yourself about the schema:
-- {arango_schema_tool_description}
 {tool_result_instruction}
 """
@@ -113,27 +135,34 @@ DEFAULT_ARANGO_CHAT_SYSTEM_MESSAGE = f"""
 {{mode}}
 You do not need to be able to answer a question with just one query.
-You could make a sequence of AQL queries to find the answer to the question.
+You can make a query, WAIT for the result,
+THEN make ANOTHER query, WAIT for result,
+THEN make ANOTHER query, and so on, until you have the answer.
 {aql_query_instructions}
 RETRY-SUGGESTIONS:
 If you receive a null or other unexpected result,
 (a) make sure you use the available TOOLs correctly,
-(b) USE `{arango_schema_tool_name}` tool/function-call to get all collections,
-    their attributes and graph definitions available in your ArangoDB database.
+(b) learn more about the schema using EITHER:
+ - `{arango_schema_tool_name}` tool/function-call to find properties of specific
+    collections or other parts of the schema, OR
+ - `{aql_retrieval_tool_name}` tool/function-call to use AQL queries to
+    find specific parts of the schema.
 (c) Collection names are CASE-SENSITIVE -- make sure you adhere to the exact
     collection name you found in the schema.
 (d) see if you have made an assumption in your AQL query, and try another way,
     or use `{aql_retrieval_tool_name}` to explore the database contents before
     submitting your final query.
-(f) Try APPROXIMATE or PARTIAL MATCHES to strings in the user's query,
+(e) Try APPROXIMATE or PARTIAL MATCHES to strings in the user's query,
     e.g. user may ask about "Godfather" instead of "The Godfather",
     or try using CASE-INSENSITIVE MATCHES.
 Start by asking what the user needs help with.
 {tool_result_instruction}
+{aql_retrieval_query_example}
 """
 ADDRESSING_INSTRUCTION = """

langroid 0.20.0__tar.gz → 0.21.0__tar.gz

langroid 0.20.0__tar.gz → 0.21.0__tar.gz