PyPI - langroid - Versions diffs - 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl - Mend

langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97)

langroid/__init__.py +70 -0
langroid/agent/__init__.py +22 -0
langroid/agent/base.py +120 -33
langroid/agent/batch.py +134 -35
langroid/agent/callbacks/__init__.py +0 -0
langroid/agent/callbacks/chainlit.py +608 -0
langroid/agent/chat_agent.py +164 -100
langroid/agent/chat_document.py +19 -2
langroid/agent/openai_assistant.py +20 -10
langroid/agent/special/__init__.py +33 -10
langroid/agent/special/doc_chat_agent.py +521 -108
langroid/agent/special/lance_doc_chat_agent.py +258 -0
langroid/agent/special/lance_rag/__init__.py +9 -0
langroid/agent/special/lance_rag/critic_agent.py +136 -0
langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
langroid/agent/special/lance_tools.py +44 -0
langroid/agent/special/neo4j/__init__.py +0 -0
langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
langroid/agent/special/neo4j/utils/__init__.py +0 -0
langroid/agent/special/neo4j/utils/system_message.py +46 -0
langroid/agent/special/relevance_extractor_agent.py +23 -7
langroid/agent/special/retriever_agent.py +29 -174
langroid/agent/special/sql/__init__.py +7 -0
langroid/agent/special/sql/sql_chat_agent.py +47 -23
langroid/agent/special/sql/utils/__init__.py +11 -0
langroid/agent/special/sql/utils/description_extractors.py +95 -46
langroid/agent/special/sql/utils/populate_metadata.py +28 -21
langroid/agent/special/table_chat_agent.py +43 -9
langroid/agent/task.py +423 -114
langroid/agent/tool_message.py +67 -10
langroid/agent/tools/__init__.py +8 -0
langroid/agent/tools/duckduckgo_search_tool.py +66 -0
langroid/agent/tools/google_search_tool.py +11 -0
langroid/agent/tools/metaphor_search_tool.py +67 -0
langroid/agent/tools/recipient_tool.py +6 -24
langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
langroid/cachedb/__init__.py +6 -0
langroid/embedding_models/__init__.py +24 -0
langroid/embedding_models/base.py +9 -1
langroid/embedding_models/models.py +117 -17
langroid/embedding_models/protoc/embeddings.proto +19 -0
langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
langroid/embedding_models/remote_embeds.py +153 -0
langroid/language_models/__init__.py +22 -0
langroid/language_models/azure_openai.py +47 -4
langroid/language_models/base.py +26 -10
langroid/language_models/config.py +5 -0
langroid/language_models/openai_gpt.py +407 -121
langroid/language_models/prompt_formatter/__init__.py +9 -0
langroid/language_models/prompt_formatter/base.py +4 -6
langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
langroid/language_models/utils.py +10 -9
langroid/mytypes.py +10 -4
langroid/parsing/__init__.py +33 -1
langroid/parsing/document_parser.py +259 -63
langroid/parsing/image_text.py +32 -0
langroid/parsing/parse_json.py +143 -0
langroid/parsing/parser.py +20 -7
langroid/parsing/repo_loader.py +108 -46
langroid/parsing/search.py +8 -0
langroid/parsing/table_loader.py +44 -0
langroid/parsing/url_loader.py +59 -13
langroid/parsing/urls.py +18 -9
langroid/parsing/utils.py +130 -9
langroid/parsing/web_search.py +73 -0
langroid/prompts/__init__.py +7 -0
langroid/prompts/chat-gpt4-system-prompt.md +68 -0
langroid/prompts/prompts_config.py +1 -1
langroid/utils/__init__.py +10 -0
langroid/utils/algorithms/__init__.py +3 -0
langroid/utils/configuration.py +0 -1
langroid/utils/constants.py +4 -0
langroid/utils/logging.py +2 -5
langroid/utils/output/__init__.py +15 -2
langroid/utils/output/status.py +33 -0
langroid/utils/pandas_utils.py +30 -0
langroid/utils/pydantic_utils.py +446 -4
langroid/utils/system.py +36 -1
langroid/vector_store/__init__.py +34 -2
langroid/vector_store/base.py +33 -2
langroid/vector_store/chromadb.py +42 -13
langroid/vector_store/lancedb.py +226 -60
langroid/vector_store/meilisearch.py +7 -6
langroid/vector_store/momento.py +3 -2
langroid/vector_store/qdrantdb.py +82 -11
{langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/METADATA +190 -129
langroid-0.1.219.dist-info/RECORD +127 -0
langroid/agent/special/recipient_validator_agent.py +0 -157
langroid/parsing/json.py +0 -64
langroid/utils/web/selenium_login.py +0 -36
langroid-0.1.139.dist-info/RECORD +0 -103
{langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
{langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/WHEEL +0 -0

langroid/agent/special/relevance_extractor_agent.py CHANGED Viewed

@@ -3,6 +3,7 @@ Agent to retrieve relevant segments from a body of text,
 that are relevant to a query.
 """
 import logging
 from typing import Optional, no_type_check
@@ -11,16 +12,18 @@ from rich.console import Console
 from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
 from langroid.agent.chat_document import ChatDocument
 from langroid.agent.tools.segment_extract_tool import SegmentExtractTool
+from langroid.language_models.base import LLMConfig
 from langroid.language_models.openai_gpt import OpenAIGPTConfig
+from langroid.mytypes import Entity
 from langroid.parsing.utils import extract_numbered_segments, number_segments
-from langroid.utils.constants import NO_ANSWER
+from langroid.utils.constants import DONE, NO_ANSWER
 console = Console()
 logger = logging.getLogger(__name__)
 class RelevanceExtractorAgentConfig(ChatAgentConfig):
-    llm: OpenAIGPTConfig = OpenAIGPTConfig()
+    llm: LLMConfig | None = OpenAIGPTConfig()
     segment_length: int = 1  # number of sentences per segment
     query: str = ""  # query for relevance extraction
     system_message = """
@@ -28,6 +31,7 @@ class RelevanceExtractorAgentConfig(ChatAgentConfig):
     <#1#>, <#2#>, <#3#>, etc.,
     followed by a QUERY. Extract ONLY the segment-numbers from
     the PASSAGE that are RELEVANT to the QUERY.
+    Present the extracted segment-numbers using the `extract_segments` tool/function.
     """
@@ -101,11 +105,23 @@ class RelevanceExtractorAgent(ChatAgent):
         """Method to handle a segmentExtractTool message from LLM"""
         spec = msg.segment_list
         if len(self.message_history) == 0:
-            return NO_ANSWER
-        if spec is None or spec.strip() == "":
-            return NO_ANSWER
+            return DONE + " " + NO_ANSWER
+        if spec is None or spec.strip() in ["", NO_ANSWER]:
+            return DONE + " " + NO_ANSWER
         assert self.numbered_passage is not None, "No numbered passage"
         # assume this has numbered segments
-        extracts = extract_numbered_segments(self.numbered_passage, spec)
+        try:
+            extracts = extract_numbered_segments(self.numbered_passage, spec)
+        except Exception:
+            return DONE + " " + NO_ANSWER
         # this response ends the task by saying DONE
-        return "DONE " + extracts
+        return DONE + " " + extracts
+    def handle_message_fallback(
+        self, msg: str | ChatDocument
+    ) -> str | ChatDocument | None:
+        """Handle case where LLM forgets to use SegmentExtractTool"""
+        if isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM:
+            return DONE + " " + NO_ANSWER
+        else:
+            return None

langroid/agent/special/retriever_agent.py CHANGED Viewed

@@ -1,201 +1,56 @@
 """
-Agent to retrieve relevant verbatim whole docs/records from a vector store,
+Deprecated: use DocChatAgent instead, with DocChatAgentConfig.retrieve_only=True,
+and if you want to retrieve FULL relevant doc-contents rather than just extracts,
+then set DocChatAgentConfig.extraction_granularity=-1
+This is an agent to retrieve relevant extracts from a vector store,
 where the LLM is used to filter for "true" relevance after retrieval from the
 vector store.
+This is essentially the same as DocChatAgent, except that instead of
+generating final summary answer based on relevant extracts, it just returns
+those extracts.
 See test_retriever_agent.py for example usage.
 """
 import logging
-from abc import ABC, abstractmethod
-from typing import List, Optional, Sequence
+from typing import Sequence
-from rich import print
 from rich.console import Console
-from langroid.agent.chat_document import ChatDocMetaData, ChatDocument
 from langroid.agent.special.doc_chat_agent import DocChatAgent, DocChatAgentConfig
-from langroid.embedding_models.models import OpenAIEmbeddingsConfig
-from langroid.language_models.base import StreamingIfAllowed
-from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
-from langroid.mytypes import DocMetaData, Document, Entity
-from langroid.parsing.parser import ParsingConfig, Splitter
-from langroid.prompts.prompts_config import PromptsConfig
-from langroid.utils.constants import NO_ANSWER
-from langroid.vector_store.base import VectorStoreConfig
-from langroid.vector_store.qdrantdb import QdrantDBConfig
+from langroid.mytypes import DocMetaData, Document
 console = Console()
 logger = logging.getLogger(__name__)
+# for backwards compatibility:
+RecordMetadata = DocMetaData
+RecordDoc = Document
+RetrieverAgentConfig = DocChatAgentConfig
-class RecordMetadata(DocMetaData):
-    id: None | str = None
-class RecordDoc(Document):
-    metadata: RecordMetadata
-class RetrieverAgentConfig(DocChatAgentConfig):
-    n_matches: int = 3
-    debug: bool = False
-    max_context_tokens = 500
-    conversation_mode = True
-    cache: bool = True  # cache results
-    gpt4: bool = True  # use GPT-4
-    stream: bool = True  # allow streaming where needed
-    max_tokens: int = 10000
-    vecdb: VectorStoreConfig = QdrantDBConfig(
-        collection_name=None,
-        storage_path=".qdrant/data/",
-        embedding=OpenAIEmbeddingsConfig(
-            model_type="openai",
-            model_name="text-embedding-ada-002",
-            dims=1536,
-        ),
-    )
-    llm: OpenAIGPTConfig = OpenAIGPTConfig(
-        type="openai",
-        chat_model=OpenAIChatModel.GPT4,
-    )
-    parsing: ParsingConfig = ParsingConfig(
-        splitter=Splitter.TOKENS,
-        chunk_size=100,
-        n_similar_docs=5,
-    )
-    prompts: PromptsConfig = PromptsConfig(
-        max_tokens=1000,
-    )
-class RetrieverAgent(DocChatAgent, ABC):
+class RetrieverAgent(DocChatAgent):
     """
-    Agent for retrieving whole records/docs matching a query
+    Agent for just retrieving chunks/docs/extracts matching a query
     """
-    def __init__(self, config: RetrieverAgentConfig):
+    def __init__(self, config: DocChatAgentConfig):
         super().__init__(config)
-        self.config: RetrieverAgentConfig = config
+        self.config: DocChatAgentConfig = config
+        logger.warning(
+            """
+        `RetrieverAgent` is deprecated. Use `DocChatAgent` instead, with
+        `DocChatAgentConfig.retrieve_only=True`, and if you want to retrieve
+        FULL relevant doc-contents rather than just extracts, then set
+        `DocChatAgentConfig.extraction_granularity=-1`
+        """
+        )
-    @abstractmethod
-    def get_records(self) -> Sequence[RecordDoc]:
-        pass
+    def get_records(self) -> Sequence[Document]:
+        raise NotImplementedError
     def ingest(self) -> None:
         records = self.get_records()
         if self.vecdb is None:
             raise ValueError("No vector store specified")
         self.vecdb.add_documents(records)
-    def llm_response(
-        self,
-        query: None | str | ChatDocument = None,
-    ) -> Optional[ChatDocument]:
-        if not self.llm_can_respond(query):
-            return None
-        if query is None:
-            return super().llm_response(None)  # type: ignore
-        if isinstance(query, ChatDocument):
-            query_str = query.content
-        else:
-            query_str = query
-        docs = self.get_relevant_extracts(query_str)
-        if len(docs) == 0:
-            return None
-        content = "\n\n".join([d.content for d in docs])
-        print(f"[green]{content}")
-        meta = dict(
-            sender=Entity.LLM,
-        )
-        meta.update(docs[0].metadata)
-        return ChatDocument(
-            content=content,
-            metadata=ChatDocMetaData(**meta),
-        )
-    def get_relevant_extracts(self, query: str) -> List[Document]:
-        """
-        Given a query, get the records/docs whose contents are most relevant to the
-            query. First get nearest docs from vector store, then select the best
-            matches according to the LLM.
-        Args:
-            query (str): query string
-        Returns:
-            List[Document]: list of Document objects
-        """
-        response = Document(
-            content=NO_ANSWER,
-            metadata=DocMetaData(
-                source="None",
-            ),
-        )
-        nearest_docs = self.get_relevant_chunks(query)
-        if len(nearest_docs) == 0:
-            return [response]
-        if self.llm is None:
-            logger.warning("No LLM specified")
-            return nearest_docs
-        with console.status("LLM selecting relevant docs from retrieved ones..."):
-            with StreamingIfAllowed(self.llm, False):
-                doc_list = self.llm_select_relevant_docs(query, nearest_docs)
-        return doc_list
-    def llm_select_relevant_docs(
-        self, query: str, docs: List[Document]
-    ) -> List[Document]:
-        """
-        Given a query and a list of docs, select the docs whose contents match best,
-            according to the LLM. Use the doc IDs to select the docs from the vector
-            store.
-        Args:
-            query: query string
-            docs: list of Document objects
-        Returns:
-            list of Document objects
-        """
-        doc_contents = "\n\n".join(
-            [f"DOC: ID={d.id()}, CONTENT: {d.content}" for d in docs]
-        )
-        prompt = f"""
-        Given the following QUERY:
-        {query}
-        and the following DOCS with IDs and contents
-        {doc_contents}
-        Find at most {self.config.n_matches} DOCs that are most relevant to the QUERY.
-        Return your answer as a sequence of DOC IDS ONLY, for example:
-        "id1 id2 id3..."
-        If there are no relevant docs, simply say {NO_ANSWER}.
-        Even if there is only one relevant doc, return it as a single ID.
-        Do not give any explanations or justifications.
-        """
-        default_response = Document(
-            content=NO_ANSWER,
-            metadata=DocMetaData(
-                source="None",
-            ),
-        )
-        if self.llm is None:
-            logger.warning("No LLM specified")
-            return [default_response]
-        response = self.llm.generate(
-            prompt, max_tokens=self.config.llm.max_output_tokens
-        )
-        if response.message == NO_ANSWER:
-            return [default_response]
-        ids = response.message.split()
-        if len(ids) == 0:
-            return [default_response]
-        if self.vecdb is None:
-            logger.warning("No vector store specified")
-            return [default_response]
-        docs = self.vecdb.get_documents_by_ids(ids)
-        return [
-            Document(content=d.content, metadata=DocMetaData(source="LLM"))
-            for d in docs
-        ]

langroid/agent/special/sql/__init__.py CHANGED Viewed

@@ -2,3 +2,10 @@ from .sql_chat_agent import SQLChatAgentConfig, SQLChatAgent
 from . import sql_chat_agent
 from . import utils
+__all__ = [
+    "SQLChatAgentConfig",
+    "SQLChatAgent",
+    "sql_chat_agent",
+    "utils",
+]

langroid/agent/special/sql/sql_chat_agent.py CHANGED Viewed

@@ -6,12 +6,13 @@ Functionality includes:
 - adding table and column context
 - asking a question about a SQL schema
 """
 import logging
-from typing import Any, Dict, Optional, Sequence, Union
+from typing import Any, Dict, List, Optional, Sequence, Union
 from rich import print
 from rich.console import Console
-from sqlalchemy import MetaData, Row, create_engine, text
+from sqlalchemy import MetaData, Row, create_engine, inspect, text
 from sqlalchemy.engine import Engine
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.orm import Session, sessionmaker
@@ -35,9 +36,7 @@ from langroid.agent.special.sql.utils.tools import (
     GetTableSchemaTool,
     RunQueryTool,
 )
-from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
 from langroid.mytypes import Entity
-from langroid.prompts.prompts_config import PromptsConfig
 from langroid.vector_store.base import VectorStoreConfig
 logger = logging.getLogger(__name__)
@@ -67,7 +66,6 @@ SQL_ERROR_MSG = "There was an error in your SQL Query"
 class SQLChatAgentConfig(ChatAgentConfig):
     system_message: str = DEFAULT_SQL_CHAT_SYSTEM_MESSAGE
     user_message: None | str = None
-    max_context_tokens: int = 1000
     cache: bool = True  # cache results
     debug: bool = False
     stream: bool = True  # allow streaming where needed
@@ -76,6 +74,7 @@ class SQLChatAgentConfig(ChatAgentConfig):
     vecdb: None | VectorStoreConfig = None
     context_descriptions: Dict[str, Dict[str, Union[str, Dict[str, str]]]] = {}
     use_schema_tools: bool = False
+    multi_schema: bool = False
     """
     Optional, but strongly recommended, context descriptions for tables, columns,
@@ -90,6 +89,9 @@ class SQLChatAgentConfig(ChatAgentConfig):
     is another table name and the value is a description of the relationship to
     that table.
+    If multi_schema support is enabled, the tables names in the description
+    should be of the form 'schema_name.table_name'.
     For example:
     {
         'table1': {
@@ -109,15 +111,6 @@ class SQLChatAgentConfig(ChatAgentConfig):
     }
     """
-    llm: OpenAIGPTConfig = OpenAIGPTConfig(
-        type="openai",
-        chat_model=OpenAIChatModel.GPT4,
-        completion_model=OpenAIChatModel.GPT4,
-    )
-    prompts: PromptsConfig = PromptsConfig(
-        max_tokens=1000,
-    )
 class SQLChatAgent(ChatAgent):
     """
@@ -155,19 +148,44 @@ class SQLChatAgent(ChatAgent):
         """Initialize the database metadata."""
         if self.engine is None:
             raise ValueError("Database engine is None")
+        self.metadata: MetaData | List[MetaData] = []
-        self.metadata = MetaData()
-        self.metadata.reflect(self.engine)
-        logger.info(
-            "SQLChatAgent initialized with database: %s and tables: %s",
-            self.engine,
-            self.metadata.tables,
-        )
+        if self.config.multi_schema:
+            logger.info(
+                "Initializing SQLChatAgent with database: %s",
+                self.engine,
+            )
+            self.metadata = []
+            inspector = inspect(self.engine)
+            for schema in inspector.get_schema_names():
+                metadata = MetaData(schema=schema)
+                metadata.reflect(self.engine)
+                self.metadata.append(metadata)
+                logger.info(
+                    "Initializing SQLChatAgent with database: %s, schema: %s, "
+                    "and tables: %s",
+                    self.engine,
+                    schema,
+                    metadata.tables,
+                )
+        else:
+            self.metadata = MetaData()
+            self.metadata.reflect(self.engine)
+            logger.info(
+                "SQLChatAgent initialized with database: %s and tables: %s",
+                self.engine,
+                self.metadata.tables,
+            )
     def _init_table_metadata(self) -> None:
         """Initialize metadata for the tables present in the database."""
         if not self.config.context_descriptions and isinstance(self.engine, Engine):
-            self.config.context_descriptions = extract_schema_descriptions(self.engine)
+            self.config.context_descriptions = extract_schema_descriptions(
+                self.engine, self.config.multi_schema
+            )
         if self.config.use_schema_tools:
             self.table_metadata = populate_metadata_with_schema_tools(
@@ -228,8 +246,10 @@ class SQLChatAgent(ChatAgent):
         if isinstance(msg, ChatDocument) and msg.function_call is not None:
             sender_name = msg.function_call.name
+        content = results.content if isinstance(results, ChatDocument) else results
         return ChatDocument(
-            content=results,
+            content=content,
             metadata=ChatDocMetaData(
                 source=Entity.AGENT,
                 sender=Entity.AGENT,
@@ -329,6 +349,10 @@ class SQLChatAgent(ChatAgent):
         Returns:
             str: The names of all tables in the database.
         """
+        if isinstance(self.metadata, list):
+            table_names = [", ".join(md.tables.keys()) for md in self.metadata]
+            return ", ".join(table_names)
         return ", ".join(self.metadata.tables.keys())
     def get_table_schema(self, msg: GetTableSchemaTool) -> str:

langroid/agent/special/sql/utils/__init__.py CHANGED Viewed

@@ -9,3 +9,14 @@ from . import description_extractors
 from . import populate_metadata
 from . import system_message
 from . import tools
+__all__ = [
+    "RunQueryTool",
+    "GetTableNamesTool",
+    "GetTableSchemaTool",
+    "GetColumnDescriptionsTool",
+    "description_extractors",
+    "populate_metadata",
+    "system_message",
+    "tools",
+]

langroid/agent/special/sql/utils/description_extractors.py CHANGED Viewed

@@ -1,10 +1,13 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 from sqlalchemy import inspect, text
 from sqlalchemy.engine import Engine
-def extract_postgresql_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
+def extract_postgresql_descriptions(
+    engine: Engine,
+    multi_schema: bool = False,
+) -> Dict[str, Dict[str, Any]]:
     """
     Extracts descriptions for tables and columns from a PostgreSQL database.
@@ -13,6 +16,7 @@ def extract_postgresql_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]
     Args:
         engine (Engine): SQLAlchemy engine connected to a PostgreSQL database.
+        multi_schema (bool): Generate descriptions for all schemas in the database.
     Returns:
         Dict[str, Dict[str, Any]]: A dictionary mapping table names to a
@@ -20,36 +24,53 @@ def extract_postgresql_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]
         column descriptions.
     """
     inspector = inspect(engine)
-    table_names: List[str] = inspector.get_table_names()
     result: Dict[str, Dict[str, Any]] = {}
-    with engine.connect() as conn:
-        for table in table_names:
-            table_comment = (
-                conn.execute(
-                    text(f"SELECT obj_description('{table}'::regclass)")
-                ).scalar()
-                or ""
-            )
+    def gen_schema_descriptions(schema: Optional[str] = None) -> None:
+        table_names: List[str] = inspector.get_table_names(schema=schema)
+        with engine.connect() as conn:
+            for table in table_names:
+                if schema is None:
+                    table_name = table
+                else:
+                    table_name = f"{schema}.{table}"
-            columns = {}
-            col_data = inspector.get_columns(table)
-            for idx, col in enumerate(col_data, start=1):
-                col_comment = (
+                table_comment = (
                     conn.execute(
-                        text(f"SELECT col_description('{table}'::regclass, {idx})")
+                        text(f"SELECT obj_description('{table_name}'::regclass)")
                     ).scalar()
                     or ""
                 )
-                columns[col["name"]] = col_comment
-            result[table] = {"description": table_comment, "columns": columns}
+                columns = {}
+                col_data = inspector.get_columns(table, schema=schema)
+                for idx, col in enumerate(col_data, start=1):
+                    col_comment = (
+                        conn.execute(
+                            text(
+                                f"SELECT col_description('{table_name}'::regclass, "
+                                f"{idx})"
+                            )
+                        ).scalar()
+                        or ""
+                    )
+                    columns[col["name"]] = col_comment
+                result[table_name] = {"description": table_comment, "columns": columns}
+    if multi_schema:
+        for schema in inspector.get_schema_names():
+            gen_schema_descriptions(schema)
+    else:
+        gen_schema_descriptions()
     return result
-def extract_mysql_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
+def extract_mysql_descriptions(
+    engine: Engine,
+    multi_schema: bool = False,
+) -> Dict[str, Dict[str, Any]]:
     """Extracts descriptions for tables and columns from a MySQL database.
     This method retrieves the descriptions of tables and their columns
@@ -57,6 +78,7 @@ def extract_mysql_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
     Args:
         engine (Engine): SQLAlchemy engine connected to a MySQL database.
+        multi_schema (bool): Generate descriptions for all schemas in the database.
     Returns:
         Dict[str, Dict[str, Any]]: A dictionary mapping table names to a
@@ -64,31 +86,45 @@ def extract_mysql_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
         column descriptions.
     """
     inspector = inspect(engine)
-    table_names: List[str] = inspector.get_table_names()
     result: Dict[str, Dict[str, Any]] = {}
-    with engine.connect() as conn:
-        for table in table_names:
-            query = text(
-                "SELECT table_comment FROM information_schema.tables WHERE"
-                " table_schema = :schema AND table_name = :table"
-            )
-            table_result = conn.execute(
-                query, {"schema": engine.url.database, "table": table}
-            )
-            table_comment = table_result.scalar() or ""
+    def gen_schema_descriptions(schema: Optional[str] = None) -> None:
+        table_names: List[str] = inspector.get_table_names(schema=schema)
-            columns = {}
-            for col in inspector.get_columns(table):
-                columns[col["name"]] = col.get("comment", "")
+        with engine.connect() as conn:
+            for table in table_names:
+                if schema is None:
+                    table_name = table
+                else:
+                    table_name = f"{schema}.{table}"
+                query = text(
+                    "SELECT table_comment FROM information_schema.tables WHERE"
+                    " table_schema = :schema AND table_name = :table"
+                )
+                table_result = conn.execute(
+                    query, {"schema": engine.url.database, "table": table_name}
+                )
+                table_comment = table_result.scalar() or ""
+                columns = {}
+                for col in inspector.get_columns(table, schema=schema):
+                    columns[col["name"]] = col.get("comment", "")
+                result[table_name] = {"description": table_comment, "columns": columns}
-            result[table] = {"description": table_comment, "columns": columns}
+    if multi_schema:
+        for schema in inspector.get_schema_names():
+            gen_schema_descriptions(schema)
+    else:
+        gen_schema_descriptions()
     return result
-def extract_default_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
+def extract_default_descriptions(
+    engine: Engine, multi_schema: bool = False
+) -> Dict[str, Dict[str, Any]]:
     """Extracts default descriptions for tables and columns from a database.
     This method retrieves the table and column names from the given database
@@ -96,6 +132,7 @@ def extract_default_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
     Args:
         engine (Engine): SQLAlchemy engine connected to a database.
+        multi_schema (bool): Generate descriptions for all schemas in the database.
     Returns:
         Dict[str, Dict[str, Any]]: A dictionary mapping table names to a
@@ -103,26 +140,36 @@ def extract_default_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
         empty column descriptions.
     """
     inspector = inspect(engine)
-    table_names: List[str] = inspector.get_table_names()
     result: Dict[str, Dict[str, Any]] = {}
-    for table in table_names:
-        columns = {}
-        for col in inspector.get_columns(table):
-            columns[col["name"]] = ""
+    def gen_schema_descriptions(schema: Optional[str] = None) -> None:
+        table_names: List[str] = inspector.get_table_names(schema=schema)
+        for table in table_names:
+            columns = {}
+            for col in inspector.get_columns(table):
+                columns[col["name"]] = ""
+            result[table] = {"description": "", "columns": columns}
-        result[table] = {"description": "", "columns": columns}
+    if multi_schema:
+        for schema in inspector.get_schema_names():
+            gen_schema_descriptions(schema)
+    else:
+        gen_schema_descriptions()
     return result
-def extract_schema_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
+def extract_schema_descriptions(
+    engine: Engine, multi_schema: bool = False
+) -> Dict[str, Dict[str, Any]]:
     """
     Extracts the schema descriptions from the database connected to by the engine.
     Args:
         engine (Engine): SQLAlchemy engine instance.
+        multi_schema (bool): Generate descriptions for all schemas in the database.
     Returns:
         Dict[str, Dict[str, Any]]: A dictionary representation of table and column
@@ -133,4 +180,6 @@ def extract_schema_descriptions(engine: Engine) -> Dict[str, Dict[str, Any]]:
         "postgresql": extract_postgresql_descriptions,
         "mysql": extract_mysql_descriptions,
     }
-    return extractors.get(engine.dialect.name, extract_default_descriptions)(engine)
+    return extractors.get(engine.dialect.name, extract_default_descriptions)(
+        engine, multi_schema=multi_schema
+    )

langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl

langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl