PyPI - langroid - Versions diffs - 0.40.0__py3-none-any.whl → 0.41.0__py3-none-any.whl - Mend

langroid 0.40.0py3-none-any.whl → 0.41.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

langroid/agent/tools/exa_search_tool.py +68 -0
langroid/agent/tools/tavily_search_tool.py +50 -0
langroid/parsing/search.py +1 -1
langroid/parsing/utils.py +4 -3
langroid/parsing/web_search.py +91 -7
langroid/vector_store/__init__.py +9 -11
langroid/vector_store/base.py +3 -0
langroid/vector_store/pineconedb.py +427 -0
langroid/vector_store/postgres.py +23 -15
langroid/vector_store/weaviatedb.py +16 -2
{langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/METADATA +10 -1
{langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/RECORD +14 -11
{langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/WHEEL +0 -0
{langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/licenses/LICENSE +0 -0

langroid/agent/tools/exa_search_tool.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""
+A tool to trigger an Exa search for a given query
+(https://docs.exa.ai/reference/getting-started)
+and return the top results with their titles, links, summaries.
+Since the tool is stateless (i.e. does not need
+access to agent state), it can be enabled for any agent, without having to define a
+special method inside the agent: `agent.enable_message(ExaSearchTool)`
+NOTE: To use this tool, you need to:
+* set the EXA_API_KEY environment variable in
+your `.env` file, e.g. `EXA_API_KEY=your_api_key_here`
+(Note as of 28 Jan 2023, Metaphor was renamed to Exa, so the key to set is
+`EXA_API_KEY=your_api_key_here`)
+* install langroid with the `exa` extra, e.g.
+`pip install langroid[exa]` or `uv pip install langroid[exa]`
+or `poetry add langroid[exa]` or `uv add langroid[exa]`
+(it installs the `exa-py` package from PyPI, imported as `exa_py`).
+For more information, please refer to the official docs:
+https://exa.ai/
+"""
+from typing import List, Tuple
+from langroid.agent.tool_message import ToolMessage
+from langroid.parsing.web_search import exa_search
+class ExaSearchTool(ToolMessage):
+    # Tool message that runs an Exa web search for `query` and returns up to
+    # `num_results` results rendered as text (see `handle`).
+    request: str = "exa_search"
+    purpose: str = """
+            To search the web and return up to <num_results>
+            links relevant to the given <query>. When using this tool,
+            ONLY show the required JSON, DO NOT SAY ANYTHING ELSE.
+            Wait for the results of the web search, and then use them to
+            compose your response.
+            """
+    query: str
+    num_results: int
+    def handle(self) -> str:
+        """
+        Run `exa_search` with this message's query and result count, then
+        render the hits as text.
+        Returns:
+            str: A banner line followed by the `str()` of every search result,
+                with consecutive results separated by two newlines.
+        """
+        hits = exa_search(self.query, self.num_results)
+        # each result renders itself through str(); a blank line separates them
+        formatted = "\n\n".join(map(str, hits))
+        return f"""
+        BELOW ARE THE RESULTS FROM THE WEB SEARCH. USE THESE TO COMPOSE YOUR RESPONSE:
+        {formatted}
+        """
+    @classmethod
+    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]:
+        """Return a one-element list holding a sample instance of this tool."""
+        sample = cls(
+            query="When was the Llama2 Large Language Model (LLM) released?",
+            num_results=3,
+        )
+        return [sample]

langroid/agent/tools/tavily_search_tool.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""
+A tool to trigger a Tavily search for a given query, and return the top results with
+their titles, links, summaries. Since the tool is stateless (i.e. does not need
+access to agent state), it can be enabled for any agent, without having to define a
+special method inside the agent: `agent.enable_message(TavilySearchTool)`
+"""
+from typing import List, Tuple
+from langroid.agent.tool_message import ToolMessage
+from langroid.parsing.web_search import tavily_search
+class TavilySearchTool(ToolMessage):
+    # Tool message that runs a Tavily web search for `query` and returns up to
+    # `num_results` results rendered as text (see `handle`).
+    request: str = "tavily_search"
+    purpose: str = """
+            To search the web and return up to <num_results>
+            links relevant to the given <query>. When using this tool,
+            ONLY show the required JSON, DO NOT SAY ANYTHING ELSE.
+            Wait for the results of the web search, and then use them to
+            compose your response.
+            """
+    query: str
+    num_results: int
+    def handle(self) -> str:
+        """
+        Run `tavily_search` with this message's query and result count, then
+        render the hits as text.
+        Returns:
+            str: A banner line followed by the `str()` of every search result,
+                with consecutive results separated by two newlines.
+        """
+        found = tavily_search(self.query, self.num_results)
+        # stringify each result, then separate neighbours with a blank line
+        rendered = [str(item) for item in found]
+        body = "\n\n".join(rendered)
+        return f"""
+        BELOW ARE THE RESULTS FROM THE WEB SEARCH. USE THESE TO COMPOSE YOUR RESPONSE:
+        {body}
+        """
+    @classmethod
+    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]:
+        """Return a one-element list holding a sample instance of this tool."""
+        sample = cls(
+            query="When was the Llama2 Large Language Model (LLM) released?",
+            num_results=3,
+        )
+        return [sample]

langroid/parsing/search.py CHANGED Viewed

@@ -118,7 +118,7 @@ def preprocess_text(text: str) -> str:
         str: The preprocessed text.
     """
     # Ensure the NLTK resources are available
-    for resource in ["punkt", "wordnet", "stopwords"]:
+    for resource in ["tokenizers/punkt", "corpora/wordnet", "corpora/stopwords"]:
         download_nltk_resource(resource)
     # Lowercase the text

langroid/parsing/utils.py CHANGED Viewed

@@ -28,12 +28,13 @@ def download_nltk_resource(resource: str) -> None:
     try:
         nltk.data.find(resource)
     except LookupError:
-        nltk.download(resource, quiet=True)
+        model = resource.split("/")[-1]
+        nltk.download(model, quiet=True)
 # Download punkt_tab resource at module import
-download_nltk_resource("punkt_tab")
-download_nltk_resource("gutenberg")
+download_nltk_resource("tokenizers/punkt_tab")
+download_nltk_resource("corpora/gutenberg")
 T = TypeVar("T")

langroid/parsing/web_search.py CHANGED Viewed

@@ -16,6 +16,8 @@ from duckduckgo_search import DDGS
 from googleapiclient.discovery import Resource, build
 from requests.models import Response
+from langroid.exceptions import LangroidImportError
 class WebSearchResult:
     """
@@ -109,13 +111,7 @@ def metaphor_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
     try:
         from metaphor_python import Metaphor
     except ImportError:
-        raise ImportError(
-            "You are attempting to use the `metaphor_python` library;"
-            "To use it, please install langroid with the `metaphor` extra, e.g. "
-            "`pip install langroid[metaphor]` or `poetry add langroid[metaphor]` "
-            "or `uv add langroid[metaphor]`"
-            "(it installs the `metaphor_python` package from pypi)."
-        )
+        raise LangroidImportError("metaphor-python", "metaphor")
     client = Metaphor(api_key=api_key)
@@ -130,6 +126,53 @@ def metaphor_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
     ]
+def exa_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
+    """
+    Query the Exa search API and wrap the returned hits as WebSearchResult
+    objects.
+    Args:
+        query (str): The search query to send to Exa.
+        num_results (int): Number of top matching results to request.
+    Returns:
+        List[WebSearchResult]: One entry per result in the Exa response, built
+            from the result's title (empty string when the title is falsy)
+            and its URL.
+    Raises:
+        ValueError: If the EXA_API_KEY environment variable is not set.
+        LangroidImportError: If the `exa_py` package cannot be imported.
+    """
+    load_dotenv()
+    api_key = os.getenv("EXA_API_KEY")
+    if not api_key:
+        # Only EXA_API_KEY is consulted, so the message names exactly that
+        # variable (same wording as tavily_search).
+        raise ValueError(
+            "EXA_API_KEY environment variable is not set. "
+            "Please set it to your API key and try again."
+        )
+    try:
+        from exa_py import Exa
+    except ImportError:
+        raise LangroidImportError("exa-py", "exa")
+    client = Exa(api_key=api_key)
+    response = client.search(
+        query=query,
+        num_results=num_results,
+    )
+    return [
+        WebSearchResult(
+            title=result.title or "",
+            link=result.url,
+            max_content_length=3500,
+            max_summary_length=300,
+        )
+        for result in response.results
+    ]
 def duckduckgo_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
     """
     Method that makes an API call by DuckDuckGo client that queries
@@ -154,3 +197,44 @@ def duckduckgo_search(query: str, num_results: int = 5) -> List[WebSearchResult]
         )
         for result in search_results
     ]
+def tavily_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
+    """
+    Query the Tavily search API and wrap the returned hits as WebSearchResult
+    objects.
+    Args:
+        query (str): The search query to send to Tavily.
+        num_results (int): Passed to Tavily as `max_results`.
+    Returns:
+        List[WebSearchResult]: One entry per item in the response's
+            "results" list, built from the item's "title" and "url".
+    Raises:
+        ValueError: If the TAVILY_API_KEY environment variable is not set.
+        LangroidImportError: If the `tavily` package cannot be imported.
+    """
+    load_dotenv()
+    tavily_key = os.getenv("TAVILY_API_KEY")
+    if not tavily_key:
+        raise ValueError(
+            "TAVILY_API_KEY environment variable is not set. "
+            "Please set it to your API key and try again."
+        )
+    try:
+        from tavily import TavilyClient
+    except ImportError:
+        raise LangroidImportError("tavily-python", "tavily")
+    payload = TavilyClient(api_key=tavily_key).search(
+        query=query, max_results=num_results
+    )
+    hits: List[WebSearchResult] = []
+    for item in payload["results"]:
+        hits.append(
+            WebSearchResult(
+                title=item["title"],
+                link=item["url"],
+                max_content_length=3500,
+                max_summary_length=300,
+            )
+        )
+    return hits

langroid/vector_store/__init__.py CHANGED Viewed

@@ -23,11 +23,7 @@ try:
     MeiliSearch
     MeiliSearchConfig
     __all__.extend(["meilisearch", "MeiliSearch", "MeiliSearchConfig"])
-except ImportError:
-    pass
-try:
     from . import lancedb
     from .lancedb import LanceDB, LanceDBConfig
@@ -35,10 +31,6 @@ try:
     LanceDB
     LanceDBConfig
     __all__.extend(["lancedb", "LanceDB", "LanceDBConfig"])
-except ImportError:
-    pass
-try:
     from . import chromadb
     from .chromadb import ChromaDBConfig, ChromaDB
@@ -46,10 +38,7 @@ try:
     ChromaDB
     ChromaDBConfig
     __all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
-except ImportError:
-    pass
-try:
     from . import postgres
     from .postgres import PostgresDB, PostgresDBConfig
@@ -57,6 +46,7 @@ try:
     PostgresDB
     PostgresDBConfig
     __all__.extend(["postgres", "PostgresDB", "PostgresDBConfig"])
     from . import weaviatedb
     from .weaviatedb import WeaviateDBConfig, WeaviateDB
@@ -64,5 +54,13 @@ try:
     WeaviateDB
     WeaviateDBConfig
     __all__.extend(["weaviatedb", "WeaviateDB", "WeaviateDBConfig"])
+    from . import pineconedb
+    from .pineconedb import PineconeDB, PineconeDBConfig
+    pineconedb
+    PineconeDB
+    PineconeDBConfig
+    __all__.extend(["pineconedb", "PineconeDB", "PineconeDBConfig"])
 except ImportError:
     pass

langroid/vector_store/base.py CHANGED Viewed

@@ -59,6 +59,7 @@ class VectorStore(ABC):
         from langroid.vector_store.lancedb import LanceDB, LanceDBConfig
         from langroid.vector_store.meilisearch import MeiliSearch, MeiliSearchConfig
         from langroid.vector_store.momento import MomentoVI, MomentoVIConfig
+        from langroid.vector_store.pineconedb import PineconeDB, PineconeDBConfig
         from langroid.vector_store.postgres import PostgresDB, PostgresDBConfig
         from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig
         from langroid.vector_store.weaviatedb import WeaviateDB, WeaviateDBConfig
@@ -77,6 +78,8 @@ class VectorStore(ABC):
             return PostgresDB(config)
         elif isinstance(config, WeaviateDBConfig):
             return WeaviateDB(config)
+        elif isinstance(config, PineconeDBConfig):
+            return PineconeDB(config)
         else:
             logger.warning(

langroid/vector_store/pineconedb.py ADDED Viewed

@@ -0,0 +1,427 @@
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+)
+from dotenv import load_dotenv
+from langroid import LangroidImportError
+from langroid.mytypes import Document
+# import dataclass
+from langroid.pydantic_v1 import BaseModel
+from langroid.utils.configuration import settings
+from langroid.vector_store.base import VectorStore, VectorStoreConfig
+logger = logging.getLogger(__name__)
+# Flipped to False below when the optional `pinecone` package cannot be
+# imported; PineconeDB.__init__ checks it and raises LangroidImportError.
+has_pinecone: bool = True
+try:
+    from pinecone import Pinecone, PineconeApiException, ServerlessSpec
+except ImportError:
+    # Placeholders so that this module can still be imported without pinecone.
+    # NOTE(review): presumably skipped under TYPE_CHECKING so that type
+    # checkers only see the real pinecone names -- confirm.
+    if not TYPE_CHECKING:
+        class ServerlessSpec(BaseModel):
+            """
+            Fallback Serverless specification configuration to avoid import errors.
+            """
+            cloud: str
+            region: str
+        PineconeApiException = Any  # type: ignore
+        Pinecone = Any  # type: ignore
+        has_pinecone = False
+# Immutable (name, vector count) summary of a single Pinecone index.
+@dataclass(frozen=True)
+class IndexMeta:
+    # index name as passed to the Pinecone client
+    name: str
+    # vector count taken from the index stats; _fetch_index_meta stores -1
+    # here when the stats lookup raised PineconeApiException
+    total_vector_count: int
+class PineconeDBConfig(VectorStoreConfig):
+    # Settings for the PineconeDB vector store.
+    # NOTE(review): `cloud` is not read anywhere in this module -- confirm use
+    cloud: bool = True
+    # index name; PineconeDB.__init__ creates the index when this is truthy
+    collection_name: str | None = "temp"
+    # passed as `spec` to the client's create_index call in create_collection
+    spec: ServerlessSpec = ServerlessSpec(cloud="aws", region="us-east-1")
+    # forwarded to create_index only when set to a truthy value
+    deletion_protection: Literal["enabled", "disabled"] | None = None
+    # passed as `metric` to the client's create_index call
+    metric: str = "cosine"
+    # `limit` sent with each list_paginated request in get_all_documents
+    pagination_size: int = 100
+class PineconeDB(VectorStore):
+    def __init__(self, config: PineconeDBConfig = PineconeDBConfig()):
+        """
+        Build a Pinecone client from the PINECONE_API_KEY environment variable
+        and, when the config names a collection, create it via
+        `create_collection`.
+        Args:
+            config: PineconeDBConfig - settings for this store
+        Raises:
+            LangroidImportError: if the `pinecone` package could not be imported
+            ValueError: if PINECONE_API_KEY is not set
+        """
+        super().__init__(config)
+        if not has_pinecone:
+            raise LangroidImportError("pinecone", "pinecone")
+        self.config: PineconeDBConfig = config
+        load_dotenv()
+        api_key = os.getenv("PINECONE_API_KEY")
+        if not api_key:
+            raise ValueError("PINECONE_API_KEY not set, could not instantiate client")
+        self.client = Pinecone(api_key=api_key)
+        initial_name = config.collection_name
+        if initial_name:
+            self.create_collection(
+                collection_name=initial_name,
+                replace=config.replace_collection,
+            )
+    def clear_empty_collections(self) -> int:
+        """
+        Delete every Pinecone index that currently holds no vectors.
+        Indexes that contain vectors are left untouched. Indexes whose stats
+        could not be fetched (vector count of -1) are also left untouched,
+        because their emptiness is unknown; a warning is logged for each.
+        Returns:
+            Number of empty indexes for which deletion was attempted
+        """
+        n_deletes = 0
+        # empty=True makes _list_index_metas return ALL indexes, so the
+        # emptiness test has to happen here, per index.
+        for index in self._list_index_metas(empty=True):
+            if index.total_vector_count == -1:
+                logger.warning(
+                    f"Error fetching details for {index.name} when scanning indexes"
+                )
+                continue
+            if index.total_vector_count != 0:
+                continue
+            self.delete_collection(collection_name=index.name)
+            n_deletes += 1
+        return n_deletes
+    def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
+        """
+        Delete every Pinecone index whose name starts with `prefix`.
+        Args:
+            really: bool - safety switch; nothing is deleted unless it is True
+            prefix: str - only index names starting with this string are
+                targeted (the default "" matches every name)
+        Returns:
+            Number of indexes with a vector count of zero or more for which
+            deletion was attempted; indexes whose count is negative are
+            deleted as well but are not included in the total
+        """
+        if not really:
+            logger.warning("Not deleting all collections, set really=True to confirm")
+            return 0
+        targets = []
+        for meta in self._list_index_metas(empty=True):
+            if meta.name.startswith(prefix):
+                targets.append(meta)
+        if not targets:
+            logger.warning(f"No collections found with prefix {prefix}")
+            return 0
+        n_empty_deletes = 0
+        n_non_empty_deletes = 0
+        for meta in targets:
+            self.delete_collection(collection_name=meta.name)
+            if meta.total_vector_count == 0:
+                n_empty_deletes += 1
+            elif meta.total_vector_count > 0:
+                n_non_empty_deletes += 1
+        logger.warning(
+            f"""
+            Deleted {n_empty_deletes} empty indexes and
+            {n_non_empty_deletes} non-empty indexes
+            """
+        )
+        return n_empty_deletes + n_non_empty_deletes
+    def list_collections(self, empty: bool = False) -> List[str]:
+        """
+        List the names of the Pinecone indexes visible to the client.
+        Args:
+            empty: bool - when True every index name is returned; when False
+                only indexes whose stats report at least one vector
+        Returns:
+            List of index names
+        """
+        names = self.client.list_indexes().names()
+        if empty:
+            return list(names)
+        populated: List[str] = []
+        for name in names:
+            stats = self.client.Index(name=name).describe_index_stats()
+            if stats.get("total_vector_count", 0) > 0:
+                populated.append(name)
+        return populated
+    def _list_index_metas(self, empty: bool = False) -> List[IndexMeta]:
+        """
+        Describe the Pinecone indexes visible to the client.
+        Args:
+            empty: bool - when True every index is described; when False only
+                indexes with a positive vector count are kept
+        Returns:
+            List of IndexMeta objects, one per retained index
+        """
+        metas = [
+            self._fetch_index_meta(name)
+            for name in self.client.list_indexes().names()
+        ]
+        if empty:
+            return metas
+        return [meta for meta in metas if meta.total_vector_count > 0]
+    def _fetch_index_meta(self, index_name: str) -> IndexMeta:
+        """
+        Look up one index's vector count and pair it with the index name.
+        Args:
+            index_name: str - Name of the index in Pinecone
+        Returns:
+            IndexMeta carrying the reported total_vector_count (0 when the
+            stats omit it), or -1 when the lookup raised PineconeApiException
+        """
+        try:
+            stats = self.client.Index(name=index_name).describe_index_stats()
+            count = stats.get("total_vector_count", 0)
+        except PineconeApiException as err:
+            logger.warning(f"Error fetching details for index {index_name}")
+            logger.warning(err)
+            count = -1
+        return IndexMeta(name=index_name, total_vector_count=count)
+    def create_collection(self, collection_name: str, replace: bool = False) -> None:
+        """
+        Create a collection with the given name, optionally replacing an existing
+        collection if `replace` is True.
+        An existing index that is ready and non-empty is kept untouched unless
+        `replace` is True; any other existing index with this name is deleted
+        and created afresh. Errors raised by the client's create_index call are
+        logged, not re-raised.
+        Args:
+            collection_name: str - Name of the collection (Pinecone index) to create.
+            replace: Optional[Bool] - Whether to replace an existing collection
+                with the same name. Defaults to False.
+        Raises:
+            ValueError: if `collection_name` does not match the pattern of
+                lowercase letters, digits and '-'
+        """
+        pattern = re.compile(r"^[a-z0-9-]+$")
+        if not pattern.match(collection_name):
+            raise ValueError(
+                "Pinecone index names must be lowercase alphanumeric characters or '-'"
+            )
+        # the store is pointed at the new name before any existence check
+        self.config.collection_name = collection_name
+        if collection_name in self.list_collections(empty=True):
+            index = self.client.Index(name=collection_name)
+            stats = index.describe_index_stats()
+            status = self.client.describe_index(name=collection_name)
+            if status["status"]["ready"] and stats["total_vector_count"] > 0:
+                logger.warning(f"Non-empty collection {collection_name} already exists")
+                if not replace:
+                    logger.warning("Not replacing collection")
+                    return
+                else:
+                    logger.warning("Recreating fresh collection")
+            # reached for replace=True, and for existing indexes that are
+            # empty or not ready
+            self.delete_collection(collection_name=collection_name)
+        payload = {
+            "name": collection_name,
+            "dimension": self.embedding_dim,
+            "spec": self.config.spec,
+            "metric": self.config.metric,
+            "timeout": self.config.timeout,
+        }
+        # only send deletion_protection when the config sets it
+        if self.config.deletion_protection:
+            payload["deletion_protection"] = self.config.deletion_protection
+        try:
+            self.client.create_index(**payload)
+        except PineconeApiException as e:
+            logger.error(e)
+    def delete_collection(self, collection_name: str) -> None:
+        """
+        Ask Pinecone to delete the named index; a PineconeApiException is
+        logged rather than raised.
+        Args:
+            collection_name: str - name of the index to delete
+        """
+        logger.info(f"Attempting to delete {collection_name}")
+        try:
+            self.client.delete_index(name=collection_name)
+        except PineconeApiException as err:
+            # first the context line, then the exception itself
+            for message in (f"Failed to delete {collection_name}", err):
+                logger.error(message)
+    def add_documents(self, documents: Sequence[Document], namespace: str = "") -> None:
+        """
+        Embed the given documents and upsert them into the configured index,
+        creating the index first when it does not exist yet.
+        Each vector's metadata is the document's own metadata overlaid with
+        the document's remaining top-level fields. Upserts are sent in batches
+        of `config.batch_size`; a failed batch is logged and the remaining
+        batches are still attempted.
+        Args:
+            documents: Sequence[Document] - documents to ingest
+            namespace: str - index partition to write into; omitted from the
+                request when empty
+        Raises:
+            ValueError: if no collection name is configured
+        """
+        if self.config.collection_name is None:
+            raise ValueError("No collection name set, cannot ingest docs")
+        if len(documents) == 0:
+            logger.warning("Empty list of documents passed into add_documents")
+            return
+        super().maybe_add_ids(documents)
+        document_dicts = [doc.dict() for doc in documents]
+        document_ids = [doc.id() for doc in documents]
+        embedding_vectors = self.embedding_fn([doc.content for doc in documents])
+        vectors = [
+            {
+                "id": document_id,
+                "values": embedding_vector,
+                "metadata": {
+                    **document_dict["metadata"],
+                    **{
+                        key: value
+                        for key, value in document_dict.items()
+                        if key != "metadata"
+                    },
+                },
+            }
+            for document_dict, document_id, embedding_vector in zip(
+                document_dicts, document_ids, embedding_vectors
+            )
+        ]
+        if self.config.collection_name not in self.list_collections(empty=True):
+            self.create_collection(
+                collection_name=self.config.collection_name, replace=True
+            )
+        index = self.client.Index(name=self.config.collection_name)
+        batch_size = self.config.batch_size
+        # the namespace argument is only sent when one was requested
+        upsert_kwargs: Dict[str, str] = {"namespace": namespace} if namespace else {}
+        for start in range(0, len(vectors), batch_size):
+            end = min(start + batch_size, len(vectors))
+            try:
+                index.upsert(vectors=vectors[start:end], **upsert_kwargs)
+            except PineconeApiException as e:
+                # report the real slice bounds of the batch that failed
+                logger.error(f"Unable to add docs between indices {start} and {end}")
+                logger.error(e)
+    def get_all_documents(
+        self, prefix: str = "", namespace: str = ""
+    ) -> List[Document]:
+        """
+        Page through the vector ids of the configured collection and fetch
+        the document stored with each id.
+        Args:
+            prefix: str - document id prefix to search for
+            namespace: str - partition of vectors to search within the index
+        Returns:
+            Documents gathered from every page, in page order
+        Raises:
+            ValueError: if no collection name is configured
+        """
+        if self.config.collection_name is None:
+            raise ValueError("No collection name set, cannot retrieve docs")
+        request_filters: Dict[str, Union[str, int]] = {
+            "limit": self.config.pagination_size
+        }
+        # optional filters are only sent when non-empty
+        for option, value in (("prefix", prefix), ("namespace", namespace)):
+            if value:
+                request_filters[option] = value
+        index = self.client.Index(name=self.config.collection_name)
+        collected: List[Document] = []
+        while True:
+            page = index.list_paginated(**request_filters)
+            page_vectors = page.get("vectors", [])
+            if not page_vectors:
+                logger.warning("Received empty list while requesting for vector ids")
+                logger.warning("Halting fetch requests")
+                if settings.debug:
+                    logger.debug(f"Request for failed fetch was: {request_filters}")
+                break
+            page_ids = [vector.get("id") for vector in page_vectors]
+            collected.extend(
+                self.get_documents_by_ids(ids=page_ids, namespace=namespace or "")
+            )
+            next_token = page.get("pagination", {}).get("next", None)
+            if not next_token:
+                break
+            # carry the cursor into the next list_paginated request
+            request_filters["pagination_token"] = next_token
+        return collected
+    def get_documents_by_ids(
+        self, ids: List[str], namespace: str = ""
+    ) -> List[Document]:
+        """
+        Fetch the documents stored in the metadata of the given vector ids.
+        Args:
+            ids: List[str] - vector data object ids to retrieve
+            namespace: str - partition of vectors to search within the index
+        Returns:
+            Documents in the order of `ids`; ids missing from the fetch
+            response are skipped
+        Raises:
+            ValueError: if no collection name is configured
+        """
+        if self.config.collection_name is None:
+            raise ValueError("No collection name set, cannot retrieve docs")
+        index = self.client.Index(name=self.config.collection_name)
+        fetch_kwargs: Dict[str, Any] = {"ids": ids}
+        if namespace:
+            fetch_kwargs["namespace"] = namespace
+        records = index.fetch(**fetch_kwargs)
+        by_id = dict(records["vectors"].items())
+        found: List[Document] = []
+        for _id in ids:
+            if _id in by_id:
+                found.append(
+                    self.transform_pinecone_vector(by_id[_id].get("metadata", {}))
+                )
+        return found
+    def similar_texts_with_scores(
+        self,
+        text: str,
+        k: int = 1,
+        where: Optional[str] = None,
+        namespace: Optional[str] = None,
+    ) -> List[Tuple[Document, float]]:
+        """
+        Embed `text` and query the configured index for its nearest vectors.
+        Args:
+            text: str - query text to embed
+            k: int - number of matches to request; must satisfy 1 <= k <= 9999
+            where: Optional[str] - JSON string decoded with `json.loads` and
+                sent as the query `filter`; ignored when empty
+            namespace: Optional[str] - index partition to search; omitted
+                from the request when empty
+        Returns:
+            (Document, score) pairs, one per match in the query response
+        Raises:
+            ValueError: if no collection name is configured or `k` is out of
+                range
+        """
+        if self.config.collection_name is None:
+            raise ValueError("No collection name set, cannot search")
+        if k < 1 or k > 9999:
+            raise ValueError(
+                f"TopK for Pinecone vector search must be 1 <= k < 10000, k was {k}"
+            )
+        vector_search_request: Dict[str, Any] = {
+            "top_k": k,
+            "include_metadata": True,
+            "vector": self.embedding_fn([text])[0],
+        }
+        if where:
+            vector_search_request["filter"] = json.loads(where)
+        if namespace:
+            vector_search_request["namespace"] = namespace
+        index = self.client.Index(name=self.config.collection_name)
+        response = index.query(**vector_search_request)
+        doc_score_pairs = [
+            (
+                self.transform_pinecone_vector(match.get("metadata", {})),
+                match.get("score", 0),
+            )
+            for match in response.get("matches", [])
+        ]
+        if settings.debug:
+            # default=None keeps debug logging from raising when nothing matched
+            max_score = max((score for _, score in doc_score_pairs), default=None)
+            logger.info(f"Found {len(doc_score_pairs)} matches, max score: {max_score}")
+        self.show_if_debug(doc_score_pairs)
+        return doc_score_pairs
+    def transform_pinecone_vector(self, metadata_dict: Dict[str, Any]) -> Document:
+        """
+        Build a document from the metadata of a Pinecone vector.
+        The metadata entries are passed as keyword arguments to the configured
+        document class, and a copy of the whole metadata dictionary is passed
+        as its `metadata` argument.
+        Args:
+            metadata_dict: Dict - the metadata dictionary from the Pinecone
+                vector query match
+        Returns:
+            Instance of `self.config.document_class`
+        """
+        fields = dict(metadata_dict)
+        # set last, so any "metadata" key inside the payload is replaced
+        fields["metadata"] = dict(metadata_dict)
+        return self.config.document_class(**fields)

langroid/vector_store/postgres.py CHANGED Viewed

@@ -5,21 +5,6 @@ import os
 import uuid
 from typing import Any, Dict, List, Optional, Sequence, Tuple
-from sqlalchemy import (
-    Column,
-    MetaData,
-    String,
-    Table,
-    case,
-    create_engine,
-    inspect,
-    text,
-)
-from sqlalchemy.dialects.postgresql import JSONB
-from sqlalchemy.engine import Connection, Engine
-from sqlalchemy.orm import sessionmaker
-from sqlalchemy.sql.expression import insert
 from langroid.embedding_models.base import (
     EmbeddingModelsConfig,
 )
@@ -28,6 +13,27 @@ from langroid.exceptions import LangroidImportError
 from langroid.mytypes import DocMetaData, Document
 from langroid.vector_store.base import VectorStore, VectorStoreConfig
+has_postgres: bool = True
+try:
+    from sqlalchemy import (
+        Column,
+        MetaData,
+        String,
+        Table,
+        case,
+        create_engine,
+        inspect,
+        text,
+    )
+    from sqlalchemy.dialects.postgresql import JSONB
+    from sqlalchemy.engine import Connection, Engine
+    from sqlalchemy.orm import sessionmaker
+    from sqlalchemy.sql.expression import insert
+except ImportError:
+    Engine = Any  # type: ignore
+    Connection = Any  # type: ignore
+    has_postgres = False
 logger = logging.getLogger(__name__)
@@ -48,6 +54,8 @@ class PostgresDBConfig(VectorStoreConfig):
 class PostgresDB(VectorStore):
     def __init__(self, config: PostgresDBConfig = PostgresDBConfig()):
         super().__init__(config)
+        if not has_postgres:
+            raise LangroidImportError("pgvector", "postgres")
         self.config: PostgresDBConfig = config
         self.engine = self._create_engine()
         PostgresDB._create_vector_extension(self.engine)

langroid/vector_store/weaviatedb.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import logging
 import os
 import re
-from typing import Any, List, Optional, Sequence, Tuple
+from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple
 from dotenv import load_dotenv
@@ -15,6 +15,7 @@ from langroid.utils.configuration import settings
 from langroid.vector_store.base import VectorStore, VectorStoreConfig
 logger = logging.getLogger(__name__)
+has_weaviate: bool = True
 try:
     import weaviate
     from weaviate.classes.config import (
@@ -25,7 +26,18 @@ try:
     from weaviate.classes.query import Filter, MetadataQuery
     from weaviate.util import generate_uuid5, get_valid_uuid
 except ImportError:
-    raise LangroidImportError("weaviate", "weaviate")
+    has_weaviate = False
+    if not TYPE_CHECKING:
+        class VectorDistances:
+            """
+            Fallback class when weaviate is not installed, to avoid import errors.
+            """
+            COSINE: str = "cosine"
+            DOTPRODUCT: str = "dot"
+            L2: str = "l2"
 class WeaviateDBConfig(VectorStoreConfig):
@@ -39,6 +51,8 @@ class WeaviateDBConfig(VectorStoreConfig):
 class WeaviateDB(VectorStore):
     def __init__(self, config: WeaviateDBConfig = WeaviateDBConfig()):
         super().__init__(config)
+        if not has_weaviate:
+            raise LangroidImportError("weaviate", "weaviate")
         self.config: WeaviateDBConfig = config
         load_dotenv()
         if not self.config.cloud:

{langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.40.0
+Version: 0.41.0
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
@@ -15,6 +15,7 @@ Requires-Dist: colorlog<7.0.0,>=6.7.0
 Requires-Dist: docling<3.0.0,>=2.16.0
 Requires-Dist: docstring-parser<1.0,>=0.16
 Requires-Dist: duckduckgo-search<7.0.0,>=6.0.0
+Requires-Dist: exa-py>=1.8.7
 Requires-Dist: faker<19.0.0,>=18.9.0
 Requires-Dist: fakeredis<3.0.0,>=2.12.1
 Requires-Dist: fire<1.0.0,>=0.5.0
@@ -48,6 +49,7 @@ Requires-Dist: redis<6.0.0,>=5.0.1
 Requires-Dist: requests-oauthlib<2.0.0,>=1.3.1
 Requires-Dist: requests<3.0.0,>=2.31.0
 Requires-Dist: rich<14.0.0,>=13.3.4
+Requires-Dist: tavily-python>=0.5.0
 Requires-Dist: thefuzz<1.0.0,>=0.20.0
 Requires-Dist: tiktoken<1.0.0,>=0.7.0
 Requires-Dist: trafilatura<2.0.0,>=1.5.0
@@ -106,6 +108,8 @@ Provides-Extra: docling
 Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'docling'
 Provides-Extra: docx
 Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'docx'
+Provides-Extra: exa
+Requires-Dist: exa-py>=1.8.7; extra == 'exa'
 Provides-Extra: fastembed
 Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == 'fastembed'
 Provides-Extra: google-generativeai
@@ -141,6 +145,8 @@ Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'pdf-parsers'
 Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'pdf-parsers'
 Requires-Dist: pypdf>=5.1.0; extra == 'pdf-parsers'
 Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'pdf-parsers'
+Provides-Extra: pinecone
+Requires-Dist: pinecone-client>=5.0.1; extra == 'pinecone'
 Provides-Extra: postgres
 Requires-Dist: pgvector>=0.3.6; extra == 'postgres'
 Requires-Dist: psycopg2-binary>=2.9.10; extra == 'postgres'
@@ -154,6 +160,8 @@ Provides-Extra: sql
 Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'sql'
 Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'sql'
 Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'sql'
+Provides-Extra: tavily
+Requires-Dist: tavily-python>=0.5.0; extra == 'tavily'
 Provides-Extra: transformers
 Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'transformers'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'transformers'
@@ -163,6 +171,7 @@ Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'unstructur
 Provides-Extra: vecdbs
 Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'vecdbs'
 Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'vecdbs'
+Requires-Dist: pinecone-client>=5.0.1; extra == 'vecdbs'
 Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == 'vecdbs'
 Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == 'vecdbs'
 Requires-Dist: weaviate-client>=4.9.6; extra == 'vecdbs'

{langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/RECORD RENAMED Viewed

@@ -43,6 +43,7 @@ langroid/agent/special/sql/utils/system_message.py,sha256=qKLHkvQWRQodTtPLPxr1GS
 langroid/agent/special/sql/utils/tools.py,sha256=ovCePzq5cmbqw0vsVPBzxdZpUcSUIfTiDSMGXustZW8,1749
 langroid/agent/tools/__init__.py,sha256=IMgCte-_ZIvCkozGQmvMqxIw7_nKLKzD78ccJL1bnQU,804
 langroid/agent/tools/duckduckgo_search_tool.py,sha256=NhsCaGZkdv28nja7yveAhSK_w6l_Ftym8agbrdzqgfo,1935
+langroid/agent/tools/exa_search_tool.py,sha256=qxDs6vIiUtFyfX6gmS-PxoCXes-55in3ef5AkUQhiM0,2469
 langroid/agent/tools/file_tools.py,sha256=GjPB5YDILucYapElnvvoYpGJuZQ25ecLs2REv7edPEo,7292
 langroid/agent/tools/google_search_tool.py,sha256=y7b-3FtgXf0lfF4AYxrZ3K5pH2dhidvibUOAGBE--WI,1456
 langroid/agent/tools/metaphor_search_tool.py,sha256=ccyEhkShH5MxW6-sx1n0BLpD_GForQddS_nNvBZ67Ik,2561
@@ -51,6 +52,7 @@ langroid/agent/tools/recipient_tool.py,sha256=dr0yTxgNEIoxUYxH6TtaExC4G_8WdJ0xGo
 langroid/agent/tools/retrieval_tool.py,sha256=zcAV20PP_6VzSd-UE-IJcabaBseFL_QNz59Bnig8-lE,946
 langroid/agent/tools/rewind_tool.py,sha256=XAXL3BpNhCmBGYq_qi_sZfHJuIw7NY2jp4wnojJ7WRs,5606
 langroid/agent/tools/segment_extract_tool.py,sha256=__srZ_VGYLVOdPrITUM8S0HpmX4q7r5FHWMDdHdEv8w,1440
+langroid/agent/tools/tavily_search_tool.py,sha256=soI-j0HdgVQLf09wRQScaEK4b5RpAX9C4cwOivRFWWI,1903
 langroid/cachedb/__init__.py,sha256=icAT2s7Vhf-ZGUeqpDQGNU6ob6o0aFEyjwcxxUGRFjg,225
 langroid/cachedb/base.py,sha256=ztVjB1DtN6pLCujCWnR6xruHxwVj3XkYniRTYAKKqk0,1354
 langroid/cachedb/momento_cachedb.py,sha256=YEOJ62hEcV6iIeMr5aGgRYgWQqFYaej9gEDEcY0sm7M,3172
@@ -86,13 +88,13 @@ langroid/parsing/parser.py,sha256=pPzM3zXQvFtwTyQPtDha15oZhu1O3OKDLECnkB8waxg,12
 langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
 langroid/parsing/repo_loader.py,sha256=3GjvPJS6Vf5L6gV2zOU8s-Tf1oq_fZm-IB_RL_7CTsY,29373
 langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
-langroid/parsing/search.py,sha256=M1swZfZEMEsalmTwVCkql3DzBNBemky7pWt0PrcwGdQ,9779
+langroid/parsing/search.py,sha256=YPCwezM0c4PWbNUMEmQ5RrJBtvX4aWZ1CMCJFs4sqFo,9806
 langroid/parsing/spider.py,sha256=hAVM6wxh1pQ0EN4tI5wMBtAjIk0T-xnpi-ZUzWybhos,3258
 langroid/parsing/table_loader.py,sha256=qNM4obT_0Y4tjrxNBCNUYjKQ9oETCZ7FbolKBTcz-GM,3410
 langroid/parsing/url_loader.py,sha256=JK48KktLRDBfjrt4nsUfy92M6yGdEeicAqOum2MdULM,4656
 langroid/parsing/urls.py,sha256=86omykgxo4hg2jyF10Ef-FJa9n6MgXdSXy2mImqgo5c,8076
-langroid/parsing/utils.py,sha256=YrV2GNL4EOBGknA4AClPGdJ4S5B31radrt-Ou8OAKoU,12749
-langroid/parsing/web_search.py,sha256=8rW8EI3tyHITaB2l9MT_6yLMeQfo8y-Ih-8N2v2uMpk,4931
+langroid/parsing/utils.py,sha256=ZWMS7oG04GUY9EAIwnFN6KKo_ePCKhqk_H8jW6TDT0s,12805
+langroid/parsing/web_search.py,sha256=wWSmV0METFTGPhHJIs-M4tog2Aur_75Pxr4a49cKDkU,7042
 langroid/prompts/__init__.py,sha256=RW11vK6jiLPuaUh4GpeFvstti73gkm8_rDMtrbo2YsU,142
 langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
 langroid/prompts/prompts_config.py,sha256=p_lp9nbMuQwhhMwAZsOxveRw9C0ZFZvql7pdIfgVZYo,143
@@ -116,16 +118,17 @@ langroid/utils/output/__init__.py,sha256=7P0f--4IZneNsTxXY5fd6d6iW-CeVe-KSsl-87s
 langroid/utils/output/citations.py,sha256=9T69O_N6mxPQjQ-qC1vKS8_kyg1z5hDQXMhBsA45xkk,3147
 langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
 langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
-langroid/vector_store/__init__.py,sha256=iRAwrMn72NNQutdmYwtGFHywjX8r0rVwioUJBBPMESM,1432
-langroid/vector_store/base.py,sha256=On7SY2hU7fvtuAvoHNjQEcaBBUx4OJem8BKyKri2Wx8,14581
+langroid/vector_store/__init__.py,sha256=8ktJUVsVUoc7FMmkUFpFBZu7VMWUqQY9zpm4kEJ8yTs,1537
+langroid/vector_store/base.py,sha256=BgQzTScKNzKr3F3o9jrQNG-b3Dv16wKEGSM9jg-W03Y,14752
 langroid/vector_store/chromadb.py,sha256=p9mEqJwO2BrL2jSSXfa23kCPlPOwWpF3xJYd5zoWw_c,8661
 langroid/vector_store/lancedb.py,sha256=Qd20gKjWozPWfW5-D66J6U8dSrJo1yl-maj6s1lbf1c,14688
 langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
 langroid/vector_store/momento.py,sha256=xOaU7Hlyyn_5ihb0ARS5JHtmrKrTCt2IdRA-ioMM5ek,10307
-langroid/vector_store/postgres.py,sha256=-bQ_AXpIkoK_lg8k6qt7pEz8gZuTXHuhnAPXhqYpUQ0,15697
+langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZXpEY9M,14994
+langroid/vector_store/postgres.py,sha256=DQHd6dt-OcV_QVNm-ymn28rlTfhI6hqgcpLTPCsm0jI,15990
 langroid/vector_store/qdrantdb.py,sha256=v7TAsIoj_vxeKDYS9tpwJLBZA8fuTweTYxHo0X_uawM,17949
-langroid/vector_store/weaviatedb.py,sha256=cMg9kqJXlD1WURs6QivHvwausCyLYGr4mOK2v9uYkhw,11105
-langroid-0.40.0.dist-info/METADATA,sha256=6e_B25ingRSVwU_fIbpAu2pP5sYCHc3Bz1Y-TTXyMA0,60910
-langroid-0.40.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-langroid-0.40.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.40.0.dist-info/RECORD,,
+langroid/vector_store/weaviatedb.py,sha256=ONEr2iGS0Ii73oMe7tRk6bB-BEXQUa70fYSrdI8d3yo,11481
+langroid-0.41.0.dist-info/METADATA,sha256=jCbP1nZgmhcN4XJE7eh8SBMbFEwNVV8yVoZqrmV8pCQ,61259
+langroid-0.41.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.41.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.41.0.dist-info/RECORD,,

{langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

langroid 0.40.0__py3-none-any.whl → 0.41.0__py3-none-any.whl

langroid 0.40.0__py3-none-any.whl → 0.41.0__py3-none-any.whl