PyPI - langroid - Versions diffs - 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

langroid 0.3.1-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

langroid/agent/base.py +42 -6
langroid/agent/chat_agent.py +2 -2
langroid/agent/special/doc_chat_agent.py +14 -4
langroid/agent/special/lance_doc_chat_agent.py +25 -28
langroid/agent/special/lance_rag/critic_agent.py +16 -6
langroid/agent/special/lance_rag/query_planner_agent.py +8 -4
langroid/agent/special/lance_tools.py +14 -8
langroid/agent/tool_message.py +6 -10
langroid/utils/pydantic_utils.py +0 -50
langroid/vector_store/base.py +6 -4
langroid/vector_store/chromadb.py +4 -2
langroid/vector_store/lancedb.py +40 -172
langroid/vector_store/qdrantdb.py +6 -2
{langroid-0.3.1.dist-info → langroid-0.5.0.dist-info}/METADATA +1 -1
{langroid-0.3.1.dist-info → langroid-0.5.0.dist-info}/RECORD +18 -18
pyproject.toml +1 -1
{langroid-0.3.1.dist-info → langroid-0.5.0.dist-info}/LICENSE +0 -0
{langroid-0.3.1.dist-info → langroid-0.5.0.dist-info}/WHEEL +0 -0

langroid/agent/base.py CHANGED Viewed

@@ -784,15 +784,51 @@ class Agent(ABC):
         #     ]
         # }
+        if not isinstance(json_data, dict):
+            return None
         properties = json_data.get("properties")
-        if properties is not None:
+        if isinstance(properties, dict):
             json_data = properties
         request = json_data.get("request")
-        if (
-            request is None
-            or not (isinstance(request, str))
-            or request not in self.llm_tools_handled
-        ):
+        if request is None:
+            handled = [self.llm_tools_map[r] for r in self.llm_tools_handled]
+            default_keys = set(ToolMessage.__fields__.keys())
+            request_keys = set(json_data.keys())
+            def maybe_parse(tool: type[ToolMessage]) -> Optional[ToolMessage]:
+                all_keys = set(tool.__fields__.keys())
+                non_inherited_keys = all_keys.difference(default_keys)
+                # If the request has any keys not valid for the tool and
+                # does not specify some key specific to the type
+                # (e.g. not just `purpose`), the LLM must explicitly specify `request`
+                if not (
+                    request_keys.issubset(all_keys)
+                    and len(request_keys.intersection(non_inherited_keys)) > 0
+                ):
+                    return None
+                try:
+                    return tool.parse_obj(json_data)
+                except ValidationError:
+                    return None
+            candidate_tools = list(
+                filter(
+                    lambda t: t is not None,
+                    map(maybe_parse, handled),
+                )
+            )
+            # If only one valid candidate exists, we infer
+            # "request" to be the only possible value
+            if len(candidate_tools) == 1:
+                return candidate_tools[0]
+            else:
+                return None
+        if not isinstance(request, str) or request not in self.llm_tools_handled:
             return None
         message_class = self.llm_tools_map.get(request)

langroid/agent/chat_agent.py CHANGED Viewed

@@ -427,11 +427,11 @@ class ChatAgent(Agent):
                 but the Assistant fn-calling seems to pay attn to these,
                 and if we don't want this, we should set this to False.)
         """
+        if require_recipient and message_class is not None:
+            message_class = message_class.require_recipient()
         super().enable_message_handling(message_class)  # enables handling only
         tools = self._get_tool_list(message_class)
         if message_class is not None:
-            if require_recipient:
-                message_class = message_class.require_recipient()
             request = message_class.default_value("request")
             llm_function = message_class.llm_function_schema(defaults=include_defaults)
             self.llm_functions_map[request] = llm_function

langroid/agent/special/doc_chat_agent.py CHANGED Viewed

@@ -538,12 +538,13 @@ class DocChatAgent(ChatAgent):
         ]
     def get_field_values(self, fields: list[str]) -> Dict[str, str]:
-        """Get string-listing of possible values of each filterable field,
+        """Get string-listing of possible values of each field,
         e.g.
         {
             "genre": "crime, drama, mystery, ... (10 more)",
             "certificate": "R, PG-13, PG, R",
         }
+        The field names may have "metadata." prefix, e.g. "metadata.genre".
         """
         field_values: Dict[str, Set[str]] = {}
         # make empty set for each field
@@ -556,8 +557,11 @@ class DocChatAgent(ChatAgent):
         for d in docs:
             # extract fields from d
             doc_field_vals = extract_fields(d, fields)
-            for field, val in doc_field_vals.items():
-                field_values[field].add(val)
+            # the `field` returned by extract_fields may contain only the last
+            # part of the field name, e.g. "genre" instead of "metadata.genre",
+            # so we use the orig_field name to fill in the values
+            for (field, val), orig_field in zip(doc_field_vals.items(), fields):
+                field_values[orig_field].add(val)
         # For each field make a string showing list of possible values,
         # truncate to 20 values, and if there are more, indicate how many
         # more there are, e.g. Genre: crime, drama, mystery, ... (20 more)
@@ -680,7 +684,13 @@ class DocChatAgent(ChatAgent):
                 )
             return response
         if query_str == "":
-            return None
+            return ChatDocument(
+                content=NO_ANSWER + " since query was empty",
+                metadata=ChatDocMetaData(
+                    source="No query provided",
+                    sender=Entity.LLM,
+                ),
+            )
         elif query_str == "?" and self.response is not None:
             return self.justify_response()
         elif (query_str.startswith(("summar", "?")) and self.response is None) or (

langroid/agent/special/lance_doc_chat_agent.py CHANGED Viewed

@@ -22,7 +22,6 @@ from langroid.mytypes import DocMetaData, Document
 from langroid.parsing.table_loader import describe_dataframe
 from langroid.utils.constants import DONE, NO_ANSWER
 from langroid.utils.pydantic_utils import (
-    clean_schema,
     dataframe_to_documents,
 )
 from langroid.vector_store.lancedb import LanceDB
@@ -41,24 +40,26 @@ class LanceDocChatAgent(DocChatAgent):
     def _get_clean_vecdb_schema(self) -> str:
         """Get a cleaned schema of the vector-db, to pass to the LLM
         as part of instructions on how to generate a SQL filter."""
+        tbl_pandas = (
+            self.vecdb.client.open_table(self.vecdb.config.collection_name)
+            .search()
+            .limit(1)
+            .to_pandas(flatten=True)
+        )
         if len(self.config.filter_fields) == 0:
-            filterable_fields = (
-                self.vecdb.client.open_table(self.vecdb.config.collection_name)
-                .search()
-                .limit(1)
-                .to_pandas(flatten=True)
-                .columns.tolist()
-            )
+            filterable_fields = tbl_pandas.columns.tolist()
             # drop id, vector, metadata.id, metadata.window_ids, metadata.is_chunk
-            for fields in [
-                "id",
-                "vector",
-                "metadata.id",
-                "metadata.window_ids",
-                "metadata.is_chunk",
-            ]:
-                if fields in filterable_fields:
-                    filterable_fields.remove(fields)
+            filterable_fields = list(
+                set(filterable_fields)
+                - {
+                    "id",
+                    "vector",
+                    "metadata.id",
+                    "metadata.window_ids",
+                    "metadata.is_chunk",
+                }
+            )
             logger.warning(
                 f"""
             No filter_fields set in config, so using these fields as filterable fields:
@@ -69,15 +70,7 @@ class LanceDocChatAgent(DocChatAgent):
         if self.from_dataframe:
             return self.df_description
-        schema_dict = clean_schema(
-            self.vecdb.schema,
-            excludes=["id", "vector"],
-        )
-        # intersect config.filter_fields with schema_dict.keys() in case
-        # there are extraneous fields in config.filter_fields
-        filter_fields_set = set(
-            self.config.filter_fields or schema_dict.keys()
-        ).intersection(schema_dict.keys())
+        filter_fields_set = set(self.config.filter_fields)
         # remove 'content' from filter_fields_set, even if it's not in filter_fields_set
         filter_fields_set.discard("content")
@@ -85,10 +78,14 @@ class LanceDocChatAgent(DocChatAgent):
         # possible values of filterable fields
         filter_field_values = self.get_field_values(list(filter_fields_set))
+        schema_dict: Dict[str, Dict[str, Any]] = dict(
+            (field, {}) for field in filter_fields_set
+        )
         # add field values to schema_dict as another field `values` for each field
         for field, values in filter_field_values.items():
-            if field in schema_dict:
-                schema_dict[field]["values"] = values
+            schema_dict[field]["values"] = values
+            dtype = tbl_pandas[field].dtype.name
+            schema_dict[field]["dtype"] = dtype
         # if self.config.filter_fields is set, restrict to these:
         if len(self.config.filter_fields) > 0:
             schema_dict = {

langroid/agent/special/lance_rag/critic_agent.py CHANGED Viewed

@@ -37,20 +37,30 @@ class QueryPlanCriticConfig(LanceQueryPlanAgentConfig):
     system_message = f"""
     You are an expert at carefully planning a query that needs to be answered
     based on a large collection of documents. These docs have a special `content` field
-    and additional FILTERABLE fields in the SCHEMA below:
+    and additional FILTERABLE fields in the SCHEMA below, along with the
+    SAMPLE VALUES for each field, and the DTYPE in PANDAS TERMINOLOGY.
     {{doc_schema}}
+    The ORIGINAL QUERY is handled by a QUERY PLANNER who sends the PLAN to an ASSISTANT,
+    who returns an ANSWER.
     You will receive a QUERY PLAN consisting of:
-    - ORIGINAL QUERY,
-    - SQL-Like FILTER, WHICH CAN BE EMPTY (and it's fine if results sound reasonable)
+    - ORIGINAL QUERY from the user, which a QUERY PLANNER processes,
+      to create a QUERY PLAN, to be handled by an ASSISTANT.
+    - PANDAS-LIKE FILTER, WHICH CAN BE EMPTY (and it's fine if results sound reasonable)
       FILTER SHOULD ONLY BE USED IF EXPLICITLY REQUIRED BY THE QUERY.
-    - REPHRASED QUERY that will be used to match against the CONTENT (not filterable)
-         of the documents.
+    - REPHRASED QUERY (CANNOT BE EMPTY) that will be used to match against the
+      CONTENT (not filterable) of the documents.
       In general the REPHRASED QUERY should be relied upon to match the CONTENT
       of the docs. Thus the REPHRASED QUERY itself acts like a
       SEMANTIC/LEXICAL/FUZZY FILTER since the Assistant is able to use it to match
-      the CONTENT of the docs in various ways (semantic, lexical, fuzzy, etc.).
+      the CONTENT of the docs in various ways (semantic, lexical, fuzzy, etc.).
+        Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
+        so the REPHRASED QUERY should NOT mention ANY FILTER fields.
+        The assistant will answer based on documents whose CONTENTS match the QUERY,
+        possibly REPHRASED.
+        !!!!****THE REPHRASED QUERY SHOULD NEVER BE EMPTY****!!!
     - DATAFRAME CALCULATION, which must be a SINGLE LINE calculation (or empty),
         [NOTE ==> This calculation is applied AFTER the FILTER and REPHRASED QUERY.],
     - ANSWER received from an assistant that used this QUERY PLAN.

langroid/agent/special/lance_rag/query_planner_agent.py CHANGED Viewed

@@ -43,23 +43,27 @@ class LanceQueryPlanAgentConfig(ChatAgentConfig):
     You will receive a QUERY, to be answered based on an EXTREMELY LARGE collection
     of documents you DO NOT have access to, but your ASSISTANT does.
     You only know that these documents have a special `content` field
-    and additional FILTERABLE fields in the SCHEMA below:
+    and additional FILTERABLE fields in the SCHEMA below, along with the
+    SAMPLE VALUES for each field, and the DTYPE in PANDAS TERMINOLOGY.
     {{doc_schema}}
     Based on the QUERY and the above SCHEMA, your task is to determine a QUERY PLAN,
     consisting of:
-    -  a FILTER (can be empty string) that would help the ASSISTANT to answer the query.
+    -  a PANDAS-TYPE FILTER (can be empty string) that would help the ASSISTANT to
+        answer the query.
         Remember the FILTER can refer to ANY fields in the above SCHEMA
         EXCEPT the `content` field of the documents.
         ONLY USE A FILTER IF EXPLICITLY MENTIONED IN THE QUERY.
         TO get good results, for STRING MATCHES, consider using LIKE instead of =, e.g.
         "CEO LIKE '%Jobs%'" instead of "CEO = 'Steve Jobs'"
-    - a possibly REPHRASED QUERY to be answerable given the FILTER.
+        YOUR FILTER MUST BE A PANDAS-TYPE FILTER, respecting the shown DTYPES.
+    - a possibly REPHRASED QUERY (CANNOT BE EMPTY) to be answerable given the FILTER.
         Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
         so the REPHRASED QUERY should NOT mention ANY FILTER fields.
         The assistant will answer based on documents whose CONTENTS match the QUERY,
         possibly REPHRASED.
+        !!!!****THE REPHRASED QUERY SHOULD NEVER BE EMPTY****!!!
     - an OPTIONAL SINGLE-LINE Pandas-dataframe calculation/aggregation string
         that can be used to calculate the answer to the original query,
         e.g. "df["rating"].mean()",
@@ -99,7 +103,7 @@ class LanceQueryPlanAgentConfig(ChatAgentConfig):
         hence this computation will give the total deaths in shoplifting crimes.
     ------------- END OF EXAMPLE ----------------
-    The FILTER must be a SQL-like condition, e.g.
+    The FILTER must be a PANDAS-like condition, e.g.
     "year > 2000 AND genre = 'ScienceFiction'".
     To ensure you get useful results, you should make your FILTER
     NOT TOO STRICT, e.g. look for approximate match using LIKE, etc.

langroid/agent/special/lance_tools.py CHANGED Viewed

@@ -1,16 +1,21 @@
 import logging
 from langroid.agent.tool_message import ToolMessage
-from langroid.pydantic_v1 import BaseModel
+from langroid.pydantic_v1 import BaseModel, Field
 logger = logging.getLogger(__name__)
 class QueryPlan(BaseModel):
-    original_query: str
-    query: str
-    filter: str
-    dataframe_calc: str = ""
+    original_query: str = Field(..., description="The original query for reference")
+    query: str = Field(..., description="A possibly NON-EMPTY rephrased query")
+    filter: str = Field(
+        "",
+        description="Filter condition if needed (or empty if no filter is needed)",
+    )
+    dataframe_calc: str = Field(
+        "", description="An optional Pandas-dataframe calculation/aggregation string"
+    )
 class QueryPlanTool(ToolMessage):
@@ -19,8 +24,9 @@ class QueryPlanTool(ToolMessage):
     Given a user's query, generate a query <plan> consisting of:
     - <original_query> - the original query for reference
     - <filter> condition if needed (or empty string if no filter is needed)
-    - <query> - a possibly rephrased query that can be used to match the CONTENT
-        of the documents (can be same as <original_query> if no rephrasing is needed)
+    - <query> - a possibly NON-EMPTY rephrased query that can be used to match the
+        CONTENT of the documents
+        (can be same as <original_query> if no rephrasing is needed)
     - <dataframe_calc> - a Pandas-dataframe calculation/aggregation string
         that can be used to calculate the answer
         (or empty string if no calculation is needed).
@@ -34,7 +40,7 @@ class QueryPlanAnswerTool(ToolMessage):
     Assemble query <plan> and <answer>
     """
     plan: QueryPlan
-    answer: str
+    answer: str = Field(..., description="The answer received from the assistant")
 class QueryPlanFeedbackTool(ToolMessage):

langroid/agent/tool_message.py CHANGED Viewed

@@ -35,12 +35,10 @@ class ToolMessage(ABC, BaseModel):
         request (str): name of agent method to map to.
         purpose (str): purpose of agent method, expressed in general terms.
             (This is used when auto-generating the tool instruction to the LLM)
-        result (str): example of result of agent method.
     """
     request: str
     purpose: str
-    result: str = ""
     class Config:
         arbitrary_types_allowed = False
@@ -48,7 +46,7 @@ class ToolMessage(ABC, BaseModel):
         validate_assignment = True
         # do not include these fields in the generated schema
         # since we don't require the LLM to specify them
-        schema_extra = {"exclude": {"purpose", "result"}}
+        schema_extra = {"exclude": {"purpose"}}
     @classmethod
     def instructions(cls) -> str:
@@ -110,13 +108,13 @@ class ToolMessage(ABC, BaseModel):
         return "\n\n".join(examples_jsons)
     def to_json(self) -> str:
-        return self.json(indent=4, exclude={"result", "purpose"})
+        return self.json(indent=4, exclude={"purpose"})
     def json_example(self) -> str:
-        return self.json(indent=4, exclude={"result", "purpose"})
+        return self.json(indent=4, exclude={"purpose"})
     def dict_example(self) -> Dict[str, Any]:
-        return self.dict(exclude={"result", "purpose"})
+        return self.dict(exclude={"purpose"})
     @classmethod
     def default_value(cls, f: str) -> Any:
@@ -220,9 +218,7 @@ class ToolMessage(ABC, BaseModel):
                 if "description" not in parameters["properties"][name]:
                     parameters["properties"][name]["description"] = description
-        excludes = (
-            ["result", "purpose"] if request else ["request", "result", "purpose"]
-        )
+        excludes = ["purpose"] if request else ["request", "purpose"]
         # exclude 'excludes' from parameters["properties"]:
         parameters["properties"] = {
             field: details
@@ -263,5 +259,5 @@ class ToolMessage(ABC, BaseModel):
         Returns:
             Dict[str, Any]: simplified schema
         """
-        schema = generate_simple_schema(cls, exclude=["result", "purpose"])
+        schema = generate_simple_schema(cls, exclude=["purpose"])
         return schema

langroid/utils/pydantic_utils.py CHANGED Viewed

@@ -9,8 +9,6 @@ from typing import (
     Tuple,
     Type,
     TypeVar,
-    get_args,
-    get_origin,
     no_type_check,
 )
@@ -313,54 +311,6 @@ def pydantic_obj_from_flat_dict(
     return model(**nested_data)
-def clean_schema(model: Type[BaseModel], excludes: List[str] = []) -> Dict[str, Any]:
-    """
-    Generate a simple schema for a given Pydantic model,
-    including inherited fields, with an option to exclude certain fields.
-    Handles cases where fields are Lists or other generic types and includes
-    field descriptions if available.
-    Args:
-        model (Type[BaseModel]): The Pydantic model class.
-        excludes (List[str]): A list of field names to exclude.
-    Returns:
-        Dict[str, Any]: A dictionary representing the simple schema.
-    """
-    schema = {}
-    for field_name, field_info in model.__fields__.items():
-        if field_name in excludes:
-            continue
-        field_type = field_info.outer_type_
-        description = field_info.field_info.description or ""
-        # Handle generic types like List[...]
-        if get_origin(field_type):
-            inner_types = get_args(field_type)
-            inner_type_names = [
-                t.__name__ if hasattr(t, "__name__") else str(t) for t in inner_types
-            ]
-            field_type_str = (
-                f"{get_origin(field_type).__name__}" f'[{", ".join(inner_type_names)}]'
-            )
-            schema[field_name] = {"type": field_type_str, "description": description}
-        elif issubclass(field_type, BaseModel):
-            # Directly use the nested model's schema,
-            # integrating it into the current level
-            nested_schema = clean_schema(field_type, excludes)
-            schema[field_name] = {**nested_schema, "description": description}
-        else:
-            # For basic types, use 'type'
-            schema[field_name] = {
-                "type": field_type.__name__,
-                "description": description,
-            }
-    return schema
 @contextmanager
 def temp_update(
     pydantic_object: BaseModel, updates: Dict[str, Any]

langroid/vector_store/base.py CHANGED Viewed

@@ -1,14 +1,14 @@
 import copy
 import logging
 from abc import ABC, abstractmethod
-from typing import Dict, List, Optional, Sequence, Tuple
+from typing import Dict, List, Optional, Sequence, Tuple, Type
 import numpy as np
 import pandas as pd
 from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
 from langroid.embedding_models.models import OpenAIEmbeddingsConfig
-from langroid.mytypes import Document
+from langroid.mytypes import DocMetaData, Document
 from langroid.pydantic_v1 import BaseSettings
 from langroid.utils.algorithms.graph import components, topological_sort
 from langroid.utils.configuration import settings
@@ -32,6 +32,9 @@ class VectorStoreConfig(BaseSettings):
     timeout: int = 60
     host: str = "127.0.0.1"
     port: int = 6333
+    # used when parsing search results back as Document objects
+    document_class: Type[Document] = Document
+    metadata_class: Type[DocMetaData] = DocMetaData
     # compose_file: str = "langroid/vector_store/docker-compose-qdrant.yml"
@@ -113,8 +116,7 @@ class VectorStore(ABC):
         """
         self.config.collection_name = collection_name
-        if collection_name not in self.list_collections() or replace:
-            self.create_collection(collection_name, replace=replace)
+        self.config.replace_collection = replace
     @abstractmethod
     def create_collection(self, collection_name: str, replace: bool = False) -> None:

langroid/vector_store/chromadb.py CHANGED Viewed

@@ -8,7 +8,7 @@ from langroid.embedding_models.base import (
 )
 from langroid.embedding_models.models import OpenAIEmbeddingsConfig
 from langroid.exceptions import LangroidImportError
-from langroid.mytypes import DocMetaData, Document
+from langroid.mytypes import Document
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import print_long_text
 from langroid.vector_store.base import VectorStore, VectorStoreConfig
@@ -200,7 +200,9 @@ class ChromaDB(VectorStore):
             else:
                 m["window_ids"] = m["window_ids"].split(",")
         docs = [
-            Document(content=d, metadata=DocMetaData(**m))
+            self.config.document_class(
+                content=d, metadata=self.config.metadata_class(**m)
+            )
             for d, m in zip(contents, metadatas)
         ]
         return docs

langroid/vector_store/lancedb.py CHANGED Viewed

@@ -32,13 +32,7 @@ from langroid.utils.configuration import settings
 from langroid.utils.pydantic_utils import (
     dataframe_to_document_model,
     dataframe_to_documents,
-    extend_document_class,
-    extra_metadata,
-    flatten_pydantic_instance,
-    flatten_pydantic_model,
-    nested_dict_from_flat,
 )
-from langroid.utils.system import pydantic_major_version
 from langroid.vector_store.base import VectorStore, VectorStoreConfig
 try:
@@ -58,10 +52,6 @@ class LanceDBConfig(VectorStoreConfig):
     storage_path: str = ".lancedb/data"
     embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
     distance: str = "cosine"
-    # document_class is used to store in lancedb with right schema,
-    # and also to retrieve the right type of Documents when searching.
-    document_class: Type[Document] = Document
-    flatten: bool = False  # flatten Document class into LanceSchema ?
 class LanceDB(VectorStore):
@@ -78,7 +68,6 @@ class LanceDB(VectorStore):
         self.port = config.port
         self.is_from_dataframe = False  # were docs ingested from a dataframe?
         self.df_metadata_columns: List[str] = []  # metadata columns from dataframe
-        self._setup_schemas(config.document_class)
         load_dotenv()
         if self.config.cloud:
@@ -104,40 +93,6 @@ class LanceDB(VectorStore):
                     uri=new_storage_path,
                 )
-        # Note: Only create collection if a non-null collection name is provided.
-        # This is useful to delay creation of vecdb until we have a suitable
-        # collection name (e.g. we could get it from the url or folder path).
-        if config.collection_name is not None:
-            self.create_collection(
-                config.collection_name, replace=config.replace_collection
-            )
-    def _setup_schemas(self, doc_cls: Type[Document] | None) -> None:
-        try:
-            doc_cls = doc_cls or self.config.document_class
-            self.unflattened_schema = self._create_lance_schema(doc_cls)
-            self.schema = (
-                self._create_flat_lance_schema(doc_cls)
-                if self.config.flatten
-                else self.unflattened_schema
-            )
-        except (AttributeError, TypeError) as e:
-            pydantic_version = pydantic_major_version()
-            if pydantic_version > 1:
-                raise ValueError(
-                    f"""
-                    {e}
-                    ====
-                    You are using Pydantic v{pydantic_version},
-                    which is not yet compatible with Langroid's LanceDB integration.
-                    To use Lancedb with Langroid, please install the
-                    latest pydantic 1.x instead of pydantic v2, e.g.
-                    pip install "pydantic<2.0.0"
-                    """
-                )
-            else:
-                raise e
     def clear_empty_collections(self) -> int:
         coll_names = self.list_collections()
         n_deletes = 0
@@ -234,91 +189,8 @@ class LanceDB(VectorStore):
         )  # type: ignore
         return NewModel  # type: ignore
-    def _create_flat_lance_schema(self, doc_cls: Type[Document]) -> Type[BaseModel]:
-        """
-        Flat version of the lance_schema, as nested Pydantic schemas are not yet
-        supported by LanceDB.
-        """
-        if not has_lancedb:
-            raise LangroidImportError("lancedb", "lancedb")
-        lance_model = self._create_lance_schema(doc_cls)
-        FlatModel = flatten_pydantic_model(lance_model, base_model=LanceModel)
-        return FlatModel
     def create_collection(self, collection_name: str, replace: bool = False) -> None:
-        """
-        Create a collection with the given name, optionally replacing an existing
-            collection if `replace` is True.
-        Args:
-            collection_name (str): Name of the collection to create.
-            replace (bool): Whether to replace an existing collection
-                with the same name. Defaults to False.
-        """
-        self.config.collection_name = collection_name
-        collections = self.list_collections()
-        if collection_name in collections:
-            coll = self.client.open_table(collection_name)
-            if coll.head().shape[0] > 0:
-                logger.warning(f"Non-empty Collection {collection_name} already exists")
-                if not replace:
-                    logger.warning("Not replacing collection")
-                    return
-                else:
-                    logger.warning("Recreating fresh collection")
-        try:
-            self.client.create_table(
-                collection_name, schema=self.schema, mode="overwrite"
-            )
-        except (AttributeError, TypeError) as e:
-            pydantic_version = pydantic_major_version()
-            if pydantic_version > 1:
-                raise ValueError(
-                    f"""
-                    {e}
-                    ====
-                    You are using Pydantic v{pydantic_version},
-                    which is not yet compatible with Langroid's LanceDB integration.
-                    To use Lancedb with Langroid, please install the
-                    latest pydantic 1.x instead of pydantic v2, e.g.
-                    pip install "pydantic<2.0.0"
-                    """
-                )
-            else:
-                raise e
-        if settings.debug:
-            level = logger.getEffectiveLevel()
-            logger.setLevel(logging.INFO)
-            logger.setLevel(level)
-    def _maybe_set_doc_class_schema(self, doc: Document) -> None:
-        """
-        Set the config.document_class and self.schema based on doc if needed
-        Args:
-            doc: an instance of Document, to be added to a collection
-        """
-        extra_metadata_fields = extra_metadata(doc, self.config.document_class)
-        if len(extra_metadata_fields) > 0:
-            logger.warning(
-                f"""
-                    Added documents contain extra metadata fields:
-                    {extra_metadata_fields}
-                    which were not present in the original config.document_class.
-                    Trying to change document_class and corresponding schemas.
-                    Overriding LanceDBConfig.document_class with an auto-generated
-                    Pydantic class that includes these extra fields.
-                    If this fails, or you see odd results, it is recommended that you
-                    define a subclass of Document, with metadata of class derived from
-                    DocMetaData, with extra fields defined via
-                    `Field(..., description="...")` declarations,
-                    and set this document class as the value of the
-                    LanceDBConfig.document_class attribute.
-                    """
-            )
-            doc_cls = extend_document_class(doc)
-            self.config.document_class = doc_cls
-            self._setup_schemas(doc_cls)
+        self.config.replace_collection = replace
     def add_documents(self, documents: Sequence[Document]) -> None:
         super().maybe_add_ids(documents)
@@ -329,39 +201,52 @@ class LanceDB(VectorStore):
         coll_name = self.config.collection_name
         if coll_name is None:
             raise ValueError("No collection name set, cannot ingest docs")
-        self._maybe_set_doc_class_schema(documents[0])
+        # self._maybe_set_doc_class_schema(documents[0])
+        table_exists = False
         if (
-            coll_name not in colls
-            or self.client.open_table(coll_name).head(1).shape[0] == 0
+            coll_name in colls
+            and self.client.open_table(coll_name).head(1).shape[0] > 0
         ):
-            # collection either doesn't exist or is empty, so replace it,
-            self.create_collection(coll_name, replace=True)
+            # collection exists and  is not empty:
+            # if replace_collection is True, we'll overwrite the existing collection,
+            # else we'll append to it.
+            if self.config.replace_collection:
+                self.client.drop_table(coll_name)
+            else:
+                table_exists = True
         ids = [str(d.id()) for d in documents]
         # don't insert all at once, batch in chunks of b,
         # else we get an API error
         b = self.config.batch_size
-        def make_batches() -> Generator[List[BaseModel], None, None]:
+        def make_batches() -> Generator[List[Dict[str, Any]], None, None]:
             for i in range(0, len(ids), b):
                 batch = [
-                    self.unflattened_schema(
+                    dict(
                         id=ids[i + j],
                         vector=embedding_vecs[i + j],
                         **doc.dict(),
                     )
                     for j, doc in enumerate(documents[i : i + b])
                 ]
-                if self.config.flatten:
-                    batch = [
-                        flatten_pydantic_instance(instance)  # type: ignore
-                        for instance in batch
-                    ]
                 yield batch
-        tbl = self.client.open_table(self.config.collection_name)
         try:
-            tbl.add(make_batches())
+            if table_exists:
+                tbl = self.client.open_table(coll_name)
+                tbl.add(make_batches())
+            else:
+                batch_gen = make_batches()
+                batch = next(batch_gen)
+                # use first batch to create table...
+                tbl = self.client.create_table(
+                    coll_name,
+                    data=batch,
+                    mode="create",
+                )
+                # ... and add the rest
+                tbl.add(batch_gen)
         except Exception as e:
             logger.error(
                 f"""
@@ -427,7 +312,6 @@ class LanceDB(VectorStore):
                 exclude=["vector"],
             )
             self.config.document_class = doc_cls  # type: ignore
-            self._setup_schemas(doc_cls)  # type: ignore
         else:
             # collection exists and is not empty, so append to it
             tbl = self.client.open_table(self.config.collection_name)
@@ -452,35 +336,19 @@ class LanceDB(VectorStore):
             return self._records_to_docs(records)
     def _records_to_docs(self, records: List[Dict[str, Any]]) -> List[Document]:
-        if self.config.flatten:
-            docs = [
-                self.unflattened_schema(**nested_dict_from_flat(rec)) for rec in records
-            ]
-        else:
-            try:
-                docs = [self.schema(**rec) for rec in records]
-            except ValidationError as e:
-                raise ValueError(
-                    f"""
-                Error validating LanceDB result: {e}
-                HINT: This could happen when you're re-using an
-                existing LanceDB store with a different schema.
-                Try deleting your local lancedb storage at `{self.config.storage_path}`
-                re-ingesting your documents and/or replacing the collections.
-                """
-                )
-        doc_cls = self.config.document_class
-        doc_cls_field_names = doc_cls.__fields__.keys()
-        return [
-            doc_cls(
-                **{
-                    field_name: getattr(doc, field_name)
-                    for field_name in doc_cls_field_names
-                }
+        try:
+            docs = [self.config.document_class(**rec) for rec in records]
+        except ValidationError as e:
+            raise ValueError(
+                f"""
+            Error validating LanceDB result: {e}
+            HINT: This could happen when you're re-using an
+            existing LanceDB store with a different schema.
+            Try deleting your local lancedb storage at `{self.config.storage_path}`
+            re-ingesting your documents and/or replacing the collections.
+            """
             )
-            for doc in docs
-        ]
+        return docs
     def get_all_documents(self, where: str = "") -> List[Document]:
         if self.config.collection_name is None:

langroid/vector_store/qdrantdb.py CHANGED Viewed

@@ -380,7 +380,11 @@ class QdrantDB(VectorStore):
                 with_payload=True,
                 with_vectors=False,
             )
-            docs += [Document(**record.payload) for record in results]  # type: ignore
+            docs += [
+                self.config.document_class(**record.payload)  # type: ignore
+                for record in results
+            ]
+            # ignore
             if next_page_offset is None:
                 break
             offset = next_page_offset  # type: ignore
@@ -451,7 +455,7 @@ class QdrantDB(VectorStore):
         ]  # 2D list -> 1D list
         scores = [match.score for match in search_result if match is not None]
         docs = [
-            Document(**(match.payload))  # type: ignore
+            self.config.document_class(**(match.payload))  # type: ignore
             for match in search_result
             if match is not None
         ]

{langroid-0.3.1.dist-info → langroid-0.5.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.3.1
+Version: 0.5.0
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani

{langroid-0.3.1.dist-info → langroid-0.5.0.dist-info}/RECORD RENAMED Viewed

@@ -1,22 +1,22 @@
 langroid/__init__.py,sha256=z_fCOLQJPOw3LLRPBlFB5-2HyCjpPgQa4m4iY5Fvb8Y,1800
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
-langroid/agent/base.py,sha256=eeYZ-NYbrepOjUVQS9K0nDhE8x2gKUNjgxFTA24mook,37560
+langroid/agent/base.py,sha256=x6SbInDGJUL_kusr-ligYsCwuaid2CmcRkzlucOXyw0,38999
 langroid/agent/batch.py,sha256=feRA_yRG768ElOQjrKEefcRv6Aefd_yY7qktuYUQDwc,10040
 langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/callbacks/chainlit.py,sha256=UKG2_v4ktfkEaGvdouVRHEqQejEYya2Rli8jrP65TmA,22055
-langroid/agent/chat_agent.py,sha256=bTQrIMbN8JxxtnVNC-xzODVLvH3SHmy5vijRjY3cCUE,41564
+langroid/agent/chat_agent.py,sha256=M5tdp1HuFthhMChLNd5XKBWxoiMSTkOuXlM8JoRLiUk,41586
 langroid/agent/chat_document.py,sha256=MwtNABK28tfSzqCeQlxoauT8uPn8oldU7dlnrX8aQ10,11232
 langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
 langroid/agent/openai_assistant.py,sha256=3saI9PwF8IZNJcjqyUy-rj73TInAzdlk14LiOvT_Dkc,33548
 langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
-langroid/agent/special/doc_chat_agent.py,sha256=CXFLfDMEabaBZwZwFgNOaG3E3S86xcBM4txrsMD_70I,54014
-langroid/agent/special/lance_doc_chat_agent.py,sha256=USp0U3eTaJzwF_3bdqE7CedSLbaqAi2tm-VzygcyLaA,10175
+langroid/agent/special/doc_chat_agent.py,sha256=8NPAhMnHkFUolQ8EHos40tz5Vwuz_m33NjUfjheXWXY,54569
+langroid/agent/special/lance_doc_chat_agent.py,sha256=Hjpu6u9UPAFMg5J6K97PRFaLbNrGhInC0N9oGi09CeY,10006
 langroid/agent/special/lance_rag/__init__.py,sha256=QTbs0IVE2ZgDg8JJy1zN97rUUg4uEPH7SLGctFNumk4,174
-langroid/agent/special/lance_rag/critic_agent.py,sha256=ufTdpHSeHgCzN85Q0sfWOrpBpsCjGVZdAg5yOH1ogU8,7296
+langroid/agent/special/lance_rag/critic_agent.py,sha256=S3NA3OAO7XaXjCrmwhKB7qCPlgRZFvDxiB5Qra65Zhs,7959
 langroid/agent/special/lance_rag/lance_rag_task.py,sha256=l_HQgrYY-CX2FwIsS961aEF3bYog3GDYo98fj0C0mSk,2889
-langroid/agent/special/lance_rag/query_planner_agent.py,sha256=M4RC_0f98_pwVL7ygrr1VI80LgJiFcmKjJFH0M4tccI,9830
-langroid/agent/special/lance_tools.py,sha256=BksGrrNgGgyYWP0HnfAuXMc0KzXooFOzY2l5rDDMtQ8,1467
+langroid/agent/special/lance_rag/query_planner_agent.py,sha256=QB8UYITUCkgSPturEwu_3i4kU8jXxW_jXNGSLlH5tMc,10109
+langroid/agent/special/lance_tools.py,sha256=BznV_r3LAFyybvBRa9KQ0oU7mPM3uQVfri7PFp7M_qc,1894
 langroid/agent/special/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/special/neo4j/csv_kg_chat.py,sha256=dRsAgMBa1H_EMI2YYgJR2Xyv1D7e4o3G9M64mTewq_c,6409
 langroid/agent/special/neo4j/neo4j_chat_agent.py,sha256=Y4Zu-m8WKO1xjeBRarV_m4y00Y5n_NR2B-hepjZp_cY,13104
@@ -34,7 +34,7 @@ langroid/agent/special/sql/utils/tools.py,sha256=vFYysk6Vi7HJjII8B4RitA3pt_z3gkS
 langroid/agent/special/table_chat_agent.py,sha256=d9v2wsblaRx7oMnKhLV7uO_ujvk9gh59pSGvBXyeyNc,9659
 langroid/agent/task.py,sha256=vKM2dmRYSH4i_VA0lf2axUtZcTGU44rVHz6EyxI4kG0,73990
 langroid/agent/team.py,sha256=88VNRSmK35WEl620GfBzuIrBASXYSeBZ8yDKX-nP_Bo,75778
-langroid/agent/tool_message.py,sha256=wIyZnUcZpxkiRPvM9O3MO3b5BBAdLEEan9kqPbvtApc,9743
+langroid/agent/tool_message.py,sha256=ggxmIZO_wi6x5uD-YWml07Bfgms-ohOSKHyQQdJFi4o,9571
 langroid/agent/tools/__init__.py,sha256=e-63cfwQNk_ftRKQwgDAJQK16QLbRVWDBILeXIc7wLk,402
 langroid/agent/tools/duckduckgo_search_tool.py,sha256=NhsCaGZkdv28nja7yveAhSK_w6l_Ftym8agbrdzqgfo,1935
 langroid/agent/tools/extract_tool.py,sha256=u5lL9rKBzaLBOrRyLnTAZ97pQ1uxyLP39XsWMnpaZpw,3789
@@ -118,20 +118,20 @@ langroid/utils/output/citations.py,sha256=PSY2cpti8W-ZGFMAgj1lYoEIZy0lsniLpCliMs
 langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
 langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
 langroid/utils/pandas_utils.py,sha256=UctS986Jtl_MvU5rA7-GfrjEHXP7MNu8ePhepv0bTn0,755
-langroid/utils/pydantic_utils.py,sha256=FKC8VKXH2uBEpFjnnMgIcEsQn6hs31ftea8zv5pMK9g,21740
+langroid/utils/pydantic_utils.py,sha256=X35qxjE4sSIi-oBMkI1s9fiUIJbpXHLmJqcJ7zsy0jg,19914
 langroid/utils/system.py,sha256=nvKeeUAj4eviR4kYpcr9h-HYdhqUNMTRBTHBOhz0GdU,5182
 langroid/utils/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/utils/web/login.py,sha256=1iz9eUAHa87vpKIkzwkmFa00avwFWivDSAr7QUhK7U0,2528
 langroid/vector_store/__init__.py,sha256=6xBjb_z4QtUy4vz4RuFbcbSwmHrggHL8-q0DwCf3PMM,972
-langroid/vector_store/base.py,sha256=tuEPaxJcuU_39sRnUjjNd8D8n8IjP6jrbwQv_ecNpSw,13532
-langroid/vector_store/chromadb.py,sha256=bZ5HjwgKgfJj1PUHsatYsrHv-v0dpOfMR2l0tJ2H0_A,7890
-langroid/vector_store/lancedb.py,sha256=9x7e_5zo7nLhMbhjYby2ZpBJ-vyawcC0_XAuatfHJf8,20517
+langroid/vector_store/base.py,sha256=pkc4n0yWGVk7iRUOLFkU_ID5NiBFfAcA3lBlPNX79pU,13623
+langroid/vector_store/chromadb.py,sha256=KMfHrgovQEOeJR_LsMpGM8BteJ50wpisDu608RhU3SU,7940
+langroid/vector_store/lancedb.py,sha256=MLubJBhtNIFX6zY0qANqCoB6MlL-oZiJCg9gZp2H2rs,14620
 langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
 langroid/vector_store/momento.py,sha256=qR-zBF1RKVHQZPZQYW_7g-XpTwr46p8HJuYPCkfJbM4,10534
 langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
-langroid/vector_store/qdrantdb.py,sha256=HkcK6jOf-FEDoOiG94MpsYDJr98T7vZkDyG__1BlnWI,17354
-pyproject.toml,sha256=x0YGXi9ennkubMYlFO-Eeyp6h2YE_aOBbeRJrUtTm34,7063
-langroid-0.3.1.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.3.1.dist-info/METADATA,sha256=9WLpuCfOtRfjB30PZa2jwGmnlotxXRZgHqt6UWiNh4E,54402
-langroid-0.3.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-langroid-0.3.1.dist-info/RECORD,,
+langroid/vector_store/qdrantdb.py,sha256=v88lqFkepADvlN6lByUj9I4NEKa9X9lWH16uTPPbYrE,17457
+pyproject.toml,sha256=pZsOBzFd2HoJ_P1_r3XSbCuD-wAllBqu6xr75947ITU,7063
+langroid-0.5.0.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.5.0.dist-info/METADATA,sha256=WRQVNy4M8RgAEw5hQwEh8YgjGX3RoqtbdQP6gxZ6ya4,54402
+langroid-0.5.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+langroid-0.5.0.dist-info/RECORD,,

pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langroid"
-version = "0.3.1"
+version = "0.5.0"
 description = "Harness LLMs with Multi-Agent Programming"
 authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
 readme = "README.md"

{langroid-0.3.1.dist-info → langroid-0.5.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{langroid-0.3.1.dist-info → langroid-0.5.0.dist-info}/WHEEL RENAMED Viewed

File without changes

langroid 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

langroid 0.3.1py3-none-any.whl → 0.5.0py3-none-any.whl