langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/__init__.py +70 -0
- langroid/agent/__init__.py +22 -0
- langroid/agent/base.py +120 -33
- langroid/agent/batch.py +134 -35
- langroid/agent/callbacks/__init__.py +0 -0
- langroid/agent/callbacks/chainlit.py +608 -0
- langroid/agent/chat_agent.py +164 -100
- langroid/agent/chat_document.py +19 -2
- langroid/agent/openai_assistant.py +20 -10
- langroid/agent/special/__init__.py +33 -10
- langroid/agent/special/doc_chat_agent.py +521 -108
- langroid/agent/special/lance_doc_chat_agent.py +258 -0
- langroid/agent/special/lance_rag/__init__.py +9 -0
- langroid/agent/special/lance_rag/critic_agent.py +136 -0
- langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
- langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
- langroid/agent/special/lance_tools.py +44 -0
- langroid/agent/special/neo4j/__init__.py +0 -0
- langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
- langroid/agent/special/neo4j/utils/__init__.py +0 -0
- langroid/agent/special/neo4j/utils/system_message.py +46 -0
- langroid/agent/special/relevance_extractor_agent.py +23 -7
- langroid/agent/special/retriever_agent.py +29 -174
- langroid/agent/special/sql/__init__.py +7 -0
- langroid/agent/special/sql/sql_chat_agent.py +47 -23
- langroid/agent/special/sql/utils/__init__.py +11 -0
- langroid/agent/special/sql/utils/description_extractors.py +95 -46
- langroid/agent/special/sql/utils/populate_metadata.py +28 -21
- langroid/agent/special/table_chat_agent.py +43 -9
- langroid/agent/task.py +423 -114
- langroid/agent/tool_message.py +67 -10
- langroid/agent/tools/__init__.py +8 -0
- langroid/agent/tools/duckduckgo_search_tool.py +66 -0
- langroid/agent/tools/google_search_tool.py +11 -0
- langroid/agent/tools/metaphor_search_tool.py +67 -0
- langroid/agent/tools/recipient_tool.py +6 -24
- langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
- langroid/cachedb/__init__.py +6 -0
- langroid/embedding_models/__init__.py +24 -0
- langroid/embedding_models/base.py +9 -1
- langroid/embedding_models/models.py +117 -17
- langroid/embedding_models/protoc/embeddings.proto +19 -0
- langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
- langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
- langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
- langroid/embedding_models/remote_embeds.py +153 -0
- langroid/language_models/__init__.py +22 -0
- langroid/language_models/azure_openai.py +47 -4
- langroid/language_models/base.py +26 -10
- langroid/language_models/config.py +5 -0
- langroid/language_models/openai_gpt.py +407 -121
- langroid/language_models/prompt_formatter/__init__.py +9 -0
- langroid/language_models/prompt_formatter/base.py +4 -6
- langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
- langroid/language_models/utils.py +10 -9
- langroid/mytypes.py +10 -4
- langroid/parsing/__init__.py +33 -1
- langroid/parsing/document_parser.py +259 -63
- langroid/parsing/image_text.py +32 -0
- langroid/parsing/parse_json.py +143 -0
- langroid/parsing/parser.py +20 -7
- langroid/parsing/repo_loader.py +108 -46
- langroid/parsing/search.py +8 -0
- langroid/parsing/table_loader.py +44 -0
- langroid/parsing/url_loader.py +59 -13
- langroid/parsing/urls.py +18 -9
- langroid/parsing/utils.py +130 -9
- langroid/parsing/web_search.py +73 -0
- langroid/prompts/__init__.py +7 -0
- langroid/prompts/chat-gpt4-system-prompt.md +68 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/utils/__init__.py +10 -0
- langroid/utils/algorithms/__init__.py +3 -0
- langroid/utils/configuration.py +0 -1
- langroid/utils/constants.py +4 -0
- langroid/utils/logging.py +2 -5
- langroid/utils/output/__init__.py +15 -2
- langroid/utils/output/status.py +33 -0
- langroid/utils/pandas_utils.py +30 -0
- langroid/utils/pydantic_utils.py +446 -4
- langroid/utils/system.py +36 -1
- langroid/vector_store/__init__.py +34 -2
- langroid/vector_store/base.py +33 -2
- langroid/vector_store/chromadb.py +42 -13
- langroid/vector_store/lancedb.py +226 -60
- langroid/vector_store/meilisearch.py +7 -6
- langroid/vector_store/momento.py +3 -2
- langroid/vector_store/qdrantdb.py +82 -11
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/METADATA +190 -129
- langroid-0.1.219.dist-info/RECORD +127 -0
- langroid/agent/special/recipient_validator_agent.py +0 -157
- langroid/parsing/json.py +0 -64
- langroid/utils/web/selenium_login.py +0 -36
- langroid-0.1.139.dist-info/RECORD +0 -103
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/WHEEL +0 -0
langroid/utils/pydantic_utils.py
CHANGED
```diff
@@ -1,6 +1,26 @@
-…
-…
-from …
+import logging
+from contextlib import contextmanager
+from typing import (
+    Any,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    get_args,
+    get_origin,
+    no_type_check,
+)
+
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, ValidationError, create_model
+
+from langroid.mytypes import DocMetaData, Document
+
+logger = logging.getLogger(__name__)
 
 
 def has_field(model_class: Type[BaseModel], field_name: str) -> bool:
```
```diff
@@ -8,6 +28,16 @@ def has_field(model_class: Type[BaseModel], field_name: str) -> bool:
     return field_name in model_class.__fields__
 
 
+def _recursive_purge_dict_key(d: Dict[str, Any], k: str) -> None:
+    """Remove a key from a dictionary recursively"""
+    if isinstance(d, dict):
+        for key in list(d.keys()):
+            if key == k and "type" in d.keys():
+                del d[key]
+            else:
+                _recursive_purge_dict_key(d[key], k)
+
+
 @no_type_check
 def _flatten_pydantic_model_ignore_defaults(
     model: Type[BaseModel],
```
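The `"type" in d.keys()` guard means the purge only touches dicts that look like schema nodes, so a field that merely shares the purged key's name is left alone. A minimal sketch of a typical use, stripping Pydantic's auto-generated `title` entries from a schema (assumes Pydantic v1, which langroid targets here; `Point` is a made-up model):

```python
from pydantic import BaseModel

from langroid.utils.pydantic_utils import _recursive_purge_dict_key


class Point(BaseModel):
    x: int
    y: int


schema = Point.schema()  # Pydantic v1 JSON schema; every node carries a "title"
_recursive_purge_dict_key(schema, "title")
# "title" is removed at the top level and inside each property, since those
# dicts also contain a "type" key marking them as schema nodes.
print(schema)
```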
```diff
@@ -105,6 +135,53 @@ def flatten_pydantic_model(
     return create_model("FlatModel", __base__=base_model, **flattened_fields)
 
 
+def get_field_names(model: Type[BaseModel]) -> List[str]:
+    """Get all field names from a possibly nested Pydantic model."""
+    mdl = flatten_pydantic_model(model)
+    fields = list(mdl.__fields__.keys())
+    # fields may be like a__b__c , so we only want the last part
+    return [f.split("__")[-1] for f in fields]
+
+
+def generate_simple_schema(
+    model: Type[BaseModel], exclude: List[str] = []
+) -> Dict[str, Any]:
+    """
+    Generates a JSON schema for a Pydantic model,
+    with options to exclude specific fields.
+
+    This function traverses the Pydantic model's fields, including nested models,
+    to generate a dictionary representing the JSON schema. Fields specified in
+    the exclude list will not be included in the generated schema.
+
+    Args:
+        model (Type[BaseModel]): The Pydantic model class to generate the schema for.
+        exclude (List[str]): A list of string field names to be excluded from the
+            generated schema. Defaults to an empty list.
+
+    Returns:
+        Dict[str, Any]: A dictionary representing the JSON schema of the provided model,
+            with specified fields excluded.
+    """
+    if hasattr(model, "__fields__"):
+        output: Dict[str, Any] = {}
+        for field_name, field in model.__fields__.items():
+            if field_name in exclude:
+                continue  # Skip excluded fields
+
+            field_type = field.type_
+            if issubclass(field_type, BaseModel):
+                # Recursively generate schema for nested models
+                output[field_name] = generate_simple_schema(field_type, exclude)
+            else:
+                # Represent the type as a string here
+                output[field_name] = {"type": field_type.__name__}
+        return output
+    else:
+        # Non-model type, return a simplified representation
+        return {"type": model.__name__}
+
+
 def flatten_pydantic_instance(
     instance: BaseModel,
     prefix: str = "",
```
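A small illustration of these two helpers: `get_field_names` flattens nested models and keeps only the last path component, while `generate_simple_schema` reduces each field to a `{"type": ...}` entry, recursing into nested models. The models and the outputs sketched in comments are hypothetical:

```python
from pydantic import BaseModel

from langroid.utils.pydantic_utils import generate_simple_schema, get_field_names


class Address(BaseModel):
    city: str
    zip_code: str


class Person(BaseModel):
    name: str
    age: int
    address: Address


# nested fields flatten to address__city etc., and only the last part is kept
print(get_field_names(Person))  # ['name', 'age', 'city', 'zip_code']

# excluded names are dropped wherever they occur, including inside Address
print(generate_simple_schema(Person, exclude=["zip_code"]))
# {'name': {'type': 'str'}, 'age': {'type': 'int'},
#  'address': {'city': {'type': 'str'}}}
```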
```diff
@@ -138,6 +215,62 @@ def flatten_pydantic_instance(
     return flat_data
 
 
+def extract_fields(doc: BaseModel, fields: List[str]) -> Dict[str, Any]:
+    """
+    Extract specified fields from a Pydantic object.
+    Supports dotted field names, e.g. "metadata.author".
+    Dotted fields are matched exactly according to the corresponding path.
+    Non-dotted fields are matched against the last part of the path.
+    Clashes ignored.
+    Args:
+        doc (BaseModel): The Pydantic object.
+        fields (List[str]): The list of fields to extract.
+
+    Returns:
+        Dict[str, Any]: A dictionary of field names and values.
+
+    """
+
+    def get_value(obj: BaseModel, path: str) -> Any | None:
+        for part in path.split("."):
+            if hasattr(obj, part):
+                obj = getattr(obj, part)
+            else:
+                return None
+        return obj
+
+    def traverse(obj: BaseModel, result: Dict[str, Any], prefix: str = "") -> None:
+        for k, v in obj.__dict__.items():
+            key = f"{prefix}.{k}" if prefix else k
+            if isinstance(v, BaseModel):
+                traverse(v, result, key)
+            else:
+                result[key] = v
+
+    result: Dict[str, Any] = {}
+
+    # Extract values for dotted field names and use last part as key
+    for field in fields:
+        if "." in field:
+            value = get_value(doc, field)
+            if value is not None:
+                key = field.split(".")[-1]
+                result[key] = value
+
+    # Traverse the object to get non-dotted fields
+    all_fields: Dict[str, Any] = {}
+    traverse(doc, all_fields)
+
+    # Add non-dotted fields to the result,
+    # avoid overwriting if already present from dotted names
+    for field in [f for f in fields if "." not in f]:
+        for key, value in all_fields.items():
+            if key.split(".")[-1] == field and field not in result:
+                result[field] = value
+
+    return result
+
+
 def nested_dict_from_flat(
     flat_data: Dict[str, Any],
     sub_dict: str = "",
```
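A sketch of `extract_fields` on a langroid `Document` (the values are made up; `source` is a real field of `DocMetaData`):

```python
from langroid.mytypes import DocMetaData, Document
from langroid.utils.pydantic_utils import extract_fields

doc = Document(content="hello", metadata=DocMetaData(source="wiki"))

# dotted name: followed exactly along the path, keyed by its last part;
# non-dotted name: matched against the last component anywhere in the object
print(extract_fields(doc, ["metadata.source", "content"]))
# {'source': 'wiki', 'content': 'hello'}
```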
```diff
@@ -175,6 +308,315 @@ def pydantic_obj_from_flat_dict(
     model: Type[BaseModel],
     sub_dict: str = "",
 ) -> BaseModel:
-    """ …
+    """Flattened dict with a__b__c style keys -> nested dict -> pydantic object"""
     nested_data = nested_dict_from_flat(flat_data, sub_dict)
     return model(**nested_data)
+
+
+def clean_schema(model: Type[BaseModel], excludes: List[str] = []) -> Dict[str, Any]:
+    """
+    Generate a simple schema for a given Pydantic model,
+    including inherited fields, with an option to exclude certain fields.
+    Handles cases where fields are Lists or other generic types and includes
+    field descriptions if available.
+
+    Args:
+        model (Type[BaseModel]): The Pydantic model class.
+        excludes (List[str]): A list of field names to exclude.
+
+    Returns:
+        Dict[str, Any]: A dictionary representing the simple schema.
+    """
+    schema = {}
+
+    for field_name, field_info in model.__fields__.items():
+        if field_name in excludes:
+            continue
+
+        field_type = field_info.outer_type_
+        description = field_info.field_info.description or ""
+
+        # Handle generic types like List[...]
+        if get_origin(field_type):
+            inner_types = get_args(field_type)
+            inner_type_names = [
+                t.__name__ if hasattr(t, "__name__") else str(t) for t in inner_types
+            ]
+            field_type_str = (
+                f"{get_origin(field_type).__name__}" f'[{", ".join(inner_type_names)}]'
+            )
+            schema[field_name] = {"type": field_type_str, "description": description}
+        elif issubclass(field_type, BaseModel):
+            # Directly use the nested model's schema,
+            # integrating it into the current level
+            nested_schema = clean_schema(field_type, excludes)
+            schema[field_name] = {**nested_schema, "description": description}
+        else:
+            # For basic types, use 'type'
+            schema[field_name] = {
+                "type": field_type.__name__,
+                "description": description,
+            }
+
+    return schema
+
+
```
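Unlike `generate_simple_schema` above, `clean_schema` keeps field descriptions and renders generic types such as `List[str]`. A quick sketch with a hypothetical model:

```python
from typing import List

from pydantic import BaseModel, Field

from langroid.utils.pydantic_utils import clean_schema


class Movie(BaseModel):
    title: str = Field(..., description="Movie title")
    genres: List[str] = Field([], description="Genres")


print(clean_schema(Movie))
# {'title': {'type': 'str', 'description': 'Movie title'},
#  'genres': {'type': 'list[str]', 'description': 'Genres'}}
```

The hunk continues with two context managers for temporarily overriding fields on a Pydantic object: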
```diff
+@contextmanager
+def temp_update(
+    pydantic_object: BaseModel, updates: Dict[str, Any]
+) -> Generator[None, None, None]:
+    original_values = {}
+    try:
+        for field, value in updates.items():
+            if hasattr(pydantic_object, field):
+                # Save original value
+                original_values[field] = getattr(pydantic_object, field)
+                setattr(pydantic_object, field, value)
+            else:
+                # Raise error for non-existent field
+                raise AttributeError(
+                    f"The field '{field}' does not exist in the "
+                    f"Pydantic model '{pydantic_object.__class__.__name__}'."
+                )
+        yield
+    except ValidationError as e:
+        # Handle validation error
+        print(f"Validation error: {e}")
+    finally:
+        # Restore original values
+        for field, value in original_values.items():
+            setattr(pydantic_object, field, value)
+
+
+T = TypeVar("T", bound=BaseModel)
+
+
+@contextmanager
+def temp_params(config: T, field: str, temp: T) -> Generator[None, None, None]:
+    """Context manager to temporarily override `field` in a `config`"""
+    original_vals = getattr(config, field)
+    try:
+        # Apply temporary settings
+        setattr(config, field, temp)
+        yield
+    finally:
+        # Revert to original settings
+        setattr(config, field, original_vals)
+
+
```
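`temp_params` is the simpler of the two: save one field, swap in a replacement, and restore it on exit even if the body raises. A sketch with made-up config classes:

```python
from pydantic import BaseModel

from langroid.utils.pydantic_utils import temp_params


class RetrievalParams(BaseModel):  # hypothetical stand-in for a real config
    k: int = 5


class AgentConfig(BaseModel):  # hypothetical
    params: RetrievalParams = RetrievalParams()


config = AgentConfig()
with temp_params(config, "params", RetrievalParams(k=50)):
    assert config.params.k == 50  # override active inside the block
assert config.params.k == 5  # original value restored on exit
```

The hunk continues with helpers for building Pydantic models out of dataframes: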
```diff
+def numpy_to_python_type(numpy_type: Type[Any]) -> Type[Any]:
+    """Converts a numpy data type to its Python equivalent."""
+    type_mapping = {
+        np.float64: float,
+        np.float32: float,
+        np.int64: int,
+        np.int32: int,
+        np.bool_: bool,
+        # Add other numpy types as necessary
+    }
+    return type_mapping.get(numpy_type, numpy_type)
+
+
+def dataframe_to_pydantic_model(df: pd.DataFrame) -> Type[BaseModel]:
+    """Make a Pydantic model from a dataframe."""
+    fields = {col: (type(df[col].iloc[0]), ...) for col in df.columns}
+    return create_model("DataFrameModel", __base__=BaseModel, **fields)  # type: ignore
+
+
+def dataframe_to_pydantic_objects(df: pd.DataFrame) -> List[BaseModel]:
+    """Make a list of Pydantic objects from a dataframe."""
+    Model = dataframe_to_pydantic_model(df)
+    return [Model(**row.to_dict()) for index, row in df.iterrows()]
+
+
+def first_non_null(series: pd.Series) -> Any | None:
+    """Find the first non-null item in a pandas Series."""
+    for item in series:
+        if item is not None:
+            return item
+    return None
+
+
```
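`dataframe_to_pydantic_model` infers each field's type from the first row, so numpy scalar types (e.g. `numpy.float64`, a `float` subclass) flow through directly. A sketch with made-up data:

```python
import pandas as pd

from langroid.utils.pydantic_utils import dataframe_to_pydantic_objects

df = pd.DataFrame({"name": ["Alice", "Bob"], "score": [1.5, 2.5]})

objs = dataframe_to_pydantic_objects(df)
print(objs[0])  # name='Alice' score=1.5
```

The remainder of the hunk builds on these to turn dataframes into dynamic `Document` subclasses: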
```diff
+def dataframe_to_document_model(
+    df: pd.DataFrame,
+    content: str = "content",
+    metadata: List[str] = [],
+    exclude: List[str] = [],
+) -> Type[BaseModel]:
+    """
+    Make a subclass of Document from a dataframe.
+
+    Args:
+        df (pd.DataFrame): The dataframe.
+        content (str): The name of the column containing the content,
+            which will map to the Document.content field.
+        metadata (List[str]): A list of column names containing metadata;
+            these will be included in the Document.metadata field.
+        exclude (List[str]): A list of column names to exclude from the model.
+            (e.g. "vector" when lance is used to add an embedding vector to the df)
+
+    Returns:
+        Type[BaseModel]: A pydantic model subclassing Document.
+    """
+
+    # Remove excluded columns
+    df = df.drop(columns=exclude, inplace=False)
+    # Check if metadata_cols is empty
+
+    if metadata:
+        # Define fields for the dynamic subclass of DocMetaData
+        metadata_fields = {
+            col: (
+                Optional[numpy_to_python_type(type(first_non_null(df[col])))],
+                None,  # Optional[numpy_to_python_type(type(first_non_null(df[col])))],
+            )
+            for col in metadata
+        }
+        DynamicMetaData = create_model(  # type: ignore
+            "DynamicMetaData", __base__=DocMetaData, **metadata_fields
+        )
+    else:
+        # Use the base DocMetaData class directly
+        DynamicMetaData = DocMetaData
+
+    # Define additional top-level fields for DynamicDocument
+    additional_fields = {
+        col: (
+            Optional[numpy_to_python_type(type(first_non_null(df[col])))],
+            None,  # Optional[numpy_to_python_type(type(first_non_null(df[col])))],
+        )
+        for col in df.columns
+        if col not in metadata and col != content
+    }
+
+    # Create a dynamic subclass of Document
+    DynamicDocumentFields = {
+        **{"metadata": (DynamicMetaData, ...)},
+        **additional_fields,
+    }
+    DynamicDocument = create_model(  # type: ignore
+        "DynamicDocument", __base__=Document, **DynamicDocumentFields
+    )
+
+    def from_df_row(
+        cls: type[BaseModel],
+        row: pd.Series,
+        content: str = "content",
+        metadata: List[str] = [],
+    ) -> BaseModel | None:
+        content_val = row[content] if (content and content in row) else ""
+        metadata_values = (
+            {col: row[col] for col in metadata if col in row} if metadata else {}
+        )
+        additional_values = {
+            col: row[col] for col in additional_fields if col in row and col != content
+        }
+        metadata = DynamicMetaData(**metadata_values)
+        return cls(content=content_val, metadata=metadata, **additional_values)
+
+    # Bind the method to the class
+    DynamicDocument.from_df_row = classmethod(from_df_row)
+
+    return DynamicDocument  # type: ignore
+
+
+def dataframe_to_documents(
+    df: pd.DataFrame,
+    content: str = "content",
+    metadata: List[str] = [],
+    doc_cls: Type[BaseModel] | None = None,
+) -> List[Document]:
+    """
+    Make a list of Document objects from a dataframe.
+    Args:
+        df (pd.DataFrame): The dataframe.
+        content (str): The name of the column containing the content,
+            which will map to the Document.content field.
+        metadata (List[str]): A list of column names containing metadata;
+            these will be included in the Document.metadata field.
+        doc_cls (Type[BaseModel], optional): A Pydantic model subclassing
+            Document. Defaults to None.
+    Returns:
+        List[Document]: The list of Document objects.
+    """
+    Model = doc_cls or dataframe_to_document_model(df, content, metadata)
+    docs = [
+        Model.from_df_row(row, content, metadata)  # type: ignore
+        for _, row in df.iterrows()
+    ]
+    return [m for m in docs if m is not None]
+
+
+def extra_metadata(document: Document, doc_cls: Type[Document] = Document) -> List[str]:
+    """
+    Checks for extra fields in a document's metadata that are not defined in the
+    original metadata schema.
+
+    Args:
+        document (Document): The document instance to check for extra fields.
+        doc_cls (Type[Document]): The class type derived from Document, used
+            as a reference to identify extra fields in the document's metadata.
+
+    Returns:
+        List[str]: A list of strings representing the keys of the extra fields found
+            in the document's metadata.
+    """
+    # Convert metadata to dict, including extra fields.
+    metadata_fields = set(document.metadata.dict().keys())
+
+    # Get defined fields in the metadata of doc_cls
+    defined_fields = set(doc_cls.__fields__["metadata"].type_.__fields__.keys())
+
+    # Identify extra fields not in defined fields.
+    extra_fields = list(metadata_fields - defined_fields)
+
+    return extra_fields
+
+
+def extend_document_class(d: Document) -> Type[Document]:
+    """Generates a new pydantic class based on a given document instance.
+
+    This function dynamically creates a new pydantic class with additional
+    fields based on the "extra" metadata fields present in the given document
+    instance. The new class is a subclass of the original Document class, with
+    the original metadata fields retained and extra fields added as normal
+    fields to the metadata.
+
+    Args:
+        d: An instance of the Document class.
+
+    Returns:
+        A new subclass of the Document class that includes the additional fields
+        found in the metadata of the given document instance.
+    """
+    # Extract the fields from the original metadata class, including types,
+    # correctly handling special types like List[str].
+    original_metadata_fields = {
+        k: (v.outer_type_ if v.shape != 1 else v.type_, ...)
+        for k, v in DocMetaData.__fields__.items()
+    }
+    # Extract extra fields from the metadata instance with their types
+    extra_fields = {
+        k: (type(v), ...)
+        for k, v in d.metadata.__dict__.items()
+        if k not in DocMetaData.__fields__
+    }
+
+    # Combine original and extra fields for the new metadata class
+    combined_fields = {**original_metadata_fields, **extra_fields}
+
+    # Create a new metadata class with combined fields
+    NewMetadataClass = create_model(  # type: ignore
+        "ExtendedDocMetadata", **combined_fields, __base__=DocMetaData
+    )
+    # NewMetadataClass.__config__.arbitrary_types_allowed = True
+
+    # Create a new document class using the new metadata class
+    NewDocumentClass = create_model(
+        "ExtendedDocument",
+        content=(str, ...),
+        metadata=(NewMetadataClass, ...),
+        __base__=Document,
+    )
+
+    return NewDocumentClass
```
langroid/utils/system.py
CHANGED
```diff
@@ -1,10 +1,12 @@
 import getpass
 import hashlib
+import importlib
 import inspect
 import logging
 import shutil
 import socket
 import traceback
+from typing import Any
 
 logger = logging.getLogger(__name__)
 
```
```diff
@@ -15,6 +17,39 @@ DELETION_ALLOWED_PATHS = [
 ]
 
 
+class LazyLoad:
+    """Lazy loading of modules or classes."""
+
+    def __init__(self, import_path: str) -> None:
+        self.import_path = import_path
+        self._target = None
+        self._is_target_loaded = False
+
+    def _load_target(self) -> None:
+        if not self._is_target_loaded:
+            try:
+                # Attempt to import as a module
+                self._target = importlib.import_module(self.import_path)  # type: ignore
+            except ImportError:
+                # If module import fails, attempt to import as a
+                # class or function from a module
+                module_path, attr_name = self.import_path.rsplit(".", 1)
+                module = importlib.import_module(module_path)
+                self._target = getattr(module, attr_name)
+            self._is_target_loaded = True
+
+    def __getattr__(self, name: str) -> Any:
+        self._load_target()
+        return getattr(self._target, name)
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        self._load_target()
+        if callable(self._target):
+            return self._target(*args, **kwargs)
+        else:
+            raise TypeError(f"{self.import_path!r} object is not callable")
+
+
 def rmdir(path: str) -> bool:
     """
     Remove a directory recursively.
```
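`LazyLoad` defers an import until first attribute access or call, and falls back to attribute lookup when the dotted path names a class or function rather than a module. A sketch using numpy and the standard library:

```python
from langroid.utils.system import LazyLoad

np = LazyLoad("numpy")  # nothing imported yet
print(np.sqrt(2))  # numpy is imported on this first attribute access

# a dotted path that is not itself a module falls back to attribute lookup
sha256 = LazyLoad("hashlib.sha256")
print(sha256(b"hello").hexdigest())  # __call__ triggers the import
```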
```diff
@@ -96,7 +131,7 @@ def generate_user_id(org: str = "") -> str:
 def update_hash(hash: str | None = None, s: str = "") -> str:
     """
     Takes a SHA256 hash string and a new string, updates the hash with the new string,
-    and returns the updated hash string
+    and returns the updated hash string.
 
     Args:
         hash (str): A SHA256 hash string.
```
langroid/vector_store/__init__.py
CHANGED
```diff
@@ -1,8 +1,40 @@
 from . import base
-…
+
 from . import qdrantdb
 from . import meilisearch
+from . import lancedb
 
-from . …
+from .base import VectorStoreConfig, VectorStore
 from .qdrantdb import QdrantDBConfig, QdrantDB
 from .meilisearch import MeiliSearch, MeiliSearchConfig
+from .lancedb import LanceDB, LanceDBConfig
+
+has_chromadb = False
+try:
+    from . import chromadb
+    from .chromadb import ChromaDBConfig, ChromaDB
+
+    chromadb  # silence linters
+    ChromaDB
+    ChromaDBConfig
+    has_chromadb = True
+except ImportError:
+    pass
+
+__all__ = [
+    "base",
+    "VectorStore",
+    "VectorStoreConfig",
+    "qdrantdb",
+    "meilisearch",
+    "lancedb",
+    "QdrantDBConfig",
+    "QdrantDB",
+    "MeiliSearch",
+    "MeiliSearchConfig",
+    "LanceDB",
+    "LanceDBConfig",
+]
+
+if has_chromadb:
+    __all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
```
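The guarded import makes `chromadb` an optional dependency: the package imports cleanly without it, and the ChromaDB names are exported only when it is installed. From user code:

```python
# always available after this change
from langroid.vector_store import LanceDB, LanceDBConfig

# raises ImportError unless the chromadb extra is installed
from langroid.vector_store import ChromaDB, ChromaDBConfig
```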
langroid/vector_store/base.py
CHANGED
```diff
@@ -4,6 +4,7 @@ from abc import ABC, abstractmethod
 from typing import Dict, List, Optional, Sequence, Tuple
 
 import numpy as np
+import pandas as pd
 from pydantic import BaseSettings
 
 from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
```
```diff
@@ -12,6 +13,7 @@ from langroid.mytypes import Document
 from langroid.utils.algorithms.graph import components, topological_sort
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import print_long_text
+from langroid.utils.pandas_utils import stringify
 
 logger = logging.getLogger(__name__)
 
```
```diff
@@ -127,6 +129,35 @@ class VectorStore(ABC):
     def add_documents(self, documents: Sequence[Document]) -> None:
         pass
 
+    def compute_from_docs(self, docs: List[Document], calc: str) -> str:
+        """Compute a result on a set of documents,
+        using a dataframe calc string like `df.groupby('state')['income'].mean()`.
+        """
+        dicts = [doc.dict() for doc in docs]
+        df = pd.DataFrame(dicts)
+
+        try:
+            result = pd.eval(  # safer than eval but limited to single expression
+                calc,
+                engine="python",
+                parser="pandas",
+                local_dict={"df": df},
+            )
+        except Exception as e:
+            # return error message so LLM can fix the calc string if needed
+            err = f"""
+            Error encountered in pandas eval: {str(e)}
+            """
+            if isinstance(e, KeyError) and "not in index" in str(e):
+                # Pd.eval sometimes fails on a perfectly valid exprn like
+                # df.loc[..., 'column'] with a KeyError.
+                err += """
+                Maybe try a different way, e.g.
+                instead of df.loc[..., 'column'], try df.loc[...]['column']
+                """
+            return err
+        return stringify(result)
+
     def maybe_add_ids(self, documents: Sequence[Document]) -> None:
         """Add ids to metadata if absent, since some
         vecdbs don't like having blank ids."""
```
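`compute_from_docs` flattens each `Document` via `.dict()` into a dataframe row, evaluates the calc string with `pd.eval`, and returns either the stringified result or the error text (so a calling LLM can repair its expression). A runnable sketch: since the method never touches `self`, it is invoked unbound here purely for illustration; in practice you would call it on a concrete store such as `LanceDB`.

```python
import pandas as pd

from langroid.utils.pydantic_utils import dataframe_to_documents
from langroid.vector_store.base import VectorStore

df = pd.DataFrame(
    {
        "content": ["a", "b", "c"],
        "state": ["CA", "NY", "CA"],
        "income": [100.0, 80.0, 60.0],
    }
)
# "state" and "income" become top-level Document fields, i.e. dataframe
# columns after the .dict() flattening inside compute_from_docs
docs = dataframe_to_documents(df, content="content", metadata=[])

calc = "df.groupby('state')['income'].mean()"
print(VectorStore.compute_from_docs(None, docs, calc))  # type: ignore
```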
```diff
@@ -289,9 +320,9 @@ class VectorStore(ABC):
         return new_windows
 
     @abstractmethod
-    def get_all_documents(self) -> List[Document]:
+    def get_all_documents(self, where: str = "") -> List[Document]:
         """
-        Get all documents in the current collection
+        Get all documents in the current collection, possibly filtered by `where`.
         """
         pass
 
```