langroid 0.1.196__tar.gz → 0.1.198__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. {langroid-0.1.196 → langroid-0.1.198}/PKG-INFO +6 -5
  2. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/chat_agent.py +10 -2
  3. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/chat_document.py +1 -1
  4. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/doc_chat_agent.py +2 -2
  5. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/task.py +7 -3
  6. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tool_message.py +31 -6
  7. {langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/models.py +53 -14
  8. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/hf_formatter.py +28 -2
  9. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/document_parser.py +34 -53
  10. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/json.py +59 -2
  11. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/parser.py +1 -3
  12. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/pydantic_utils.py +47 -0
  13. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/system.py +35 -0
  14. {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/chromadb.py +10 -4
  15. {langroid-0.1.196 → langroid-0.1.198}/pyproject.toml +7 -5
  16. {langroid-0.1.196 → langroid-0.1.198}/LICENSE +0 -0
  17. {langroid-0.1.196 → langroid-0.1.198}/README.md +0 -0
  18. {langroid-0.1.196 → langroid-0.1.198}/langroid/__init__.py +0 -0
  19. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/__init__.py +0 -0
  20. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/base.py +0 -0
  21. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/batch.py +0 -0
  22. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/callbacks/__init__.py +0 -0
  23. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/callbacks/chainlit.py +0 -0
  24. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/helpers.py +0 -0
  25. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/junk +0 -0
  26. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/openai_assistant.py +0 -0
  27. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/__init__.py +0 -0
  28. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  29. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/__init__.py +0 -0
  30. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  31. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  32. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/lance_tools.py +0 -0
  33. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  34. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/__init__.py +0 -0
  35. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  36. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  37. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/utils/__init__.py +0 -0
  38. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/utils/system_message.py +0 -0
  39. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  40. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/retriever_agent.py +0 -0
  41. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/__init__.py +0 -0
  42. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  43. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/__init__.py +0 -0
  44. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  45. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  46. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/system_message.py +0 -0
  47. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/tools.py +0 -0
  48. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/table_chat_agent.py +0 -0
  49. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/__init__.py +0 -0
  50. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  51. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/extract_tool.py +0 -0
  52. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/generator_tool.py +0 -0
  53. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/google_search_tool.py +0 -0
  54. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  55. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/recipient_tool.py +0 -0
  56. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/run_python_code.py +0 -0
  57. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/sciphi_search_rag_tool.py +0 -0
  58. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/segment_extract_tool.py +0 -0
  59. {langroid-0.1.196 → langroid-0.1.198}/langroid/agent_config.py +0 -0
  60. {langroid-0.1.196 → langroid-0.1.198}/langroid/cachedb/__init__.py +0 -0
  61. {langroid-0.1.196 → langroid-0.1.198}/langroid/cachedb/base.py +0 -0
  62. {langroid-0.1.196 → langroid-0.1.198}/langroid/cachedb/momento_cachedb.py +0 -0
  63. {langroid-0.1.196 → langroid-0.1.198}/langroid/cachedb/redis_cachedb.py +0 -0
  64. {langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/__init__.py +0 -0
  65. {langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/base.py +0 -0
  66. {langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/clustering.py +0 -0
  67. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/__init__.py +0 -0
  68. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/azure_openai.py +0 -0
  69. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/base.py +0 -0
  70. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/config.py +0 -0
  71. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/openai_assistants.py +0 -0
  72. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/openai_gpt.py +0 -0
  73. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  74. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/base.py +0 -0
  75. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  76. {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/utils.py +0 -0
  77. {langroid-0.1.196 → langroid-0.1.198}/langroid/mytypes.py +0 -0
  78. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/__init__.py +0 -0
  79. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/agent_chats.py +0 -0
  80. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/code-parsing.md +0 -0
  81. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/code_parser.py +0 -0
  82. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/config.py +0 -0
  83. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/para_sentence_split.py +0 -0
  84. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/repo_loader.py +0 -0
  85. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/search.py +0 -0
  86. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/spider.py +0 -0
  87. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/table_loader.py +0 -0
  88. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/url_loader.py +0 -0
  89. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/url_loader_cookies.py +0 -0
  90. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/urls.py +0 -0
  91. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/utils.py +0 -0
  92. {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/web_search.py +0 -0
  93. {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/__init__.py +0 -0
  94. {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/chat-gpt4-system-prompt.md +0 -0
  95. {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/dialog.py +0 -0
  96. {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/prompts_config.py +0 -0
  97. {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/templates.py +0 -0
  98. {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/transforms.py +0 -0
  99. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/__init__.py +0 -0
  100. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/algorithms/__init__.py +0 -0
  101. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/algorithms/graph.py +0 -0
  102. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/configuration.py +0 -0
  103. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/constants.py +0 -0
  104. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/docker.py +0 -0
  105. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/globals.py +0 -0
  106. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/llms/__init__.py +0 -0
  107. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/llms/strings.py +0 -0
  108. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/logging.py +0 -0
  109. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/output/__init__.py +0 -0
  110. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/output/printing.py +0 -0
  111. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/pandas_utils.py +0 -0
  112. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/web/__init__.py +0 -0
  113. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/web/login.py +0 -0
  114. {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/web/selenium_login.py +0 -0
  115. {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/__init__.py +0 -0
  116. {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/base.py +0 -0
  117. {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/lancedb.py +0 -0
  118. {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/meilisearch.py +0 -0
  119. {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/momento.py +0 -0
  120. {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/qdrant_cloud.py +0 -0
  121. {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/qdrantdb.py +0 -0
{langroid-0.1.196 → langroid-0.1.198}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: langroid
- Version: 0.1.196
+ Version: 0.1.198
  Summary: Harness LLMs with Multi-Agent Programming
  License: MIT
  Author: Prasad Chalasani
@@ -18,20 +18,21 @@ Provides-Extra: mysql
  Provides-Extra: neo4j
  Provides-Extra: postgres
  Provides-Extra: sciphi
+ Provides-Extra: transformers
+ Provides-Extra: unstructured
  Requires-Dist: agent-search (>=0.0.7,<0.0.8) ; extra == "sciphi"
  Requires-Dist: aiohttp (>=3.9.1,<4.0.0)
  Requires-Dist: async-generator (>=1.10,<2.0)
  Requires-Dist: autopep8 (>=2.0.2,<3.0.0)
  Requires-Dist: black[jupyter] (>=23.3.0,<24.0.0)
  Requires-Dist: bs4 (>=0.0.1,<0.0.2)
- Requires-Dist: chainlit (>=1.0.200,<2.0.0) ; extra == "chainlit"
- Requires-Dist: chromadb (==0.3.21)
+ Requires-Dist: chainlit (>=1.0.301,<2.0.0) ; extra == "chainlit"
+ Requires-Dist: chromadb (>=0.4.21,<=0.4.23)
  Requires-Dist: colorlog (>=6.7.0,<7.0.0)
  Requires-Dist: docstring-parser (>=0.15,<0.16)
  Requires-Dist: duckduckgo-search (>=4.4,<5.0)
  Requires-Dist: faker (>=18.9.0,<19.0.0)
  Requires-Dist: fakeredis (>=2.12.1,<3.0.0)
- Requires-Dist: farm-haystack[file-conversion,ocr,pdf,preprocessing] (>=1.21.1,<2.0.0)
  Requires-Dist: fire (>=0.5.0,<0.6.0)
  Requires-Dist: flake8 (>=6.0.0,<7.0.0)
  Requires-Dist: google-api-python-client (>=2.95.0,<3.0.0)
@@ -95,7 +96,7 @@ Requires-Dist: trafilatura (>=1.5.0,<2.0.0)
  Requires-Dist: typer (>=0.9.0,<0.10.0)
  Requires-Dist: types-redis (>=4.5.5.2,<5.0.0.0)
  Requires-Dist: types-requests (>=2.31.0.1,<3.0.0.0)
- Requires-Dist: unstructured[docx,pdf,pptx] (>=0.10.16,<0.10.18)
+ Requires-Dist: unstructured[docx,pdf,pptx] (>=0.10.16,<0.10.18) ; extra == "unstructured"
  Requires-Dist: wget (>=3.2,<4.0)
  Description-Content-Type: text/markdown

{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/chat_agent.py
@@ -225,14 +225,22 @@ class ChatAgent(Agent):
          enabled_classes: List[Type[ToolMessage]] = list(self.llm_tools_map.values())
          if len(enabled_classes) == 0:
              return "You can ask questions in natural language."
-
          json_instructions = "\n\n".join(
              [
-                 msg_cls.json_instructions()
+                 msg_cls.json_instructions(tool=self.config.use_tools)
                  for _, msg_cls in enumerate(enabled_classes)
                  if msg_cls.default_value("request") in self.llm_tools_usable
              ]
          )
+         # if any of the enabled classes has json_group_instructions, then use that,
+         # else fall back to ToolMessage.json_group_instructions
+         for msg_cls in enabled_classes:
+             if hasattr(msg_cls, "json_group_instructions") and callable(
+                 getattr(msg_cls, "json_group_instructions")
+             ):
+                 return msg_cls.json_group_instructions().format(
+                     json_instructions=json_instructions
+                 )
          return ToolMessage.json_group_instructions().format(
              json_instructions=json_instructions
          )
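The fallback loop above lets any enabled ToolMessage subclass supply the group-level wrapper around the per-tool instructions. A minimal sketch of such an override (the tool name and wording here are illustrative, not taken from the langroid source):

    from langroid.agent.tool_message import ToolMessage

    class PolinskyTool(ToolMessage):
        request: str = "polinsky"
        purpose: str = "Get the Polinsky transform of a given <number>"
        number: int

        @classmethod
        def json_group_instructions(cls) -> str:
            # must keep the {json_instructions} placeholder; the ChatAgent
            # fills it in with the per-tool instructions via .format()
            return """
            Use ONLY ONE of the following JSON tools per message:
            {json_instructions}
            """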
{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/chat_document.py
@@ -84,7 +84,7 @@ class ChatDocument(Document):
              json_data = json.loads(j)
              tool = json_data.get("request")
              if tool is not None:
-                 tools.append(tool)
+                 tools.append(str(tool))
          return tools

      def log_fields(self) -> ChatDocLoggerFields:
{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/doc_chat_agent.py
@@ -135,7 +135,7 @@ class DocChatAgentConfig(ChatAgentConfig):
          # NOTE: PDF parsing is extremely challenging, and each library
          # has its own strengths and weaknesses.
          # Try one that works for your use case.
-         # or "haystack", "unstructured", "pdfplumber", "fitz", "pypdf"
+         # or "unstructured", "pdfplumber", "fitz", "pypdf"
          library="pdfplumber",
      ),
  )
@@ -156,7 +156,7 @@ class DocChatAgentConfig(ChatAgentConfig):
      collection_name="doc-chat-lancedb",
      replace_collection=True,
      storage_path=".lancedb/data/",
-     embedding=hf_embed_config,
+     embedding=oai_embed_config,
  )
  llm: OpenAIGPTConfig = OpenAIGPTConfig(
      type="openai",
{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/task.py
@@ -2,6 +2,7 @@ from __future__ import annotations

  import copy
  import logging
+ import re
  from collections import Counter
  from types import SimpleNamespace
  from typing import (
@@ -781,17 +782,20 @@ class Task:
          # handle routing instruction in result if any,
          # of the form PASS=<recipient>
          content = msg.content if isinstance(msg, ChatDocument) else msg
+         content = content.strip()
          if PASS in content and PASS_TO not in content:
              return True, None
          if PASS_TO in content and content.split(":")[1] != "":
              return True, content.split(":")[1]
-         if SEND_TO in content and content.split(":")[1] != "":
-             recipient = content.split(":")[1]
+         if SEND_TO in content and (send_parts := re.split(r"[,: ]", content))[1] != "":
+             # assume syntax is SEND_TO:<recipient> <content>
+             # or SEND_TO:<recipient>,<content> or SEND_TO:<recipient>:<content>
+             recipient = send_parts[1].strip()
              # get content to send, clean out routing instruction, and
              # start from 1 char after SEND_TO:<recipient>,
              # because we expect there is either a blank or some other separator
              # after the recipient
-             content_to_send = content.replace(f"{SEND_TO}:{recipient}", "").strip()[1:]
+             content_to_send = content.replace(f"{SEND_TO}{recipient}", "").strip()[1:]
              # if no content then treat same as PASS_TO
              if content_to_send == "":
                  return True, recipient
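A quick standalone check of what the walrus-assigned re.split yields for the three separator styles named in the comment ("SEND:" below is a stand-in for the actual SEND_TO constant, whose exact value is not shown in this diff):

    import re

    for content in ("SEND:Bob hello", "SEND:Bob,hello", "SEND:Bob:hello"):
        parts = re.split(r"[,: ]", content)
        print(parts)     # ['SEND', 'Bob', 'hello'] in all three cases
        print(parts[1])  # 'Bob' -> the recipient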
{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tool_message.py
@@ -16,7 +16,10 @@ from docstring_parser import parse
  from pydantic import BaseModel

  from langroid.language_models.base import LLMFunctionSpec
- from langroid.utils.pydantic_utils import _recursive_purge_dict_key
+ from langroid.utils.pydantic_utils import (
+     _recursive_purge_dict_key,
+     generate_simple_schema,
+ )


  class ToolMessage(ABC, BaseModel):
@@ -79,6 +82,9 @@ class ToolMessage(ABC, BaseModel):
          ex = choice(cls.examples())
          return ex.json_example()

+     def to_json(self) -> str:
+         return self.json(indent=4, exclude={"result", "purpose"})
+
      def json_example(self) -> str:
          return self.json(indent=4, exclude={"result", "purpose"})

@@ -101,22 +107,30 @@
          return properties.get(f, {}).get("default", None)

      @classmethod
-     def json_instructions(cls) -> str:
+     def json_instructions(cls, tool: bool = False) -> str:
          """
          Default Instructions to the LLM showing how to use the tool/function-call.
          Works for GPT4 but override this for weaker LLMs if needed.
+
+         Args:
+             tool: instructions for Langroid-native tool use? (e.g. for non-OpenAI LLM)
+                 (or else it would be for OpenAI Function calls)
          Returns:
              str: instructions on how to use the message
          """
+         # TODO: when we attempt to use a "simpler schema"
+         # (i.e. all nested fields explicit without definitions),
+         # we seem to get worse results, so we turn it off for now
+         param_dict = (
+             # cls.simple_schema() if tool else
+             cls.llm_function_schema(request=True).parameters
+         )
          return textwrap.dedent(
              f"""
              TOOL: {cls.default_value("request")}
              PURPOSE: {cls.default_value("purpose")}
              JSON FORMAT: {
-                 json.dumps(
-                     cls.llm_function_schema(request=True).parameters,
-                     indent=4,
-                 )
+                 json.dumps(param_dict, indent=4)
              }
              {"EXAMPLE: " + cls.usage_example() if cls.examples() else ""}
              """.lstrip()
@@ -210,3 +224,14 @@
              description=cls.default_value("purpose"),
              parameters=parameters,
          )
+
+     @classmethod
+     def simple_schema(cls) -> Dict[str, Any]:
+         """
+         Return a simplified schema for the message, with only the request and
+         required fields.
+         Returns:
+             Dict[str, Any]: simplified schema
+         """
+         schema = generate_simple_schema(cls, exclude=["result", "purpose"])
+         return schema
{langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/models.py
@@ -6,7 +6,6 @@ from dotenv import load_dotenv
  from openai import OpenAI

  from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
- from langroid.language_models.utils import retry_with_exponential_backoff
  from langroid.mytypes import Embeddings
  from langroid.parsing.utils import batched

@@ -26,6 +25,58 @@ class SentenceTransformerEmbeddingsConfig(EmbeddingModelsConfig):
      context_length: int = 512


+ class EmbeddingFunctionCallable:
+     """
+     A callable class designed to generate embeddings for a list of texts using
+     the OpenAI API, with automatic retries on failure.
+
+     Attributes:
+         model (OpenAIEmbeddings): An instance of OpenAIEmbeddings that provides
+             configuration and utilities for generating embeddings.
+
+     Methods:
+         __call__(input: List[str]) -> Embeddings: Generate embeddings for
+             a list of input texts.
+     """
+
+     def __init__(self, model: "OpenAIEmbeddings"):
+         """
+         Initialize the EmbeddingFunctionCallable with a specific model.
+
+         Args:
+             model (OpenAIEmbeddings): An instance of OpenAIEmbeddings to use for
+                 generating embeddings.
+         """
+         self.model = model
+
+     def __call__(self, input: List[str]) -> Embeddings:
+         """
+         Generate embeddings for a given list of input texts using the OpenAI API,
+         with retries on failure.
+
+         This method:
+         - Truncates each text in the input list to the model's maximum context length.
+         - Processes the texts in batches to generate embeddings efficiently.
+         - Automatically retries the embedding generation process with exponential
+           backoff in case of failures.
+
+         Args:
+             input (List[str]): A list of input texts to generate embeddings for.
+
+         Returns:
+             Embeddings: A list of embedding vectors corresponding to the input texts.
+         """
+         tokenized_texts = self.model.truncate_texts(input)
+         embeds = []
+         for batch in batched(tokenized_texts, 500):
+             result = self.model.client.embeddings.create(
+                 input=batch, model=self.model.config.model_name
+             )
+             batch_embeds = [d.embedding for d in result.data]
+             embeds.extend(batch_embeds)
+         return embeds
+
+
  class OpenAIEmbeddings(EmbeddingModel):
      def __init__(self, config: OpenAIEmbeddingsConfig = OpenAIEmbeddingsConfig()):
          super().__init__()
@@ -56,19 +107,7 @@ class OpenAIEmbeddings(EmbeddingModel):
          ]

      def embedding_fn(self) -> Callable[[List[str]], Embeddings]:
-         @retry_with_exponential_backoff
-         def fn(texts: List[str]) -> Embeddings:
-             tokenized_texts = self.truncate_texts(texts)
-             embeds = []
-             for batch in batched(tokenized_texts, 500):
-                 result = self.client.embeddings.create(
-                     input=batch, model=self.config.model_name
-                 )
-                 batch_embeds = [d.embedding for d in result.data]
-                 embeds.extend(batch_embeds)
-             return embeds
-
-         return fn
+         return EmbeddingFunctionCallable(self)

      @property
      def embedding_dims(self) -> int:
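Replacing the decorated closure with a class instance keeps the returned embedding function picklable and introspectable while leaving call sites unchanged. A usage sketch (assumes an OpenAI API key is configured; OpenAIEmbeddingsConfig is taken from the constructor default above):

    from langroid.embedding_models.models import (
        OpenAIEmbeddings,
        OpenAIEmbeddingsConfig,
    )

    embed_model = OpenAIEmbeddings(OpenAIEmbeddingsConfig())
    fn = embed_model.embedding_fn()        # an EmbeddingFunctionCallable
    vecs = fn(["hello world", "goodbye"])  # one embedding vector per text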
{langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/hf_formatter.py
@@ -6,11 +6,10 @@ models will have the same tokenizer, so we just use the first one.
  """
  import logging
  import re
- from typing import List, Set
+ from typing import Any, List, Set, Type

  from huggingface_hub import HfApi, ModelFilter
  from jinja2.exceptions import TemplateError
- from transformers import AutoTokenizer

  from langroid.language_models.base import LanguageModel, LLMMessage, Role
  from langroid.language_models.config import HFPromptFormatterConfig
@@ -19,6 +18,31 @@ from langroid.language_models.prompt_formatter.base import PromptFormatter
  logger = logging.getLogger(__name__)


+ def try_import_AutoTokenizer() -> Type[Any]:
+     """
+     Attempts to import the AutoTokenizer class from the transformers package.
+     Returns:
+         The AutoTokenizer class if successful.
+     Raises:
+         ImportError: If the transformers package is not installed.
+     """
+     try:
+         from transformers import AutoTokenizer
+
+         return AutoTokenizer  # type: ignore
+     except ImportError:
+         raise ImportError(
+             """
+             You are trying to use the HuggingFace transformers.AutoTokenizer,
+             but the `transformers` package is not installed
+             by default with Langroid. Please install langroid using the
+             `transformers` extra, like so:
+             pip install "langroid[transformers]"
+             or equivalent.
+             """
+         )
+
+
  def find_hf_formatter(model_name: str) -> str:
      hf_api = HfApi()
      # try to find a matching model, with progressively shorter prefixes of model_name
@@ -37,6 +61,7 @@ def find_hf_formatter(model_name: str) -> str:
              mdl = next(models)
          except StopIteration:
              continue
+         AutoTokenizer = try_import_AutoTokenizer()
          tokenizer = AutoTokenizer.from_pretrained(mdl.id)
          if tokenizer.chat_template is not None:
              return str(mdl.id)
@@ -60,6 +85,7 @@ class HFFormatter(PromptFormatter):
              mdl = next(models)
          except StopIteration:
              raise ValueError(f"Model {config.model_name} not found on HuggingFace Hub")
+         AutoTokenizer = try_import_AutoTokenizer()
          self.tokenizer = AutoTokenizer.from_pretrained(mdl.id)
          if self.tokenizer.chat_template is None:
              raise ValueError(
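With the import deferred, constructing a formatter is now what first pulls in transformers. A sketch, assuming the `transformers` extra is installed (the model id is illustrative; any Hub model with a chat template should work):

    from langroid.language_models.config import HFPromptFormatterConfig
    from langroid.language_models.prompt_formatter.hf_formatter import HFFormatter

    config = HFPromptFormatterConfig(model_name="mistralai/Mistral-7B-Instruct-v0.2")
    formatter = HFFormatter(config)  # looks up the model on the HF Hub and
                                     # loads its tokenizer's chat template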
{langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/document_parser.py
@@ -11,7 +11,6 @@ import requests

  from langroid.mytypes import DocMetaData, Document
  from langroid.parsing.parser import Parser, ParsingConfig
- from langroid.parsing.urls import url_to_tempfile

  logger = logging.getLogger(__name__)

@@ -54,8 +53,6 @@ class DocumentParser(Parser):
              return PDFPlumberParser(source, config)
          elif config.pdf.library == "unstructured":
              return UnstructuredPDFParser(source, config)
-         elif config.pdf.library == "haystack":
-             return HaystackPDFParser(source, config)
          else:
              raise ValueError(
                  f"Unsupported PDF library specified: {config.pdf.library}"
@@ -301,59 +298,23 @@ class PDFPlumberParser(DocumentParser):
          return self.fix_text(page.extract_text())


- class HaystackPDFParser(DocumentParser):
-     """
-     Parser for processing PDFs using the `haystack` library.
-     """
-
-     def get_doc_chunks(self) -> List[Document]:
-         """
-         Overrides the base class method to use the `haystack` library.
-         See there for more details.
-         """
-
-         from haystack.nodes import PDFToTextConverter, PreProcessor
-
-         converter = PDFToTextConverter(
-             remove_numeric_tables=True,
-         )
-         path = self.source
-         if path.startswith(("http://", "https://")):
-             path = url_to_tempfile(path)
-         doc = converter.convert(file_path=path, meta=None)
-         # note self.config.chunk_size is in token units,
-         # and we use an approximation of 75 words per 100 tokens
-         # to convert to word units
-         preprocessor = PreProcessor(
-             clean_empty_lines=True,
-             clean_whitespace=True,
-             clean_header_footer=False,
-             split_by="word",
-             split_length=int(0.75 * self.config.chunk_size),
-             split_overlap=int(0.75 * self.config.overlap),
-             split_respect_sentence_boundary=True,
-             add_page_number=True,
-         )
-         chunks = preprocessor.process(doc)
-         return [
-             Document(
-                 content=chunk.content,
-                 metadata=DocMetaData(
-                     source=f"{self.source} page {chunk.meta['page']}",
-                     is_chunk=True,
-                 ),
-             )
-             for chunk in chunks
-         ]
-
-
  class UnstructuredPDFParser(DocumentParser):
      """
      Parser for processing PDF files using the `unstructured` library.
      """

      def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:  # type: ignore
-         from unstructured.partition.pdf import partition_pdf
+         try:
+             from unstructured.partition.pdf import partition_pdf
+         except ImportError:
+             raise ImportError(
+                 """
+                 The `unstructured` library is not installed by default with langroid.
+                 To include this library, please install langroid with the
+                 `unstructured` extra by running `pip install "langroid[unstructured]"`
+                 or equivalent.
+                 """
+             )

      # from unstructured.chunking.title import chunk_by_title

@@ -367,7 +328,7 @@ class UnstructuredPDFParser(DocumentParser):
                  Please try a different library by setting the `library` field
                  in the `pdf` section of the `parsing` field in the config file.
                  Supported libraries are:
-                 fitz, pypdf, pdfplumber, unstructured, haystack
+                 fitz, pypdf, pdfplumber, unstructured
                  """
              )

@@ -406,7 +367,17 @@ class UnstructuredDocxParser(DocumentParser):
      """

      def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:  # type: ignore
-         from unstructured.partition.docx import partition_docx
+         try:
+             from unstructured.partition.docx import partition_docx
+         except ImportError:
+             raise ImportError(
+                 """
+                 The `unstructured` library is not installed by default with langroid.
+                 To include this library, please install langroid with the
+                 `unstructured` extra by running `pip install "langroid[unstructured]"`
+                 or equivalent.
+                 """
+             )

          elements = partition_docx(file=self.doc_bytes, include_page_breaks=True)

@@ -447,7 +418,17 @@

  class UnstructuredDocParser(UnstructuredDocxParser):
      def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:  # type: ignore
-         from unstructured.partition.doc import partition_doc
+         try:
+             from unstructured.partition.doc import partition_doc
+         except ImportError:
+             raise ImportError(
+                 """
+                 The `unstructured` library is not installed by default with langroid.
+                 To include this library, please install langroid with the
+                 `unstructured` extra by running `pip install "langroid[unstructured]"`
+                 or equivalent.
+                 """
+             )

          elements = partition_doc(filename=self.source, include_page_breaks=True)

{langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/json.py
@@ -1,4 +1,5 @@
  import json
+ import re
  from typing import Any, Iterator, List

  from pyparsing import nestedExpr, originalTextFor
@@ -44,6 +45,60 @@ def get_json_candidates(s: str) -> List[str]:
      return []


+ def replace_undefined(s: str, undefined_placeholder: str = '"<undefined>"') -> str:
+     """
+     Replace undefined values in a potential json str with a placeholder.
+
+     Args:
+         - s (str): The potential JSON string to parse.
+         - undefined_placeholder (str): The placeholder or error message
+             for undefined values.
+
+     Returns:
+         - str: The (potential) JSON string with undefined values
+             replaced by the placeholder.
+     """
+
+     # Preprocess the string to replace undefined values with the placeholder
+     # This regex looks for patterns like ": <identifier>" and replaces them
+     # with the placeholder.
+     # It's a simple approach and might need adjustments for complex cases
+     # This is an attempt to handle cases where a weak LLM may produce
+     # a JSON-like string without quotes around some values, e.g.
+     # {"rent": DO-NOT-KNOW }
+     preprocessed_s = re.sub(
+         r":\s*([a-zA-Z_][a-zA-Z_0-9\-]*)", f": {undefined_placeholder}", s
+     )
+
+     # Now, attempt to parse the preprocessed string as JSON
+     try:
+         return preprocessed_s
+     except Exception:
+         # If parsing fails, return an error message instead
+         # (this should be rare after preprocessing)
+         return s
+
+
+ def repair_newlines(s: str) -> str:
+     """
+     Attempt to load as json, and if it fails, try with newlines replaced by space.
+     Intended to handle cases where weak LLMs produce JSON-like strings where
+     some string-values contain explicit newlines, e.g.:
+     {"text": "This is a text\n with a newline"}
+     These would not be valid JSON, so we try to clean them up here.
+     """
+     try:
+         json.loads(s)
+         return s
+     except Exception:
+         try:
+             s = s.replace("\n", " ")
+             json.loads(s)
+             return s
+         except Exception:
+             return s
+
+
  def extract_top_level_json(s: str) -> List[str]:
      """Extract all top-level JSON-formatted substrings from a given string.

@@ -53,15 +108,17 @@ def extract_top_level_json(s: str) -> List[str]:
      Returns:
          List[str]: A list of top-level JSON-formatted substrings.
      """
-     # Find JSON object and array candidates using regular expressions
+     # Find JSON object and array candidates
      json_candidates = get_json_candidates(s)

      normalized_candidates = [
          candidate.replace("\\{", "{").replace("\\}", "}").replace("\\_", "_")
          for candidate in json_candidates
      ]
+     candidates = [replace_undefined(candidate) for candidate in normalized_candidates]
+     candidates = [repair_newlines(candidate) for candidate in candidates]
      top_level_jsons = [
-         candidate for candidate in normalized_candidates if is_valid_json(candidate)
+         candidate for candidate in candidates if is_valid_json(candidate)
      ]

      return top_level_jsons
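End to end, the two repair passes let extract_top_level_json accept output that a strict json.loads would reject. A sketch of the expected behavior (the exact output string is inferred from the code above):

    from langroid.parsing.json import extract_top_level_json

    messy = 'Sure! {"request": "rent_info", "rent": DO-NOT-KNOW}'
    print(extract_top_level_json(messy))
    # -> ['{"request": "rent_info", "rent": "<undefined>"}']
    # the unquoted DO-NOT-KNOW is swapped for the "<undefined>" placeholder,
    # which makes the candidate valid JSON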
{langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/parser.py
@@ -19,9 +19,7 @@ class Splitter(str, Enum):


  class PdfParsingConfig(BaseSettings):
-     library: Literal[
-         "fitz", "pdfplumber", "pypdf", "unstructured", "haystack"
-     ] = "pdfplumber"
+     library: Literal["fitz", "pdfplumber", "pypdf", "unstructured"] = "pdfplumber"


  class DocxParsingConfig(BaseSettings):
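Configs that previously set library="haystack" must now pick one of the four remaining parsers, and "unstructured" additionally requires the extra. A sketch (the pdf field on ParsingConfig is inferred from the error message in document_parser.py above):

    from langroid.parsing.parser import ParsingConfig, PdfParsingConfig

    parsing = ParsingConfig(
        # "unstructured" needs: pip install "langroid[unstructured]"
        pdf=PdfParsingConfig(library="unstructured"),
    )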
{langroid-0.1.196 → langroid-0.1.198}/langroid/utils/pydantic_utils.py
@@ -135,6 +135,53 @@ def flatten_pydantic_model(
      return create_model("FlatModel", __base__=base_model, **flattened_fields)


+ def get_field_names(model: Type[BaseModel]) -> List[str]:
+     """Get all field names from a possibly nested Pydantic model."""
+     mdl = flatten_pydantic_model(model)
+     fields = list(mdl.__fields__.keys())
+     # fields may be like a__b__c , so we only want the last part
+     return [f.split("__")[-1] for f in fields]
+
+
+ def generate_simple_schema(
+     model: Type[BaseModel], exclude: List[str] = []
+ ) -> Dict[str, Any]:
+     """
+     Generates a JSON schema for a Pydantic model,
+     with options to exclude specific fields.
+
+     This function traverses the Pydantic model's fields, including nested models,
+     to generate a dictionary representing the JSON schema. Fields specified in
+     the exclude list will not be included in the generated schema.
+
+     Args:
+         model (Type[BaseModel]): The Pydantic model class to generate the schema for.
+         exclude (List[str]): A list of string field names to be excluded from the
+             generated schema. Defaults to an empty list.
+
+     Returns:
+         Dict[str, Any]: A dictionary representing the JSON schema of the provided
+             model, with specified fields excluded.
+     """
+     if hasattr(model, "__fields__"):
+         output: Dict[str, Any] = {}
+         for field_name, field in model.__fields__.items():
+             if field_name in exclude:
+                 continue  # Skip excluded fields
+
+             field_type = field.type_
+             if issubclass(field_type, BaseModel):
+                 # Recursively generate schema for nested models
+                 output[field_name] = generate_simple_schema(field_type, exclude)
+             else:
+                 # Represent the type as a string here
+                 output[field_name] = {"type": field_type.__name__}
+         return output
+     else:
+         # Non-model type, return a simplified representation
+         return {"type": model.__name__}
+
+
  def flatten_pydantic_instance(
      instance: BaseModel,
      prefix: str = "",
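A small worked example of the traversal above, with the expected result derived by reading the code (typing constructs such as Optional or List fields would need more handling than this sketch exercises):

    from pydantic import BaseModel

    class Address(BaseModel):
        city: str
        zipcode: str

    class Person(BaseModel):
        name: str
        address: Address

    generate_simple_schema(Person, exclude=["zipcode"])
    # -> {"name": {"type": "str"},
    #     "address": {"city": {"type": "str"}}}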
{langroid-0.1.196 → langroid-0.1.198}/langroid/utils/system.py
@@ -1,10 +1,12 @@
  import getpass
  import hashlib
+ import importlib
  import inspect
  import logging
  import shutil
  import socket
  import traceback
+ from typing import Any

  logger = logging.getLogger(__name__)

@@ -15,6 +17,39 @@ DELETION_ALLOWED_PATHS = [
  ]


+ class LazyLoad:
+     """Lazy loading of modules or classes."""
+
+     def __init__(self, import_path: str) -> None:
+         self.import_path = import_path
+         self._target = None
+         self._is_target_loaded = False
+
+     def _load_target(self) -> None:
+         if not self._is_target_loaded:
+             try:
+                 # Attempt to import as a module
+                 self._target = importlib.import_module(self.import_path)  # type: ignore
+             except ImportError:
+                 # If module import fails, attempt to import as a
+                 # class or function from a module
+                 module_path, attr_name = self.import_path.rsplit(".", 1)
+                 module = importlib.import_module(module_path)
+                 self._target = getattr(module, attr_name)
+             self._is_target_loaded = True
+
+     def __getattr__(self, name: str) -> Any:
+         self._load_target()
+         return getattr(self._target, name)
+
+     def __call__(self, *args: Any, **kwargs: Any) -> Any:
+         self._load_target()
+         if callable(self._target):
+             return self._target(*args, **kwargs)
+         else:
+             raise TypeError(f"{self.import_path!r} object is not callable")
+
+
  def rmdir(path: str) -> bool:
      """
      Remove a directory recursively.
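A usage sketch for LazyLoad, covering the two import paths the code handles (a plain module, or a dotted attribute within one):

    from langroid.utils.system import LazyLoad

    np = LazyLoad("numpy")          # nothing is imported yet
    a = np.arange(3)                # first attribute access triggers the import
    loads = LazyLoad("json.loads")  # dotted path resolved as a module attribute
    data = loads('{"x": 1}')        # __call__ imports json, then calls loads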
{langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/chromadb.py
@@ -141,10 +141,16 @@ class ChromaDB(VectorStore):
          return self._docs_from_results(results)

      def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
-         results = self.collection.get(ids=ids, include=["documents", "metadatas"])
-         results["documents"] = [results["documents"]]
-         results["metadatas"] = [results["metadatas"]]
-         return self._docs_from_results(results)
+         # get them one by one since chroma mangles the order of the results
+         # when fetched from a list of ids.
+         results = [
+             self.collection.get(ids=[id], include=["documents", "metadatas"])
+             for id in ids
+         ]
+         final_results = {}
+         final_results["documents"] = [[r["documents"][0] for r in results]]
+         final_results["metadatas"] = [[r["metadatas"][0] for r in results]]
+         return self._docs_from_results(final_results)

      def delete_collection(self, collection_name: str) -> None:
          self.client.delete_collection(name=collection_name)
{langroid-0.1.196 → langroid-0.1.198}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "langroid"
- version = "0.1.196"
+ version = "0.1.198"
  description = "Harness LLMs with Multi-Agent Programming"
  authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
  readme = "README.md"
@@ -16,7 +16,7 @@ mkdocs-gen-files = "^0.4.0"
  mkdocs-literate-nav = "^0.6.0"
  mkdocs-section-index = "^0.3.5"
  mkdocs-jupyter = "^0.24.1"
- chromadb = "0.3.21"
+ chromadb = ">=0.4.21, <=0.4.23"
  onnxruntime = "1.16.1"
  fire = "^0.5.0"
  black = {extras = ["jupyter"], version = "^23.3.0"}
@@ -56,7 +56,7 @@ prettytable = "^3.8.0"
  tantivy = "^0.21.0"
  google-api-python-client = "^2.95.0"
  lxml = "^4.9.3"
- unstructured = {extras = ["docx", "pptx", "pdf"], version = ">=0.10.16,<0.10.18"}
+ unstructured = {extras = ["docx", "pptx", "pdf"], version = ">=0.10.16,<0.10.18", optional=true}

  sentence-transformers = {version="2.2.2", optional=true}
  torch = {version="2.0.0", optional=true}
@@ -72,7 +72,6 @@ pymupdf = "^1.23.3"
  jinja2 = "^3.1.2"
  pytest-asyncio = "^0.21.1"
  docstring-parser = "^0.15"
- farm-haystack = {extras = ["ocr", "preprocessing", "file-conversion", "pdf"], version = "^1.21.1"}
  meilisearch = "^0.28.3"
  meilisearch-python-sdk = "^2.2.3"
  litellm = {version = "^1.23.0", optional = true}
@@ -85,7 +84,7 @@ agent-search = {version = "^0.0.7", optional = true}
  python-docx = "^1.1.0"
  aiohttp = "^3.9.1"
  metaphor-python = {version = "^0.1.23", optional = true}
- chainlit = {version = "^1.0.200", optional = true}
+ chainlit = {version = "^1.0.301", optional = true}
  python-socketio = {version="^5.11.0", optional=true}
  duckduckgo-search = "^4.4"

@@ -93,6 +92,8 @@ duckduckgo-search = "^4.4"
  # install these using `poetry install -E [...]` where [...] is one of the extras below
  # or install multiple extras using, e.g., `poetry install -E "litellm mysql"`
  hf-embeddings = ["sentence-transformers", "torch"]
+ transformers = ["transformers"]
+ unstructured = ["unstructured"]
  postgres = ["psycopg2", "pytest-postgresql"]
  mysql = ["pymysql", "pytest-mysql"]
  litellm = ["litellm"]
@@ -127,6 +128,7 @@ exclude = [
      "langroid/embedding_models/clustering.py",
      #TODO revisit why mypy keeps failing on gh actions, but works fine locally
      "langroid/agent/callbacks/chainlit.py",
+     "langroid/vector_store/chromadb.py"
  ]
  files=["langroid/*"]
  plugins = [