langroid 0.30.1__tar.gz → 0.31.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.30.1 → langroid-0.31.0}/PKG-INFO +9 -1
- {langroid-0.30.1 → langroid-0.31.0}/README.md +8 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/base.py +4 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/models.py +105 -11
- langroid-0.31.0/langroid/experimental/team-save.py +391 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/azure_openai.py +5 -66
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/openai_gpt.py +0 -5
- {langroid-0.30.1 → langroid-0.31.0}/pyproject.toml +6 -6
- {langroid-0.30.1 → langroid-0.31.0}/LICENSE +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/config.toml +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/bn.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/en-US.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/gu.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/he-IL.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/hi.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/kn.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/ml.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/mr.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/ta.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/te.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/.chainlit/translations/zh-CN.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/base.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/batch.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/callbacks/chainlit.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/chat_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/chat_document.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/helpers.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/junk +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/openai_assistant.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/arangodb/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/arangodb/system_messages.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/arangodb/tools.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/arangodb/utils.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/doc_chat_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/neo4j/system_messages.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/neo4j/tools.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/structured_message.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/task.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tool_message.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/file_tools.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/orchestration.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/retrieval_tool.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/rewind_tool.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/typed_task.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent/xml_tool_message.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/agent_config.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/cachedb/base.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/clustering.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/protoc/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/protoc/embeddings.proto +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/remote_embeds.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/exceptions.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/.chainlit/config.toml +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/.chainlit/translations/en-US.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/base.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/config.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/mock_lm.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/language_models/utils.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/mytypes.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/code-parsing.md +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/config.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/document_parser.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/image_text.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/parse_json.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/parser.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/repo_loader.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/routing.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/search.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/spider.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/url_loader.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/url_loader_cookies.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/urls.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/utils.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/parsing/web_search.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/prompts/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/prompts/chat-gpt4-system-prompt.md +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/prompts/dialog.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/prompts/templates.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/py.typed +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/pydantic_v1/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/pydantic_v1/main.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/.chainlit/config.toml +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/.chainlit/translations/en-US.json +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/configuration.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/constants.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/docker.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/git_utils.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/globals.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/llms/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/llms/strings.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/logging.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/object_registry.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/output/citations.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/output/printing.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/output/status.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/system.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/types.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/web/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/utils/web/login.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/vector_store/base.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/vector_store/chromadb.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/vector_store/momento.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/vector_store/qdrant_cloud.py +0 -0
- {langroid-0.30.1 → langroid-0.31.0}/langroid/vector_store/qdrantdb.py +0 -0
{langroid-0.30.1 → langroid-0.31.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.30.1
+Version: 0.31.0
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani
@@ -249,6 +249,14 @@ teacher_task.run()
 <details>
 <summary> <b>Click to expand</b></summary>

+- **Dec 2024:**
+  - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings.
+  - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client
+  - [0.28.0](https://github.com/langroid/langroid/releases/tag/0.28.0) `ToolMessage`: `_handler` field to override
+    default handler method name in `request` field.
+  - [0.27.0](https://github.com/langroid/langroid/releases/tag/0.27.0) OpenRouter Support.
+  - [0.26.0](https://github.com/langroid/langroid/releases/tag/0.26.0) Update to latest Chainlit.
+  - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and user-response.
 - **Nov 2024:**
   - **[0.24.0](https://langroid.github.io/langroid/notes/structured-output/)**:
     Enables support for `Agent`s with strict JSON schema output format on compatible LLMs and strict mode for the OpenAI tools API.
{langroid-0.30.1 → langroid-0.31.0}/README.md

@@ -135,6 +135,14 @@ teacher_task.run()
 <details>
 <summary> <b>Click to expand</b></summary>

+- **Dec 2024:**
+  - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings.
+  - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client
+  - [0.28.0](https://github.com/langroid/langroid/releases/tag/0.28.0) `ToolMessage`: `_handler` field to override
+    default handler method name in `request` field.
+  - [0.27.0](https://github.com/langroid/langroid/releases/tag/0.27.0) OpenRouter Support.
+  - [0.26.0](https://github.com/langroid/langroid/releases/tag/0.26.0) Update to latest Chainlit.
+  - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and user-response.
 - **Nov 2024:**
   - **[0.24.0](https://langroid.github.io/langroid/notes/structured-output/)**:
     Enables support for `Agent`s with strict JSON schema output format on compatible LLMs and strict mode for the OpenAI tools API.
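For readers scanning the 0.28.0 entry above: `ToolMessage` handlers in Langroid are by default agent methods named after the tool's `request` field, and the new `_handler` field lets a tool point at a differently-named method. A minimal, hypothetical sketch (the `SquareTool`/`SquareAgent` names and the exact field semantics are illustrative, not taken from this diff):

```python
import langroid as lr

class SquareTool(lr.ToolMessage):
    request: str = "square"          # tool name the LLM uses to invoke it
    purpose: str = "Compute the square of <num>"
    num: int
    _handler: str = "handle_square"  # 0.28.0: handle via this method instead of "square"

class SquareAgent(lr.ChatAgent):
    def handle_square(self, msg: SquareTool) -> str:
        return str(msg.num ** 2)

agent = SquareAgent(lr.ChatAgentConfig(name="Squarer"))
agent.enable_message(SquareTool)  # register the tool with the agent
```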
{langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/base.py

@@ -24,6 +24,8 @@ class EmbeddingModel(ABC):
     @classmethod
     def create(cls, config: EmbeddingModelsConfig) -> "EmbeddingModel":
         from langroid.embedding_models.models import (
+            AzureOpenAIEmbeddings,
+            AzureOpenAIEmbeddingsConfig,
             FastEmbedEmbeddings,
             FastEmbedEmbeddingsConfig,
             LlamaCppServerEmbeddings,
@@ -42,6 +44,8 @@ class EmbeddingModel(ABC):
             return RemoteEmbeddings(config)
         elif isinstance(config, OpenAIEmbeddingsConfig):
             return OpenAIEmbeddings(config)
+        elif isinstance(config, AzureOpenAIEmbeddingsConfig):
+            return AzureOpenAIEmbeddings(config)
         elif isinstance(config, SentenceTransformerEmbeddingsConfig):
             return SentenceTransformerEmbeddings(config)
         elif isinstance(config, FastEmbedEmbeddingsConfig):
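The two `base.py` hunks above route an `AzureOpenAIEmbeddingsConfig` through the existing `EmbeddingModel.create` factory. A minimal sketch of that path (the endpoint and deployment values are placeholders):

```python
from langroid.embedding_models.base import EmbeddingModel
from langroid.embedding_models.models import AzureOpenAIEmbeddingsConfig

config = AzureOpenAIEmbeddingsConfig(
    api_key="<azure-openai-api-key>",                     # placeholder
    api_base="https://<your-resource>.openai.azure.com",  # placeholder endpoint
    deployment_name="text-embedding-ada-002",             # placeholder deployment
)

# create() now dispatches AzureOpenAIEmbeddingsConfig to AzureOpenAIEmbeddings
embed_model = EmbeddingModel.create(config)
```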
{langroid-0.30.1 → langroid-0.31.0}/langroid/embedding_models/models.py

@@ -1,18 +1,20 @@
 import atexit
 import os
 from functools import cached_property
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 import requests
 import tiktoken
 from dotenv import load_dotenv
-from openai import OpenAI
+from openai import AzureOpenAI, OpenAI

 from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
 from langroid.exceptions import LangroidImportError
 from langroid.mytypes import Embeddings
 from langroid.parsing.utils import batched

+AzureADTokenProvider = Callable[[], str]
+

 class OpenAIEmbeddingsConfig(EmbeddingModelsConfig):
     model_type: str = "openai"
@@ -24,6 +26,26 @@ class OpenAIEmbeddingsConfig(EmbeddingModelsConfig):
     context_length: int = 8192


+class AzureOpenAIEmbeddingsConfig(EmbeddingModelsConfig):
+    model_type: str = "azure-openai"
+    model_name: str = "text-embedding-ada-002"
+    api_key: str = ""
+    api_base: str = ""
+    deployment_name: Optional[str] = None
+    # api_version defaulted to 2024-06-01 as per https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/embeddings?tabs=python-new
+    # change this to required supported version
+    api_version: Optional[str] = "2024-06-01"
+    # TODO: Add auth support for Azure OpenAI via AzureADTokenProvider
+    azure_ad_token: Optional[str] = None
+    azure_ad_token_provider: Optional[AzureADTokenProvider] = None
+    dims: int = 1536
+    context_length: int = 8192
+
+    class Config:
+        # enable auto-loading of env vars with AZURE_OPENAI_ prefix
+        env_prefix = "AZURE_OPENAI_"
+
+
 class SentenceTransformerEmbeddingsConfig(EmbeddingModelsConfig):
     model_type: str = "sentence-transformer"
     model_name: str = "BAAI/bge-large-en-v1.5"
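Because the new config class sets `env_prefix = "AZURE_OPENAI_"`, its fields can presumably also be filled from environment variables (for example via a `.env` file) rather than in code; a sketch with placeholder values:

```python
# .env (placeholder values):
#   AZURE_OPENAI_API_KEY=<your-key>                                  -> api_key
#   AZURE_OPENAI_API_BASE=https://<your-resource>.openai.azure.com   -> api_base
#   AZURE_OPENAI_DEPLOYMENT_NAME=<your-deployment>                   -> deployment_name
#   AZURE_OPENAI_API_VERSION=2024-06-01                              -> api_version

from langroid.embedding_models.models import AzureOpenAIEmbeddingsConfig

config = AzureOpenAIEmbeddingsConfig()  # fields auto-loaded from AZURE_OPENAI_* vars
```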
@@ -58,11 +80,11 @@ class LlamaCppServerEmbeddingsConfig(EmbeddingModelsConfig):
 class EmbeddingFunctionCallable:
     """
     A callable class designed to generate embeddings for a list of texts using
-    the OpenAI API, with automatic retries on failure.
+    the OpenAI or Azure OpenAI API, with automatic retries on failure.

     Attributes:
         embed_model (EmbeddingModel): An instance of EmbeddingModel that provides
-
+            configuration and utilities for generating embeddings.

     Methods:
         __call__(input: List[str]) -> Embeddings: Generate embeddings for
@@ -74,8 +96,9 @@ class EmbeddingFunctionCallable:
         Initialize the EmbeddingFunctionCallable with a specific model.

         Args:
-            model (OpenAIEmbeddings): An instance of
-
+            model ( OpenAIEmbeddings or AzureOpenAIEmbeddings): An instance of
+                OpenAIEmbeddings or AzureOpenAIEmbeddings to use for
+                generating embeddings.
             batch_size (int): Batch size
         """
         self.embed_model = embed_model
@@ -99,8 +122,7 @@ class EmbeddingFunctionCallable:
             Embeddings: A list of embedding vectors corresponding to the input texts.
         """
         embeds = []
-
-        if isinstance(self.embed_model, OpenAIEmbeddings):
+        if isinstance(self.embed_model, (OpenAIEmbeddings, AzureOpenAIEmbeddings)):
            tokenized_texts = self.embed_model.truncate_texts(input)

            for batch in batched(tokenized_texts, self.batch_size):
@@ -178,6 +200,72 @@ class OpenAIEmbeddings(EmbeddingModel):
         return self.config.dims


+class AzureOpenAIEmbeddings(EmbeddingModel):
+    """
+    Azure OpenAI embeddings model implementation.
+    """
+
+    def __init__(
+        self, config: AzureOpenAIEmbeddingsConfig = AzureOpenAIEmbeddingsConfig()
+    ):
+        """
+        Initializes Azure OpenAI embeddings model.
+
+        Args:
+            config: Configuration for Azure OpenAI embeddings model.
+        Raises:
+            ValueError: If required Azure config values are not set.
+        """
+        super().__init__()
+        self.config = config
+        load_dotenv()
+
+        if self.config.api_key == "":
+            raise ValueError(
+                """AZURE_OPENAI_API_KEY env variable must be set to use
+                AzureOpenAIEmbeddings. Please set the AZURE_OPENAI_API_KEY value
+                in your .env file."""
+            )
+
+        if self.config.api_base == "":
+            raise ValueError(
+                """AZURE_OPENAI_API_BASE env variable must be set to use
+                AzureOpenAIEmbeddings. Please set the AZURE_OPENAI_API_BASE value
+                in your .env file."""
+            )
+        self.client = AzureOpenAI(
+            api_key=self.config.api_key,
+            api_version=self.config.api_version,
+            azure_endpoint=self.config.api_base,
+            azure_deployment=self.config.deployment_name,
+        )
+        self.tokenizer = tiktoken.encoding_for_model(self.config.model_name)
+
+    def truncate_texts(self, texts: List[str]) -> List[List[int]]:
+        """
+        Truncate texts to the embedding model's context length.
+        TODO: Maybe we should show warning, and consider doing T5 summarization?
+        """
+        return [
+            self.tokenizer.encode(text, disallowed_special=())[
+                : self.config.context_length
+            ]
+            for text in texts
+        ]
+
+    def embedding_fn(self) -> Callable[[List[str]], Embeddings]:
+        """Get the embedding function for Azure OpenAI.
+
+        Returns:
+            Callable that generates embeddings for input texts.
+        """
+        return EmbeddingFunctionCallable(self, self.config.batch_size)
+
+    @property
+    def embedding_dims(self) -> int:
+        return self.config.dims
+
+
 STEC = SentenceTransformerEmbeddingsConfig

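Putting the new class together with its config, a usage sketch (assuming the `AZURE_OPENAI_*` variables from the earlier note are set; `embedding_fn()` returns the `EmbeddingFunctionCallable` shown above, which batches and embeds a list of texts):

```python
from langroid.embedding_models.models import (
    AzureOpenAIEmbeddings,
    AzureOpenAIEmbeddingsConfig,
)

# assumes AZURE_OPENAI_API_KEY and AZURE_OPENAI_API_BASE are set in the environment
embed_model = AzureOpenAIEmbeddings(AzureOpenAIEmbeddingsConfig())

embed_fn = embed_model.embedding_fn()           # EmbeddingFunctionCallable
vectors = embed_fn(["hello world", "goodbye"])  # one dims-length vector per text
```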
@@ -352,13 +440,19 @@ class LlamaCppServerEmbeddings(EmbeddingModel):
 def embedding_model(embedding_fn_type: str = "openai") -> EmbeddingModel:
     """
     Args:
-        embedding_fn_type:
-
+        embedding_fn_type: Type of embedding model to use. Options are:
+         - "openai",
+         - "azure-openai",
+         - "sentencetransformer", or
+         - "fastembed".
+              (others may be added in the future)
     Returns:
-        EmbeddingModel
+        EmbeddingModel: The corresponding embedding model class.
     """
     if embedding_fn_type == "openai":
         return OpenAIEmbeddings  # type: ignore
+    elif embedding_fn_type == "azure-openai":
+        return AzureOpenAIEmbeddings  # type: ignore
     elif embedding_fn_type == "fastembed":
         return FastEmbedEmbeddings  # type: ignore
     elif embedding_fn_type == "llamacppserver":
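The string-keyed `embedding_model()` helper also gains an `"azure-openai"` branch; note that, as with the other branches, it returns the class rather than an instance:

```python
from langroid.embedding_models.models import AzureOpenAIEmbeddings, embedding_model

cls = embedding_model("azure-openai")
assert cls is AzureOpenAIEmbeddings  # instantiate it with an AzureOpenAIEmbeddingsConfig
```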
langroid-0.31.0/langroid/experimental/team-save.py (new file)

@@ -0,0 +1,391 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import Callable, Dict, List, Optional, Union
+
+import langroid as lr
+from langroid.language_models.mock_lm import MockLMConfig
+
+# Fix logging level type
+logging.basicConfig(level=logging.WARNING)
+logger = logging.getLogger(__name__)
+
+
+def sum_fn(s: str) -> str:
+    """Dummy response for MockLM"""
+    nums = [
+        int(subpart)
+        for part in s.split()
+        for subpart in part.split(",")
+        if subpart.isdigit()
+    ]
+    return str(sum(nums) + 1)
+
+
+def user_message(msg: Union[str, lr.ChatDocument]) -> lr.ChatDocument:
+    """Create a user-role msg from a string or ChatDocument"""
+    if isinstance(msg, lr.ChatDocument):
+        return msg
+    return lr.ChatDocument(
+        content=msg,
+        metadata=lr.ChatDocMetaData(
+            sender=lr.Entity.USER,
+            sender_name="user",
+        ),
+    )
+
+
+class InputContext:
+    """Context for a Component to respond to"""
+
+    def __init__(self) -> None:
+        self.messages: List[lr.ChatDocument] = []
+
+    def add(
+        self, results: Union[str, List[str], lr.ChatDocument, List[lr.ChatDocument]]
+    ) -> None:
+        """
+        Add messages to the input messages list
+        """
+        msgs: List[lr.ChatDocument] = []
+        if isinstance(results, str):
+            msgs = [user_message(results)]
+        elif isinstance(results, lr.ChatDocument):
+            msgs = [results]
+        elif isinstance(results, list):
+            if len(results) == 0:
+                return
+            if isinstance(results[0], str):
+                msgs = [user_message(r) for r in results]
+            else:
+                msgs = [r for r in results if isinstance(r, lr.ChatDocument)]
+        self.messages.extend(msgs)
+
+    def clear(self) -> None:
+        self.messages.clear()
+
+    def get_context(self) -> lr.ChatDocument:
+        """Construct a user-role ChatDocument from the input messages"""
+        if len(self.messages) == 0:
+            return lr.ChatDocument(content="", metadata={"sender": lr.Entity.USER})
+        content = "\n".join(
+            f"{msg.metadata.sender_name}: {msg.content}" for msg in self.messages
+        )
+        return lr.ChatDocument(content=content, metadata={"sender": lr.Entity.USER})
+
+
+class Scheduler(ABC):
+    """Schedule the Components of a Team"""
+
+    def __init__(self) -> None:
+        self.init_state()
+
+    def init_state(self) -> None:
+        self.stepped = False
+        self.responders: List[str] = []
+        self.responder_counts: Dict[str, int] = {}
+        self.current_result: List[lr.ChatDocument] = []
+
+    @abstractmethod
+    def step(self) -> None:
+        pass
+
+    @abstractmethod
+    def done(self) -> bool:
+        pass
+
+    @abstractmethod
+    def result(self) -> List[lr.ChatDocument]:
+        pass
+
+    def run(self) -> List[lr.ChatDocument]:
+        self.init_state()
+        while not self.done():
+            self.step()
+        return self.result()
+
+
+class Component(ABC):
+    """A component of a Team"""
+
+    def __init__(self) -> None:
+        self.input = InputContext()
+        self._listeners: List["Component"] = []
+        self.name: str = ""
+
+    @abstractmethod
+    def run(self) -> List[lr.ChatDocument]:
+        pass
+
+    def listen(self, component: Union["Component", List["Component"]]) -> None:
+        if isinstance(component, list):
+            for comp in component:
+                comp.listeners.append(self)
+        else:
+            component.listeners.append(self)
+
+    @property
+    def listeners(self) -> List["Component"]:
+        return self._listeners
+
+    def _notify(self, results: List[lr.ChatDocument]) -> None:
+        logger.warning(f"{self.name} Notifying listeners...")
+        for listener in self.listeners:
+            logger.warning(f"--> Listener {listener.name} notified")
+            listener.input.add(results)
+
+
+class SimpleScheduler(Scheduler):
+    def __init__(
+        self,
+        components: List[Component],
+    ) -> None:
+        super().__init__()
+        self.components = components  # Get components from team
+        self.stepped: bool = False
+
+    def step(self) -> None:
+        results = []
+        for comp in self.components:
+            result = comp.run()
+            if result:
+                results.extend(result)
+        self.current_result = results
+        self.stepped = True
+
+    def done(self) -> bool:
+        """done after 1 step, i.e. all components have responded"""
+        return self.stepped
+
+    def result(self) -> List[lr.ChatDocument]:
+        return self.current_result
+
+
+class OrElseScheduler(Scheduler):
+    """
+    Implements "OrElse scheduling", i.e. if the components are A, B, C, then
+    in each step, it will try for a valid response from A OrElse B OrElse C,
+    i.e. the first component that gives a valid response is chosen.
+    In the next step, it will start from the next component in the list,
+    cycling back to the first component after the last component.
+    (There may be a better name than OrElseScheduler though.)
+    """
+
+    def __init__(
+        self,
+        components: List[Component],
+    ) -> None:
+        super().__init__()
+        self.components = components
+        self.team: Optional[Team] = None
+        self.current_index: int = 0
+
+    def init_state(self) -> None:
+        super().init_state()
+        self.current_index = 0
+
+    def is_valid(self, result: Optional[List[lr.ChatDocument]]) -> bool:
+        return result is not None and len(result) > 0
+
+    def step(self) -> None:
+        start_index = self.current_index
+        n = len(self.components)
+
+        for i in range(n):
+            idx = (start_index + i) % n
+            comp = self.components[idx]
+            result = comp.run()
+            if self.is_valid(result):
+                self.responders.append(comp.name)
+                self.responder_counts[comp.name] = (
+                    self.responder_counts.get(comp.name, 0) + 1
+                )
+                self.current_result = result
+                # cycle to next component
+                self.current_index = (idx + 1) % n
+                return
+
+    def done(self) -> bool:
+        if self.team is None:
+            return False
+        return self.team.done(self)
+
+    def result(self) -> List[lr.ChatDocument]:
+        return self.current_result
+
+
+class Team(Component):
+    def __init__(
+        self,
+        name: str,
+        done_condition: Optional[Callable[["Team", Scheduler], bool]] = None,
+    ) -> None:
+        super().__init__()
+        self.name = name
+        self.components: List[Component] = []
+        self.scheduler: Optional[Scheduler] = None
+        self.done_condition = done_condition or Team.default_done_condition
+
+    def set_done_condition(
+        self, done_condition: Callable[["Team", Scheduler], bool]
+    ) -> None:
+        self.done_condition = done_condition
+
+    def done(self, scheduler: Scheduler) -> bool:
+        return self.done_condition(self, scheduler)
+
+    def default_done_condition(self, scheduler: Scheduler) -> bool:
+        # Default condition, can be overridden
+        return False
+
+    def add_scheduler(self, scheduler_class: type) -> None:
+        self.scheduler = scheduler_class(self.components)
+        if hasattr(self.scheduler, "team"):
+            setattr(self.scheduler, "team", self)
+
+    def add(self, component: Union[Component, List[Component]]) -> None:
+        if isinstance(component, list):
+            self.components.extend(component)
+        else:
+            self.components.append(component)
+
+    def reset(self) -> None:
+        self.input.clear()
+        if self.scheduler is not None:
+            self.scheduler.init_state()
+
+    def run(self, input: str | lr.ChatDocument | None = None) -> List[lr.ChatDocument]:
+        if input is not None:
+            self.input.add(input)
+        if self.scheduler is None:
+            raise ValueError(
+                f"Team '{self.name}' has no scheduler. Call add_scheduler() first."
+            )
+        input_str = self.input.get_context().content
+        logger.warning(f"Running team {self.name}... on input = {input_str}")
+        # push the input of self to each component that's a listener of self.
+        n_pushed = 0
+        for comp in self.components:
+
+            if comp in self.listeners:
+                comp.input.add(self.input.messages)
+                n_pushed += 1
+        if len(self.input.messages) > 0 and n_pushed == 0:
+            logger.warning(
+                """
+                Warning: Team inputs not pushed to any components!
+                You may not be able to run any components unless they have their
+                own inputs. Make sure to set up component to listen to parent team
+                if needed.
+                """
+            )
+        # clear own input since we've pushed it to internal components
+        self.input.clear()
+
+        result = self.scheduler.run()
+        if len(result) > 0:
+            self._notify(result)
+        result_value = result[0].content if len(result) > 0 else "null"
+        logger.warning(f"Team {self.name} done: {result_value}")
+        return result
+
+
+class DummyAgent:
+    def __init__(self, name: str) -> None:
+        self.name = name
+
+    def process(self, data: str) -> str:
+        return f"{self.name} processed: {data}"
+
+
+class TaskComponent(Component):
+    def __init__(self, task: lr.Task) -> None:
+        super().__init__()
+        self.task = task
+        self.name = task.agent.config.name
+
+    def run(self, input: str | lr.ChatDocument | None = None) -> List[lr.ChatDocument]:
+        if input is not None:
+            self.input.add(input)
+        input_msg = self.input.get_context()
+        if input_msg.content == "":
+            return []
+        logger.warning(f"Running task {self.name} on input = {input_msg.content}")
+        result = self.task.run(input_msg)
+        result_value = result.content if result else "null"
+        logger.warning(f"Task {self.name} done: {result_value}")
+        result_list = [result] if result else []
+        if len(result_list) > 0:
+            self._notify(result_list)
+        self.input.clear()  # clear own input since we just consumed it!
+        return result_list
+
+
+def make_task(name: str, sys: str = "") -> TaskComponent:
+    llm_config = MockLMConfig(response_fn=sum_fn)
+    agent = lr.ChatAgent(
+        lr.ChatAgentConfig(
+            llm=llm_config,
+            name=name,
+        )
+    )
+    # set as single_round since there are no Tools
+    task = lr.Task(agent, interactive=False, single_round=True)
+    return TaskComponent(task)
+
+
+if __name__ == "__main__":
+    # Create agents, tasks
+    t1 = make_task("a1")
+    t2 = make_task("a2")
+    t3 = make_task("a3")
+
+    # done conditions for each time
+    def team1_done_condition(team: Team, scheduler: Scheduler) -> bool:
+        return (
+            scheduler.responder_counts.get("a1", 0) >= 2
+            and scheduler.responder_counts.get("a2", 0) >= 2
+        )
+
+    def team2_done_condition(team: Team, scheduler: Scheduler) -> bool:
+        return "a3" in scheduler.responders
+
+    def general_team_done_condition(team: Team, scheduler: Scheduler) -> bool:
+        # Example: all components have responded at least once
+        return len(set(scheduler.responders)) == len(team.components)
+
+    # Create teams
+    team1 = Team("T1", done_condition=team1_done_condition)
+    team2 = Team("T2", done_condition=team2_done_condition)

+    team = Team("Team", done_condition=general_team_done_condition)
+
+    team1.add_scheduler(OrElseScheduler)
+    team2.add_scheduler(OrElseScheduler)
+    team.add_scheduler(OrElseScheduler)
+
+    team.add([team1, team2])
+
+    # Build hierarchy
+    team1.add([t1, t2])
+    team2.add(t3)
+
+    # Set up listening
+    # team2.listen(team1)  # listens to team1 final result
+    team1.listen(team)
+    t1.listen(team1)
+    t2.listen(t1)
+    t1.listen(t2)
+    # TODO should we forbid listening to a component OUTSIDE the team?
+
+    # t3 listens to its parent team2 =>
+    # any input to team2 gets pushed to t3 when t3 runs
+    team2.listen([t1, t2])
+    t3.listen(team2)
+
+    # TODO - we should either define which component of a team gets the teams inputs,
+    # or explicitly add messages to a specific component of the team
+
+    print("Running top-level team...")
+    result = team.run("1")
+
+    ##########
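The experimental `team-save.py` module above composes `TaskComponent`s into `Team`s driven by a `Scheduler`; its `__main__` block exercises `OrElseScheduler`. As a smaller sketch using the same classes (assuming they are in scope, since the hyphenated file name cannot be imported directly), a flat team run once with `SimpleScheduler`:

```python
# Assumes Team, SimpleScheduler, and make_task from the module above are in scope.
t1 = make_task("x1")
t2 = make_task("x2")

flat = Team("Flat")
flat.add([t1, t2])
flat.add_scheduler(SimpleScheduler)  # every component runs once, then the team is done

# components receive the team's input only if they listen to it
t1.listen(flat)
t2.listen(flat)

result = flat.run("1, 2, 3")  # MockLM's sum_fn: each task replies with the sum of the numbers, plus 1
print([r.content for r in result])
```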