PyPI - ag2 - Versions diffs - 0.3.2__py3-none-any.whl - Mend

ag2 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ag2 might be problematic. Click here for more details.

Files changed (112) hide show

ag2-0.3.2.dist-info/LICENSE +201 -0
ag2-0.3.2.dist-info/METADATA +490 -0
ag2-0.3.2.dist-info/NOTICE.md +19 -0
ag2-0.3.2.dist-info/RECORD +112 -0
ag2-0.3.2.dist-info/WHEEL +5 -0
ag2-0.3.2.dist-info/top_level.txt +1 -0
autogen/__init__.py +17 -0
autogen/_pydantic.py +116 -0
autogen/agentchat/__init__.py +26 -0
autogen/agentchat/agent.py +142 -0
autogen/agentchat/assistant_agent.py +85 -0
autogen/agentchat/chat.py +306 -0
autogen/agentchat/contrib/__init__.py +0 -0
autogen/agentchat/contrib/agent_builder.py +785 -0
autogen/agentchat/contrib/agent_optimizer.py +450 -0
autogen/agentchat/contrib/capabilities/__init__.py +0 -0
autogen/agentchat/contrib/capabilities/agent_capability.py +21 -0
autogen/agentchat/contrib/capabilities/generate_images.py +297 -0
autogen/agentchat/contrib/capabilities/teachability.py +406 -0
autogen/agentchat/contrib/capabilities/text_compressors.py +72 -0
autogen/agentchat/contrib/capabilities/transform_messages.py +92 -0
autogen/agentchat/contrib/capabilities/transforms.py +565 -0
autogen/agentchat/contrib/capabilities/transforms_util.py +120 -0
autogen/agentchat/contrib/capabilities/vision_capability.py +217 -0
autogen/agentchat/contrib/gpt_assistant_agent.py +545 -0
autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
autogen/agentchat/contrib/graph_rag/document.py +24 -0
autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +76 -0
autogen/agentchat/contrib/graph_rag/graph_query_engine.py +50 -0
autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +56 -0
autogen/agentchat/contrib/img_utils.py +390 -0
autogen/agentchat/contrib/llamaindex_conversable_agent.py +114 -0
autogen/agentchat/contrib/llava_agent.py +176 -0
autogen/agentchat/contrib/math_user_proxy_agent.py +471 -0
autogen/agentchat/contrib/multimodal_conversable_agent.py +128 -0
autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
autogen/agentchat/contrib/retrieve_user_proxy_agent.py +701 -0
autogen/agentchat/contrib/society_of_mind_agent.py +203 -0
autogen/agentchat/contrib/text_analyzer_agent.py +76 -0
autogen/agentchat/contrib/vectordb/__init__.py +0 -0
autogen/agentchat/contrib/vectordb/base.py +243 -0
autogen/agentchat/contrib/vectordb/chromadb.py +326 -0
autogen/agentchat/contrib/vectordb/mongodb.py +559 -0
autogen/agentchat/contrib/vectordb/pgvectordb.py +958 -0
autogen/agentchat/contrib/vectordb/qdrant.py +334 -0
autogen/agentchat/contrib/vectordb/utils.py +126 -0
autogen/agentchat/contrib/web_surfer.py +305 -0
autogen/agentchat/conversable_agent.py +2904 -0
autogen/agentchat/groupchat.py +1666 -0
autogen/agentchat/user_proxy_agent.py +109 -0
autogen/agentchat/utils.py +207 -0
autogen/browser_utils.py +291 -0
autogen/cache/__init__.py +10 -0
autogen/cache/abstract_cache_base.py +78 -0
autogen/cache/cache.py +182 -0
autogen/cache/cache_factory.py +85 -0
autogen/cache/cosmos_db_cache.py +150 -0
autogen/cache/disk_cache.py +109 -0
autogen/cache/in_memory_cache.py +61 -0
autogen/cache/redis_cache.py +128 -0
autogen/code_utils.py +745 -0
autogen/coding/__init__.py +22 -0
autogen/coding/base.py +113 -0
autogen/coding/docker_commandline_code_executor.py +262 -0
autogen/coding/factory.py +45 -0
autogen/coding/func_with_reqs.py +203 -0
autogen/coding/jupyter/__init__.py +22 -0
autogen/coding/jupyter/base.py +32 -0
autogen/coding/jupyter/docker_jupyter_server.py +164 -0
autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
autogen/coding/jupyter/jupyter_client.py +224 -0
autogen/coding/jupyter/jupyter_code_executor.py +161 -0
autogen/coding/jupyter/local_jupyter_server.py +168 -0
autogen/coding/local_commandline_code_executor.py +410 -0
autogen/coding/markdown_code_extractor.py +44 -0
autogen/coding/utils.py +57 -0
autogen/exception_utils.py +46 -0
autogen/extensions/__init__.py +0 -0
autogen/formatting_utils.py +76 -0
autogen/function_utils.py +362 -0
autogen/graph_utils.py +148 -0
autogen/io/__init__.py +15 -0
autogen/io/base.py +105 -0
autogen/io/console.py +43 -0
autogen/io/websockets.py +213 -0
autogen/logger/__init__.py +11 -0
autogen/logger/base_logger.py +140 -0
autogen/logger/file_logger.py +287 -0
autogen/logger/logger_factory.py +29 -0
autogen/logger/logger_utils.py +42 -0
autogen/logger/sqlite_logger.py +459 -0
autogen/math_utils.py +356 -0
autogen/oai/__init__.py +33 -0
autogen/oai/anthropic.py +428 -0
autogen/oai/bedrock.py +600 -0
autogen/oai/cerebras.py +264 -0
autogen/oai/client.py +1148 -0
autogen/oai/client_utils.py +167 -0
autogen/oai/cohere.py +453 -0
autogen/oai/completion.py +1216 -0
autogen/oai/gemini.py +469 -0
autogen/oai/groq.py +281 -0
autogen/oai/mistral.py +279 -0
autogen/oai/ollama.py +576 -0
autogen/oai/openai_utils.py +810 -0
autogen/oai/together.py +343 -0
autogen/retrieve_utils.py +487 -0
autogen/runtime_logging.py +163 -0
autogen/token_count_utils.py +257 -0
autogen/types.py +20 -0
autogen/version.py +7 -0

autogen/agentchat/contrib/vectordb/qdrant.py ADDED Viewed

@@ -0,0 +1,334 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Portions derived from  https://github.com/microsoft/autogen are under the MIT License.
+# SPDX-License-Identifier: MIT
+import abc
+import logging
+import os
+from typing import Callable, List, Optional, Sequence, Tuple, Union
+from .base import Document, ItemID, QueryResults, VectorDB
+from .utils import get_logger
+try:
+    from qdrant_client import QdrantClient, models
+except ImportError:
+    raise ImportError("Please install qdrant-client: `pip install qdrant-client`")
+# Module-level logger created through the package's `get_logger` helper.
+logger = get_logger(__name__)
+# Type alias for one embedding vector: a sequence of floats or a sequence of ints.
+Embeddings = Union[Sequence[float], Sequence[int]]
+class EmbeddingFunction(abc.ABC):
+    """Abstract base class for callables that map a list of texts to embedding vectors."""
+    @abc.abstractmethod
+    def __call__(self, inputs: List[str]) -> List[Embeddings]:
+        """Embed `inputs`; concrete subclasses must override this method.
+        Args:
+            inputs: List[str] | The texts to embed.
+        Returns:
+            List[Embeddings] | One embedding per input text.
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
+class FastEmbedEmbeddingFunction(EmbeddingFunction):
+    """Embedding function implementation using FastEmbed - https://qdrant.github.io/fastembed."""
+    def __init__(
+        self,
+        model_name: str = "BAAI/bge-small-en-v1.5",
+        batch_size: int = 256,
+        cache_dir: Optional[str] = None,
+        threads: Optional[int] = None,
+        parallel: Optional[int] = None,
+        **kwargs,
+    ):
+        """Initialize fastembed.TextEmbedding.
+        Args:
+            model_name (str): The name of the model to use. Defaults to `"BAAI/bge-small-en-v1.5"`.
+            batch_size (int): Batch size for encoding. Higher values will use more memory, but be faster.\
+                                        Defaults to 256.
+            cache_dir (str, optional): The path to the model cache directory.\
+                                       Can also be set using the `FASTEMBED_CACHE_PATH` env variable.
+            threads (int, optional): The number of threads single onnxruntime session can use.
+            parallel (int, optional): If `>1`, data-parallel encoding will be used, recommended for large datasets.\
+                                      If `0`, use all available cores.\
+                                      If `None`, don't use data-parallel processing, use default onnxruntime threading.\
+                                      Defaults to None.
+            **kwargs: Additional options to pass to fastembed.TextEmbedding
+        Raises:
+            ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-small-en-v1.5.
+        """
+        try:
+            from fastembed import TextEmbedding
+        except ImportError as e:
+            raise ValueError(
+                "The 'fastembed' package is not installed. Please install it with `pip install fastembed`",
+            ) from e
+        self._batch_size = batch_size
+        self._parallel = parallel
+        self._model = TextEmbedding(model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs)
+    def __call__(self, inputs: List[str]) -> List[Embeddings]:
+        embeddings = self._model.embed(inputs, batch_size=self._batch_size, parallel=self._parallel)
+        return [embedding.tolist() for embedding in embeddings]
+class QdrantVectorDB(VectorDB):
+    """
+    A vector database implementation that uses Qdrant as the backend.
+    """
+    def __init__(
+        self,
+        *,
+        client=None,
+        embedding_function: EmbeddingFunction = None,
+        content_payload_key: str = "_content",
+        metadata_payload_key: str = "_metadata",
+        collection_options: dict = {},
+        **kwargs,
+    ) -> None:
+        """
+        Initialize the vector database.
+        Args:
+            client: qdrant_client.QdrantClient | An instance of QdrantClient.
+            embedding_function: Callable | The embedding function used to generate the vector representation
+                of the documents. Defaults to FastEmbedEmbeddingFunction.
+            collection_options: dict | The options for creating the collection.
+            kwargs: dict | Additional keyword arguments.
+        """
+        self.client: QdrantClient = client or QdrantClient(location=":memory:")
+        self.embedding_function = embedding_function or FastEmbedEmbeddingFunction()
+        self.collection_options = collection_options
+        self.content_payload_key = content_payload_key
+        self.metadata_payload_key = metadata_payload_key
+        self.type = "qdrant"
+    def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> None:
+        """
+        Create a collection in the vector database.
+        Case 1. if the collection does not exist, create the collection.
+        Case 2. the collection exists, if overwrite is True, it will overwrite the collection.
+        Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection,
+            otherwise it raise a ValueError.
+        Args:
+            collection_name: str | The name of the collection.
+            overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
+            get_or_create: bool | Whether to get the collection if it exists. Default is True.
+        Returns:
+            Any | The collection object.
+        """
+        embeddings_size = len(self.embedding_function(["test"])[0])
+        if self.client.collection_exists(collection_name) and overwrite:
+            self.client.delete_collection(collection_name)
+        if not self.client.collection_exists(collection_name):
+            self.client.create_collection(
+                collection_name,
+                vectors_config=models.VectorParams(size=embeddings_size, distance=models.Distance.COSINE),
+                **self.collection_options,
+            )
+        elif not get_or_create:
+            raise ValueError(f"Collection {collection_name} already exists.")
+    def get_collection(self, collection_name: str = None):
+        """
+        Get the collection from the vector database.
+        Args:
+            collection_name: str | The name of the collection.
+        Returns:
+            Any | The collection object.
+        """
+        if collection_name is None:
+            raise ValueError("The collection name is required.")
+        return self.client.get_collection(collection_name)
+    def delete_collection(self, collection_name: str) -> None:
+        """Delete the collection from the vector database.
+        Args:
+            collection_name: str | The name of the collection.
+        Returns:
+            Any
+        """
+        return self.client.delete_collection(collection_name)
+    def insert_docs(self, docs: List[Document], collection_name: str = None, upsert: bool = False) -> None:
+        """
+        Insert documents into the collection of the vector database.
+        Args:
+            docs: List[Document] | A list of documents. Each document is a TypedDict `Document`.
+            collection_name: str | The name of the collection. Default is None.
+            upsert: bool | Whether to update the document if it exists. Default is False.
+            kwargs: Dict | Additional keyword arguments.
+        Returns:
+            None
+        """
+        if not docs:
+            return
+        if any(doc.get("content") is None for doc in docs):
+            raise ValueError("The document content is required.")
+        if any(doc.get("id") is None for doc in docs):
+            raise ValueError("The document id is required.")
+        if not upsert and not self._validate_upsert_ids(collection_name, [doc["id"] for doc in docs]):
+            logger.log("Some IDs already exist. Skipping insert", level=logging.WARN)
+        self.client.upsert(collection_name, points=self._documents_to_points(docs))
+    def update_docs(self, docs: List[Document], collection_name: str = None) -> None:
+        if not docs:
+            return
+        if any(doc.get("id") is None for doc in docs):
+            raise ValueError("The document id is required.")
+        if any(doc.get("content") is None for doc in docs):
+            raise ValueError("The document content is required.")
+        if self._validate_update_ids(collection_name, [doc["id"] for doc in docs]):
+            return self.client.upsert(collection_name, points=self._documents_to_points(docs))
+        raise ValueError("Some IDs do not exist. Skipping update")
+    def delete_docs(self, ids: List[ItemID], collection_name: str = None, **kwargs) -> None:
+        """
+        Delete documents from the collection of the vector database.
+        Args:
+            ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`.
+            collection_name: str | The name of the collection. Default is None.
+            kwargs: Dict | Additional keyword arguments.
+        Returns:
+            None
+        """
+        self.client.delete(collection_name, ids)
+    def retrieve_docs(
+        self,
+        queries: List[str],
+        collection_name: str = None,
+        n_results: int = 10,
+        distance_threshold: float = 0,
+        **kwargs,
+    ) -> QueryResults:
+        """
+        Retrieve documents from the collection of the vector database based on the queries.
+        Args:
+            queries: List[str] | A list of queries. Each query is a string.
+            collection_name: str | The name of the collection. Default is None.
+            n_results: int | The number of relevant documents to return. Default is 10.
+            distance_threshold: float | The threshold for the distance score, only distance smaller than it will be
+                returned. Don't filter with it if < 0. Default is 0.
+            kwargs: Dict | Additional keyword arguments.
+        Returns:
+            QueryResults | The query results. Each query result is a list of list of tuples containing the document and
+                the distance.
+        """
+        embeddings = self.embedding_function(queries)
+        requests = [
+            models.SearchRequest(
+                vector=embedding,
+                limit=n_results,
+                score_threshold=distance_threshold,
+                with_payload=True,
+                with_vector=False,
+            )
+            for embedding in embeddings
+        ]
+        batch_results = self.client.search_batch(collection_name, requests)
+        return [self._scored_points_to_documents(results) for results in batch_results]
+    def get_docs_by_ids(
+        self, ids: List[ItemID] = None, collection_name: str = None, include=True, **kwargs
+    ) -> List[Document]:
+        """
+        Retrieve documents from the collection of the vector database based on the ids.
+        Args:
+            ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None.
+            collection_name: str | The name of the collection. Default is None.
+            include: List[str] | The fields to include. Default is True.
+                If None, will include ["metadatas", "documents"], ids will always be included.
+            kwargs: dict | Additional keyword arguments.
+        Returns:
+            List[Document] | The results.
+        """
+        if ids is None:
+            results = self.client.scroll(collection_name=collection_name, with_payload=include, with_vectors=True)[0]
+        else:
+            results = self.client.retrieve(collection_name, ids=ids, with_payload=include, with_vectors=True)
+        return [self._point_to_document(result) for result in results]
+    def _point_to_document(self, point) -> Document:
+        return {
+            "id": point.id,
+            "content": point.payload.get(self.content_payload_key, ""),
+            "metadata": point.payload.get(self.metadata_payload_key, {}),
+            "embedding": point.vector,
+        }
+    def _points_to_documents(self, points) -> List[Document]:
+        return [self._point_to_document(point) for point in points]
+    def _scored_point_to_document(self, scored_point: models.ScoredPoint) -> Tuple[Document, float]:
+        return self._point_to_document(scored_point), scored_point.score
+    def _documents_to_points(self, documents: List[Document]):
+        contents = [document["content"] for document in documents]
+        embeddings = self.embedding_function(contents)
+        points = [
+            models.PointStruct(
+                id=documents[i]["id"],
+                vector=embeddings[i],
+                payload={
+                    self.content_payload_key: documents[i].get("content"),
+                    self.metadata_payload_key: documents[i].get("metadata"),
+                },
+            )
+            for i in range(len(documents))
+        ]
+        return points
+    def _scored_points_to_documents(self, scored_points: List[models.ScoredPoint]) -> List[Tuple[Document, float]]:
+        return [self._scored_point_to_document(scored_point) for scored_point in scored_points]
+    def _validate_update_ids(self, collection_name: str, ids: List[str]) -> bool:
+        """
+        Validates all the IDs exist in the collection
+        """
+        retrieved_ids = [
+            point.id for point in self.client.retrieve(collection_name, ids=ids, with_payload=False, with_vectors=False)
+        ]
+        if missing_ids := set(ids) - set(retrieved_ids):
+            logger.log(f"Missing IDs: {missing_ids}. Skipping update", level=logging.WARN)
+            return False
+        return True
+    def _validate_upsert_ids(self, collection_name: str, ids: List[str]) -> bool:
+        """
+        Validate none of the IDs exist in the collection
+        """
+        retrieved_ids = [
+            point.id for point in self.client.retrieve(collection_name, ids=ids, with_payload=False, with_vectors=False)
+        ]
+        if existing_ids := set(ids) & set(retrieved_ids):
+            logger.log(f"Existing IDs: {existing_ids}.", level=logging.WARN)
+            return False
+        return True

autogen/agentchat/contrib/vectordb/utils.py ADDED Viewed

@@ -0,0 +1,126 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Portions derived from  https://github.com/microsoft/autogen are under the MIT License.
+# SPDX-License-Identifier: MIT
+import logging
+from typing import Any, Dict, List
+from termcolor import colored
+from .base import QueryResults
+class ColoredLogger(logging.Logger):
+    def __init__(self, name, level=logging.NOTSET):
+        super().__init__(name, level)
+    def debug(self, msg, *args, color=None, **kwargs):
+        super().debug(colored(msg, color), *args, **kwargs)
+    def info(self, msg, *args, color=None, **kwargs):
+        super().info(colored(msg, color), *args, **kwargs)
+    def warning(self, msg, *args, color="yellow", **kwargs):
+        super().warning(colored(msg, color), *args, **kwargs)
+    def error(self, msg, *args, color="light_red", **kwargs):
+        super().error(colored(msg, color), *args, **kwargs)
+    def critical(self, msg, *args, color="red", **kwargs):
+        super().critical(colored(msg, color), *args, **kwargs)
+    def fatal(self, msg, *args, color="red", **kwargs):
+        super().fatal(colored(msg, color), *args, **kwargs)
+def get_logger(name: str, level: int = logging.INFO) -> ColoredLogger:
+    logger = ColoredLogger(name, level)
+    console_handler = logging.StreamHandler()
+    logger.addHandler(console_handler)
+    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    logger.handlers[0].setFormatter(formatter)
+    return logger
+# Module-level logger for this utilities module, built with `get_logger` above.
+logger = get_logger(__name__)
+def filter_results_by_distance(results: QueryResults, distance_threshold: float = -1) -> QueryResults:
+    """Filters results based on a distance threshold.
+    Args:
+        results: QueryResults | The query results. List[List[Tuple[Document, float]]]
+        distance_threshold: The maximum distance allowed for results.
+    Returns:
+        QueryResults | A filtered results containing only distances smaller than the threshold.
+    """
+    if distance_threshold > 0:
+        results = [[(key, value) for key, value in data if value < distance_threshold] for data in results]
+    return results
+def chroma_results_to_query_results(data_dict: Dict[str, List[List[Any]]], special_key="distances") -> QueryResults:
+    """Converts a dictionary with list-of-list values to a list of tuples.
+    Args:
+        data_dict: A dictionary where keys map to lists of lists or None.
+        special_key: The key in the dictionary containing the special values
+                    for each tuple.
+    Returns:
+        A list of tuples, where each tuple contains a sub-dictionary with
+        some keys from the original dictionary and the value from the
+        special_key.
+    Example:
+        data_dict = {
+            "key1s": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+            "key2s": [["a", "b", "c"], ["c", "d", "e"], ["e", "f", "g"]],
+            "key3s": None,
+            "key4s": [["x", "y", "z"], ["1", "2", "3"], ["4", "5", "6"]],
+            "distances": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]],
+        }
+        results = [
+            [
+                ({"key1": 1, "key2": "a", "key4": "x"}, 0.1),
+                ({"key1": 2, "key2": "b", "key4": "y"}, 0.2),
+                ({"key1": 3, "key2": "c", "key4": "z"}, 0.3),
+            ],
+            [
+                ({"key1": 4, "key2": "c", "key4": "1"}, 0.4),
+                ({"key1": 5, "key2": "d", "key4": "2"}, 0.5),
+                ({"key1": 6, "key2": "e", "key4": "3"}, 0.6),
+            ],
+            [
+                ({"key1": 7, "key2": "e", "key4": "4"}, 0.7),
+                ({"key1": 8, "key2": "f", "key4": "5"}, 0.8),
+                ({"key1": 9, "key2": "g", "key4": "6"}, 0.9),
+            ],
+        ]
+    """
+    keys = [
+        key
+        for key in data_dict
+        if key != special_key and data_dict[key] is not None and isinstance(data_dict[key][0], list)
+    ]
+    result = []
+    data_special_key = data_dict[special_key]
+    for i in range(len(data_special_key)):
+        sub_result = []
+        for j, distance in enumerate(data_special_key[i]):
+            sub_dict = {}
+            for key in keys:
+                if len(data_dict[key]) > i:
+                    sub_dict[key[:-1]] = data_dict[key][i][j]  # remove 's' in the end from key
+            sub_result.append((sub_dict, distance))
+        result.append(sub_result)
+    return result