camel-ai 0.2.20a1__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

camel/toolkits/whatsapp_toolkit.py CHANGED
@@ -19,7 +19,7 @@ import requests
 
 from camel.toolkits import FunctionTool
 from camel.toolkits.base import BaseToolkit
-from camel.utils.commons import retry_request
+from camel.utils import retry_on_error
 
 
 class WhatsAppToolkit(BaseToolkit):
@@ -36,18 +36,8 @@ class WhatsAppToolkit(BaseToolkit):
         version (str): API version.
     """
 
-    def __init__(self, retries: int = 3, delay: int = 1):
-        r"""Initializes the WhatsAppToolkit with the specified number of
-        retries and delay.
-
-        Args:
-            retries (int): Number of times to retry the request in case of
-                failure. (default: :obj:`3`)
-            delay (int): Time in seconds to wait between retries.
-                (default: :obj:`1`)
-        """
-        self.retries = retries
-        self.delay = delay
+    def __init__(self):
+        r"""Initializes the WhatsAppToolkit."""
         self.base_url = "https://graph.facebook.com"
         self.version = "v17.0"
 
@@ -61,6 +51,7 @@ class WhatsAppToolkit(BaseToolkit):
                 "WHATSAPP_PHONE_NUMBER_ID environment variables."
             )
 
+    @retry_on_error()
     def send_message(
         self, to: str, message: str
     ) -> Union[Dict[str, Any], str]:
@@ -88,19 +79,15 @@
         }
 
         try:
-            response = retry_request(
-                requests.post,
-                retries=self.retries,
-                delay=self.delay,
-                url=url,
-                headers=headers,
-                json=data,
-            )
+            response = requests.post(url=url, headers=headers, json=data)
             response.raise_for_status()
             return response.json()
+        except requests.exceptions.RequestException as e:
+            raise e
         except Exception as e:
             return f"Failed to send message: {e!s}"
 
+    @retry_on_error()
     def get_message_templates(self) -> Union[List[Dict[str, Any]], str]:
         r"""Retrieves all message templates for the WhatsApp Business account.
 
@@ -116,18 +103,13 @@
         headers = {"Authorization": f"Bearer {self.access_token}"}
 
         try:
-            response = retry_request(
-                requests.get,
-                retries=self.retries,
-                delay=self.delay,
-                url=url,
-                headers=headers,
-            )
+            response = requests.get(url=url, headers=headers)
             response.raise_for_status()
             return response.json().get("data", [])
         except Exception as e:
             return f"Failed to retrieve message templates: {e!s}"
 
+    @retry_on_error()
     def get_business_profile(self) -> Union[Dict[str, Any], str]:
         r"""Retrieves the WhatsApp Business profile information.
 
@@ -149,10 +131,7 @@
         }
 
        try:
-            response = retry_request(
-                requests.get,
-                retries=self.retries,
-                delay=self.delay,
+            response = requests.get(
                 url=url,
                 headers=headers,
                 params=params,
camel/types/enums.py CHANGED
@@ -204,6 +204,10 @@ class ModelType(UnifiedModelType, Enum):
     SILICONFLOW_THUDM_GLM_4_9B_CHAT = "THUDM/glm-4-9b-chat"
     SILICONFLOW_PRO_THUDM_GLM_4_9B_CHAT = "Pro/THUDM/glm-4-9b-chat"
 
+    # AIML models support tool calling
+    AIML_MIXTRAL_8X7B = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+    AIML_MISTRAL_7B_INSTRUCT = "mistralai/Mistral-7B-Instruct-v0.1"
+
     def __str__(self):
         return self.value
 
@@ -218,7 +222,11 @@ class ModelType(UnifiedModelType, Enum):
 
     @property
     def support_native_structured_output(self) -> bool:
-        return self.is_openai
+        return any(
+            [
+                self.is_openai,
+            ]
+        )
 
     @property
     def support_native_tool_calling(self) -> bool:
@@ -238,6 +246,7 @@ class ModelType(UnifiedModelType, Enum):
                 self.is_moonshot,
                 self.is_siliconflow,
                 self.is_zhipuai,
+                self.is_aiml,
             ]
         )
 
@@ -513,6 +522,13 @@ class ModelType(UnifiedModelType, Enum):
             ModelType.SILICONFLOW_PRO_THUDM_GLM_4_9B_CHAT,
         }
 
+    @property
+    def is_aiml(self) -> bool:
+        return self in {
+            ModelType.AIML_MIXTRAL_8X7B,
+            ModelType.AIML_MISTRAL_7B_INSTRUCT,
+        }
+
     @property
     def token_limit(self) -> int:
         r"""Returns the maximum token limit for a given model.
@@ -586,6 +602,8 @@ class ModelType(UnifiedModelType, Enum):
             ModelType.TOGETHER_MIXTRAL_8_7B,
             ModelType.SGLANG_MISTRAL_7B,
             ModelType.MOONSHOT_V1_32K,
+            ModelType.AIML_MIXTRAL_8X7B,
+            ModelType.AIML_MISTRAL_7B_INSTRUCT,
         }:
             return 32_768
         elif self in {
@@ -864,6 +882,7 @@ class ModelPlatformType(Enum):
     INTERNLM = "internlm"
     MOONSHOT = "moonshot"
     SILICONFLOW = "siliconflow"
+    AIML = "aiml"
 
     @property
     def is_openai(self) -> bool:
@@ -981,6 +1000,11 @@ class ModelPlatformType(Enum):
         r"""Returns whether this platform is SiliconFlow."""
         return self is ModelPlatformType.SILICONFLOW
 
+    @property
+    def is_aiml(self) -> bool:
+        r"""Returns whether this platform is AIML."""
+        return self is ModelPlatformType.AIML
+
 
 class AudioModelType(Enum):
     TTS_1 = "tts-1"
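Taken together, these hunks register AIML as a platform and wire its two models into tool calling and the 32k token-limit bucket. A quick check of the new properties, assuming the standard `camel.types` exports; the expected values follow directly from the hunks above:

from camel.types import ModelPlatformType, ModelType

model = ModelType.AIML_MIXTRAL_8X7B
print(model.is_aiml)                      # True, per the new is_aiml property
print(model.support_native_tool_calling)  # True, is_aiml joins the any() list
print(model.token_limit)                  # 32768, both AIML models are in the 32_768 bucket
print(ModelPlatformType.AIML.is_aiml)     # True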
camel/utils/__init__.py CHANGED
@@ -14,6 +14,7 @@
 
 from .commons import (
     AgentOpsMeta,
+    BatchProcessor,
     agentops_decorator,
     api_keys_required,
     check_server_running,
@@ -33,16 +34,17 @@ from .commons import (
     is_docker_running,
     json_to_function_code,
     print_text_animated,
+    retry_on_error,
     text_extract_from_web,
     to_pascal,
     track_agent,
 )
 from .constants import Constants
+from .deduplication import DeduplicationResult, deduplicate_internally
 from .response_format import get_pydantic_model
 from .token_counting import (
     AnthropicTokenCounter,
     BaseTokenCounter,
-    GeminiTokenCounter,
     LiteLLMTokenCounter,
     MistralTokenCounter,
     OpenAITokenCounter,
@@ -69,7 +71,6 @@ __all__ = [
     "dependencies_required",
     "api_keys_required",
     "is_docker_running",
-    "GeminiTokenCounter",
     "MistralTokenCounter",
     "get_pydantic_major_version",
     "get_pydantic_object_schema",
@@ -82,4 +83,8 @@ __all__ = [
     "get_pydantic_model",
     "download_github_subdirectory",
     "generate_prompt_for_structured_output",
+    "deduplicate_internally",
+    "DeduplicationResult",
+    "retry_on_error",
+    "BatchProcessor",
 ]
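For consumers, the visible change to the `camel.utils` surface is four new exports and the removal of `GeminiTokenCounter`, so code importing that name from this module will now fail. The new names should be importable like so after upgrading:

from camel.utils import (
    BatchProcessor,
    DeduplicationResult,
    deduplicate_internally,
    retry_on_error,
)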
camel/utils/commons.py CHANGED
@@ -11,7 +11,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+import functools
 import importlib
+import logging
 import os
 import platform
 import re
@@ -47,6 +49,8 @@ from .constants import Constants
 
 F = TypeVar('F', bound=Callable[..., Any])
 
+logger = logging.getLogger(__name__)
+
 
 def print_text_animated(text, delay: float = 0.02, end: str = ""):
     r"""Prints the given text with an animated effect.
@@ -620,33 +624,206 @@ def handle_http_error(response: requests.Response) -> str:
     return "HTTP Error"
 
 
-def retry_request(
-    func: Callable, retries: int = 3, delay: int = 1, *args: Any, **kwargs: Any
-) -> Any:
-    r"""Retries a function in case of any errors.
+def retry_on_error(
+    max_retries: int = 3, initial_delay: float = 1.0
+) -> Callable:
+    r"""Decorator to retry function calls on exception with exponential
+    backoff.
 
     Args:
-        func (Callable): The function to be retried.
-        retries (int): Number of retry attempts. (default: :obj:`3`)
-        delay (int): Delay between retries in seconds. (default: :obj:`1`)
-        *args: Arguments to pass to the function.
-        **kwargs: Keyword arguments to pass to the function.
+        max_retries (int): Maximum number of retry attempts
+        initial_delay (float): Initial delay between retries in seconds
 
     Returns:
-        Any: The result of the function call if successful.
+        Callable: Decorated function with retry logic
+    """
 
-    Raises:
-        Exception: If all retry attempts fail.
+    def decorator(func: Callable) -> Callable:
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            delay = initial_delay
+            last_exception = None
+
+            for attempt in range(max_retries + 1):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    last_exception = e
+                    if attempt == max_retries:
+                        logger.error(
+                            f"Failed after {max_retries} retries: {e!s}"
+                        )
+                        raise
+
+                    logger.warning(
+                        f"Attempt {attempt + 1} failed: {e!s}. "
+                        f"Retrying in {delay:.1f}s..."
+                    )
+                    time.sleep(delay)
+                    delay *= 2  # Exponential backoff
+
+            raise last_exception
+
+        return wrapper
+
+    return decorator
+
+
+class BatchProcessor:
+    r"""Handles batch processing with dynamic sizing and error handling based
+    on system load.
     """
-    for attempt in range(retries):
-        try:
-            return func(*args, **kwargs)
-        except Exception as e:
-            print(f"Attempt {attempt + 1}/{retries} failed: {e}")
-            if attempt < retries - 1:
-                time.sleep(delay)
-            else:
-                raise
+
+    def __init__(
+        self,
+        max_workers: Optional[int] = None,
+        initial_batch_size: Optional[int] = None,
+        monitoring_interval: float = 5.0,
+        cpu_threshold: float = 80.0,
+        memory_threshold: float = 85.0,
+    ):
+        r"""Initialize the BatchProcessor with dynamic worker allocation.
+
+        Args:
+            max_workers: Maximum number of workers. If None, will be
+                determined dynamically based on system resources.
+                (default: :obj:`None`)
+            initial_batch_size: Initial size of each batch. If `None`,
+                defaults to `10`. (default: :obj:`None`)
+            monitoring_interval: Interval in seconds between resource checks.
+                (default: :obj:`5.0`)
+            cpu_threshold: CPU usage percentage threshold for scaling down.
+                (default: :obj:`80.0`)
+            memory_threshold: Memory usage percentage threshold for scaling
+                down. (default: :obj:`85.0`)
+        """
+        import psutil
+
+        self.monitoring_interval = monitoring_interval
+        self.cpu_threshold = cpu_threshold
+        self.memory_threshold = memory_threshold
+        self.last_check_time = time.time()
+        self.psutil = psutil
+
+        # Initialize performance metrics
+        self.total_processed = 0
+        self.total_errors = 0
+        self.processing_times: List = []
+
+        if max_workers is None:
+            self.max_workers = self._calculate_optimal_workers()
+        else:
+            self.max_workers = max_workers
+
+        self.batch_size = (
+            10 if initial_batch_size is None else initial_batch_size
+        )
+        self.min_batch_size = 1
+        self.max_batch_size = 20
+        self.backoff_factor = 0.8
+        self.success_factor = 1.2
+
+        # Initial resource check
+        self._update_resource_metrics()
+
+    def _calculate_optimal_workers(self) -> int:
+        r"""Calculate optimal number of workers based on system resources."""
+        cpu_count = self.psutil.cpu_count()
+        cpu_percent = self.psutil.cpu_percent(interval=1)
+        memory = self.psutil.virtual_memory()
+
+        # Base number of workers on CPU count and current load
+        if cpu_percent > self.cpu_threshold:
+            workers = max(1, cpu_count // 4)
+        elif cpu_percent > 60:
+            workers = max(1, cpu_count // 2)
+        else:
+            workers = max(1, cpu_count - 1)
+
+        # Further reduce if memory is constrained
+        if memory.percent > self.memory_threshold:
+            workers = max(1, workers // 2)
+
+        return workers
+
+    def _update_resource_metrics(self) -> None:
+        r"""Update current resource usage metrics."""
+        self.current_cpu = self.psutil.cpu_percent()
+        self.current_memory = self.psutil.virtual_memory().percent
+        self.last_check_time = time.time()
+
+    def _should_check_resources(self) -> bool:
+        r"""Determine if it's time to check resource usage again."""
+        return time.time() - self.last_check_time >= self.monitoring_interval
+
+    def adjust_batch_size(
+        self, success: bool, processing_time: Optional[float] = None
+    ) -> None:
+        r"""Adjust batch size based on success/failure and system resources.
+
+        Args:
+            success (bool): Whether the last batch completed successfully
+            processing_time (Optional[float]): Time taken to process the last
+                batch. (default: :obj:`None`)
+        """
+        # Update metrics
+        self.total_processed += 1
+        if not success:
+            self.total_errors += 1
+        if processing_time is not None:
+            self.processing_times.append(processing_time)
+
+        # Check system resources if interval has elapsed
+        if self._should_check_resources():
+            self._update_resource_metrics()
+
+            # Adjust based on resource usage
+            if (
+                self.current_cpu > self.cpu_threshold
+                or self.current_memory > self.memory_threshold
+            ):
+                self.batch_size = max(
+                    int(self.batch_size * self.backoff_factor),
+                    self.min_batch_size,
+                )
+                self.max_workers = max(1, self.max_workers - 1)
+                return
+
+        # Adjust based on success/failure
+        if success:
+            self.batch_size = min(
+                int(self.batch_size * self.success_factor), self.max_batch_size
+            )
+        else:
+            self.batch_size = max(
+                int(self.batch_size * self.backoff_factor), self.min_batch_size
+            )
+
+    def get_performance_metrics(self) -> Dict[str, Any]:
+        r"""Get current performance metrics.
+
+        Returns:
+            Dict containing performance metrics including:
+            - total_processed: Total number of batches processed
+            - error_rate: Percentage of failed batches
+            - avg_processing_time: Average time per batch
+            - current_batch_size: Current batch size
+            - current_workers: Current number of workers
+            - current_cpu: Current CPU usage percentage
+            - current_memory: Current memory usage percentage
+        """
+        metrics = {
+            "total_processed": self.total_processed,
+            "error_rate": (self.total_errors / max(1, self.total_processed))
+            * 100,
+            "avg_processing_time": sum(self.processing_times)
+            / max(1, len(self.processing_times)),
+            "current_batch_size": self.batch_size,
+            "current_workers": self.max_workers,
+            "current_cpu": self.current_cpu,
+            "current_memory": self.current_memory,
+        }
+        return metrics
 
 
 def download_github_subdirectory(
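The package does not ship a driver for `BatchProcessor`, so the following is only one plausible way to use it, inferred from the methods above: pull batches sized by `processor.batch_size`, report success or failure via `adjust_batch_size`, and read the metrics at the end. `process` is a hypothetical work function, and the constructor imports `psutil`, so that package must be installed:

import time

from camel.utils import BatchProcessor

def process(batch):
    # Hypothetical per-batch work; stands in for real I/O or model calls.
    time.sleep(0.01)

processor = BatchProcessor(initial_batch_size=4)
items = list(range(100))
pos = 0
while pos < len(items):
    batch = items[pos : pos + processor.batch_size]  # size adapts per batch
    pos += len(batch)
    start = time.time()
    try:
        process(batch)
        processor.adjust_batch_size(True, time.time() - start)
    except Exception:
        processor.adjust_batch_size(False, time.time() - start)

print(processor.get_performance_metrics())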
camel/utils/deduplication.py ADDED
@@ -0,0 +1,232 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+
+from typing import Dict, List, Literal, Optional
+
+from pydantic import BaseModel
+
+from camel.embeddings.base import BaseEmbedding
+
+
+class DeduplicationResult(BaseModel):
+    r"""The result of deduplication.
+
+    Attributes:
+        original_texts (List[str]): The original texts.
+        unique_ids (List[int]): A list of ids that are unique (not duplicates).
+        unique_embeddings_dict (Dict[int, List[float]]): A mapping from the
+            index of each unique text to its embedding.
+        duplicate_to_target_map (Dict[int, int]): A mapping from the index of
+            the duplicate text to the index of the text it is considered a
+            duplicate of.
+    """
+
+    original_texts: List[str]
+    unique_ids: List[int]
+    unique_embeddings_dict: Dict[int, List[float]]
+    duplicate_to_target_map: Dict[int, int]
+
+
+def deduplicate_internally(
+    texts: List[str],
+    threshold: float = 0.65,
+    embedding_instance: Optional[BaseEmbedding[str]] = None,
+    embeddings: Optional[List[List[float]]] = None,
+    strategy: Literal["top1", "llm-supervise"] = "top1",
+    batch_size: int = 1000,
+) -> DeduplicationResult:
+    r"""Deduplicate a list of strings based on their cosine similarity.
+
+    You can either:
+    1) Provide a CAMEL `BaseEmbedding` instance via `embedding_instance` to let
+       this function handle the embedding internally, OR
+    2) Directly pass a list of pre-computed embeddings to `embeddings`.
+
+    If both `embedding_instance` and `embeddings` are provided, the function
+    will raise a ValueError to avoid ambiguous usage.
+
+    strategy is used to specify different strategies, where 'top1' selects the
+    one with highest similarity, and 'llm-supervise' uses LLM to determine if
+    texts are duplicates (not yet implemented).
+
+    Args:
+        texts (List[str]): The list of texts to be deduplicated.
+        threshold (float, optional): The similarity threshold for considering
+            two texts as duplicates. (default: :obj:`0.65`)
+        embedding_instance (Optional[BaseEmbedding[str]], optional):
+            A CAMEL embedding instance for automatic embedding. (default:
+            :obj:`None`)
+        embeddings (Optional[List[List[float]]], optional):
+            Pre-computed embeddings of `texts`. Each element in the list
+            corresponds to the embedding of the text in the same index of
+            `texts`. (default: :obj:`None`)
+        strategy (Literal["top1", "llm-supervise"], optional):
+            The strategy to use for deduplication. (default: :obj:`"top1"`)
+        batch_size (int, optional): The size of the batch to use for
+            calculating cosine similarities. (default: :obj:`1000`)
+
+    Returns:
+        DeduplicationResult: An object that contains:
+            - `original_texts`: The original texts.
+            - `unique_ids`: The unique ids after deduplication.
+            - `unique_embeddings_dict`: A dict mapping from (unique) text id
+              to its embedding.
+            - `duplicate_to_target_map`: A dict mapping from the id of a
+              duplicate text to the id of the text it is considered a duplicate
+              of.
+
+    Raises:
+        NotImplementedError: If the strategy is not "top1".
+        ValueError: If neither embeddings nor embedding_instance is provided,
+            or if both are provided at the same time.
+        ValueError: If the length of `embeddings` does not match the length of
+            `texts`.
+
+    Example:
+        >>> from camel.embeddings.openai_embedding import OpenAIEmbedding
+        >>> # Suppose we have 5 texts, some of which may be duplicates
+        >>> texts = [
+        ...     "What is AI?",
+        ...     "Artificial Intelligence is about machines",
+        ...     "What is AI?",
+        ...     "Deep Learning is a subset of AI",
+        ...     "What is artificial intelligence?"
+        ... ]
+        >>> # or any other BaseEmbedding instance
+        >>> embedding_model = OpenAIEmbedding()
+        >>> result = deduplicate_internally(
+        ...     texts=texts,
+        ...     threshold=0.7,
+        ...     embedding_instance=embedding_model
+        ... )
+        >>> print("Unique ids:")
+        >>> for uid in result.unique_ids:
+        ...     print(texts[uid])
+        Unique ids:
+        What is AI?
+        Artificial Intelligence is about machines
+        Deep Learning is a subset of AI
+        What is artificial intelligence?
+
+        >>> print("Duplicate map:")
+        >>> print(result.duplicate_to_target_map)
+        {2: 0}
+        # This indicates the text at index 2 is considered
+        # a duplicate of index 0.
+    """
+    import numpy as np
+    from sklearn.metrics.pairwise import cosine_similarity
+
+    if len(texts) == 0:
+        return DeduplicationResult(
+            original_texts=[],
+            unique_ids=[],
+            unique_embeddings_dict={},
+            duplicate_to_target_map={},
+        )
+
+    if len(texts) == 1:
+        return DeduplicationResult(
+            original_texts=texts,
+            unique_ids=[0],
+            unique_embeddings_dict={
+                0: embeddings[0]
+                if embeddings
+                else embedding_instance.embed_list(texts)[0]  # type: ignore[union-attr]
+            },
+            duplicate_to_target_map={},
+        )
+
+    if strategy == "llm-supervise":
+        # TODO: Implement LLM-supervise deduplication.
+        raise NotImplementedError(
+            "LLM-supervise deduplication is not yet implemented."
+        )
+
+    # Check if the parameters are valid.
+    if not 0 <= threshold <= 1:
+        raise ValueError("Threshold must be between 0 and 1")
+
+    if embedding_instance is None and embeddings is None:
+        raise ValueError(
+            "Either 'embedding_instance' or 'embeddings' must be provided."
+        )
+    if embedding_instance is not None and embeddings is not None:
+        raise ValueError(
+            "Cannot provide both 'embedding_instance' and 'embeddings'. "
+            "Please choose only one way to supply embeddings."
+        )
+
+    if embedding_instance is not None:
+        # Use Camel's embedding_instance to vectorize.
+        embeddings = embedding_instance.embed_list(texts)
+    else:
+        # Use pre-supplied embeddings.
+        if embeddings and len(embeddings) != len(texts):
+            raise ValueError(
+                "The length of 'embeddings' does not match the length "
+                "of 'texts'."
+            )
+
+    # Convert embeddings to numpy array for efficient computation
+    embeddings_array = np.array(embeddings)
+    n = len(texts)
+    duplicate_to_target_map: Dict[int, int] = {}
+
+    # Process in batches to reduce memory usage
+    for i in range(0, n, batch_size):
+        batch_end = min(i + batch_size, n)
+        # Calculate cosine similarity for current batch
+        batch_similarities = cosine_similarity(
+            embeddings_array[i:batch_end], embeddings_array[:batch_end]
+        )
+
+        # Create mask for lower triangle (avoid self-comparison and redundant
+        # checks)
+        tril_mask = np.tril(np.ones_like(batch_similarities), k=-1)
+        batch_similarities = batch_similarities * tril_mask
+
+        # Find duplicates in current batch
+        masked_similarities = np.where(
+            batch_similarities > threshold, batch_similarities, -1
+        )
+        max_indices = masked_similarities.argmax(axis=1)
+        above_threshold = (
+            batch_similarities[np.arange(batch_end - i), max_indices]
+            > threshold
+        )
+
+        # Update duplicate map
+        for j, is_duplicate in enumerate(above_threshold):
+            if is_duplicate:
+                duplicate_to_target_map[i + j] = max_indices[j]
+
+    # Get the actual unique ids and embeddings.
+    unique_ids = []
+    unique_embeddings_dict = {}
+
+    assert embeddings, "embeddings must be valid"
+
+    for i, (_, emb) in enumerate(zip(texts, embeddings)):
+        if i not in duplicate_to_target_map:
+            unique_ids.append(i)
+            unique_embeddings_dict[i] = emb
+
+    return DeduplicationResult(
+        original_texts=texts,
+        unique_ids=unique_ids,
+        unique_embeddings_dict=unique_embeddings_dict,
+        duplicate_to_target_map=duplicate_to_target_map,
+    )
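The docstring above demonstrates the `embedding_instance` path; the `embeddings` path can be exercised without any API key. A small sketch with toy vectors, chosen so the first two texts are exact duplicates under cosine similarity:

from camel.utils import deduplicate_internally

texts = ["hello world", "hello world!", "goodbye"]
# Toy 2-d vectors standing in for real embeddings: the first two are
# parallel, so their cosine similarity is 1.0 and index 1 folds into 0.
embeddings = [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]

result = deduplicate_internally(texts, threshold=0.9, embeddings=embeddings)
print(result.unique_ids)               # [0, 2]
print(result.duplicate_to_target_map)  # {1: 0}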