camel-ai 0.2.33__py3-none-any.whl → 0.2.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai has been flagged as potentially problematic; consult the registry's advisory page for details.

@@ -0,0 +1,261 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import asyncio
16
+ from datetime import datetime
17
+ from typing import List
18
+
19
+ from pydantic import ValidationError
20
+
21
+ from camel.agents import ChatAgent
22
+ from camel.logger import get_logger
23
+ from camel.models.base_model import BaseModelBackend
24
+ from camel.verifiers import BaseVerifier
25
+ from camel.verifiers.models import VerifierInput
26
+
27
+ from .base_generator import BaseGenerator
28
+ from .models import DataPoint
29
+ from .static_dataset import StaticDataset
30
+
31
# Module-level logger, namespaced to this module.
logger = get_logger(__name__)

# System prompt for the generating agent: instructs the LLM to emit
# (question, rationale, final answer) triples where the rationale is
# executable code ending in `print(final_answer)`, so a verifier can run
# it to obtain a pseudo ground truth.
# NOTE(review): the code fence opened with ``` under "Output Format" is
# never closed before the closing instruction — confirm whether the
# closing fence was dropped intentionally.
SYSTEM_PROMPT = """**You are an advanced data generation assistant.**
Your goal is to generate high-quality synthetic data points based on
provided examples. Your output must be well-structured,
logically sound, and formatted correctly.

**Instructions:**
1. **Follow the Structure**
Each data point must include:
- **Question**: A clear, well-formed query.
- **Rationale**: A step-by-step, executable reasoning process ending
with `print(final_answer)`.
- **Final Answer**: The correct, concise result.

2. **Ensure Logical Consistency**
- The `rationale` must be code that runs correctly.
- The `final_answer` should match the printed output.

3. **Output Format (Strict)**
```
Question: [Generated question]
Rationale: [Code that solves the question, ending in a print statement,
outputting the answer.]
Final Answer: [The Final Answer]

**Now, generate a new data point based on the given examples.**
"""
59
+
60
+
61
class FewShotGenerator(BaseGenerator):
    r"""A generator for creating synthetic datapoints using few-shot learning.

    This class leverages a seed dataset, an agent, and a verifier to generate
    new synthetic datapoints on demand through few-shot prompting.
    """

    def __init__(
        self,
        seed_dataset: StaticDataset,
        verifier: BaseVerifier,
        model: BaseModelBackend,
        seed: int = 42,
        **kwargs,
    ):
        r"""Initialize the few-shot generator.

        Args:
            seed_dataset (StaticDataset): Validated static dataset to
                use for examples.
            verifier (BaseVerifier): Verifier to validate generated content.
            model (BaseModelBackend): The underlying LLM that the generating
                agent will be initiated with.
            seed (int): Random seed for reproducibility. (default: :obj:`42`)
            **kwargs: Additional generator parameters.

        Raises:
            RuntimeError: If the seed dataset fails validation.
        """
        super().__init__(seed=seed, **kwargs)
        self.seed_dataset = seed_dataset
        try:
            self._validate_seed_dataset()
        except Exception as e:
            # Chain the cause so the original validation failure stays
            # visible in the traceback instead of being swallowed.
            raise RuntimeError(
                "Seed Data does not follow Datapoint format"
            ) from e
        self.verifier = verifier
        self.agent = ChatAgent(system_message=SYSTEM_PROMPT, model=model)
        # Instance-level lock guarding `self._data`. A lock created per
        # call (the previous behavior) synchronizes nothing because each
        # caller acquires a different lock object.
        self._data_lock = asyncio.Lock()

    # TODO: Validate that seed dataset contains rationale
    def _validate_seed_dataset(self) -> None:
        pass

    def _construct_prompt(self, examples: List[DataPoint]) -> str:
        r"""Construct a prompt for generating new datapoints
        using a fixed sample of examples from the seed dataset.

        Args:
            examples (List[DataPoint]): Examples to include in the prompt.

        Returns:
            str: Formatted prompt with examples.
        """
        prompt = (
            "Generate a new datapoint similar to the following examples:\n\n"
        )
        for i, example in enumerate(examples, 1):
            prompt += f"Example {i}:\n"
            prompt += f"Question: {example.question}\n"
            if example.rationale is not None:
                prompt += f"Rationale: {example.rationale}\n"
            else:
                prompt += "Rationale: None\n"
            prompt += f"Final Answer: {example.final_answer}\n\n"
        prompt += "New datapoint:"
        return prompt

    async def generate_new(
        self,
        n: int,
        max_retries: int = 10,
        num_examples: int = 3,
        **kwargs,
    ) -> List[DataPoint]:
        r"""Generates and validates `n` new datapoints through
        few-shot prompting, with a retry limit.

        Steps:
            1. Samples examples from the seed dataset.
            2. Constructs a prompt using the selected examples.
            3. Uses an agent to generate a new datapoint,
               consisting of a question and code to solve the question.
            4. Executes code using a verifier to get pseudo ground truth.
            5. Stores valid datapoints in memory.

        Args:
            n (int): Number of valid datapoints to generate.
            max_retries (int): Maximum number of retries before stopping.
                (default: :obj:`10`)
            num_examples (int): Number of examples to sample from the
                seed dataset for few shot prompting.
                (default: :obj:`3`)
            **kwargs: Additional generation parameters.

        Returns:
            List[DataPoint]: A list of newly generated valid datapoints.

        Raises:
            TypeError: If the agent's output is not a dictionary (or does not
                match the expected format).
            KeyError: If required keys are missing from the response.
            AttributeError: If the verifier response lacks attributes.
            ValidationError: If a datapoint fails schema validation.
            RuntimeError: If retries are exhausted before `n` valid datapoints
                are generated.

        Notes:
            - Retries on validation failures until `n` valid datapoints exist
              or `max_retries` is reached, whichever comes first.
            - If retries are exhausted before reaching `n`, a `RuntimeError`
              is raised.
            - Metadata includes a timestamp for tracking datapoint creation.
        """
        valid_data_points: List[DataPoint] = []
        retries = 0

        while len(valid_data_points) < n and retries < max_retries:
            try:
                examples = [
                    self.seed_dataset.sample() for _ in range(num_examples)
                ]
                prompt = self._construct_prompt(examples)

                try:
                    agent_output = (
                        self.agent.step(prompt, response_format=DataPoint)
                        .msgs[0]
                        .parsed
                    )
                    # Explicit runtime check instead of `assert`, which is
                    # stripped when Python runs with `-O`.
                    if not isinstance(agent_output, DataPoint):
                        raise TypeError(
                            "Agent did not return a DataPoint, got "
                            f"{type(agent_output).__name__}"
                        )
                except (TypeError, KeyError) as e:
                    logger.warning(
                        f"Agent output issue: {e}, retrying... "
                        f"({retries + 1}/{max_retries})"
                    )
                    retries += 1
                    continue
                finally:
                    # Reset even on failure so stale conversation state
                    # does not leak into the next attempt.
                    self.agent.reset()

                rationale = agent_output.rationale

                if not isinstance(rationale, str):
                    raise TypeError(f"Rationale {rationale} is not a string.")

                try:
                    verifier_response = await self.verifier.verify(
                        VerifierInput(
                            llm_response=rationale,
                            ground_truth=None,
                        )
                    )
                    # An empty/falsy result means the rationale did not
                    # execute successfully — treat as a failed attempt.
                    if not verifier_response or not verifier_response.result:
                        raise ValueError(
                            "Verifier unsuccessful, response: "
                            f"{verifier_response}"
                        )
                except (ValueError, AttributeError) as e:
                    logger.warning(
                        f"Verifier issue: {e}, "
                        f"retrying... ({retries + 1}/{max_retries})"
                    )
                    retries += 1
                    continue

                try:
                    # The verifier's output is used as pseudo ground truth.
                    new_datapoint = DataPoint(
                        question=agent_output.question,
                        rationale=rationale,
                        final_answer=verifier_response.result,
                        metadata={
                            "synthetic": str(True),
                            "created": datetime.now().isoformat(),
                            "generator": "few_shot",
                        },
                    )
                except ValidationError as e:
                    logger.warning(
                        f"Datapoint validation failed: {e}, "
                        f"retrying... ({retries + 1}/{max_retries})"
                    )
                    retries += 1
                    continue

                valid_data_points.append(new_datapoint)

            except Exception as e:
                logger.warning(
                    f"Unexpected error: {e}, retrying..."
                    f" ({retries + 1}/{max_retries})"
                )
                retries += 1

        if len(valid_data_points) < n:
            raise RuntimeError(
                f"Failed to generate {n} valid datapoints "
                f"after {max_retries} retries."
            )

        # Extend the shared data list under the instance-level lock so
        # concurrent generate_new() calls do not interleave.
        async with self._data_lock:
            self._data.extend(valid_data_points)
        return valid_data_points
@@ -60,7 +60,7 @@ class StaticDataset(Dataset):
60
60
  Input data, which can be one of the following:
61
61
  - A Hugging Face Dataset (:obj:`HFDataset`).
62
62
  - A PyTorch Dataset (:obj:`torch.utils.data.Dataset`).
63
- - A :obj:`Path` object representing a JSON file.
63
+ - A :obj:`Path` object representing a JSON or JSONL file.
64
64
  - A list of dictionaries with :obj:`DataPoint`-compatible
65
65
  fields.
66
66
  seed (int): Random seed for reproducibility.
@@ -112,6 +112,7 @@ class StaticDataset(Dataset):
112
112
 
113
113
  Raises:
114
114
  TypeError: If the input data type is unsupported.
115
+ ValueError: If the Path has an unsupported file extension.
115
116
  """
116
117
 
117
118
  if isinstance(data, HFDataset):
@@ -119,7 +120,16 @@ class StaticDataset(Dataset):
119
120
  elif isinstance(data, Dataset):
120
121
  raw_data = self._init_from_pytorch_dataset(data)
121
122
  elif isinstance(data, Path):
122
- raw_data = self._init_from_json_path(data)
123
+ if data.suffix == ".jsonl":
124
+ raw_data = self._init_from_jsonl_path(data)
125
+ elif data.suffix == ".json":
126
+ raw_data = self._init_from_json_path(data)
127
+ else:
128
+ raise ValueError(
129
+ f"Unsupported file extension: {data.suffix}."
130
+ " Please enter a .json or .jsonl object."
131
+ )
132
+
123
133
  elif isinstance(data, list):
124
134
  raw_data = self._init_from_list(data)
125
135
  else:
@@ -322,6 +332,48 @@ class StaticDataset(Dataset):
322
332
  )
323
333
  return loaded_data
324
334
 
335
def _init_from_jsonl_path(self, data: Path) -> List[Dict[str, Any]]:
    r"""Load and parse a dataset from a JSONL file.

    Args:
        data (Path): Path to the JSONL file.

    Returns:
        List[Dict[str, Any]]: A list of datapoint dictionaries.

    Raises:
        FileNotFoundError: If the specified JSONL file does not exist.
        ValueError: If a line in the file contains invalid JSON or
            is not a dictionary.
    """
    if not data.exists():
        raise FileNotFoundError(f"JSONL file not found: {data}")

    raw_data: List[Dict[str, Any]] = []
    logger.debug(f"Loading JSONL from {data}")
    with data.open('r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue  # Skip blank lines if any exist.
            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(
                    f"Invalid JSON on line {line_number} in file "
                    f"{data}: {e}"
                ) from e
            # Validate here, while the true file line number is known.
            # The previous post-loop check reported the record index as
            # a line number, which is wrong whenever blank lines were
            # skipped during reading.
            if not isinstance(record, dict):
                raise ValueError(
                    f"Expected a dictionary on line {line_number} in "
                    f"file {data}, got {type(record).__name__}"
                )
            raw_data.append(record)
    logger.info(f"Successfully loaded {len(raw_data)} items from {data}")
    return raw_data
376
+
325
377
  def _init_from_list(
326
378
  self, data: List[Dict[str, Any]]
327
379
  ) -> List[Dict[str, Any]]:
@@ -33,6 +33,8 @@ class ChatHistoryMemory(AgentMemory):
33
33
  window_size (int, optional): The number of recent chat messages to
34
34
  retrieve. If not provided, the entire chat history will be
35
35
  retrieved. (default: :obj:`None`)
36
+ agent_id (str, optional): The ID of the agent associated with the chat
37
+ history.
36
38
  """
37
39
 
38
40
  def __init__(
@@ -40,6 +42,7 @@ class ChatHistoryMemory(AgentMemory):
40
42
  context_creator: BaseContextCreator,
41
43
  storage: Optional[BaseKeyValueStorage] = None,
42
44
  window_size: Optional[int] = None,
45
+ agent_id: Optional[str] = None,
43
46
  ) -> None:
44
47
  if window_size is not None and not isinstance(window_size, int):
45
48
  raise TypeError("`window_size` must be an integer or None.")
@@ -48,6 +51,15 @@ class ChatHistoryMemory(AgentMemory):
48
51
  self._context_creator = context_creator
49
52
  self._window_size = window_size
50
53
  self._chat_history_block = ChatHistoryBlock(storage=storage)
54
+ self._agent_id = agent_id
55
+
56
+ @property
57
+ def agent_id(self) -> Optional[str]:
58
+ return self._agent_id
59
+
60
+ @agent_id.setter
61
+ def agent_id(self, val: Optional[str]) -> None:
62
+ self._agent_id = val
51
63
 
52
64
  def retrieve(self) -> List[ContextRecord]:
53
65
  records = self._chat_history_block.retrieve(self._window_size)
@@ -63,6 +75,10 @@ class ChatHistoryMemory(AgentMemory):
63
75
  return records
64
76
 
65
77
  def write_records(self, records: List[MemoryRecord]) -> None:
78
+ for record in records:
79
+ # assign the agent_id to the record
80
+ if record.agent_id == "" and self.agent_id is not None:
81
+ record.agent_id = self.agent_id
66
82
  self._chat_history_block.write_records(records)
67
83
 
68
84
  def get_context_creator(self) -> BaseContextCreator:
@@ -84,6 +100,8 @@ class VectorDBMemory(AgentMemory):
84
100
  (default: :obj:`None`)
85
101
  retrieve_limit (int, optional): The maximum number of messages
86
102
  to be added into the context. (default: :obj:`3`)
103
+ agent_id (str, optional): The ID of the agent associated with
104
+ the messages stored in the vector database.
87
105
  """
88
106
 
89
107
  def __init__(
@@ -91,13 +109,23 @@ class VectorDBMemory(AgentMemory):
91
109
  context_creator: BaseContextCreator,
92
110
  storage: Optional[BaseVectorStorage] = None,
93
111
  retrieve_limit: int = 3,
112
+ agent_id: Optional[str] = None,
94
113
  ) -> None:
95
114
  self._context_creator = context_creator
96
115
  self._retrieve_limit = retrieve_limit
97
116
  self._vectordb_block = VectorDBBlock(storage=storage)
117
+ self._agent_id = agent_id
98
118
 
99
119
  self._current_topic: str = ""
100
120
 
121
+ @property
122
+ def agent_id(self) -> Optional[str]:
123
+ return self._agent_id
124
+
125
+ @agent_id.setter
126
+ def agent_id(self, val: Optional[str]) -> None:
127
+ self._agent_id = val
128
+
101
129
  def retrieve(self) -> List[ContextRecord]:
102
130
  return self._vectordb_block.retrieve(
103
131
  self._current_topic,
@@ -109,6 +137,11 @@ class VectorDBMemory(AgentMemory):
109
137
  for record in records:
110
138
  if record.role_at_backend == OpenAIBackendRole.USER:
111
139
  self._current_topic = record.message.content
140
+
141
+ # assign the agent_id to the record
142
+ if record.agent_id == "" and self.agent_id is not None:
143
+ record.agent_id = self.agent_id
144
+
112
145
  self._vectordb_block.write_records(records)
113
146
 
114
147
  def get_context_creator(self) -> BaseContextCreator:
@@ -133,6 +166,8 @@ class LongtermAgentMemory(AgentMemory):
133
166
  (default: :obj:`None`)
134
167
  retrieve_limit (int, optional): The maximum number of messages
135
168
  to be added into the context. (default: :obj:`3`)
169
+ agent_id (str, optional): The ID of the agent associated with the chat
170
+ history and the messages stored in the vector database.
136
171
  """
137
172
 
138
173
  def __init__(
@@ -141,12 +176,22 @@ class LongtermAgentMemory(AgentMemory):
141
176
  chat_history_block: Optional[ChatHistoryBlock] = None,
142
177
  vector_db_block: Optional[VectorDBBlock] = None,
143
178
  retrieve_limit: int = 3,
179
+ agent_id: Optional[str] = None,
144
180
  ) -> None:
145
181
  self.chat_history_block = chat_history_block or ChatHistoryBlock()
146
182
  self.vector_db_block = vector_db_block or VectorDBBlock()
147
183
  self.retrieve_limit = retrieve_limit
148
184
  self._context_creator = context_creator
149
185
  self._current_topic: str = ""
186
+ self._agent_id = agent_id
187
+
188
+ @property
189
+ def agent_id(self) -> Optional[str]:
190
+ return self._agent_id
191
+
192
+ @agent_id.setter
193
+ def agent_id(self, val: Optional[str]) -> None:
194
+ self._agent_id = val
150
195
 
151
196
  def get_context_creator(self) -> BaseContextCreator:
152
197
  r"""Returns the context creator used by the memory.
@@ -166,7 +211,8 @@ class LongtermAgentMemory(AgentMemory):
166
211
  """
167
212
  chat_history = self.chat_history_block.retrieve()
168
213
  vector_db_retrieve = self.vector_db_block.retrieve(
169
- self._current_topic, self.retrieve_limit
214
+ self._current_topic,
215
+ self.retrieve_limit,
170
216
  )
171
217
  return chat_history[:1] + vector_db_retrieve + chat_history[1:]
172
218
 
camel/memories/base.py CHANGED
@@ -13,7 +13,7 @@
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
 
15
15
  from abc import ABC, abstractmethod
16
- from typing import List, Tuple
16
+ from typing import List, Optional, Tuple
17
17
 
18
18
  from camel.memories.records import ContextRecord, MemoryRecord
19
19
  from camel.messages import OpenAIMessage
@@ -112,6 +112,16 @@ class AgentMemory(MemoryBlock, ABC):
112
112
  the memory records stored within the AgentMemory.
113
113
  """
114
114
 
115
+ @property
116
+ @abstractmethod
117
+ def agent_id(self) -> Optional[str]:
118
+ pass
119
+
120
+ @agent_id.setter
121
+ @abstractmethod
122
+ def agent_id(self, val: Optional[str]) -> None:
123
+ pass
124
+
115
125
  @abstractmethod
116
126
  def retrieve(self) -> List[ContextRecord]:
117
127
  r"""Get a record list from the memory for creating model context.
@@ -138,3 +148,15 @@ class AgentMemory(MemoryBlock, ABC):
138
148
  context in OpenAIMessage format and the total token count.
139
149
  """
140
150
  return self.get_context_creator().create_context(self.retrieve())
151
+
152
+ def __repr__(self) -> str:
153
+ r"""Returns a string representation of the AgentMemory.
154
+
155
+ Returns:
156
+ str: A string in the format 'ClassName(agent_id=<id>)'
157
+ if agent_id exists, otherwise just 'ClassName()'.
158
+ """
159
+ agent_id = getattr(self, '_agent_id', None)
160
+ if agent_id:
161
+ return f"{self.__class__.__name__}(agent_id='{agent_id}')"
162
+ return f"{self.__class__.__name__}()"
camel/memories/records.py CHANGED
@@ -39,6 +39,8 @@ class MemoryRecord(BaseModel):
39
39
  key-value pairs that provide more information. If not given, it
40
40
  will be an empty `Dict`.
41
41
  timestamp (float, optional): The timestamp when the record was created.
42
+ agent_id (str): The identifier of the agent associated with this
43
+ memory.
42
44
  """
43
45
 
44
46
  model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -50,6 +52,7 @@ class MemoryRecord(BaseModel):
50
52
  timestamp: float = Field(
51
53
  default_factory=lambda: datetime.now(timezone.utc).timestamp()
52
54
  )
55
+ agent_id: str = Field(default="")
53
56
 
54
57
  _MESSAGE_TYPES: ClassVar[dict] = {
55
58
  "BaseMessage": BaseMessage,
@@ -73,6 +76,7 @@ class MemoryRecord(BaseModel):
73
76
  role_at_backend=record_dict["role_at_backend"],
74
77
  extra_info=record_dict["extra_info"],
75
78
  timestamp=record_dict["timestamp"],
79
+ agent_id=record_dict["agent_id"],
76
80
  )
77
81
 
78
82
  def to_dict(self) -> Dict[str, Any]:
@@ -88,6 +92,7 @@ class MemoryRecord(BaseModel):
88
92
  "role_at_backend": self.role_at_backend,
89
93
  "extra_info": self.extra_info,
90
94
  "timestamp": self.timestamp,
95
+ "agent_id": self.agent_id,
91
96
  }
92
97
 
93
98
  def to_openai_message(self) -> OpenAIMessage:
@@ -56,10 +56,8 @@ class OpenAICompatibleModel(BaseModelBackend):
56
56
  url: Optional[str] = None,
57
57
  token_counter: Optional[BaseTokenCounter] = None,
58
58
  ) -> None:
59
- self.api_key = api_key or os.environ.get(
60
- "OPENAI_COMPATIBILITY_API_KEY"
61
- )
62
- self.url = url or os.environ.get("OPENAI_COMPATIBILITY_API_BASE_URL")
59
+ api_key = api_key or os.environ.get("OPENAI_COMPATIBILITY_API_KEY")
60
+ url = url or os.environ.get("OPENAI_COMPATIBILITY_API_BASE_URL")
63
61
  super().__init__(
64
62
  model_type, model_config_dict, api_key, url, token_counter
65
63
  )
@@ -324,7 +324,10 @@ def _kill_process_tree(
324
324
 
325
325
  # Sometime processes cannot be killed with SIGKILL
326
326
  # so we send an additional signal to kill them.
327
- itself.send_signal(signal.SIGQUIT)
327
+ if hasattr(signal, "SIGQUIT"):
328
+ itself.send_signal(signal.SIGQUIT)
329
+ else:
330
+ itself.send_signal(signal.SIGTERM)
328
331
  except psutil.NoSuchProcess:
329
332
  pass
330
333
 
@@ -44,6 +44,31 @@ class StubTokenCounter(BaseTokenCounter):
44
44
  """
45
45
  return 10
46
46
 
47
+ def encode(self, text: str) -> List[int]:
48
+ r"""Encode text into token IDs for STUB models.
49
+
50
+ Args:
51
+ text (str): The text to encode.
52
+
53
+ Returns:
54
+ List[int]: List of token IDs.
55
+ """
56
+ # For stub models, just return a list of 0s with length proportional
57
+ # to text length
58
+ return [0] * (len(text) // 4 + 1) # Simple approximation
59
+
60
def decode(self, token_ids: List[int]) -> str:
    r"""Decode token IDs back to text for STUB models.

    Args:
        token_ids (List[int]): List of token IDs to decode.

    Returns:
        str: Decoded text.
    """
    # Stub models cannot reconstruct text; a fixed placeholder is
    # returned regardless of the input token IDs.
    placeholder = "[Stub decoded text]"
    return placeholder
71
+
47
72
 
48
73
  class StubModel(BaseModelBackend):
49
74
  r"""A dummy model used for unit tests."""
@@ -27,6 +27,7 @@ from camel.storages import (
27
27
  VectorRecord,
28
28
  )
29
29
  from camel.utils import Constants
30
+ from camel.utils.chunker import BaseChunker, UnstructuredIOChunker
30
31
 
31
32
  if TYPE_CHECKING:
32
33
  from unstructured.documents.elements import Element
@@ -78,6 +79,7 @@ class VectorRetriever(BaseRetriever):
78
79
  should_chunk: bool = True,
79
80
  extra_info: Optional[dict] = None,
80
81
  metadata_filename: Optional[str] = None,
82
+ chunker: Optional[BaseChunker] = None,
81
83
  **kwargs: Any,
82
84
  ) -> None:
83
85
  r"""Processes content from local file path, remote URL, string
@@ -101,6 +103,12 @@ class VectorRetriever(BaseRetriever):
101
103
  used for storing metadata. Defaults to None.
102
104
  **kwargs (Any): Additional keyword arguments for content parsing.
103
105
  """
106
+ if chunker is None:
107
+ chunker = UnstructuredIOChunker(
108
+ chunk_type=chunk_type,
109
+ max_characters=max_characters,
110
+ metadata_filename=metadata_filename,
111
+ )
104
112
  from unstructured.documents.elements import Element
105
113
 
106
114
  if isinstance(content, Element):
@@ -140,13 +148,7 @@ class VectorRetriever(BaseRetriever):
140
148
  else:
141
149
  # Chunk the content if required
142
150
  chunks = (
143
- self.uio.chunk_elements(
144
- chunk_type=chunk_type,
145
- elements=elements,
146
- max_characters=max_characters,
147
- )
148
- if should_chunk
149
- else elements
151
+ chunker.chunk(content=elements) if should_chunk else (elements)
150
152
  )
151
153
 
152
154
  # Process chunks in batches and store embeddings
@@ -157,6 +159,7 @@ class VectorRetriever(BaseRetriever):
157
159
  )
158
160
 
159
161
  records = []
162
+ offset = 0
160
163
  # Prepare the payload for each vector record, includes the
161
164
  # content path, chunk metadata, and chunk text
162
165
  for vector, chunk in zip(batch_vectors, batch_chunks):
@@ -178,6 +181,7 @@ class VectorRetriever(BaseRetriever):
178
181
  chunk_metadata["metadata"].pop("orig_elements", "")
179
182
  chunk_metadata["extra_info"] = extra_info or {}
180
183
  chunk_text = {"text": str(chunk)}
184
+ chunk_metadata["metadata"]["piece_num"] = i + offset + 1
181
185
  combined_dict = {
182
186
  **content_path_info,
183
187
  **chunk_metadata,
@@ -187,6 +191,7 @@ class VectorRetriever(BaseRetriever):
187
191
  records.append(
188
192
  VectorRecord(vector=vector, payload=combined_dict)
189
193
  )
194
+ offset += 1
190
195
 
191
196
  self.storage.add(records=records)
192
197
 
@@ -14,7 +14,7 @@
14
14
 
15
15
  from .base import BaseKeyValueStorage
16
16
  from .in_memory import InMemoryKeyValueStorage
17
- from .json import JsonStorage
17
+ from .json import CamelJSONEncoder, JsonStorage
18
18
  from .redis import RedisStorage
19
19
 
20
20
  __all__ = [
@@ -22,4 +22,5 @@ __all__ = [
22
22
  'InMemoryKeyValueStorage',
23
23
  'JsonStorage',
24
24
  'RedisStorage',
25
+ 'CamelJSONEncoder',
25
26
  ]