camel-ai 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +29 -30
- camel/agents/knowledge_graph_agent.py +1 -5
- camel/agents/multi_hop_generator_agent.py +35 -3
- camel/agents/programmed_agent_instruction.py +73 -18
- camel/benchmarks/apibench.py +1 -5
- camel/benchmarks/nexus.py +1 -5
- camel/benchmarks/ragbench.py +2 -2
- camel/bots/telegram_bot.py +1 -5
- camel/configs/__init__.py +9 -0
- camel/configs/aiml_config.py +80 -0
- camel/configs/gemini_config.py +1 -1
- camel/configs/moonshot_config.py +63 -0
- camel/configs/sglang_config.py +4 -0
- camel/configs/siliconflow_config.py +91 -0
- camel/datagen/__init__.py +3 -1
- camel/datagen/self_improving_cot.py +821 -0
- camel/datagen/source2synth/__init__.py +31 -0
- camel/{synthetic_datagen → datagen}/source2synth/data_processor.py +194 -29
- camel/{synthetic_datagen → datagen}/source2synth/models.py +25 -0
- camel/{synthetic_datagen → datagen}/source2synth/user_data_processor_config.py +9 -8
- camel/datahubs/huggingface.py +3 -3
- camel/embeddings/__init__.py +2 -0
- camel/embeddings/jina_embedding.py +161 -0
- camel/messages/func_message.py +1 -1
- camel/models/__init__.py +4 -0
- camel/models/aiml_model.py +147 -0
- camel/models/deepseek_model.py +29 -11
- camel/models/groq_model.py +0 -2
- camel/models/model_factory.py +9 -0
- camel/models/moonshot_model.py +138 -0
- camel/models/openai_model.py +1 -9
- camel/models/siliconflow_model.py +142 -0
- camel/societies/workforce/role_playing_worker.py +2 -4
- camel/societies/workforce/single_agent_worker.py +1 -6
- camel/societies/workforce/workforce.py +3 -9
- camel/toolkits/__init__.py +4 -0
- camel/toolkits/reddit_toolkit.py +8 -38
- camel/toolkits/search_toolkit.py +17 -6
- camel/toolkits/semantic_scholar_toolkit.py +308 -0
- camel/toolkits/sympy_toolkit.py +778 -0
- camel/toolkits/whatsapp_toolkit.py +11 -32
- camel/types/enums.py +205 -16
- camel/types/unified_model_type.py +5 -0
- camel/utils/__init__.py +7 -2
- camel/utils/commons.py +198 -21
- camel/utils/deduplication.py +199 -0
- camel/utils/token_counting.py +1 -39
- {camel_ai-0.2.18.dist-info → camel_ai-0.2.20.dist-info}/METADATA +17 -12
- {camel_ai-0.2.18.dist-info → camel_ai-0.2.20.dist-info}/RECORD +53 -41
- /camel/datagen/{cotdatagen.py → cot_datagen.py} +0 -0
- {camel_ai-0.2.18.dist-info → camel_ai-0.2.20.dist-info}/LICENSE +0 -0
- {camel_ai-0.2.18.dist-info → camel_ai-0.2.20.dist-info}/WHEEL +0 -0
camel/utils/commons.py
CHANGED
|
@@ -11,7 +11,9 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
import functools
|
|
14
15
|
import importlib
|
|
16
|
+
import logging
|
|
15
17
|
import os
|
|
16
18
|
import platform
|
|
17
19
|
import re
|
|
@@ -47,6 +49,8 @@ from .constants import Constants
|
|
|
47
49
|
|
|
48
50
|
F = TypeVar('F', bound=Callable[..., Any])
|
|
49
51
|
|
|
52
|
+
logger = logging.getLogger(__name__)
|
|
53
|
+
|
|
50
54
|
|
|
51
55
|
def print_text_animated(text, delay: float = 0.02, end: str = ""):
|
|
52
56
|
r"""Prints the given text with an animated effect.
|
|
@@ -620,33 +624,206 @@ def handle_http_error(response: requests.Response) -> str:
|
|
|
620
624
|
return "HTTP Error"
|
|
621
625
|
|
|
622
626
|
|
|
623
|
-
def
|
|
624
|
-
|
|
625
|
-
) ->
|
|
626
|
-
r"""
|
|
627
|
+
def retry_on_error(
    max_retries: int = 3, initial_delay: float = 1.0
) -> Callable:
    r"""Decorator to retry function calls on exception with exponential
    backoff.

    The wrapped function is called once and, if it raises, retried up to
    `max_retries` more times. The wait before the first retry is
    `initial_delay` seconds and doubles after every failed attempt. When the
    final attempt also fails, its exception is re-raised unchanged.

    Args:
        max_retries (int): Maximum number of retry attempts after the first
            call. Must be non-negative. (default: :obj:`3`)
        initial_delay (float): Initial delay between retries in seconds.
            Must be non-negative. (default: :obj:`1.0`)

    Returns:
        Callable: Decorated function with retry logic

    Raises:
        ValueError: If `max_retries` or `initial_delay` is negative.
    """
    # Validate eagerly: with a negative ``max_retries`` the retry loop would
    # never call the function at all, and a negative delay would only fail
    # later inside ``time.sleep``.
    if max_retries < 0:
        raise ValueError(
            f"max_retries must be non-negative, got {max_retries}"
        )
    if initial_delay < 0:
        raise ValueError(
            f"initial_delay must be non-negative, got {initial_delay}"
        )

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            delay = initial_delay

            # Every attempt except the last one is allowed to fail and is
            # followed by a sleep.
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    logger.warning(
                        "Attempt %d failed: %s. Retrying in %.1fs...",
                        attempt + 1,
                        e,
                        delay,
                    )
                    time.sleep(delay)
                    delay *= 2  # Exponential backoff

            # Final attempt: log and let the exception propagate.
            try:
                return func(*args, **kwargs)
            except Exception as e:
                logger.error("Failed after %d retries: %s", max_retries, e)
                raise

        return wrapper

    return decorator
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
class BatchProcessor:
    r"""Handles batch processing with dynamic sizing and error handling based
    on system load.

    The class does not execute any work itself. Callers report the outcome of
    each batch through :meth:`adjust_batch_size` and read the resulting
    `batch_size` and `max_workers` attributes to size the next batch.
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        initial_batch_size: Optional[int] = None,
        monitoring_interval: float = 5.0,
        cpu_threshold: float = 80.0,
        memory_threshold: float = 85.0,
    ):
        r"""Initialize the BatchProcessor with dynamic worker allocation.

        Args:
            max_workers: Maximum number of workers. If None, will be
                determined dynamically based on system resources.
                (default: :obj:`None`)
            initial_batch_size: Initial size of each batch. If `None`,
                defaults to `10`. (default: :obj:`None`)
            monitoring_interval: Interval in seconds between resource checks.
                (default: :obj:`5.0`)
            cpu_threshold: CPU usage percentage threshold for scaling down.
                (default: :obj:`80.0`)
            memory_threshold: Memory usage percentage threshold for scaling
                down. (default: :obj:`85.0`)
        """
        # Imported lazily and kept on the instance so the helper methods can
        # reach it without a module-level import.
        import psutil

        self.monitoring_interval = monitoring_interval
        self.cpu_threshold = cpu_threshold
        self.memory_threshold = memory_threshold
        self.last_check_time = time.time()
        self.psutil = psutil

        # Initialize performance metrics
        self.total_processed = 0
        self.total_errors = 0
        self.processing_times: List[float] = []

        if max_workers is None:
            self.max_workers = self._calculate_optimal_workers()
        else:
            self.max_workers = max_workers

        self.batch_size = (
            10 if initial_batch_size is None else initial_batch_size
        )
        self.min_batch_size = 1
        self.max_batch_size = 20
        self.backoff_factor = 0.8
        self.success_factor = 1.2

        # Initial resource check
        self._update_resource_metrics()

    def _calculate_optimal_workers(self) -> int:
        r"""Calculate optimal number of workers based on system resources.

        Returns:
            int: A worker count of at least 1, derived from the CPU count and
                reduced when CPU or memory usage is high.
        """
        # psutil documents that cpu_count() may return None when the count
        # cannot be determined; fall back to a single CPU in that case.
        cpu_count = self.psutil.cpu_count() or 1
        # NOTE: with ``interval=1`` this call blocks for about one second.
        cpu_percent = self.psutil.cpu_percent(interval=1)
        memory = self.psutil.virtual_memory()

        # Base number of workers on CPU count and current load
        if cpu_percent > self.cpu_threshold:
            workers = max(1, cpu_count // 4)
        elif cpu_percent > 60:
            workers = max(1, cpu_count // 2)
        else:
            workers = max(1, cpu_count - 1)

        # Further reduce if memory is constrained
        if memory.percent > self.memory_threshold:
            workers = max(1, workers // 2)

        return workers

    def _update_resource_metrics(self) -> None:
        r"""Update current resource usage metrics and the check timestamp."""
        self.current_cpu = self.psutil.cpu_percent()
        self.current_memory = self.psutil.virtual_memory().percent
        self.last_check_time = time.time()

    def _should_check_resources(self) -> bool:
        r"""Determine if it's time to check resource usage again."""
        elapsed = time.time() - self.last_check_time
        return elapsed >= self.monitoring_interval

    def adjust_batch_size(
        self, success: bool, processing_time: Optional[float] = None
    ) -> None:
        r"""Adjust batch size based on success/failure and system resources.

        If a resource check is due and CPU or memory usage is above its
        threshold, the batch size and worker count are reduced and the
        success/failure adjustment is skipped for this call.

        Args:
            success (bool): Whether the last batch completed successfully
            processing_time (Optional[float]): Time taken to process the last
                batch. (default: :obj:`None`)
        """
        # Update metrics
        self.total_processed += 1
        if not success:
            self.total_errors += 1
        if processing_time is not None:
            self.processing_times.append(processing_time)

        # Check system resources if interval has elapsed
        if self._should_check_resources():
            self._update_resource_metrics()

            # Adjust based on resource usage
            if (
                self.current_cpu > self.cpu_threshold
                or self.current_memory > self.memory_threshold
            ):
                self.batch_size = max(
                    int(self.batch_size * self.backoff_factor),
                    self.min_batch_size,
                )
                self.max_workers = max(1, self.max_workers - 1)
                return

        # Adjust based on success/failure
        if success:
            # Truncation alone leaves small sizes stuck, e.g.
            # int(4 * 1.2) == 4, so always grow by at least one item.
            grown = max(
                int(self.batch_size * self.success_factor),
                self.batch_size + 1,
            )
            self.batch_size = min(grown, self.max_batch_size)
        else:
            self.batch_size = max(
                int(self.batch_size * self.backoff_factor),
                self.min_batch_size,
            )

    def get_performance_metrics(self) -> Dict[str, Any]:
        r"""Get current performance metrics.

        Returns:
            Dict containing performance metrics including:
                - total_processed: Total number of batches processed
                - error_rate: Percentage of failed batches
                - avg_processing_time: Average time per batch
                - current_batch_size: Current batch size
                - current_workers: Current number of workers
                - current_cpu: Current CPU usage percentage
                - current_memory: Current memory usage percentage
        """
        # ``max(1, ...)`` guards both ratios against division by zero before
        # any batch has been reported.
        error_rate = (self.total_errors / max(1, self.total_processed)) * 100
        avg_processing_time = sum(self.processing_times) / max(
            1, len(self.processing_times)
        )
        return {
            "total_processed": self.total_processed,
            "error_rate": error_rate,
            "avg_processing_time": avg_processing_time,
            "current_batch_size": self.batch_size,
            "current_workers": self.max_workers,
            "current_cpu": self.current_cpu,
            "current_memory": self.current_memory,
        }
|
|
650
827
|
|
|
651
828
|
|
|
652
829
|
def download_github_subdirectory(
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from typing import Dict, List, Literal, Optional
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
from pydantic import BaseModel
|
|
20
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
21
|
+
|
|
22
|
+
from camel.embeddings.base import BaseEmbedding
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DeduplicationResult(BaseModel):
    r"""Container for the outcome of a deduplication run.

    Attributes:
        original_texts (List[str]): The texts that were passed in.
        unique_ids (List[int]): Indices of the texts that are kept as unique
            (i.e. not marked as duplicates).
        unique_embeddings_dict (Dict[int, List[float]]): Embedding of every
            unique text, keyed by that text's index.
        duplicate_to_target_map (Dict[int, int]): For every duplicate text,
            maps its index to the index of the text it duplicates.
    """

    original_texts: List[str]
    unique_ids: List[int]
    unique_embeddings_dict: Dict[int, List[float]]
    duplicate_to_target_map: Dict[int, int]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def deduplicate_internally(
    texts: List[str],
    threshold: float = 0.65,
    embedding_instance: "Optional[BaseEmbedding[str]]" = None,
    embeddings: Optional[List[List[float]]] = None,
    strategy: Literal["top1", "llm-supervise"] = "top1",
) -> "DeduplicationResult":
    r"""Deduplicate a list of strings based on their cosine similarity.

    You can either:
    1) Provide a Camel `BaseEmbedding` instance via `embedding_instance` to
       let this function handle the embedding internally, OR
    2) Directly pass a list of pre-computed embeddings to `embeddings`.

    If both `embedding_instance` and `embeddings` are provided, the function
    will raise a ValueError to avoid ambiguous usage.

    Each text is only compared with the texts that precede it in `texts`.
    With the 'top1' strategy a text is marked as a duplicate of the earlier
    text it is most similar to, provided that similarity is strictly greater
    than `threshold`. The 'llm-supervise' strategy (letting an LLM decide
    whether texts are duplicates) is not yet implemented.

    Args:
        texts (List[str]): The list of texts to be deduplicated.
        threshold (float, optional): The similarity threshold for considering
            two texts as duplicates. Default is 0.65.
        embedding_instance (Optional[BaseEmbedding[str]], optional):
            A Camel embedding instance for automatic embedding. Defaults to
            None.
        embeddings (Optional[List[List[float]]], optional):
            Pre-computed embeddings of `texts`. Each element in the list
            corresponds to the embedding of the text in the same index of
            `texts`. Defaults to None.
        strategy (Literal["top1", "llm-supervise"], optional):
            The strategy to use for deduplication. Defaults to "top1".

    Returns:
        DeduplicationResult: An object that contains:
            - `original_texts`: The original texts.
            - `unique_ids`: The unique ids after deduplication.
            - `unique_embeddings_dict`: A dict mapping from (unique) text id
              to its embedding.
            - `duplicate_to_target_map`: A dict mapping from the id of a
              duplicate text to the id of the text it is considered a
              duplicate of.
        For empty `texts` an empty result is returned without computing any
        embeddings.

    Raises:
        NotImplementedError: If the strategy is not "top1".
        ValueError: If neither embeddings nor embedding_instance is provided,
            or if both are provided at the same time.
        ValueError: If the number of embeddings does not match the length of
            `texts`.

    Example:
        The output below is illustrative; the actual result depends on the
        embedding model.

        >>> from camel.embeddings.openai_embedding import OpenAIEmbedding
        >>> # Suppose we have 5 texts, some of which may be duplicates
        >>> texts = [
        ...     "What is AI?",
        ...     "Artificial Intelligence is about machines",
        ...     "What is AI?",
        ...     "Deep Learning is a subset of AI",
        ...     "What is artificial intelligence?"
        ... ]
        >>> # or any other BaseEmbedding instance
        >>> embedding_model = OpenAIEmbedding()
        >>> result = deduplicate_internally(
        ...     texts=texts,
        ...     threshold=0.7,
        ...     embedding_instance=embedding_model
        ... )
        >>> for uid in result.unique_ids:
        ...     print(texts[uid])
        What is AI?
        Artificial Intelligence is about machines
        Deep Learning is a subset of AI
        What is artificial intelligence?
        >>> # Index 2 is considered a duplicate of index 0.
        >>> print(result.duplicate_to_target_map)
        {2: 0}
    """
    if strategy == "llm-supervise":
        # TODO: Implement LLM-supervise deduplication.
        raise NotImplementedError(
            "LLM-supervise deduplication is not yet implemented."
        )
    if strategy != "top1":
        raise NotImplementedError(
            f"Deduplication strategy {strategy!r} is not implemented."
        )

    # Check if the parameters are valid.
    if embedding_instance is None and embeddings is None:
        raise ValueError(
            "Either 'embedding_instance' or 'embeddings' must be provided."
        )
    if embedding_instance is not None and embeddings is not None:
        raise ValueError(
            "Cannot provide both 'embedding_instance' and 'embeddings'. "
            "Please choose only one way to supply embeddings."
        )

    # Compare against None explicitly so that an empty embeddings list is
    # still checked against a non-empty `texts`.
    if embeddings is not None and len(embeddings) != len(texts):
        raise ValueError(
            "The length of 'embeddings' does not match the length "
            "of 'texts'."
        )

    # Nothing to compare; avoid handing an empty matrix to scikit-learn.
    if not texts:
        return DeduplicationResult(
            original_texts=texts,
            unique_ids=[],
            unique_embeddings_dict={},
            duplicate_to_target_map={},
        )

    if embedding_instance is not None:
        # Use Camel's embedding_instance to vectorize.
        vectors = embedding_instance.embed_list(texts)
        if len(vectors) != len(texts):
            raise ValueError(
                f"The embedding instance returned {len(vectors)} "
                f"embeddings for {len(texts)} texts."
            )
    else:
        # Use pre-supplied embeddings (validated above to be present).
        vectors = embeddings

    # Calculate cosine similarity.
    similarity_matrix = np.asarray(cosine_similarity(vectors))
    n = len(texts)

    # Only the strict lower triangle is eligible, so a text is never
    # compared with itself or with texts that come after it. Using a boolean
    # mask (instead of zeroing the other cells) keeps excluded cells from
    # passing the test when `threshold` is zero or negative.
    earlier_mask = np.tril(np.ones((n, n), dtype=bool), k=-1)
    candidates = np.where(
        earlier_mask & (similarity_matrix > threshold),
        similarity_matrix,
        -np.inf,
    )

    # For each row pick the most similar eligible column; rows without any
    # eligible column keep -inf at the chosen position.
    best_targets = candidates.argmax(axis=1)
    has_match = candidates[np.arange(n), best_targets] > -np.inf

    # Construct the "duplicate->target" mapping with plain Python ints.
    duplicate_to_target_map: Dict[int, int] = {
        int(i): int(best_targets[i]) for i in np.flatnonzero(has_match)
    }

    # Get the actual unique ids and embeddings.
    unique_ids = [i for i in range(n) if i not in duplicate_to_target_map]
    unique_embeddings_dict = {i: vectors[i] for i in unique_ids}

    return DeduplicationResult(
        original_texts=texts,
        unique_ids=unique_ids,
        unique_embeddings_dict=unique_embeddings_dict,
        duplicate_to_target_map=duplicate_to_target_map,
    )
|
camel/utils/token_counting.py
CHANGED
|
@@ -112,7 +112,7 @@ class OpenAITokenCounter(BaseTokenCounter):
|
|
|
112
112
|
elif ("gpt-3.5-turbo" in self.model) or ("gpt-4" in self.model):
|
|
113
113
|
self.tokens_per_message = 3
|
|
114
114
|
self.tokens_per_name = 1
|
|
115
|
-
elif "o1" in self.model:
|
|
115
|
+
elif ("o1" in self.model) or ("o3" in self.model):
|
|
116
116
|
self.tokens_per_message = 2
|
|
117
117
|
self.tokens_per_name = 1
|
|
118
118
|
else:
|
|
@@ -267,44 +267,6 @@ class AnthropicTokenCounter(BaseTokenCounter):
|
|
|
267
267
|
).input_tokens
|
|
268
268
|
|
|
269
269
|
|
|
270
|
-
class GeminiTokenCounter(BaseTokenCounter):
    def __init__(self, model_type: UnifiedModelType):
        r"""Create a token counter backed by a Gemini generative model.

        Args:
            model_type (UnifiedModelType): Model type for which tokens will be
                counted.
        """
        import google.generativeai as genai

        self._client = genai.GenerativeModel(model_type)

    def count_tokens_from_messages(self, messages: List[OpenAIMessage]) -> int:
        r"""Count the tokens of a chat history using the Gemini client.

        Each message is converted to the Gemini layout first: the
        'assistant' role becomes 'model', every other role becomes 'user',
        and the message content is passed as `parts`.

        Args:
            messages (List[OpenAIMessage]): Message list with the chat history
                in OpenAI API format.

        Returns:
            int: Number of tokens in the messages.
        """
        gemini_messages = [
            {
                "role": (
                    'model' if message.get('role') == 'assistant' else 'user'
                ),
                "parts": message.get("content"),
            }
            for message in messages
        ]
        token_count = self._client.count_tokens(gemini_messages)
        return token_count.total_tokens
|
|
306
|
-
|
|
307
|
-
|
|
308
270
|
class LiteLLMTokenCounter(BaseTokenCounter):
|
|
309
271
|
def __init__(self, model_type: UnifiedModelType):
|
|
310
272
|
r"""Constructor for the token counter for LiteLLM models.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: camel-ai
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.20
|
|
4
4
|
Summary: Communicative Agents for AI Society Study
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: communicative-ai,ai-societies,artificial-intelligence,deep-learning,multi-agent-systems,cooperative-ai,natural-language-processing,large-language-models
|
|
@@ -37,7 +37,7 @@ Requires-Dist: azure-storage-blob (>=12.21.0,<13.0.0) ; extra == "storage" or ex
|
|
|
37
37
|
Requires-Dist: beautifulsoup4 (>=4,<5) ; extra == "document-tools" or extra == "all"
|
|
38
38
|
Requires-Dist: botocore (>=1.35.3,<2.0.0) ; extra == "storage" or extra == "all"
|
|
39
39
|
Requires-Dist: cohere (>=5.11.0,<6.0.0) ; extra == "rag" or extra == "model-platforms" or extra == "all"
|
|
40
|
-
Requires-Dist: colorama (>=0,<
|
|
40
|
+
Requires-Dist: colorama (>=0.4.6,<0.5.0)
|
|
41
41
|
Requires-Dist: curl_cffi (==0.6.2)
|
|
42
42
|
Requires-Dist: dappier (>=0.3.3,<0.4.0) ; extra == "web-tools" or extra == "all"
|
|
43
43
|
Requires-Dist: datacommons (>=1.4.3,<2.0.0) ; extra == "data-tools" or extra == "all"
|
|
@@ -55,7 +55,6 @@ Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0) ; extra == "media-tools" or extra
|
|
|
55
55
|
Requires-Dist: firecrawl-py (>=1.0.0,<2.0.0) ; extra == "web-tools" or extra == "all"
|
|
56
56
|
Requires-Dist: fish-audio-sdk (>=2024.12.5,<2025.0.0) ; extra == "model-platforms" or extra == "all"
|
|
57
57
|
Requires-Dist: google-cloud-storage (>=2.18.0,<3.0.0) ; extra == "storage" or extra == "all"
|
|
58
|
-
Requires-Dist: google-generativeai (>=0.6.0,<0.7.0) ; extra == "model-platforms" or extra == "all"
|
|
59
58
|
Requires-Dist: googlemaps (>=4.10.0,<5.0.0) ; extra == "web-tools" or extra == "all"
|
|
60
59
|
Requires-Dist: httpx (>=0.23.0,<0.27.3)
|
|
61
60
|
Requires-Dist: imageio[pyav] (>=2.34.2,<3.0.0) ; extra == "media-tools" or extra == "all"
|
|
@@ -70,21 +69,21 @@ Requires-Dist: nebula3-python (==3.8.2) ; extra == "rag" or extra == "storage" o
|
|
|
70
69
|
Requires-Dist: neo4j (>=5.18.0,<6.0.0) ; extra == "rag" or extra == "storage" or extra == "all"
|
|
71
70
|
Requires-Dist: newspaper3k (>=0.2.8,<0.3.0) ; extra == "web-tools" or extra == "all"
|
|
72
71
|
Requires-Dist: notion-client (>=2.2.1,<3.0.0) ; extra == "communication-tools" or extra == "all"
|
|
73
|
-
Requires-Dist: numpy (>=1,<2)
|
|
74
|
-
Requires-Dist: openai (>=1.
|
|
72
|
+
Requires-Dist: numpy (>=1.26,<2.0)
|
|
73
|
+
Requires-Dist: openai (>=1.59.7,<2.0.0)
|
|
75
74
|
Requires-Dist: openapi-spec-validator (>=0.7.1,<0.8.0) ; extra == "document-tools" or extra == "all"
|
|
76
75
|
Requires-Dist: openbb (>=4.3.5,<5.0.0) ; extra == "data-tools" or extra == "all"
|
|
77
76
|
Requires-Dist: opencv-python (>=4,<5) ; extra == "huggingface" or extra == "all"
|
|
78
77
|
Requires-Dist: outlines (>=0.1.7,<0.2.0) ; extra == "all"
|
|
79
78
|
Requires-Dist: pandas (>=1.5.3,<2.0.0) ; extra == "data-tools" or extra == "all"
|
|
80
79
|
Requires-Dist: pandasai (>=2.3.0,<3.0.0) ; extra == "rag" or extra == "document-tools" or extra == "all"
|
|
81
|
-
Requires-Dist: pandoc
|
|
82
|
-
Requires-Dist: pathlib (>=1.0.1,<2.0.0)
|
|
80
|
+
Requires-Dist: pandoc (>=2.4,<3.0)
|
|
83
81
|
Requires-Dist: pdfplumber (>=0.11.0,<0.12.0) ; extra == "document-tools" or extra == "all"
|
|
84
82
|
Requires-Dist: pillow (>=10.1.0,<11.0.0) ; extra == "media-tools" or extra == "all"
|
|
85
83
|
Requires-Dist: prance (>=23.6.21.0,<24.0.0.0) ; extra == "document-tools" or extra == "all"
|
|
86
84
|
Requires-Dist: praw (>=7.7.1,<8.0.0) ; extra == "communication-tools" or extra == "all"
|
|
87
|
-
Requires-Dist: protobuf (>=
|
|
85
|
+
Requires-Dist: protobuf (>=5,<6)
|
|
86
|
+
Requires-Dist: psutil (>=5.9.8,<6.0.0)
|
|
88
87
|
Requires-Dist: pyTelegramBotAPI (>=4.18.0,<5.0.0) ; extra == "communication-tools" or extra == "all"
|
|
89
88
|
Requires-Dist: pydantic (>=1.9,<2.10)
|
|
90
89
|
Requires-Dist: pydub (>=0.25.1,<0.26.0) ; extra == "media-tools" or extra == "all"
|
|
@@ -95,7 +94,6 @@ Requires-Dist: pytest (>=7,<8) ; extra == "test"
|
|
|
95
94
|
Requires-Dist: pytest-asyncio (>=0.23.0,<0.24.0) ; extra == "test"
|
|
96
95
|
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
|
97
96
|
Requires-Dist: qdrant-client (>=1.9.0,<2.0.0) ; extra == "rag" or extra == "storage" or extra == "all"
|
|
98
|
-
Requires-Dist: ragas (<=0.1.6) ; extra == "all"
|
|
99
97
|
Requires-Dist: rank-bm25 (>=0.2.2,<0.3.0) ; extra == "rag" or extra == "all"
|
|
100
98
|
Requires-Dist: redis (>=5.0.6,<6.0.0) ; extra == "storage" or extra == "all"
|
|
101
99
|
Requires-Dist: reka-api (>=3.0.8,<4.0.0) ; extra == "model-platforms" or extra == "all"
|
|
@@ -103,12 +101,13 @@ Requires-Dist: requests_oauthlib (>=1.3.1,<2.0.0) ; extra == "web-tools" or extr
|
|
|
103
101
|
Requires-Dist: rouge (>=1.0.1,<2.0.0) ; extra == "data-tools" or extra == "all"
|
|
104
102
|
Requires-Dist: scholarly[tor] (==1.7.11) ; extra == "research-tools" or extra == "all"
|
|
105
103
|
Requires-Dist: sentence-transformers (>=3.0.1,<4.0.0) ; extra == "rag" or extra == "all"
|
|
106
|
-
Requires-Dist: sentencepiece (>=0,<
|
|
104
|
+
Requires-Dist: sentencepiece (>=0.2,<0.3) ; extra == "huggingface" or extra == "all"
|
|
107
105
|
Requires-Dist: sglang (>=0.4.0,<0.5.0) ; extra == "model-platforms" or extra == "all"
|
|
108
106
|
Requires-Dist: slack-bolt (>=1.20.1,<2.0.0) ; extra == "communication-tools" or extra == "all"
|
|
109
107
|
Requires-Dist: slack-sdk (>=3.27.2,<4.0.0) ; extra == "communication-tools" or extra == "all"
|
|
110
|
-
Requires-Dist: soundfile (>=0,<
|
|
108
|
+
Requires-Dist: soundfile (>=0.13,<0.14) ; extra == "huggingface" or extra == "all"
|
|
111
109
|
Requires-Dist: stripe (>=11.3.0,<12.0.0) ; extra == "data-tools" or extra == "all"
|
|
110
|
+
Requires-Dist: sympy (>=1.13.3,<2.0.0) ; extra == "web-tools" or extra == "all"
|
|
112
111
|
Requires-Dist: tavily-python (>=0.5.0,<0.6.0) ; extra == "web-tools" or extra == "all"
|
|
113
112
|
Requires-Dist: textblob (>=0.17.1,<0.18.0) ; extra == "data-tools" or extra == "all"
|
|
114
113
|
Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
|
|
@@ -408,7 +407,8 @@ Practical guides and tutorials for implementing specific functionalities in CAME
|
|
|
408
407
|
| **[CoT Data Generation and SFT Qwen with Unsloth](https://docs.camel-ai.org/cookbooks/data_generation/cot_data_gen_sft_qwen_unsolth_upload_huggingface.html)** | Discover how to generate CoT data using CAMEL and SFT Qwen with Unsloth, and seamlessly upload your data and model to Huggingface. |
|
|
409
408
|
| **[Agentic Data Generation, Evaluation & Filtering with Reward Models](https://docs.camel-ai.org/cookbooks/data_generation/synthetic_dataevaluation&filter_with_reward_model.html)** | Discover methods for generating, evaluating, and filtering agentic data using reward models to enhance the quality and efficiency of your synthetic data pipelines. |
|
|
410
409
|
| **[Data Model Generation and Structured Output with Qwen Model](https://docs.camel-ai.org/cookbooks/data_generation/data_model_generation_and_structured_output_with_qwen.html)** |Learn how to generate data models and structured outputs using the Qwen Model for improved data representation.|
|
|
411
|
-
|
|
410
|
+
| **[Distill Math Reasoning Data from DeepSeek R1](https://docs.camel-ai.org/cookbooks/data_generation/distill_math_reasoning_data_from_deepseek_r1.html)** | Learn how to set up and leverage CAMEL's data distillation pipeline for distilling high-quality math reasoning data with thought process (Long CoT data) from DeepSeek R1, and uploading the results to Hugging Face. |
|
|
411
|
+
| **[Self-Improving Math Reasoning Data Distillation from DeepSeek R1](https://docs.camel-ai.org/cookbooks/data_generation/self_improving_math_reasoning_data_distillation_from_deepSeek_r1.html)** | Learn how to set up and leverage CAMEL's data distillation pipeline for self-improving math reasoning data distillation from DeepSeek R1, and uploading the results to Hugging Face. |
|
|
412
412
|
|
|
413
413
|
### Multi-Agent Systems & Applications
|
|
414
414
|
| Cookbook | Description |
|
|
@@ -418,6 +418,7 @@ Practical guides and tutorials for implementing specific functionalities in CAME
|
|
|
418
418
|
| **[Dynamic Travel Planner Role-Playing: Multi-Agent System with Real-Time Insights Powered by Dappier](https://docs.camel-ai.org/cookbooks/applications/dynamic_travel_planner.html)** | Explore an innovative approach to travel planning, blending AI-driven role-playing and real-time data for seamless experiences. |
|
|
419
419
|
| **[Customer Service Discord Bot with Agentic RAG](https://docs.camel-ai.org/cookbooks/applications/customer_service_Discord_bot_using_SambaNova_with_agentic_RAG.html)** | Learn how to build a robust customer service bot for Discord using Agentic RAG. |
|
|
420
420
|
| **[Customer Service Discord Bot with Local Model](https://docs.camel-ai.org/cookbooks/applications/customer_service_Discord_bot_using_local_model_with_agentic_RAG.html)** | Learn how to build a robust customer service bot for Discord using Agentic RAG which supports local deployment. |
|
|
421
|
+
| **[Customer Service Discord Bot for Finance with OpenBB](https://docs.camel-ai.org/cookbooks/applications/finance_discord_bot.html)** | Learn how to build a simple yet powerful financial data assistant Discord bot using OpenBB tools. |
|
|
421
422
|
|
|
422
423
|
### Data Processing
|
|
423
424
|
| Cookbook | Description |
|
|
@@ -456,6 +457,10 @@ We implemented amazing research ideas from other works for you to build, compare
|
|
|
456
457
|
|
|
457
458
|
- `Self-Instruct` from *Yizhong Wang et al.*: [SELF-INSTRUCT: Aligning Language Models with Self-Generated Instructions](https://arxiv.org/pdf/2212.10560). [[Example](https://github.com/camel-ai/camel/blob/master/examples/datagen/self_instruct/self_instruct.py)]
|
|
458
459
|
|
|
460
|
+
- `Source2Synth` from *Alisia Lupidi et al.*: [Source2Synth: Synthetic Data Generation and Curation Grounded in Real Data Sources](https://arxiv.org/abs/2409.08239). [[Example](https://github.com/camel-ai/camel/blob/master/examples/datagen/source2synth.py)]
|
|
461
|
+
|
|
462
|
+
- `STaR` from *Eric Zelikman et al.*: [STaR: Bootstrapping Reasoning With Reasoning](https://arxiv.org/abs/2203.14465). [[Example](https://github.com/camel-ai/camel/blob/master/examples/datagen/star)]
|
|
463
|
+
|
|
459
464
|
## Other Research Works Based on Camel
|
|
460
465
|
- [Agent Trust](http://agent-trust.camel-ai.org/): Can Large Language Model Agents Simulate Human Trust Behavior?
|
|
461
466
|
|