camel-ai 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (53):
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +29 -30
  3. camel/agents/knowledge_graph_agent.py +1 -5
  4. camel/agents/multi_hop_generator_agent.py +35 -3
  5. camel/agents/programmed_agent_instruction.py +73 -18
  6. camel/benchmarks/apibench.py +1 -5
  7. camel/benchmarks/nexus.py +1 -5
  8. camel/benchmarks/ragbench.py +2 -2
  9. camel/bots/telegram_bot.py +1 -5
  10. camel/configs/__init__.py +9 -0
  11. camel/configs/aiml_config.py +80 -0
  12. camel/configs/gemini_config.py +1 -1
  13. camel/configs/moonshot_config.py +63 -0
  14. camel/configs/sglang_config.py +4 -0
  15. camel/configs/siliconflow_config.py +91 -0
  16. camel/datagen/__init__.py +3 -1
  17. camel/datagen/self_improving_cot.py +821 -0
  18. camel/datagen/source2synth/__init__.py +31 -0
  19. camel/{synthetic_datagen → datagen}/source2synth/data_processor.py +194 -29
  20. camel/{synthetic_datagen → datagen}/source2synth/models.py +25 -0
  21. camel/{synthetic_datagen → datagen}/source2synth/user_data_processor_config.py +9 -8
  22. camel/datahubs/huggingface.py +3 -3
  23. camel/embeddings/__init__.py +2 -0
  24. camel/embeddings/jina_embedding.py +161 -0
  25. camel/messages/func_message.py +1 -1
  26. camel/models/__init__.py +4 -0
  27. camel/models/aiml_model.py +147 -0
  28. camel/models/deepseek_model.py +29 -11
  29. camel/models/groq_model.py +0 -2
  30. camel/models/model_factory.py +9 -0
  31. camel/models/moonshot_model.py +138 -0
  32. camel/models/openai_model.py +1 -9
  33. camel/models/siliconflow_model.py +142 -0
  34. camel/societies/workforce/role_playing_worker.py +2 -4
  35. camel/societies/workforce/single_agent_worker.py +1 -6
  36. camel/societies/workforce/workforce.py +3 -9
  37. camel/toolkits/__init__.py +4 -0
  38. camel/toolkits/reddit_toolkit.py +8 -38
  39. camel/toolkits/search_toolkit.py +17 -6
  40. camel/toolkits/semantic_scholar_toolkit.py +308 -0
  41. camel/toolkits/sympy_toolkit.py +778 -0
  42. camel/toolkits/whatsapp_toolkit.py +11 -32
  43. camel/types/enums.py +205 -16
  44. camel/types/unified_model_type.py +5 -0
  45. camel/utils/__init__.py +7 -2
  46. camel/utils/commons.py +198 -21
  47. camel/utils/deduplication.py +199 -0
  48. camel/utils/token_counting.py +1 -39
  49. {camel_ai-0.2.18.dist-info → camel_ai-0.2.20.dist-info}/METADATA +17 -12
  50. {camel_ai-0.2.18.dist-info → camel_ai-0.2.20.dist-info}/RECORD +53 -41
  51. /camel/datagen/{cotdatagen.py → cot_datagen.py} +0 -0
  52. {camel_ai-0.2.18.dist-info → camel_ai-0.2.20.dist-info}/LICENSE +0 -0
  53. {camel_ai-0.2.18.dist-info → camel_ai-0.2.20.dist-info}/WHEEL +0 -0
@@ -0,0 +1,31 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from .data_processor import (
15
+ DataCurator,
16
+ ExampleConstructor,
17
+ UserDataProcessor,
18
+ )
19
+ from .models import MultiHopQA, ReasoningStep
20
+ from .user_data_processor_config import (
21
+ ProcessorConfig,
22
+ )
23
+
24
+ __all__ = [
25
+ "DataCurator",
26
+ "ExampleConstructor",
27
+ "ProcessorConfig",
28
+ "UserDataProcessor",
29
+ "ReasoningStep",
30
+ "MultiHopQA",
31
+ ]
@@ -15,33 +15,61 @@
15
15
  import random
16
16
  from typing import Any, Dict, List, Optional, Sequence
17
17
 
18
- import numpy as np
19
18
  from tqdm import tqdm
20
19
 
21
20
  from camel.agents.multi_hop_generator_agent import MultiHopGeneratorAgent
22
- from camel.logger import get_logger
23
- from camel.synthetic_datagen.source2synth.user_data_processor_config import (
21
+ from camel.datagen.source2synth.user_data_processor_config import (
24
22
  ProcessorConfig,
25
23
  )
24
+ from camel.logger import get_logger
26
25
 
27
26
  logger = get_logger(__name__)
28
27
 
29
28
 
30
29
  class UserDataProcessor:
31
- r"""User Data Processor."""
30
+ r"""A processor for generating multi-hop question-answer pairs from user
31
+ data.
32
+
33
+ This class handles the processing of text data to generate multi-hop
34
+ question-answer pairs using either an AI model or rule-based approaches.
35
+ It manages the entire pipeline from text preprocessing to dataset curation.
36
+
37
+ Attributes:
38
+ config (ProcessorConfig): Configuration for data processing parameters.
39
+ rng (random.Random): Random number generator for reproducibility.
40
+ multi_hop_agent (Optional[MultiHopGeneratorAgent]): Agent for
41
+ generating QA pairs.
42
+ """
32
43
 
33
44
  def __init__(self, config: Optional[ProcessorConfig] = None):
45
+ r"""Initialize the UserDataProcessor.
46
+
47
+ Args:
48
+ config (Optional[ProcessorConfig], optional): Configuration for
49
+ data processing. (default: :obj:`None`)
50
+ """
34
51
  self.config = config or ProcessorConfig()
35
- random.seed(self.config.seed)
36
- np.random.seed(self.config.seed)
52
+ self.rng = random.Random(self.config.seed)
37
53
  self.multi_hop_agent = (
38
- MultiHopGeneratorAgent() if self.config.use_ai_model else None
54
+ self.config.hop_generating_agent
55
+ if self.config.use_ai_model
56
+ else None
39
57
  )
40
58
 
41
59
  def process_text(
42
60
  self, text: str, source: str = "user_input"
43
61
  ) -> List[Dict[str, Any]]:
44
- r"""Process a single text."""
62
+ r"""Process a single text to generate multi-hop QA pairs.
63
+
64
+ Args:
65
+ text (str): The input text to process.
66
+ source (str, optional): Source identifier for the text.
67
+ (default: :obj:`"user_input"`)
68
+
69
+ Returns:
70
+ List[Dict[str, Any]]: List of processed examples with QA pairs and
71
+ metadata.
72
+ """
45
73
  # Convert text to standard format
46
74
  raw_data = [
47
75
  {
@@ -55,7 +83,7 @@ class UserDataProcessor:
55
83
  examples = constructor.construct_examples(raw_data)
56
84
 
57
85
  # Manage data
58
- curator = DataCurator(self.config)
86
+ curator = DataCurator(self.config, self.rng)
59
87
  final_dataset = curator.curate_dataset(examples)
60
88
 
61
89
  return final_dataset
@@ -63,7 +91,20 @@ class UserDataProcessor:
63
91
  def process_batch(
64
92
  self, texts: List[str], sources: Optional[List[str]] = None
65
93
  ) -> List[Dict[str, Any]]:
66
- r"""Process multiple texts in batch."""
94
+ r"""Process multiple texts in batch to generate multi-hop QA pairs.
95
+
96
+ Args:
97
+ texts (List[str]): List of input texts to process.
98
+ sources (Optional[List[str]], optional): List of source
99
+ identifiers. (default: :obj:`None`)
100
+
101
+ Returns:
102
+ List[Dict[str, Any]]: List of processed examples with QA pairs and
103
+ metadata.
104
+
105
+ Raises:
106
+ ValueError: If length of sources doesn't match length of texts.
107
+ """
67
108
  if sources is None:
68
109
  sources = ["user_input"] * len(texts)
69
110
  elif len(sources) != len(texts):
@@ -82,27 +123,52 @@ class UserDataProcessor:
82
123
  examples = constructor.construct_examples(raw_data)
83
124
 
84
125
  # Manage data
85
- curator = DataCurator(self.config)
126
+ curator = DataCurator(self.config, self.rng)
86
127
  final_dataset = curator.curate_dataset(examples)
87
128
 
88
129
  return final_dataset
89
130
 
90
131
 
91
132
  class ExampleConstructor:
92
- r"""Example Constructor."""
133
+ r"""Constructs training examples from raw text data.
134
+
135
+ This class handles the construction of training examples by preprocessing
136
+ text, extracting information pairs, and generating question-answer pairs.
137
+
138
+ Attributes:
139
+ config (ProcessorConfig): Configuration for example construction.
140
+ multi_hop_agent (Optional[MultiHopGeneratorAgent]): Agent for QA
141
+ generation.
142
+ """
93
143
 
94
144
  def __init__(
95
145
  self,
96
146
  config: ProcessorConfig,
97
147
  multi_hop_agent: Optional[MultiHopGeneratorAgent] = None,
98
148
  ):
149
+ r"""Initialize the ExampleConstructor.
150
+
151
+ Args:
152
+ config (ProcessorConfig): Configuration for example construction.
153
+ multi_hop_agent (Optional[MultiHopGeneratorAgent], optional):
154
+ Agent for generating multi-hop QA pairs. (default: :obj:`None`)
155
+ """
99
156
  self.config = config
100
157
  self.multi_hop_agent = multi_hop_agent
101
158
 
102
159
  def construct_examples(
103
160
  self, raw_data: List[Dict[str, Any]]
104
161
  ) -> List[Dict[str, Any]]:
105
- r"""Construct training examples."""
162
+ r"""Construct training examples from raw data.
163
+
164
+ Args:
165
+ raw_data (List[Dict[str, Any]]): List of raw data dictionaries
166
+ containing text and metadata.
167
+
168
+ Returns:
169
+ List[Dict[str, Any]]: List of constructed examples with QA pairs
170
+ and metadata.
171
+ """
106
172
  logger.info("Starting to construct training examples...")
107
173
  examples = []
108
174
 
@@ -135,7 +201,15 @@ class ExampleConstructor:
135
201
  return examples
136
202
 
137
203
  def _preprocess_text(self, text: str) -> str:
138
- r"""Text preprocessing."""
204
+ r"""Preprocess input text for example construction.
205
+
206
+ Args:
207
+ text (str): Input text to preprocess.
208
+
209
+ Returns:
210
+ str: Preprocessed text, or empty string if text fails quality
211
+ checks.
212
+ """
139
213
  if not isinstance(text, str):
140
214
  return ''
141
215
 
@@ -156,7 +230,14 @@ class ExampleConstructor:
156
230
  return text
157
231
 
158
232
  def _check_text_quality(self, text: str) -> bool:
159
- r"""Check text quality."""
233
+ r"""Check the quality of input text.
234
+
235
+ Args:
236
+ text (str): Text to check quality for.
237
+
238
+ Returns:
239
+ bool: True if text passes quality checks, False otherwise.
240
+ """
160
241
  # 1. Basic quality check
161
242
  if text.count('.') < 2: # Must have at least 2 sentences
162
243
  return False
@@ -171,7 +252,15 @@ class ExampleConstructor:
171
252
  return True
172
253
 
173
254
  def _extract_info_pairs(self, text: str) -> List[Dict[str, Sequence[str]]]:
174
- r"""Extract information pairs and relationships."""
255
+ r"""Extract information pairs and relationships from text.
256
+
257
+ Args:
258
+ text (str): Input text to extract information from.
259
+
260
+ Returns:
261
+ List[Dict[str, Sequence[str]]]: List of dictionaries containing
262
+ premise, intermediate, conclusion, and related contexts.
263
+ """
175
264
  # Split into sentences
176
265
  sentences = [s.strip() for s in text.split('.') if s.strip()]
177
266
  info_pairs = []
@@ -200,7 +289,15 @@ class ExampleConstructor:
200
289
  def _generate_qa_pairs(
201
290
  self, info_pairs: List[Dict[str, Sequence[str]]]
202
291
  ) -> List[Dict[str, str]]:
203
- r"""Generate multi-hop question-answer pairs."""
292
+ r"""Generate multi-hop question-answer pairs from information pairs.
293
+
294
+ Args:
295
+ info_pairs (List[Dict[str, Sequence[str]]]): List of information
296
+ pairs extracted from text.
297
+
298
+ Returns:
299
+ List[Dict[str, str]]: List of generated QA pairs.
300
+ """
204
301
  qa_pairs = []
205
302
 
206
303
  for pair in info_pairs:
@@ -219,7 +316,15 @@ class ExampleConstructor:
219
316
  return qa_pairs
220
317
 
221
318
  def _calculate_complexity(self, qa_pairs: List[Dict[str, Any]]) -> float:
222
- r"""Calculate complexity of QA pairs."""
319
+ r"""Calculate the complexity score for a set of QA pairs.
320
+
321
+ Args:
322
+ qa_pairs (List[Dict[str, Any]]): List of QA pairs to calculate
323
+ complexity for.
324
+
325
+ Returns:
326
+ float: Complexity score between 0.0 and 1.0.
327
+ """
223
328
  if not qa_pairs:
224
329
  return 0.0
225
330
 
@@ -233,10 +338,10 @@ class ExampleConstructor:
233
338
  supporting_facts_count = len(qa.get('supporting_facts', []))
234
339
 
235
340
  # 3. Question length
236
- question_length = len(qa['question'].split())
341
+ question_length = len(qa.get('question', '').split())
237
342
 
238
343
  # 4. Answer length
239
- answer_length = len(qa['answer'].split())
344
+ answer_length = len(qa.get('answer', '').split())
240
345
 
241
346
  # Calculate complexity of a single QA pair
242
347
  qa_complexity = (
@@ -256,15 +361,37 @@ class ExampleConstructor:
256
361
 
257
362
 
258
363
  class DataCurator:
259
- r"""Data Manager."""
364
+ r"""Manages and curates datasets of multi-hop question-answer pairs.
365
+
366
+ This class handles dataset management tasks including quality filtering,
367
+ complexity filtering, deduplication, and dataset sampling.
260
368
 
261
- def __init__(self, config: ProcessorConfig):
369
+ Attributes:
370
+ config (ProcessorConfig): Configuration for data curation parameters.
371
+ rng (random.Random): Random number generator for reproducible sampling.
372
+ """
373
+
374
+ def __init__(self, config: ProcessorConfig, rng: random.Random):
375
+ r"""Initialize the DataCurator.
376
+
377
+ Args:
378
+ config (ProcessorConfig): Configuration for data curation.
379
+ rng (random.Random): Random number generator for reproducibility.
380
+ """
262
381
  self.config = config
382
+ self.rng = rng
263
383
 
264
384
  def curate_dataset(
265
385
  self, examples: List[Dict[str, Any]]
266
386
  ) -> List[Dict[str, Any]]:
267
- r"""Dataset management."""
387
+ r"""Manage and curate a dataset through multiple filtering stages.
388
+
389
+ Args:
390
+ examples (List[Dict[str, Any]]): List of examples to curate.
391
+
392
+ Returns:
393
+ List[Dict[str, Any]]: Curated dataset meeting quality criteria.
394
+ """
268
395
  logger.info("Starting dataset management...")
269
396
 
270
397
  # 1. Quality filtering
@@ -296,7 +423,14 @@ class DataCurator:
296
423
  def _quality_filter(
297
424
  self, examples: List[Dict[str, Any]]
298
425
  ) -> List[Dict[str, Any]]:
299
- r"""Quality filtering."""
426
+ r"""Filter examples based on quality criteria.
427
+
428
+ Args:
429
+ examples (List[Dict[str, Any]]): List of examples to filter.
430
+
431
+ Returns:
432
+ List[Dict[str, Any]]: Examples that pass quality checks.
433
+ """
300
434
  filtered = []
301
435
 
302
436
  for example in examples:
@@ -314,7 +448,14 @@ class DataCurator:
314
448
  return filtered
315
449
 
316
450
  def _check_qa_quality(self, qa_pairs: List[Dict[str, str]]) -> bool:
317
- r"""Check quality of QA pairs."""
451
+ r"""Check the quality of question-answer pairs.
452
+
453
+ Args:
454
+ qa_pairs (List[Dict[str, str]]): List of QA pairs to check.
455
+
456
+ Returns:
457
+ bool: True if QA pairs meet quality criteria, False otherwise.
458
+ """
318
459
  if not qa_pairs:
319
460
  return False
320
461
 
@@ -335,7 +476,17 @@ class DataCurator:
335
476
  def _complexity_filter(
336
477
  self, examples: List[Dict[str, Any]]
337
478
  ) -> List[Dict[str, Any]]:
338
- r"""Complexity filtering."""
479
+ """
480
+ Filter examples based on complexity threshold.
481
+
482
+ Removes examples with complexity scores below the configured threshold.
483
+
484
+ Args:
485
+ examples (List[Dict[str, Any]]): List of examples to filter.
486
+
487
+ Returns:
488
+ List[Dict[str, Any]]: Examples meeting complexity threshold.
489
+ """
339
490
  return [
340
491
  example
341
492
  for example in examples
@@ -346,7 +497,14 @@ class DataCurator:
346
497
  def _remove_duplicates(
347
498
  self, examples: List[Dict[str, Any]]
348
499
  ) -> List[Dict[str, Any]]:
349
- r"""Remove duplicates."""
500
+ r"""Remove duplicate examples from the dataset.
501
+
502
+ Args:
503
+ examples (List[Dict[str, Any]]): List of examples to deduplicate.
504
+
505
+ Returns:
506
+ List[Dict[str, Any]]: Deduplicated examples.
507
+ """
350
508
  seen = set()
351
509
  unique_examples = []
352
510
 
@@ -366,8 +524,15 @@ class DataCurator:
366
524
  def _sample_dataset(
367
525
  self, examples: List[Dict[str, Any]]
368
526
  ) -> List[Dict[str, Any]]:
369
- r"""Sample to target dataset size."""
527
+ r"""Sample examples to match target dataset size.
528
+
529
+ Args:
530
+ examples (List[Dict[str, Any]]): List of examples to sample from.
531
+
532
+ Returns:
533
+ List[Dict[str, Any]]: Sampled dataset of target size or smaller.
534
+ """
370
535
  if len(examples) <= self.config.dataset_size:
371
536
  return examples
372
537
 
373
- return random.sample(examples, self.config.dataset_size)
538
+ return self.rng.sample(examples, self.config.dataset_size)
@@ -17,12 +17,30 @@ from pydantic import BaseModel, Field
17
17
 
18
18
 
19
19
  class ReasoningStep(BaseModel):
20
+ r"""A single step in a multi-hop reasoning process.
21
+
22
+ Attributes:
23
+ step (str): The textual description of the reasoning step.
24
+ """
25
+
20
26
  step: str = Field(
21
27
  ..., description="A single step in the reasoning process."
22
28
  )
23
29
 
24
30
 
25
31
  class MultiHopQA(BaseModel):
32
+ r"""A multi-hop question-answer pair with reasoning steps and supporting
33
+ facts.
34
+
35
+ Attributes:
36
+ question (str): The question requiring multi-hop reasoning.
37
+ reasoning_steps (List[ReasoningStep]): List of reasoning steps to
38
+ answer.
39
+ answer (str): The final answer to the question.
40
+ supporting_facts (List[str]): List of facts supporting the reasoning.
41
+ type (str): The type of question-answer pair.
42
+ """
43
+
26
44
  question: str = Field(
27
45
  ..., description="The question that requires multi-hop reasoning."
28
46
  )
@@ -57,6 +75,13 @@ class MultiHopQA(BaseModel):
57
75
 
58
76
 
59
77
  class ContextPrompt(BaseModel):
78
+ r"""A context prompt for generating multi-hop question-answer pairs.
79
+
80
+ Attributes:
81
+ main_context (str): The primary context for generating QA pairs.
82
+ related_contexts (Optional[List[str]]): Additional related contexts.
83
+ """
84
+
60
85
  main_context: str = Field(
61
86
  ...,
62
87
  description="The main context for generating"
@@ -23,7 +23,15 @@ class ProcessorConfig(BaseModel):
23
23
  r"""Data processing configuration class"""
24
24
 
25
25
  def __repr__(self):
26
- return "MultiHopGeneratorAgent()"
26
+ return (
27
+ f"ProcessorConfig("
28
+ f"seed={self.seed}, min_length={self.min_length}, "
29
+ f"max_length={self.max_length}, "
30
+ f"complexity_threshold={self.complexity_threshold}, "
31
+ f"dataset_size={self.dataset_size}, "
32
+ f"use_ai_model={self.use_ai_model}"
33
+ f")"
34
+ )
27
35
 
28
36
  model_config = ConfigDict(
29
37
  validate_assignment=True,
@@ -45,13 +53,6 @@ class ProcessorConfig(BaseModel):
45
53
  default=512, description="Maximum text length", gt=0
46
54
  )
47
55
 
48
- quality_threshold: float = Field(
49
- default=0.7,
50
- description="Quality threshold for processing",
51
- ge=0.0,
52
- le=1.0,
53
- )
54
-
55
56
  complexity_threshold: float = Field(
56
57
  default=0.5,
57
58
  description="Complexity threshold for processing",
@@ -32,19 +32,19 @@ class HuggingFaceDatasetManager(BaseDatasetManager):
32
32
 
33
33
  Args:
34
34
  token (str): The Hugging Face API token. If not provided, the token
35
- will be read from the environment variable `HUGGING_FACE_TOKEN`.
35
+ will be read from the environment variable `HF_TOKEN`.
36
36
  """
37
37
 
38
38
  @api_keys_required(
39
39
  [
40
- ("token", "HUGGING_FACE_TOKEN"),
40
+ ("token", "HF_TOKEN"),
41
41
  ]
42
42
  )
43
43
  @dependencies_required('huggingface_hub')
44
44
  def __init__(self, token: Optional[str] = None):
45
45
  from huggingface_hub import HfApi
46
46
 
47
- self._api_key = token or os.getenv("HUGGING_FACE_TOKEN")
47
+ self._api_key = token or os.getenv("HF_TOKEN")
48
48
  self.api = HfApi(token=self._api_key)
49
49
 
50
50
  def create_dataset_card(
@@ -12,6 +12,7 @@
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
  from .base import BaseEmbedding
15
+ from .jina_embedding import JinaEmbedding
15
16
  from .mistral_embedding import MistralEmbedding
16
17
  from .openai_compatible_embedding import OpenAICompatibleEmbedding
17
18
  from .openai_embedding import OpenAIEmbedding
@@ -25,4 +26,5 @@ __all__ = [
25
26
  "VisionLanguageEmbedding",
26
27
  "MistralEmbedding",
27
28
  "OpenAICompatibleEmbedding",
29
+ "JinaEmbedding",
28
30
  ]
@@ -0,0 +1,161 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import base64
16
+ import io
17
+ import os
18
+ from typing import Any, Optional, Union
19
+
20
+ import requests
21
+ from PIL import Image
22
+
23
+ from camel.embeddings import BaseEmbedding
24
+ from camel.types.enums import EmbeddingModelType
25
+ from camel.utils import api_keys_required
26
+
27
+
28
+ class JinaEmbedding(BaseEmbedding[Union[str, Image.Image]]):
29
+ r"""Provides text and image embedding functionalities using Jina AI's API.
30
+
31
+ Args:
32
+ model_type (EmbeddingModelType, optional): The model to use for
33
+ embeddings. (default: :obj:`JINA_EMBEDDINGS_V3`)
34
+ api_key (Optional[str], optional): The API key for authenticating with
35
+ Jina AI. (default: :obj:`None`)
36
+ dimensions (Optional[int], optional): The dimension of the output
37
+ embeddings. (default: :obj:`None`)
38
+ embedding_type (Optional[str], optional): The type of embedding format
39
+ to generate. Options: 'int8' (binary encoding with higher storage
40
+ and transfer efficiency), 'uint8' (unsigned binary encoding with
41
+ higher storage and transfer efficiency), 'base64' (base64 string
42
+ encoding with higher transfer efficiency). (default: :obj:`None`)
43
+ task (Optional[str], optional): The type of task for text embeddings.
44
+ Options: retrieval.query, retrieval.passage, text-matching,
45
+ classification, separation. (default: :obj:`None`)
46
+ late_chunking (bool, optional): If true, concatenates all sentences in
47
+ input and treats as a single input. (default: :obj:`False`)
48
+ normalized (bool, optional): If true, embeddings are normalized to unit
49
+ L2 norm. (default: :obj:`False`)
50
+ """
51
+
52
+ @api_keys_required([("api_key", 'JINA_API_KEY')])
53
+ def __init__(
54
+ self,
55
+ model_type: EmbeddingModelType = EmbeddingModelType.JINA_EMBEDDINGS_V3,
56
+ api_key: Optional[str] = None,
57
+ dimensions: Optional[int] = None,
58
+ embedding_type: Optional[str] = None,
59
+ task: Optional[str] = None,
60
+ late_chunking: bool = False,
61
+ normalized: bool = False,
62
+ ) -> None:
63
+ if not model_type.is_jina:
64
+ raise ValueError(
65
+ f"Model type {model_type} is not a Jina model. "
66
+ "Please use a valid Jina model type."
67
+ )
68
+ self.model_type = model_type
69
+ if dimensions is None:
70
+ self.output_dim = model_type.output_dim
71
+ else:
72
+ self.output_dim = dimensions
73
+ self._api_key = api_key or os.environ.get("JINA_API_KEY")
74
+
75
+ self.embedding_type = embedding_type
76
+ self.task = task
77
+ self.late_chunking = late_chunking
78
+ self.normalized = normalized
79
+ self.url = 'https://api.jina.ai/v1/embeddings'
80
+ self.headers = {
81
+ 'Content-Type': 'application/json',
82
+ 'Accept': 'application/json',
83
+ 'Authorization': f'Bearer {self._api_key}',
84
+ }
85
+
86
+ def embed_list(
87
+ self,
88
+ objs: list[Union[str, Image.Image]],
89
+ **kwargs: Any,
90
+ ) -> list[list[float]]:
91
+ r"""Generates embeddings for the given texts or images.
92
+
93
+ Args:
94
+ objs (list[Union[str, Image.Image]]): The texts or images for which
95
+ to generate the embeddings.
96
+ **kwargs (Any): Extra kwargs passed to the embedding API. Not used
97
+ in this implementation.
98
+
99
+ Returns:
100
+ list[list[float]]: A list that represents the generated embedding
101
+ as a list of floating-point numbers.
102
+
103
+ Raises:
104
+ ValueError: If the input type is not supported.
105
+ RuntimeError: If the API request fails.
106
+ """
107
+ input_data = []
108
+ for obj in objs:
109
+ if isinstance(obj, str):
110
+ if self.model_type == EmbeddingModelType.JINA_CLIP_V2:
111
+ input_data.append({"text": obj})
112
+ else:
113
+ input_data.append(obj) # type: ignore[arg-type]
114
+ elif isinstance(obj, Image.Image):
115
+ if self.model_type != EmbeddingModelType.JINA_CLIP_V2:
116
+ raise ValueError(
117
+ f"Model {self.model_type} does not support "
118
+ "image input. Use JINA_CLIP_V2 for image embeddings."
119
+ )
120
+ # Convert PIL Image to base64 string
121
+ buffered = io.BytesIO()
122
+ obj.save(buffered, format="PNG")
123
+ img_str = base64.b64encode(buffered.getvalue()).decode()
124
+ input_data.append({"image": img_str})
125
+ else:
126
+ raise ValueError(
127
+ f"Input type {type(obj)} is not supported. "
128
+ "Must be either str or PIL.Image."
129
+ )
130
+
131
+ data = {
132
+ "model": self.model_type.value,
133
+ "input": input_data,
134
+ "embedding_type": "float",
135
+ }
136
+
137
+ if self.embedding_type is not None:
138
+ data["embedding_type"] = self.embedding_type
139
+ if self.task is not None:
140
+ data["task"] = self.task
141
+ if self.late_chunking:
142
+ data["late_chunking"] = self.late_chunking # type: ignore[assignment]
143
+ if self.normalized:
144
+ data["normalized"] = self.normalized # type: ignore[assignment]
145
+ try:
146
+ response = requests.post(
147
+ self.url, headers=self.headers, json=data, timeout=180
148
+ )
149
+ response.raise_for_status()
150
+ result = response.json()
151
+ return [data["embedding"] for data in result["data"]]
152
+ except requests.exceptions.RequestException as e:
153
+ raise RuntimeError(f"Failed to get embeddings from Jina AI: {e}")
154
+
155
+ def get_output_dim(self) -> int:
156
+ r"""Returns the output dimension of the embeddings.
157
+
158
+ Returns:
159
+ int: The dimensionality of the embedding for the current model.
160
+ """
161
+ return self.output_dim
@@ -154,7 +154,7 @@ class FunctionCallingMessage(BaseMessage):
154
154
  " due to missing function name."
155
155
  )
156
156
 
157
- result_content = json.dumps(self.result)
157
+ result_content = str(self.result)
158
158
 
159
159
  return {
160
160
  "role": "tool",