camel-ai 0.2.23a0__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +16 -2
- camel/configs/anthropic_config.py +45 -11
- camel/datagen/self_improving_cot.py +2 -2
- camel/datagen/self_instruct/self_instruct.py +46 -2
- camel/models/__init__.py +2 -0
- camel/models/anthropic_model.py +5 -1
- camel/models/base_audio_model.py +92 -0
- camel/models/fish_audio_model.py +18 -8
- camel/models/model_manager.py +9 -0
- camel/models/openai_audio_models.py +80 -1
- camel/societies/role_playing.py +119 -0
- camel/toolkits/__init__.py +17 -1
- camel/toolkits/audio_analysis_toolkit.py +238 -0
- camel/toolkits/excel_toolkit.py +172 -0
- camel/toolkits/file_write_toolkit.py +371 -0
- camel/toolkits/image_analysis_toolkit.py +202 -0
- camel/toolkits/mcp_toolkit.py +251 -0
- camel/toolkits/page_script.js +376 -0
- camel/toolkits/terminal_toolkit.py +421 -0
- camel/toolkits/video_analysis_toolkit.py +407 -0
- camel/toolkits/{video_toolkit.py → video_download_toolkit.py} +19 -25
- camel/toolkits/web_toolkit.py +1306 -0
- camel/types/enums.py +3 -0
- {camel_ai-0.2.23a0.dist-info → camel_ai-0.2.24.dist-info}/METADATA +241 -106
- {camel_ai-0.2.23a0.dist-info → camel_ai-0.2.24.dist-info}/RECORD +57 -47
- {camel_ai-0.2.23a0.dist-info → camel_ai-0.2.24.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.23a0.dist-info → camel_ai-0.2.24.dist-info/licenses}/LICENSE +0 -0
camel/__init__.py
CHANGED
camel/agents/chat_agent.py
CHANGED
@@ -694,11 +694,18 @@ class ChatAgent(BaseAgent):
                 f"index: {self.model_backend.current_model_index}",
                 exc_info=exc,
             )
-        if not response:
+            error_info = str(exc)
+
+        if not response and self.model_backend.num_models > 1:
             raise ModelProcessingError(
                 "Unable to process messages: none of the provided models "
                 "run succesfully."
             )
+        elif not response:
+            raise ModelProcessingError(
+                f"Unable to process messages: the only provided model "
+                f"did not run succesfully. Error: {error_info}"
+            )
 
         logger.info(
             f"Model {self.model_backend.model_type}, "

@@ -732,11 +739,18 @@ class ChatAgent(BaseAgent):
                 f"index: {self.model_backend.current_model_index}",
                 exc_info=exc,
             )
-        if not response:
+            error_info = str(exc)
+
+        if not response and self.model_backend.num_models > 1:
             raise ModelProcessingError(
                 "Unable to process messages: none of the provided models "
                 "run succesfully."
             )
+        elif not response:
+            raise ModelProcessingError(
+                f"Unable to process messages: the only provided model "
+                f"did not run succesfully. Error: {error_info}"
+            )
 
         logger.info(
             f"Model {self.model_backend.model_type}, "
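Both hunks (the sync and async response paths) now capture the last exception, so a single-model setup surfaces the underlying error instead of only the generic multi-model message. A minimal sketch of the same fallback pattern, with illustrative names rather than camel's internal API:

class ModelProcessingError(RuntimeError):
    """Raised when no backend produced a response."""


def run_with_fallback(models, messages):
    response, error_info = None, None
    for model in models:
        try:
            response = model.run(messages)
            break
        except Exception as exc:
            error_info = str(exc)  # keep the last failure for diagnostics
    if not response and len(models) > 1:
        raise ModelProcessingError(
            "Unable to process messages: none of the provided models "
            "ran successfully."
        )
    elif not response:
        raise ModelProcessingError(
            f"Unable to process messages: the only provided model did not "
            f"run successfully. Error: {error_info}"
        )
    return response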
camel/configs/anthropic_config.py
CHANGED

@@ -23,23 +23,24 @@ class AnthropicConfig(BaseConfig):
     r"""Defines the parameters for generating chat completions using the
     Anthropic API.
 
-    See: https://docs.anthropic.com/
+    See: https://docs.anthropic.com/en/api/messages
     Args:
         max_tokens (int, optional): The maximum number of tokens to
             generate before stopping. Note that Anthropic models may stop
             before reaching this maximum. This parameter only specifies the
             absolute maximum number of tokens to generate.
             (default: :obj:`8192`)
-        stop_sequences (List[str], optional): Sequences that will cause the
-            model to stop generating completions text. Anthropic models stop
-            on "\n\nHuman:", and may include additional built-in stop
-            sequences in the future. By providing the stop_sequences
-            parameter, you may include additional strings that will cause the
-            model to stop generating.
+        stop_sequences (List[str], optional): Custom text sequences that will
+            cause the model to stop generating. The models will normally stop
+            when they have naturally completed their turn. If the model
+            encounters one of these custom sequences, the response will be
+            terminated and the stop_reason will be "stop_sequence".
+            (default: :obj:`[]`)
         temperature (float, optional): Amount of randomness injected into the
             response. Defaults to 1. Ranges from 0 to 1. Use temp closer to 0
             for analytical / multiple choice, and closer to 1 for creative
-            and generative tasks.
+            and generative tasks. Note that even with temperature of 0.0, the
+            results will not be fully deterministic. (default: :obj:`1`)
     top_p (float, optional): Use nucleus sampling. In nucleus sampling, we
         compute the cumulative distribution over all the options for each
         subsequent token in decreasing probability order and cut it off

@@ -49,9 +50,20 @@ class AnthropicConfig(BaseConfig):
         top_k (int, optional): Only sample from the top K options for each
             subsequent token. Used to remove "long tail" low probability
             responses. (default: :obj:`5`)
-        metadata: An object describing metadata about the request.
         stream (bool, optional): Whether to incrementally stream the response
             using server-sent events. (default: :obj:`False`)
+        metadata (Union[dict, NotGiven], optional): An object describing
+            metadata about the request. Can include user_id as an external
+            identifier for the user associated with the request.
+            (default: :obj:`NotGiven()`)
+        thinking (Union[dict, NotGiven], optional): Configuration for enabling
+            Claude's extended thinking. When enabled, responses include
+            thinking content blocks showing Claude's thinking process.
+            (default: :obj:`NotGiven()`)
+        tool_choice (Union[dict, NotGiven], optional): How the model should
+            use the provided tools. The model can use a specific tool, any
+            available tool, decide by itself, or not use tools at all.
+            (default: :obj:`NotGiven()`)
     """
 
     max_tokens: int = 8192

@@ -60,11 +72,33 @@ class AnthropicConfig(BaseConfig):
     top_p: Union[float, NotGiven] = 0.7
     top_k: Union[int, NotGiven] = 5
     stream: bool = False
+    metadata: Union[dict, NotGiven] = NotGiven()
+    thinking: Union[dict, NotGiven] = NotGiven()
+    tool_choice: Union[dict, NotGiven] = NotGiven()
 
     def as_dict(self) -> dict[str, Any]:
         config_dict = super().as_dict()
-
-
+        # Create a list of keys to remove to avoid modifying dict
+        keys_to_remove = [
+            key
+            for key, value in config_dict.items()
+            if isinstance(value, NotGiven)
+        ]
+
+        for key in keys_to_remove:
+            del config_dict[key]
+
+        # remove some keys if thinking is enabled
+        thinking_enabled = (
+            not isinstance(self.thinking, NotGiven)
+            and self.thinking["type"] == "enabled"
+        )
+        if thinking_enabled:
+            # `top_p`, `top_k`, `temperature` must be unset when thinking is
+            # enabled.
+            config_dict.pop("top_k", None)
+            config_dict.pop("top_p", None)
+            config_dict.pop("temperature", None)
         return config_dict
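Combined with the new fields, the as_dict() filtering means only explicitly set parameters reach the Anthropic API, and the sampling knobs are dropped whenever extended thinking is enabled. A hedged usage sketch; the thinking payload shape (including budget_tokens) follows Anthropic's Messages API, not a camel-specific format:

from camel.configs import AnthropicConfig

config = AnthropicConfig(
    max_tokens=8192,
    temperature=0.3,
    thinking={"type": "enabled", "budget_tokens": 4096},
)

payload = config.as_dict()
# NotGiven fields (metadata, tool_choice) are filtered out, and
# temperature/top_p/top_k are removed because thinking is enabled.
assert "metadata" not in payload
assert "temperature" not in payload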
camel/datagen/self_improving_cot.py
CHANGED

@@ -161,13 +161,13 @@ class SelfImprovingCoTPipeline:
         # Initialize output file with empty results if path is specified
         if self.output_path:
             with open(self.output_path, 'w') as f:
-                json.dump({'traces': []}, f, indent=2)
+                json.dump({'traces': []}, f, indent=2, ensure_ascii=False)
         self.lock = threading.Lock()
 
     def safe_write_json(self, file_path, data):
         temp_path = file_path + ".tmp"
         with open(temp_path, "w") as f:
-            json.dump(data, f, indent=2)
+            json.dump(data, f, indent=2, ensure_ascii=False)
         os.replace(temp_path, file_path)
 
     def clean_json(self, data):
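The only change here is ensure_ascii=False, which writes non-ASCII trace content verbatim instead of \uXXXX escapes; that matters for reasoning traces in languages other than English:

import json

trace = {"solution": "答案是 42"}
print(json.dumps(trace))                     # {"solution": "\u7b54\u6848\u662f 42"}
print(json.dumps(trace, ensure_ascii=False)) # {"solution": "答案是 42"}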
camel/datagen/self_instruct/self_instruct.py
CHANGED

@@ -15,16 +15,20 @@
 import json
 import os
 import random
+import time
 from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, Field
 
 from camel.agents import ChatAgent
+from camel.logger import get_logger
 
 from .filter import RougeSimilarityFilter
 from .filter.instruction_filter import InstructionFilter
 from .templates import SelfInstructTemplates
 
+logger = get_logger(__name__)
+
 
 class SelfInstructPipeline:
     r"""A pipeline to generate and manage machine-generated instructions for

@@ -210,18 +214,28 @@ class SelfInstructPipeline:
             )
             return structured_response.answer
         except ValueError as e:
-
+            logger.error(f"Error parsing agent response: {e}")
             return False
 
     def generate_machine_instances(self):
         r"""Generate instances for each machine task based on its
         classification status.
         """
+        logger.info(
+            f"Starting output generation: target {len(self.machine_tasks)} "
+            f"instructions"
+        )
+        attempt_count = 0
         for instruction in self.machine_tasks:
             instance = self.generate_machine_instance(
                 instruction['instruction'], instruction['is_classification']
             )
             instruction['instances'] = instance
+            attempt_count += 1
+            logger.info(
+                f"Attempt[Output]: Progress {attempt_count}/"
+                f"{len(self.machine_tasks)} instructions"
+            )
 
     def generate_machine_instance(
         self, instruction: str, classification: bool

@@ -368,11 +382,30 @@ class SelfInstructPipeline:
         with open(self.data_output_path, 'w') as f:
             json.dump(self.machine_tasks, f, indent=4, ensure_ascii=False)
 
-    def generate(self):
+    def generate(self, timeout_minutes=600):
         r"""Execute the entire pipeline to generate machine instructions
         and instances.
+
+        Args:
+            timeout_minutes (int): Maximum time in minutes to run the
+                generation process before timing out. (default: :obj:`600`)
         """
+        start_time = time.time()
+        timeout_seconds = timeout_minutes * 60
+        logger.info(
+            f"Starting instruction generation: target "
+            f"{self.num_machine_instructions} instructions"
+        )
         while len(self.machine_tasks) < self.num_machine_instructions:
+            # Check for timeout
+            elapsed = time.time() - start_time
+            if elapsed > timeout_seconds:
+                logger.info(
+                    f"Generation timed out after {elapsed / 60:.1f} minutes. "
+                    f"Generated {len(self.machine_tasks)}/"
+                    f"{self.num_machine_instructions} instructions."
+                )
+                break
             prompt, instruction = self.generate_machine_instruction()
             existing_instructions = [
                 t["instruction"] for t in self.human_tasks

@@ -389,6 +422,17 @@ class SelfInstructPipeline:
                 ),
             }
             self.machine_tasks.append(instruction_dict)
+            logger.info(
+                f"Attempt[Instruction]: Progress "
+                f"{len(self.machine_tasks)}/"
+                f"{self.num_machine_instructions} "
+                f"instructions"
+            )
+        else:
+            logger.warning(
+                f"Instruction failed filters. Skipping instruction: "
+                f"{instruction}"
+            )
         self.generate_machine_instances()
         self.construct_data()
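A hedged usage sketch of the new timeout parameter; the constructor arguments below are illustrative, so check the SelfInstructPipeline signature in the installed version:

from camel.agents import ChatAgent
from camel.datagen.self_instruct import SelfInstructPipeline

# Hypothetical setup: agent, seed file, and output path are placeholders.
pipeline = SelfInstructPipeline(
    agent=ChatAgent(),
    seed="seed_tasks.jsonl",
    num_machine_instructions=50,
    data_output_path="./data_output.json",
)

# New in 0.2.24: generation logs progress and stops cleanly once the
# timeout elapses, even if the instruction target has not been reached.
pipeline.generate(timeout_minutes=30)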
camel/models/__init__.py
CHANGED
@@ -14,6 +14,7 @@
 from .aiml_model import AIMLModel
 from .anthropic_model import AnthropicModel
 from .azure_openai_model import AzureOpenAIModel
+from .base_audio_model import BaseAudioModel
 from .base_model import BaseModelBackend
 from .cohere_model import CohereModel
 from .deepseek_model import DeepSeekModel

@@ -74,4 +75,5 @@ __all__ = [
     'InternLMModel',
     'MoonshotModel',
     'AIMLModel',
+    'BaseAudioModel',
 ]
camel/models/anthropic_model.py
CHANGED
@@ -84,7 +84,11 @@ class AnthropicModel(BaseModelBackend):
                     index=0,
                     message={
                         "role": "assistant",
-                        "content": response.content[0].text,
+                        "content": next(
+                            content.text
+                            for content in response.content
+                            if content.type == "text"
+                        ),
                     },
                     finish_reason=response.stop_reason,
                 )
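The switch from plain indexing to next(...) matters once extended thinking is enabled: the Messages API can return a thinking block before the text block, so the first element of response.content is no longer guaranteed to be text. A minimal illustration over stand-in objects (the real SDK returns typed content blocks):

from types import SimpleNamespace

content = [
    SimpleNamespace(type="thinking", thinking="...intermediate reasoning..."),
    SimpleNamespace(type="text", text="Final answer."),
]

# Same extraction pattern as the hunk above: the first block of type "text".
first_text = next(block.text for block in content if block.type == "text")
print(first_text)  # Final answer.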
camel/models/base_audio_model.py
ADDED

@@ -0,0 +1,92 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import os
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+
+
+class BaseAudioModel(ABC):
+    r"""Base class for audio models providing Text-to-Speech (TTS) and
+    Speech-to-Text (STT) functionality.
+    """
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        url: Optional[str] = None,
+    ) -> None:
+        r"""Initialize an instance of BaseAudioModel.
+
+        Args:
+            api_key (Optional[str]): API key for the audio service. If not
+                provided, will look for an environment variable specific to the
+                implementation.
+            url (Optional[str]): Base URL for the audio API. If not provided,
+                will use a default URL or look for an environment variable
+                specific to the implementation.
+        """
+        self._api_key = api_key
+        self._url = url
+
+    @abstractmethod
+    def text_to_speech(
+        self,
+        input: str,
+        *,
+        storage_path: str,
+        **kwargs: Any,
+    ) -> Any:
+        r"""Convert text to speech.
+
+        Args:
+            input (str): The text to be converted to speech.
+            storage_path (str): The local path to store the
+                generated speech file.
+            **kwargs (Any): Extra kwargs passed to the TTS API.
+
+        Returns:
+            Any: The response from the TTS API, which may vary by
+                implementation.
+        """
+        pass
+
+    @abstractmethod
+    def speech_to_text(
+        self,
+        audio_file_path: str,
+        **kwargs: Any,
+    ) -> str:
+        r"""Convert speech audio to text.
+
+        Args:
+            audio_file_path (str): The audio file path to transcribe.
+            **kwargs (Any): Extra keyword arguments passed to the
+                Speech-to-Text (STT) API.
+
+        Returns:
+            str: The transcribed text.
+        """
+        pass
+
+    def _ensure_directory_exists(self, file_path: str) -> None:
+        r"""Ensure the directory for the given file path exists.
+
+        Args:
+            file_path (str): The file path for which to ensure the directory
+                exists.
+        """
+        directory = os.path.dirname(file_path)
+        if directory and not os.path.exists(directory):
+            os.makedirs(directory)
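Concrete backends only implement the two abstract methods and inherit _ensure_directory_exists. A toy subclass to show the contract, not a real audio backend:

from typing import Any

from camel.models import BaseAudioModel  # newly exported in 0.2.24


class EchoAudioModel(BaseAudioModel):
    r"""Stand-in backend that writes the input text as bytes."""

    def text_to_speech(
        self, input: str, *, storage_path: str, **kwargs: Any
    ) -> Any:
        self._ensure_directory_exists(storage_path)  # inherited helper
        with open(storage_path, "wb") as f:
            f.write(input.encode("utf-8"))
        return storage_path

    def speech_to_text(self, audio_file_path: str, **kwargs: Any) -> str:
        with open(audio_file_path, "rb") as f:
            return f.read().decode("utf-8", errors="replace")


model = EchoAudioModel()
model.text_to_speech("hello", storage_path="out/hello.bin")
print(model.speech_to_text("out/hello.bin"))  # hello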
camel/models/fish_audio_model.py
CHANGED
@@ -15,8 +15,10 @@
 import os
 from typing import Any, Optional
 
+from camel.models.base_audio_model import BaseAudioModel
 
-class FishAudioModel:
+
+class FishAudioModel(BaseAudioModel):
     r"""Provides access to FishAudio's Text-to-Speech (TTS) and Speech_to_Text
     (STT) models.
     """

@@ -37,6 +39,7 @@ class FishAudioModel:
         """
         from fish_audio_sdk import Session
 
+        super().__init__(api_key, url)
         self._api_key = api_key or os.environ.get("FISHAUDIO_API_KEY")
         self._url = url or os.environ.get(
             "FISHAUDIO_API_BASE_URL", "https://api.fish.audio"

@@ -46,7 +49,8 @@ class FishAudioModel:
     def text_to_speech(
         self,
         input: str,
-        storage_path: str,
+        *,
+        storage_path: Optional[str] = None,
         reference_id: Optional[str] = None,
         reference_audio: Optional[str] = None,
         reference_audio_text: Optional[str] = None,

@@ -55,9 +59,9 @@ class FishAudioModel:
         r"""Convert text to speech and save the output to a file.
 
         Args:
-
-            storage_path (str): The file path where the resulting
-                be saved.
+            input (str): The text to convert to speech.
+            storage_path (Optional[str]): The file path where the resulting
+                speech will be saved. (default: :obj:`None`)
             reference_id (Optional[str]): An optional reference ID to
                 associate with the request. (default: :obj:`None`)
             reference_audio (Optional[str]): Path to an audio file for

@@ -68,12 +72,18 @@ class FishAudioModel:
 
         Raises:
             FileNotFoundError: If the reference audio file cannot be found.
+            ValueError: If storage_path is not provided or if reference_audio
+                is provided without reference_audio_text.
         """
         from fish_audio_sdk import ReferenceAudio, TTSRequest
 
-        directory = os.path.dirname(storage_path)
-        if directory and not os.path.exists(directory):
-            os.makedirs(directory)
+        if storage_path is None:
+            raise ValueError(
+                "storage_path must be provided for "
+                "FishAudioModel.text_to_speech"
+            )
+
+        self._ensure_directory_exists(storage_path)
 
         if not reference_audio:
             with open(f"{storage_path}", "wb") as f:
camel/models/model_manager.py
CHANGED
@@ -117,6 +117,15 @@ class ModelManager:
         """
         return self.models.index(self.current_model)
 
+    @property
+    def num_models(self) -> int:
+        r"""Return the number of models in the manager.
+
+        Returns:
+            int: The number of models available in the model manager.
+        """
+        return len(self.models)
+
     @property
     def token_limit(self):
         r"""Returns the maximum token limit for current model.
camel/models/openai_audio_models.py
CHANGED

@@ -11,15 +11,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+import base64
 import os
 from typing import Any, List, Optional, Union
 
 from openai import AsyncOpenAI, OpenAI, _legacy_response
 
+from camel.models.base_audio_model import BaseAudioModel
 from camel.types import AudioModelType, VoiceType
 
 
-class OpenAIAudioModels:
+class OpenAIAudioModels(BaseAudioModel):
     r"""Provides access to OpenAI's Text-to-Speech (TTS) and Speech_to_Text
     (STT) models."""
 

@@ -29,6 +31,7 @@ class OpenAIAudioModels:
         url: Optional[str] = None,
     ) -> None:
         r"""Initialize an instance of OpenAI."""
+        super().__init__(api_key, url)
         self._url = url or os.environ.get("OPENAI_API_BASE_URL")
         self._api_key = api_key or os.environ.get("OPENAI_API_KEY")
         self._client = OpenAI(

@@ -47,6 +50,7 @@ class OpenAIAudioModels:
     def text_to_speech(
         self,
         input: str,
+        *,
         model_type: AudioModelType = AudioModelType.TTS_1,
         voice: VoiceType = VoiceType.ALLOY,
         storage_path: Optional[str] = None,

@@ -111,6 +115,8 @@ class OpenAIAudioModels:
                     new_storage_path = (
                         f"{file_name}_{chunk_index}{file_extension}"
                     )
+                    # Ensure directory exists
+                    self._ensure_directory_exists(new_storage_path)
                     response.write_to_file(new_storage_path)
                     chunk_index += 1
             except Exception as e:

@@ -131,6 +137,8 @@ class OpenAIAudioModels:
 
         if storage_path:
             try:
+                # Ensure directory exists
+                self._ensure_directory_exists(storage_path)
                 response.write_to_file(storage_path)
             except Exception as e:
                 raise Exception("Error during write the file") from e

@@ -263,3 +271,74 @@ class OpenAIAudioModels:
             return transcription.text
         except Exception as e:
             raise Exception("Error during STT API call") from e
+
+    def audio_question_answering(
+        self,
+        audio_file_path: str,
+        question: str,
+        model: str = "gpt-4o-mini-audio-preview",
+        **kwargs: Any,
+    ) -> str:
+        r"""Answer a question directly using the audio content.
+
+        Args:
+            audio_file_path (str): The path to the audio file.
+            question (str): The question to ask about the audio content.
+            model (str, optional): The model to use for audio question
+                answering. (default: :obj:`"gpt-4o-mini-audio-preview"`)
+            **kwargs (Any): Extra keyword arguments passed to the chat
+                completions API.
+
+        Returns:
+            str: The model's response to the question.
+
+        Raises:
+            Exception: If there's an error during the API call.
+        """
+        try:
+            # Read and encode the audio file
+            with open(audio_file_path, "rb") as audio_file:
+                audio_data = audio_file.read()
+
+            encoded_string = base64.b64encode(audio_data).decode('utf-8')
+
+            # Get file format
+            file_suffix = os.path.splitext(audio_file_path)[1]
+            file_format = file_suffix[1:].lower()
+
+            # Prepare the prompt
+            text_prompt = "Answer the following question based on the "
+            f"given audio information:\n\n{question}"
+
+            # Call the OpenAI API
+            completion = self._client.chat.completions.create(
+                model=model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are a helpful assistant "
+                        "specializing in audio analysis.",
+                    },
+                    {  # type: ignore[misc, list-item]
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": text_prompt},
+                            {
+                                "type": "input_audio",
+                                "input_audio": {
+                                    "data": encoded_string,
+                                    "format": file_format,
+                                },
+                            },
+                        ],
+                    },
+                ],
+                **kwargs,
+            )
+
+            response = str(completion.choices[0].message.content)
+            return response
+        except Exception as e:
+            raise Exception(
+                "Error during audio question answering API call"
+            ) from e
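One caveat in the new method: text_prompt is assigned only the first string literal, and the f-string on the following line is a standalone expression that is discarded (implicit concatenation would need parentheses), so as committed the question text never reaches the prompt. A hedged usage sketch (requires OPENAI_API_KEY; the audio-preview model name comes from the hunk above and may change on OpenAI's side):

from camel.models import OpenAIAudioModels

audio_models = OpenAIAudioModels()

answer = audio_models.audio_question_answering(
    audio_file_path="meeting.wav",  # placeholder path
    question="What decision was reached in this recording?",
)
print(answer)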