camel-ai 0.2.23a0__py3-none-any.whl → 0.2.25__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.


@@ -0,0 +1,92 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+ import os
+ from abc import ABC, abstractmethod
+ from typing import Any, Optional
+
+
+ class BaseAudioModel(ABC):
+     r"""Base class for audio models providing Text-to-Speech (TTS) and
+     Speech-to-Text (STT) functionality.
+     """
+
+     def __init__(
+         self,
+         api_key: Optional[str] = None,
+         url: Optional[str] = None,
+     ) -> None:
+         r"""Initialize an instance of BaseAudioModel.
+
+         Args:
+             api_key (Optional[str]): API key for the audio service. If not
+                 provided, will look for an environment variable specific to
+                 the implementation.
+             url (Optional[str]): Base URL for the audio API. If not provided,
+                 will use a default URL or look for an environment variable
+                 specific to the implementation.
+         """
+         self._api_key = api_key
+         self._url = url
+
+     @abstractmethod
+     def text_to_speech(
+         self,
+         input: str,
+         *,
+         storage_path: str,
+         **kwargs: Any,
+     ) -> Any:
+         r"""Convert text to speech.
+
+         Args:
+             input (str): The text to be converted to speech.
+             storage_path (str): The local path to store the
+                 generated speech file.
+             **kwargs (Any): Extra kwargs passed to the TTS API.
+
+         Returns:
+             Any: The response from the TTS API, which may vary by
+                 implementation.
+         """
+         pass
+
+     @abstractmethod
+     def speech_to_text(
+         self,
+         audio_file_path: str,
+         **kwargs: Any,
+     ) -> str:
+         r"""Convert speech audio to text.
+
+         Args:
+             audio_file_path (str): The audio file path to transcribe.
+             **kwargs (Any): Extra keyword arguments passed to the
+                 Speech-to-Text (STT) API.
+
+         Returns:
+             str: The transcribed text.
+         """
+         pass
+
+     def _ensure_directory_exists(self, file_path: str) -> None:
+         r"""Ensure the directory for the given file path exists.
+
+         Args:
+             file_path (str): The file path for which to ensure the directory
+                 exists.
+         """
+         directory = os.path.dirname(file_path)
+         if directory and not os.path.exists(directory):
+             os.makedirs(directory)
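
The hunk above adds a new module, camel.models.base_audio_model (the import path is visible in the hunks below), defining the shared contract that the FishAudio and OpenAI backends now inherit. A minimal sketch of a conforming subclass; EchoAudioModel is hypothetical and not part of the package:

    from typing import Any

    from camel.models.base_audio_model import BaseAudioModel


    class EchoAudioModel(BaseAudioModel):
        """Toy backend that writes the input text to a file instead of audio."""

        def text_to_speech(
            self, input: str, *, storage_path: str, **kwargs: Any
        ) -> Any:
            # The inherited helper creates missing parent directories.
            self._ensure_directory_exists(storage_path)
            with open(storage_path, "w") as f:
                f.write(input)
            return storage_path

        def speech_to_text(self, audio_file_path: str, **kwargs: Any) -> str:
            # A real backend would call an STT API here.
            with open(audio_file_path) as f:
                return f.read()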
@@ -15,8 +15,10 @@
  import os
  from typing import Any, Optional

+ from camel.models.base_audio_model import BaseAudioModel

- class FishAudioModel:
+
+ class FishAudioModel(BaseAudioModel):
      r"""Provides access to FishAudio's Text-to-Speech (TTS) and Speech_to_Text
      (STT) models.
      """
@@ -37,6 +39,7 @@ class FishAudioModel:
          """
          from fish_audio_sdk import Session

+         super().__init__(api_key, url)
          self._api_key = api_key or os.environ.get("FISHAUDIO_API_KEY")
          self._url = url or os.environ.get(
              "FISHAUDIO_API_BASE_URL", "https://api.fish.audio"
@@ -46,7 +49,8 @@ class FishAudioModel:
      def text_to_speech(
          self,
          input: str,
-         storage_path: str,
+         *,
+         storage_path: Optional[str] = None,
          reference_id: Optional[str] = None,
          reference_audio: Optional[str] = None,
          reference_audio_text: Optional[str] = None,
@@ -55,9 +59,9 @@ class FishAudioModel:
          r"""Convert text to speech and save the output to a file.

          Args:
-             input_text (str): The text to convert to speech.
-             storage_path (str): The file path where the resulting speech will
-                 be saved.
+             input (str): The text to convert to speech.
+             storage_path (Optional[str]): The file path where the resulting
+                 speech will be saved. (default: :obj:`None`)
              reference_id (Optional[str]): An optional reference ID to
                  associate with the request. (default: :obj:`None`)
              reference_audio (Optional[str]): Path to an audio file for
@@ -68,12 +72,18 @@ class FishAudioModel:

          Raises:
              FileNotFoundError: If the reference audio file cannot be found.
+             ValueError: If storage_path is not provided or if reference_audio
+                 is provided without reference_audio_text.
          """
          from fish_audio_sdk import ReferenceAudio, TTSRequest

-         directory = os.path.dirname(storage_path)
-         if directory and not os.path.exists(directory):
-             os.makedirs(directory)
+         if storage_path is None:
+             raise ValueError(
+                 "storage_path must be provided for "
+                 "FishAudioModel.text_to_speech"
+             )
+
+         self._ensure_directory_exists(storage_path)

          if not reference_audio:
              with open(f"{storage_path}", "wb") as f:
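
With storage_path now keyword-only and defaulting to None, existing positional calls such as text_to_speech(text, path) break and must pass the path by keyword. A usage sketch under the new signature; the key value is a placeholder and fish_audio_sdk is assumed to be installed:

    from camel.models import FishAudioModel  # import path assumed

    tts = FishAudioModel(api_key="fish-api-key-placeholder")
    # Omitting storage_path now raises ValueError rather than a TypeError.
    tts.text_to_speech("Hello from CAMEL.", storage_path="out/hello.mp3")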
@@ -117,6 +117,15 @@ class ModelManager:
          """
          return self.models.index(self.current_model)

+     @property
+     def num_models(self) -> int:
+         r"""Return the number of models in the manager.
+
+         Returns:
+             int: The number of models available in the model manager.
+         """
+         return len(self.models)
+
      @property
      def token_limit(self):
          r"""Returns the maximum token limit for current model.
@@ -11,15 +11,17 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ import base64
  import os
  from typing import Any, List, Optional, Union

  from openai import AsyncOpenAI, OpenAI, _legacy_response

+ from camel.models.base_audio_model import BaseAudioModel
  from camel.types import AudioModelType, VoiceType


- class OpenAIAudioModels:
+ class OpenAIAudioModels(BaseAudioModel):
      r"""Provides access to OpenAI's Text-to-Speech (TTS) and Speech_to_Text
      (STT) models."""

@@ -29,6 +31,7 @@ class OpenAIAudioModels:
          url: Optional[str] = None,
      ) -> None:
          r"""Initialize an instance of OpenAI."""
+         super().__init__(api_key, url)
          self._url = url or os.environ.get("OPENAI_API_BASE_URL")
          self._api_key = api_key or os.environ.get("OPENAI_API_KEY")
          self._client = OpenAI(
@@ -47,6 +50,7 @@ class OpenAIAudioModels:
      def text_to_speech(
          self,
          input: str,
+         *,
          model_type: AudioModelType = AudioModelType.TTS_1,
          voice: VoiceType = VoiceType.ALLOY,
          storage_path: Optional[str] = None,
@@ -111,6 +115,8 @@ class OpenAIAudioModels:
                  new_storage_path = (
                      f"{file_name}_{chunk_index}{file_extension}"
                  )
+                 # Ensure directory exists
+                 self._ensure_directory_exists(new_storage_path)
                  response.write_to_file(new_storage_path)
                  chunk_index += 1
          except Exception as e:
@@ -131,6 +137,8 @@ class OpenAIAudioModels:

          if storage_path:
              try:
+                 # Ensure directory exists
+                 self._ensure_directory_exists(storage_path)
                  response.write_to_file(storage_path)
              except Exception as e:
                  raise Exception("Error during write the file") from e
@@ -263,3 +271,74 @@ class OpenAIAudioModels:
              return transcription.text
          except Exception as e:
              raise Exception("Error during STT API call") from e
+
+     def audio_question_answering(
+         self,
+         audio_file_path: str,
+         question: str,
+         model: str = "gpt-4o-mini-audio-preview",
+         **kwargs: Any,
+     ) -> str:
+         r"""Answer a question directly using the audio content.
+
+         Args:
+             audio_file_path (str): The path to the audio file.
+             question (str): The question to ask about the audio content.
+             model (str, optional): The model to use for audio question
+                 answering. (default: :obj:`"gpt-4o-mini-audio-preview"`)
+             **kwargs (Any): Extra keyword arguments passed to the chat
+                 completions API.
+
+         Returns:
+             str: The model's response to the question.
+
+         Raises:
+             Exception: If there's an error during the API call.
+         """
+         try:
+             # Read and encode the audio file
+             with open(audio_file_path, "rb") as audio_file:
+                 audio_data = audio_file.read()
+
+             encoded_string = base64.b64encode(audio_data).decode('utf-8')
+
+             # Get file format
+             file_suffix = os.path.splitext(audio_file_path)[1]
+             file_format = file_suffix[1:].lower()
+
+             # Prepare the prompt
+             text_prompt = ("Answer the following question based on the "
+                            f"given audio information:\n\n{question}")
+
+             # Call the OpenAI API
+             completion = self._client.chat.completions.create(
+                 model=model,
+                 messages=[
+                     {
+                         "role": "system",
+                         "content": "You are a helpful assistant "
+                         "specializing in audio analysis.",
+                     },
+                     {  # type: ignore[misc, list-item]
+                         "role": "user",
+                         "content": [
+                             {"type": "text", "text": text_prompt},
+                             {
+                                 "type": "input_audio",
+                                 "input_audio": {
+                                     "data": encoded_string,
+                                     "format": file_format,
+                                 },
+                             },
+                         ],
+                     },
+                 ],
+                 **kwargs,
+             )
+
+             response = str(completion.choices[0].message.content)
+             return response
+         except Exception as e:
+             raise Exception(
+                 "Error during audio question answering API call"
+             ) from e
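
A usage sketch for the new audio_question_answering method; the file name and question are illustrative, and OPENAI_API_KEY is assumed to be set in the environment:

    from camel.models.openai_audio_models import OpenAIAudioModels  # path assumed

    audio_models = OpenAIAudioModels()
    answer = audio_models.audio_question_answering(
        audio_file_path="meeting.wav",  # illustrative file
        question="What action items were agreed on?",
    )
    print(answer)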
@@ -97,9 +97,16 @@ class SGLangModel(BaseModelBackend):
      def _start_server(self) -> None:
          try:
              if not self._url:
+                 tool_call_flag = self.model_config_dict.get("tools")
+                 tool_call_arg = (
+                     f"--tool-call-parser {self._api_key} "
+                     if tool_call_flag
+                     else ""
+                 )
                  cmd = (
                      f"python -m sglang.launch_server "
                      f"--model-path {self.model_type} "
+                     f"{tool_call_arg}"
                      f"--port 30000 "
                      f"--host 0.0.0.0"
                  )
@@ -265,6 +272,19 @@ class SGLangModel(BaseModelBackend):
          """
          return self.model_config_dict.get('stream', False)

+     def __del__(self):
+         r"""Properly clean up resources when the model is destroyed."""
+         self.cleanup()
+
+     def cleanup(self):
+         r"""Terminate the server process and clean up resources."""
+         with self._lock:
+             if self.server_process:
+                 _terminate_process(self.server_process)
+                 self.server_process = None
+                 self._client = None
+                 logging.info("Server process terminated during cleanup.")
+

  # Below are helper functions from sglang.utils
  def _terminate_process(process):
@@ -326,21 +346,25 @@ def _execute_shell_command(command: str) -> subprocess.Popen:
      return subprocess.Popen(parts, text=True, stderr=subprocess.STDOUT)


- def _wait_for_server(base_url: str, timeout: Optional[int] = None) -> None:
+ def _wait_for_server(base_url: str, timeout: Optional[int] = 30) -> None:
      r"""Wait for the server to be ready by polling the /v1/models endpoint.

      Args:
          base_url: The base URL of the server
-         timeout: Maximum time to wait in seconds. None means wait forever.
+         timeout: Maximum time to wait in seconds. Default is 30 seconds.
      """
      import requests

+     # Set a default value if timeout is None
+     actual_timeout = 30 if timeout is None else timeout
+
      start_time = time.time()
      while True:
          try:
              response = requests.get(
                  f"{base_url}/v1/models",
                  headers={"Authorization": "Bearer None"},
+                 timeout=5,  # Add a timeout for the request itself
              )
              if response.status_code == 200:
                  time.sleep(5)
@@ -356,9 +380,15 @@ def _wait_for_server(base_url: str, timeout: Optional[int] = None) -> None:
                  )
                  break

-             if timeout and time.time() - start_time > timeout:
+             if time.time() - start_time > actual_timeout:
+                 raise TimeoutError(
+                     f"Server did not become ready within "
+                     f"{actual_timeout} seconds"
+                 )
+         except (requests.exceptions.RequestException, TimeoutError) as e:
+             if time.time() - start_time > actual_timeout:
                  raise TimeoutError(
-                     "Server did not become ready within timeout period"
+                     f"Server did not become ready within "
+                     f"{actual_timeout} seconds: {e}"
                  )
-         except requests.exceptions.RequestException:
              time.sleep(1)
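
The bounded default means callers that previously relied on timeout=None waiting forever now get a 30-second cap and a TimeoutError on failure. A sketch of the call site; the URL and timeout are illustrative:

    # After _execute_shell_command(...) has launched the SGLang server:
    _wait_for_server("http://localhost:30000", timeout=60)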
@@ -468,6 +468,42 @@ class RolePlaying:

          return init_msg

+     async def ainit_chat(
+         self, init_msg_content: Optional[str] = None
+     ) -> BaseMessage:
+         r"""Asynchronously initializes the chat by resetting both the
+         assistant and user agents, then returns an initial message for the
+         role-playing session.
+
+         Args:
+             init_msg_content (str, optional): A user-specified initial
+                 message to be sent to the role-playing session as the
+                 initial message. (default: :obj:`None`)
+
+         Returns:
+             BaseMessage: A single `BaseMessage` representing the initial
+                 message.
+         """
+         # Currently, reset() is synchronous, but if it becomes async in the
+         # future, we can await it here
+         self.assistant_agent.reset()
+         self.user_agent.reset()
+         default_init_msg_content = (
+             "Now start to give me instructions one by one. "
+             "Only reply with Instruction and Input."
+         )
+         if init_msg_content is None:
+             init_msg_content = default_init_msg_content
+
+         # Initialize a message sent by the assistant
+         init_msg = BaseMessage.make_assistant_message(
+             role_name=getattr(self.assistant_sys_msg, 'role_name', None)
+             or "assistant",
+             content=init_msg_content,
+         )
+
+         return init_msg
+
      def step(
          self,
          assistant_msg: BaseMessage,
471
507
  def step(
472
508
  self,
473
509
  assistant_msg: BaseMessage,
@@ -549,3 +585,86 @@ class RolePlaying:
549
585
  info=user_response.info,
550
586
  ),
551
587
  )
588
+
589
+ async def astep(
590
+ self,
591
+ assistant_msg: BaseMessage,
592
+ ) -> Tuple[ChatAgentResponse, ChatAgentResponse]:
593
+ r"""Asynchronously advances the conversation by taking a message from
594
+ the assistant, processing it using the user agent, and then processing
595
+ the resulting message using the assistant agent. Returns a tuple
596
+ containing the resulting assistant message, whether the assistant
597
+ agent terminated the conversation, and any additional assistant
598
+ information, as well as a tuple containing the resulting user message,
599
+ whether the user agent terminated the conversation, and any additional
600
+ user information.
601
+
602
+ Args:
603
+ assistant_msg: A `BaseMessage` representing the message from the
604
+ assistant.
605
+
606
+ Returns:
607
+ Tuple[ChatAgentResponse, ChatAgentResponse]: A tuple containing two
608
+ ChatAgentResponse: the first struct contains the resulting
609
+ assistant message, whether the assistant agent terminated the
610
+ conversation, and any additional assistant information; the
611
+ second struct contains the resulting user message, whether the
612
+ user agent terminated the conversation, and any additional user
613
+ information.
614
+ """
615
+ user_response = await self.user_agent.astep(assistant_msg)
616
+ if user_response.terminated or user_response.msgs is None:
617
+ return (
618
+ ChatAgentResponse(msgs=[], terminated=False, info={}),
619
+ ChatAgentResponse(
620
+ msgs=[],
621
+ terminated=user_response.terminated,
622
+ info=user_response.info,
623
+ ),
624
+ )
625
+ user_msg = self._reduce_message_options(user_response.msgs)
626
+
627
+ # To prevent recording the same memory more than once (once in chat
628
+ # step and once in role play), and the model generates only one
629
+ # response when multi-response support is enabled.
630
+ if (
631
+ 'n' in self.user_agent.model_backend.model_config_dict.keys()
632
+ and self.user_agent.model_backend.model_config_dict['n'] > 1
633
+ ):
634
+ self.user_agent.record_message(user_msg)
635
+
636
+ assistant_response = await self.assistant_agent.astep(user_msg)
637
+ if assistant_response.terminated or assistant_response.msgs is None:
638
+ return (
639
+ ChatAgentResponse(
640
+ msgs=[],
641
+ terminated=assistant_response.terminated,
642
+ info=assistant_response.info,
643
+ ),
644
+ ChatAgentResponse(
645
+ msgs=[user_msg], terminated=False, info=user_response.info
646
+ ),
647
+ )
648
+ assistant_msg = self._reduce_message_options(assistant_response.msgs)
649
+
650
+ # To prevent recording the same memory more than once (once in chat
651
+ # step and once in role play), and the model generates only one
652
+ # response when multi-response support is enabled.
653
+ if (
654
+ 'n' in self.assistant_agent.model_backend.model_config_dict.keys()
655
+ and self.assistant_agent.model_backend.model_config_dict['n'] > 1
656
+ ):
657
+ self.assistant_agent.record_message(assistant_msg)
658
+
659
+ return (
660
+ ChatAgentResponse(
661
+ msgs=[assistant_msg],
662
+ terminated=assistant_response.terminated,
663
+ info=assistant_response.info,
664
+ ),
665
+ ChatAgentResponse(
666
+ msgs=[user_msg],
667
+ terminated=user_response.terminated,
668
+ info=user_response.info,
669
+ ),
670
+ )
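
Together, ainit_chat and astep let the whole role-playing loop run on an event loop. A driver sketch, assuming a fully configured RolePlaying instance is built elsewhere:

    import asyncio

    from camel.societies import RolePlaying  # import path assumed


    async def run_session(session: RolePlaying, max_rounds: int = 3) -> None:
        # Reset both agents and obtain the opening instruction message.
        assistant_msg = await session.ainit_chat()
        for _ in range(max_rounds):
            assistant_response, user_response = await session.astep(assistant_msg)
            if assistant_response.terminated or user_response.terminated:
                break
            assistant_msg = assistant_response.msgs[0]

    # asyncio.run(run_session(session))  # 'session' constructed elsewhere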
@@ -43,13 +43,21 @@ from .retrieval_toolkit import RetrievalToolkit
  from .notion_toolkit import NotionToolkit
  from .human_toolkit import HumanToolkit
  from .stripe_toolkit import StripeToolkit
- from .video_toolkit import VideoDownloaderToolkit
+ from .video_download_toolkit import VideoDownloaderToolkit
  from .dappier_toolkit import DappierToolkit
  from .networkx_toolkit import NetworkXToolkit
  from .semantic_scholar_toolkit import SemanticScholarToolkit
  from .zapier_toolkit import ZapierToolkit
  from .sympy_toolkit import SymPyToolkit
  from .mineru_toolkit import MinerUToolkit
+ from .audio_analysis_toolkit import AudioAnalysisToolkit
+ from .excel_toolkit import ExcelToolkit
+ from .video_analysis_toolkit import VideoAnalysisToolkit
+ from .image_analysis_toolkit import ImageAnalysisToolkit
+ from .mcp_toolkit import MCPToolkit
+ from .web_toolkit import WebToolkit
+ from .file_write_toolkit import FileWriteToolkit
+ from .terminal_toolkit import TerminalToolkit


  __all__ = [
@@ -88,4 +96,12 @@ __all__ = [
      'ZapierToolkit',
      'SymPyToolkit',
      'MinerUToolkit',
+     'MCPToolkit',
+     'AudioAnalysisToolkit',
+     'ExcelToolkit',
+     'VideoAnalysisToolkit',
+     'ImageAnalysisToolkit',
+     'WebToolkit',
+     'FileWriteToolkit',
+     'TerminalToolkit',
  ]
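
The newly exported toolkits plug into agents the same way as the existing ones; a sketch assuming the usual get_tools() interface that CAMEL toolkits share:

    from camel.toolkits import ExcelToolkit, TerminalToolkit

    # Each toolkit exposes its functions as FunctionTool objects.
    tools = ExcelToolkit().get_tools() + TerminalToolkit().get_tools()
    print([tool.get_function_name() for tool in tools])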