camel-ai 0.1.6.2__py3-none-any.whl → 0.1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/configs/gemini_config.py +0 -1
- camel/configs/groq_config.py +1 -1
- camel/configs/mistral_config.py +14 -10
- camel/embeddings/mistral_embedding.py +5 -5
- camel/interpreters/docker_interpreter.py +1 -1
- camel/loaders/__init__.py +1 -2
- camel/loaders/base_io.py +118 -52
- camel/loaders/jina_url_reader.py +6 -6
- camel/loaders/unstructured_io.py +34 -295
- camel/models/__init__.py +2 -0
- camel/models/mistral_model.py +120 -26
- camel/models/model_factory.py +3 -3
- camel/models/openai_compatibility_model.py +105 -0
- camel/retrievers/auto_retriever.py +40 -52
- camel/retrievers/bm25_retriever.py +9 -6
- camel/retrievers/vector_retriever.py +29 -20
- camel/storages/object_storages/__init__.py +22 -0
- camel/storages/object_storages/amazon_s3.py +205 -0
- camel/storages/object_storages/azure_blob.py +166 -0
- camel/storages/object_storages/base.py +115 -0
- camel/storages/object_storages/google_cloud.py +152 -0
- camel/toolkits/retrieval_toolkit.py +6 -6
- camel/toolkits/search_toolkit.py +4 -4
- camel/types/enums.py +7 -0
- camel/utils/token_counting.py +7 -3
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/METADATA +9 -5
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/RECORD +29 -23
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/WHEEL +0 -0
camel/loaders/unstructured_io.py
CHANGED
@@ -12,12 +12,19 @@
 # limitations under the License.
 # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
 import uuid
-
+import warnings
+from typing import (
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Union,
+)
 
 from unstructured.documents.elements import Element
 
-from camel.utils import dependencies_required
-
 
 class UnstructuredIO:
     r"""A class to handle various functionalities provided by the
@@ -25,56 +32,12 @@ class UnstructuredIO:
     extracting, staging, chunking data, and integrating with cloud
     services like S3 and Azure for data connection.
 
-
-
-    the Unstructured library.
+    References:
+        https://docs.unstructured.io/
     """
 
-
-
-    def __init__(self):
-        r"""Initializes the UnstructuredIO class and ensures the
-        installed version of Unstructured library meets the minimum
-        requirements.
-        """
-        self._ensure_unstructured_version(self.UNSTRUCTURED_MIN_VERSION)
-
-    @dependencies_required('unstructured')
-    def _ensure_unstructured_version(self, min_version: str) -> None:
-        r"""Validates that the installed 'Unstructured' library version
-        satisfies the specified minimum version requirement. This function is
-        essential for ensuring compatibility with features that depend on a
-        certain version of the 'Unstructured' package.
-
-        Args:
-            min_version (str): The minimum version required, specified in
-                `'major.minor.patch'` format.
-
-        Raises:
-            ImportError: If the 'Unstructured' package is not available in the
-                environment.
-            ValueError: If the current `'Unstructured'` version is older than
-                the required minimum version.
-
-        Notes:
-            Uses the 'packaging.version' module to parse and compare version
-                strings.
-        """
-        from packaging import version
-        from unstructured.__version__ import __version__
-
-        # Use packaging.version to compare versions
-        min_ver = version.parse(min_version)
-        installed_ver = version.parse(__version__)
-
-        if installed_ver < min_ver:
-            raise ValueError(
-                f"Require `unstructured>={min_version}`, "
-                f"you have {__version__}."
-            )
-
+    @staticmethod
     def create_element_from_text(
-        self,
         text: str,
         element_id: Optional[Union[str, uuid.UUID]] = None,
         embeddings: Optional[List[float]] = None,
@@ -89,8 +52,8 @@ class UnstructuredIO:
 
         Args:
             text (str): The text content for the element.
-            element_id (Union[str, uuid.UUID], optional): Unique
-
+            element_id (Optional[Union[str, uuid.UUID]], optional): Unique
+                identifier for the element. Defaults to `None`.
             embeddings (Optional[List[float]], optional): A list of float
                 numbers representing the text embeddings. Defaults to `None`.
             filename (Optional[str], optional): The name of the file the
@@ -120,16 +83,16 @@ class UnstructuredIO:
 
         return Text(
             text=text,
-            element_id=element_id
+            element_id=element_id or uuid.uuid4(),
             metadata=metadata,
             embeddings=embeddings,
         )
 
+    @staticmethod
     def parse_file_or_url(
-        self,
         input_path: str,
         **kwargs: Any,
-    ) -> List[Element]:
+    ) -> Union[List[Element], None]:
         r"""Loads a file or a URL and parses its contents into elements.
 
         Args:
@@ -137,12 +100,12 @@ class UnstructuredIO:
             **kwargs: Extra kwargs passed to the partition function.
 
         Returns:
-            List[Element]: List of elements after parsing the file
+            Union[List[Element],None]: List of elements after parsing the file
+                or URL if success.
 
         Raises:
             FileNotFoundError: If the file does not exist at the path
                 specified.
-            Exception: For any other issues during file or URL parsing.
 
         Notes:
             Available document types:
@@ -166,8 +129,9 @@ class UnstructuredIO:
             try:
                 elements = partition_html(url=input_path, **kwargs)
                 return elements
-            except Exception
-
+            except Exception:
+                warnings.warn(f"Failed to parse the URL: {input_path}")
+                return None
 
         else:
             # Handling file
@@ -184,13 +148,12 @@ class UnstructuredIO:
                 with open(input_path, "rb") as f:
                     elements = partition(file=f, **kwargs)
                     return elements
-            except Exception
-
-
-                ) from e
+            except Exception:
+                warnings.warn(f"Failed to partition the file: {input_path}")
+                return None
 
+    @staticmethod
     def clean_text_data(
-        self,
         text: str,
         clean_options: Optional[List[Tuple[str, Dict[str, Any]]]] = None,
     ) -> str:
@@ -253,7 +216,7 @@ class UnstructuredIO:
         )
         from unstructured.cleaners.translate import translate_text
 
-        cleaning_functions = {
+        cleaning_functions: Any = {
             "clean_extra_whitespace": clean_extra_whitespace,
             "clean_bullets": clean_bullets,
             "clean_ordered_bullets": clean_ordered_bullets,
@@ -291,8 +254,8 @@ class UnstructuredIO:
 
         return cleaned_text
 
+    @staticmethod
     def extract_data_from_text(
-        self,
         text: str,
         extract_type: Literal[
             'extract_datetimetz',
@@ -340,7 +303,7 @@ class UnstructuredIO:
             extract_us_phone_number,
         )
 
-        extraction_functions = {
+        extraction_functions: Any = {
             "extract_datetimetz": extract_datetimetz,
             "extract_email_address": extract_email_address,
             "extract_ip_address": extract_ip_address,
@@ -357,8 +320,8 @@ class UnstructuredIO:
 
         return extraction_functions[extract_type](text, **kwargs)
 
+    @staticmethod
     def stage_elements(
-        self,
         elements: List[Any],
         stage_type: Literal[
             'convert_to_csv',
@@ -416,7 +379,7 @@ class UnstructuredIO:
             weaviate,
         )
 
-        staging_functions = {
+        staging_functions: Any = {
             "convert_to_csv": base.convert_to_csv,
             "convert_to_dataframe": base.convert_to_dataframe,
             "convert_to_dict": base.convert_to_dict,
@@ -441,8 +404,9 @@ class UnstructuredIO:
 
         return staging_functions[stage_type](elements, **kwargs)
 
+    @staticmethod
     def chunk_elements(
-
+        elements: List[Any], chunk_type: str, **kwargs
     ) -> List[Element]:
         r"""Chunks elements by titles.
 
@@ -470,228 +434,3 @@ class UnstructuredIO:
 
         # Format chunks into a list of dictionaries (or your preferred format)
         return chunking_functions[chunk_type](elements, **kwargs)
-
-    def run_s3_ingest(
-        self,
-        s3_url: str,
-        output_dir: str,
-        num_processes: int = 2,
-        anonymous: bool = True,
-    ) -> None:
-        r"""Processes documents from an S3 bucket and stores structured
-        outputs locally.
-
-        Args:
-            s3_url (str): The URL of the S3 bucket.
-            output_dir (str): Local directory to store the processed outputs.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-            anonymous (bool, optional): Flag to run anonymously if
-                required. (default: :obj:`True`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[s3]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-
-        from unstructured.ingest.interfaces import (
-            FsspecConfig,
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import S3Runner
-
-        runner = S3Runner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-            fsspec_config=FsspecConfig(remote_url=s3_url),
-        )
-        runner.run(anonymous=anonymous)
-
-    def run_azure_ingest(
-        self,
-        azure_url: str,
-        output_dir: str,
-        account_name: str,
-        num_processes: int = 2,
-    ) -> None:
-        r"""Processes documents from an Azure storage container and stores
-        structured outputs locally.
-
-        Args:
-            azure_url (str): The URL of the Azure storage container.
-            output_dir (str): Local directory to store the processed outputs.
-            account_name (str): Azure account name for accessing the container.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[azure]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-        from unstructured.ingest.interfaces import (
-            FsspecConfig,
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import AzureRunner
-
-        runner = AzureRunner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-            fsspec_config=FsspecConfig(remote_url=azure_url),
-        )
-        runner.run(account_name=account_name)
-
-    def run_github_ingest(
-        self,
-        repo_url: str,
-        git_branch: str,
-        output_dir: str,
-        num_processes: int = 2,
-    ) -> None:
-        r"""Processes documents from a GitHub repository and stores
-        structured outputs locally.
-
-        Args:
-            repo_url (str): URL of the GitHub repository.
-            git_branch (str): Git branch name to process.
-            output_dir (str): Local directory to store the processed outputs.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[github]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-        from unstructured.ingest.interfaces import (
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import GithubRunner
-
-        runner = GithubRunner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-        )
-        runner.run(url=repo_url, git_branch=git_branch)
-
-    def run_slack_ingest(
-        self,
-        channels: List[str],
-        token: str,
-        start_date: str,
-        end_date: str,
-        output_dir: str,
-        num_processes: int = 2,
-    ) -> None:
-        r"""Processes documents from specified Slack channels and stores
-        structured outputs locally.
-
-        Args:
-            channels (List[str]): List of Slack channel IDs.
-            token (str): Slack API token.
-            start_date (str): Start date for fetching data.
-            end_date (str): End date for fetching data.
-            output_dir (str): Local directory to store the processed outputs.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[slack]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-        from unstructured.ingest.interfaces import (
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import SlackRunner
-
-        runner = SlackRunner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-        )
-        runner.run(
-            channels=channels,
-            token=token,
-            start_date=start_date,
-            end_date=end_date,
-        )
-
-    def run_discord_ingest(
-        self,
-        channels: List[str],
-        token: str,
-        output_dir: str,
-        num_processes: int = 2,
-    ) -> None:
-        r"""Processes messages from specified Discord channels and stores
-        structured outputs locally.
-
-        Args:
-            channels (List[str]): List of Discord channel IDs.
-            token (str): Discord bot token.
-            output_dir (str): Local directory to store the processed outputs.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[discord]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-        from unstructured.ingest.interfaces import (
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import DiscordRunner
-
-        runner = DiscordRunner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-        )
-        runner.run(channels=channels, token=token)
camel/models/__init__.py
CHANGED
@@ -23,6 +23,7 @@ from .nemotron_model import NemotronModel
 from .ollama_model import OllamaModel
 from .open_source_model import OpenSourceModel
 from .openai_audio_models import OpenAIAudioModels
+from .openai_compatibility_model import OpenAICompatibilityModel
 from .openai_model import OpenAIModel
 from .stub_model import StubModel
 from .vllm_model import VLLMModel
@@ -45,4 +46,5 @@ __all__ = [
     'OllamaModel',
     'VLLMModel',
     'GeminiModel',
+    'OpenAICompatibilityModel',
 ]
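The new backend added in camel/models/openai_compatibility_model.py is exported from the package root, so it can be imported directly; its constructor signature is not part of this diff, so only the import is sketched here:

    from camel.models import OpenAICompatibilityModel  # new public export in 0.1.6.5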
camel/models/mistral_model.py
CHANGED
@@ -15,7 +15,10 @@ import os
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 if TYPE_CHECKING:
-    from mistralai.models
+    from mistralai.models import (
+        ChatCompletionResponse,
+        Messages,
+    )
 
 from camel.configs import MISTRAL_API_PARAMS
 from camel.messages import OpenAIMessage
@@ -23,16 +26,24 @@ from camel.models import BaseModelBackend
 from camel.types import ChatCompletion, ModelType
 from camel.utils import (
     BaseTokenCounter,
-
+    OpenAITokenCounter,
     api_keys_required,
 )
 
+try:
+    import os
+
+    if os.getenv("AGENTOPS_API_KEY") is not None:
+        from agentops import LLMEvent, record
+    else:
+        raise ImportError
+except (ImportError, AttributeError):
+    LLMEvent = None
+
 
 class MistralModel(BaseModelBackend):
     r"""Mistral API in a unified BaseModelBackend interface."""
 
-    # TODO: Support tool calling.
-
     def __init__(
         self,
         model_type: ModelType,
@@ -52,32 +63,37 @@ class MistralModel(BaseModelBackend):
                 mistral service. (default: :obj:`None`)
             url (Optional[str]): The url to the mistral service.
             token_counter (Optional[BaseTokenCounter]): Token counter to use
-                for the model. If not provided, `
+                for the model. If not provided, `OpenAITokenCounter` will be
                 used.
         """
         super().__init__(
             model_type, model_config_dict, api_key, url, token_counter
         )
         self._api_key = api_key or os.environ.get("MISTRAL_API_KEY")
+        self._url = url or os.environ.get("MISTRAL_SERVER_URL")
 
-        from mistralai
+        from mistralai import Mistral
 
-        self._client =
+        self._client = Mistral(api_key=self._api_key, server_url=self._url)
         self._token_counter: Optional[BaseTokenCounter] = None
 
-    def
+    def _to_openai_response(
         self, response: 'ChatCompletionResponse'
     ) -> ChatCompletion:
         tool_calls = None
-        if
+        if (
+            response.choices
+            and response.choices[0].message
+            and response.choices[0].message.tool_calls is not None
+        ):
             tool_calls = [
                 dict(
-                    id=tool_call.id,
+                    id=tool_call.id,  # type: ignore[union-attr]
                     function={
-                        "name": tool_call.function.name,
-                        "arguments": tool_call.function.arguments,
+                        "name": tool_call.function.name,  # type: ignore[union-attr]
+                        "arguments": tool_call.function.arguments,  # type: ignore[union-attr]
                     },
-                    type=tool_call.type
+                    type=tool_call.TYPE,  # type: ignore[union-attr]
                 )
                 for tool_call in response.choices[0].message.tool_calls
             ]
@@ -86,14 +102,14 @@ class MistralModel(BaseModelBackend):
             id=response.id,
             choices=[
                 dict(
-                    index=response.choices[0].index,
+                    index=response.choices[0].index,  # type: ignore[index]
                     message={
-                        "role": response.choices[0].message.role,
-                        "content": response.choices[0].message.content,
+                        "role": response.choices[0].message.role,  # type: ignore[index,union-attr]
+                        "content": response.choices[0].message.content,  # type: ignore[index,union-attr]
                         "tool_calls": tool_calls,
                     },
-                    finish_reason=response.choices[0].finish_reason
-                    if response.choices[0].finish_reason
+                    finish_reason=response.choices[0].finish_reason  # type: ignore[index]
+                    if response.choices[0].finish_reason  # type: ignore[index]
                     else None,
                 )
             ],
@@ -105,17 +121,79 @@ class MistralModel(BaseModelBackend):
 
         return obj
 
+    def _to_mistral_chatmessage(
+        self,
+        messages: List[OpenAIMessage],
+    ) -> List["Messages"]:
+        import uuid
+
+        from mistralai.models import (
+            AssistantMessage,
+            FunctionCall,
+            SystemMessage,
+            ToolCall,
+            ToolMessage,
+            UserMessage,
+        )
+
+        new_messages = []
+        for msg in messages:
+            tool_id = uuid.uuid4().hex[:9]
+            tool_call_id = uuid.uuid4().hex[:9]
+
+            role = msg.get("role")
+            function_call = msg.get("function_call")
+            content = msg.get("content")
+
+            mistral_function_call = None
+            if function_call:
+                mistral_function_call = FunctionCall(
+                    name=function_call.get("name"),  # type: ignore[attr-defined]
+                    arguments=function_call.get("arguments"),  # type: ignore[attr-defined]
+                )
+
+            tool_calls = None
+            if mistral_function_call:
+                tool_calls = [
+                    ToolCall(function=mistral_function_call, id=tool_id)
+                ]
+
+            if role == "user":
+                new_messages.append(UserMessage(content=content))  # type: ignore[arg-type]
+            elif role == "assistant":
+                new_messages.append(
+                    AssistantMessage(content=content, tool_calls=tool_calls)  # type: ignore[arg-type]
+                )
+            elif role == "system":
+                new_messages.append(SystemMessage(content=content))  # type: ignore[arg-type]
+            elif role in {"tool", "function"}:
+                new_messages.append(
+                    ToolMessage(
+                        content=content,  # type: ignore[arg-type]
+                        tool_call_id=tool_call_id,
+                        name=msg.get("name"),  # type: ignore[arg-type]
+                    )
+                )
+            else:
+                raise ValueError(f"Unsupported message role: {role}")
+
+        return new_messages  # type: ignore[return-value]
+
     @property
     def token_counter(self) -> BaseTokenCounter:
         r"""Initialize the token counter for the model backend.
 
+        # NOTE: Temporarily using `OpenAITokenCounter` due to a current issue
+        # with installing `mistral-common` alongside `mistralai`.
+        # Refer to: https://github.com/mistralai/mistral-common/issues/37
+
         Returns:
             BaseTokenCounter: The token counter following the model's
                 tokenization style.
         """
         if not self._token_counter:
-            self._token_counter =
-
+            self._token_counter = OpenAITokenCounter(
+                model=ModelType.GPT_4O_MINI
             )
         return self._token_counter
 
@@ -131,17 +209,33 @@ class MistralModel(BaseModelBackend):
             in OpenAI API format.
 
         Returns:
-            ChatCompletion
+            ChatCompletion.
         """
-
-
+        mistral_messages = self._to_mistral_chatmessage(messages)
+
+        response = self._client.chat.complete(
+            messages=mistral_messages,
             model=self.model_type.value,
             **self.model_config_dict,
         )
 
-
+        openai_response = self._to_openai_response(response)  # type: ignore[arg-type]
+
+        # Add AgentOps LLM Event tracking
+        if LLMEvent:
+            llm_event = LLMEvent(
+                thread_id=openai_response.id,
+                prompt=" ".join(
+                    [message.get("content") for message in messages]  # type: ignore[misc]
+                ),
+                prompt_tokens=openai_response.usage.prompt_tokens,  # type: ignore[union-attr]
+                completion=openai_response.choices[0].message.content,
+                completion_tokens=openai_response.usage.completion_tokens,  # type: ignore[union-attr]
+                model=self.model_type.value,
+            )
+            record(llm_event)
 
-        return
+        return openai_response
 
     def check_model_config(self):
         r"""Check whether the model configuration contains any
@@ -161,7 +255,7 @@ class MistralModel(BaseModelBackend):
     @property
     def stream(self) -> bool:
         r"""Returns whether the model is in stream mode, which sends partial
-        results each time.
+        results each time. Current it's not supported.
 
         Returns:
             bool: Whether the model is in stream mode.