camel-ai 0.1.6.2__py3-none-any.whl → 0.1.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/interpreters/docker_interpreter.py +1 -1
- camel/loaders/__init__.py +1 -2
- camel/loaders/base_io.py +118 -52
- camel/loaders/jina_url_reader.py +6 -6
- camel/loaders/unstructured_io.py +24 -286
- camel/retrievers/auto_retriever.py +25 -35
- camel/retrievers/vector_retriever.py +20 -18
- camel/storages/object_storages/__init__.py +22 -0
- camel/storages/object_storages/amazon_s3.py +205 -0
- camel/storages/object_storages/azure_blob.py +166 -0
- camel/storages/object_storages/base.py +115 -0
- camel/storages/object_storages/google_cloud.py +152 -0
- camel/toolkits/retrieval_toolkit.py +5 -5
- camel/toolkits/search_toolkit.py +4 -4
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.3.dist-info}/METADATA +7 -3
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.3.dist-info}/RECORD +18 -13
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.3.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path, PurePath
|
|
17
|
+
from typing import Optional, Tuple
|
|
18
|
+
from warnings import warn
|
|
19
|
+
|
|
20
|
+
from camel.loaders import File
|
|
21
|
+
from camel.storages.object_storages.base import BaseObjectStorage
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AmazonS3Storage(BaseObjectStorage):
    r"""A class to connect with AWS S3 object storage to put and get objects
    from one S3 bucket. The class will first try to use the credentials passed
    as arguments, if not provided, it will look for the environment variables
    `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. If none of these are
    provided, it will try to use the local credentials (will be created if
    logged in with AWS CLI).

    Args:
        bucket_name (str): The name of the S3 bucket.
        create_if_not_exists (bool, optional): Whether to create the bucket if
            it does not exist. Defaults to True.
        access_key_id (Optional[str], optional): The AWS access key ID.
            Defaults to None.
        secret_access_key (Optional[str], optional): The AWS secret access key.
            Defaults to None.
        anonymous (bool, optional): Whether to use anonymous access. Defaults
            to False.

    References:
        https://aws.amazon.com/pm/serv-s3/

        https://aws.amazon.com/cli/
    """

    def __init__(
        self,
        bucket_name: str,
        create_if_not_exists: bool = True,
        access_key_id: Optional[str] = None,
        secret_access_key: Optional[str] = None,
        anonymous: bool = False,
    ) -> None:
        self._bucket_name = bucket_name
        self._create_if_not_exists = create_if_not_exists

        aws_key_id = access_key_id or os.getenv("AWS_ACCESS_KEY_ID")
        aws_secret_key = secret_access_key or os.getenv(
            "AWS_SECRET_ACCESS_KEY"
        )
        if not all([aws_key_id, aws_secret_key]) and not anonymous:
            warn(
                "AWS access key not configured. Local credentials will be "
                "used."
            )
            # Make all the empty values None so that boto3 falls back to its
            # own credential lookup instead of receiving a half-filled pair.
            aws_key_id = None
            aws_secret_key = None

        # Imported lazily so that boto3 is only required when this backend is
        # actually instantiated.
        import boto3
        from botocore import UNSIGNED
        from botocore.config import Config

        if not anonymous:
            self._client = boto3.client(
                "s3",
                aws_access_key_id=aws_key_id,
                aws_secret_access_key=aws_secret_key,
            )
        else:
            # Unsigned requests: explicit credentials, if any, are ignored.
            self._client = boto3.client(
                "s3", config=Config(signature_version=UNSIGNED)
            )

        self._prepare_and_check()

    def _prepare_and_check(self) -> None:
        r"""Check privileges and existence of the bucket.

        Raises:
            PermissionError: If the bucket answers with a 403 or no AWS
                credentials could be found.
            FileNotFoundError: If the bucket does not exist and
                `create_if_not_exists` is False.
        """
        from botocore.exceptions import ClientError, NoCredentialsError

        try:
            self._client.head_bucket(Bucket=self._bucket_name)
        except ClientError as e:
            error_code = e.response['Error']['Code']
            if error_code == '403':
                raise PermissionError(
                    f"Failed to access bucket {self._bucket_name}: "
                    f"No permission."
                ) from e
            if error_code != '404':
                # Unknown failure: propagate with its original traceback.
                raise
            if not self._create_if_not_exists:
                raise FileNotFoundError(
                    f"Failed to access bucket {self._bucket_name}: Not "
                    f"found."
                ) from e
            self._create_bucket()
            warn(
                f"Bucket {self._bucket_name} not found. Automatically "
                f"created."
            )
        except NoCredentialsError as e:
            raise PermissionError("No AWS credentials found.") from e

    def _create_bucket(self) -> None:
        r"""Create the configured bucket in the region of the client.

        S3 requires a `LocationConstraint` for every region except
        us-east-1, and rejects one that names us-east-1.
        """
        region = self._client.meta.region_name
        # NOTE(review): "aws-global" is assumed to be what botocore reports
        # when no region is configured; it maps to us-east-1 - confirm.
        if region in (None, "us-east-1", "aws-global"):
            self._client.create_bucket(Bucket=self._bucket_name)
        else:
            self._client.create_bucket(
                Bucket=self._bucket_name,
                CreateBucketConfiguration={"LocationConstraint": region},
            )

    @staticmethod
    def canonicalize_path(file_path: PurePath) -> Tuple[str, str]:
        r"""Canonicalize file path for Amazon S3.

        Args:
            file_path (PurePath): The path to be canonicalized.

        Returns:
            Tuple[str, str]: The canonicalized file key and file name.
        """
        return file_path.as_posix(), file_path.name

    def _put_file(self, file_key: str, file: File) -> None:
        r"""Put a file to the Amazon S3 bucket.

        Args:
            file_key (str): The path to the object in the bucket.
            file (File): The file to be uploaded.
        """
        self._client.put_object(
            Bucket=self._bucket_name, Key=file_key, Body=file.raw_bytes
        )

    def _get_file(self, file_key: str, filename: str) -> File:
        r"""Get a file from the Amazon S3 bucket.

        Args:
            file_key (str): The path to the object in the bucket.
            filename (str): The name of the file.

        Returns:
            File: The object from the S3 bucket.
        """
        response = self._client.get_object(
            Bucket=self._bucket_name, Key=file_key
        )
        raw_bytes = response["Body"].read()
        return File.create_file_from_raw_bytes(raw_bytes, filename)

    def _upload_file(
        self, local_file_path: Path, remote_file_key: str
    ) -> None:
        r"""Upload a local file to the Amazon S3 bucket.

        Args:
            local_file_path (Path): The path to the local file to be uploaded.
            remote_file_key (str): The path to the object in the bucket.
        """
        self._client.upload_file(
            Bucket=self._bucket_name,
            Key=remote_file_key,
            # boto3 documents `Filename` as str; older releases reject Path.
            Filename=str(local_file_path),
        )

    def _download_file(
        self,
        local_file_path: Path,
        remote_file_key: str,
    ) -> None:
        r"""Download a file from the Amazon S3 bucket to the local system.

        Args:
            local_file_path (Path): The path to the local file to be saved.
            remote_file_key (str): The key of the object in the bucket.
        """
        self._client.download_file(
            Bucket=self._bucket_name,
            Key=remote_file_key,
            # boto3 documents `Filename` as str; older releases reject Path.
            Filename=str(local_file_path),
        )

    def _object_exists(self, file_key: str) -> bool:
        r"""
        Check if the object exists in the Amazon S3 bucket.

        Any `ClientError` from the HEAD request is reported as "does not
        exist", including permission errors.

        Args:
            file_key: The key of the object in the bucket.

        Returns:
            bool: Whether the object exists in the bucket.
        """
        try:
            self._client.head_object(Bucket=self._bucket_name, Key=file_key)
            return True
        except self._client.exceptions.ClientError:
            return False
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
import os
|
|
15
|
+
from pathlib import Path, PurePath
|
|
16
|
+
from typing import Optional, Tuple
|
|
17
|
+
from warnings import warn
|
|
18
|
+
|
|
19
|
+
from camel.loaders import File
|
|
20
|
+
from camel.storages.object_storages.base import BaseObjectStorage
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class AzureBlobStorage(BaseObjectStorage):
    r"""A class to connect to Azure Blob Storage. It will connect to one
    container in the storage account.

    Args:
        storage_account_name (str): The name of the storage account.
        container_name (str): The name of the container.
        create_if_not_exists (bool, optional): Whether to create the container
            if it does not exist. Defaults to True.
        access_key (Optional[str], optional): The access key of the storage
            account. If not given, the environment variable
            `AZURE_ACCESS_KEY` is used. Defaults to None.

    References:
        https://azure.microsoft.com/en-us/products/storage/blobs
    """

    def __init__(
        self,
        storage_account_name: str,
        container_name: str,
        create_if_not_exists: bool = True,
        access_key: Optional[str] = None,
    ) -> None:
        access_key = access_key or os.getenv("AZURE_ACCESS_KEY")
        self._create_if_not_exists = create_if_not_exists

        if not access_key:
            warn("AZURE_ACCESS_KEY not provided.")
            # Make all the empty values None
            access_key = None

        # Imported lazily so that the Azure SDK is only required when this
        # backend is actually instantiated.
        from azure.storage.blob import ContainerClient

        self._client = ContainerClient(
            account_url="https://"
            f"{storage_account_name}.blob.core.windows.net",
            credential=access_key,
            container_name=container_name,
        )

        self._prepare_and_check()

    def _prepare_and_check(self) -> None:
        r"""Check privileges and existence of the container.

        Raises:
            FileNotFoundError: If the container does not exist and
                `create_if_not_exists` is False.
            PermissionError: If the service rejects the credentials.
        """
        from azure.core.exceptions import ClientAuthenticationError

        try:
            exists = self._client.exists()
            if not exists and self._create_if_not_exists:
                self._client.create_container()
                warn(
                    f"Container {self._client.container_name} not found. "
                    f"Automatically created."
                )
            elif not exists:
                raise FileNotFoundError(
                    f"Failed to access container {self._client.container_name}"
                    f": Not found."
                )
        except ClientAuthenticationError as e:
            # Keep the SDK error as the cause so the reason stays visible.
            raise PermissionError(
                f"Failed to access container {self._client.container_name}: "
                f"No permission."
            ) from e

    @staticmethod
    def canonicalize_path(file_path: PurePath) -> Tuple[str, str]:
        r"""Canonicalize file path for Azure Blob Storage.

        Args:
            file_path (PurePath): The path to be canonicalized.

        Returns:
            Tuple[str, str]: The canonicalized file key and file name.

        Raises:
            ValueError: If the file name contains a backslash.
        """
        # for Azure, both slash and backslash will be treated as separator
        filename = file_path.name
        if "\\" in filename:
            raise ValueError(
                "Azure Blob Storage does not support backslash in filename."
            )
        return file_path.as_posix(), filename

    def _put_file(self, file_key: str, file: File) -> None:
        r"""Put a file to the Azure Blob Storage container. An existing blob
        with the same key is overwritten.

        Args:
            file_key (str): The path to the object in the container.
            file (File): The file to be uploaded.
        """
        self._client.upload_blob(
            name=file_key, data=file.raw_bytes, overwrite=True
        )

    def _get_file(self, file_key: str, filename: str) -> File:
        r"""Get a file from the Azure Blob Storage container.

        Args:
            file_key (str): The path to the object in the container.
            filename (str): The name of the file.

        Returns:
            File: The object from the container.
        """
        raw_bytes = self._client.download_blob(file_key).readall()
        file = File.create_file_from_raw_bytes(raw_bytes, filename)
        return file

    def _upload_file(
        self, local_file_path: Path, remote_file_key: str
    ) -> None:
        r"""Upload a local file to the Azure Blob Storage container. An
        existing blob with the same key is overwritten.

        Args:
            local_file_path (Path): The path to the local file to be uploaded.
            remote_file_key (str): The path to the object in the container.
        """
        with open(local_file_path, "rb") as f:
            self._client.upload_blob(
                name=remote_file_key, data=f, overwrite=True
            )

    def _download_file(
        self, local_file_path: Path, remote_file_key: str
    ) -> None:
        r"""Download a file from the Azure Blob Storage container to the local
        system.

        Args:
            local_file_path (Path): The path to the local file to be saved.
            remote_file_key (str): The key of the object in the container.
        """
        # Request the blob before opening the target: opening with "wb"
        # truncates an existing local file, which must not happen when the
        # blob cannot be fetched (e.g. it does not exist).
        downloader = self._client.download_blob(remote_file_key)
        with open(local_file_path, "wb") as f:
            # Write chunk by chunk instead of buffering the whole blob.
            downloader.readinto(f)

    def _object_exists(self, file_key: str) -> bool:
        r"""
        Check if the object exists in the Azure Blob Storage container.

        Args:
            file_key: The key of the object in the container.

        Returns:
            bool: Whether the object exists in the container.
        """
        return self._client.get_blob_client(file_key).exists()
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from pathlib import Path, PurePath
|
|
17
|
+
from typing import Tuple
|
|
18
|
+
|
|
19
|
+
from camel.loaders import File
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class BaseObjectStorage(ABC):
    r"""Abstract base class of the object storage backends.

    The public methods turn the given path into a storage key with
    `canonicalize_path` and hand the work over to the matching private
    hook, which every subclass has to implement.
    """

    def object_exists(self, file_path: PurePath) -> bool:
        r"""Tell whether an object is present in the storage.

        Args:
            file_path (PurePath): The path to the object in the storage.

        Returns:
            bool: True if the object exists, False otherwise.
        """
        key = self.canonicalize_path(file_path)[0]
        return self._object_exists(key)

    @staticmethod
    @abstractmethod
    def canonicalize_path(file_path: PurePath) -> Tuple[str, str]:
        r"""Convert a path into the key and file name used by the storage.

        Args:
            file_path (PurePath): The path to be canonicalized.

        Returns:
            Tuple[str, str]: The canonicalized file key and file name.
        """

    def put_file(self, file_path: PurePath, file: File) -> None:
        r"""Store a file object in the object storage.

        Args:
            file_path (PurePath): The path to the object in the storage.
            file (File): The file to be put.
        """
        key = self.canonicalize_path(file_path)[0]
        self._put_file(key, file)

    def get_file(self, file_path: PurePath) -> File:
        r"""Fetch a file object from the object storage.

        Args:
            file_path (PurePath): The path to the object in the storage.

        Returns:
            File: The file object get from the storage.
        """
        canonical = self.canonicalize_path(file_path)
        return self._get_file(canonical[0], canonical[1])

    def upload_file(
        self, local_file_path: Path, remote_file_path: PurePath
    ) -> None:
        r"""Copy a file from the local system into the object storage.

        Args:
            local_file_path (Path): The path to the local file to be uploaded.
            remote_file_path (PurePath): The path to the object in storage.

        Raises:
            FileNotFoundError: If the local path does not exist.
        """
        # The remote path is canonicalized first, so an invalid remote path
        # is reported before a missing local file.
        remote_key = self.canonicalize_path(remote_file_path)[0]
        if local_file_path.exists():
            self._upload_file(local_file_path, remote_key)
        else:
            raise FileNotFoundError(
                f"Local file {local_file_path} does not exist."
            )

    def download_file(
        self, local_file_path: Path, remote_file_path: PurePath
    ) -> None:
        r"""Copy an object from the object storage to the local system.

        Args:
            local_file_path (Path): The path to the local file to be saved.
            remote_file_path (PurePath): The path to the object in storage.
        """
        remote_key = self.canonicalize_path(remote_file_path)[0]
        self._download_file(local_file_path, remote_key)

    @abstractmethod
    def _put_file(self, file_key: str, file: File) -> None:
        r"""Backend hook of `put_file`, called with the canonical key."""

    @abstractmethod
    def _get_file(self, file_key: str, filename: str) -> File:
        r"""Backend hook of `get_file`, called with the canonical key and
        the file name.
        """

    @abstractmethod
    def _object_exists(self, file_key: str) -> bool:
        r"""Backend hook of `object_exists`, called with the canonical key."""

    @abstractmethod
    def _upload_file(
        self, local_file_path: Path, remote_file_key: str
    ) -> None:
        r"""Backend hook of `upload_file`, called once the local path is
        known to exist.
        """

    @abstractmethod
    def _download_file(
        self,
        local_file_path: Path,
        remote_file_key: str,
    ) -> None:
        r"""Backend hook of `download_file`, called with the canonical key."""
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
from pathlib import Path, PurePath
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from warnings import warn
|
|
17
|
+
|
|
18
|
+
from camel.loaders import File
|
|
19
|
+
from camel.storages.object_storages.base import BaseObjectStorage
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class GoogleCloudStorage(BaseObjectStorage):
    r"""A class to connect to Google Cloud Storage. It will connect to one
    bucket in the storage account.

    Note that Google Cloud Storage does not support api key authentication.
    Therefore, before using this class, you need to log in with gcloud command
    line tool and save the credentials first.

    Args:
        bucket_name (str): The name of the bucket.
        create_if_not_exists (bool, optional): Whether to create the bucket if
            it does not exist. Defaults to True.
        anonymous (bool, optional): Whether to use anonymous access. Defaults
            to False.

    References:
        https://cloud.google.com/storage

        https://cloud.google.com/docs/authentication/api-keys
    """

    def __init__(
        self,
        bucket_name: str,
        create_if_not_exists: bool = True,
        anonymous: bool = False,
    ) -> None:
        # Imported lazily so that the Google SDK is only required when this
        # backend is actually instantiated.
        from google.cloud import storage

        self.create_if_not_exists = create_if_not_exists

        if anonymous:
            client = storage.Client.create_anonymous_client()
        else:
            client = storage.Client()
        # Despite its name, `_client` holds the bucket handle, not the
        # storage client itself.
        self._client = client.bucket(bucket_name)

        self._prepare_and_check()

    @staticmethod
    def canonicalize_path(file_path: PurePath) -> Tuple[str, str]:
        r"""Canonicalize the path for Google Cloud Storage.

        Args:
            file_path (PurePath): The path to be canonicalized.

        Returns:
            Tuple[str, str]: The canonicalized file key and file name.
        """
        return file_path.as_posix(), file_path.name

    def _prepare_and_check(self) -> None:
        r"""Check privileges and existence of the bucket.

        Raises:
            FileNotFoundError: If the bucket does not exist and
                `create_if_not_exists` is False.
            PermissionError: If the SDK reports an invalid operation.
        """
        from google.auth.exceptions import InvalidOperation

        try:
            exists = self._client.exists()
            if not exists and self.create_if_not_exists:
                self._client.create()
                warn(
                    f"Bucket {self._client.name} not found. Automatically "
                    f"created."
                )
            elif not exists:
                raise FileNotFoundError(
                    f"Failed to access bucket {self._client.name}: Not found."
                )
        except InvalidOperation as e:
            # Keep the SDK error as the cause so the reason stays visible.
            raise PermissionError(
                f"Failed to access bucket {self._client.name}: No permission."
            ) from e

    def _require_blob(self, file_key: str):
        r"""Look up an existing blob in the bucket.

        Args:
            file_key (str): The key of the object in the bucket.

        Returns:
            The blob handle returned by the bucket.

        Raises:
            FileNotFoundError: If the bucket has no object with that key.
        """
        blob = self._client.get_blob(file_key)
        # `get_blob` signals a missing object by returning None.
        if blob is None:
            raise FileNotFoundError(
                f"Failed to access object {file_key} in bucket "
                f"{self._client.name}: Not found."
            )
        return blob

    def _put_file(self, file_key: str, file: File) -> None:
        r"""Put a file to the GCloud bucket.

        Args:
            file_key (str): The path to the object in the bucket.
            file (File): The file to be uploaded.
        """
        self._client.blob(file_key).upload_from_string(file.raw_bytes)

    def _get_file(self, file_key: str, filename: str) -> File:
        r"""Get a file from the GCloud bucket.

        Args:
            file_key (str): The path to the object in the bucket.
            filename (str): The name of the file.

        Returns:
            File: The object from the GCloud bucket.

        Raises:
            FileNotFoundError: If the object does not exist in the bucket.
        """
        raw_bytes = self._require_blob(file_key).download_as_bytes()
        return File.create_file_from_raw_bytes(raw_bytes, filename)

    def _upload_file(
        self, local_file_path: Path, remote_file_key: str
    ) -> None:
        r"""Upload a local file to the GCloud bucket.

        Args:
            local_file_path (Path): The path to the local file to be uploaded.
            remote_file_key (str): The path to the object in the bucket.
        """
        self._client.blob(remote_file_key).upload_from_filename(
            local_file_path
        )

    def _download_file(
        self, local_file_path: Path, remote_file_key: str
    ) -> None:
        r"""Download a file from the GCloud bucket to the local system.

        Args:
            local_file_path (Path): The path to the local file to be saved.
            remote_file_key (str): The key of the object in the bucket.

        Raises:
            FileNotFoundError: If the object does not exist in the bucket.
        """
        self._require_blob(remote_file_key).download_to_filename(
            local_file_path
        )

    def _object_exists(self, file_key: str) -> bool:
        r"""
        Check if the object exists in the GCloud bucket.

        Args:
            file_key: The key of the object in the bucket.

        Returns:
            bool: Whether the object exists in the bucket.
        """
        return self._client.blob(file_key).exists()
|
|
@@ -27,7 +27,7 @@ class RetrievalToolkit(BaseToolkit):
|
|
|
27
27
|
"""
|
|
28
28
|
|
|
29
29
|
def information_retrieval(
|
|
30
|
-
self, query: str,
|
|
30
|
+
self, query: str, contents: Union[str, List[str]]
|
|
31
31
|
) -> str:
|
|
32
32
|
r"""Retrieves information from a local vector storage based on the
|
|
33
33
|
specified query. This function connects to a local vector storage
|
|
@@ -37,8 +37,8 @@ class RetrievalToolkit(BaseToolkit):
|
|
|
37
37
|
|
|
38
38
|
Args:
|
|
39
39
|
query (str): The question or query for which an answer is required.
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
contents (Union[str, List[str]]): Local file paths, remote URLs or
|
|
41
|
+
string contents.
|
|
42
42
|
|
|
43
43
|
Returns:
|
|
44
44
|
str: The information retrieved in response to the query, aggregated
|
|
@@ -47,7 +47,7 @@ class RetrievalToolkit(BaseToolkit):
|
|
|
47
47
|
Example:
|
|
48
48
|
# Retrieve information about CAMEL AI.
|
|
49
49
|
information_retrieval(query = "what is CAMEL AI?",
|
|
50
|
-
|
|
50
|
+
contents="https://www.camel-ai.org/")
|
|
51
51
|
"""
|
|
52
52
|
auto_retriever = AutoRetriever(
|
|
53
53
|
vector_storage_local_path="camel/temp_storage",
|
|
@@ -55,7 +55,7 @@ class RetrievalToolkit(BaseToolkit):
|
|
|
55
55
|
)
|
|
56
56
|
|
|
57
57
|
retrieved_info = auto_retriever.run_vector_retriever(
|
|
58
|
-
query=query,
|
|
58
|
+
query=query, contents=contents, top_k=3
|
|
59
59
|
)
|
|
60
60
|
return retrieved_info
|
|
61
61
|
|