camel-ai 0.1.6.2__py3-none-any.whl → 0.1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/configs/gemini_config.py +0 -1
- camel/configs/groq_config.py +1 -1
- camel/configs/mistral_config.py +14 -10
- camel/embeddings/mistral_embedding.py +5 -5
- camel/interpreters/docker_interpreter.py +1 -1
- camel/loaders/__init__.py +1 -2
- camel/loaders/base_io.py +118 -52
- camel/loaders/jina_url_reader.py +6 -6
- camel/loaders/unstructured_io.py +34 -295
- camel/models/__init__.py +2 -0
- camel/models/mistral_model.py +120 -26
- camel/models/model_factory.py +3 -3
- camel/models/openai_compatibility_model.py +105 -0
- camel/retrievers/auto_retriever.py +40 -52
- camel/retrievers/bm25_retriever.py +9 -6
- camel/retrievers/vector_retriever.py +29 -20
- camel/storages/object_storages/__init__.py +22 -0
- camel/storages/object_storages/amazon_s3.py +205 -0
- camel/storages/object_storages/azure_blob.py +166 -0
- camel/storages/object_storages/base.py +115 -0
- camel/storages/object_storages/google_cloud.py +152 -0
- camel/toolkits/retrieval_toolkit.py +6 -6
- camel/toolkits/search_toolkit.py +4 -4
- camel/types/enums.py +7 -0
- camel/utils/token_counting.py +7 -3
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/METADATA +9 -5
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/RECORD +29 -23
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/WHEEL +0 -0
camel/__init__.py
CHANGED
camel/configs/gemini_config.py
CHANGED
|
@@ -81,7 +81,6 @@ class GeminiConfig(BaseConfig):
|
|
|
81
81
|
response_mime_type: Optional[str] = None
|
|
82
82
|
response_schema: Optional[Any] = None
|
|
83
83
|
safety_settings: Optional[Any] = None
|
|
84
|
-
tools: Optional[Any] = None
|
|
85
84
|
tool_config: Optional[Any] = None
|
|
86
85
|
request_options: Optional[Any] = None
|
|
87
86
|
|
camel/configs/groq_config.py
CHANGED
|
@@ -99,7 +99,7 @@ class GroqConfig(BaseConfig):
|
|
|
99
99
|
response_format: Union[dict, NotGiven] = NOT_GIVEN
|
|
100
100
|
frequency_penalty: float = 0.0
|
|
101
101
|
user: str = ""
|
|
102
|
-
tool_choice: Optional[Union[dict[str, str], str]] = "
|
|
102
|
+
tool_choice: Optional[Union[dict[str, str], str]] = "auto"
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
GROQ_API_PARAMS = {param for param in GroqConfig.model_fields.keys()}
|
camel/configs/mistral_config.py
CHANGED
|
@@ -26,23 +26,26 @@ class MistralConfig(BaseConfig):
|
|
|
26
26
|
|
|
27
27
|
reference: https://github.com/mistralai/client-python/blob/9d238f88c41689821d7b08570f13b43426f97fd6/src/mistralai/client.py#L195
|
|
28
28
|
|
|
29
|
+
#TODO: Support stream mode
|
|
30
|
+
|
|
29
31
|
Args:
|
|
30
32
|
temperature (Optional[float], optional): temperature the temperature
|
|
31
33
|
to use for sampling, e.g. 0.5.
|
|
32
|
-
max_tokens (Optional[int], optional): the maximum number of tokens to
|
|
33
|
-
generate, e.g. 100. Defaults to None.
|
|
34
34
|
top_p (Optional[float], optional): the cumulative probability of
|
|
35
35
|
tokens to generate, e.g. 0.9. Defaults to None.
|
|
36
|
+
max_tokens (Optional[int], optional): the maximum number of tokens to
|
|
37
|
+
generate, e.g. 100. Defaults to None.
|
|
38
|
+
min_tokens (Optional[int], optional): the minimum number of tokens to
|
|
39
|
+
generate, e.g. 100. Defaults to None.
|
|
40
|
+
stop (Optional[Union[str,list[str]]]): Stop generation if this token
|
|
41
|
+
is detected. Or if one of these tokens is detected when providing
|
|
42
|
+
a string list.
|
|
36
43
|
random_seed (Optional[int], optional): the random seed to use for
|
|
37
44
|
sampling, e.g. 42. Defaults to None.
|
|
38
|
-
safe_mode (bool, optional): deprecated, use safe_prompt instead.
|
|
39
|
-
Defaults to False.
|
|
40
45
|
safe_prompt (bool, optional): whether to use safe prompt, e.g. true.
|
|
41
46
|
Defaults to False.
|
|
42
47
|
response_format (Union[Dict[str, str], ResponseFormat): format of the
|
|
43
48
|
response.
|
|
44
|
-
tools (Optional[list[OpenAIFunction]], optional): a list of tools to
|
|
45
|
-
use.
|
|
46
49
|
tool_choice (str, optional): Controls which (if
|
|
47
50
|
any) tool is called by the model. :obj:`"none"` means the model
|
|
48
51
|
will not call any tool and instead generates a message.
|
|
@@ -53,10 +56,11 @@ class MistralConfig(BaseConfig):
|
|
|
53
56
|
"""
|
|
54
57
|
|
|
55
58
|
temperature: Optional[float] = None
|
|
56
|
-
max_tokens: Optional[int] = None
|
|
57
59
|
top_p: Optional[float] = None
|
|
60
|
+
max_tokens: Optional[int] = None
|
|
61
|
+
min_tokens: Optional[int] = None
|
|
62
|
+
stop: Optional[Union[str, list[str]]] = None
|
|
58
63
|
random_seed: Optional[int] = None
|
|
59
|
-
safe_mode: bool = False
|
|
60
64
|
safe_prompt: bool = False
|
|
61
65
|
response_format: Optional[Union[Dict[str, str], Any]] = None
|
|
62
66
|
tool_choice: Optional[str] = "auto"
|
|
@@ -65,12 +69,12 @@ class MistralConfig(BaseConfig):
|
|
|
65
69
|
@classmethod
|
|
66
70
|
def fields_type_checking(cls, response_format):
|
|
67
71
|
if response_format and not isinstance(response_format, dict):
|
|
68
|
-
from mistralai.models
|
|
72
|
+
from mistralai.models import ResponseFormat
|
|
69
73
|
|
|
70
74
|
if not isinstance(response_format, ResponseFormat):
|
|
71
75
|
raise ValueError(
|
|
72
76
|
f"The tool {response_format} should be an instance "
|
|
73
|
-
"of `mistralai.models.
|
|
77
|
+
"of `mistralai.models.ResponseFormat`."
|
|
74
78
|
)
|
|
75
79
|
return response_format
|
|
76
80
|
|
camel/embeddings/mistral_embedding.py
CHANGED
|
@@ -43,7 +43,7 @@ class MistralEmbedding(BaseEmbedding[str]):
|
|
|
43
43
|
api_key: str | None = None,
|
|
44
44
|
dimensions: int | None = None,
|
|
45
45
|
) -> None:
|
|
46
|
-
from mistralai
|
|
46
|
+
from mistralai import Mistral
|
|
47
47
|
|
|
48
48
|
if not model_type.is_mistral:
|
|
49
49
|
raise ValueError("Invalid Mistral embedding model type.")
|
|
@@ -54,7 +54,7 @@ class MistralEmbedding(BaseEmbedding[str]):
|
|
|
54
54
|
assert isinstance(dimensions, int)
|
|
55
55
|
self.output_dim = dimensions
|
|
56
56
|
self._api_key = api_key or os.environ.get("MISTRAL_API_KEY")
|
|
57
|
-
self._client =
|
|
57
|
+
self._client = Mistral(api_key=self._api_key)
|
|
58
58
|
|
|
59
59
|
@api_keys_required("MISTRAL_API_KEY")
|
|
60
60
|
def embed_list(
|
|
@@ -73,12 +73,12 @@ class MistralEmbedding(BaseEmbedding[str]):
|
|
|
73
73
|
as a list of floating-point numbers.
|
|
74
74
|
"""
|
|
75
75
|
# TODO: count tokens
|
|
76
|
-
response = self._client.embeddings(
|
|
77
|
-
|
|
76
|
+
response = self._client.embeddings.create(
|
|
77
|
+
inputs=objs,
|
|
78
78
|
model=self.model_type.value,
|
|
79
79
|
**kwargs,
|
|
80
80
|
)
|
|
81
|
-
return [data.embedding for data in response.data]
|
|
81
|
+
return [data.embedding for data in response.data] # type: ignore[misc,union-attr]
|
|
82
82
|
|
|
83
83
|
def get_output_dim(self) -> int:
|
|
84
84
|
r"""Returns the output dimension of the embeddings.
|
camel/interpreters/docker_interpreter.py
CHANGED
|
@@ -130,7 +130,7 @@ class DockerInterpreter(BaseInterpreter):
|
|
|
130
130
|
code_type = self._check_code_type(code_type)
|
|
131
131
|
commands = shlex.split(
|
|
132
132
|
self._CODE_EXECUTE_CMD_MAPPING[code_type].format(
|
|
133
|
-
file_name=
|
|
133
|
+
file_name=file.as_posix()
|
|
134
134
|
)
|
|
135
135
|
)
|
|
136
136
|
if self._container is None:
|
camel/loaders/__init__.py
CHANGED
|
@@ -12,14 +12,13 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
14
|
|
|
15
|
-
from .base_io import File, read_file
|
|
15
|
+
from .base_io import File
|
|
16
16
|
from .firecrawl_reader import Firecrawl
|
|
17
17
|
from .jina_url_reader import JinaURLReader
|
|
18
18
|
from .unstructured_io import UnstructuredIO
|
|
19
19
|
|
|
20
20
|
__all__ = [
|
|
21
21
|
'File',
|
|
22
|
-
'read_file',
|
|
23
22
|
'UnstructuredIO',
|
|
24
23
|
'JinaURLReader',
|
|
25
24
|
'Firecrawl',
|
camel/loaders/base_io.py
CHANGED
|
@@ -23,52 +23,113 @@ from camel.utils import dependencies_required
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class File(ABC):
|
|
26
|
-
r"""Represents an uploaded file comprised of Documents
|
|
26
|
+
r"""Represents an uploaded file comprised of Documents.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
name (str): The name of the file.
|
|
30
|
+
file_id (str): The unique identifier of the file.
|
|
31
|
+
metadata (Dict[str, Any], optional): Additional metadata
|
|
32
|
+
associated with the file. Defaults to None.
|
|
33
|
+
docs (List[Dict[str, Any]], optional): A list of documents
|
|
34
|
+
contained within the file. Defaults to None.
|
|
35
|
+
raw_bytes (bytes, optional): The raw bytes content of the file.
|
|
36
|
+
Defaults to b"".
|
|
37
|
+
"""
|
|
27
38
|
|
|
28
39
|
def __init__(
|
|
29
40
|
self,
|
|
30
41
|
name: str,
|
|
31
|
-
|
|
42
|
+
file_id: str,
|
|
32
43
|
metadata: Optional[Dict[str, Any]] = None,
|
|
33
44
|
docs: Optional[List[Dict[str, Any]]] = None,
|
|
45
|
+
raw_bytes: bytes = b"",
|
|
34
46
|
):
|
|
35
|
-
r"""
|
|
36
|
-
|
|
37
|
-
Args:
|
|
38
|
-
name (str): The name of the file.
|
|
39
|
-
id (str): The unique identifier of the file.
|
|
40
|
-
metadata (Dict[str, Any], optional): Additional metadata
|
|
41
|
-
associated with the file. Defaults to None.
|
|
42
|
-
docs (List[Dict[str, Any]], optional): A list of documents
|
|
43
|
-
contained within the file. Defaults to None.
|
|
44
|
-
"""
|
|
45
47
|
self.name = name
|
|
46
|
-
self.id = id
|
|
48
|
+
self.file_id = file_id
|
|
47
49
|
self.metadata = metadata or {}
|
|
48
50
|
self.docs = docs or []
|
|
51
|
+
self.raw_bytes = raw_bytes
|
|
49
52
|
|
|
50
53
|
@classmethod
|
|
51
54
|
@abstractmethod
|
|
52
|
-
def from_bytes(cls, file: BytesIO) -> "File":
|
|
55
|
+
def from_bytes(cls, file: BytesIO, filename: str) -> "File":
|
|
53
56
|
r"""Creates a File object from a BytesIO object.
|
|
54
57
|
|
|
55
58
|
Args:
|
|
56
59
|
file (BytesIO): A BytesIO object representing the contents of the
|
|
57
60
|
file.
|
|
61
|
+
filename (str): The name of the file.
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
File: A File object.
|
|
65
|
+
"""
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
@classmethod
|
|
69
|
+
def from_raw_bytes(cls, raw_bytes: bytes, filename: str) -> "File":
|
|
70
|
+
r"""Creates a File object from raw bytes.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
raw_bytes (bytes): The raw bytes content of the file.
|
|
74
|
+
filename (str): The name of the file.
|
|
58
75
|
|
|
59
76
|
Returns:
|
|
60
77
|
File: A File object.
|
|
61
78
|
"""
|
|
79
|
+
file = BytesIO(raw_bytes)
|
|
80
|
+
return cls.from_bytes(file, filename)
|
|
81
|
+
|
|
82
|
+
@staticmethod
|
|
83
|
+
def create_file(file: BytesIO, filename: str) -> "File":
|
|
84
|
+
r"""Reads an uploaded file and returns a File object.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
file (BytesIO): A BytesIO object representing the contents of the
|
|
88
|
+
file.
|
|
89
|
+
filename (str): The name of the file.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
File: A File object.
|
|
93
|
+
"""
|
|
94
|
+
ext_to_cls = {
|
|
95
|
+
"docx": DocxFile,
|
|
96
|
+
"pdf": PdfFile,
|
|
97
|
+
"txt": TxtFile,
|
|
98
|
+
"json": JsonFile,
|
|
99
|
+
"html": HtmlFile,
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
ext = filename.split(".")[-1].lower()
|
|
103
|
+
if ext not in ext_to_cls:
|
|
104
|
+
raise NotImplementedError(f"File type {ext} not supported")
|
|
105
|
+
|
|
106
|
+
out_file = ext_to_cls[ext].from_bytes(file, filename)
|
|
107
|
+
return out_file
|
|
108
|
+
|
|
109
|
+
@staticmethod
|
|
110
|
+
def create_file_from_raw_bytes(raw_bytes: bytes, filename: str) -> "File":
|
|
111
|
+
r"""Reads raw bytes and returns a File object.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
raw_bytes (bytes): The raw bytes content of the file.
|
|
115
|
+
filename (str): The name of the file.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
File: A File object.
|
|
119
|
+
"""
|
|
120
|
+
file = BytesIO(raw_bytes)
|
|
121
|
+
return File.create_file(file, filename)
|
|
62
122
|
|
|
63
123
|
def __repr__(self) -> str:
|
|
64
124
|
return (
|
|
65
|
-
f"File(name={self.name}, id={self.id}, "
|
|
125
|
+
f"File(name={self.name}, id={self.file_id}, "
|
|
66
126
|
f"metadata={self.metadata}, docs={self.docs})"
|
|
67
127
|
)
|
|
68
128
|
|
|
69
129
|
def __str__(self) -> str:
|
|
70
130
|
return (
|
|
71
|
-
f"File(name={self.name}, id={self.
|
|
131
|
+
f"File(name={self.name}, id={self.file_id}, metadata="
|
|
132
|
+
f"{self.metadata})"
|
|
72
133
|
)
|
|
73
134
|
|
|
74
135
|
def copy(self) -> "File":
|
|
@@ -76,9 +137,10 @@ class File(ABC):
|
|
|
76
137
|
|
|
77
138
|
return self.__class__(
|
|
78
139
|
name=self.name,
|
|
79
|
-
|
|
140
|
+
file_id=self.file_id,
|
|
80
141
|
metadata=deepcopy(self.metadata),
|
|
81
142
|
docs=deepcopy(self.docs),
|
|
143
|
+
raw_bytes=self.raw_bytes,
|
|
82
144
|
)
|
|
83
145
|
|
|
84
146
|
|
|
@@ -97,12 +159,13 @@ def strip_consecutive_newlines(text: str) -> str:
|
|
|
97
159
|
class DocxFile(File):
|
|
98
160
|
@classmethod
|
|
99
161
|
@dependencies_required('docx2txt')
|
|
100
|
-
def from_bytes(cls, file: BytesIO) -> "DocxFile":
|
|
162
|
+
def from_bytes(cls, file: BytesIO, filename: str) -> "DocxFile":
|
|
101
163
|
r"""Creates a DocxFile object from a BytesIO object.
|
|
102
164
|
|
|
103
165
|
Args:
|
|
104
166
|
file (BytesIO): A BytesIO object representing the contents of the
|
|
105
167
|
docx file.
|
|
168
|
+
filename (str): The name of the file.
|
|
106
169
|
|
|
107
170
|
Returns:
|
|
108
171
|
DocxFile: A DocxFile object.
|
|
@@ -117,17 +180,23 @@ class DocxFile(File):
|
|
|
117
180
|
file_id = md5(file.getvalue()).hexdigest()
|
|
118
181
|
# Reset the file pointer to the beginning
|
|
119
182
|
file.seek(0)
|
|
120
|
-
return cls(
|
|
183
|
+
return cls(
|
|
184
|
+
name=filename,
|
|
185
|
+
file_id=file_id,
|
|
186
|
+
docs=[doc],
|
|
187
|
+
raw_bytes=file.getvalue(),
|
|
188
|
+
)
|
|
121
189
|
|
|
122
190
|
|
|
123
191
|
class PdfFile(File):
|
|
124
192
|
@classmethod
|
|
125
|
-
def from_bytes(cls, file: BytesIO) -> "PdfFile":
|
|
193
|
+
def from_bytes(cls, file: BytesIO, filename: str) -> "PdfFile":
|
|
126
194
|
r"""Creates a PdfFile object from a BytesIO object.
|
|
127
195
|
|
|
128
196
|
Args:
|
|
129
197
|
file (BytesIO): A BytesIO object representing the contents of the
|
|
130
198
|
pdf file.
|
|
199
|
+
filename (str): The name of the file.
|
|
131
200
|
|
|
132
201
|
Returns:
|
|
133
202
|
PdfFile: A PdfFile object.
|
|
@@ -153,17 +222,23 @@ class PdfFile(File):
|
|
|
153
222
|
file_id = md5(file.getvalue()).hexdigest()
|
|
154
223
|
# Reset the file pointer to the beginning
|
|
155
224
|
file.seek(0)
|
|
156
|
-
return cls(
|
|
225
|
+
return cls(
|
|
226
|
+
name=filename,
|
|
227
|
+
file_id=file_id,
|
|
228
|
+
docs=docs,
|
|
229
|
+
raw_bytes=file.getvalue(),
|
|
230
|
+
)
|
|
157
231
|
|
|
158
232
|
|
|
159
233
|
class TxtFile(File):
|
|
160
234
|
@classmethod
|
|
161
|
-
def from_bytes(cls, file: BytesIO) -> "TxtFile":
|
|
235
|
+
def from_bytes(cls, file: BytesIO, filename: str) -> "TxtFile":
|
|
162
236
|
r"""Creates a TxtFile object from a BytesIO object.
|
|
163
237
|
|
|
164
238
|
Args:
|
|
165
239
|
file (BytesIO): A BytesIO object representing the contents of the
|
|
166
240
|
txt file.
|
|
241
|
+
filename (str): The name of the file.
|
|
167
242
|
|
|
168
243
|
Returns:
|
|
169
244
|
TxtFile: A TxtFile object.
|
|
@@ -177,17 +252,23 @@ class TxtFile(File):
|
|
|
177
252
|
file_id = md5(file.getvalue()).hexdigest()
|
|
178
253
|
# Reset the file pointer to the beginning
|
|
179
254
|
file.seek(0)
|
|
180
|
-
return cls(
|
|
255
|
+
return cls(
|
|
256
|
+
name=filename,
|
|
257
|
+
file_id=file_id,
|
|
258
|
+
docs=[doc],
|
|
259
|
+
raw_bytes=file.getvalue(),
|
|
260
|
+
)
|
|
181
261
|
|
|
182
262
|
|
|
183
263
|
class JsonFile(File):
|
|
184
264
|
@classmethod
|
|
185
|
-
def from_bytes(cls, file: BytesIO) -> "JsonFile":
|
|
265
|
+
def from_bytes(cls, file: BytesIO, filename: str) -> "JsonFile":
|
|
186
266
|
r"""Creates a JsonFile object from a BytesIO object.
|
|
187
267
|
|
|
188
268
|
Args:
|
|
189
269
|
file (BytesIO): A BytesIO object representing the contents of the
|
|
190
270
|
json file.
|
|
271
|
+
filename (str): The name of the file.
|
|
191
272
|
|
|
192
273
|
Returns:
|
|
193
274
|
JsonFile: A JsonFile object.
|
|
@@ -200,17 +281,23 @@ class JsonFile(File):
|
|
|
200
281
|
file_id = md5(file.getvalue()).hexdigest()
|
|
201
282
|
# Reset the file pointer to the beginning
|
|
202
283
|
file.seek(0)
|
|
203
|
-
return cls(
|
|
284
|
+
return cls(
|
|
285
|
+
name=filename,
|
|
286
|
+
file_id=file_id,
|
|
287
|
+
docs=[doc],
|
|
288
|
+
raw_bytes=file.getvalue(),
|
|
289
|
+
)
|
|
204
290
|
|
|
205
291
|
|
|
206
292
|
class HtmlFile(File):
|
|
207
293
|
@classmethod
|
|
208
|
-
def from_bytes(cls, file: BytesIO) -> "HtmlFile":
|
|
294
|
+
def from_bytes(cls, file: BytesIO, filename: str) -> "HtmlFile":
|
|
209
295
|
r"""Creates a HtmlFile object from a BytesIO object.
|
|
210
296
|
|
|
211
297
|
Args:
|
|
212
298
|
file (BytesIO): A BytesIO object representing the contents of the
|
|
213
299
|
html file.
|
|
300
|
+
filename (str): The name of the file.
|
|
214
301
|
|
|
215
302
|
Returns:
|
|
216
303
|
HtmlFile: A HtmlFile object.
|
|
@@ -233,30 +320,9 @@ class HtmlFile(File):
|
|
|
233
320
|
file_id = md5(file.getvalue()).hexdigest()
|
|
234
321
|
# Reset the file pointer to the beginning
|
|
235
322
|
file.seek(0)
|
|
236
|
-
return cls(
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
Args:
|
|
243
|
-
file (BytesIO): A BytesIO object representing the contents of the file.
|
|
244
|
-
|
|
245
|
-
Returns:
|
|
246
|
-
File: A File object.
|
|
247
|
-
"""
|
|
248
|
-
# Determine the file type based on the file extension
|
|
249
|
-
if file.name.lower().endswith(".docx"):
|
|
250
|
-
return DocxFile.from_bytes(file)
|
|
251
|
-
elif file.name.lower().endswith(".pdf"):
|
|
252
|
-
return PdfFile.from_bytes(file)
|
|
253
|
-
elif file.name.lower().endswith(".txt"):
|
|
254
|
-
return TxtFile.from_bytes(file)
|
|
255
|
-
elif file.name.lower().endswith(".json"):
|
|
256
|
-
return JsonFile.from_bytes(file)
|
|
257
|
-
elif file.name.lower().endswith(".html"):
|
|
258
|
-
return HtmlFile.from_bytes(file)
|
|
259
|
-
else:
|
|
260
|
-
raise NotImplementedError(
|
|
261
|
-
f"File type {file.name.split('.')[-1]} not supported"
|
|
323
|
+
return cls(
|
|
324
|
+
name=filename,
|
|
325
|
+
file_id=file_id,
|
|
326
|
+
docs=[doc],
|
|
327
|
+
raw_bytes=file.getvalue(),
|
|
262
328
|
)
|
camel/loaders/jina_url_reader.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import os
|
|
16
16
|
from typing import Any, Optional
|
|
17
|
+
from warnings import warn
|
|
17
18
|
|
|
18
19
|
from camel.types.enums import JinaReturnFormat
|
|
19
20
|
|
|
@@ -54,11 +55,10 @@ class JinaURLReader:
|
|
|
54
55
|
**kwargs: Any,
|
|
55
56
|
) -> None:
|
|
56
57
|
api_key = api_key or os.getenv('JINA_API_KEY')
|
|
57
|
-
if api_key
|
|
58
|
-
|
|
59
|
-
"
|
|
60
|
-
"
|
|
61
|
-
"https://jina.ai/reader."
|
|
58
|
+
if not api_key:
|
|
59
|
+
warn(
|
|
60
|
+
"JINA_API_KEY not set. This will result in a low rate limit "
|
|
61
|
+
"of Jina URL Reader. Get API key here: https://jina.ai/reader."
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
# if the following field not provided, it will be None
|
|
@@ -94,6 +94,6 @@ class JinaURLReader:
|
|
|
94
94
|
resp = requests.get(full_url, headers=self._headers)
|
|
95
95
|
resp.raise_for_status()
|
|
96
96
|
except Exception as e:
|
|
97
|
-
raise
|
|
97
|
+
raise ValueError(f"Failed to read content from {url}: {e}") from e
|
|
98
98
|
|
|
99
99
|
return resp.text
|