camel-ai 0.1.1__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -11
- camel/agents/__init__.py +7 -5
- camel/agents/chat_agent.py +134 -86
- camel/agents/critic_agent.py +28 -17
- camel/agents/deductive_reasoner_agent.py +235 -0
- camel/agents/embodied_agent.py +92 -40
- camel/agents/knowledge_graph_agent.py +221 -0
- camel/agents/role_assignment_agent.py +27 -17
- camel/agents/task_agent.py +60 -34
- camel/agents/tool_agents/base.py +0 -1
- camel/agents/tool_agents/hugging_face_tool_agent.py +7 -4
- camel/configs/__init__.py +29 -0
- camel/configs/anthropic_config.py +73 -0
- camel/configs/base_config.py +22 -0
- camel/{configs.py → configs/openai_config.py} +37 -64
- camel/embeddings/__init__.py +2 -0
- camel/embeddings/base.py +3 -2
- camel/embeddings/openai_embedding.py +10 -5
- camel/embeddings/sentence_transformers_embeddings.py +65 -0
- camel/functions/__init__.py +18 -3
- camel/functions/google_maps_function.py +335 -0
- camel/functions/math_functions.py +7 -7
- camel/functions/open_api_function.py +380 -0
- camel/functions/open_api_specs/coursera/__init__.py +13 -0
- camel/functions/open_api_specs/coursera/openapi.yaml +82 -0
- camel/functions/open_api_specs/klarna/__init__.py +13 -0
- camel/functions/open_api_specs/klarna/openapi.yaml +87 -0
- camel/functions/open_api_specs/speak/__init__.py +13 -0
- camel/functions/open_api_specs/speak/openapi.yaml +151 -0
- camel/functions/openai_function.py +346 -42
- camel/functions/retrieval_functions.py +61 -0
- camel/functions/search_functions.py +100 -35
- camel/functions/slack_functions.py +275 -0
- camel/functions/twitter_function.py +484 -0
- camel/functions/weather_functions.py +36 -23
- camel/generators.py +65 -46
- camel/human.py +17 -11
- camel/interpreters/__init__.py +25 -0
- camel/interpreters/base.py +49 -0
- camel/{utils/python_interpreter.py → interpreters/internal_python_interpreter.py} +129 -48
- camel/interpreters/interpreter_error.py +19 -0
- camel/interpreters/subprocess_interpreter.py +190 -0
- camel/loaders/__init__.py +22 -0
- camel/{functions/base_io_functions.py → loaders/base_io.py} +38 -35
- camel/{functions/unstructured_io_fuctions.py → loaders/unstructured_io.py} +199 -110
- camel/memories/__init__.py +17 -7
- camel/memories/agent_memories.py +156 -0
- camel/memories/base.py +97 -32
- camel/memories/blocks/__init__.py +21 -0
- camel/memories/{chat_history_memory.py → blocks/chat_history_block.py} +34 -34
- camel/memories/blocks/vectordb_block.py +101 -0
- camel/memories/context_creators/__init__.py +3 -2
- camel/memories/context_creators/score_based.py +32 -20
- camel/memories/records.py +6 -5
- camel/messages/__init__.py +2 -2
- camel/messages/base.py +99 -16
- camel/messages/func_message.py +7 -4
- camel/models/__init__.py +6 -2
- camel/models/anthropic_model.py +146 -0
- camel/models/base_model.py +10 -3
- camel/models/model_factory.py +17 -11
- camel/models/open_source_model.py +25 -13
- camel/models/openai_audio_models.py +251 -0
- camel/models/openai_model.py +20 -13
- camel/models/stub_model.py +10 -5
- camel/prompts/__init__.py +7 -5
- camel/prompts/ai_society.py +21 -14
- camel/prompts/base.py +54 -47
- camel/prompts/code.py +22 -14
- camel/prompts/evaluation.py +8 -5
- camel/prompts/misalignment.py +26 -19
- camel/prompts/object_recognition.py +35 -0
- camel/prompts/prompt_templates.py +14 -8
- camel/prompts/role_description_prompt_template.py +16 -10
- camel/prompts/solution_extraction.py +9 -5
- camel/prompts/task_prompt_template.py +24 -21
- camel/prompts/translation.py +9 -5
- camel/responses/agent_responses.py +5 -2
- camel/retrievers/__init__.py +26 -0
- camel/retrievers/auto_retriever.py +330 -0
- camel/retrievers/base.py +69 -0
- camel/retrievers/bm25_retriever.py +140 -0
- camel/retrievers/cohere_rerank_retriever.py +108 -0
- camel/retrievers/vector_retriever.py +183 -0
- camel/societies/__init__.py +1 -1
- camel/societies/babyagi_playing.py +56 -32
- camel/societies/role_playing.py +188 -133
- camel/storages/__init__.py +18 -0
- camel/storages/graph_storages/__init__.py +23 -0
- camel/storages/graph_storages/base.py +82 -0
- camel/storages/graph_storages/graph_element.py +74 -0
- camel/storages/graph_storages/neo4j_graph.py +582 -0
- camel/storages/key_value_storages/base.py +1 -2
- camel/storages/key_value_storages/in_memory.py +1 -2
- camel/storages/key_value_storages/json.py +8 -13
- camel/storages/vectordb_storages/__init__.py +33 -0
- camel/storages/vectordb_storages/base.py +202 -0
- camel/storages/vectordb_storages/milvus.py +396 -0
- camel/storages/vectordb_storages/qdrant.py +373 -0
- camel/terminators/__init__.py +1 -1
- camel/terminators/base.py +2 -3
- camel/terminators/response_terminator.py +21 -12
- camel/terminators/token_limit_terminator.py +5 -3
- camel/toolkits/__init__.py +21 -0
- camel/toolkits/base.py +22 -0
- camel/toolkits/github_toolkit.py +245 -0
- camel/types/__init__.py +18 -6
- camel/types/enums.py +129 -15
- camel/types/openai_types.py +10 -5
- camel/utils/__init__.py +20 -13
- camel/utils/commons.py +170 -85
- camel/utils/token_counting.py +135 -15
- {camel_ai-0.1.1.dist-info → camel_ai-0.1.4.dist-info}/METADATA +123 -75
- camel_ai-0.1.4.dist-info/RECORD +119 -0
- {camel_ai-0.1.1.dist-info → camel_ai-0.1.4.dist-info}/WHEEL +1 -1
- camel/memories/context_creators/base.py +0 -72
- camel_ai-0.1.1.dist-info/RECORD +0 -75
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
import shlex
|
|
16
|
+
import subprocess
|
|
17
|
+
import tempfile
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, ClassVar, Dict, List
|
|
20
|
+
|
|
21
|
+
from colorama import Fore
|
|
22
|
+
|
|
23
|
+
from camel.interpreters.base import BaseInterpreter
|
|
24
|
+
from camel.interpreters.interpreter_error import InterpreterError
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SubprocessInterpreter(BaseInterpreter):
    r"""SubprocessInterpreter is a class for executing code files or code
    strings in a subprocess.

    This class handles the execution of code in different scripting languages
    (currently Python and Bash) within a subprocess, capturing their
    stdout and stderr streams, and allowing user checking before executing code
    strings.

    Args:
        require_confirm (bool, optional): If True, prompt user before running
            code strings for security. (default: :obj:`True`)
        print_stdout (bool, optional): If True, print the standard output of
            the executed code. (default: :obj:`False`)
        print_stderr (bool, optional): If True, print the standard error of the
            executed code. (default: :obj:`True`)
    """

    # Command template used to launch a file of each canonical code type.
    _CODE_EXECUTE_CMD_MAPPING: ClassVar[Dict[str, str]] = {
        "python": "python {file_name}",
        "bash": "bash {file_name}",
    }

    # File extension given to the temporary file of each canonical code type.
    _CODE_EXTENSION_MAPPING: ClassVar[Dict[str, str]] = {
        "python": "py",
        "bash": "sh",
    }

    # Accepted aliases mapped to their canonical code type.
    _CODE_TYPE_MAPPING: ClassVar[Dict[str, str]] = {
        "python": "python",
        "py3": "python",
        "python3": "python",
        "py": "python",
        "shell": "bash",
        "bash": "bash",
        "sh": "bash",
    }

    def __init__(
        self,
        require_confirm: bool = True,
        print_stdout: bool = False,
        print_stderr: bool = True,
    ) -> None:
        self.require_confirm = require_confirm
        self.print_stdout = print_stdout
        self.print_stderr = print_stderr

    def run_file(
        self,
        file: Path,
        code_type: str,
    ) -> str:
        r"""Executes a code file in a subprocess and captures its output.

        Args:
            file (Path): The path object of the file to run.
            code_type (str): The type of code to execute (e.g., 'python',
                'bash').

        Returns:
            str: The captured stdout, followed by ``(stderr: ...)`` when the
                process wrote anything to stderr.

        Raises:
            RuntimeError: If the provided file path does not point to a file.
            InterpreterError: If the code type provided is not supported.
        """
        if not file.is_file():
            raise RuntimeError(f"{file} is not a file.")
        code_type = self._check_code_type(code_type)
        # Quote the path before the template is split: otherwise a path
        # containing whitespace is broken into several arguments and
        # backslashes are consumed as escape characters by `shlex.split`.
        cmd = shlex.split(
            self._CODE_EXECUTE_CMD_MAPPING[code_type].format(
                file_name=shlex.quote(str(file))
            )
        )
        proc = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        stdout, stderr = proc.stdout, proc.stderr
        if self.print_stdout and stdout:
            print("======stdout======")
            print(Fore.GREEN + stdout + Fore.RESET)
            print("==================")
        if self.print_stderr and stderr:
            print("======stderr======")
            print(Fore.RED + stderr + Fore.RESET)
            print("==================")
        exec_result = f"{stdout}"
        exec_result += f"(stderr: {stderr})" if stderr else ""
        return exec_result

    def run(
        self,
        code: str,
        code_type: str,
    ) -> str:
        r"""Generates a temporary file with the given code, executes it, and
        deletes the file afterward.

        Args:
            code (str): The code string to execute.
            code_type (str): The type of code to execute (e.g., 'python',
                'bash').

        Returns:
            str: A string containing the captured stdout and stderr of the
                executed code.

        Raises:
            InterpreterError: If the user declines to run the code or if the
                code type is unsupported.
        """
        code_type = self._check_code_type(code_type)

        # Print code for security checking
        if self.require_confirm:
            print(f"The following {code_type} code will run on your computer:")
            print(Fore.CYAN + code + Fore.RESET)
            # Keep asking until the answer is a recognised yes or no.
            while True:
                choice = input("Running code? [Y/n]:").strip().lower()
                if choice in ["y", "yes", "ye", ""]:
                    break
                elif choice in ["no", "n"]:
                    raise InterpreterError(
                        "Execution halted: User opted not to run the code. "
                        "This choice stops the current operation and any "
                        "further code execution."
                    )
        temp_file_path = self._create_temp_file(
            code=code, extension=self._CODE_EXTENSION_MAPPING[code_type]
        )
        try:
            return self.run_file(temp_file_path, code_type)
        finally:
            # Remove the temporary file even when execution raises, so a
            # failed launch does not leave the code behind on disk.
            temp_file_path.unlink()

    def _create_temp_file(self, code: str, extension: str) -> Path:
        r"""Writes the code to a new temporary file and returns its path.

        The file is created with ``delete=False``, so the caller is
        responsible for removing it.

        Args:
            code (str): The code string to write.
            extension (str): The file extension, without the leading dot.

        Returns:
            Path: The path of the created temporary file.
        """
        # Write UTF-8 explicitly: Python reads source files as UTF-8 by
        # default, which may differ from the locale's preferred encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=f".{extension}", encoding="utf-8"
        ) as f:
            f.write(code)
            name = f.name
        return Path(name)

    def _check_code_type(self, code_type: str) -> str:
        r"""Validates a code type and returns its canonical name.

        Args:
            code_type (str): The code type or alias to check.

        Returns:
            str: The canonical code type the given alias maps to.

        Raises:
            InterpreterError: If the code type is not a known alias.
        """
        if code_type not in self._CODE_TYPE_MAPPING:
            raise InterpreterError(
                f"Unsupported code type {code_type}. Currently "
                f"`{self.__class__.__name__}` only supports "
                f"{', '.join(self._CODE_EXTENSION_MAPPING.keys())}."
            )
        return self._CODE_TYPE_MAPPING[code_type]

    def supported_code_types(self) -> List[str]:
        r"""Provides supported code types by the interpreter."""
        return list(self._CODE_EXTENSION_MAPPING.keys())

    def update_action_space(self, action_space: Dict[str, Any]) -> None:
        r"""Not supported by this interpreter.

        Args:
            action_space (Dict[str, Any]): Ignored.

        Raises:
            RuntimeError: Always.
        """
        raise RuntimeError(
            "SubprocessInterpreter doesn't support `action_space`."
        )
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from .base_io import File, read_file
|
|
16
|
+
from .unstructured_io import UnstructuredIO
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'File',
|
|
20
|
+
'read_file',
|
|
21
|
+
'UnstructuredIO',
|
|
22
|
+
]
|
|
@@ -35,10 +35,10 @@ class File(ABC):
|
|
|
35
35
|
Args:
|
|
36
36
|
name (str): The name of the file.
|
|
37
37
|
id (str): The unique identifier of the file.
|
|
38
|
-
metadata (Dict[str, Any], optional):
|
|
39
|
-
|
|
40
|
-
docs (List[Dict[str, Any]], optional):
|
|
41
|
-
|
|
38
|
+
metadata (Dict[str, Any], optional): Additional metadata
|
|
39
|
+
associated with the file. Defaults to None.
|
|
40
|
+
docs (List[Dict[str, Any]], optional): A list of documents
|
|
41
|
+
contained within the file. Defaults to None.
|
|
42
42
|
"""
|
|
43
43
|
self.name = name
|
|
44
44
|
self.id = id
|
|
@@ -51,20 +51,21 @@ class File(ABC):
|
|
|
51
51
|
r"""Creates a File object from a BytesIO object.
|
|
52
52
|
|
|
53
53
|
Args:
|
|
54
|
-
file (BytesIO):
|
|
55
|
-
|
|
54
|
+
file (BytesIO): A BytesIO object representing the contents of the
|
|
55
|
+
file.
|
|
56
56
|
|
|
57
57
|
Returns:
|
|
58
58
|
File: A File object.
|
|
59
59
|
"""
|
|
60
60
|
|
|
61
61
|
def __repr__(self) -> str:
|
|
62
|
-
return (
|
|
63
|
-
|
|
62
|
+
return (
|
|
63
|
+
f"File(name={self.name}, id={self.id}, "
|
|
64
|
+
f"metadata={self.metadata}, docs={self.docs})"
|
|
65
|
+
)
|
|
64
66
|
|
|
65
67
|
def __str__(self) -> str:
|
|
66
|
-
return (
|
|
67
|
-
f"File(name={self.name}, id={self.id}, metadata={self.metadata})")
|
|
68
|
+
return f"File(name={self.name}, id={self.id}, metadata={self.metadata})"
|
|
68
69
|
|
|
69
70
|
def copy(self) -> "File":
|
|
70
71
|
r"""Create a deep copy of this File"""
|
|
@@ -90,14 +91,13 @@ def strip_consecutive_newlines(text: str) -> str:
|
|
|
90
91
|
|
|
91
92
|
|
|
92
93
|
class DocxFile(File):
|
|
93
|
-
|
|
94
94
|
@classmethod
|
|
95
95
|
def from_bytes(cls, file: BytesIO) -> "DocxFile":
|
|
96
96
|
r"""Creates a DocxFile object from a BytesIO object.
|
|
97
97
|
|
|
98
98
|
Args:
|
|
99
|
-
file (BytesIO):
|
|
100
|
-
|
|
99
|
+
file (BytesIO): A BytesIO object representing the contents of the
|
|
100
|
+
docx file.
|
|
101
101
|
|
|
102
102
|
Returns:
|
|
103
103
|
DocxFile: A DocxFile object.
|
|
@@ -106,9 +106,11 @@ class DocxFile(File):
|
|
|
106
106
|
try:
|
|
107
107
|
import docx2txt
|
|
108
108
|
except ImportError:
|
|
109
|
-
raise ImportError(
|
|
110
|
-
|
|
111
|
-
|
|
109
|
+
raise ImportError(
|
|
110
|
+
"Please install `docx2txt` first. "
|
|
111
|
+
"You can install it by running "
|
|
112
|
+
"`pip install docx2txt`."
|
|
113
|
+
)
|
|
112
114
|
text = docx2txt.process(file)
|
|
113
115
|
text = strip_consecutive_newlines(text)
|
|
114
116
|
# Create a dictionary with the extracted text
|
|
@@ -121,14 +123,13 @@ class DocxFile(File):
|
|
|
121
123
|
|
|
122
124
|
|
|
123
125
|
class PdfFile(File):
|
|
124
|
-
|
|
125
126
|
@classmethod
|
|
126
127
|
def from_bytes(cls, file: BytesIO) -> "PdfFile":
|
|
127
128
|
r"""Creates a PdfFile object from a BytesIO object.
|
|
128
129
|
|
|
129
130
|
Args:
|
|
130
|
-
file (BytesIO):
|
|
131
|
-
|
|
131
|
+
file (BytesIO): A BytesIO object representing the contents of the
|
|
132
|
+
pdf file.
|
|
132
133
|
|
|
133
134
|
Returns:
|
|
134
135
|
PdfFile: A PdfFile object.
|
|
@@ -137,9 +138,11 @@ class PdfFile(File):
|
|
|
137
138
|
try:
|
|
138
139
|
import fitz
|
|
139
140
|
except ImportError:
|
|
140
|
-
raise ImportError(
|
|
141
|
-
|
|
142
|
-
|
|
141
|
+
raise ImportError(
|
|
142
|
+
"Please install `PyMuPDF` first. "
|
|
143
|
+
"You can install it by running "
|
|
144
|
+
"`pip install PyMuPDF`."
|
|
145
|
+
)
|
|
143
146
|
pdf = fitz.open(stream=file.read(), filetype="pdf")
|
|
144
147
|
docs = []
|
|
145
148
|
for i, page in enumerate(pdf):
|
|
@@ -156,14 +159,13 @@ class PdfFile(File):
|
|
|
156
159
|
|
|
157
160
|
|
|
158
161
|
class TxtFile(File):
|
|
159
|
-
|
|
160
162
|
@classmethod
|
|
161
163
|
def from_bytes(cls, file: BytesIO) -> "TxtFile":
|
|
162
164
|
r"""Creates a TxtFile object from a BytesIO object.
|
|
163
165
|
|
|
164
166
|
Args:
|
|
165
|
-
file (BytesIO):
|
|
166
|
-
|
|
167
|
+
file (BytesIO): A BytesIO object representing the contents of the
|
|
168
|
+
txt file.
|
|
167
169
|
|
|
168
170
|
Returns:
|
|
169
171
|
TxtFile: A TxtFile object.
|
|
@@ -181,14 +183,13 @@ class TxtFile(File):
|
|
|
181
183
|
|
|
182
184
|
|
|
183
185
|
class JsonFile(File):
|
|
184
|
-
|
|
185
186
|
@classmethod
|
|
186
187
|
def from_bytes(cls, file: BytesIO) -> "JsonFile":
|
|
187
188
|
r"""Creates a JsonFile object from a BytesIO object.
|
|
188
189
|
|
|
189
190
|
Args:
|
|
190
|
-
file (BytesIO):
|
|
191
|
-
|
|
191
|
+
file (BytesIO): A BytesIO object representing the contents of the
|
|
192
|
+
json file.
|
|
192
193
|
|
|
193
194
|
Returns:
|
|
194
195
|
JsonFile: A JsonFile object.
|
|
@@ -205,14 +206,13 @@ class JsonFile(File):
|
|
|
205
206
|
|
|
206
207
|
|
|
207
208
|
class HtmlFile(File):
|
|
208
|
-
|
|
209
209
|
@classmethod
|
|
210
210
|
def from_bytes(cls, file: BytesIO) -> "HtmlFile":
|
|
211
211
|
r"""Creates a HtmlFile object from a BytesIO object.
|
|
212
212
|
|
|
213
213
|
Args:
|
|
214
|
-
file (BytesIO):
|
|
215
|
-
|
|
214
|
+
file (BytesIO): A BytesIO object representing the contents of the
|
|
215
|
+
html file.
|
|
216
216
|
|
|
217
217
|
Returns:
|
|
218
218
|
HtmlFile: A HtmlFile object.
|
|
@@ -221,9 +221,11 @@ class HtmlFile(File):
|
|
|
221
221
|
try:
|
|
222
222
|
from bs4 import BeautifulSoup
|
|
223
223
|
except ImportError:
|
|
224
|
-
raise ImportError(
|
|
225
|
-
|
|
226
|
-
|
|
224
|
+
raise ImportError(
|
|
225
|
+
"Please install `beautifulsoup4` first. "
|
|
226
|
+
"You can install it by running "
|
|
227
|
+
"`pip install beautifulsoup4`."
|
|
228
|
+
)
|
|
227
229
|
soup = BeautifulSoup(file, "html.parser")
|
|
228
230
|
text = soup.get_text()
|
|
229
231
|
text = strip_consecutive_newlines(text)
|
|
@@ -258,4 +260,5 @@ def read_file(file: BytesIO) -> File:
|
|
|
258
260
|
return HtmlFile.from_bytes(file)
|
|
259
261
|
else:
|
|
260
262
|
raise NotImplementedError(
|
|
261
|
-
f"File type {file.name.split('.')[-1]} not supported"
|
|
263
|
+
f"File type {file.name.split('.')[-1]} not supported"
|
|
264
|
+
)
|