camel-ai 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -11
- camel/agents/__init__.py +5 -5
- camel/agents/chat_agent.py +124 -63
- camel/agents/critic_agent.py +28 -17
- camel/agents/deductive_reasoner_agent.py +235 -0
- camel/agents/embodied_agent.py +92 -40
- camel/agents/role_assignment_agent.py +27 -17
- camel/agents/task_agent.py +60 -34
- camel/agents/tool_agents/base.py +0 -1
- camel/agents/tool_agents/hugging_face_tool_agent.py +7 -4
- camel/configs.py +119 -7
- camel/embeddings/__init__.py +2 -0
- camel/embeddings/base.py +3 -2
- camel/embeddings/openai_embedding.py +3 -3
- camel/embeddings/sentence_transformers_embeddings.py +65 -0
- camel/functions/__init__.py +13 -3
- camel/functions/google_maps_function.py +335 -0
- camel/functions/math_functions.py +7 -7
- camel/functions/openai_function.py +344 -42
- camel/functions/search_functions.py +100 -35
- camel/functions/twitter_function.py +484 -0
- camel/functions/weather_functions.py +36 -23
- camel/generators.py +65 -46
- camel/human.py +17 -11
- camel/interpreters/__init__.py +25 -0
- camel/interpreters/base.py +49 -0
- camel/{utils/python_interpreter.py → interpreters/internal_python_interpreter.py} +129 -48
- camel/interpreters/interpreter_error.py +19 -0
- camel/interpreters/subprocess_interpreter.py +190 -0
- camel/loaders/__init__.py +22 -0
- camel/{functions/base_io_functions.py → loaders/base_io.py} +38 -35
- camel/{functions/unstructured_io_fuctions.py → loaders/unstructured_io.py} +199 -110
- camel/memories/__init__.py +17 -7
- camel/memories/agent_memories.py +156 -0
- camel/memories/base.py +97 -32
- camel/memories/blocks/__init__.py +21 -0
- camel/memories/{chat_history_memory.py → blocks/chat_history_block.py} +34 -34
- camel/memories/blocks/vectordb_block.py +101 -0
- camel/memories/context_creators/__init__.py +3 -2
- camel/memories/context_creators/score_based.py +32 -20
- camel/memories/records.py +6 -5
- camel/messages/__init__.py +2 -2
- camel/messages/base.py +99 -16
- camel/messages/func_message.py +7 -4
- camel/models/__init__.py +4 -2
- camel/models/anthropic_model.py +132 -0
- camel/models/base_model.py +3 -2
- camel/models/model_factory.py +10 -8
- camel/models/open_source_model.py +25 -13
- camel/models/openai_model.py +9 -10
- camel/models/stub_model.py +6 -5
- camel/prompts/__init__.py +7 -5
- camel/prompts/ai_society.py +21 -14
- camel/prompts/base.py +54 -47
- camel/prompts/code.py +22 -14
- camel/prompts/evaluation.py +8 -5
- camel/prompts/misalignment.py +26 -19
- camel/prompts/object_recognition.py +35 -0
- camel/prompts/prompt_templates.py +14 -8
- camel/prompts/role_description_prompt_template.py +16 -10
- camel/prompts/solution_extraction.py +9 -5
- camel/prompts/task_prompt_template.py +24 -21
- camel/prompts/translation.py +9 -5
- camel/responses/agent_responses.py +5 -2
- camel/retrievers/__init__.py +24 -0
- camel/retrievers/auto_retriever.py +319 -0
- camel/retrievers/base.py +64 -0
- camel/retrievers/bm25_retriever.py +149 -0
- camel/retrievers/vector_retriever.py +166 -0
- camel/societies/__init__.py +1 -1
- camel/societies/babyagi_playing.py +56 -32
- camel/societies/role_playing.py +188 -133
- camel/storages/__init__.py +18 -0
- camel/storages/graph_storages/__init__.py +23 -0
- camel/storages/graph_storages/base.py +82 -0
- camel/storages/graph_storages/graph_element.py +74 -0
- camel/storages/graph_storages/neo4j_graph.py +582 -0
- camel/storages/key_value_storages/base.py +1 -2
- camel/storages/key_value_storages/in_memory.py +1 -2
- camel/storages/key_value_storages/json.py +8 -13
- camel/storages/vectordb_storages/__init__.py +33 -0
- camel/storages/vectordb_storages/base.py +202 -0
- camel/storages/vectordb_storages/milvus.py +396 -0
- camel/storages/vectordb_storages/qdrant.py +371 -0
- camel/terminators/__init__.py +1 -1
- camel/terminators/base.py +2 -3
- camel/terminators/response_terminator.py +21 -12
- camel/terminators/token_limit_terminator.py +5 -3
- camel/types/__init__.py +12 -6
- camel/types/enums.py +86 -13
- camel/types/openai_types.py +10 -5
- camel/utils/__init__.py +18 -13
- camel/utils/commons.py +242 -81
- camel/utils/token_counting.py +135 -15
- {camel_ai-0.1.1.dist-info → camel_ai-0.1.3.dist-info}/METADATA +116 -74
- camel_ai-0.1.3.dist-info/RECORD +101 -0
- {camel_ai-0.1.1.dist-info → camel_ai-0.1.3.dist-info}/WHEEL +1 -1
- camel/memories/context_creators/base.py +0 -72
- camel_ai-0.1.1.dist-info/RECORD +0 -75
|
@@ -11,11 +11,13 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
import uuid
|
|
15
|
+
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
|
14
16
|
|
|
15
|
-
from
|
|
17
|
+
from unstructured.documents.elements import Element
|
|
16
18
|
|
|
17
19
|
|
|
18
|
-
class
|
|
20
|
+
class UnstructuredIO:
|
|
19
21
|
r"""A class to handle various functionalities provided by the
|
|
20
22
|
Unstructured library, including version checking, parsing, cleaning,
|
|
21
23
|
extracting, staging, chunking data, and integrating with cloud
|
|
@@ -29,13 +31,13 @@ class UnstructuredModules:
|
|
|
29
31
|
UNSTRUCTURED_MIN_VERSION = "0.10.30" # Define the minimum version
|
|
30
32
|
|
|
31
33
|
def __init__(self):
|
|
32
|
-
r"""Initializes the
|
|
34
|
+
r"""Initializes the UnstructuredIO class and ensures the
|
|
33
35
|
installed version of Unstructured library meets the minimum
|
|
34
36
|
requirements.
|
|
35
37
|
"""
|
|
36
|
-
self.
|
|
38
|
+
self._ensure_unstructured_version(self.UNSTRUCTURED_MIN_VERSION)
|
|
37
39
|
|
|
38
|
-
def
|
|
40
|
+
def _ensure_unstructured_version(self, min_version: str) -> None:
|
|
39
41
|
r"""Validates that the installed 'Unstructured' library version
|
|
40
42
|
satisfies the specified minimum version requirement. This function is
|
|
41
43
|
essential for ensuring compatibility with features that depend on a
|
|
@@ -68,17 +70,74 @@ class UnstructuredModules:
|
|
|
68
70
|
installed_ver = version.parse(__version__)
|
|
69
71
|
|
|
70
72
|
if installed_ver < min_ver:
|
|
71
|
-
raise ValueError(
|
|
72
|
-
|
|
73
|
+
raise ValueError(
|
|
74
|
+
f"Require `unstructured>={min_version}`, "
|
|
75
|
+
f"you have {__version__}."
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
def create_element_from_text(
|
|
79
|
+
self,
|
|
80
|
+
text: str,
|
|
81
|
+
element_id: Optional[Union[str, uuid.UUID]] = None,
|
|
82
|
+
embeddings: Optional[List[float]] = None,
|
|
83
|
+
filename: Optional[str] = None,
|
|
84
|
+
file_directory: Optional[str] = None,
|
|
85
|
+
last_modified: Optional[str] = None,
|
|
86
|
+
filetype: Optional[str] = None,
|
|
87
|
+
parent_id: Optional[Union[str, uuid.UUID]] = None,
|
|
88
|
+
) -> Element:
|
|
89
|
+
r"""Creates a Text element from a given text input, with optional
|
|
90
|
+
metadata and embeddings.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
text (str): The text content for the element.
|
|
94
|
+
element_id (Union[str, uuid.UUID], optional): Unique identifier
|
|
95
|
+
for the element. If not provided, a random UUID4 string is generated.
|
|
96
|
+
embeddings (Optional[List[float]], optional): A list of float
|
|
97
|
+
numbers representing the text embeddings. Defaults to `None`.
|
|
98
|
+
filename (Optional[str], optional): The name of the file the
|
|
99
|
+
element is associated with. Defaults to `None`.
|
|
100
|
+
file_directory (Optional[str], optional): The directory path where
|
|
101
|
+
the file is located. Defaults to `None`.
|
|
102
|
+
last_modified (Optional[str], optional): The last modified date of
|
|
103
|
+
the file. Defaults to `None`.
|
|
104
|
+
filetype (Optional[str], optional): The type of the file. Defaults
|
|
105
|
+
to `None`.
|
|
106
|
+
parent_id (Optional[Union[str, uuid.UUID]], optional): The
|
|
107
|
+
identifier of the parent element. Defaults to `None`.
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
Element: An instance of Text with the provided content and
|
|
111
|
+
metadata.
|
|
112
|
+
"""
|
|
113
|
+
from unstructured.documents.elements import ElementMetadata, Text
|
|
114
|
+
|
|
115
|
+
metadata = ElementMetadata(
|
|
116
|
+
filename=filename,
|
|
117
|
+
file_directory=file_directory,
|
|
118
|
+
last_modified=last_modified,
|
|
119
|
+
filetype=filetype,
|
|
120
|
+
parent_id=parent_id,
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
return Text(
|
|
124
|
+
text=text,
|
|
125
|
+
element_id=element_id if element_id else str(uuid.uuid4()),
|
|
126
|
+
metadata=metadata,
|
|
127
|
+
embeddings=embeddings,
|
|
128
|
+
)
|
|
73
129
|
|
|
74
130
|
def parse_file_or_url(
|
|
75
131
|
self,
|
|
76
132
|
input_path: str,
|
|
133
|
+
**kwargs: Any,
|
|
77
134
|
) -> Union[Any, List[Any]]:
|
|
78
135
|
r"""Loads a file or a URL and parses its contents as unstructured data.
|
|
79
136
|
|
|
80
137
|
Args:
|
|
81
138
|
input_path (str): Path to the file or URL to be parsed.
|
|
139
|
+
**kwargs: Extra kwargs passed to the partition function.
|
|
140
|
+
|
|
82
141
|
Returns:
|
|
83
142
|
List[Any]: The elements after parsing the file or URL, could be a
|
|
84
143
|
dict, list, etc., depending on the content. If return_str is
|
|
@@ -115,7 +174,7 @@ class UnstructuredModules:
|
|
|
115
174
|
from unstructured.partition.html import partition_html
|
|
116
175
|
|
|
117
176
|
try:
|
|
118
|
-
elements = partition_html(url=input_path)
|
|
177
|
+
elements = partition_html(url=input_path, **kwargs)
|
|
119
178
|
return elements
|
|
120
179
|
except Exception as e:
|
|
121
180
|
raise Exception("Failed to parse the URL.") from e
|
|
@@ -126,17 +185,15 @@ class UnstructuredModules:
|
|
|
126
185
|
|
|
127
186
|
# Check if the file exists
|
|
128
187
|
if not os.path.exists(input_path):
|
|
129
|
-
raise FileNotFoundError(
|
|
130
|
-
f"The file {input_path} was not found.")
|
|
188
|
+
raise FileNotFoundError(f"The file {input_path} was not found.")
|
|
131
189
|
|
|
132
190
|
# Read the file
|
|
133
191
|
try:
|
|
134
192
|
with open(input_path, "rb") as f:
|
|
135
|
-
elements = partition(file=f)
|
|
193
|
+
elements = partition(file=f, **kwargs)
|
|
136
194
|
return elements
|
|
137
195
|
except Exception as e:
|
|
138
|
-
raise Exception(
|
|
139
|
-
"Failed to parse the unstructured file.") from e
|
|
196
|
+
raise Exception("Failed to parse the unstructured file.") from e
|
|
140
197
|
|
|
141
198
|
def clean_text_data(
|
|
142
199
|
self,
|
|
@@ -144,10 +201,10 @@ class UnstructuredModules:
|
|
|
144
201
|
clean_options: Optional[List[Tuple[str, Dict[str, Any]]]] = None,
|
|
145
202
|
) -> str:
|
|
146
203
|
r"""Cleans text data using a variety of cleaning functions provided by
|
|
147
|
-
the `
|
|
204
|
+
the `unstructured` library.
|
|
148
205
|
|
|
149
206
|
This function applies multiple text cleaning utilities by calling the
|
|
150
|
-
`
|
|
207
|
+
`unstructured` library's cleaning bricks for operations like
|
|
151
208
|
replacing unicode quotes, removing extra whitespace, dashes, non-ascii
|
|
152
209
|
characters, and more.
|
|
153
210
|
|
|
@@ -158,36 +215,27 @@ class UnstructuredModules:
|
|
|
158
215
|
|
|
159
216
|
Args:
|
|
160
217
|
text (str): The text to be cleaned.
|
|
161
|
-
clean_options (dict): A dictionary specifying which
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
'clean_prefix',
|
|
172
|
-
'clean_dashes',
|
|
173
|
-
'clean_trailing_punctuation',
|
|
174
|
-
'clean_non_ascii_chars',
|
|
175
|
-
'group_broken_paragraphs',
|
|
176
|
-
'remove_punctuation',
|
|
177
|
-
'replace_unicode_quotes',
|
|
178
|
-
'bytes_string_to_string',
|
|
179
|
-
'translate_text'.
|
|
218
|
+
clean_options (dict): A dictionary specifying which cleaning
|
|
219
|
+
options to apply. The keys should match the names of the
|
|
220
|
+
cleaning functions, and the values should be dictionaries
|
|
221
|
+
containing the parameters for each function. Supported types:
|
|
222
|
+
'clean_extra_whitespace', 'clean_bullets',
|
|
223
|
+
'clean_ordered_bullets', 'clean_postfix', 'clean_prefix',
|
|
224
|
+
'clean_dashes', 'clean_trailing_punctuation',
|
|
225
|
+
'clean_non_ascii_chars', 'group_broken_paragraphs',
|
|
226
|
+
'remove_punctuation', 'replace_unicode_quotes',
|
|
227
|
+
'bytes_string_to_string', 'translate_text'.
|
|
180
228
|
|
|
181
229
|
Returns:
|
|
182
230
|
str: The cleaned text.
|
|
183
231
|
|
|
184
232
|
Raises:
|
|
185
233
|
AttributeError: If a cleaning option does not correspond to a
|
|
186
|
-
valid cleaning function in
|
|
234
|
+
valid cleaning function in `unstructured`.
|
|
187
235
|
|
|
188
236
|
Notes:
|
|
189
237
|
The 'options' dictionary keys must correspond to valid cleaning
|
|
190
|
-
brick names from the
|
|
238
|
+
brick names from the `unstructured` library.
|
|
191
239
|
Each brick's parameters must be provided in a nested dictionary
|
|
192
240
|
as the value for the key.
|
|
193
241
|
|
|
@@ -239,32 +287,43 @@ class UnstructuredModules:
|
|
|
239
287
|
cleaned_text = text
|
|
240
288
|
for func_name, params in clean_options:
|
|
241
289
|
if func_name in cleaning_functions:
|
|
242
|
-
cleaned_text = cleaning_functions[func_name](
|
|
243
|
-
|
|
290
|
+
cleaned_text = cleaning_functions[func_name](
|
|
291
|
+
cleaned_text, **params
|
|
292
|
+
)
|
|
244
293
|
else:
|
|
245
294
|
raise ValueError(
|
|
246
|
-
f"'{func_name}' is not a valid function in
|
|
295
|
+
f"'{func_name}' is not a valid function in `unstructured`."
|
|
247
296
|
)
|
|
248
297
|
|
|
249
298
|
return cleaned_text
|
|
250
299
|
|
|
251
|
-
def extract_data_from_text(
|
|
252
|
-
|
|
300
|
+
def extract_data_from_text(
|
|
301
|
+
self,
|
|
302
|
+
text: str,
|
|
303
|
+
extract_type: Literal[
|
|
304
|
+
'extract_datetimetz',
|
|
305
|
+
'extract_email_address',
|
|
306
|
+
'extract_ip_address',
|
|
307
|
+
'extract_ip_address_name',
|
|
308
|
+
'extract_mapi_id',
|
|
309
|
+
'extract_ordered_bullets',
|
|
310
|
+
'extract_text_after',
|
|
311
|
+
'extract_text_before',
|
|
312
|
+
'extract_us_phone_number',
|
|
313
|
+
],
|
|
314
|
+
**kwargs,
|
|
315
|
+
) -> Any:
|
|
253
316
|
r"""Extracts various types of data from text using functions from
|
|
254
317
|
unstructured.cleaners.extract.
|
|
255
318
|
|
|
256
319
|
Args:
|
|
257
320
|
text (str): Text to extract data from.
|
|
258
|
-
extract_type (
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
'extract_ordered_bullets',
|
|
265
|
-
'extract_text_after',
|
|
266
|
-
'extract_text_before',
|
|
267
|
-
'extract_us_phone_number'.
|
|
321
|
+
extract_type (Literal['extract_datetimetz',
|
|
322
|
+
'extract_email_address', 'extract_ip_address',
|
|
323
|
+
'extract_ip_address_name', 'extract_mapi_id',
|
|
324
|
+
'extract_ordered_bullets', 'extract_text_after',
|
|
325
|
+
'extract_text_before', 'extract_us_phone_number']): Type of
|
|
326
|
+
data to extract.
|
|
268
327
|
**kwargs: Additional keyword arguments for specific
|
|
269
328
|
extraction functions.
|
|
270
329
|
|
|
@@ -304,8 +363,24 @@ class UnstructuredModules:
|
|
|
304
363
|
|
|
305
364
|
return extraction_functions[extract_type](text, **kwargs)
|
|
306
365
|
|
|
307
|
-
def stage_elements(
|
|
308
|
-
|
|
366
|
+
def stage_elements(
|
|
367
|
+
self,
|
|
368
|
+
elements: List[Any],
|
|
369
|
+
stage_type: Literal[
|
|
370
|
+
'convert_to_csv',
|
|
371
|
+
'convert_to_dataframe',
|
|
372
|
+
'convert_to_dict',
|
|
373
|
+
'dict_to_elements',
|
|
374
|
+
'stage_csv_for_prodigy',
|
|
375
|
+
'stage_for_prodigy',
|
|
376
|
+
'stage_for_baseplate',
|
|
377
|
+
'stage_for_datasaur',
|
|
378
|
+
'stage_for_label_box',
|
|
379
|
+
'stage_for_label_studio',
|
|
380
|
+
'stage_for_weaviate',
|
|
381
|
+
],
|
|
382
|
+
**kwargs,
|
|
383
|
+
) -> Union[str, List[Dict], Any]:
|
|
309
384
|
r"""Stages elements for various platforms based on the
|
|
310
385
|
specified staging type.
|
|
311
386
|
|
|
@@ -317,19 +392,12 @@ class UnstructuredModules:
|
|
|
317
392
|
|
|
318
393
|
Args:
|
|
319
394
|
elements (List[Any]): List of Element objects to be staged.
|
|
320
|
-
stage_type (
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
'stage_for_prodigy',
|
|
327
|
-
'stage_for_argilla',
|
|
328
|
-
'stage_for_baseplate',
|
|
329
|
-
'stage_for_datasaur',
|
|
330
|
-
'stage_for_label_box',
|
|
331
|
-
'stage_for_label_studio',
|
|
332
|
-
'stage_for_weaviate'.
|
|
395
|
+
stage_type (Literal['convert_to_csv', 'convert_to_dataframe',
|
|
396
|
+
'convert_to_dict', 'dict_to_elements',
|
|
397
|
+
'stage_csv_for_prodigy', 'stage_for_prodigy',
|
|
398
|
+
'stage_for_baseplate', 'stage_for_datasaur',
|
|
399
|
+
'stage_for_label_box', 'stage_for_label_studio',
|
|
400
|
+
'stage_for_weaviate']): Type of staging to perform.
|
|
333
401
|
**kwargs: Additional keyword arguments specific to
|
|
334
402
|
the staging type.
|
|
335
403
|
|
|
@@ -345,7 +413,6 @@ class UnstructuredModules:
|
|
|
345
413
|
"""
|
|
346
414
|
|
|
347
415
|
from unstructured.staging import (
|
|
348
|
-
argilla,
|
|
349
416
|
base,
|
|
350
417
|
baseplate,
|
|
351
418
|
datasaur,
|
|
@@ -356,33 +423,24 @@ class UnstructuredModules:
|
|
|
356
423
|
)
|
|
357
424
|
|
|
358
425
|
staging_functions = {
|
|
359
|
-
"convert_to_csv":
|
|
360
|
-
base.
|
|
361
|
-
"
|
|
362
|
-
base.
|
|
363
|
-
"
|
|
364
|
-
|
|
365
|
-
"
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
lambda els,
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
"stage_for_datasaur":
|
|
378
|
-
lambda els, **kw: datasaur.stage_for_datasaur(
|
|
379
|
-
els, kw.get('entities', [])),
|
|
380
|
-
"stage_for_label_box":
|
|
381
|
-
lambda els, **kw: label_box.stage_for_label_box(els, **kw),
|
|
382
|
-
"stage_for_label_studio":
|
|
383
|
-
lambda els, **kw: label_studio.stage_for_label_studio(els, **kw),
|
|
384
|
-
"stage_for_weaviate":
|
|
385
|
-
weaviate.stage_for_weaviate,
|
|
426
|
+
"convert_to_csv": base.convert_to_csv,
|
|
427
|
+
"convert_to_dataframe": base.convert_to_dataframe,
|
|
428
|
+
"convert_to_dict": base.convert_to_dict,
|
|
429
|
+
"dict_to_elements": base.dict_to_elements,
|
|
430
|
+
"stage_csv_for_prodigy": lambda els,
|
|
431
|
+
**kw: prodigy.stage_csv_for_prodigy(els, kw.get('metadata', [])),
|
|
432
|
+
"stage_for_prodigy": lambda els, **kw: prodigy.stage_for_prodigy(
|
|
433
|
+
els, kw.get('metadata', [])
|
|
434
|
+
),
|
|
435
|
+
"stage_for_baseplate": baseplate.stage_for_baseplate,
|
|
436
|
+
"stage_for_datasaur": lambda els, **kw: datasaur.stage_for_datasaur(
|
|
437
|
+
els, kw.get('entities', [])
|
|
438
|
+
),
|
|
439
|
+
"stage_for_label_box": lambda els,
|
|
440
|
+
**kw: label_box.stage_for_label_box(els, **kw),
|
|
441
|
+
"stage_for_label_studio": lambda els,
|
|
442
|
+
**kw: label_studio.stage_for_label_studio(els, **kw),
|
|
443
|
+
"stage_for_weaviate": weaviate.stage_for_weaviate,
|
|
386
444
|
}
|
|
387
445
|
|
|
388
446
|
if stage_type not in staging_functions:
|
|
@@ -390,14 +448,15 @@ class UnstructuredModules:
|
|
|
390
448
|
|
|
391
449
|
return staging_functions[stage_type](elements, **kwargs)
|
|
392
450
|
|
|
393
|
-
def chunk_elements(
|
|
394
|
-
|
|
451
|
+
def chunk_elements(
|
|
452
|
+
self, elements: List[Any], chunk_type: str, **kwargs
|
|
453
|
+
) -> List[Any]:
|
|
395
454
|
r"""Chunks elements by titles.
|
|
396
455
|
|
|
397
456
|
Args:
|
|
398
457
|
elements (List[Any]): List of Element objects to be chunked.
|
|
399
458
|
chunk_type (str): Type of chunking to apply. Supported types:
|
|
400
|
-
|
|
459
|
+
'chunk_by_title'.
|
|
401
460
|
**kwargs: Additional keyword arguments for chunking.
|
|
402
461
|
|
|
403
462
|
Returns:
|
|
@@ -419,8 +478,13 @@ class UnstructuredModules:
|
|
|
419
478
|
# Format chunks into a list of dictionaries (or your preferred format)
|
|
420
479
|
return chunking_functions[chunk_type](elements, **kwargs)
|
|
421
480
|
|
|
422
|
-
def run_s3_ingest(
|
|
423
|
-
|
|
481
|
+
def run_s3_ingest(
|
|
482
|
+
self,
|
|
483
|
+
s3_url: str,
|
|
484
|
+
output_dir: str,
|
|
485
|
+
num_processes: int = 2,
|
|
486
|
+
anonymous: bool = True,
|
|
487
|
+
) -> None:
|
|
424
488
|
r"""Processes documents from an S3 bucket and stores structured
|
|
425
489
|
outputs locally.
|
|
426
490
|
|
|
@@ -460,8 +524,13 @@ class UnstructuredModules:
|
|
|
460
524
|
)
|
|
461
525
|
runner.run(anonymous=anonymous)
|
|
462
526
|
|
|
463
|
-
def run_azure_ingest(
|
|
464
|
-
|
|
527
|
+
def run_azure_ingest(
|
|
528
|
+
self,
|
|
529
|
+
azure_url: str,
|
|
530
|
+
output_dir: str,
|
|
531
|
+
account_name: str,
|
|
532
|
+
num_processes: int = 2,
|
|
533
|
+
) -> None:
|
|
465
534
|
"""
|
|
466
535
|
Processes documents from an Azure storage container and stores
|
|
467
536
|
structured outputs locally.
|
|
@@ -500,8 +569,13 @@ class UnstructuredModules:
|
|
|
500
569
|
)
|
|
501
570
|
runner.run(account_name=account_name)
|
|
502
571
|
|
|
503
|
-
def run_github_ingest(
|
|
504
|
-
|
|
572
|
+
def run_github_ingest(
|
|
573
|
+
self,
|
|
574
|
+
repo_url: str,
|
|
575
|
+
git_branch: str,
|
|
576
|
+
output_dir: str,
|
|
577
|
+
num_processes: int = 2,
|
|
578
|
+
) -> None:
|
|
505
579
|
r"""Processes documents from a GitHub repository and stores
|
|
506
580
|
structured outputs locally.
|
|
507
581
|
|
|
@@ -537,9 +611,15 @@ class UnstructuredModules:
|
|
|
537
611
|
)
|
|
538
612
|
runner.run(url=repo_url, git_branch=git_branch)
|
|
539
613
|
|
|
540
|
-
def run_slack_ingest(
|
|
541
|
-
|
|
542
|
-
|
|
614
|
+
def run_slack_ingest(
|
|
615
|
+
self,
|
|
616
|
+
channels: List[str],
|
|
617
|
+
token: str,
|
|
618
|
+
start_date: str,
|
|
619
|
+
end_date: str,
|
|
620
|
+
output_dir: str,
|
|
621
|
+
num_processes: int = 2,
|
|
622
|
+
) -> None:
|
|
543
623
|
r"""Processes documents from specified Slack channels and stores
|
|
544
624
|
structured outputs locally.
|
|
545
625
|
|
|
@@ -575,11 +655,20 @@ class UnstructuredModules:
|
|
|
575
655
|
read_config=ReadConfig(),
|
|
576
656
|
partition_config=PartitionConfig(),
|
|
577
657
|
)
|
|
578
|
-
runner.run(
|
|
579
|
-
|
|
658
|
+
runner.run(
|
|
659
|
+
channels=channels,
|
|
660
|
+
token=token,
|
|
661
|
+
start_date=start_date,
|
|
662
|
+
end_date=end_date,
|
|
663
|
+
)
|
|
580
664
|
|
|
581
|
-
def run_discord_ingest(
|
|
582
|
-
|
|
665
|
+
def run_discord_ingest(
|
|
666
|
+
self,
|
|
667
|
+
channels: List[str],
|
|
668
|
+
token: str,
|
|
669
|
+
output_dir: str,
|
|
670
|
+
num_processes: int = 2,
|
|
671
|
+
) -> None:
|
|
583
672
|
r"""Processes messages from specified Discord channels and stores
|
|
584
673
|
structured outputs locally.
|
|
585
674
|
|
camel/memories/__init__.py
CHANGED
|
@@ -12,17 +12,27 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
14
|
|
|
15
|
-
from .
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
from .agent_memories import (
|
|
16
|
+
ChatHistoryMemory,
|
|
17
|
+
LongtermAgentMemory,
|
|
18
|
+
VectorDBMemory,
|
|
19
|
+
)
|
|
20
|
+
from .base import AgentMemory, BaseContextCreator, MemoryBlock
|
|
21
|
+
from .blocks.chat_history_block import ChatHistoryBlock
|
|
22
|
+
from .blocks.vectordb_block import VectorDBBlock
|
|
18
23
|
from .context_creators.score_based import ScoreBasedContextCreator
|
|
19
|
-
from .
|
|
24
|
+
from .records import ContextRecord, MemoryRecord
|
|
20
25
|
|
|
21
26
|
__all__ = [
|
|
22
27
|
'MemoryRecord',
|
|
23
28
|
'ContextRecord',
|
|
24
|
-
'
|
|
29
|
+
'MemoryBlock',
|
|
30
|
+
"AgentMemory",
|
|
31
|
+
'BaseContextCreator',
|
|
32
|
+
'ScoreBasedContextCreator',
|
|
25
33
|
'ChatHistoryMemory',
|
|
26
|
-
|
|
27
|
-
|
|
34
|
+
'VectorDBMemory',
|
|
35
|
+
'ChatHistoryBlock',
|
|
36
|
+
'VectorDBBlock',
|
|
37
|
+
'LongtermAgentMemory',
|
|
28
38
|
]
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from typing import List, Optional
|
|
16
|
+
|
|
17
|
+
from camel.memories.base import AgentMemory, BaseContextCreator
|
|
18
|
+
from camel.memories.blocks import ChatHistoryBlock, VectorDBBlock
|
|
19
|
+
from camel.memories.records import (
|
|
20
|
+
ContextRecord,
|
|
21
|
+
MemoryRecord,
|
|
22
|
+
)
|
|
23
|
+
from camel.storages import BaseKeyValueStorage, BaseVectorStorage
|
|
24
|
+
from camel.types import OpenAIBackendRole
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ChatHistoryMemory(AgentMemory):
|
|
28
|
+
r"""An agent memory wrapper of :obj:`ChatHistoryBlock`.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
context_creator (BaseContextCreator): A model context creator.
|
|
32
|
+
storage (BaseKeyValueStorage, optional): A storage backend for storing
|
|
33
|
+
chat history. If `None`, an :obj:`InMemoryKeyValueStorage`
|
|
34
|
+
will be used. (default: :obj:`None`)
|
|
35
|
+
window_size (int, optional): The number of recent chat messages to
|
|
36
|
+
retrieve. If not provided, the entire chat history will be
|
|
37
|
+
retrieved. (default: :obj:`None`)
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def __init__(
|
|
41
|
+
self,
|
|
42
|
+
context_creator: BaseContextCreator,
|
|
43
|
+
storage: Optional[BaseKeyValueStorage] = None,
|
|
44
|
+
window_size: Optional[int] = None,
|
|
45
|
+
) -> None:
|
|
46
|
+
if window_size is not None and not isinstance(window_size, int):
|
|
47
|
+
raise TypeError("`window_size` must be an integer or None.")
|
|
48
|
+
if window_size is not None and window_size < 0:
|
|
49
|
+
raise ValueError("`window_size` must be non-negative.")
|
|
50
|
+
self._context_creator = context_creator
|
|
51
|
+
self._window_size = window_size
|
|
52
|
+
self._chat_history_block = ChatHistoryBlock(storage=storage)
|
|
53
|
+
|
|
54
|
+
def retrieve(self) -> List[ContextRecord]:
|
|
55
|
+
return self._chat_history_block.retrieve(self._window_size)
|
|
56
|
+
|
|
57
|
+
def write_records(self, records: List[MemoryRecord]) -> None:
|
|
58
|
+
self._chat_history_block.write_records(records)
|
|
59
|
+
|
|
60
|
+
def get_context_creator(self) -> BaseContextCreator:
|
|
61
|
+
return self._context_creator
|
|
62
|
+
|
|
63
|
+
def clear(self) -> None:
|
|
64
|
+
self._chat_history_block.clear()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class VectorDBMemory(AgentMemory):
|
|
68
|
+
r"""An agent memory wrapper of :obj:`VectorDBBlock`. This memory queries
|
|
69
|
+
messages stored in the vector database. Notice that the most recent
|
|
70
|
+
messages will not be added to the context.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
context_creator (BaseContextCreator): A model context creator.
|
|
74
|
+
storage (BaseVectorStorage, optional): A vector storage backend. If
|
|
75
|
+
`None`, a :obj:`QdrantStorage` will be used.
|
|
76
|
+
(default: :obj:`None`)
|
|
77
|
+
retrieve_limit (int, optional): The maximum number of messages
|
|
78
|
+
to be added into the context. (default: :obj:`3`)
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
def __init__(
|
|
82
|
+
self,
|
|
83
|
+
context_creator: BaseContextCreator,
|
|
84
|
+
storage: Optional[BaseVectorStorage] = None,
|
|
85
|
+
retrieve_limit: int = 3,
|
|
86
|
+
) -> None:
|
|
87
|
+
self._context_creator = context_creator
|
|
88
|
+
self._retrieve_limit = retrieve_limit
|
|
89
|
+
self._vectordb_block = VectorDBBlock(storage=storage)
|
|
90
|
+
|
|
91
|
+
self._current_topic: str = ""
|
|
92
|
+
|
|
93
|
+
def retrieve(self) -> List[ContextRecord]:
|
|
94
|
+
return self._vectordb_block.retrieve(
|
|
95
|
+
self._current_topic,
|
|
96
|
+
limit=self._retrieve_limit,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
def write_records(self, records: List[MemoryRecord]) -> None:
|
|
100
|
+
# Assume the last user input is the current topic.
|
|
101
|
+
for record in records:
|
|
102
|
+
if record.role_at_backend == OpenAIBackendRole.USER:
|
|
103
|
+
self._current_topic = record.message.content
|
|
104
|
+
self._vectordb_block.write_records(records)
|
|
105
|
+
|
|
106
|
+
def get_context_creator(self) -> BaseContextCreator:
|
|
107
|
+
return self._context_creator
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class LongtermAgentMemory(AgentMemory):
|
|
111
|
+
r"""An implementation of the :obj:`AgentMemory` abstract base class for
|
|
112
|
+
augmenting ChatHistoryMemory with VectorDBMemory.
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
def __init__(
|
|
116
|
+
self,
|
|
117
|
+
context_creator: BaseContextCreator,
|
|
118
|
+
chat_history_block: Optional[ChatHistoryBlock] = None,
|
|
119
|
+
vector_db_block: Optional[VectorDBBlock] = None,
|
|
120
|
+
retrieve_limit: int = 3,
|
|
121
|
+
) -> None:
|
|
122
|
+
self.chat_history_block = chat_history_block or ChatHistoryBlock()
|
|
123
|
+
self.vector_db_block = vector_db_block or VectorDBBlock()
|
|
124
|
+
self.retrieve_limit = retrieve_limit
|
|
125
|
+
self._context_creator = context_creator
|
|
126
|
+
self._current_topic: str = ""
|
|
127
|
+
|
|
128
|
+
def get_context_creator(self) -> BaseContextCreator:
|
|
129
|
+
return self._context_creator
|
|
130
|
+
|
|
131
|
+
def retrieve(self) -> List[ContextRecord]:
|
|
132
|
+
chat_history = self.chat_history_block.retrieve()
|
|
133
|
+
vector_db_retrieve = self.vector_db_block.retrieve(
|
|
134
|
+
self._current_topic, self.retrieve_limit
|
|
135
|
+
)
|
|
136
|
+
return chat_history[:1] + vector_db_retrieve + chat_history[1:]
|
|
137
|
+
|
|
138
|
+
def write_records(self, records: List[MemoryRecord]) -> None:
|
|
139
|
+
r"""Converts the provided chat messages into vector representations and
|
|
140
|
+
writes them to the vector database.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
records (List[MemoryRecord]): Messages to be added to the vector
|
|
144
|
+
database.
|
|
145
|
+
"""
|
|
146
|
+
self.vector_db_block.write_records(records)
|
|
147
|
+
self.chat_history_block.write_records(records)
|
|
148
|
+
|
|
149
|
+
for record in records:
|
|
150
|
+
if record.role_at_backend == OpenAIBackendRole.USER:
|
|
151
|
+
self._current_topic = record.message.content
|
|
152
|
+
|
|
153
|
+
def clear(self) -> None:
|
|
154
|
+
r"""Removes all records from the memory."""
|
|
155
|
+
self.chat_history_block.clear()
|
|
156
|
+
self.vector_db_block.clear()
|