camel-ai 0.1.6.2__py3-none-any.whl → 0.1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

@@ -12,12 +12,18 @@
12
12
  # limitations under the License.
13
13
  # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
14
  import uuid
15
- from typing import Any, Dict, List, Literal, Optional, Tuple, Union
15
+ from typing import (
16
+ Any,
17
+ Dict,
18
+ List,
19
+ Literal,
20
+ Optional,
21
+ Tuple,
22
+ Union,
23
+ )
16
24
 
17
25
  from unstructured.documents.elements import Element
18
26
 
19
- from camel.utils import dependencies_required
20
-
21
27
 
22
28
  class UnstructuredIO:
23
29
  r"""A class to handle various functionalities provided by the
@@ -25,56 +31,12 @@ class UnstructuredIO:
25
31
  extracting, staging, chunking data, and integrating with cloud
26
32
  services like S3 and Azure for data connection.
27
33
 
28
- Attributes:
29
- UNSTRUCTURED_MIN_VERSION (str): The minimum required version of
30
- the Unstructured library.
34
+ References:
35
+ https://docs.unstructured.io/
31
36
  """
32
37
 
33
- UNSTRUCTURED_MIN_VERSION = "0.10.30" # Define the minimum version
34
-
35
- def __init__(self):
36
- r"""Initializes the UnstructuredIO class and ensures the
37
- installed version of Unstructured library meets the minimum
38
- requirements.
39
- """
40
- self._ensure_unstructured_version(self.UNSTRUCTURED_MIN_VERSION)
41
-
42
- @dependencies_required('unstructured')
43
- def _ensure_unstructured_version(self, min_version: str) -> None:
44
- r"""Validates that the installed 'Unstructured' library version
45
- satisfies the specified minimum version requirement. This function is
46
- essential for ensuring compatibility with features that depend on a
47
- certain version of the 'Unstructured' package.
48
-
49
- Args:
50
- min_version (str): The minimum version required, specified in
51
- `'major.minor.patch'` format.
52
-
53
- Raises:
54
- ImportError: If the 'Unstructured' package is not available in the
55
- environment.
56
- ValueError: If the current `'Unstructured'` version is older than
57
- the required minimum version.
58
-
59
- Notes:
60
- Uses the 'packaging.version' module to parse and compare version
61
- strings.
62
- """
63
- from packaging import version
64
- from unstructured.__version__ import __version__
65
-
66
- # Use packaging.version to compare versions
67
- min_ver = version.parse(min_version)
68
- installed_ver = version.parse(__version__)
69
-
70
- if installed_ver < min_ver:
71
- raise ValueError(
72
- f"Require `unstructured>={min_version}`, "
73
- f"you have {__version__}."
74
- )
75
-
38
+ @staticmethod
76
39
  def create_element_from_text(
77
- self,
78
40
  text: str,
79
41
  element_id: Optional[Union[str, uuid.UUID]] = None,
80
42
  embeddings: Optional[List[float]] = None,
@@ -89,8 +51,8 @@ class UnstructuredIO:
89
51
 
90
52
  Args:
91
53
  text (str): The text content for the element.
92
- element_id (Union[str, uuid.UUID], optional): Unique identifier
93
- forthe element. Defaults to an empty string.
54
+ element_id (Optional[Union[str, uuid.UUID]], optional): Unique
55
+ identifier for the element. Defaults to `None`.
94
56
  embeddings (Optional[List[float]], optional): A list of float
95
57
  numbers representing the text embeddings. Defaults to `None`.
96
58
  filename (Optional[str], optional): The name of the file the
@@ -120,13 +82,13 @@ class UnstructuredIO:
120
82
 
121
83
  return Text(
122
84
  text=text,
123
- element_id=element_id if element_id else str(uuid.uuid4()),
85
+ element_id=element_id or uuid.uuid4(),
124
86
  metadata=metadata,
125
87
  embeddings=embeddings,
126
88
  )
127
89
 
90
+ @staticmethod
128
91
  def parse_file_or_url(
129
- self,
130
92
  input_path: str,
131
93
  **kwargs: Any,
132
94
  ) -> List[Element]:
@@ -189,8 +151,8 @@ class UnstructuredIO:
189
151
  "Failed to parse the unstructured file."
190
152
  ) from e
191
153
 
154
+ @staticmethod
192
155
  def clean_text_data(
193
- self,
194
156
  text: str,
195
157
  clean_options: Optional[List[Tuple[str, Dict[str, Any]]]] = None,
196
158
  ) -> str:
@@ -253,7 +215,7 @@ class UnstructuredIO:
253
215
  )
254
216
  from unstructured.cleaners.translate import translate_text
255
217
 
256
- cleaning_functions = {
218
+ cleaning_functions: Any = {
257
219
  "clean_extra_whitespace": clean_extra_whitespace,
258
220
  "clean_bullets": clean_bullets,
259
221
  "clean_ordered_bullets": clean_ordered_bullets,
@@ -291,8 +253,8 @@ class UnstructuredIO:
291
253
 
292
254
  return cleaned_text
293
255
 
256
+ @staticmethod
294
257
  def extract_data_from_text(
295
- self,
296
258
  text: str,
297
259
  extract_type: Literal[
298
260
  'extract_datetimetz',
@@ -340,7 +302,7 @@ class UnstructuredIO:
340
302
  extract_us_phone_number,
341
303
  )
342
304
 
343
- extraction_functions = {
305
+ extraction_functions: Any = {
344
306
  "extract_datetimetz": extract_datetimetz,
345
307
  "extract_email_address": extract_email_address,
346
308
  "extract_ip_address": extract_ip_address,
@@ -357,8 +319,8 @@ class UnstructuredIO:
357
319
 
358
320
  return extraction_functions[extract_type](text, **kwargs)
359
321
 
322
+ @staticmethod
360
323
  def stage_elements(
361
- self,
362
324
  elements: List[Any],
363
325
  stage_type: Literal[
364
326
  'convert_to_csv',
@@ -416,7 +378,7 @@ class UnstructuredIO:
416
378
  weaviate,
417
379
  )
418
380
 
419
- staging_functions = {
381
+ staging_functions: Any = {
420
382
  "convert_to_csv": base.convert_to_csv,
421
383
  "convert_to_dataframe": base.convert_to_dataframe,
422
384
  "convert_to_dict": base.convert_to_dict,
@@ -441,8 +403,9 @@ class UnstructuredIO:
441
403
 
442
404
  return staging_functions[stage_type](elements, **kwargs)
443
405
 
406
+ @staticmethod
444
407
  def chunk_elements(
445
- self, elements: List[Any], chunk_type: str, **kwargs
408
+ elements: List[Any], chunk_type: str, **kwargs
446
409
  ) -> List[Element]:
447
410
  r"""Chunks elements by titles.
448
411
 
@@ -470,228 +433,3 @@ class UnstructuredIO:
470
433
 
471
434
  # Format chunks into a list of dictionaries (or your preferred format)
472
435
  return chunking_functions[chunk_type](elements, **kwargs)
473
-
474
- def run_s3_ingest(
475
- self,
476
- s3_url: str,
477
- output_dir: str,
478
- num_processes: int = 2,
479
- anonymous: bool = True,
480
- ) -> None:
481
- r"""Processes documents from an S3 bucket and stores structured
482
- outputs locally.
483
-
484
- Args:
485
- s3_url (str): The URL of the S3 bucket.
486
- output_dir (str): Local directory to store the processed outputs.
487
- num_processes (int, optional): Number of processes to use.
488
- (default: :obj:`2`)
489
- anonymous (bool, optional): Flag to run anonymously if
490
- required. (default: :obj:`True`)
491
-
492
- Notes:
493
- You need to install the necessary extras by using:
494
- `pip install "unstructured[s3]"`.
495
-
496
- References:
497
- https://unstructured-io.github.io/unstructured/
498
- """
499
-
500
- from unstructured.ingest.interfaces import (
501
- FsspecConfig,
502
- PartitionConfig,
503
- ProcessorConfig,
504
- ReadConfig,
505
- )
506
- from unstructured.ingest.runner import S3Runner
507
-
508
- runner = S3Runner(
509
- processor_config=ProcessorConfig(
510
- verbose=True,
511
- output_dir=output_dir,
512
- num_processes=num_processes,
513
- ),
514
- read_config=ReadConfig(),
515
- partition_config=PartitionConfig(),
516
- fsspec_config=FsspecConfig(remote_url=s3_url),
517
- )
518
- runner.run(anonymous=anonymous)
519
-
520
- def run_azure_ingest(
521
- self,
522
- azure_url: str,
523
- output_dir: str,
524
- account_name: str,
525
- num_processes: int = 2,
526
- ) -> None:
527
- r"""Processes documents from an Azure storage container and stores
528
- structured outputs locally.
529
-
530
- Args:
531
- azure_url (str): The URL of the Azure storage container.
532
- output_dir (str): Local directory to store the processed outputs.
533
- account_name (str): Azure account name for accessing the container.
534
- num_processes (int, optional): Number of processes to use.
535
- (default: :obj:`2`)
536
-
537
- Notes:
538
- You need to install the necessary extras by using:
539
- `pip install "unstructured[azure]"`.
540
-
541
- References:
542
- https://unstructured-io.github.io/unstructured/
543
- """
544
- from unstructured.ingest.interfaces import (
545
- FsspecConfig,
546
- PartitionConfig,
547
- ProcessorConfig,
548
- ReadConfig,
549
- )
550
- from unstructured.ingest.runner import AzureRunner
551
-
552
- runner = AzureRunner(
553
- processor_config=ProcessorConfig(
554
- verbose=True,
555
- output_dir=output_dir,
556
- num_processes=num_processes,
557
- ),
558
- read_config=ReadConfig(),
559
- partition_config=PartitionConfig(),
560
- fsspec_config=FsspecConfig(remote_url=azure_url),
561
- )
562
- runner.run(account_name=account_name)
563
-
564
- def run_github_ingest(
565
- self,
566
- repo_url: str,
567
- git_branch: str,
568
- output_dir: str,
569
- num_processes: int = 2,
570
- ) -> None:
571
- r"""Processes documents from a GitHub repository and stores
572
- structured outputs locally.
573
-
574
- Args:
575
- repo_url (str): URL of the GitHub repository.
576
- git_branch (str): Git branch name to process.
577
- output_dir (str): Local directory to store the processed outputs.
578
- num_processes (int, optional): Number of processes to use.
579
- (default: :obj:`2`)
580
-
581
- Notes:
582
- You need to install the necessary extras by using:
583
- `pip install "unstructured[github]"`.
584
-
585
- References:
586
- https://unstructured-io.github.io/unstructured/
587
- """
588
- from unstructured.ingest.interfaces import (
589
- PartitionConfig,
590
- ProcessorConfig,
591
- ReadConfig,
592
- )
593
- from unstructured.ingest.runner import GithubRunner
594
-
595
- runner = GithubRunner(
596
- processor_config=ProcessorConfig(
597
- verbose=True,
598
- output_dir=output_dir,
599
- num_processes=num_processes,
600
- ),
601
- read_config=ReadConfig(),
602
- partition_config=PartitionConfig(),
603
- )
604
- runner.run(url=repo_url, git_branch=git_branch)
605
-
606
- def run_slack_ingest(
607
- self,
608
- channels: List[str],
609
- token: str,
610
- start_date: str,
611
- end_date: str,
612
- output_dir: str,
613
- num_processes: int = 2,
614
- ) -> None:
615
- r"""Processes documents from specified Slack channels and stores
616
- structured outputs locally.
617
-
618
- Args:
619
- channels (List[str]): List of Slack channel IDs.
620
- token (str): Slack API token.
621
- start_date (str): Start date for fetching data.
622
- end_date (str): End date for fetching data.
623
- output_dir (str): Local directory to store the processed outputs.
624
- num_processes (int, optional): Number of processes to use.
625
- (default: :obj:`2`)
626
-
627
- Notes:
628
- You need to install the necessary extras by using:
629
- `pip install "unstructured[slack]"`.
630
-
631
- References:
632
- https://unstructured-io.github.io/unstructured/
633
- """
634
- from unstructured.ingest.interfaces import (
635
- PartitionConfig,
636
- ProcessorConfig,
637
- ReadConfig,
638
- )
639
- from unstructured.ingest.runner import SlackRunner
640
-
641
- runner = SlackRunner(
642
- processor_config=ProcessorConfig(
643
- verbose=True,
644
- output_dir=output_dir,
645
- num_processes=num_processes,
646
- ),
647
- read_config=ReadConfig(),
648
- partition_config=PartitionConfig(),
649
- )
650
- runner.run(
651
- channels=channels,
652
- token=token,
653
- start_date=start_date,
654
- end_date=end_date,
655
- )
656
-
657
- def run_discord_ingest(
658
- self,
659
- channels: List[str],
660
- token: str,
661
- output_dir: str,
662
- num_processes: int = 2,
663
- ) -> None:
664
- r"""Processes messages from specified Discord channels and stores
665
- structured outputs locally.
666
-
667
- Args:
668
- channels (List[str]): List of Discord channel IDs.
669
- token (str): Discord bot token.
670
- output_dir (str): Local directory to store the processed outputs.
671
- num_processes (int, optional): Number of processes to use.
672
- (default: :obj:`2`)
673
-
674
- Notes:
675
- You need to install the necessary extras by using:
676
- `pip install "unstructured[discord]"`.
677
-
678
- References:
679
- https://unstructured-io.github.io/unstructured/
680
- """
681
- from unstructured.ingest.interfaces import (
682
- PartitionConfig,
683
- ProcessorConfig,
684
- ReadConfig,
685
- )
686
- from unstructured.ingest.runner import DiscordRunner
687
-
688
- runner = DiscordRunner(
689
- processor_config=ProcessorConfig(
690
- verbose=True,
691
- output_dir=output_dir,
692
- num_processes=num_processes,
693
- ),
694
- read_config=ReadConfig(),
695
- partition_config=PartitionConfig(),
696
- )
697
- runner.run(channels=channels, token=token)
@@ -97,36 +97,36 @@ class AutoRetriever:
97
97
  f"Unsupported vector storage type: {self.storage_type}"
98
98
  )
99
99
 
100
- def _collection_name_generator(self, content_input_path: str) -> str:
100
+ def _collection_name_generator(self, content: str) -> str:
101
101
  r"""Generates a valid collection name from a given file path or URL.
102
102
 
103
103
  Args:
104
- content_input_path: str. The input URL or file path from which to
105
- generate the collection name.
104
+ content (str): Local file path, remote URL or string content.
106
105
 
107
106
  Returns:
108
107
  str: A sanitized, valid collection name suitable for use.
109
108
  """
110
- # Check path type
111
- parsed_url = urlparse(content_input_path)
112
- self.is_url = all([parsed_url.scheme, parsed_url.netloc])
109
+ # Check if the content is URL
110
+ parsed_url = urlparse(content)
111
+ is_url = all([parsed_url.scheme, parsed_url.netloc])
113
112
 
114
113
  # Convert given path into a collection name, ensuring it only
115
114
  # contains numbers, letters, and underscores
116
- if self.is_url:
115
+ if is_url:
117
116
  # For URLs, remove https://, replace /, and any characters not
118
117
  # allowed by Milvus with _
119
118
  collection_name = re.sub(
120
119
  r'[^0-9a-zA-Z]+',
121
120
  '_',
122
- content_input_path.replace("https://", ""),
121
+ content.replace("https://", ""),
123
122
  )
124
- else:
123
+ elif os.path.exists(content):
125
124
  # For file paths, get the stem and replace spaces with _, also
126
125
  # ensuring only allowed characters are present
127
- collection_name = re.sub(
128
- r'[^0-9a-zA-Z]+', '_', Path(content_input_path).stem
129
- )
126
+ collection_name = re.sub(r'[^0-9a-zA-Z]+', '_', Path(content).stem)
127
+ else:
128
+ # the content is string input
129
+ collection_name = content[:10]
130
130
 
131
131
  # Ensure the collection name does not start or end with underscore
132
132
  collection_name = collection_name.strip("_")
@@ -193,7 +193,7 @@ class AutoRetriever:
193
193
  def run_vector_retriever(
194
194
  self,
195
195
  query: str,
196
- content_input_paths: Union[str, List[str]],
196
+ contents: Union[str, List[str]],
197
197
  top_k: int = DEFAULT_TOP_K_RESULTS,
198
198
  similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
199
199
  return_detailed_info: bool = False,
@@ -203,8 +203,8 @@ class AutoRetriever:
203
203
 
204
204
  Args:
205
205
  query (str): Query string for information retriever.
206
- content_input_paths (Union[str, List[str]]): Paths to local
207
- files or remote URLs.
206
+ contents (Union[str, List[str]]): Local file paths, remote URLs or
207
+ string contents.
208
208
  top_k (int, optional): The number of top results to return during
209
209
  retrieve. Must be a positive integer. Defaults to
210
210
  `DEFAULT_TOP_K_RESULTS`.
@@ -223,24 +223,18 @@ class AutoRetriever:
223
223
  Raises:
224
224
  ValueError: If there's a vector storage existing with content
225
225
  name in the vector path but the payload is None. If
226
- `content_input_paths` is empty.
226
+ `contents` is empty.
227
227
  RuntimeError: If any errors occur during the retrieve process.
228
228
  """
229
- if not content_input_paths:
230
- raise ValueError("content_input_paths cannot be empty.")
229
+ if not contents:
230
+ raise ValueError("content cannot be empty.")
231
231
 
232
- content_input_paths = (
233
- [content_input_paths]
234
- if isinstance(content_input_paths, str)
235
- else content_input_paths
236
- )
232
+ contents = [contents] if isinstance(contents, str) else contents
237
233
 
238
234
  all_retrieved_info = []
239
- for content_input_path in content_input_paths:
235
+ for content in contents:
240
236
  # Generate a valid collection name
241
- collection_name = self._collection_name_generator(
242
- content_input_path
243
- )
237
+ collection_name = self._collection_name_generator(content)
244
238
  try:
245
239
  vector_storage_instance = self._initialize_vector_storage(
246
240
  collection_name
@@ -251,13 +245,11 @@ class AutoRetriever:
251
245
  file_is_modified = False # initialize with a default value
252
246
  if (
253
247
  vector_storage_instance.status().vector_count != 0
254
- and not self.is_url
248
+ and os.path.exists(content)
255
249
  ):
256
250
  # Get original modified date from file
257
251
  modified_date_from_file = (
258
- self._get_file_modified_date_from_file(
259
- content_input_path
260
- )
252
+ self._get_file_modified_date_from_file(content)
261
253
  )
262
254
  # Get modified date from vector storage
263
255
  modified_date_from_storage = (
@@ -280,18 +272,16 @@ class AutoRetriever:
280
272
  # Process and store the content to the vector storage
281
273
  vr = VectorRetriever(
282
274
  storage=vector_storage_instance,
283
- similarity_threshold=similarity_threshold,
284
275
  embedding_model=self.embedding_model,
285
276
  )
286
- vr.process(content_input_path)
277
+ vr.process(content)
287
278
  else:
288
279
  vr = VectorRetriever(
289
280
  storage=vector_storage_instance,
290
- similarity_threshold=similarity_threshold,
291
281
  embedding_model=self.embedding_model,
292
282
  )
293
283
  # Retrieve info by given query from the vector storage
294
- retrieved_info = vr.query(query, top_k)
284
+ retrieved_info = vr.query(query, top_k, similarity_threshold)
295
285
  all_retrieved_info.extend(retrieved_info)
296
286
  except Exception as e:
297
287
  raise RuntimeError(
@@ -11,7 +11,9 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
  # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
+ import os
14
15
  from typing import Any, Dict, List, Optional
16
+ from urllib.parse import urlparse
15
17
 
16
18
  from camel.embeddings import BaseEmbedding, OpenAIEmbedding
17
19
  from camel.loaders import UnstructuredIO
@@ -38,24 +40,18 @@ class VectorRetriever(BaseRetriever):
38
40
  embedding_model (BaseEmbedding): Embedding model used to generate
39
41
  vector embeddings.
40
42
  storage (BaseVectorStorage): Vector storage to query.
41
- similarity_threshold (float, optional): The similarity threshold
42
- for filtering results. Defaults to `DEFAULT_SIMILARITY_THRESHOLD`.
43
43
  unstructured_modules (UnstructuredIO): A module for parsing files and
44
44
  URLs and chunking content based on specified parameters.
45
45
  """
46
46
 
47
47
  def __init__(
48
48
  self,
49
- similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
50
49
  embedding_model: Optional[BaseEmbedding] = None,
51
50
  storage: Optional[BaseVectorStorage] = None,
52
51
  ) -> None:
53
52
  r"""Initializes the retriever class with an optional embedding model.
54
53
 
55
54
  Args:
56
- similarity_threshold (float, optional): The similarity threshold
57
- for filtering results. Defaults to
58
- `DEFAULT_SIMILARITY_THRESHOLD`.
59
55
  embedding_model (Optional[BaseEmbedding]): The embedding model
60
56
  instance. Defaults to `OpenAIEmbedding` if not provided.
61
57
  storage (BaseVectorStorage): Vector storage to query.
@@ -68,12 +64,11 @@ class VectorRetriever(BaseRetriever):
68
64
  vector_dim=self.embedding_model.get_output_dim()
69
65
  )
70
66
  )
71
- self.similarity_threshold = similarity_threshold
72
- self.unstructured_modules: UnstructuredIO = UnstructuredIO()
67
+ self.uio: UnstructuredIO = UnstructuredIO()
73
68
 
74
69
  def process(
75
70
  self,
76
- content_input_path: str,
71
+ content: str,
77
72
  chunk_type: str = "chunk_by_title",
78
73
  **kwargs: Any,
79
74
  ) -> None:
@@ -82,16 +77,19 @@ class VectorRetriever(BaseRetriever):
82
77
  vector storage.
83
78
 
84
79
  Args:
85
- content_input_path (str): File path or URL of the content to be
86
- processed.
80
+ content (str): Local file path, remote URL or string content.
87
81
  chunk_type (str): Type of chunking going to apply. Defaults to
88
82
  "chunk_by_title".
89
83
  **kwargs (Any): Additional keyword arguments for content parsing.
90
84
  """
91
- elements = self.unstructured_modules.parse_file_or_url(
92
- content_input_path, **kwargs
93
- )
94
- chunks = self.unstructured_modules.chunk_elements(
85
+ # Check if the content is URL
86
+ parsed_url = urlparse(content)
87
+ is_url = all([parsed_url.scheme, parsed_url.netloc])
88
+ if is_url or os.path.exists(content):
89
+ elements = self.uio.parse_file_or_url(content, **kwargs)
90
+ else:
91
+ elements = [self.uio.create_element_from_text(text=content)]
92
+ chunks = self.uio.chunk_elements(
95
93
  chunk_type=chunk_type, elements=elements
96
94
  )
97
95
  # Iterate to process and store embeddings, set batch of 50
@@ -105,7 +103,7 @@ class VectorRetriever(BaseRetriever):
105
103
  # Prepare the payload for each vector record, includes the content
106
104
  # path, chunk metadata, and chunk text
107
105
  for vector, chunk in zip(batch_vectors, batch_chunks):
108
- content_path_info = {"content path": content_input_path}
106
+ content_path_info = {"content path": content}
109
107
  chunk_metadata = {"metadata": chunk.metadata.to_dict()}
110
108
  chunk_text = {"text": str(chunk)}
111
109
  combined_dict = {
@@ -124,12 +122,16 @@ class VectorRetriever(BaseRetriever):
124
122
  self,
125
123
  query: str,
126
124
  top_k: int = DEFAULT_TOP_K_RESULTS,
125
+ similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
127
126
  ) -> List[Dict[str, Any]]:
128
127
  r"""Executes a query in vector storage and compiles the retrieved
129
128
  results into a dictionary.
130
129
 
131
130
  Args:
132
131
  query (str): Query string for information retriever.
132
+ similarity_threshold (float, optional): The similarity threshold
133
+ for filtering results. Defaults to
134
+ `DEFAULT_SIMILARITY_THRESHOLD`.
133
135
  top_k (int, optional): The number of top results to return during
134
136
  retriever. Must be a positive integer. Defaults to 1.
135
137
 
@@ -161,7 +163,7 @@ class VectorRetriever(BaseRetriever):
161
163
  formatted_results = []
162
164
  for result in query_results:
163
165
  if (
164
- result.similarity >= self.similarity_threshold
166
+ result.similarity >= similarity_threshold
165
167
  and result.record.payload is not None
166
168
  ):
167
169
  result_dict = {
@@ -182,7 +184,7 @@ class VectorRetriever(BaseRetriever):
182
184
  'text': (
183
185
  f"No suitable information retrieved "
184
186
  f"from {content_path} with similarity_threshold"
185
- f" = {self.similarity_threshold}."
187
+ f" = {similarity_threshold}."
186
188
  )
187
189
  }
188
190
  ]
@@ -0,0 +1,22 @@
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the “License”);
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an “AS IS” BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
+ from .amazon_s3 import AmazonS3Storage
15
+ from .azure_blob import AzureBlobStorage
16
+ from .google_cloud import GoogleCloudStorage
17
+
18
+ __all__ = [
19
+ "AmazonS3Storage",
20
+ "AzureBlobStorage",
21
+ "GoogleCloudStorage",
22
+ ]