ag2 0.4.1__py3-none-any.whl → 0.4.2b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ag2 might be problematic. Click here for more details.
- ag2-0.4.2b1.dist-info/METADATA +19 -0
- ag2-0.4.2b1.dist-info/RECORD +6 -0
- ag2-0.4.2b1.dist-info/top_level.txt +1 -0
- ag2-0.4.1.dist-info/METADATA +0 -500
- ag2-0.4.1.dist-info/RECORD +0 -158
- ag2-0.4.1.dist-info/top_level.txt +0 -1
- autogen/__init__.py +0 -17
- autogen/_pydantic.py +0 -116
- autogen/agentchat/__init__.py +0 -42
- autogen/agentchat/agent.py +0 -142
- autogen/agentchat/assistant_agent.py +0 -85
- autogen/agentchat/chat.py +0 -306
- autogen/agentchat/contrib/__init__.py +0 -0
- autogen/agentchat/contrib/agent_builder.py +0 -788
- autogen/agentchat/contrib/agent_eval/agent_eval.py +0 -107
- autogen/agentchat/contrib/agent_eval/criterion.py +0 -47
- autogen/agentchat/contrib/agent_eval/critic_agent.py +0 -47
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +0 -42
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +0 -48
- autogen/agentchat/contrib/agent_eval/task.py +0 -43
- autogen/agentchat/contrib/agent_optimizer.py +0 -450
- autogen/agentchat/contrib/capabilities/__init__.py +0 -0
- autogen/agentchat/contrib/capabilities/agent_capability.py +0 -21
- autogen/agentchat/contrib/capabilities/generate_images.py +0 -297
- autogen/agentchat/contrib/capabilities/teachability.py +0 -406
- autogen/agentchat/contrib/capabilities/text_compressors.py +0 -72
- autogen/agentchat/contrib/capabilities/transform_messages.py +0 -92
- autogen/agentchat/contrib/capabilities/transforms.py +0 -565
- autogen/agentchat/contrib/capabilities/transforms_util.py +0 -120
- autogen/agentchat/contrib/capabilities/vision_capability.py +0 -217
- autogen/agentchat/contrib/captainagent/tools/__init__.py +0 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +0 -41
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +0 -31
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +0 -26
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +0 -55
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +0 -54
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +0 -39
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +0 -35
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +0 -61
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +0 -62
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +0 -48
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +0 -34
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +0 -36
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +0 -19
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +0 -32
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +0 -17
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +0 -26
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +0 -24
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +0 -28
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +0 -35
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +0 -40
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +0 -23
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +0 -37
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +0 -16
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +0 -16
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +0 -10
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +0 -34
- autogen/agentchat/contrib/captainagent.py +0 -490
- autogen/agentchat/contrib/gpt_assistant_agent.py +0 -545
- autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
- autogen/agentchat/contrib/graph_rag/document.py +0 -30
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +0 -111
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +0 -81
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +0 -56
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +0 -64
- autogen/agentchat/contrib/img_utils.py +0 -390
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +0 -123
- autogen/agentchat/contrib/llava_agent.py +0 -176
- autogen/agentchat/contrib/math_user_proxy_agent.py +0 -471
- autogen/agentchat/contrib/multimodal_conversable_agent.py +0 -128
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +0 -325
- autogen/agentchat/contrib/retrieve_assistant_agent.py +0 -56
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +0 -705
- autogen/agentchat/contrib/society_of_mind_agent.py +0 -203
- autogen/agentchat/contrib/swarm_agent.py +0 -463
- autogen/agentchat/contrib/text_analyzer_agent.py +0 -76
- autogen/agentchat/contrib/tool_retriever.py +0 -120
- autogen/agentchat/contrib/vectordb/__init__.py +0 -0
- autogen/agentchat/contrib/vectordb/base.py +0 -243
- autogen/agentchat/contrib/vectordb/chromadb.py +0 -326
- autogen/agentchat/contrib/vectordb/mongodb.py +0 -559
- autogen/agentchat/contrib/vectordb/pgvectordb.py +0 -958
- autogen/agentchat/contrib/vectordb/qdrant.py +0 -334
- autogen/agentchat/contrib/vectordb/utils.py +0 -126
- autogen/agentchat/contrib/web_surfer.py +0 -305
- autogen/agentchat/conversable_agent.py +0 -2908
- autogen/agentchat/groupchat.py +0 -1668
- autogen/agentchat/user_proxy_agent.py +0 -109
- autogen/agentchat/utils.py +0 -207
- autogen/browser_utils.py +0 -291
- autogen/cache/__init__.py +0 -10
- autogen/cache/abstract_cache_base.py +0 -78
- autogen/cache/cache.py +0 -182
- autogen/cache/cache_factory.py +0 -85
- autogen/cache/cosmos_db_cache.py +0 -150
- autogen/cache/disk_cache.py +0 -109
- autogen/cache/in_memory_cache.py +0 -61
- autogen/cache/redis_cache.py +0 -128
- autogen/code_utils.py +0 -745
- autogen/coding/__init__.py +0 -22
- autogen/coding/base.py +0 -113
- autogen/coding/docker_commandline_code_executor.py +0 -262
- autogen/coding/factory.py +0 -45
- autogen/coding/func_with_reqs.py +0 -203
- autogen/coding/jupyter/__init__.py +0 -22
- autogen/coding/jupyter/base.py +0 -32
- autogen/coding/jupyter/docker_jupyter_server.py +0 -164
- autogen/coding/jupyter/embedded_ipython_code_executor.py +0 -182
- autogen/coding/jupyter/jupyter_client.py +0 -224
- autogen/coding/jupyter/jupyter_code_executor.py +0 -161
- autogen/coding/jupyter/local_jupyter_server.py +0 -168
- autogen/coding/local_commandline_code_executor.py +0 -410
- autogen/coding/markdown_code_extractor.py +0 -44
- autogen/coding/utils.py +0 -57
- autogen/exception_utils.py +0 -46
- autogen/extensions/__init__.py +0 -0
- autogen/formatting_utils.py +0 -76
- autogen/function_utils.py +0 -362
- autogen/graph_utils.py +0 -148
- autogen/io/__init__.py +0 -15
- autogen/io/base.py +0 -105
- autogen/io/console.py +0 -43
- autogen/io/websockets.py +0 -213
- autogen/logger/__init__.py +0 -11
- autogen/logger/base_logger.py +0 -140
- autogen/logger/file_logger.py +0 -287
- autogen/logger/logger_factory.py +0 -29
- autogen/logger/logger_utils.py +0 -42
- autogen/logger/sqlite_logger.py +0 -459
- autogen/math_utils.py +0 -356
- autogen/oai/__init__.py +0 -33
- autogen/oai/anthropic.py +0 -428
- autogen/oai/bedrock.py +0 -606
- autogen/oai/cerebras.py +0 -270
- autogen/oai/client.py +0 -1148
- autogen/oai/client_utils.py +0 -167
- autogen/oai/cohere.py +0 -453
- autogen/oai/completion.py +0 -1216
- autogen/oai/gemini.py +0 -469
- autogen/oai/groq.py +0 -281
- autogen/oai/mistral.py +0 -279
- autogen/oai/ollama.py +0 -582
- autogen/oai/openai_utils.py +0 -811
- autogen/oai/together.py +0 -343
- autogen/retrieve_utils.py +0 -487
- autogen/runtime_logging.py +0 -163
- autogen/token_count_utils.py +0 -259
- autogen/types.py +0 -20
- autogen/version.py +0 -7
- {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/LICENSE +0 -0
- {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/NOTICE.md +0 -0
- {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/WHEEL +0 -0
|
@@ -1,326 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
#
|
|
5
|
-
# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
|
|
6
|
-
# SPDX-License-Identifier: MIT
|
|
7
|
-
import os
|
|
8
|
-
from typing import Callable, List
|
|
9
|
-
|
|
10
|
-
from .base import Document, ItemID, QueryResults, VectorDB
|
|
11
|
-
from .utils import chroma_results_to_query_results, filter_results_by_distance, get_logger
|
|
12
|
-
|
|
13
|
-
def _numeric_version(version: str) -> tuple:
    """Return the leading numeric components of a dotted version string.

    Each dot-separated component contributes its leading run of ASCII digits
    as an int; parsing stops at the first component that has none. For example
    ``"0.4.15"`` -> ``(0, 4, 15)`` and ``"0.4.15rc1"`` -> ``(0, 4, 15)``.
    """
    parts = []
    for component in version.split("."):
        digits = ""
        for char in component:
            if not "0" <= char <= "9":
                break
            digits += char
        if not digits:
            break
        parts.append(int(digits))
    return tuple(parts)


try:
    import chromadb
except ImportError as err:
    raise ImportError("Please install chromadb: `pip install chromadb`") from err

# Compare versions numerically. A plain string comparison is lexicographic, so
# "0.4.9" < "0.4.15" is False and an outdated install would slip through.
# The check also lives outside the try block so that the "upgrade" message is
# not swallowed and replaced by the generic "install" message.
if _numeric_version(chromadb.__version__) < (0, 4, 15):
    raise ImportError("Please upgrade chromadb to version 0.4.15 or later.")

try:
    import chromadb.utils.embedding_functions as ef
    from chromadb.api.models.Collection import Collection
except ImportError as err:
    raise ImportError("Please install chromadb: `pip install chromadb`") from err

# Upper bound on the number of documents sent to ChromaDB per add/upsert call.
# The environment value (if set) is a string; consumers convert it with int().
CHROMADB_MAX_BATCH_SIZE = os.environ.get("CHROMADB_MAX_BATCH_SIZE", 40000)
logger = get_logger(__name__)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class ChromaVectorDB(VectorDB):
    """
    A vector database that uses ChromaDB as the backend.

    The instance keeps track of an "active" collection (`self.active_collection`),
    which is used whenever a method is called without an explicit collection name.
    """

    def __init__(
        self, *, client=None, path: str = "tmp/db", embedding_function: Callable = None, metadata: dict = None, **kwargs
    ) -> None:
        """
        Initialize the vector database.

        Args:
            client: chromadb.Client | The client object of the vector database. Default is None.
                If provided, it is used directly and `path` and `kwargs` are ignored.
            path: str | The path to the vector database. Default is `tmp/db`. The default was `None` for version <=0.2.24.
                If `path` is None (and no client is given), a non-persistent `chromadb.Client` is created.
            embedding_function: Callable | The embedding function used to generate the vector representation
                of the documents. Default is None, SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") will be used.
            metadata: dict | The metadata of the vector database. Default is None. If None (or empty), it will use this
                setting: {"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 32}. For more details of
                the metadata, please refer to [distances](https://github.com/nmslib/hnswlib#supported-distances),
                [hnsw](https://github.com/chroma-core/chroma/blob/566bc80f6c8ee29f7d99b6322654f32183c368c4/chromadb/segment/impl/vector/local_hnsw.py#L184),
                and [ALGO_PARAMS](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md).
            kwargs: dict | Additional keyword arguments, forwarded to the chromadb client constructor.

        Returns:
            None
        """
        self.client = client
        self.path = path
        self.embedding_function = (
            ef.SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
            if embedding_function is None
            else embedding_function
        )
        self.metadata = metadata if metadata else {"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 32}
        if not self.client:
            if self.path is not None:
                self.client = chromadb.PersistentClient(path=self.path, **kwargs)
            else:
                self.client = chromadb.Client(**kwargs)
        self.active_collection = None
        self.type = "chroma"

    def create_collection(
        self, collection_name: str, overwrite: bool = False, get_or_create: bool = True
    ) -> Collection:
        """
        Create a collection in the vector database.
        Case 1. if the collection does not exist, create the collection.
        Case 2. the collection exists, if overwrite is True, it will overwrite the collection.
        Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection,
            otherwise it raise a ValueError.

        Args:
            collection_name: str | The name of the collection.
            overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
            get_or_create: bool | Whether to get the collection if it exists. Default is True.

        Returns:
            Collection | The collection object.

        Raises:
            ValueError: If the collection exists and both `overwrite` and `get_or_create` are False.
        """
        is_active = bool(self.active_collection and self.active_collection.name == collection_name)
        try:
            if is_active:
                collection = self.active_collection
            else:
                collection = self.client.get_collection(collection_name, embedding_function=self.embedding_function)
        except ValueError:
            # NOTE(review): a missing collection is assumed to surface as ValueError here;
            # confirm against the installed chromadb version.
            collection = None

        if collection is None:
            return self.client.create_collection(
                collection_name,
                embedding_function=self.embedding_function,
                get_or_create=get_or_create,
                metadata=self.metadata,
            )
        if overwrite:
            self.client.delete_collection(collection_name)
            collection = self.client.create_collection(
                collection_name,
                embedding_function=self.embedding_function,
                get_or_create=get_or_create,
                metadata=self.metadata,
            )
            if is_active:
                # The cached handle referred to the collection that was just deleted;
                # point it at the replacement so later calls do not use a stale object.
                self.active_collection = collection
            return collection
        if get_or_create:
            return collection
        raise ValueError(f"Collection {collection_name} already exists.")

    def get_collection(self, collection_name: str = None) -> Collection:
        """
        Get the collection from the vector database.

        When a name is given and it differs from the current active collection, the
        collection is fetched from the client and becomes the new active collection.

        Args:
            collection_name: str | The name of the collection. Default is None. If None, return the
                current active collection.

        Returns:
            Collection | The collection object.

        Raises:
            ValueError: If `collection_name` is None and there is no active collection.
        """
        if collection_name is None:
            if self.active_collection is None:
                raise ValueError("No collection is specified.")
            logger.info(
                f"No collection is specified. Using current active collection {self.active_collection.name}."
            )
        elif not (self.active_collection and self.active_collection.name == collection_name):
            self.active_collection = self.client.get_collection(
                collection_name, embedding_function=self.embedding_function
            )
        return self.active_collection

    def delete_collection(self, collection_name: str) -> None:
        """
        Delete the collection from the vector database.

        If the deleted collection is the active collection, the active collection is reset to None.

        Args:
            collection_name: str | The name of the collection.

        Returns:
            None
        """
        self.client.delete_collection(collection_name)
        if self.active_collection and self.active_collection.name == collection_name:
            self.active_collection = None

    def _batch_insert(
        self, collection: Collection, embeddings=None, ids=None, metadatas=None, documents=None, upsert=False
    ) -> None:
        """
        Add or upsert documents into a collection in batches of at most `CHROMADB_MAX_BATCH_SIZE` items.

        Args:
            collection: Collection | The collection to write to.
            embeddings: The embeddings aligned with `documents`, or None to let the collection compute them.
            ids: The document ids aligned with `documents`.
            metadatas: The metadatas aligned with `documents`, or None.
            documents: The document contents.
            upsert: bool | Use `collection.upsert` instead of `collection.add`. Default is False.

        Returns:
            None

        Raises:
            ValueError: If `CHROMADB_MAX_BATCH_SIZE` is not a positive integer.
        """
        total = len(documents)
        if total == 0:
            # Nothing to write; also avoids a zero step in range() below.
            return
        batch_size = int(CHROMADB_MAX_BATCH_SIZE)
        if batch_size < 1:
            # A non-positive step would either crash range() or silently skip every document.
            raise ValueError(f"CHROMADB_MAX_BATCH_SIZE must be a positive integer, got {batch_size}.")
        # Use an explicit length test: truthiness is ambiguous for array-like embeddings.
        has_embeddings = embeddings is not None and len(embeddings) > 0
        write = collection.upsert if upsert else collection.add
        for start in range(0, total, batch_size):
            end = start + batch_size  # slicing clamps to the sequence length
            write(
                documents=documents[start:end],
                ids=ids[start:end],
                metadatas=metadatas[start:end] if metadatas else None,
                embeddings=embeddings[start:end] if has_embeddings else None,
            )

    def insert_docs(self, docs: List[Document], collection_name: str = None, upsert: bool = False) -> None:
        """
        Insert documents into the collection of the vector database.

        Whether embeddings and metadatas are sent is decided from the first document:
        if it has no "embedding" (resp. "metadata"), none are sent for the whole batch.

        Args:
            docs: List[Document] | A list of documents. Each document is a TypedDict `Document`.
            collection_name: str | The name of the collection. Default is None.
            upsert: bool | Whether to update the document if it exists. Default is False.

        Returns:
            None

        Raises:
            ValueError: If any document has no "content" or no "id".
        """
        if not docs:
            return
        # Validate every document, not just the first, so that an incomplete entry is
        # reported here instead of failing later inside the backend.
        for doc in docs:
            if doc.get("content") is None:
                raise ValueError("The document content is required.")
            if doc.get("id") is None:
                raise ValueError("The document id is required.")
        documents = [doc.get("content") for doc in docs]
        ids = [doc.get("id") for doc in docs]
        collection = self.get_collection(collection_name)
        if docs[0].get("embedding") is None:
            logger.info(
                "No content embedding is provided. Will use the VectorDB's embedding function to generate the content embedding."
            )
            embeddings = None
        else:
            embeddings = [doc.get("embedding") for doc in docs]
        if docs[0].get("metadata") is None:
            metadatas = None
        else:
            metadatas = [doc.get("metadata") for doc in docs]
        self._batch_insert(collection, embeddings, ids, metadatas, documents, upsert)

    def update_docs(self, docs: List[Document], collection_name: str = None) -> None:
        """
        Update documents in the collection of the vector database.

        Implemented as an upsert via `insert_docs(..., upsert=True)`.

        Args:
            docs: List[Document] | A list of documents.
            collection_name: str | The name of the collection. Default is None.

        Returns:
            None
        """
        self.insert_docs(docs, collection_name, upsert=True)

    def delete_docs(self, ids: List[ItemID], collection_name: str = None, **kwargs) -> None:
        """
        Delete documents from the collection of the vector database.

        Args:
            ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`.
            collection_name: str | The name of the collection. Default is None.
            kwargs: Dict | Additional keyword arguments, forwarded to `collection.delete`.

        Returns:
            None
        """
        collection = self.get_collection(collection_name)
        collection.delete(ids, **kwargs)

    def retrieve_docs(
        self,
        queries: List[str],
        collection_name: str = None,
        n_results: int = 10,
        distance_threshold: float = -1,
        **kwargs,
    ) -> QueryResults:
        """
        Retrieve documents from the collection of the vector database based on the queries.

        Args:
            queries: List[str] | A list of queries. Each query is a string. A single string is
                also accepted and is treated as a one-element list.
            collection_name: str | The name of the collection. Default is None.
            n_results: int | The number of relevant documents to return. Default is 10.
            distance_threshold: float | The threshold for the distance score, only distance smaller than it will be
                returned. Don't filter with it if < 0. Default is -1.
            kwargs: Dict | Additional keyword arguments, forwarded to `collection.query`.

        Returns:
            QueryResults | The query results. Each query result is a list of list of tuples containing the document and
                the distance.
        """
        collection = self.get_collection(collection_name)
        if isinstance(queries, str):
            queries = [queries]
        results = collection.query(
            query_texts=queries,
            n_results=n_results,
            **kwargs,
        )
        # The conversion helper expects the texts under "contents" rather than "documents".
        results["contents"] = results.pop("documents")
        results = chroma_results_to_query_results(results)
        results = filter_results_by_distance(results, distance_threshold)
        return results

    @staticmethod
    def _chroma_get_results_to_list_documents(data_dict) -> List[Document]:
        """Converts a dictionary with list values to a list of Document.

        Keys whose value is None are skipped. The last character of each key is
        dropped to form the per-item key (e.g. "ids" -> "id"). The number of items
        is taken from the first non-None value. If there is no non-None value,
        an empty list is returned.

        Args:
            data_dict: A dictionary where keys map to lists or None.

        Returns:
            List[Document] | The list of Document.

        Example:
            data_dict = {
                "key1s": [1, 2, 3],
                "key2s": ["a", "b", "c"],
                "key3s": None,
                "key4s": ["x", "y", "z"],
            }

            results = [
                {"key1": 1, "key2": "a", "key4": "x"},
                {"key1": 2, "key2": "b", "key4": "y"},
                {"key1": 3, "key2": "c", "key4": "z"},
            ]
        """
        populated = {key: values for key, values in data_dict.items() if values is not None}
        if not populated:
            # Nothing to convert; indexing the first key would otherwise raise IndexError.
            return []

        n_items = len(next(iter(populated.values())))
        return [
            {key[:-1]: values[i] for key, values in populated.items() if len(values) > i}
            for i in range(n_items)
        ]

    def get_docs_by_ids(
        self, ids: List[ItemID] = None, collection_name: str = None, include=None, **kwargs
    ) -> List[Document]:
        """
        Retrieve documents from the collection of the vector database based on the ids.

        Args:
            ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None.
            collection_name: str | The name of the collection. Default is None.
            include: List[str] | The fields to include. Default is None.
                If None, will include ["metadatas", "documents"], ids will always be included.
            kwargs: dict | Additional keyword arguments, forwarded to `collection.get`.

        Returns:
            List[Document] | The results.
        """
        collection = self.get_collection(collection_name)
        include = include if include else ["metadatas", "documents"]
        results = collection.get(ids, include=include, **kwargs)
        results = self._chroma_get_results_to_list_documents(results)
        return results
|