frogml-core 0.0.113__py3-none-any.whl → 0.0.115__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frogml_core/__init__.py +1 -1
- frogml_core/clients/administration/authentication/client.py +2 -2
- frogml_core/clients/batch_job_management/client.py +4 -4
- frogml_core/clients/build_orchestrator/build_model_request_getter.py +6 -6
- frogml_core/clients/build_orchestrator/client.py +12 -12
- frogml_core/clients/build_orchestrator/internal_client.py +10 -10
- frogml_core/frogml_client/build_api_helpers/build_api_steps.py +3 -3
- frogml_core/inner/build_logic/constants/upload_tag.py +7 -7
- frogml_core/inner/build_logic/interface/context_interface.py +1 -1
- frogml_core/inner/build_logic/phases/phase_010_fetch_model/fetch_strategy_manager/strategy/strategy.py +4 -4
- frogml_core/inner/build_logic/phases/phase_010_fetch_model/set_version_step.py +3 -3
- frogml_core/inner/build_logic/phases/phase_020_remote_register_frogml_build/start_remote_build_step.py +3 -3
- frogml_core/inner/build_logic/phases/phase_020_remote_register_frogml_build/upload_step.py +11 -9
- frogml_core/inner/build_logic/tools/ignore_files.py +3 -3
- frogml_core/inner/di_configuration/__init__.py +0 -6
- frogml_core/model/adapters/__init__.py +1 -1
- frogml_core/model/analytics_logging.py +1 -1
- frogml_core/model/tools/adapters/input.py +6 -6
- frogml_core/model/tools/adapters/output.py +8 -8
- frogml_core/model/tools/run_model_locally.py +2 -2
- frogml_core/model/utils/feature_utils.py +1 -1
- {frogml_core-0.0.113.dist-info → frogml_core-0.0.115.dist-info}/METADATA +1 -1
- {frogml_core-0.0.113.dist-info → frogml_core-0.0.115.dist-info}/RECORD +30 -137
- frogml_services_mock/mocks/analytics_api.py +6 -6
- frogml_services_mock/mocks/ecosystem_service_api.py +2 -2
- frogml_services_mock/mocks/frogml_mocks.py +0 -11
- frogml_services_mock/services_mock.py +4 -52
- frogml_storage/__init__.py +1 -1
- frogml_core/clients/prompt_manager/__init__.py +0 -0
- frogml_core/clients/prompt_manager/model_descriptor_mapper.py +0 -196
- frogml_core/clients/prompt_manager/prompt_manager_client.py +0 -190
- frogml_core/clients/prompt_manager/prompt_proto_mapper.py +0 -264
- frogml_core/clients/vector_store/__init__.py +0 -2
- frogml_core/clients/vector_store/management_client.py +0 -127
- frogml_core/clients/vector_store/serving_client.py +0 -157
- frogml_core/clients/workspace_manager/__init__.py +0 -1
- frogml_core/clients/workspace_manager/client.py +0 -224
- frogml_core/llmops/__init__.py +0 -0
- frogml_core/llmops/generation/__init__.py +0 -0
- frogml_core/llmops/generation/_steaming.py +0 -78
- frogml_core/llmops/generation/base.py +0 -5
- frogml_core/llmops/generation/chat/__init__.py +0 -0
- frogml_core/llmops/generation/chat/openai/LICENSE.txt +0 -201
- frogml_core/llmops/generation/chat/openai/types/__init__.py +0 -0
- frogml_core/llmops/generation/chat/openai/types/chat/__init__.py +0 -0
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion.py +0 -88
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_assistant_message_param.py +0 -65
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_chunk.py +0 -153
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_content_part_text_param.py +0 -28
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_function_call_option_param.py +0 -25
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_function_message_param.py +0 -33
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_message.py +0 -56
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_message_param.py +0 -34
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_message_tool_call.py +0 -46
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_message_tool_call_param.py +0 -44
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_named_tool_choice_param.py +0 -32
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_role.py +0 -20
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_system_message_param.py +0 -35
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_token_logprob.py +0 -71
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_tool_choice_option_param.py +0 -28
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_tool_message_param.py +0 -31
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_tool_param.py +0 -29
- frogml_core/llmops/generation/chat/openai/types/chat/chat_completion_user_message_param.py +0 -35
- frogml_core/llmops/generation/chat/openai/types/chat/completion_create_params.py +0 -279
- frogml_core/llmops/generation/chat/openai/types/completion_choice.py +0 -47
- frogml_core/llmops/generation/chat/openai/types/completion_create_params.py +0 -209
- frogml_core/llmops/generation/chat/openai/types/completion_usage.py +0 -30
- frogml_core/llmops/generation/chat/openai/types/model.py +0 -35
- frogml_core/llmops/generation/chat/openai/types/shared/__init__.py +0 -3
- frogml_core/llmops/generation/chat/openai/types/shared/error_object.py +0 -27
- frogml_core/llmops/generation/chat/openai/types/shared/function_definition.py +0 -49
- frogml_core/llmops/generation/chat/openai/types/shared/function_parameters.py +0 -20
- frogml_core/llmops/generation/chat/openai/types/shared_params/__init__.py +0 -2
- frogml_core/llmops/generation/chat/openai/types/shared_params/function_definition.py +0 -49
- frogml_core/llmops/generation/chat/openai/types/shared_params/function_parameters.py +0 -22
- frogml_core/llmops/generation/streaming.py +0 -26
- frogml_core/llmops/model/__init__.py +0 -0
- frogml_core/llmops/model/descriptor.py +0 -40
- frogml_core/llmops/prompt/__init__.py +0 -0
- frogml_core/llmops/prompt/base.py +0 -136
- frogml_core/llmops/prompt/chat/__init__.py +0 -0
- frogml_core/llmops/prompt/chat/message.py +0 -24
- frogml_core/llmops/prompt/chat/template.py +0 -113
- frogml_core/llmops/prompt/chat/value.py +0 -10
- frogml_core/llmops/prompt/manager.py +0 -138
- frogml_core/llmops/prompt/template.py +0 -24
- frogml_core/llmops/prompt/value.py +0 -14
- frogml_core/llmops/provider/__init__.py +0 -0
- frogml_core/llmops/provider/chat.py +0 -44
- frogml_core/llmops/provider/openai/__init__.py +0 -0
- frogml_core/llmops/provider/openai/client.py +0 -126
- frogml_core/llmops/provider/openai/provider.py +0 -93
- frogml_core/vector_store/__init__.py +0 -4
- frogml_core/vector_store/client.py +0 -151
- frogml_core/vector_store/collection.py +0 -429
- frogml_core/vector_store/filters.py +0 -359
- frogml_core/vector_store/inference_client.py +0 -105
- frogml_core/vector_store/rest_helpers.py +0 -81
- frogml_core/vector_store/utils/__init__.py +0 -0
- frogml_core/vector_store/utils/filter_utils.py +0 -23
- frogml_core/vector_store/utils/upsert_utils.py +0 -218
- frogml_proto/qwak/prompt/v1/prompt/prompt_manager_service_pb2.py +0 -77
- frogml_proto/qwak/prompt/v1/prompt/prompt_manager_service_pb2.pyi +0 -417
- frogml_proto/qwak/prompt/v1/prompt/prompt_manager_service_pb2_grpc.py +0 -441
- frogml_proto/qwak/prompt/v1/prompt/prompt_pb2.py +0 -69
- frogml_proto/qwak/prompt/v1/prompt/prompt_pb2.pyi +0 -415
- frogml_proto/qwak/prompt/v1/prompt/prompt_pb2_grpc.py +0 -4
- frogml_proto/qwak/vectors/v1/collection/collection_pb2.py +0 -46
- frogml_proto/qwak/vectors/v1/collection/collection_pb2.pyi +0 -287
- frogml_proto/qwak/vectors/v1/collection/collection_pb2_grpc.py +0 -4
- frogml_proto/qwak/vectors/v1/collection/collection_service_pb2.py +0 -60
- frogml_proto/qwak/vectors/v1/collection/collection_service_pb2.pyi +0 -258
- frogml_proto/qwak/vectors/v1/collection/collection_service_pb2_grpc.py +0 -304
- frogml_proto/qwak/vectors/v1/collection/event/collection_event_pb2.py +0 -28
- frogml_proto/qwak/vectors/v1/collection/event/collection_event_pb2.pyi +0 -41
- frogml_proto/qwak/vectors/v1/collection/event/collection_event_pb2_grpc.py +0 -4
- frogml_proto/qwak/vectors/v1/filters_pb2.py +0 -52
- frogml_proto/qwak/vectors/v1/filters_pb2.pyi +0 -297
- frogml_proto/qwak/vectors/v1/filters_pb2_grpc.py +0 -4
- frogml_proto/qwak/vectors/v1/vector_pb2.py +0 -38
- frogml_proto/qwak/vectors/v1/vector_pb2.pyi +0 -142
- frogml_proto/qwak/vectors/v1/vector_pb2_grpc.py +0 -4
- frogml_proto/qwak/vectors/v1/vector_service_pb2.py +0 -53
- frogml_proto/qwak/vectors/v1/vector_service_pb2.pyi +0 -243
- frogml_proto/qwak/vectors/v1/vector_service_pb2_grpc.py +0 -201
- frogml_proto/qwak/workspace/workspace_pb2.py +0 -50
- frogml_proto/qwak/workspace/workspace_pb2.pyi +0 -331
- frogml_proto/qwak/workspace/workspace_pb2_grpc.py +0 -4
- frogml_proto/qwak/workspace/workspace_service_pb2.py +0 -84
- frogml_proto/qwak/workspace/workspace_service_pb2.pyi +0 -393
- frogml_proto/qwak/workspace/workspace_service_pb2_grpc.py +0 -507
- frogml_services_mock/mocks/prompt_manager_service.py +0 -281
- frogml_services_mock/mocks/vector_serving_api.py +0 -159
- frogml_services_mock/mocks/vectors_management_api.py +0 -97
- frogml_services_mock/mocks/workspace_manager_service_mock.py +0 -202
- /frogml_core/model/adapters/output_adapters/{qwak_with_default_fallback.py → frogml_with_default_fallback.py} +0 -0
- {frogml_core-0.0.113.dist-info → frogml_core-0.0.115.dist-info}/WHEEL +0 -0
@@ -1,429 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from dataclasses import dataclass
|
3
|
-
from typing import Dict, List, Optional, TypeVar, Union
|
4
|
-
|
5
|
-
from typeguard import typechecked
|
6
|
-
|
7
|
-
from frogml_proto.qwak.vectors.v1.vector_pb2 import Property
|
8
|
-
from frogml_proto.qwak.vectors.v1.vector_pb2 import SearchResult as ProtoSearchResult
|
9
|
-
from frogml_proto.qwak.vectors.v1.vector_pb2 import StoredVector as ProtoStoredVector
|
10
|
-
from frogml_proto.qwak.vectors.v1.vector_pb2 import (
|
11
|
-
VectorIdentifier as ProtoVectorIdentifier,
|
12
|
-
)
|
13
|
-
from frogml_core.clients.vector_store.serving_client import VectorServingClient
|
14
|
-
from frogml_core.exceptions import FrogmlException
|
15
|
-
from frogml_core.vector_store.filters import Filter
|
16
|
-
from frogml_core.vector_store.inference_client import VectorStoreInferenceClient
|
17
|
-
from frogml_core.vector_store.utils.upsert_utils import (
|
18
|
-
_divide_chunks,
|
19
|
-
_upsert_natural_input,
|
20
|
-
_upsert_vectors,
|
21
|
-
)
|
22
|
-
|
23
|
-
# Type variable standing in for a single "natural" (not yet embedded) input
# that a collection's vectorizer turns into a vector. The TypeVar name matches
# the variable it is bound to, as the typing documentation requires.
NaturalInput = TypeVar("NaturalInput")
# A batch of natural inputs.
NaturalInputs = List[NaturalInput]
# A single vector, as a list of floats.
Vector = List[float]
# Properties attached to a vector: property name -> scalar value.
Properties = Dict[str, Union[str, int, bool, float]]

# Regular expression that every tenant ID must conform to.
_TENANT_ID_PATTERN = r"^[a-zA-Z0-9_-]{4,64}$"
|
29
|
-
|
30
|
-
|
31
|
-
@dataclass
class SearchResult:
    """
    A class used to represent the result of a vector similarity search or fetch operation.

    Attributes:
        properties (dict): The dictionary of properties attached to the vector
        id (str, optional): The vector object unique identifier; may be None
        vector (Vector, optional): The vector values; may be None
        distance (float, optional): The distance metric indicating how similar the vector is to the
            search query. Smaller values indicate higher similarity. May be None
    """

    properties: Properties
    id: Optional[str]
    vector: Optional[Vector]
    distance: Optional[float]
|
48
|
-
|
49
|
-
|
50
|
-
class Collection:
    """
    The Collection class is a Python class that provides functionalities for handling operations on vectors within a
    specific collection in a vector store. This class should be used after a collection has been created or fetched
    using `VectorStoreClient`.

    The Collection class allows you to:
    * **Search for Similar Vectors**: This helps in finding vectors that are most similar to a given query vector.
    * **Upsert Vectors**: This operation allows you to insert new vectors into the collection or update existing
        vectors if they already exist.
    * **Delete Vectors by ID**: This operation deletes vectors based on their unique identifiers
    * **Fetch a Vector by ID**: This operation returns a single stored vector
    """

    id: str
    name: str
    metric: str
    dimension: int
    description: Optional[str]
    vectorizer: Optional[str]
    multi_tenant: bool

    _vector_serving_client: VectorServingClient
    # Python property type -> name of the matching field in the proto
    # `Property` "value_type" oneof.
    _type_to_proto_property_mapping: Dict[type, str] = {
        str: "string_val",
        bool: "bool_val",
        int: "int_val",
        float: "double_val",
    }

    # Reverse lookup: proto oneof field name -> Python type used to cast the value.
    _proto_property_to_type_mapping: Dict[str, type] = {
        v: k for k, v in _type_to_proto_property_mapping.items()
    }

    def __init__(
        self,
        id: str,
        name: str,
        metric: str,
        dimension: int,
        vector_serving_client: VectorServingClient,
        description: Optional[str] = None,
        vectorizer: Optional[str] = None,
        muli_tenant: bool = False,
    ):
        """
        Initializes a `Collection` client object to interact with the vector serving service. Should not be created
        directly - created or fetched using the `VectorStoreClient` object.

        Parameters:
            id (str): The collection identifier
            name (str): The collection name, used in every call to the serving client
            metric (str): The distance metric of the collection
            dimension (int): The vector dimension of the collection
            vector_serving_client (VectorServingClient): Client used for search / delete / fetch calls
            description (str, optional): Free text description
            vectorizer (str, optional): Name of the vectorizer; when set, an inference client is created for it
            muli_tenant (bool): Whether the collection has multi tenancy enabled. The parameter name is
                misspelled; it is kept as-is because callers pass it by keyword.
        """
        self.id = id
        self.name = name
        self.description = description
        self.metric = metric
        self.dimension = dimension
        self.vectorizer = vectorizer
        self._vector_serving_client = vector_serving_client
        self._realtime_inference_client = None
        self.multi_tenant = muli_tenant

        if vectorizer:
            # The model id is the vectorizer name lower-cased, with spaces and dashes turned into underscores.
            self._realtime_inference_client = VectorStoreInferenceClient(
                model_id=self.vectorizer.lower().replace(" ", "_").replace("-", "_")
            )

    @typechecked
    def search(
        self,
        output_properties: List[str],
        vector: Optional[Vector] = None,
        natural_input: Optional[NaturalInput] = None,
        top_results: int = 1,
        include_id: bool = True,
        include_vector: bool = False,
        include_distance: bool = False,
        filter: Optional[Filter] = None,
        tenant_id: Optional[str] = None,
    ) -> List[SearchResult]:
        """
        Searches for vectors in the collection that are most similar to a given query vector.
        Vector similarity is a measure of the closeness or similarity between two vectors. In the context of machine
        learning, vectors often represent points in a high-dimensional space, and the concept of similarity between
        vectors can be crucial for many tasks such as clustering, classification, and nearest-neighbor searches.

        Parameters:
            output_properties (list): A list of property fields to include in the results.
            vector (list): The vector to get the most similar vectors to according to the distance metric
            natural_input (any): Natural input (text, image) which should be embedded by the collection, and
                according to the resulting embedding - get the most similar vectors
            top_results (int): The number of relevant results to return
            include_id (bool): Whether to include the vector ID's in the result set
            include_vector (bool): Whether to include the vector values themselves in the result set
            include_distance (bool): Whether to include the distance calculations to the result set
            filter (Filter): Pre-filtering search results
            tenant_id (str): tenant ID, passed if and only if the collection has multi tenancy enabled

        Returns:
            List[SearchResult]: A list of SearchResult object, which is used as a container for the search results

        Raises:
            FrogmlException: If you don't provide either vector or natural_input
            FrogmlException: If you provide both vector and natural_input
            FrogmlException: If natural_input is given but the collection has no vectorizer
            FrogmlException: If the tenant provided mismatches the configuration
        """
        # Exactly one of the two inputs must be truthy.
        if not (bool(vector) ^ bool(natural_input)):
            raise FrogmlException(
                "Exactly one of {'vectors', 'natural_input'} should be passed"
            )

        if natural_input:
            vector = self._transform_natural_input_to_vectors(
                natural_input=natural_input
            )
        proto_filter = filter._to_proto() if filter else None
        self._validate_tenant(tenant_id)

        return [
            self._to_search_result(
                result,
                include_id=include_id,
                include_distance=include_distance,
                include_vector=include_vector,
            )
            for result in self._vector_serving_client.search(
                collection_name=self.name,
                vector=vector,
                properties=output_properties,
                top_results=top_results,
                include_id=include_id,
                include_vector=include_vector,
                include_distance=include_distance,
                filters=proto_filter,
                tenant_id=tenant_id,
            )
        ]

    @typechecked
    def upsert(
        self,
        ids: List[str],
        properties: List[Properties],
        vectors: Optional[List[Vector]] = None,
        natural_inputs: Optional[NaturalInputs] = None,
        batch_size: int = 1000,
        multiproc: bool = False,
        max_processes: Optional[int] = None,
        *,
        tenant_ids: Optional[List[str]] = None,
    ) -> None:
        """
        Inserts new vectors into the collection or updates existing vectors. Notice that this method will overwrite
        existing vectors with the same IDs.

        Parameters:
            ids (list): A list of vector ids to be added
            vectors (list): The list of vectors to add. This attribute or `natural_inputs` must be set
            natural_inputs (list): Natural inputs (text, image) which should be embedded by the collection and added
                to the store. This attribute or `vectors` must be set
            properties (list): A list of property dictionaries, one per vector
            batch_size (int): maximum batch size when upserting against the backend Vector Store, defaults to 1000
            multiproc (bool): whether to use multiprocessing, defaults to False
            max_processes (Optional[int]): max number of processes if multiproc is selected, multiproc must be enabled
            tenant_ids (List[str]): tenant ids, should be specified if and only if the collection has multi tenancy
                enabled.

        Raises:
            FrogmlException: If you don't provide either vectors or natural_inputs
            FrogmlException: If you provide both vectors and natural_inputs
            FrogmlException: If max_processes is set while multiproc is disabled
            FrogmlException: If tenant ids mismatch the collection configuration or the number of ids
            FrogmlException: If the inputs, ids and properties lists differ in length
        """

        if not (bool(vectors) ^ bool(natural_inputs)):
            raise FrogmlException(
                "`vectors` or `natural` inputs should be defined and not empty. But not both"
            )

        if max_processes is not None and not multiproc:
            raise FrogmlException(
                "Can not set max_processes if multiproc is not enabled"
            )

        # Tenant ids pair up with `ids`, which is always present; `vectors` is
        # None when natural inputs are upserted and must not be used here.
        self._validate_tenant_ids(
            vector_ids=ids, tenant_ids=tenant_ids, verb="upserting"
        )
        # For multi tenant collections each identifier is an (id, tenant_id) pair.
        id_tpls = zip(ids, tenant_ids) if self.multi_tenant else ids

        if (len(vectors or natural_inputs) != len(ids)) or (
            len(properties) != len(ids)
        ):
            raise FrogmlException(
                "Non matching lengths for input list (vectors / natural inputs), IDs, and properties list. "
                "Make sure all 3 fields are aligned in length"
            )
        if bool(natural_inputs):
            _upsert_natural_input(
                vector_tuples=list(zip(id_tpls, natural_inputs, properties)),
                chunk_size=batch_size,
                vectorizer_name=self.vectorizer,
                collection_name=self.name,
                edge_services_url=self._vector_serving_client._edge_services_url,
                multiproc=multiproc,
                max_processes=max_processes,
            )
        else:
            _upsert_vectors(
                vector_tuples=list(zip(id_tpls, vectors, properties)),
                chunk_size=batch_size,
                collection_name=self.name,
                edge_services_url=self._vector_serving_client._edge_services_url,
                multiproc=multiproc,
                max_processes=max_processes,
            )

    @typechecked
    def delete(
        self,
        vector_ids: List[str],
        *,
        tenant_ids: Optional[List[str]] = None,
        batch_size: int = 10000,
    ) -> int:
        """
        Deletes vectors from the collection based on their IDs.

        Parameters:
            vector_ids (list): A list of vector IDs to delete.
            batch_size (int): optional batch size, defaults to 10000
            tenant_ids (list): tenant IDs (same length as vector_ids, used only when multi tenancy is enabled)

        Returns:
            int: Number of actual vectors deleted from the collection

        Raises:
            FrogmlException: If tenant ids mismatch the collection configuration or the number of vector ids
        """
        self._validate_tenant_ids(
            vector_ids=vector_ids, tenant_ids=tenant_ids, verb="deleting"
        )
        vector_identifiers: List[ProtoVectorIdentifier] = self._extract_tenant_ids(
            vector_ids, tenant_ids
        )

        # One delete call per chunk; the per-call results are summed.
        return sum(
            self._vector_serving_client.delete_vectors(
                collection_name=self.name, vector_identifiers=ids_chunk
            )
            for ids_chunk in _divide_chunks(vector_identifiers, batch_size)
        )

    @typechecked
    def fetch(self, vector_id: str, *, tenant_id: Optional[str] = None) -> SearchResult:
        """
        Fetches a vector from the collection based on its ID.

        Parameters:
            vector_id (str): The ID of the vector to fetch.
            tenant_id (str, optional): Tenant id, passed if and only if multi tenancy is enabled

        Returns:
            SearchResult: A SearchResult object holding the id, vector values and properties (no distance)

        Raises:
            FrogmlException: If the tenant provided mismatches the configuration
        """
        self._validate_tenant(tenant_id)
        vector_identifier: ProtoVectorIdentifier
        if tenant_id is not None:
            vector_identifier = ProtoVectorIdentifier(
                vector_id=vector_id, tenant_id=tenant_id
            )
        else:
            vector_identifier = ProtoVectorIdentifier(vector_id=vector_id)

        result = self._vector_serving_client.fetch_vector(
            collection_name=self.name, vector_identifier=vector_identifier
        )

        return self._to_search_result(
            result, include_id=True, include_distance=False, include_vector=True
        )

    def _to_search_result(
        self,
        search_result: Union[ProtoSearchResult, ProtoStoredVector],
        include_id: bool,
        include_vector: bool,
        include_distance: bool,
    ) -> SearchResult:
        """
        Converts a proto search result / stored vector into a `SearchResult`.

        Fields whose `include_*` flag is False are set to None; properties are always converted.
        """
        # Stored vectors carry their id inside `vector_identifier`; search results expose `id` directly.
        vector_id = (
            search_result.vector_identifier.vector_id
            if isinstance(search_result, ProtoStoredVector)
            else search_result.id
        )
        return SearchResult(
            id=vector_id if include_id else None,
            vector=list(search_result.vector.element) if include_vector else None,
            distance=search_result.distance if include_distance else None,
            properties={
                prop.name: self._extract_value_with_type(prop)
                for prop in search_result.properties
            },
        )

    def _extract_value_with_type(self, prop: Property):
        """
        Returns the value of a proto `Property`, cast to the Python type mapped to its populated oneof field.

        Raises:
            FrogmlException: If the populated `value_type` field has no known Python type (or none is populated)
        """
        value_field = prop.WhichOneof("value_type")
        type_caster = self._proto_property_to_type_mapping.get(value_field)
        if type_caster is None:
            # Report the offending oneof field name so the failure can be diagnosed.
            raise FrogmlException(
                f"Cannot deserialize property with type {value_field}. This means an invalid property type"
                " was registered to the platform. Please delete and add the vector object again."
            )

        return type_caster(getattr(prop, value_field))

    def _transform_natural_input_to_vectors(
        self, natural_input: NaturalInput
    ) -> Vector:
        """
        Embeds a single natural input using the collection's inference client.

        Raises:
            FrogmlException: If the collection has no vectorizer defined
        """
        if not self.vectorizer:
            raise FrogmlException(
                "Unable to search by natural input because the collection does not have a Vectorizer defined."
            )
        return self._realtime_inference_client.get_embedding(natural_input)

    def _transform_natural_input_list_to_vectors(
        self, natural_inputs: NaturalInputs
    ) -> List[Vector]:
        """Embeds every natural input in the list, one call per input, preserving order."""
        return [
            self._transform_natural_input_to_vectors(natural_input=natural_input)
            for natural_input in natural_inputs
        ]

    def _validate_tenant(self, tenant_id: Optional[str] = None):
        """
        Checks that a tenant id is passed if and only if the collection is multi tenant, and that it is well formed.

        Raises:
            FrogmlException: If the tenant id is missing, unexpected, or malformed
        """
        if self.multi_tenant:
            # we are multi tenant, assert a valid tenant is passed
            if tenant_id is None:
                raise FrogmlException(
                    "Tenant ID must be passed when multi tenancy is enabled"
                )

            self._validate_tenant_id(tenant_id)
        else:
            if tenant_id is not None:
                raise FrogmlException(
                    f"Collection {self.name} is not multi tenant, can not specify tenant"
                )

    def _validate_tenant_id(self, tenant_id: str):
        """
        Checks that a tenant id conforms to `_TENANT_ID_PATTERN`.

        Raises:
            FrogmlException: If the tenant id does not match the pattern
        """
        # fullmatch: with `re.match`, the trailing `$` would also accept an id ending in a newline.
        if re.fullmatch(_TENANT_ID_PATTERN, tenant_id) is None:
            raise FrogmlException(
                f"Tenant ID {tenant_id} does not conform to {_TENANT_ID_PATTERN}"
            )

    def _validate_tenant_ids(
        self, vector_ids: List[str], tenant_ids: Optional[List[str]], verb: str
    ) -> None:
        """
        Checks a list of tenant ids against the collection configuration.

        For multi tenant collections the tenant ids must be given, match `vector_ids` in length, and each be well
        formed. For other collections no tenant ids may be given.

        Parameters:
            vector_ids (list): The vector ids the tenant ids pair up with
            tenant_ids (list, optional): The tenant ids to validate
            verb (str): Name of the calling operation, used in the error message

        Raises:
            FrogmlException: If any of the checks above fails
        """
        if self.multi_tenant:
            if tenant_ids is None:
                raise FrogmlException(
                    f"Tenant IDs must be provided when {verb} against multitenant collections"
                )
            if len(tenant_ids) != len(vector_ids):
                raise FrogmlException(
                    f"Got different number of vector ids {len(vector_ids)} and tenant ids {len(tenant_ids)}"
                )
            for tenant_id in tenant_ids:
                self._validate_tenant_id(tenant_id=tenant_id)
        else:
            if tenant_ids is not None:
                raise FrogmlException(
                    f"Collection {self.name} does not have multi tenancy enabled, do not pass tenant ids"
                )

    def _extract_tenant_ids(
        self, vector_ids: List[str], tenant_ids: Optional[List[str]]
    ) -> List[ProtoVectorIdentifier]:
        """
        Builds proto vector identifiers from vector ids, paired with tenant ids when the collection is multi tenant.
        """
        vector_identifiers: List[ProtoVectorIdentifier]
        if self.multi_tenant:
            vector_identifiers = [
                ProtoVectorIdentifier(vector_id=vector_id, tenant_id=tenant_id)
                for vector_id, tenant_id in zip(vector_ids, tenant_ids)
            ]
        else:
            vector_identifiers = [
                ProtoVectorIdentifier(vector_id=vector_id) for vector_id in vector_ids
            ]

        return vector_identifiers
|