qwak-core 0.6.7__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. frogml_storage/__init__.py +1 -0
  2. frogml_storage/artifactory/__init__.py +1 -0
  3. frogml_storage/artifactory/_artifactory_api.py +315 -0
  4. frogml_storage/authentication/login/__init__.py +1 -0
  5. frogml_storage/authentication/login/_login_cli.py +239 -0
  6. frogml_storage/authentication/login/_login_command.py +74 -0
  7. frogml_storage/authentication/models/__init__.py +3 -0
  8. frogml_storage/authentication/models/_auth.py +24 -0
  9. frogml_storage/authentication/models/_auth_config.py +70 -0
  10. frogml_storage/authentication/models/_login.py +22 -0
  11. frogml_storage/authentication/utils/__init__.py +18 -0
  12. frogml_storage/authentication/utils/_authentication_utils.py +284 -0
  13. frogml_storage/authentication/utils/_login_checks_utils.py +114 -0
  14. frogml_storage/base_storage.py +140 -0
  15. frogml_storage/constants.py +56 -0
  16. frogml_storage/exceptions/checksum_verification_error.py +3 -0
  17. frogml_storage/exceptions/validation_error.py +4 -0
  18. frogml_storage/frog_ml.py +668 -0
  19. frogml_storage/http/__init__.py +1 -0
  20. frogml_storage/http/http_client.py +83 -0
  21. frogml_storage/logging/__init__.py +1 -0
  22. frogml_storage/logging/_log_config.py +45 -0
  23. frogml_storage/logging/log_utils.py +21 -0
  24. frogml_storage/models/__init__.py +1 -0
  25. frogml_storage/models/_download_context.py +54 -0
  26. frogml_storage/models/dataset_manifest.py +13 -0
  27. frogml_storage/models/entity_manifest.py +93 -0
  28. frogml_storage/models/frogml_dataset_version.py +21 -0
  29. frogml_storage/models/frogml_entity_type_info.py +50 -0
  30. frogml_storage/models/frogml_entity_version.py +34 -0
  31. frogml_storage/models/frogml_model_version.py +21 -0
  32. frogml_storage/models/model_manifest.py +60 -0
  33. frogml_storage/models/serialization_metadata.py +15 -0
  34. frogml_storage/utils/__init__.py +12 -0
  35. frogml_storage/utils/_environment.py +21 -0
  36. frogml_storage/utils/_input_checks_utility.py +104 -0
  37. frogml_storage/utils/_storage_utils.py +15 -0
  38. frogml_storage/utils/_url_utils.py +27 -0
  39. qwak/__init__.py +1 -1
  40. qwak/clients/model_management/client.py +5 -0
  41. qwak/clients/project/client.py +7 -0
  42. qwak/inner/const.py +8 -0
  43. qwak/inner/di_configuration/account.py +67 -6
  44. qwak/inner/di_configuration/dependency_wiring.py +0 -2
  45. qwak/inner/tool/auth.py +83 -0
  46. qwak/inner/tool/grpc/grpc_auth.py +35 -0
  47. qwak/inner/tool/grpc/grpc_tools.py +37 -14
  48. qwak/inner/tool/grpc/grpc_try_wrapping.py +1 -3
  49. qwak/qwak_client/client.py +6 -0
  50. {qwak_core-0.6.7.dist-info → qwak_core-0.7.0.dist-info}/METADATA +1 -1
  51. {qwak_core-0.6.7.dist-info → qwak_core-0.7.0.dist-info}/RECORD +54 -30
  52. qwak_services_mock/mocks/qwak_mocks.py +2 -8
  53. qwak_services_mock/services_mock.py +0 -24
  54. qwak/clients/vector_store/__init__.py +0 -2
  55. qwak/clients/vector_store/management_client.py +0 -124
  56. qwak/clients/vector_store/serving_client.py +0 -156
  57. qwak/vector_store/__init__.py +0 -4
  58. qwak/vector_store/client.py +0 -150
  59. qwak/vector_store/collection.py +0 -426
  60. qwak/vector_store/filters.py +0 -354
  61. qwak/vector_store/inference_client.py +0 -103
  62. qwak/vector_store/rest_helpers.py +0 -72
  63. qwak/vector_store/utils/__init__.py +0 -0
  64. qwak/vector_store/utils/filter_utils.py +0 -21
  65. qwak/vector_store/utils/upsert_utils.py +0 -217
  66. qwak_services_mock/mocks/vector_serving_api.py +0 -154
  67. qwak_services_mock/mocks/vectors_management_api.py +0 -96
  68. {qwak_core-0.6.7.dist-info → qwak_core-0.7.0.dist-info}/WHEEL +0 -0
@@ -1,150 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Dict, List, Optional
4
-
5
- from _qwak_proto.qwak.vectors.v1.collection.collection_pb2 import (
6
- VectorCollectionDefinition,
7
- VectorCollectionMetric,
8
- )
9
- from qwak.clients.vector_store import VectorManagementClient, VectorServingClient
10
- from qwak.exceptions import QwakException
11
- from qwak.vector_store.collection import Collection
12
- from typeguard import typechecked
13
-
14
-
15
class VectorStoreClient:
    """
    The `VectorStoreClient` class is designed to interact with Qwak's vector store service. A vector store is a
    specialized storage service optimized for storing and querying high-dimensional vectors. These vectors can be
    representations extracted from images, text, or any other type of data.
    Vector stores are often used in machine learning applications for operations such as nearest neighbor searches,
    similarity measures, and clustering.

    Collection management calls (create / delete / get / list) are delegated to a `VectorManagementClient`;
    every `Collection` returned by this client shares this client's `VectorServingClient`.
    """

    _vector_management_client: VectorManagementClient
    _vector_serving_client: VectorServingClient

    # User-facing metric names accepted by `create_collection`, mapped to their proto enum values.
    _metric_dict: Dict[str, VectorCollectionMetric] = {
        "l2_squared": VectorCollectionMetric.COLLECTION_METRIC_L2_SQUARED,
        "cosine": VectorCollectionMetric.COLLECTION_METRIC_COSINE,
        "dot_product": VectorCollectionMetric.COLLECTION_METRIC_DOT_PRODUCT,
        "l1": VectorCollectionMetric.COLLECTION_METRIC_L1,
        "hamming": VectorCollectionMetric.COLLECTION_METRIC_HAMMING,
    }

    def __init__(self, edge_services_url: Optional[str] = None):
        """
        Initializes a `VectorStoreClient` client object to interact with Qwak's vector store service

        Parameters:
            edge_services_url (str, optional): Forwarded as-is to the `VectorServingClient` constructor.
        """
        self._vector_management_client = VectorManagementClient()
        self._vector_serving_client = VectorServingClient(
            edge_services_url=edge_services_url
        )

    @typechecked
    def create_collection(
        self,
        name: str,
        dimension: int,
        description: Optional[str] = None,
        metric: str = "l2_squared",
        vectorizer: Optional[str] = None,
        multi_tenant: bool = False,
    ) -> Collection:
        """
        Creates a new collection with the given name and dimension.
        Each collection in the vector store can be thought of as a table or a namespace that holds vectors of a
        particular dimension. For example, you may have a collection named "image_embeddings" where each vector is of
        dimension 512, representing an embedding of an image.

        Parameters:
            name (str): The name of the collection to create.
            dimension (int): The dimension of the vectors in the collection.
            description (str, optional): A human-readable description of the collection.
            metric (str): The distance metric used by the collection when executing similarity search. One of
                "l2_squared", "cosine", "dot_product", "l1" or "hamming". Defaults to "l2_squared".
            vectorizer (str, optional): an optional Qwak model used for vector embedding in case natural input
                is provided
            multi_tenant (bool): Whether this collection has multitenancy. defaults to False and cannot be changed
                after the collection is created

        Returns:
            Collection: The Collection object which is used to interact with the vector store.

        Raises:
            QwakException: if any of the collection creation parameters are invalid
        """
        # Resolve the metric name first so an unsupported metric fails before any request is sent.
        proto_metric = self._metrics_mapper(metric)
        proto_definition = self._vector_management_client.create_collection(
            name=name,
            description=description,
            dimension=dimension,
            metric=proto_metric,
            vectorizer=vectorizer,
            multi_tenant=multi_tenant,
        ).definition

        return self._collection_from_definition(proto_definition)

    @typechecked
    def delete_collection(self, name: str) -> None:
        """
        Deletes a collection with the given name.

        Parameters:
            name (str): The name of the collection to delete.

        Raises:
            QwakException: in case the deletion failed for any reason
        """
        self._vector_management_client.delete_collection_by_name(name=name)

    @typechecked
    def get_collection_by_name(self, name: str) -> Collection:
        """
        Fetches a collection by its name.

        Parameters:
            name (str): The name of the collection to fetch.

        Returns:
            collection: a Collection object

        Raises:
            QwakException: in case the collection doesn't exist
        """
        proto_definition = self._vector_management_client.get_collection_by_name(
            name
        ).definition
        return self._collection_from_definition(proto_definition)

    def list_collections(self) -> List[Collection]:
        """
        Lists all available collections in the current Qwak account

        Returns:
            list: A list of Collection objects.
        """
        proto_definitions = self._vector_management_client.list_collections()
        return [
            self._collection_from_definition(collection.definition)
            for collection in proto_definitions
        ]

    def _collection_from_definition(
        self, collection_definition: VectorCollectionDefinition
    ) -> Collection:
        """Builds a `Collection` from a proto collection definition, bound to this client's serving client."""
        spec = collection_definition.collection_spec
        return Collection(
            id=collection_definition.id,
            name=spec.name,
            metric=spec.metric,
            dimension=spec.dimension,
            description=spec.description,
            vectorizer=spec.vectorizer.qwak_model_name,
            vector_serving_client=self._vector_serving_client,
            # NOTE: `muli_tenant` (sic) is the keyword name declared by `Collection.__init__`.
            muli_tenant=spec.multi_tenancy_enabled,
        )

    def _metrics_mapper(self, metric: str) -> VectorCollectionMetric:
        """
        Translates a user-facing metric name into its proto enum value.

        Raises:
            QwakException: if `metric` is not one of the supported metric names
        """
        try:
            return self._metric_dict[metric]
        except KeyError:
            raise QwakException(
                f"Unsupported metric type '{metric}'. Currently supported metrics are {list(self._metric_dict.keys())}"
            ) from None
@@ -1,426 +0,0 @@
1
- import re
2
- from dataclasses import dataclass
3
- from typing import Dict, List, Optional, TypeVar, Union
4
-
5
- from _qwak_proto.qwak.vectors.v1.vector_pb2 import (
6
- Property,
7
- SearchResult as ProtoSearchResult,
8
- StoredVector as ProtoStoredVector,
9
- VectorIdentifier as ProtoVectorIdentifier,
10
- )
11
- from qwak.clients.vector_store.serving_client import VectorServingClient
12
- from qwak.exceptions import QwakException
13
- from qwak.vector_store.filters import Filter
14
- from qwak.vector_store.inference_client import VectorStoreInferenceClient
15
- from qwak.vector_store.utils.upsert_utils import (
16
- _divide_chunks,
17
- _upsert_natural_input,
18
- _upsert_vectors,
19
- )
20
- from typeguard import typechecked
21
-
22
# Placeholder type for a "natural" (not yet embedded) input such as text or an image.
# The TypeVar's name must match the variable it is bound to.
NaturalInput = TypeVar("NaturalInput")
NaturalInputs = List[NaturalInput]
# A single embedding: a flat list of floats.
Vector = List[float]
# Property name -> scalar value stored alongside a vector.
Properties = Dict[str, Union[str, int, bool, float]]

# Tenant IDs are 4 to 64 characters drawn from ASCII letters, digits, "_" and "-".
_TENANT_ID_PATTERN = r"^[a-zA-Z0-9_-]{4,64}$"
28
-
29
-
30
@dataclass
class SearchResult:
    """
    A class used to represent a single vector returned by a similarity search or a fetch operation.

    Attributes:
        properties (dict): The properties stored with the vector, keyed by property name
        id (str, optional): The vector object unique identifier, or None when IDs were not requested
        vector (Vector, optional): The vector values, or None when vector values were not requested
        distance (float, optional): The distance metric indicating how similar the vector is to the search query,
            or None when distances were not requested. Smaller values indicate higher similarity.
    """

    properties: Properties
    id: Optional[str]
    vector: Optional[Vector]
    distance: Optional[float]
47
-
48
-
49
class Collection:
    """
    The Collection class is a Python class that provides functionalities for handling operations on vectors within a
    specific collection in a vector store. This class should be used after a collection has been created or fetched
    using `VectorStoreClient`.

    The Collection class allows you to:
    * **Search for Similar Vectors**: This helps in finding vectors that are most similar to a given query vector.
    * **Upsert Vectors**: This operation allows you to insert new vectors into the collection or update existing
        vectors if they already exist.
    * **Delete Vectors by ID**: This operation deletes vectors based on their unique identifiers
    * **Fetch a Vector by ID**: This operation returns a single stored vector together with its properties
    """

    id: str
    name: str
    metric: str
    dimension: int
    description: Optional[str]
    vectorizer: Optional[str]
    multi_tenant: bool

    _vector_serving_client: VectorServingClient

    # Python type of a property value -> name of the matching field in the proto `Property.value_type` oneof.
    _type_to_proto_property_mapping: Dict[type, str] = {
        str: "string_val",
        bool: "bool_val",
        int: "int_val",
        float: "double_val",
    }

    # Reverse lookup: proto oneof field name -> Python type used to cast the stored value back.
    _proto_property_to_type_mapping: Dict[str, type] = {
        v: k for k, v in _type_to_proto_property_mapping.items()
    }

    def __init__(
        self,
        id: str,
        name: str,
        metric: str,
        dimension: int,
        vector_serving_client: VectorServingClient,
        description: Optional[str] = None,
        vectorizer: Optional[str] = None,
        muli_tenant: bool = False,
    ):
        """
        Initializes a `Collection` client object to interact with Qwak's vector serving service. Should not be created
        directly - created or fetched using the `VectorStoreClient` object.

        Parameters:
            id (str): The collection identifier
            name (str): The collection name, used to address the collection in every serving call
            metric (str): The distance metric of the collection
            dimension (int): The dimension of the vectors in the collection
            vector_serving_client (VectorServingClient): Client used for search / fetch / delete calls
            description (str, optional): A human-readable description of the collection
            vectorizer (str, optional): Name of the model used to embed natural inputs
            muli_tenant (bool): Whether the collection is multi tenant. The misspelled parameter name is kept
                for backward compatibility with existing keyword callers; the value is exposed as
                `self.multi_tenant`.
        """
        self.id = id
        self.name = name
        self.description = description
        self.metric = metric
        self.dimension = dimension
        self.vectorizer = vectorizer
        self._vector_serving_client = vector_serving_client
        self._realtime_inference_client = None
        self.multi_tenant = muli_tenant

        if vectorizer:
            # The inference client is addressed by a normalized model id:
            # lower-cased, with spaces and dashes replaced by underscores.
            self._realtime_inference_client = VectorStoreInferenceClient(
                model_id=self.vectorizer.lower().replace(" ", "_").replace("-", "_")
            )

    @typechecked
    def search(
        self,
        output_properties: List[str],
        vector: Optional[Vector] = None,
        natural_input: Optional[NaturalInput] = None,
        top_results: int = 1,
        include_id: bool = True,
        include_vector: bool = False,
        include_distance: bool = False,
        filter: Optional[Filter] = None,
        tenant_id: Optional[str] = None,
    ) -> List[SearchResult]:
        """
        Searches for vectors in the collection that are most similar to a given query vector.
        Vector similarity is a measure of the closeness or similarity between two vectors. In the context of machine
        learning, vectors often represent points in a high-dimensional space, and the concept of similarity between
        vectors can be crucial for many tasks such as clustering, classification, and nearest-neighbor searches.

        Parameters:
            output_properties (list): A list of property fields to include in the results.
            vector (list): The vector to get the most similar vectors to according to the distance metric
            natural_input (any): Natural input (text, image) which should be embedded by the collection, and
                according to the resulting embedding - get the most similar vectors
            top_results (int): The number of relevant results to return
            include_id (bool): Whether to include the vector ID's in the result set
            include_vector (bool): Whether to include the vector values themselves in the result set
            include_distance (bool): Whether to include the distance calculations to the result set
            filter (Filter): Pre-filtering search results
            tenant_id (str): tenant ID, passed if and only if the collection has multi tenancy enabled

        Returns:
            List[SearchResult]: A list of SearchResult object, which is used as a container for the search results

        Raises:
            QwakException: If you don't provide either vectors or natural_inputs
            QwakException: If you provide both vectors and natural_inputs
            QwakException: If the tenant provided mismatches the configuration
        """
        # XOR on truthiness: exactly one of the two inputs must be non-empty.
        if not (bool(vector) ^ bool(natural_input)):
            raise QwakException(
                "Exactly one of {'vectors', 'natural_input'} should be passed"
            )

        if natural_input:
            vector = self._transform_natural_input_to_vectors(
                natural_input=natural_input
            )
        proto_filter = filter._to_proto() if filter else None
        self._validate_tenant(tenant_id)

        raw_results = self._vector_serving_client.search(
            collection_name=self.name,
            vector=vector,
            properties=output_properties,
            top_results=top_results,
            include_id=include_id,
            include_vector=include_vector,
            include_distance=include_distance,
            filters=proto_filter,
            tenant_id=tenant_id,
        )
        return [
            self._to_search_result(
                result,
                include_id=include_id,
                include_distance=include_distance,
                include_vector=include_vector,
            )
            for result in raw_results
        ]

    @typechecked
    def upsert(
        self,
        ids: List[str],
        properties: List[Properties],
        vectors: Optional[List[Vector]] = None,
        natural_inputs: Optional[NaturalInputs] = None,
        batch_size: int = 1000,
        multiproc: bool = False,
        max_processes: Optional[int] = None,
        *,
        tenant_ids: Optional[List[str]] = None,
    ) -> None:
        """
        Inserts new vectors into the collection or updates existing vectors. Notice that this method will overwrite
        existing vectors with the same IDs.

        Parameters:
            ids (list): A list of vector ids to be added
            properties (list): A list of property dictionaries to attach to the vectors, one per id
            vectors (list): The list of vectors to add. This attribute or `natural_inputs` must be set
            natural_inputs (list): Natural inputs (text, image) which should be embedded by the collection and added
                to the store. This attribute or `vectors` must be set
            batch_size (int): maximum batch size when upserting against the backend Vector Store, defaults to 1000
            multiproc (bool): whether to use multiprocessing, defaults to False
            max_processes (Optional[int]): max number of processes if multiproc is selected, multiproc must be enabled
            tenant_ids (List[str]): tenant ids, one per id. Should be specified if and only if the collection has
                multi tenancy enabled.

        Raises:
            QwakException: If you don't provide either vectors or natural_inputs
            QwakException: If you provide both vectors and natural_inputs
            QwakException: If `max_processes` is set while `multiproc` is disabled
            QwakException: If the tenant ids mismatch the collection configuration or the ids
            QwakException: If the inputs, ids and properties lists differ in length
        """

        if not (bool(vectors) ^ bool(natural_inputs)):
            raise QwakException(
                "`vectors` or `natural` inputs should be defined and not empty. But not both"
            )

        if max_processes is not None and not multiproc:
            raise QwakException("Can not set max_processes if multiproc is not enabled")

        # Tenant ids pair up with the vector *ids*. Validating against `vectors` would crash with a
        # TypeError (`len(None)`) whenever natural inputs are upserted into a multi tenant collection.
        self._validate_tenant_ids(
            vector_ids=ids, tenant_ids=tenant_ids, verb="upserting"
        )
        # For multi tenant collections each id travels together with its tenant id.
        id_tpls = zip(ids, tenant_ids) if self.multi_tenant else ids

        if (len(vectors or natural_inputs) != len(ids)) or (
            len(properties) != len(ids)
        ):
            raise QwakException(
                "Non matching lengths for input list (vectors / natural inputs), IDs, and properties list. "
                "Make sure all 3 fields are aligned in length"
            )
        if bool(natural_inputs):
            _upsert_natural_input(
                vector_tuples=list(zip(id_tpls, natural_inputs, properties)),
                chunk_size=batch_size,
                vectorizer_name=self.vectorizer,
                collection_name=self.name,
                edge_services_url=self._vector_serving_client._edge_services_url,
                multiproc=multiproc,
                max_processes=max_processes,
            )
        else:
            _upsert_vectors(
                vector_tuples=list(zip(id_tpls, vectors, properties)),
                chunk_size=batch_size,
                collection_name=self.name,
                edge_services_url=self._vector_serving_client._edge_services_url,
                multiproc=multiproc,
                max_processes=max_processes,
            )

    @typechecked
    def delete(
        self,
        vector_ids: List[str],
        *,
        tenant_ids: Optional[List[str]] = None,
        batch_size: int = 10000,
    ) -> int:
        """
        Deletes vectors from the collection based on their IDs.

        Parameters:
            vector_ids (list): A list of vector IDs to delete.
            batch_size (int): optional batch size, defaults to 10000
            tenant_ids (list): tenant IDs (same length as vector_ids, used only when multi tenancy is enabled)

        Returns:
            int: Number of actual vectors deleted from the collection

        Raises:
            QwakException: If the tenant ids mismatch the collection configuration or the vector ids
        """
        self._validate_tenant_ids(
            vector_ids=vector_ids, tenant_ids=tenant_ids, verb="deleting"
        )
        vector_identifiers: List[ProtoVectorIdentifier] = self._extract_tenant_ids(
            vector_ids, tenant_ids
        )

        # One delete call per chunk; the per-call deleted counts are summed.
        return sum(
            self._vector_serving_client.delete_vectors(
                collection_name=self.name, vector_identifiers=ids_chunk
            )
            for ids_chunk in _divide_chunks(vector_identifiers, batch_size)
        )

    @typechecked
    def fetch(self, vector_id: str, *, tenant_id: Optional[str] = None) -> SearchResult:
        """
        Fetches a vector from the collection based on its ID.

        Parameters:
            vector_id (str): The ID of the vector to fetch.
            tenant_id (str, optional): Tenant id, passed if and only if multi tenancy is enabled

        Returns:
            SearchResult: A SearchResult object holding the vector id, values and properties (distance is None)

        Raises:
            QwakException: If the tenant provided mismatches the configuration
        """
        self._validate_tenant(tenant_id)
        vector_identifier: ProtoVectorIdentifier
        if tenant_id is not None:
            vector_identifier = ProtoVectorIdentifier(
                vector_id=vector_id, tenant_id=tenant_id
            )
        else:
            vector_identifier = ProtoVectorIdentifier(vector_id=vector_id)

        result = self._vector_serving_client.fetch_vector(
            collection_name=self.name, vector_identifier=vector_identifier
        )

        return self._to_search_result(
            result, include_id=True, include_distance=False, include_vector=True
        )

    def _to_search_result(
        self,
        search_result: Union[ProtoSearchResult, ProtoStoredVector],
        include_id: bool,
        include_vector: bool,
        include_distance: bool,
    ) -> SearchResult:
        """
        Converts a proto search result or stored vector into a `SearchResult`.

        Fields whose `include_*` flag is False are set to None; properties are always converted.
        """
        # Stored vectors carry their id inside `vector_identifier`; search results expose it as `id`.
        if isinstance(search_result, ProtoStoredVector):
            vector_id = search_result.vector_identifier.vector_id
        else:
            vector_id = search_result.id

        return SearchResult(
            id=vector_id if include_id else None,
            vector=list(search_result.vector.element) if include_vector else None,
            distance=search_result.distance if include_distance else None,
            properties={
                prop.name: self._extract_value_with_type(prop)
                for prop in search_result.properties
            },
        )

    def _extract_value_with_type(self, prop: Property):
        """
        Returns the value of a proto `Property`, cast to the Python type matching its populated oneof field.

        Raises:
            QwakException: if the populated `value_type` field is missing or not a supported property type
        """
        field_name = prop.WhichOneof("value_type")
        type_caster = self._proto_property_to_type_mapping.get(field_name)
        if type_caster is None:
            # Report the offending field name; `type_caster` itself is always None on this path.
            raise QwakException(
                f"Cannot deserialize property with type {field_name}. This means an invalid property type"
                f" was registered to the platform. Please delete and add the vector object again."
            )

        return type_caster(getattr(prop, field_name))

    def _transform_natural_input_to_vectors(
        self, natural_input: NaturalInput
    ) -> Vector:
        """
        Embeds a single natural input using the collection's inference client.

        Raises:
            QwakException: if the collection has no vectorizer defined
        """
        if not self.vectorizer:
            raise QwakException(
                "Unable to search by natural input because the collection does not have a Vectorizer defined."
            )
        return self._realtime_inference_client.get_embedding(natural_input)

    def _transform_natural_input_list_to_vectors(
        self, natural_inputs: NaturalInputs
    ) -> List[Vector]:
        """Embeds every natural input in order, one inference call per input."""
        return [
            self._transform_natural_input_to_vectors(natural_input=natural_input)
            for natural_input in natural_inputs
        ]

    def _validate_tenant(self, tenant_id: Optional[str] = None):
        """
        Checks that a single tenant id is consistent with the collection's multi tenancy setting.

        Raises:
            QwakException: if the collection is multi tenant and `tenant_id` is missing or malformed, or if the
                collection is not multi tenant and a `tenant_id` was passed
        """
        if self.multi_tenant:
            # we are multi tenant, assert a valid tenant is passed
            if tenant_id is None:
                raise QwakException(
                    "Tenant ID must be passed when multi tenancy is enabled"
                )

            self._validate_tenant_id(tenant_id)
        elif tenant_id is not None:
            raise QwakException(
                f"Collection {self.name} is not multi tenant, can not specify tenant"
            )

    def _validate_tenant_id(self, tenant_id: str):
        """
        Checks that a tenant id conforms to `_TENANT_ID_PATTERN`.

        Raises:
            QwakException: if the tenant id does not match the pattern
        """
        # `fullmatch` rather than `match`: with `match`, the trailing `$` would also accept an id that
        # ends with a newline character.
        if re.fullmatch(_TENANT_ID_PATTERN, tenant_id) is None:
            raise QwakException(
                f"Tenant ID {tenant_id} does not conform to {_TENANT_ID_PATTERN}"
            )

    def _validate_tenant_ids(
        self, vector_ids: List[str], tenant_ids: Optional[List[str]], verb: str
    ) -> None:
        """
        Checks that a list of tenant ids is consistent with the collection's multi tenancy setting.

        Parameters:
            vector_ids (list): The vector ids the tenant ids are paired with
            tenant_ids (list, optional): The tenant ids to validate
            verb (str): Name of the calling operation, used only in the error message

        Raises:
            QwakException: if the collection is multi tenant and tenant ids are missing, differ in number from
                the vector ids, or contain a malformed id; or if the collection is not multi tenant and tenant
                ids were passed
        """
        if self.multi_tenant:
            if tenant_ids is None:
                raise QwakException(
                    f"Tenant IDs must be provided when {verb} against multitenant collections"
                )
            if len(tenant_ids) != len(vector_ids):
                raise QwakException(
                    f"Got different number of vector ids {len(vector_ids)} and tenant ids {len(tenant_ids)}"
                )
            for tenant_id in tenant_ids:
                self._validate_tenant_id(tenant_id=tenant_id)
        elif tenant_ids is not None:
            raise QwakException(
                f"Collection {self.name} does not have multi tenancy enabled, do not pass tenant ids"
            )

    def _extract_tenant_ids(
        self, vector_ids: List[str], tenant_ids: Optional[List[str]]
    ) -> List[ProtoVectorIdentifier]:
        """
        Builds the proto identifiers for the given vector ids.

        For multi tenant collections each vector id is paired with the tenant id at the same position;
        otherwise the identifiers carry the vector id only.
        """
        if self.multi_tenant:
            return [
                ProtoVectorIdentifier(vector_id=vector_id, tenant_id=tenant_id)
                for vector_id, tenant_id in zip(vector_ids, tenant_ids)
            ]

        return [ProtoVectorIdentifier(vector_id=vector_id) for vector_id in vector_ids]