qwak-core 0.6.7__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. frogml_storage/__init__.py +1 -0
  2. frogml_storage/artifactory/__init__.py +1 -0
  3. frogml_storage/artifactory/_artifactory_api.py +315 -0
  4. frogml_storage/authentication/login/__init__.py +1 -0
  5. frogml_storage/authentication/login/_login_cli.py +239 -0
  6. frogml_storage/authentication/login/_login_command.py +74 -0
  7. frogml_storage/authentication/models/__init__.py +3 -0
  8. frogml_storage/authentication/models/_auth.py +24 -0
  9. frogml_storage/authentication/models/_auth_config.py +70 -0
  10. frogml_storage/authentication/models/_login.py +22 -0
  11. frogml_storage/authentication/utils/__init__.py +18 -0
  12. frogml_storage/authentication/utils/_authentication_utils.py +284 -0
  13. frogml_storage/authentication/utils/_login_checks_utils.py +114 -0
  14. frogml_storage/base_storage.py +140 -0
  15. frogml_storage/constants.py +56 -0
  16. frogml_storage/exceptions/checksum_verification_error.py +3 -0
  17. frogml_storage/exceptions/validation_error.py +4 -0
  18. frogml_storage/frog_ml.py +668 -0
  19. frogml_storage/http/__init__.py +1 -0
  20. frogml_storage/http/http_client.py +83 -0
  21. frogml_storage/logging/__init__.py +1 -0
  22. frogml_storage/logging/_log_config.py +45 -0
  23. frogml_storage/logging/log_utils.py +21 -0
  24. frogml_storage/models/__init__.py +1 -0
  25. frogml_storage/models/_download_context.py +54 -0
  26. frogml_storage/models/dataset_manifest.py +13 -0
  27. frogml_storage/models/entity_manifest.py +93 -0
  28. frogml_storage/models/frogml_dataset_version.py +21 -0
  29. frogml_storage/models/frogml_entity_type_info.py +50 -0
  30. frogml_storage/models/frogml_entity_version.py +34 -0
  31. frogml_storage/models/frogml_model_version.py +21 -0
  32. frogml_storage/models/model_manifest.py +60 -0
  33. frogml_storage/models/serialization_metadata.py +15 -0
  34. frogml_storage/utils/__init__.py +12 -0
  35. frogml_storage/utils/_environment.py +21 -0
  36. frogml_storage/utils/_input_checks_utility.py +104 -0
  37. frogml_storage/utils/_storage_utils.py +15 -0
  38. frogml_storage/utils/_url_utils.py +27 -0
  39. qwak/__init__.py +1 -1
  40. qwak/clients/model_management/client.py +5 -0
  41. qwak/clients/project/client.py +7 -0
  42. qwak/inner/const.py +8 -0
  43. qwak/inner/di_configuration/account.py +67 -6
  44. qwak/inner/di_configuration/dependency_wiring.py +0 -2
  45. qwak/inner/tool/auth.py +83 -0
  46. qwak/inner/tool/grpc/grpc_auth.py +35 -0
  47. qwak/inner/tool/grpc/grpc_tools.py +37 -14
  48. qwak/inner/tool/grpc/grpc_try_wrapping.py +1 -3
  49. qwak/qwak_client/client.py +6 -0
  50. {qwak_core-0.6.7.dist-info → qwak_core-0.7.0.dist-info}/METADATA +1 -1
  51. {qwak_core-0.6.7.dist-info → qwak_core-0.7.0.dist-info}/RECORD +54 -30
  52. qwak_services_mock/mocks/qwak_mocks.py +2 -8
  53. qwak_services_mock/services_mock.py +0 -24
  54. qwak/clients/vector_store/__init__.py +0 -2
  55. qwak/clients/vector_store/management_client.py +0 -124
  56. qwak/clients/vector_store/serving_client.py +0 -156
  57. qwak/vector_store/__init__.py +0 -4
  58. qwak/vector_store/client.py +0 -150
  59. qwak/vector_store/collection.py +0 -426
  60. qwak/vector_store/filters.py +0 -354
  61. qwak/vector_store/inference_client.py +0 -103
  62. qwak/vector_store/rest_helpers.py +0 -72
  63. qwak/vector_store/utils/__init__.py +0 -0
  64. qwak/vector_store/utils/filter_utils.py +0 -21
  65. qwak/vector_store/utils/upsert_utils.py +0 -217
  66. qwak_services_mock/mocks/vector_serving_api.py +0 -154
  67. qwak_services_mock/mocks/vectors_management_api.py +0 -96
  68. {qwak_core-0.6.7.dist-info → qwak_core-0.7.0.dist-info}/WHEEL +0 -0
@@ -1,217 +0,0 @@
1
- import os
2
- from functools import partial
3
- from math import ceil
4
- from multiprocessing import Pool, set_start_method
5
- from typing import Dict, Iterable, List, Optional, Tuple, TypeVar, Union
6
-
7
- from _qwak_proto.qwak.vectors.v1.vector_pb2 import (
8
- DoubleVector,
9
- Property,
10
- StoredVector,
11
- VectorIdentifier,
12
- )
13
- from dependency_injector.wiring import Provide, inject
14
- from qwak.clients.vector_store.serving_client import VectorServingClient
15
- from qwak.exceptions import QwakException
16
- from qwak.inner.di_configuration import QwakContainer
17
- from qwak.vector_store.inference_client import VectorStoreInferenceClient
18
- from typeguard import typechecked
19
-
20
- _NaturalInput = TypeVar("T")
21
- _NaturalInputs = List[_NaturalInput]
22
- _Vector = List[float]
23
- _Properties = Dict[str, Union[str, int, bool, float]]
24
-
25
- _type_to_proto_property_mapping: Dict = {
26
- str: "string_val",
27
- bool: "bool_val",
28
- int: "int_val",
29
- float: "double_val",
30
- }
31
-
32
-
33
def _build_property(key: str, value: Union[str, int, bool, float]):
    """Wrap a single key/value pair in a ``Property`` message.

    The keyword used to carry ``value`` is looked up by the exact type of
    ``value`` in ``_type_to_proto_property_mapping``.

    :param key: property name
    :param value: property value; its exact type must be a key of the mapping
    :return: the constructed ``Property``
    :raises QwakException: when the value's type has no entry in the mapping
    """
    value_type = type(value)
    proto_field = _type_to_proto_property_mapping.get(value_type)
    if proto_field:
        return Property(**{"name": key, proto_field: value})

    raise QwakException(
        f"Cannot upsert vector with property value type {value_type}. "
        f"Supported types are: {list(_type_to_proto_property_mapping.keys())}"
    )
43
-
44
-
45
def _rewire_qwak_container(config):
    """Build a new ``QwakContainer`` from ``config`` and wire it.

    Intended to run inside a freshly spawned interpreter: the imports there
    already created a container, but not necessarily one carrying the same
    configuration as the parent process that spawned it.

    Only the vector store, ecosystem and authentication client packages are
    wired; extend the list below if code from other modules is needed.

    :param config: configuration taken from the parent process' container
    """
    container = QwakContainer(config=config)
    from qwak.clients import vector_store
    from qwak.clients.administration import authentication, eco_system

    packages_to_wire = [authentication, eco_system, vector_store]
    container.wire(packages=packages_to_wire)
63
-
64
-
65
@typechecked
def _divide_chunks(lst: List, chunk_size: int):
    """Yield consecutive slices of ``lst`` with at most ``chunk_size`` items.

    Every slice except possibly the last holds exactly ``chunk_size`` items;
    an empty list yields nothing.

    :param lst: the list to split
    :param chunk_size: maximum number of items per slice, must be positive
    :raises QwakException: when ``chunk_size`` is zero or negative
    """
    if chunk_size <= 0:
        raise QwakException("Chunk size must be a positive integer")

    for start in range(0, len(lst), chunk_size):
        yield lst[start : start + chunk_size]
74
-
75
-
76
def _get_vector_identifier(t: Union[str, Tuple[str, str]]):
    """Build a ``VectorIdentifier`` from a bare id or an ``(id, tenant)`` pair.

    :param t: either the vector id as a string, or a sequence whose first
        item is the vector id and whose second item is the tenant id
    :return: a ``VectorIdentifier``; ``tenant_id`` is only set for the pair form
    """
    # isinstance (rather than an exact type comparison) so that str subclasses
    # are treated as a plain id instead of being indexed character by character.
    if isinstance(t, str):
        return VectorIdentifier(vector_id=t)
    return VectorIdentifier(vector_id=t[0], tenant_id=t[1])
80
-
81
-
82
def _upsert_vector_block(
    vector_tuples: List[Tuple[Union[str, Tuple[str, str]], _Vector, _Properties]],
    chunk_size: int,
    collection_name: str,
    edge_services_url: str,
) -> None:
    """Upsert ready-made vectors into a collection, one request per chunk.

    :param vector_tuples: ``(id, vector, properties)`` tuples to store
    :param chunk_size: maximum number of vectors sent in a single request
    :param collection_name: name of the target collection
    :param edge_services_url: URL handed to the ``VectorServingClient``
    """

    def _as_stored_vector(entry) -> StoredVector:
        # entry is one (id, vector, properties) tuple
        return StoredVector(
            vector_identifier=_get_vector_identifier(entry[0]),
            vector=DoubleVector(element=entry[1]),
            properties=[
                _build_property(name, val) for name, val in entry[2].items()
            ],
        )

    client: VectorServingClient = VectorServingClient(
        edge_services_url=edge_services_url
    )
    for batch in _divide_chunks(vector_tuples, chunk_size):
        client.upsert_vectors(
            collection_name=collection_name,
            vectors=[_as_stored_vector(entry) for entry in batch],
        )
106
-
107
-
108
def _upsert_natural_input_block(
    vector_tuples: List[Tuple[Union[str, Tuple[str, str]], _NaturalInput, _Properties]],
    chunk_size: int,
    vectorizer_name: str,
    collection_name: str,
    edge_services_url: str,
) -> None:
    """Embed natural inputs and upsert the results, one request per chunk.

    Each input is turned into an embedding by asking the inference client
    (created for ``vectorizer_name``) for it, one input at a time.

    :param vector_tuples: ``(id, natural_input, properties)`` tuples; the id is
        a string or an ``(id, tenant)`` pair, exactly as accepted by
        ``_get_vector_identifier``
    :param chunk_size: maximum number of vectors sent in a single request
    :param vectorizer_name: model id handed to ``VectorStoreInferenceClient``
    :param collection_name: name of the target collection
    :param edge_services_url: URL handed to the ``VectorServingClient``
    """
    vector_serving_client: VectorServingClient = VectorServingClient(
        edge_services_url=edge_services_url
    )
    inference_client: VectorStoreInferenceClient = VectorStoreInferenceClient(
        model_id=vectorizer_name
    )
    for chunk in _divide_chunks(vector_tuples, chunk_size):
        # chunk is a list of (id, natural_input, properties) tuples
        stored_vectors = [
            StoredVector(
                vector_identifier=_get_vector_identifier(tpl[0]),
                vector=DoubleVector(
                    element=inference_client.get_embedding(natural_input=tpl[1])
                ),
                properties=[
                    _build_property(key, value) for (key, value) in tpl[2].items()
                ],
            )
            for tpl in chunk
        ]
        vector_serving_client.upsert_vectors(
            collection_name=collection_name,
            vectors=stored_vectors,
        )
138
-
139
-
140
@inject
def _upsert_natural_input(
    vector_tuples: List[Tuple[Union[str, Tuple[str, str]], _NaturalInput, _Properties]],
    chunk_size: int,
    vectorizer_name: str,
    collection_name: str,
    edge_services_url: str,
    multiproc: bool = False,
    max_processes: Optional[int] = None,
    config=Provide[QwakContainer.config],
):
    """Embed and upsert natural inputs, optionally fanned out over processes.

    :param vector_tuples: ``(id, natural_input, properties)`` tuples to store
    :param chunk_size: maximum number of vectors sent in a single request
    :param vectorizer_name: model used to compute the embeddings
    :param collection_name: name of the target collection
    :param edge_services_url: URL of the vector serving endpoint
    :param multiproc: when true, split the work across a process pool
    :param max_processes: pool size; defaults to the machine's CPU count
    :param config: container configuration, injected; it is forwarded to the
        worker processes so they can rebuild their own container
    """
    if not multiproc:
        _upsert_natural_input_block(
            vector_tuples=vector_tuples,
            chunk_size=chunk_size,
            vectorizer_name=vectorizer_name,
            collection_name=collection_name,
            edge_services_url=edge_services_url,
        )
        return

    if not vector_tuples:
        # Nothing to send. Without this guard the block size computed below
        # would be 0, which _divide_chunks rejects with a QwakException.
        return

    if max_processes is None:
        # os.cpu_count() returns None when the count cannot be determined.
        max_processes = os.cpu_count() or 1
    # Aim for roughly four blocks per worker process.
    effective_block_size: int = ceil(len(vector_tuples) / (max_processes * 4))

    upsert_block = partial(
        _upsert_natural_input_block,
        chunk_size=chunk_size,
        vectorizer_name=vectorizer_name,
        collection_name=collection_name,
        edge_services_url=edge_services_url,
    )

    blocks: List[List[Tuple[str, _NaturalInput, _Properties]]] = list(
        _divide_chunks(vector_tuples, effective_block_size)
    )
    # Each worker rebuilds and rewires its own container from the parent config.
    initializer = partial(_rewire_qwak_container, config=config)
    set_start_method("spawn", force=True)

    with Pool(processes=max_processes, initializer=initializer) as p:
        p.map(upsert_block, blocks)
180
-
181
-
182
@inject
def _upsert_vectors(
    vector_tuples: List[Tuple[Union[str, Tuple[str, str]], _Vector, _Properties]],
    chunk_size: int,
    collection_name: str,
    edge_services_url: str,
    multiproc: bool = False,
    max_processes: Optional[int] = None,
    config=Provide[QwakContainer.config],
):
    """Upsert ready-made vectors, optionally fanned out over processes.

    :param vector_tuples: ``(id, vector, properties)`` tuples to store
    :param chunk_size: maximum number of vectors sent in a single request
    :param collection_name: name of the target collection
    :param edge_services_url: URL of the vector serving endpoint
    :param multiproc: when true, split the work across a process pool
    :param max_processes: pool size; defaults to the machine's CPU count
    :param config: container configuration, injected; it is forwarded to the
        worker processes so they can rebuild their own container
    """
    if not multiproc:
        _upsert_vector_block(
            vector_tuples=vector_tuples,
            chunk_size=chunk_size,
            collection_name=collection_name,
            edge_services_url=edge_services_url,
        )
        return

    if not vector_tuples:
        # Nothing to send. Without this guard the block size computed below
        # would be 0, which _divide_chunks rejects with a QwakException.
        return

    if max_processes is None:
        # os.cpu_count() returns None when the count cannot be determined.
        max_processes = os.cpu_count() or 1
    # Aim for roughly four blocks per worker process.
    effective_block_size: int = ceil(len(vector_tuples) / (max_processes * 4))

    upsert_block = partial(
        _upsert_vector_block,
        chunk_size=chunk_size,
        collection_name=collection_name,
        edge_services_url=edge_services_url,
    )
    blocks: List[List[Tuple[str, _Vector, _Properties]]] = list(
        _divide_chunks(vector_tuples, effective_block_size)
    )

    set_start_method("spawn", force=True)
    # Each worker rebuilds and rewires its own container from the parent config.
    initializer = partial(_rewire_qwak_container, config=config)
    with Pool(processes=max_processes, initializer=initializer) as p:
        p.map(upsert_block, blocks)
@@ -1,154 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Dict, List
3
-
4
- import grpc
5
- from _qwak_proto.qwak.vectors.v1.vector_pb2 import (
6
- DoubleVector,
7
- Property,
8
- SearchResult,
9
- StoredVector,
10
- VectorIdentifier,
11
- )
12
- from _qwak_proto.qwak.vectors.v1.vector_service_pb2 import (
13
- DeleteVectorsResponse,
14
- FetchVectorRequest,
15
- FetchVectorResponse,
16
- SearchSimilarVectorsRequest,
17
- SearchSimilarVectorsResponse,
18
- UpsertVectorsRequest,
19
- UpsertVectorsResponse,
20
- )
21
- from _qwak_proto.qwak.vectors.v1.vector_service_pb2_grpc import VectorServiceServicer
22
- from numpy import dot
23
- from numpy.linalg import norm
24
-
25
-
26
@dataclass
class VectorObject:
    """Plain record bundling a vector's id, its elements and its properties."""

    id: str  # identifier of the vector
    vector: List[float]  # the vector's elements
    property: List[Property]  # Property messages attached to the vector
31
-
32
-
33
class VectorServingServiceMock(VectorServiceServicer):
    """In-memory stand-in for the vector serving gRPC service.

    Collections live in a dict keyed by collection name. Each collection is a
    dict mapping a vector id to a record with the keys ``"id"``, ``"vector"``
    (a list of floats) and ``"properties"``.
    """

    def __init__(self):
        self._vector_collections: Dict[str, Dict] = dict()

    def reset_vector_store(self) -> None:
        """Forget every collection together with its vectors."""
        self._vector_collections.clear()

    def get_num_of_vectors(self, collection_name: str) -> int:
        """Return how many vectors a collection holds; 0 when it is unknown."""
        return len(self._vector_collections.get(collection_name, ()))

    def create_collection(self, collection_name: str) -> None:
        """Create an empty collection, replacing any existing one of that name."""
        self._vector_collections[collection_name] = dict()

    def get_vector_by_ids(
        self, collection_name: str, vector_ids: List[str]
    ) -> List[dict]:
        """Return the stored records for the given ids, skipping unknown ids.

        :raises ValueError: when the collection does not exist
        """
        if collection_name not in self._vector_collections:
            raise ValueError(f"Collection named {collection_name} doesn't exist")

        collection = self._vector_collections[collection_name]
        return [collection.get(_id) for _id in vector_ids if _id in collection]

    def SearchSimilarVectors(self, request: SearchSimilarVectorsRequest, context):
        """Return the stored vectors most similar to the reference vector.

        Sets NOT_FOUND on ``context`` and returns ``None`` when the collection
        does not exist.
        """
        if request.collection_name not in self._vector_collections:
            # The request field is ``collection_name`` (as used everywhere
            # else in this class), not ``collection``.
            context.set_details(
                f"Collection named {request.collection_name} doesn't exist'"
            )
            context.set_code(grpc.StatusCode.NOT_FOUND)
            return

        reference_vector = list(request.reference_vector.element)
        collection_store = self._vector_collections[request.collection_name]

        # A naive "vector similarity" search: score every stored vector
        # against the reference with cosine similarity, sort ascending and
        # keep the tail, i.e. the highest-scoring entries.
        # NOTE(review): a max_results of 0 makes the slice below return the
        # whole collection - confirm whether callers rely on that.
        result_set = sorted(
            collection_store.values(),
            key=lambda b: _cos_sim(reference_vector, b["vector"]),
        )[-request.max_results :]

        return SearchSimilarVectorsResponse(
            search_results=[
                SearchResult(
                    id=result["id"] if request.include_id else None,
                    # Each result reports its own properties, filtered down
                    # to the requested property names.
                    properties=[
                        p for p in result["properties"] if p.name in request.properties
                    ],
                    vector=DoubleVector(element=result["vector"])
                    if request.include_vector
                    else None,
                    distance=_cos_sim(reference_vector, result["vector"])
                    if request.include_distance
                    else None,
                )
                for result in result_set
            ]
        )

    def UpsertVectors(self, request: UpsertVectorsRequest, context):
        """Insert or overwrite vectors, creating the collection if needed."""
        collection_store = self._vector_collections.get(request.collection_name, dict())
        for stored_vector in request.vector:
            vector_id = stored_vector.vector_identifier.vector_id
            collection_store[vector_id] = {
                "id": vector_id,
                "vector": list(stored_vector.vector.element),
                "properties": stored_vector.properties,
            }

        # Needed for the case where the collection did not exist yet.
        self._vector_collections[request.collection_name] = collection_store
        return UpsertVectorsResponse()

    def DeleteVectors(self, request, context):
        """Delete the given vectors and report how many were actually removed.

        Sets NOT_FOUND on ``context`` and returns ``None`` when the collection
        does not exist. Ids that are not stored are ignored.
        """
        if request.collection_name not in self._vector_collections:
            context.set_details(
                f"Collection named {request.collection_name} doesn't exist'"
            )
            context.set_code(grpc.StatusCode.NOT_FOUND)
            return

        collection_store = self._vector_collections[request.collection_name]
        ids_in_collection = [
            vector_identifier.vector_id
            for vector_identifier in request.vector_identifiers
            if vector_identifier.vector_id in collection_store
        ]
        for vector_id in ids_in_collection:
            # Default guards against the same id being listed more than once.
            collection_store.pop(vector_id, None)

        return DeleteVectorsResponse(num_vectors_deleted=len(ids_in_collection))

    def FetchVector(self, request, context):
        """Return a single stored vector.

        Sets NOT_FOUND on ``context`` and returns ``None`` when either the
        collection or the vector id is unknown.
        """
        if request.collection_name not in self._vector_collections:
            context.set_details(
                f"Collection named {request.collection_name} doesn't exist'"
            )
            context.set_code(grpc.StatusCode.NOT_FOUND)
            return

        collection_store = self._vector_collections[request.collection_name]
        vector_id = request.vector_identifier.vector_id
        if vector_id not in collection_store:
            context.set_details(f"Vector with id {vector_id} doesn't exist'")
            context.set_code(grpc.StatusCode.NOT_FOUND)
            return

        stored_vector_dict = collection_store[vector_id]
        return FetchVectorResponse(
            vector=StoredVector(
                vector_identifier=request.vector_identifier,
                vector=DoubleVector(element=stored_vector_dict["vector"]),
                properties=stored_vector_dict["properties"],
            )
        )
151
-
152
-
153
- def _cos_sim(a, b):
154
- return dot(a, b) / (norm(a) * norm(b))
@@ -1,96 +0,0 @@
1
- import uuid
2
- from datetime import datetime
3
-
4
- import grpc
5
- from _qwak_proto.qwak.vectors.v1.collection.collection_pb2 import (
6
- COLLECTION_STATUS_CREATE_REQUESTED,
7
- QwakMetadata,
8
- VectorCollection,
9
- VectorCollectionDefinition,
10
- )
11
- from _qwak_proto.qwak.vectors.v1.collection.collection_service_pb2 import (
12
- CreateCollectionResponse,
13
- DeleteCollectionByIdResponse,
14
- DeleteCollectionByNameResponse,
15
- GetCollectionByIdResponse,
16
- GetCollectionByNameResponse,
17
- ListCollectionsResponse,
18
- )
19
- from _qwak_proto.qwak.vectors.v1.collection.collection_service_pb2_grpc import (
20
- VectorCollectionServiceServicer,
21
- )
22
- from google.protobuf.timestamp_pb2 import Timestamp
23
-
24
-
25
class VectorCollectionManagementServiceMock(VectorCollectionServiceServicer):
    """In-memory stand-in for the vector collection management gRPC service.

    Every created collection is indexed twice - by its generated id and by
    its name - and both indexes are updated together on deletion.
    """

    def __init__(self):
        self._collections_spec_by_ids = {}
        self._collections_spec_by_name = {}

    def reset_collections(self):
        """Forget every collection."""
        self._collections_spec_by_ids.clear()
        self._collections_spec_by_name.clear()

    def CreateCollection(self, request, context):
        """Register a new collection under a fresh UUID and return it."""
        timestamp = Timestamp()
        # Timestamp.FromDatetime interprets a naive datetime as UTC, so passing
        # the local-time datetime.now() would skew the stored time on any
        # non-UTC machine. GetCurrentTime() records the current UTC time.
        timestamp.GetCurrentTime()

        collection_id = str(uuid.uuid4())
        vector_collection = VectorCollection(
            metadata=QwakMetadata(
                created_at=timestamp,
                created_by="it@qwak.com",
                last_modified_at=timestamp,
                last_modified_by="it@qwak.com",
            ),
            definition=VectorCollectionDefinition(
                id=collection_id,
                collection_spec=request.collection_spec,
            ),
            status=COLLECTION_STATUS_CREATE_REQUESTED,
        )

        self._collections_spec_by_ids[collection_id] = vector_collection
        self._collections_spec_by_name[request.collection_spec.name] = vector_collection
        return CreateCollectionResponse(vector_collection=vector_collection)

    def GetCollectionById(self, request, context):
        """Return the collection with the given id, or set NOT_FOUND."""
        if request.id in self._collections_spec_by_ids:
            return GetCollectionByIdResponse(
                vector_collection=self._collections_spec_by_ids[request.id]
            )

        context.set_details(f"Collection ID {request.id} doesn't exist'")
        context.set_code(grpc.StatusCode.NOT_FOUND)

    def GetCollectionByName(self, request, context):
        """Return the collection with the given name, or set NOT_FOUND."""
        if request.name in self._collections_spec_by_name:
            return GetCollectionByNameResponse(
                vector_collection=self._collections_spec_by_name[request.name]
            )

        context.set_details(f"Collection name {request.name} doesn't exist'")
        context.set_code(grpc.StatusCode.NOT_FOUND)

    def DeleteCollectionById(self, request, context):
        """Delete a collection by id from both indexes, or set NOT_FOUND."""
        collection = self._collections_spec_by_ids.pop(request.id, None)
        if collection is not None:
            name = collection.definition.collection_spec.name
            # Drop the name entry only if it still points at this very
            # collection; a later collection may have reused the name.
            if self._collections_spec_by_name.get(name) is collection:
                self._collections_spec_by_name.pop(name)
            return DeleteCollectionByIdResponse()

        context.set_details(f"Collection ID {request.id} doesn't exist'")
        context.set_code(grpc.StatusCode.NOT_FOUND)

    def DeleteCollectionByName(self, request, context):
        """Delete a collection by name from both indexes, or set NOT_FOUND."""
        collection = self._collections_spec_by_name.pop(request.name, None)
        if collection is not None:
            # Keep the id index in step so the collection also disappears
            # from GetCollectionById and ListCollections.
            self._collections_spec_by_ids.pop(collection.definition.id, None)
            return DeleteCollectionByNameResponse()

        context.set_details(f"Collection name {request.name} doesn't exist'")
        context.set_code(grpc.StatusCode.NOT_FOUND)

    def ListCollections(self, request, context):
        """Return every collection currently present in the id index."""
        return ListCollectionsResponse(
            vector_collections=list(self._collections_spec_by_ids.values())
        )