nucliadb 6.2.1.post3025__py3-none-any.whl → 6.2.1.post3034__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,7 @@ from nucliadb_protos.resources_pb2 import (
43
43
  )
44
44
  from nucliadb_protos.utils_pb2 import ExtractedText, VectorObject
45
45
  from nucliadb_protos.writer_pb2 import Error, FieldStatus
46
+ from nucliadb_utils.storages.exceptions import CouldNotCopyNotFound
46
47
  from nucliadb_utils.storages.storage import Storage, StorageField
47
48
 
48
49
  if TYPE_CHECKING: # pragma: no cover
@@ -370,9 +371,26 @@ class Field(Generic[PbType]):
370
371
  sf = self._get_extracted_vectors_storage_field(vectorset, storage_key_kind)
371
372
  vo: Optional[VectorObject] = None
372
373
  if actual_payload is None:
373
- # Its first extracted text
374
+ # These are the first extracted vectors
374
375
  if payload.HasField("file"):
375
- await self.storage.normalize_binary(payload.file, sf)
376
+ # When we receive vectors in a cloud file, the file points to our
377
+ # storage, but its path may differ, so we may want to move it. This
378
+ # can happen, for example, with LEGACY KBs, where processing
379
+ # sends us the extracted vectors prefixed by vectorset but, to
380
+ # maintain backward compatibility, we move them to the original,
381
+ # non-prefixed path.
382
+ try:
383
+ await self.storage.normalize_binary(payload.file, sf)
384
+ except CouldNotCopyNotFound:
385
+ # A failure here could mean the payload has already been
386
+ # moved and we're retrying due to a redelivery or another
387
+ # retry mechanism
388
+ already_moved = await sf.exists()
389
+ if already_moved:
390
+ # We assume it is the correct one and do nothing else
391
+ pass
392
+ else:
393
+ raise
376
394
  vo = await self.storage.download_pb(sf, VectorObject)
377
395
  else:
378
396
  await self.storage.upload_pb(sf, payload.vectors)
@@ -59,13 +59,11 @@ from nucliadb_protos.knowledgebox_pb2 import (
59
59
  VectorSetPurge,
60
60
  )
61
61
  from nucliadb_protos.resources_pb2 import Basic
62
- from nucliadb_utils import const
63
62
  from nucliadb_utils.settings import is_onprem_nucliadb
64
63
  from nucliadb_utils.storages.storage import Storage
65
64
  from nucliadb_utils.utilities import (
66
65
  get_audit,
67
66
  get_storage,
68
- has_feature,
69
67
  )
70
68
 
71
69
  # XXX Eventually all these keys should be moved to datamanagers.kb
@@ -149,31 +147,6 @@ class KnowledgeBox:
149
147
 
150
148
  vs_external_indexes = []
151
149
 
152
- # HACK! Currently, we share responsibility of deciding where to
153
- # store extracted vectors with processing. Depending on whether
154
- # it sends the vectorset id or not (in the extracted vectors
155
- # wrapper) nucliadb will store the extracted vectors in a different place
156
- #
157
- # Right now, processing behaviour is not setting vectorset ids
158
- # if there's only one semantic model configured. This is done
159
- # for Bw/c with KBs previous to vectorsets.
160
- #
161
- # We now hardcode this assumption here, so we can annotate each
162
- # vectorset with a mark and don't depend on processing
163
- # information to decide the storage key. Once this is done we'll
164
- # be able to force processing to always send vectorset ids and
165
- # remove that bw/c behavior
166
- #
167
- if has_feature(const.Features.REMOVE_DEFAULT_VECTORSET):
168
- storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
169
- else:
170
- if len(semantic_models) == 1:
171
- storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.LEGACY
172
- else:
173
- storage_key_kind = (
174
- knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
175
- )
176
-
177
150
  for vectorset_id, semantic_model in semantic_models.items(): # type: ignore
178
151
  # if this KB uses a matryoshka model, we can choose a different
179
152
  # dimension
@@ -200,7 +173,7 @@ class KnowledgeBox:
200
173
  vector_dimension=dimension,
201
174
  ),
202
175
  matryoshka_dimensions=semantic_model.matryoshka_dimensions,
203
- storage_key_kind=storage_key_kind,
176
+ storage_key_kind=knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX,
204
177
  )
205
178
  await datamanagers.vectorsets.set(txn, kbid=kbid, config=vectorset_config)
206
179
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3025
3
+ Version: 6.2.1.post3034
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3025
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3025
27
- Requires-Dist: nucliadb-protos>=6.2.1.post3025
28
- Requires-Dist: nucliadb-models>=6.2.1.post3025
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3034
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3034
27
+ Requires-Dist: nucliadb-protos>=6.2.1.post3034
28
+ Requires-Dist: nucliadb-models>=6.2.1.post3034
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nuclia-models>=0.24.2
31
31
  Requires-Dist: uvicorn
@@ -118,7 +118,7 @@ nucliadb/ingest/consumer/service.py,sha256=EZM1sABW_7bj6j2UgKUHUuK-EGIEYnLdtPAn8
118
118
  nucliadb/ingest/consumer/shard_creator.py,sha256=8SotMc-o_G8XZU52gR4Aay7tcigTdIXgz8YtxqHmJ1Q,4309
119
119
  nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
120
120
  nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
121
- nucliadb/ingest/fields/base.py,sha256=b6QpVPsCiDirDiYG3-yOCMaSNznJSHmQB0z6J_eDIyw,20657
121
+ nucliadb/ingest/fields/base.py,sha256=DTXFu_g9vSU2h7S4PlYxd-hp4SDuM_EPf8y51ALgD8w,21654
122
122
  nucliadb/ingest/fields/conversation.py,sha256=OcQOHvi72Pm0OyNGwxLo9gONo8f1NhwASq0_gS-E64A,7021
123
123
  nucliadb/ingest/fields/exceptions.py,sha256=LBZ-lw11f42Pk-ck-NSN9mSJ2kOw-NeRwb-UE31ILTQ,1171
124
124
  nucliadb/ingest/fields/file.py,sha256=1v4jLg3balUua2VmSV8hHkAwPFShTUCOzufZvIUQcQw,4740
@@ -130,7 +130,7 @@ nucliadb/ingest/orm/brain.py,sha256=UND5EsNUdd7XdjScYqRqg4r_xCx3l-My8alGw5M9CWg,
130
130
  nucliadb/ingest/orm/broker_message.py,sha256=ZEMueoGuuRKO4tHgzc0P0AM1Ls1TTYey_4UvRQf0BpY,6915
131
131
  nucliadb/ingest/orm/entities.py,sha256=5d6Gfo-Yz-rns_mNJeRqiGaPeWpUMgSKZnmWIGMLCKo,15537
132
132
  nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
133
- nucliadb/ingest/orm/knowledgebox.py,sha256=CGNfvft-IzSUQNENsnqYebp5dGfk2o3N561pcKes-28,25144
133
+ nucliadb/ingest/orm/knowledgebox.py,sha256=IGOPvBR1qXqDxE5DeiOdYCLdPgjzOVVpsASJ2zYvWwQ,23651
134
134
  nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
135
135
  nucliadb/ingest/orm/resource.py,sha256=KDTEwZ6_5eLvLu1s30Pln4BFDOy9D7_ChT7kRmQ1J2g,44670
136
136
  nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
@@ -330,9 +330,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
330
330
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
331
331
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
332
332
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
333
- nucliadb-6.2.1.post3025.dist-info/METADATA,sha256=9HhgfxRf98PfKRLxtV8WwdwVuN1X-P-L2wkYg_ZNJsw,4603
334
- nucliadb-6.2.1.post3025.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
335
- nucliadb-6.2.1.post3025.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
336
- nucliadb-6.2.1.post3025.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
337
- nucliadb-6.2.1.post3025.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
338
- nucliadb-6.2.1.post3025.dist-info/RECORD,,
333
+ nucliadb-6.2.1.post3034.dist-info/METADATA,sha256=NQ9r_0EVqvRG-HjOJ549zBC_gi16eZxAz8FpZTpmCk8,4603
334
+ nucliadb-6.2.1.post3034.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
335
+ nucliadb-6.2.1.post3034.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
336
+ nucliadb-6.2.1.post3034.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
337
+ nucliadb-6.2.1.post3034.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
338
+ nucliadb-6.2.1.post3034.dist-info/RECORD,,