nucliadb 6.3.4.post3796__py3-none-any.whl → 6.3.4.post3812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -502,11 +502,12 @@ class KnowledgeBox:
502
502
  shard_manager = get_shard_manager()
503
503
  await shard_manager.create_vectorset(self.kbid, config)
504
504
 
505
- async def delete_vectorset(self, vectorset_id: str):
506
- vectorset_count = await datamanagers.vectorsets.count(self.txn, kbid=self.kbid)
507
- if vectorset_count == 1:
508
- raise VectorSetConflict("Deletion of your last vectorset is not allowed")
505
+ async def vectorset_marked_for_deletion(self, vectorset_id: str) -> bool:
506
+ key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=vectorset_id)
507
+ value = await self.txn.get(key)
508
+ return value is not None
509
509
 
510
+ async def delete_vectorset(self, vectorset_id: str):
510
511
  deleted = await datamanagers.vectorsets.delete(
511
512
  self.txn, kbid=self.kbid, vectorset_id=vectorset_id
512
513
  )
@@ -514,6 +515,10 @@ class KnowledgeBox:
514
515
  # already deleted
515
516
  return
516
517
 
518
+ vectorset_count = await datamanagers.vectorsets.count(self.txn, kbid=self.kbid)
519
+ if vectorset_count == 0:
520
+ raise VectorSetConflict("Deletion of your last vectorset is not allowed")
521
+
517
522
  # mark vectorset for async deletion
518
523
  deletion_mark_key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=vectorset_id)
519
524
  payload = VectorSetPurge(storage_key_kind=deleted.storage_key_kind)
@@ -19,6 +19,7 @@
19
19
  #
20
20
  import base64
21
21
  import json
22
+ import logging
22
23
  import os
23
24
  import random
24
25
  from enum import Enum
@@ -216,13 +217,16 @@ class PredictEngine:
216
217
  else:
217
218
  return {"X-STF-KBID": kbid}
218
219
 
219
- async def check_response(self, resp: aiohttp.ClientResponse, expected_status: int = 200) -> None:
220
+ async def check_response(
221
+ self, kbid: str, resp: aiohttp.ClientResponse, expected_status: int = 200
222
+ ) -> None:
220
223
  if resp.status == expected_status:
221
224
  return
222
225
 
223
226
  if resp.status == 402:
224
227
  data = await resp.json()
225
228
  raise LimitsExceededError(402, data["detail"])
229
+
226
230
  try:
227
231
  data = await resp.json()
228
232
  try:
@@ -234,10 +238,22 @@ class PredictEngine:
234
238
  aiohttp.client_exceptions.ContentTypeError,
235
239
  ):
236
240
  detail = await resp.text()
237
- if str(resp.status).startswith("5"):
238
- logger.error(f"Predict API error at {resp.url}: {detail}")
239
- else:
240
- logger.info(f"Predict API error at {resp.url}: {detail}")
241
+
242
+ is_5xx_error = resp.status > 499
243
+ # NOTE: 512 is a special status code sent by learning predict api indicating that the error
244
+ # is related to an external generative model, so we don't want to log it as an error
245
+ is_external_generative_error = resp.status == 512
246
+ log_level = logging.ERROR if is_5xx_error and not is_external_generative_error else logging.INFO
247
+ logger.log(
248
+ log_level,
249
+ "Predict API error",
250
+ extra=dict(
251
+ kbid=kbid,
252
+ url=resp.url,
253
+ status_code=resp.status,
254
+ detail=detail,
255
+ ),
256
+ )
241
257
  raise ProxiedPredictAPIError(status=resp.status, detail=detail)
242
258
 
243
259
  @backoff.on_exception(
@@ -265,7 +281,7 @@ class PredictEngine:
265
281
  json=item.model_dump(),
266
282
  headers=self.get_predict_headers(kbid),
267
283
  )
268
- await self.check_response(resp, expected_status=200)
284
+ await self.check_response(kbid, resp, expected_status=200)
269
285
  return await _parse_rephrase_response(resp)
270
286
 
271
287
  @predict_observer.wrap({"type": "chat_ndjson"})
@@ -294,7 +310,7 @@ class PredictEngine:
294
310
  headers=headers,
295
311
  timeout=None,
296
312
  )
297
- await self.check_response(resp, expected_status=200)
313
+ await self.check_response(kbid, resp, expected_status=200)
298
314
  ident = resp.headers.get(NUCLIA_LEARNING_ID_HEADER)
299
315
  model = resp.headers.get(NUCLIA_LEARNING_MODEL_HEADER)
300
316
  return ident, model, get_chat_ndjson_generator(resp)
@@ -348,7 +364,7 @@ class PredictEngine:
348
364
  params=params,
349
365
  headers=self.get_predict_headers(kbid),
350
366
  )
351
- await self.check_response(resp, expected_status=200)
367
+ await self.check_response(kbid, resp, expected_status=200)
352
368
  data = await resp.json()
353
369
  return QueryInfo(**data)
354
370
 
@@ -368,7 +384,7 @@ class PredictEngine:
368
384
  params={"text": sentence},
369
385
  headers=self.get_predict_headers(kbid),
370
386
  )
371
- await self.check_response(resp, expected_status=200)
387
+ await self.check_response(kbid, resp, expected_status=200)
372
388
  data = await resp.json()
373
389
  return convert_relations(data)
374
390
 
@@ -387,7 +403,7 @@ class PredictEngine:
387
403
  headers=self.get_predict_headers(kbid),
388
404
  timeout=None,
389
405
  )
390
- await self.check_response(resp, expected_status=200)
406
+ await self.check_response(kbid, resp, expected_status=200)
391
407
  data = await resp.json()
392
408
  return SummarizedResponse.model_validate(data)
393
409
 
@@ -405,7 +421,7 @@ class PredictEngine:
405
421
  json=item.model_dump(),
406
422
  headers=self.get_predict_headers(kbid),
407
423
  )
408
- await self.check_response(resp, expected_status=200)
424
+ await self.check_response(kbid, resp, expected_status=200)
409
425
  data = await resp.json()
410
426
  return RerankResponse.model_validate(data)
411
427
 
@@ -423,7 +439,7 @@ class PredictEngine:
423
439
  json=item.model_dump(),
424
440
  headers=self.get_predict_headers(kbid),
425
441
  )
426
- await self.check_response(resp, expected_status=200)
442
+ await self.check_response(kbid, resp, expected_status=200)
427
443
  data = await resp.json()
428
444
  return RunAgentsResponse.model_validate(data)
429
445
 
@@ -66,7 +66,7 @@ async def create_kb_endpoint(request: Request, item: KnowledgeBoxConfig) -> Know
66
66
  except KnowledgeBoxConflict:
67
67
  raise HTTPException(status_code=419, detail="Knowledge box already exists")
68
68
  except ExternalIndexCreationError as exc:
69
- raise HTTPException(status_code=502, detail=str(exc))
69
+ raise HTTPException(status_code=512, detail=str(exc))
70
70
  except Exception:
71
71
  logger.exception("Could not create KB")
72
72
  raise HTTPException(status_code=500, detail="Error creating knowledge box")
@@ -58,16 +58,23 @@ async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Creat
58
58
  detail=err.content,
59
59
  )
60
60
 
61
- except VectorSetConflict:
61
+ except VectorSetConflict as err:
62
62
  raise HTTPException(
63
63
  status_code=409,
64
- detail="A vectorset with this embedding model already exists in your KB",
64
+ detail=str(err),
65
65
  )
66
66
 
67
67
  return CreatedVectorSet(id=vectorset_id)
68
68
 
69
69
 
70
70
  async def _add_vectorset(kbid: str, vectorset_id: str) -> None:
71
+ storage = await get_storage()
72
+
73
+ async with datamanagers.with_ro_transaction() as txn:
74
+ kbobj = KnowledgeBox(txn, storage, kbid)
75
+ if await kbobj.vectorset_marked_for_deletion(vectorset_id):
76
+ raise VectorSetConflict("Vectorset is already being deleted. Please try again later.")
77
+
71
78
  # First off, add the vectorset to the learning configuration if it's not already there
72
79
  lconfig = await learning_proxy.get_configuration(kbid)
73
80
  assert lconfig is not None
@@ -79,7 +86,6 @@ async def _add_vectorset(kbid: str, vectorset_id: str) -> None:
79
86
  assert lconfig is not None
80
87
 
81
88
  # Then, add the vectorset to the index if it's not already there
82
- storage = await get_storage()
83
89
  vectorset_config = get_vectorset_config(lconfig, vectorset_id)
84
90
  async with datamanagers.with_rw_transaction() as txn:
85
91
  kbobj = KnowledgeBox(txn, storage, kbid)
@@ -275,7 +275,7 @@ class GCloudFileStorageManager(FileStorageManager):
275
275
  data = {"text": text}
276
276
  if resp.status not in (200, 204, 404):
277
277
  if resp.status == 404:
278
- logger.error(
278
+ logger.debug(
279
279
  f"Attempt to delete not found gcloud: {data}, status: {resp.status}",
280
280
  exc_info=True,
281
281
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.3.4.post3796
3
+ Version: 6.3.4.post3812
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3796
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3796
25
- Requires-Dist: nucliadb-protos>=6.3.4.post3796
26
- Requires-Dist: nucliadb-models>=6.3.4.post3796
27
- Requires-Dist: nidx-protos>=6.3.4.post3796
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3812
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3812
25
+ Requires-Dist: nucliadb-protos>=6.3.4.post3812
26
+ Requires-Dist: nucliadb-models>=6.3.4.post3812
27
+ Requires-Dist: nidx-protos>=6.3.4.post3812
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -143,7 +143,7 @@ nucliadb/ingest/orm/brain.py,sha256=A8H1J7Bo95sNzDgYr0_UNoemQhWOFEFz9UlYfs6ug-8,
143
143
  nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
144
144
  nucliadb/ingest/orm/entities.py,sha256=3_n6lKhBy2GsdmNmkh0_mvxP8md20OZsbtTNEmfJ8Hg,14888
145
145
  nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
146
- nucliadb/ingest/orm/knowledgebox.py,sha256=IGOPvBR1qXqDxE5DeiOdYCLdPgjzOVVpsASJ2zYvWwQ,23651
146
+ nucliadb/ingest/orm/knowledgebox.py,sha256=Bfb4-MIQWlaJrQAUDbgs_iIsXCYjS7s5YiiGl_Jb4jo,23887
147
147
  nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
148
148
  nucliadb/ingest/orm/resource.py,sha256=oFD7APhmG1A72h7DTKumZWQRpIDM0o_FytP1P-CcNq0,45918
149
149
  nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
@@ -193,7 +193,7 @@ nucliadb/search/__init__.py,sha256=tnypbqcH4nBHbGpkINudhKgdLKpwXQCvDtPchUlsyY4,1
193
193
  nucliadb/search/app.py,sha256=-WEX1AZRA8R_9aeOo9ovOTwjXW_7VfwWN7N2ccSoqXg,3387
194
194
  nucliadb/search/lifecycle.py,sha256=V_Pj5PRP0yyDY8d5LytO4X8p9HhN7UomqRG6Ri0UaFA,2206
195
195
  nucliadb/search/openapi.py,sha256=t3Wo_4baTrfPftg2BHsyLWNZ1MYn7ZRdW7ht-wFOgRs,1016
196
- nucliadb/search/predict.py,sha256=z2-RkhMkH-5T6PtFkfESxNof07XiS5FxicLHPRyCUXc,22284
196
+ nucliadb/search/predict.py,sha256=VJr5Itx8FE7CZIGYcP-fRgd2YGxAnP9Qj9NxiwWiwcc,22819
197
197
  nucliadb/search/predict_models.py,sha256=ZAe0dneUsPmV9uBar57cCFADCGOrYDsJHuqKlA5zWag,5937
198
198
  nucliadb/search/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
199
  nucliadb/search/run.py,sha256=aFb-CXRi_C8YMpP_ivNj8KW1BYhADj88y8K9Lr_nUPI,1402
@@ -331,7 +331,7 @@ nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,
331
331
  nucliadb/writer/api/v1/__init__.py,sha256=akI9A_jloNLb0dU4T5zjfdyvmSAiDeIdjAlzNx74FlU,1128
332
332
  nucliadb/writer/api/v1/export_import.py,sha256=elf-EQY5DD3mhw8kWb9tQpDcbrF9sY6VFYqxQOjuVP0,8201
333
333
  nucliadb/writer/api/v1/field.py,sha256=FySCMpcruSAKGeepeAlOihjwxyUPcDO73Uilq5VDWRk,18514
334
- nucliadb/writer/api/v1/knowledgebox.py,sha256=MLeIuym4jPrJgfy1NTcN9CpUGwuBiqDHMcx0hY9DR7g,9530
334
+ nucliadb/writer/api/v1/knowledgebox.py,sha256=PHEYDFa-sN5JrI8-EiVVg5FDOsRuCLT43kyAB4xt-xA,9530
335
335
  nucliadb/writer/api/v1/learning_config.py,sha256=CKBjqcbewkfPwGUPLDWzZSpro6XkmCaVppe5Qtpu5Go,3117
336
336
  nucliadb/writer/api/v1/resource.py,sha256=jV9HM-ID1PPYypfy4Sl4_9aSPF87v7gSJZUSzHjHcQ4,19740
337
337
  nucliadb/writer/api/v1/router.py,sha256=RjuoWLpZer6Kl2BW_wznpNo6XL3BOpdTGqXZCn3QrrQ,1034
@@ -339,7 +339,7 @@ nucliadb/writer/api/v1/services.py,sha256=3AUjk-SmvqJx76v7y89DZx6oyasojPliGYeniR
339
339
  nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
340
340
  nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
341
341
  nucliadb/writer/api/v1/upload.py,sha256=hLMHXSaqEOE-vjKjhIupgdx8klJc3mVQp_oMwx5N-7o,33800
342
- nucliadb/writer/api/v1/vectorsets.py,sha256=mESaXkkI9f-jWWMW61ZZgv7E5YWXKemyc6vwT0lFXns,6747
342
+ nucliadb/writer/api/v1/vectorsets.py,sha256=F3iMViL5G95_Tns4aO2SOA0DwAzxK2_P8MXxtd_XLRE,6973
343
343
  nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
344
344
  nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
345
345
  nucliadb/writer/resource/basic.py,sha256=_zdAr110C7rtEzOKoBRMzPjAnQ0pAtRfGjB8qCzodvI,11767
@@ -349,13 +349,13 @@ nucliadb/writer/tus/__init__.py,sha256=huWpKnDnjsrKlBBJk30ta5vamlA-4x0TbPs_2Up8h
349
349
  nucliadb/writer/tus/azure.py,sha256=XhWAlWTM0vmXcXtuEPYjjeEhuZjiZXZu8q9WsJ7omFE,4107
350
350
  nucliadb/writer/tus/dm.py,sha256=bVoXqt_dpNvTjpffPYhj1JfqK6gfLoPr0hdkknUCZ9E,5488
351
351
  nucliadb/writer/tus/exceptions.py,sha256=WfZSSjsHfoy63wUFlH3QoHx7FMoCNA1oKJmWpZZDnCo,2156
352
- nucliadb/writer/tus/gcs.py,sha256=yM9GSO0mV7c6ZFqK9LpjiBNu96uf1_rofAj9R0NC7xg,14079
352
+ nucliadb/writer/tus/gcs.py,sha256=OnE-YUnp7eyfWFlnh-vlGoxEPS8cUBSSmSm6iJ1Kva0,14079
353
353
  nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,5193
354
354
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
355
355
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
356
356
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
357
- nucliadb-6.3.4.post3796.dist-info/METADATA,sha256=CAxQ5xeSrVmN4NWzZwAdBUv2q5O5cP1INsDQ14RJYdk,4291
358
- nucliadb-6.3.4.post3796.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
359
- nucliadb-6.3.4.post3796.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
360
- nucliadb-6.3.4.post3796.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
361
- nucliadb-6.3.4.post3796.dist-info/RECORD,,
357
+ nucliadb-6.3.4.post3812.dist-info/METADATA,sha256=yjF1rSCHEhQiCv7vEdwAPyTtjrWrsKJgae-L39Y_zE8,4291
358
+ nucliadb-6.3.4.post3812.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
359
+ nucliadb-6.3.4.post3812.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
360
+ nucliadb-6.3.4.post3812.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
361
+ nucliadb-6.3.4.post3812.dist-info/RECORD,,