nucliadb 6.3.4.post3796__py3-none-any.whl → 6.3.4.post3812__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/ingest/orm/knowledgebox.py +9 -4
- nucliadb/search/predict.py +28 -12
- nucliadb/writer/api/v1/knowledgebox.py +1 -1
- nucliadb/writer/api/v1/vectorsets.py +9 -3
- nucliadb/writer/tus/gcs.py +1 -1
- {nucliadb-6.3.4.post3796.dist-info → nucliadb-6.3.4.post3812.dist-info}/METADATA +6 -6
- {nucliadb-6.3.4.post3796.dist-info → nucliadb-6.3.4.post3812.dist-info}/RECORD +10 -10
- {nucliadb-6.3.4.post3796.dist-info → nucliadb-6.3.4.post3812.dist-info}/WHEEL +0 -0
- {nucliadb-6.3.4.post3796.dist-info → nucliadb-6.3.4.post3812.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.4.post3796.dist-info → nucliadb-6.3.4.post3812.dist-info}/top_level.txt +0 -0
@@ -502,11 +502,12 @@ class KnowledgeBox:
|
|
502
502
|
shard_manager = get_shard_manager()
|
503
503
|
await shard_manager.create_vectorset(self.kbid, config)
|
504
504
|
|
505
|
-
async def
|
506
|
-
|
507
|
-
|
508
|
-
|
505
|
+
async def vectorset_marked_for_deletion(self, vectorset_id: str) -> bool:
|
506
|
+
key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=vectorset_id)
|
507
|
+
value = await self.txn.get(key)
|
508
|
+
return value is not None
|
509
509
|
|
510
|
+
async def delete_vectorset(self, vectorset_id: str):
|
510
511
|
deleted = await datamanagers.vectorsets.delete(
|
511
512
|
self.txn, kbid=self.kbid, vectorset_id=vectorset_id
|
512
513
|
)
|
@@ -514,6 +515,10 @@ class KnowledgeBox:
|
|
514
515
|
# already deleted
|
515
516
|
return
|
516
517
|
|
518
|
+
vectorset_count = await datamanagers.vectorsets.count(self.txn, kbid=self.kbid)
|
519
|
+
if vectorset_count == 0:
|
520
|
+
raise VectorSetConflict("Deletion of your last vectorset is not allowed")
|
521
|
+
|
517
522
|
# mark vectorset for async deletion
|
518
523
|
deletion_mark_key = KB_VECTORSET_TO_DELETE.format(kbid=self.kbid, vectorset=vectorset_id)
|
519
524
|
payload = VectorSetPurge(storage_key_kind=deleted.storage_key_kind)
|
nucliadb/search/predict.py
CHANGED
@@ -19,6 +19,7 @@
|
|
19
19
|
#
|
20
20
|
import base64
|
21
21
|
import json
|
22
|
+
import logging
|
22
23
|
import os
|
23
24
|
import random
|
24
25
|
from enum import Enum
|
@@ -216,13 +217,16 @@ class PredictEngine:
|
|
216
217
|
else:
|
217
218
|
return {"X-STF-KBID": kbid}
|
218
219
|
|
219
|
-
async def check_response(
|
220
|
+
async def check_response(
|
221
|
+
self, kbid: str, resp: aiohttp.ClientResponse, expected_status: int = 200
|
222
|
+
) -> None:
|
220
223
|
if resp.status == expected_status:
|
221
224
|
return
|
222
225
|
|
223
226
|
if resp.status == 402:
|
224
227
|
data = await resp.json()
|
225
228
|
raise LimitsExceededError(402, data["detail"])
|
229
|
+
|
226
230
|
try:
|
227
231
|
data = await resp.json()
|
228
232
|
try:
|
@@ -234,10 +238,22 @@ class PredictEngine:
|
|
234
238
|
aiohttp.client_exceptions.ContentTypeError,
|
235
239
|
):
|
236
240
|
detail = await resp.text()
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
+
|
242
|
+
is_5xx_error = resp.status > 499
|
243
|
+
# NOTE: 512 is a special status code sent by learning predict api indicating that the error
|
244
|
+
# is related to an external generative model, so we don't want to log it as an error
|
245
|
+
is_external_generative_error = resp.status == 512
|
246
|
+
log_level = logging.ERROR if is_5xx_error and not is_external_generative_error else logging.INFO
|
247
|
+
logger.log(
|
248
|
+
log_level,
|
249
|
+
"Predict API error",
|
250
|
+
extra=dict(
|
251
|
+
kbid=kbid,
|
252
|
+
url=resp.url,
|
253
|
+
status_code=resp.status,
|
254
|
+
detail=detail,
|
255
|
+
),
|
256
|
+
)
|
241
257
|
raise ProxiedPredictAPIError(status=resp.status, detail=detail)
|
242
258
|
|
243
259
|
@backoff.on_exception(
|
@@ -265,7 +281,7 @@ class PredictEngine:
|
|
265
281
|
json=item.model_dump(),
|
266
282
|
headers=self.get_predict_headers(kbid),
|
267
283
|
)
|
268
|
-
await self.check_response(resp, expected_status=200)
|
284
|
+
await self.check_response(kbid, resp, expected_status=200)
|
269
285
|
return await _parse_rephrase_response(resp)
|
270
286
|
|
271
287
|
@predict_observer.wrap({"type": "chat_ndjson"})
|
@@ -294,7 +310,7 @@ class PredictEngine:
|
|
294
310
|
headers=headers,
|
295
311
|
timeout=None,
|
296
312
|
)
|
297
|
-
await self.check_response(resp, expected_status=200)
|
313
|
+
await self.check_response(kbid, resp, expected_status=200)
|
298
314
|
ident = resp.headers.get(NUCLIA_LEARNING_ID_HEADER)
|
299
315
|
model = resp.headers.get(NUCLIA_LEARNING_MODEL_HEADER)
|
300
316
|
return ident, model, get_chat_ndjson_generator(resp)
|
@@ -348,7 +364,7 @@ class PredictEngine:
|
|
348
364
|
params=params,
|
349
365
|
headers=self.get_predict_headers(kbid),
|
350
366
|
)
|
351
|
-
await self.check_response(resp, expected_status=200)
|
367
|
+
await self.check_response(kbid, resp, expected_status=200)
|
352
368
|
data = await resp.json()
|
353
369
|
return QueryInfo(**data)
|
354
370
|
|
@@ -368,7 +384,7 @@ class PredictEngine:
|
|
368
384
|
params={"text": sentence},
|
369
385
|
headers=self.get_predict_headers(kbid),
|
370
386
|
)
|
371
|
-
await self.check_response(resp, expected_status=200)
|
387
|
+
await self.check_response(kbid, resp, expected_status=200)
|
372
388
|
data = await resp.json()
|
373
389
|
return convert_relations(data)
|
374
390
|
|
@@ -387,7 +403,7 @@ class PredictEngine:
|
|
387
403
|
headers=self.get_predict_headers(kbid),
|
388
404
|
timeout=None,
|
389
405
|
)
|
390
|
-
await self.check_response(resp, expected_status=200)
|
406
|
+
await self.check_response(kbid, resp, expected_status=200)
|
391
407
|
data = await resp.json()
|
392
408
|
return SummarizedResponse.model_validate(data)
|
393
409
|
|
@@ -405,7 +421,7 @@ class PredictEngine:
|
|
405
421
|
json=item.model_dump(),
|
406
422
|
headers=self.get_predict_headers(kbid),
|
407
423
|
)
|
408
|
-
await self.check_response(resp, expected_status=200)
|
424
|
+
await self.check_response(kbid, resp, expected_status=200)
|
409
425
|
data = await resp.json()
|
410
426
|
return RerankResponse.model_validate(data)
|
411
427
|
|
@@ -423,7 +439,7 @@ class PredictEngine:
|
|
423
439
|
json=item.model_dump(),
|
424
440
|
headers=self.get_predict_headers(kbid),
|
425
441
|
)
|
426
|
-
await self.check_response(resp, expected_status=200)
|
442
|
+
await self.check_response(kbid, resp, expected_status=200)
|
427
443
|
data = await resp.json()
|
428
444
|
return RunAgentsResponse.model_validate(data)
|
429
445
|
|
@@ -66,7 +66,7 @@ async def create_kb_endpoint(request: Request, item: KnowledgeBoxConfig) -> Know
|
|
66
66
|
except KnowledgeBoxConflict:
|
67
67
|
raise HTTPException(status_code=419, detail="Knowledge box already exists")
|
68
68
|
except ExternalIndexCreationError as exc:
|
69
|
-
raise HTTPException(status_code=
|
69
|
+
raise HTTPException(status_code=512, detail=str(exc))
|
70
70
|
except Exception:
|
71
71
|
logger.exception("Could not create KB")
|
72
72
|
raise HTTPException(status_code=500, detail="Error creating knowledge box")
|
@@ -58,16 +58,23 @@ async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Creat
|
|
58
58
|
detail=err.content,
|
59
59
|
)
|
60
60
|
|
61
|
-
except VectorSetConflict:
|
61
|
+
except VectorSetConflict as err:
|
62
62
|
raise HTTPException(
|
63
63
|
status_code=409,
|
64
|
-
detail=
|
64
|
+
detail=str(err),
|
65
65
|
)
|
66
66
|
|
67
67
|
return CreatedVectorSet(id=vectorset_id)
|
68
68
|
|
69
69
|
|
70
70
|
async def _add_vectorset(kbid: str, vectorset_id: str) -> None:
|
71
|
+
storage = await get_storage()
|
72
|
+
|
73
|
+
async with datamanagers.with_ro_transaction() as txn:
|
74
|
+
kbobj = KnowledgeBox(txn, storage, kbid)
|
75
|
+
if await kbobj.vectorset_marked_for_deletion(vectorset_id):
|
76
|
+
raise VectorSetConflict("Vectorset is already being deleted. Please try again later.")
|
77
|
+
|
71
78
|
# First off, add the vectorset to the learning configuration if it's not already there
|
72
79
|
lconfig = await learning_proxy.get_configuration(kbid)
|
73
80
|
assert lconfig is not None
|
@@ -79,7 +86,6 @@ async def _add_vectorset(kbid: str, vectorset_id: str) -> None:
|
|
79
86
|
assert lconfig is not None
|
80
87
|
|
81
88
|
# Then, add the vectorset to the index if it's not already there
|
82
|
-
storage = await get_storage()
|
83
89
|
vectorset_config = get_vectorset_config(lconfig, vectorset_id)
|
84
90
|
async with datamanagers.with_rw_transaction() as txn:
|
85
91
|
kbobj = KnowledgeBox(txn, storage, kbid)
|
nucliadb/writer/tus/gcs.py
CHANGED
@@ -275,7 +275,7 @@ class GCloudFileStorageManager(FileStorageManager):
|
|
275
275
|
data = {"text": text}
|
276
276
|
if resp.status not in (200, 204, 404):
|
277
277
|
if resp.status == 404:
|
278
|
-
logger.
|
278
|
+
logger.debug(
|
279
279
|
f"Attempt to delete not found gcloud: {data}, status: {resp.status}",
|
280
280
|
exc_info=True,
|
281
281
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.3.4.
|
3
|
+
Version: 6.3.4.post3812
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.3.4.
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.
|
25
|
-
Requires-Dist: nucliadb-protos>=6.3.4.
|
26
|
-
Requires-Dist: nucliadb-models>=6.3.4.
|
27
|
-
Requires-Dist: nidx-protos>=6.3.4.
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3812
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3812
|
25
|
+
Requires-Dist: nucliadb-protos>=6.3.4.post3812
|
26
|
+
Requires-Dist: nucliadb-models>=6.3.4.post3812
|
27
|
+
Requires-Dist: nidx-protos>=6.3.4.post3812
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -143,7 +143,7 @@ nucliadb/ingest/orm/brain.py,sha256=A8H1J7Bo95sNzDgYr0_UNoemQhWOFEFz9UlYfs6ug-8,
|
|
143
143
|
nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
|
144
144
|
nucliadb/ingest/orm/entities.py,sha256=3_n6lKhBy2GsdmNmkh0_mvxP8md20OZsbtTNEmfJ8Hg,14888
|
145
145
|
nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
|
146
|
-
nucliadb/ingest/orm/knowledgebox.py,sha256=
|
146
|
+
nucliadb/ingest/orm/knowledgebox.py,sha256=Bfb4-MIQWlaJrQAUDbgs_iIsXCYjS7s5YiiGl_Jb4jo,23887
|
147
147
|
nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
|
148
148
|
nucliadb/ingest/orm/resource.py,sha256=oFD7APhmG1A72h7DTKumZWQRpIDM0o_FytP1P-CcNq0,45918
|
149
149
|
nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
|
@@ -193,7 +193,7 @@ nucliadb/search/__init__.py,sha256=tnypbqcH4nBHbGpkINudhKgdLKpwXQCvDtPchUlsyY4,1
|
|
193
193
|
nucliadb/search/app.py,sha256=-WEX1AZRA8R_9aeOo9ovOTwjXW_7VfwWN7N2ccSoqXg,3387
|
194
194
|
nucliadb/search/lifecycle.py,sha256=V_Pj5PRP0yyDY8d5LytO4X8p9HhN7UomqRG6Ri0UaFA,2206
|
195
195
|
nucliadb/search/openapi.py,sha256=t3Wo_4baTrfPftg2BHsyLWNZ1MYn7ZRdW7ht-wFOgRs,1016
|
196
|
-
nucliadb/search/predict.py,sha256=
|
196
|
+
nucliadb/search/predict.py,sha256=VJr5Itx8FE7CZIGYcP-fRgd2YGxAnP9Qj9NxiwWiwcc,22819
|
197
197
|
nucliadb/search/predict_models.py,sha256=ZAe0dneUsPmV9uBar57cCFADCGOrYDsJHuqKlA5zWag,5937
|
198
198
|
nucliadb/search/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
199
199
|
nucliadb/search/run.py,sha256=aFb-CXRi_C8YMpP_ivNj8KW1BYhADj88y8K9Lr_nUPI,1402
|
@@ -331,7 +331,7 @@ nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,
|
|
331
331
|
nucliadb/writer/api/v1/__init__.py,sha256=akI9A_jloNLb0dU4T5zjfdyvmSAiDeIdjAlzNx74FlU,1128
|
332
332
|
nucliadb/writer/api/v1/export_import.py,sha256=elf-EQY5DD3mhw8kWb9tQpDcbrF9sY6VFYqxQOjuVP0,8201
|
333
333
|
nucliadb/writer/api/v1/field.py,sha256=FySCMpcruSAKGeepeAlOihjwxyUPcDO73Uilq5VDWRk,18514
|
334
|
-
nucliadb/writer/api/v1/knowledgebox.py,sha256=
|
334
|
+
nucliadb/writer/api/v1/knowledgebox.py,sha256=PHEYDFa-sN5JrI8-EiVVg5FDOsRuCLT43kyAB4xt-xA,9530
|
335
335
|
nucliadb/writer/api/v1/learning_config.py,sha256=CKBjqcbewkfPwGUPLDWzZSpro6XkmCaVppe5Qtpu5Go,3117
|
336
336
|
nucliadb/writer/api/v1/resource.py,sha256=jV9HM-ID1PPYypfy4Sl4_9aSPF87v7gSJZUSzHjHcQ4,19740
|
337
337
|
nucliadb/writer/api/v1/router.py,sha256=RjuoWLpZer6Kl2BW_wznpNo6XL3BOpdTGqXZCn3QrrQ,1034
|
@@ -339,7 +339,7 @@ nucliadb/writer/api/v1/services.py,sha256=3AUjk-SmvqJx76v7y89DZx6oyasojPliGYeniR
|
|
339
339
|
nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
|
340
340
|
nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
|
341
341
|
nucliadb/writer/api/v1/upload.py,sha256=hLMHXSaqEOE-vjKjhIupgdx8klJc3mVQp_oMwx5N-7o,33800
|
342
|
-
nucliadb/writer/api/v1/vectorsets.py,sha256=
|
342
|
+
nucliadb/writer/api/v1/vectorsets.py,sha256=F3iMViL5G95_Tns4aO2SOA0DwAzxK2_P8MXxtd_XLRE,6973
|
343
343
|
nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
344
344
|
nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
|
345
345
|
nucliadb/writer/resource/basic.py,sha256=_zdAr110C7rtEzOKoBRMzPjAnQ0pAtRfGjB8qCzodvI,11767
|
@@ -349,13 +349,13 @@ nucliadb/writer/tus/__init__.py,sha256=huWpKnDnjsrKlBBJk30ta5vamlA-4x0TbPs_2Up8h
|
|
349
349
|
nucliadb/writer/tus/azure.py,sha256=XhWAlWTM0vmXcXtuEPYjjeEhuZjiZXZu8q9WsJ7omFE,4107
|
350
350
|
nucliadb/writer/tus/dm.py,sha256=bVoXqt_dpNvTjpffPYhj1JfqK6gfLoPr0hdkknUCZ9E,5488
|
351
351
|
nucliadb/writer/tus/exceptions.py,sha256=WfZSSjsHfoy63wUFlH3QoHx7FMoCNA1oKJmWpZZDnCo,2156
|
352
|
-
nucliadb/writer/tus/gcs.py,sha256=
|
352
|
+
nucliadb/writer/tus/gcs.py,sha256=OnE-YUnp7eyfWFlnh-vlGoxEPS8cUBSSmSm6iJ1Kva0,14079
|
353
353
|
nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,5193
|
354
354
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
355
355
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
356
356
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
357
|
-
nucliadb-6.3.4.
|
358
|
-
nucliadb-6.3.4.
|
359
|
-
nucliadb-6.3.4.
|
360
|
-
nucliadb-6.3.4.
|
361
|
-
nucliadb-6.3.4.
|
357
|
+
nucliadb-6.3.4.post3812.dist-info/METADATA,sha256=yjF1rSCHEhQiCv7vEdwAPyTtjrWrsKJgae-L39Y_zE8,4291
|
358
|
+
nucliadb-6.3.4.post3812.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
359
|
+
nucliadb-6.3.4.post3812.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
360
|
+
nucliadb-6.3.4.post3812.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
361
|
+
nucliadb-6.3.4.post3812.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|