nucliadb 6.5.0.post4408__py3-none-any.whl → 6.5.0.post4413__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,7 +24,7 @@ from typing import Optional
24
24
  from nucliadb.train.utils import get_shard_manager
25
25
 
26
26
 
27
- async def get_kb_partitions(kbid: str, prefix: Optional[str] = None):
27
+ async def get_kb_partitions(kbid: str, prefix: Optional[str] = None) -> list[str]:
28
28
  shard_manager = get_shard_manager()
29
29
  shards = await shard_manager.get_shards_by_kbid_inner(kbid=kbid)
30
30
  valid_shards = []
@@ -26,6 +26,7 @@ from fastapi import HTTPException, Request
26
26
  from fastapi.responses import StreamingResponse
27
27
  from fastapi_versioning import version
28
28
 
29
+ from nucliadb.common.cluster.exceptions import ShardNotFound
29
30
  from nucliadb.train.api.utils import get_kb_partitions
30
31
  from nucliadb.train.api.v1.router import KB_PREFIX, api
31
32
  from nucliadb.train.generator import generate_train_data
@@ -49,7 +50,10 @@ async def object_get_response(
49
50
  kbid: str,
50
51
  shard: str,
51
52
  ) -> StreamingResponse:
52
- partitions = await get_kb_partitions(kbid, shard)
53
+ try:
54
+ partitions = await get_kb_partitions(kbid, prefix=shard)
55
+ except ShardNotFound:
56
+ raise HTTPException(status_code=404, detail=f"No shards found for kb")
53
57
  if shard not in partitions:
54
58
  raise HTTPException(status_code=404, detail=f"Partition {shard} not found")
55
59
  trainset, filter_expression = await get_trainset(request)
@@ -20,9 +20,10 @@
20
20
 
21
21
  from typing import Optional
22
22
 
23
- from fastapi import Request
23
+ from fastapi import HTTPException, Request
24
24
  from fastapi_versioning import version
25
25
 
26
+ from nucliadb.common.cluster.exceptions import ShardNotFound
26
27
  from nucliadb.train.api.utils import get_kb_partitions
27
28
  from nucliadb.train.api.v1.router import KB_PREFIX, api
28
29
  from nucliadb_models.resource import NucliaDBRoles
@@ -57,5 +58,8 @@ async def get_partitions_prefix(request: Request, kbid: str, prefix: str) -> Tra
57
58
 
58
59
 
59
60
  async def get_partitions(kbid: str, prefix: Optional[str] = None) -> TrainSetPartitions:
60
- all_keys = await get_kb_partitions(kbid, prefix)
61
+ try:
62
+ all_keys = await get_kb_partitions(kbid, prefix)
63
+ except ShardNotFound:
64
+ raise HTTPException(status_code=404, detail=f"No shards found for kb")
61
65
  return TrainSetPartitions(partitions=all_keys)
@@ -17,12 +17,14 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
-
21
20
  from typing import AsyncIterator, Callable, Optional
22
21
 
23
22
  from fastapi import HTTPException
23
+ from grpc import StatusCode
24
+ from grpc.aio import AioRpcError
24
25
 
25
26
  from nucliadb.common.cache import resource_cache
27
+ from nucliadb.train import logger
26
28
  from nucliadb.train.generators.field_classifier import (
27
29
  field_classification_batch_generator,
28
30
  )
@@ -93,7 +95,16 @@ async def generate_train_data(
93
95
  # This cache size is an arbitrary number, once we have a metric in place and
94
96
  # we analyze memory consumption, we can adjust it with more knowledge
95
97
  with resource_cache(size=settings.resource_cache_size):
96
- async for item in batch_generator(kbid, trainset, shard_replica_id, filter_expression):
97
- payload = item.SerializeToString()
98
- yield len(payload).to_bytes(4, byteorder="big", signed=False)
99
- yield payload
98
+ try:
99
+ async for item in batch_generator(kbid, trainset, shard_replica_id, filter_expression):
100
+ payload = item.SerializeToString()
101
+ yield len(payload).to_bytes(4, byteorder="big", signed=False)
102
+ yield payload
103
+ except AioRpcError as exc:
104
+ if exc.code() == StatusCode.NOT_FOUND:
105
+ logger.warning(
106
+ f"Shard not found in nidx. Halting the stream",
107
+ extra={"kbid": kbid, "shard": shard, "shard_replica_id": shard_replica_id},
108
+ )
109
+ return
110
+ raise
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.5.0.post4408
3
+ Version: 6.5.0.post4413
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4408
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4408
24
- Requires-Dist: nucliadb-protos>=6.5.0.post4408
25
- Requires-Dist: nucliadb-models>=6.5.0.post4408
26
- Requires-Dist: nidx-protos>=6.5.0.post4408
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4413
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4413
24
+ Requires-Dist: nucliadb-protos>=6.5.0.post4413
25
+ Requires-Dist: nucliadb-models>=6.5.0.post4413
26
+ Requires-Dist: nidx-protos>=6.5.0.post4413
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.24.2
29
29
  Requires-Dist: uvicorn[standard]
@@ -303,7 +303,7 @@ nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
303
303
  nucliadb/tests/vectors.py,sha256=CcNKx-E8LPpyvRyljbmb-Tn_wST9Juw2CBoogWrKiTk,62843
304
304
  nucliadb/train/__init__.py,sha256=NVwe5yULoHXb80itIJT8YJYEz2xbiOPQ7_OMys6XJw8,1301
305
305
  nucliadb/train/app.py,sha256=z6xlGVVVaJmZZmLPIVTgkjD-wIz5b0NYlXAQp7hBHYw,2652
306
- nucliadb/train/generator.py,sha256=fwFYal7VsV0EP7J_g3IOJ-WLpjwqrVo0gEP7vxIlxGs,4152
306
+ nucliadb/train/generator.py,sha256=H8JLkQ23QQVo4CAdg1ZZh_cncPz7COEfaDu1l-h-0hM,4616
307
307
  nucliadb/train/lifecycle.py,sha256=3HadM4GRsYb2m-v4jtdr9C-KBEBx8GlrJDArPYi3SWQ,1960
308
308
  nucliadb/train/models.py,sha256=BmgmMjDsu_1Ih5JDAqo6whhume90q0ASJcDP9dkMQm8,1198
309
309
  nucliadb/train/nodes.py,sha256=6QD9ZnAacPyFCzs1jxJzsL3CkVNyADvsfnPjjrvn-NU,5610
@@ -317,11 +317,11 @@ nucliadb/train/upload.py,sha256=fTjH1KEL-0ogf3LV0T6ODO0QdPGwdZShSUtFUCAcUlA,3256
317
317
  nucliadb/train/uploader.py,sha256=xdLGz1ToDue9Q_M8A-_KYkO-V6fWKYOZQ6IGM4FuwWA,6424
318
318
  nucliadb/train/utils.py,sha256=OxQ8No19nxOxUhwNYDICNt8n2H-gowkAgu2Vt3Hatzk,3163
319
319
  nucliadb/train/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
320
- nucliadb/train/api/utils.py,sha256=1E5h1dS3dLppXD4k6qH6jmaY0WYa_ChaNHiTW9y336Q,1307
320
+ nucliadb/train/api/utils.py,sha256=7verIqWRzDVZUsPrc2OQMGSKcd_01aG2lheEc0z5rP8,1320
321
321
  nucliadb/train/api/v1/__init__.py,sha256=P4vCIv93r_Cq1WFDDNjy_Wg7zBkzx0S4euXwfPy1LA4,928
322
322
  nucliadb/train/api/v1/router.py,sha256=ukdxn5q1oMar6NSPobgJczWsSxLCHw6DYKlb3zwCiSo,910
323
- nucliadb/train/api/v1/shards.py,sha256=olgjQFKeYSSCbpYhglCRq6Q9WsM9hxOZgk6mOBb4g0o,3367
324
- nucliadb/train/api/v1/trainset.py,sha256=kpnpDgiMWr1FKHZJgwH7hue5kzilA8-i9X0YHlNeHuU,2113
323
+ nucliadb/train/api/v1/shards.py,sha256=0TLDWdrMwLAD3lb0AmdvpJF_gALxK-dw0ns_lfGSOzY,3553
324
+ nucliadb/train/api/v1/trainset.py,sha256=1Iep1Ze0mmRREHsP3mpJ8cC6sU-_avlqh5ItTRQnxP8,2307
325
325
  nucliadb/train/generators/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
326
326
  nucliadb/train/generators/field_classifier.py,sha256=UcA5snqLNjIHw0VBzXo9ZtSua6o7wBU3tV9_d5qWpRA,3542
327
327
  nucliadb/train/generators/field_streaming.py,sha256=p0xu39D5gaSQc-LagKwpgsVaxm2ULTkWZDPi-Ad1lHc,8378
@@ -370,8 +370,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
370
370
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
371
371
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
372
372
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
373
- nucliadb-6.5.0.post4408.dist-info/METADATA,sha256=5PRi3_KM32DTF2_BSWmfndESWte8q1aH9fPh0SYJ6aE,4152
374
- nucliadb-6.5.0.post4408.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
375
- nucliadb-6.5.0.post4408.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
376
- nucliadb-6.5.0.post4408.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
377
- nucliadb-6.5.0.post4408.dist-info/RECORD,,
373
+ nucliadb-6.5.0.post4413.dist-info/METADATA,sha256=B1pDmizFK3zLtNJQUCZAOb4jLeQgnyw3vcryDfJO5y4,4152
374
+ nucliadb-6.5.0.post4413.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
375
+ nucliadb-6.5.0.post4413.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
376
+ nucliadb-6.5.0.post4413.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
377
+ nucliadb-6.5.0.post4413.dist-info/RECORD,,