nucliadb 6.5.0.post4408__py3-none-any.whl → 6.5.0.post4413__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/train/api/utils.py +1 -1
- nucliadb/train/api/v1/shards.py +5 -1
- nucliadb/train/api/v1/trainset.py +6 -2
- nucliadb/train/generator.py +16 -5
- {nucliadb-6.5.0.post4408.dist-info → nucliadb-6.5.0.post4413.dist-info}/METADATA +6 -6
- {nucliadb-6.5.0.post4408.dist-info → nucliadb-6.5.0.post4413.dist-info}/RECORD +9 -9
- {nucliadb-6.5.0.post4408.dist-info → nucliadb-6.5.0.post4413.dist-info}/WHEEL +0 -0
- {nucliadb-6.5.0.post4408.dist-info → nucliadb-6.5.0.post4413.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.5.0.post4408.dist-info → nucliadb-6.5.0.post4413.dist-info}/top_level.txt +0 -0
nucliadb/train/api/utils.py
CHANGED
@@ -24,7 +24,7 @@ from typing import Optional
|
|
24
24
|
from nucliadb.train.utils import get_shard_manager
|
25
25
|
|
26
26
|
|
27
|
-
async def get_kb_partitions(kbid: str, prefix: Optional[str] = None):
|
27
|
+
async def get_kb_partitions(kbid: str, prefix: Optional[str] = None) -> list[str]:
|
28
28
|
shard_manager = get_shard_manager()
|
29
29
|
shards = await shard_manager.get_shards_by_kbid_inner(kbid=kbid)
|
30
30
|
valid_shards = []
|
nucliadb/train/api/v1/shards.py
CHANGED
@@ -26,6 +26,7 @@ from fastapi import HTTPException, Request
|
|
26
26
|
from fastapi.responses import StreamingResponse
|
27
27
|
from fastapi_versioning import version
|
28
28
|
|
29
|
+
from nucliadb.common.cluster.exceptions import ShardNotFound
|
29
30
|
from nucliadb.train.api.utils import get_kb_partitions
|
30
31
|
from nucliadb.train.api.v1.router import KB_PREFIX, api
|
31
32
|
from nucliadb.train.generator import generate_train_data
|
@@ -49,7 +50,10 @@ async def object_get_response(
|
|
49
50
|
kbid: str,
|
50
51
|
shard: str,
|
51
52
|
) -> StreamingResponse:
|
52
|
-
|
53
|
+
try:
|
54
|
+
partitions = await get_kb_partitions(kbid, prefix=shard)
|
55
|
+
except ShardNotFound:
|
56
|
+
raise HTTPException(status_code=404, detail=f"No shards found for kb")
|
53
57
|
if shard not in partitions:
|
54
58
|
raise HTTPException(status_code=404, detail=f"Partition {shard} not found")
|
55
59
|
trainset, filter_expression = await get_trainset(request)
|
@@ -20,9 +20,10 @@
|
|
20
20
|
|
21
21
|
from typing import Optional
|
22
22
|
|
23
|
-
from fastapi import Request
|
23
|
+
from fastapi import HTTPException, Request
|
24
24
|
from fastapi_versioning import version
|
25
25
|
|
26
|
+
from nucliadb.common.cluster.exceptions import ShardNotFound
|
26
27
|
from nucliadb.train.api.utils import get_kb_partitions
|
27
28
|
from nucliadb.train.api.v1.router import KB_PREFIX, api
|
28
29
|
from nucliadb_models.resource import NucliaDBRoles
|
@@ -57,5 +58,8 @@ async def get_partitions_prefix(request: Request, kbid: str, prefix: str) -> Tra
|
|
57
58
|
|
58
59
|
|
59
60
|
async def get_partitions(kbid: str, prefix: Optional[str] = None) -> TrainSetPartitions:
|
60
|
-
|
61
|
+
try:
|
62
|
+
all_keys = await get_kb_partitions(kbid, prefix)
|
63
|
+
except ShardNotFound:
|
64
|
+
raise HTTPException(status_code=404, detail=f"No shards found for kb")
|
61
65
|
return TrainSetPartitions(partitions=all_keys)
|
nucliadb/train/generator.py
CHANGED
@@ -17,12 +17,14 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
|
21
20
|
from typing import AsyncIterator, Callable, Optional
|
22
21
|
|
23
22
|
from fastapi import HTTPException
|
23
|
+
from grpc import StatusCode
|
24
|
+
from grpc.aio import AioRpcError
|
24
25
|
|
25
26
|
from nucliadb.common.cache import resource_cache
|
27
|
+
from nucliadb.train import logger
|
26
28
|
from nucliadb.train.generators.field_classifier import (
|
27
29
|
field_classification_batch_generator,
|
28
30
|
)
|
@@ -93,7 +95,16 @@ async def generate_train_data(
|
|
93
95
|
# This cache size is an arbitrary number, once we have a metric in place and
|
94
96
|
# we analyze memory consumption, we can adjust it with more knowledge
|
95
97
|
with resource_cache(size=settings.resource_cache_size):
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
98
|
+
try:
|
99
|
+
async for item in batch_generator(kbid, trainset, shard_replica_id, filter_expression):
|
100
|
+
payload = item.SerializeToString()
|
101
|
+
yield len(payload).to_bytes(4, byteorder="big", signed=False)
|
102
|
+
yield payload
|
103
|
+
except AioRpcError as exc:
|
104
|
+
if exc.code() == StatusCode.NOT_FOUND:
|
105
|
+
logger.warning(
|
106
|
+
f"Shard not found in nidx. Halting the stream",
|
107
|
+
extra={"kbid": kbid, "shard": shard, "shard_replica_id": shard_replica_id},
|
108
|
+
)
|
109
|
+
return
|
110
|
+
raise
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.5.0.post4408
|
3
|
+
Version: 6.5.0.post4413
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: <4,>=3.9
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4408
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4408
|
24
|
-
Requires-Dist: nucliadb-protos>=6.5.0.post4408
|
25
|
-
Requires-Dist: nucliadb-models>=6.5.0.post4408
|
26
|
-
Requires-Dist: nidx-protos>=6.5.0.post4408
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4413
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4413
|
24
|
+
Requires-Dist: nucliadb-protos>=6.5.0.post4413
|
25
|
+
Requires-Dist: nucliadb-models>=6.5.0.post4413
|
26
|
+
Requires-Dist: nidx-protos>=6.5.0.post4413
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
28
28
|
Requires-Dist: nuclia-models>=0.24.2
|
29
29
|
Requires-Dist: uvicorn[standard]
|
@@ -303,7 +303,7 @@ nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
|
|
303
303
|
nucliadb/tests/vectors.py,sha256=CcNKx-E8LPpyvRyljbmb-Tn_wST9Juw2CBoogWrKiTk,62843
|
304
304
|
nucliadb/train/__init__.py,sha256=NVwe5yULoHXb80itIJT8YJYEz2xbiOPQ7_OMys6XJw8,1301
|
305
305
|
nucliadb/train/app.py,sha256=z6xlGVVVaJmZZmLPIVTgkjD-wIz5b0NYlXAQp7hBHYw,2652
|
306
|
-
nucliadb/train/generator.py,sha256=
|
306
|
+
nucliadb/train/generator.py,sha256=H8JLkQ23QQVo4CAdg1ZZh_cncPz7COEfaDu1l-h-0hM,4616
|
307
307
|
nucliadb/train/lifecycle.py,sha256=3HadM4GRsYb2m-v4jtdr9C-KBEBx8GlrJDArPYi3SWQ,1960
|
308
308
|
nucliadb/train/models.py,sha256=BmgmMjDsu_1Ih5JDAqo6whhume90q0ASJcDP9dkMQm8,1198
|
309
309
|
nucliadb/train/nodes.py,sha256=6QD9ZnAacPyFCzs1jxJzsL3CkVNyADvsfnPjjrvn-NU,5610
|
@@ -317,11 +317,11 @@ nucliadb/train/upload.py,sha256=fTjH1KEL-0ogf3LV0T6ODO0QdPGwdZShSUtFUCAcUlA,3256
|
|
317
317
|
nucliadb/train/uploader.py,sha256=xdLGz1ToDue9Q_M8A-_KYkO-V6fWKYOZQ6IGM4FuwWA,6424
|
318
318
|
nucliadb/train/utils.py,sha256=OxQ8No19nxOxUhwNYDICNt8n2H-gowkAgu2Vt3Hatzk,3163
|
319
319
|
nucliadb/train/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
320
|
-
nucliadb/train/api/utils.py,sha256=
|
320
|
+
nucliadb/train/api/utils.py,sha256=7verIqWRzDVZUsPrc2OQMGSKcd_01aG2lheEc0z5rP8,1320
|
321
321
|
nucliadb/train/api/v1/__init__.py,sha256=P4vCIv93r_Cq1WFDDNjy_Wg7zBkzx0S4euXwfPy1LA4,928
|
322
322
|
nucliadb/train/api/v1/router.py,sha256=ukdxn5q1oMar6NSPobgJczWsSxLCHw6DYKlb3zwCiSo,910
|
323
|
-
nucliadb/train/api/v1/shards.py,sha256=
|
324
|
-
nucliadb/train/api/v1/trainset.py,sha256=
|
323
|
+
nucliadb/train/api/v1/shards.py,sha256=0TLDWdrMwLAD3lb0AmdvpJF_gALxK-dw0ns_lfGSOzY,3553
|
324
|
+
nucliadb/train/api/v1/trainset.py,sha256=1Iep1Ze0mmRREHsP3mpJ8cC6sU-_avlqh5ItTRQnxP8,2307
|
325
325
|
nucliadb/train/generators/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
326
326
|
nucliadb/train/generators/field_classifier.py,sha256=UcA5snqLNjIHw0VBzXo9ZtSua6o7wBU3tV9_d5qWpRA,3542
|
327
327
|
nucliadb/train/generators/field_streaming.py,sha256=p0xu39D5gaSQc-LagKwpgsVaxm2ULTkWZDPi-Ad1lHc,8378
|
@@ -370,8 +370,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
370
370
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
371
371
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
372
372
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
373
|
-
nucliadb-6.5.0.
|
374
|
-
nucliadb-6.5.0.
|
375
|
-
nucliadb-6.5.0.
|
376
|
-
nucliadb-6.5.0.
|
377
|
-
nucliadb-6.5.0.
|
373
|
+
nucliadb-6.5.0.post4413.dist-info/METADATA,sha256=B1pDmizFK3zLtNJQUCZAOb4jLeQgnyw3vcryDfJO5y4,4152
|
374
|
+
nucliadb-6.5.0.post4413.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
375
|
+
nucliadb-6.5.0.post4413.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
376
|
+
nucliadb-6.5.0.post4413.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
377
|
+
nucliadb-6.5.0.post4413.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|