nucliadb 6.3.4.post3729__py3-none-any.whl → 6.3.4.post3740__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0027_rollover_texts3.py +4 -2
- migrations/0033_rollover_nidx_relation_2.py +39 -0
- nucliadb/common/cluster/rollover.py +30 -10
- nucliadb/common/datamanagers/rollover.py +3 -3
- nucliadb/migrator/settings.py +7 -0
- {nucliadb-6.3.4.post3729.dist-info → nucliadb-6.3.4.post3740.dist-info}/METADATA +6 -6
- {nucliadb-6.3.4.post3729.dist-info → nucliadb-6.3.4.post3740.dist-info}/RECORD +10 -9
- {nucliadb-6.3.4.post3729.dist-info → nucliadb-6.3.4.post3740.dist-info}/WHEEL +0 -0
- {nucliadb-6.3.4.post3729.dist-info → nucliadb-6.3.4.post3740.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.4.post3729.dist-info → nucliadb-6.3.4.post3740.dist-info}/top_level.txt +0 -0
@@ -27,7 +27,6 @@ import logging
|
|
27
27
|
|
28
28
|
from nucliadb import learning_proxy
|
29
29
|
from nucliadb.common import datamanagers
|
30
|
-
from nucliadb.common.cluster.rollover import rollover_kb_index
|
31
30
|
from nucliadb.migrator.context import ExecutionContext
|
32
31
|
|
33
32
|
logger = logging.getLogger(__name__)
|
@@ -38,7 +37,10 @@ async def migrate(context: ExecutionContext) -> None: ...
|
|
38
37
|
|
39
38
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
40
39
|
await maybe_fix_vector_dimensions(context, kbid)
|
41
|
-
|
40
|
+
|
41
|
+
# We only need 1 rollover migration defined at a time; otherwise, we will
|
42
|
+
# possibly run many for a kb when we only ever need to run one
|
43
|
+
# await rollover_kb_index(context, kbid)
|
42
44
|
|
43
45
|
|
44
46
|
async def maybe_fix_vector_dimensions(context: ExecutionContext, kbid: str) -> None:
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #33
|
22
|
+
|
23
|
+
Rollover to update to nidx_relation v2
|
24
|
+
|
25
|
+
"""
|
26
|
+
|
27
|
+
import logging
|
28
|
+
|
29
|
+
from nucliadb.common.cluster.rollover import rollover_kb_index
|
30
|
+
from nucliadb.migrator.context import ExecutionContext
|
31
|
+
|
32
|
+
logger = logging.getLogger(__name__)
|
33
|
+
|
34
|
+
|
35
|
+
async def migrate(context: ExecutionContext) -> None: ...
|
36
|
+
|
37
|
+
|
38
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
39
|
+
await rollover_kb_index(context, kbid)
|
@@ -31,6 +31,7 @@ from nucliadb.common.external_index_providers.manager import (
|
|
31
31
|
get_external_index_manager,
|
32
32
|
)
|
33
33
|
from nucliadb.common.nidx import get_nidx_fake_node
|
34
|
+
from nucliadb.migrator.settings import settings
|
34
35
|
from nucliadb_protos import writer_pb2
|
35
36
|
from nucliadb_telemetry import errors
|
36
37
|
|
@@ -43,6 +44,8 @@ from .utils import (
|
|
43
44
|
|
44
45
|
logger = logging.getLogger(__name__)
|
45
46
|
|
47
|
+
resource_index_semaphore = asyncio.Semaphore(settings.max_concurrent_rollover_resources)
|
48
|
+
|
46
49
|
|
47
50
|
class UnexpectedRolloverError(Exception):
|
48
51
|
pass
|
@@ -234,10 +237,33 @@ async def index_to_rollover_index(
|
|
234
237
|
# now index on all new shards only
|
235
238
|
while True:
|
236
239
|
async with datamanagers.with_transaction() as txn:
|
237
|
-
|
238
|
-
|
240
|
+
resource_ids = await datamanagers.rollover.get_to_index(
|
241
|
+
txn, kbid=kbid, count=settings.max_concurrent_rollover_resources
|
242
|
+
)
|
243
|
+
if resource_ids is None:
|
239
244
|
break
|
240
245
|
|
246
|
+
batch = [
|
247
|
+
_index_resource_to_rollover_index(app_context, rollover_shards, kbid, rid, external)
|
248
|
+
for rid in resource_ids
|
249
|
+
]
|
250
|
+
await asyncio.gather(*batch)
|
251
|
+
|
252
|
+
async with datamanagers.with_transaction() as txn:
|
253
|
+
state.resources_indexed = True
|
254
|
+
await datamanagers.rollover.set_rollover_state(txn, kbid=kbid, state=state)
|
255
|
+
await datamanagers.rollover.update_kb_rollover_shards(txn, kbid=kbid, kb_shards=rollover_shards)
|
256
|
+
await txn.commit()
|
257
|
+
|
258
|
+
|
259
|
+
async def _index_resource_to_rollover_index(
|
260
|
+
app_context: ApplicationContext,
|
261
|
+
rollover_shards: writer_pb2.Shards,
|
262
|
+
kbid: str,
|
263
|
+
resource_id: str,
|
264
|
+
external: Optional[ExternalIndexManager] = None,
|
265
|
+
) -> None:
|
266
|
+
async with resource_index_semaphore:
|
241
267
|
async with datamanagers.with_transaction() as txn:
|
242
268
|
shard_id = await datamanagers.resources.get_resource_shard_id(
|
243
269
|
txn, kbid=kbid, rid=resource_id
|
@@ -250,7 +276,7 @@ async def index_to_rollover_index(
|
|
250
276
|
async with datamanagers.with_transaction() as txn:
|
251
277
|
await datamanagers.rollover.remove_to_index(txn, kbid=kbid, resource=resource_id)
|
252
278
|
await txn.commit()
|
253
|
-
|
279
|
+
return
|
254
280
|
|
255
281
|
shard = _get_shard(rollover_shards, shard_id)
|
256
282
|
if shard is None: # pragma: no cover
|
@@ -268,7 +294,7 @@ async def index_to_rollover_index(
|
|
268
294
|
async with datamanagers.with_transaction() as txn:
|
269
295
|
await datamanagers.rollover.remove_to_index(txn, kbid=kbid, resource=resource_id)
|
270
296
|
await txn.commit()
|
271
|
-
|
297
|
+
return
|
272
298
|
|
273
299
|
if external is not None:
|
274
300
|
await external.index_resource(resource_id, index_message, to_rollover_indexes=True)
|
@@ -287,12 +313,6 @@ async def index_to_rollover_index(
|
|
287
313
|
)
|
288
314
|
await txn.commit()
|
289
315
|
|
290
|
-
async with datamanagers.with_transaction() as txn:
|
291
|
-
state.resources_indexed = True
|
292
|
-
await datamanagers.rollover.set_rollover_state(txn, kbid=kbid, state=state)
|
293
|
-
await datamanagers.rollover.update_kb_rollover_shards(txn, kbid=kbid, kb_shards=rollover_shards)
|
294
|
-
await txn.commit()
|
295
|
-
|
296
316
|
|
297
317
|
async def cutover_index(
|
298
318
|
app_context: ApplicationContext, kbid: str, external: Optional[ExternalIndexManager] = None
|
@@ -91,11 +91,11 @@ async def add_batch_to_index(txn: Transaction, *, kbid: str, batch: list[str]) -
|
|
91
91
|
await txn.set(key, b"")
|
92
92
|
|
93
93
|
|
94
|
-
async def get_to_index(txn: Transaction, *, kbid: str) -> Optional[str]:
|
94
|
+
async def get_to_index(txn: Transaction, *, kbid: str, count: int) -> Optional[list[str]]:
|
95
95
|
key = KB_ROLLOVER_RESOURCES_TO_INDEX.format(kbid=kbid, resource="")
|
96
|
-
found = [key async for key in txn.keys(key, count=1)]
|
96
|
+
found = [key async for key in txn.keys(key, count=count)]
|
97
97
|
if found:
|
98
|
-
return found[0].split("/")[-1]
|
98
|
+
return [f.split("/")[-1] for f in found]
|
99
99
|
return None
|
100
100
|
|
101
101
|
|
nucliadb/migrator/settings.py
CHANGED
@@ -29,3 +29,10 @@ class Settings(pydantic_settings.BaseSettings):
|
|
29
29
|
default=5,
|
30
30
|
description="Maximum number of concurrent KB migrations allowed.",
|
31
31
|
)
|
32
|
+
max_concurrent_rollover_resources: int = pydantic.Field(
|
33
|
+
default=5,
|
34
|
+
description="Maximum number of concurrent index operations during rollover.",
|
35
|
+
)
|
36
|
+
|
37
|
+
|
38
|
+
settings = Settings()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.3.4.post3729
|
3
|
+
Version: 6.3.4.post3740
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3729
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3729
|
25
|
-
Requires-Dist: nucliadb-protos>=6.3.4.post3729
|
26
|
-
Requires-Dist: nucliadb-models>=6.3.4.post3729
|
27
|
-
Requires-Dist: nidx-protos>=6.3.4.post3729
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3740
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3740
|
25
|
+
Requires-Dist: nucliadb-protos>=6.3.4.post3740
|
26
|
+
Requires-Dist: nucliadb-models>=6.3.4.post3740
|
27
|
+
Requires-Dist: nidx-protos>=6.3.4.post3740
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -22,12 +22,13 @@ migrations/0022_fix_paragraph_deletion_bug.py,sha256=-tH342VXF-8xwc_h3P1cYaUtTT1
|
|
22
22
|
migrations/0023_backfill_pg_catalog.py,sha256=gw22pU5cAtg2a7n7xVaVqT2itjAoDMNtzWwTllwqIvg,2993
|
23
23
|
migrations/0025_assign_models_to_kbs_v2.py,sha256=QC6nDF2Wyc6zQMqNoKzvz-3507UpDyJztlbIsvlwHss,4678
|
24
24
|
migrations/0026_fix_high_cardinality_content_types.py,sha256=BsbBkvZDzjRHQfoouZNNtHA1xMxTKm8wOVnp_WAS9j4,2322
|
25
|
-
migrations/0027_rollover_texts3.py,sha256=
|
25
|
+
migrations/0027_rollover_texts3.py,sha256=J7tRVr-OooLuZCFkhYquOPHFRMiHNQT1klT8r0d8a_8,2815
|
26
26
|
migrations/0028_extracted_vectors_reference.py,sha256=49DHCIlBpjofU8cYVHTdWv0EBIlnPTWV2WCezf0rJUo,2392
|
27
27
|
migrations/0029_backfill_field_status.py,sha256=QWF69n1da9lpRnbEpgbqPjSQ-Wfn6rMC7Enz6bBYGt4,5663
|
28
28
|
migrations/0030_label_deduplication.py,sha256=y14TxtCMi3-TBMz_eZoyyPDHNlZb29taJujlDuHumsA,2008
|
29
29
|
migrations/0031_languages_deduplication.py,sha256=o6va6lP3oTRT1uSzp5MIhHHBFbhCxSZ-oNlXXpiAdUo,2340
|
30
30
|
migrations/0032_remove_old_relations.py,sha256=ZaswhmRRsLgw6DVYVdT7cP-gdBf4X3PL9fklUXrmSD0,2318
|
31
|
+
migrations/0033_rollover_nidx_relation_2.py,sha256=LLhw7LknMVl1mu_mQlsijVUYDe7F87eZx2N8IyeZUK8,1244
|
31
32
|
migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
32
33
|
migrations/pg/0001_bootstrap.py,sha256=Fsqkeof50m7fKiJN05kmNEMwiKDlOrAgcAS5sLLkutA,1256
|
33
34
|
migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
|
@@ -61,7 +62,7 @@ nucliadb/common/cluster/exceptions.py,sha256=t7v_l93t44l2tQpdQXgO_w-c4YZRcaayOz1
|
|
61
62
|
nucliadb/common/cluster/grpc_node_dummy.py,sha256=LxONv0mhDFhx7mI91qqGfQlQ-R0qOGDYaxhXoBHLXaE,3548
|
62
63
|
nucliadb/common/cluster/manager.py,sha256=KIzqAYGgdVK3GicJ9LdLoei8arWZ7H60imbc32USPj4,12754
|
63
64
|
nucliadb/common/cluster/rebalance.py,sha256=cLUlR08SsqmnoA_9GDflV6k2tXmkAPpyFxZErzp45vo,8754
|
64
|
-
nucliadb/common/cluster/rollover.py,sha256=
|
65
|
+
nucliadb/common/cluster/rollover.py,sha256=QCvns28pr81geSQvC37dGKa-nYGu4_IfZwJxyzKg6z4,26042
|
65
66
|
nucliadb/common/cluster/settings.py,sha256=JPwV_0U_i618Tn66GWUq6qCKNjy4TWkGEGld9GwH5uk,2048
|
66
67
|
nucliadb/common/cluster/utils.py,sha256=7nQvnVFxM4XV7J560R8hUA-GPzrgD19UlQxHrl4mZUc,4687
|
67
68
|
nucliadb/common/cluster/standalone/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
|
@@ -78,7 +79,7 @@ nucliadb/common/datamanagers/kb.py,sha256=P7EhF4tApIUG2jw_HH1oMufTKG9__kuOLKnrCN
|
|
78
79
|
nucliadb/common/datamanagers/labels.py,sha256=Zm0GQpSPoGXEEysUY7VsDIcyKSIIQsMVphj23IyM9_c,4502
|
79
80
|
nucliadb/common/datamanagers/processing.py,sha256=ByxdZzdbAfJGqC6__mY-zryjk040TyQfcUq3rxujeoY,1587
|
80
81
|
nucliadb/common/datamanagers/resources.py,sha256=cuwcVL-GEjS1VHigJtz5SG_dRhl09UADQ9MSYOOqs70,10779
|
81
|
-
nucliadb/common/datamanagers/rollover.py,sha256=
|
82
|
+
nucliadb/common/datamanagers/rollover.py,sha256=BM1hJ2cEU91xekM5PtmnA0SN3i3w0WmodiyTpO8YZZs,7865
|
82
83
|
nucliadb/common/datamanagers/search_configurations.py,sha256=O-8eW43CE46GcxO6TB5hpi27NBguv4BL4SI1vLlN8os,2463
|
83
84
|
nucliadb/common/datamanagers/synonyms.py,sha256=zk3GEH38KF5vV_VcuL6DCg-2JwgXJfQl7Io6VPqv2cw,1566
|
84
85
|
nucliadb/common/datamanagers/utils.py,sha256=McHlXvE4P3x-bBY3pr0n8djbTDQvI1G5WusJrnRdhLA,1827
|
@@ -163,7 +164,7 @@ nucliadb/migrator/datamanager.py,sha256=sPrFvgn0aOyWKUUkwaaUmAqqn3jKlVe1IpIXrkoK
|
|
163
164
|
nucliadb/migrator/exceptions.py,sha256=jTj3YhKmFwUyjjgoKUNoCAiGrpEbB64X1Um212nSNQ8,889
|
164
165
|
nucliadb/migrator/migrator.py,sha256=tpdKvqqB0p1klcX2LjUD0Br6brcxxfJMVMMoKABmPD4,10834
|
165
166
|
nucliadb/migrator/models.py,sha256=3PJkL2PGvKgIG0KIBv4H5XCsOVmwWMlRV3m0ntDj10Q,1145
|
166
|
-
nucliadb/migrator/settings.py,sha256=
|
167
|
+
nucliadb/migrator/settings.py,sha256=3eK6PIwqqtoCea9V7-RMjVfZC_0BC5DqPPlfo9XMyNE,1340
|
167
168
|
nucliadb/migrator/utils.py,sha256=NgUreUvON8_nWEzTxELBMWlfV7E6-6qi-g0DMEbVEz4,2885
|
168
169
|
nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
169
170
|
nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
|
@@ -353,8 +354,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
353
354
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
354
355
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
355
356
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
356
|
-
nucliadb-6.3.4.
|
357
|
-
nucliadb-6.3.4.post3729.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
358
|
-
nucliadb-6.3.4.post3729.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
359
|
-
nucliadb-6.3.4.post3729.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
360
|
-
nucliadb-6.3.4.post3729.dist-info/RECORD,,
|
357
|
+
nucliadb-6.3.4.post3740.dist-info/METADATA,sha256=kXv8CG9jprFzMIeB6zi4A3R9gUN7RQ9J2CiTGBolbu0,4291
|
358
|
+
nucliadb-6.3.4.post3740.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
359
|
+
nucliadb-6.3.4.post3740.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
360
|
+
nucliadb-6.3.4.post3740.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
361
|
+
nucliadb-6.3.4.post3740.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|