nucliadb 6.3.4.post3729__py3-none-any.whl → 6.3.4.post3740__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -27,7 +27,6 @@ import logging
27
27
 
28
28
  from nucliadb import learning_proxy
29
29
  from nucliadb.common import datamanagers
30
- from nucliadb.common.cluster.rollover import rollover_kb_index
31
30
  from nucliadb.migrator.context import ExecutionContext
32
31
 
33
32
  logger = logging.getLogger(__name__)
@@ -38,7 +37,10 @@ async def migrate(context: ExecutionContext) -> None: ...
38
37
 
39
38
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
40
39
  await maybe_fix_vector_dimensions(context, kbid)
41
- await rollover_kb_index(context, kbid)
40
+
41
+ # We only need 1 rollover migration defined at a time; otherwise, we will
42
+ # possibly run many for a kb when we only ever need to run one
43
+ # await rollover_kb_index(context, kbid)
42
44
 
43
45
 
44
46
  async def maybe_fix_vector_dimensions(context: ExecutionContext, kbid: str) -> None:
@@ -0,0 +1,39 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #33
22
+
23
+ Rollover to update to nidx_relation v2
24
+
25
+ """
26
+
27
+ import logging
28
+
29
+ from nucliadb.common.cluster.rollover import rollover_kb_index
30
+ from nucliadb.migrator.context import ExecutionContext
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
+ async def migrate(context: ExecutionContext) -> None: ...
36
+
37
+
38
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
39
+ await rollover_kb_index(context, kbid)
@@ -31,6 +31,7 @@ from nucliadb.common.external_index_providers.manager import (
31
31
  get_external_index_manager,
32
32
  )
33
33
  from nucliadb.common.nidx import get_nidx_fake_node
34
+ from nucliadb.migrator.settings import settings
34
35
  from nucliadb_protos import writer_pb2
35
36
  from nucliadb_telemetry import errors
36
37
 
@@ -43,6 +44,8 @@ from .utils import (
43
44
 
44
45
  logger = logging.getLogger(__name__)
45
46
 
47
+ resource_index_semaphore = asyncio.Semaphore(settings.max_concurrent_rollover_resources)
48
+
46
49
 
47
50
  class UnexpectedRolloverError(Exception):
48
51
  pass
@@ -234,10 +237,33 @@ async def index_to_rollover_index(
234
237
  # now index on all new shards only
235
238
  while True:
236
239
  async with datamanagers.with_transaction() as txn:
237
- resource_id = await datamanagers.rollover.get_to_index(txn, kbid=kbid)
238
- if resource_id is None:
240
+ resource_ids = await datamanagers.rollover.get_to_index(
241
+ txn, kbid=kbid, count=settings.max_concurrent_rollover_resources
242
+ )
243
+ if resource_ids is None:
239
244
  break
240
245
 
246
+ batch = [
247
+ _index_resource_to_rollover_index(app_context, rollover_shards, kbid, rid, external)
248
+ for rid in resource_ids
249
+ ]
250
+ await asyncio.gather(*batch)
251
+
252
+ async with datamanagers.with_transaction() as txn:
253
+ state.resources_indexed = True
254
+ await datamanagers.rollover.set_rollover_state(txn, kbid=kbid, state=state)
255
+ await datamanagers.rollover.update_kb_rollover_shards(txn, kbid=kbid, kb_shards=rollover_shards)
256
+ await txn.commit()
257
+
258
+
259
+ async def _index_resource_to_rollover_index(
260
+ app_context: ApplicationContext,
261
+ rollover_shards: writer_pb2.Shards,
262
+ kbid: str,
263
+ resource_id: str,
264
+ external: Optional[ExternalIndexManager] = None,
265
+ ) -> None:
266
+ async with resource_index_semaphore:
241
267
  async with datamanagers.with_transaction() as txn:
242
268
  shard_id = await datamanagers.resources.get_resource_shard_id(
243
269
  txn, kbid=kbid, rid=resource_id
@@ -250,7 +276,7 @@ async def index_to_rollover_index(
250
276
  async with datamanagers.with_transaction() as txn:
251
277
  await datamanagers.rollover.remove_to_index(txn, kbid=kbid, resource=resource_id)
252
278
  await txn.commit()
253
- continue
279
+ return
254
280
 
255
281
  shard = _get_shard(rollover_shards, shard_id)
256
282
  if shard is None: # pragma: no cover
@@ -268,7 +294,7 @@ async def index_to_rollover_index(
268
294
  async with datamanagers.with_transaction() as txn:
269
295
  await datamanagers.rollover.remove_to_index(txn, kbid=kbid, resource=resource_id)
270
296
  await txn.commit()
271
- continue
297
+ return
272
298
 
273
299
  if external is not None:
274
300
  await external.index_resource(resource_id, index_message, to_rollover_indexes=True)
@@ -287,12 +313,6 @@ async def index_to_rollover_index(
287
313
  )
288
314
  await txn.commit()
289
315
 
290
- async with datamanagers.with_transaction() as txn:
291
- state.resources_indexed = True
292
- await datamanagers.rollover.set_rollover_state(txn, kbid=kbid, state=state)
293
- await datamanagers.rollover.update_kb_rollover_shards(txn, kbid=kbid, kb_shards=rollover_shards)
294
- await txn.commit()
295
-
296
316
 
297
317
  async def cutover_index(
298
318
  app_context: ApplicationContext, kbid: str, external: Optional[ExternalIndexManager] = None
@@ -91,11 +91,11 @@ async def add_batch_to_index(txn: Transaction, *, kbid: str, batch: list[str]) -
91
91
  await txn.set(key, b"")
92
92
 
93
93
 
94
- async def get_to_index(txn: Transaction, *, kbid: str) -> Optional[str]:
94
+ async def get_to_index(txn: Transaction, *, kbid: str, count: int) -> Optional[list[str]]:
95
95
  key = KB_ROLLOVER_RESOURCES_TO_INDEX.format(kbid=kbid, resource="")
96
- found = [key async for key in txn.keys(key, count=1)]
96
+ found = [key async for key in txn.keys(key, count=count)]
97
97
  if found:
98
- return found[0].split("/")[-1]
98
+ return [f.split("/")[-1] for f in found]
99
99
  return None
100
100
 
101
101
 
@@ -29,3 +29,10 @@ class Settings(pydantic_settings.BaseSettings):
29
29
  default=5,
30
30
  description="Maximum number of concurrent KB migrations allowed.",
31
31
  )
32
+ max_concurrent_rollover_resources: int = pydantic.Field(
33
+ default=5,
34
+ description="Maximum number of concurrent index operations during rollover.",
35
+ )
36
+
37
+
38
+ settings = Settings()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.3.4.post3729
3
+ Version: 6.3.4.post3740
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3729
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3729
25
- Requires-Dist: nucliadb-protos>=6.3.4.post3729
26
- Requires-Dist: nucliadb-models>=6.3.4.post3729
27
- Requires-Dist: nidx-protos>=6.3.4.post3729
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.4.post3740
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.4.post3740
25
+ Requires-Dist: nucliadb-protos>=6.3.4.post3740
26
+ Requires-Dist: nucliadb-models>=6.3.4.post3740
27
+ Requires-Dist: nidx-protos>=6.3.4.post3740
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -22,12 +22,13 @@ migrations/0022_fix_paragraph_deletion_bug.py,sha256=-tH342VXF-8xwc_h3P1cYaUtTT1
22
22
  migrations/0023_backfill_pg_catalog.py,sha256=gw22pU5cAtg2a7n7xVaVqT2itjAoDMNtzWwTllwqIvg,2993
23
23
  migrations/0025_assign_models_to_kbs_v2.py,sha256=QC6nDF2Wyc6zQMqNoKzvz-3507UpDyJztlbIsvlwHss,4678
24
24
  migrations/0026_fix_high_cardinality_content_types.py,sha256=BsbBkvZDzjRHQfoouZNNtHA1xMxTKm8wOVnp_WAS9j4,2322
25
- migrations/0027_rollover_texts3.py,sha256=UQDaMOayVuqDisf82NDrPStoEVveHvdjkSmzbIcU9o4,2730
25
+ migrations/0027_rollover_texts3.py,sha256=J7tRVr-OooLuZCFkhYquOPHFRMiHNQT1klT8r0d8a_8,2815
26
26
  migrations/0028_extracted_vectors_reference.py,sha256=49DHCIlBpjofU8cYVHTdWv0EBIlnPTWV2WCezf0rJUo,2392
27
27
  migrations/0029_backfill_field_status.py,sha256=QWF69n1da9lpRnbEpgbqPjSQ-Wfn6rMC7Enz6bBYGt4,5663
28
28
  migrations/0030_label_deduplication.py,sha256=y14TxtCMi3-TBMz_eZoyyPDHNlZb29taJujlDuHumsA,2008
29
29
  migrations/0031_languages_deduplication.py,sha256=o6va6lP3oTRT1uSzp5MIhHHBFbhCxSZ-oNlXXpiAdUo,2340
30
30
  migrations/0032_remove_old_relations.py,sha256=ZaswhmRRsLgw6DVYVdT7cP-gdBf4X3PL9fklUXrmSD0,2318
31
+ migrations/0033_rollover_nidx_relation_2.py,sha256=LLhw7LknMVl1mu_mQlsijVUYDe7F87eZx2N8IyeZUK8,1244
31
32
  migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
32
33
  migrations/pg/0001_bootstrap.py,sha256=Fsqkeof50m7fKiJN05kmNEMwiKDlOrAgcAS5sLLkutA,1256
33
34
  migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
@@ -61,7 +62,7 @@ nucliadb/common/cluster/exceptions.py,sha256=t7v_l93t44l2tQpdQXgO_w-c4YZRcaayOz1
61
62
  nucliadb/common/cluster/grpc_node_dummy.py,sha256=LxONv0mhDFhx7mI91qqGfQlQ-R0qOGDYaxhXoBHLXaE,3548
62
63
  nucliadb/common/cluster/manager.py,sha256=KIzqAYGgdVK3GicJ9LdLoei8arWZ7H60imbc32USPj4,12754
63
64
  nucliadb/common/cluster/rebalance.py,sha256=cLUlR08SsqmnoA_9GDflV6k2tXmkAPpyFxZErzp45vo,8754
64
- nucliadb/common/cluster/rollover.py,sha256=iTJ9EQmHbzXL34foNFto-hqdC0Kq1pF1mNxqv0jqhBs,25362
65
+ nucliadb/common/cluster/rollover.py,sha256=QCvns28pr81geSQvC37dGKa-nYGu4_IfZwJxyzKg6z4,26042
65
66
  nucliadb/common/cluster/settings.py,sha256=JPwV_0U_i618Tn66GWUq6qCKNjy4TWkGEGld9GwH5uk,2048
66
67
  nucliadb/common/cluster/utils.py,sha256=7nQvnVFxM4XV7J560R8hUA-GPzrgD19UlQxHrl4mZUc,4687
67
68
  nucliadb/common/cluster/standalone/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
@@ -78,7 +79,7 @@ nucliadb/common/datamanagers/kb.py,sha256=P7EhF4tApIUG2jw_HH1oMufTKG9__kuOLKnrCN
78
79
  nucliadb/common/datamanagers/labels.py,sha256=Zm0GQpSPoGXEEysUY7VsDIcyKSIIQsMVphj23IyM9_c,4502
79
80
  nucliadb/common/datamanagers/processing.py,sha256=ByxdZzdbAfJGqC6__mY-zryjk040TyQfcUq3rxujeoY,1587
80
81
  nucliadb/common/datamanagers/resources.py,sha256=cuwcVL-GEjS1VHigJtz5SG_dRhl09UADQ9MSYOOqs70,10779
81
- nucliadb/common/datamanagers/rollover.py,sha256=c_DE3jtZusNL_9aOVjHOB9PV5OSVg7GJ5J-Ny0goHBE,7833
82
+ nucliadb/common/datamanagers/rollover.py,sha256=BM1hJ2cEU91xekM5PtmnA0SN3i3w0WmodiyTpO8YZZs,7865
82
83
  nucliadb/common/datamanagers/search_configurations.py,sha256=O-8eW43CE46GcxO6TB5hpi27NBguv4BL4SI1vLlN8os,2463
83
84
  nucliadb/common/datamanagers/synonyms.py,sha256=zk3GEH38KF5vV_VcuL6DCg-2JwgXJfQl7Io6VPqv2cw,1566
84
85
  nucliadb/common/datamanagers/utils.py,sha256=McHlXvE4P3x-bBY3pr0n8djbTDQvI1G5WusJrnRdhLA,1827
@@ -163,7 +164,7 @@ nucliadb/migrator/datamanager.py,sha256=sPrFvgn0aOyWKUUkwaaUmAqqn3jKlVe1IpIXrkoK
163
164
  nucliadb/migrator/exceptions.py,sha256=jTj3YhKmFwUyjjgoKUNoCAiGrpEbB64X1Um212nSNQ8,889
164
165
  nucliadb/migrator/migrator.py,sha256=tpdKvqqB0p1klcX2LjUD0Br6brcxxfJMVMMoKABmPD4,10834
165
166
  nucliadb/migrator/models.py,sha256=3PJkL2PGvKgIG0KIBv4H5XCsOVmwWMlRV3m0ntDj10Q,1145
166
- nucliadb/migrator/settings.py,sha256=jOUX0ZMunCXN8HpF9xXN0aunJYRhu4Vdr_ffjRIqwtw,1144
167
+ nucliadb/migrator/settings.py,sha256=3eK6PIwqqtoCea9V7-RMjVfZC_0BC5DqPPlfo9XMyNE,1340
167
168
  nucliadb/migrator/utils.py,sha256=NgUreUvON8_nWEzTxELBMWlfV7E6-6qi-g0DMEbVEz4,2885
168
169
  nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
169
170
  nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
@@ -353,8 +354,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
353
354
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
354
355
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
355
356
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
356
- nucliadb-6.3.4.post3729.dist-info/METADATA,sha256=7WsHIY6R-JkCVTyVFbfCWk-avqWWNy_lJEYRNtow_F4,4291
357
- nucliadb-6.3.4.post3729.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
358
- nucliadb-6.3.4.post3729.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
359
- nucliadb-6.3.4.post3729.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
360
- nucliadb-6.3.4.post3729.dist-info/RECORD,,
357
+ nucliadb-6.3.4.post3740.dist-info/METADATA,sha256=kXv8CG9jprFzMIeB6zi4A3R9gUN7RQ9J2CiTGBolbu0,4291
358
+ nucliadb-6.3.4.post3740.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
359
+ nucliadb-6.3.4.post3740.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
360
+ nucliadb-6.3.4.post3740.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
361
+ nucliadb-6.3.4.post3740.dist-info/RECORD,,