nucliadb 6.9.1.post5180__py3-none-any.whl → 6.9.2.post5282__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nucliadb might be problematic.

Files changed (35)
  1. migrations/pg/0010_shards_index.py +34 -0
  2. nucliadb/common/cluster/manager.py +3 -19
  3. nucliadb/common/cluster/rebalance.py +484 -110
  4. nucliadb/common/cluster/rollover.py +29 -0
  5. nucliadb/common/cluster/utils.py +26 -0
  6. nucliadb/common/datamanagers/atomic.py +6 -0
  7. nucliadb/common/filter_expression.py +15 -32
  8. nucliadb/ingest/consumer/service.py +1 -2
  9. nucliadb/ingest/consumer/shard_creator.py +16 -5
  10. nucliadb/ingest/fields/base.py +0 -17
  11. nucliadb/ingest/orm/knowledgebox.py +78 -29
  12. nucliadb/ingest/orm/processor/processor.py +21 -16
  13. nucliadb/ingest/service/writer.py +12 -5
  14. nucliadb/migrator/datamanager.py +1 -7
  15. nucliadb/purge/__init__.py +2 -7
  16. nucliadb/reader/api/v1/learning_config.py +21 -0
  17. nucliadb/search/api/v1/find.py +1 -4
  18. nucliadb/search/api/v1/resource/ask.py +21 -1
  19. nucliadb/search/api/v1/search.py +1 -4
  20. nucliadb/search/search/chat/ask.py +0 -1
  21. nucliadb/search/search/chat/prompt.py +45 -13
  22. nucliadb/search/search/chat/query.py +0 -1
  23. nucliadb/search/search/find.py +1 -6
  24. nucliadb/search/search/query.py +0 -23
  25. nucliadb/search/search/query_parser/models.py +0 -1
  26. nucliadb/search/search/query_parser/parsers/catalog.py +2 -2
  27. nucliadb/search/search/query_parser/parsers/find.py +0 -8
  28. nucliadb/search/search/query_parser/parsers/search.py +0 -8
  29. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +4 -11
  30. nucliadb/writer/api/v1/knowledgebox.py +15 -22
  31. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/METADATA +8 -9
  32. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/RECORD +35 -34
  33. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/WHEEL +0 -0
  34. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/entry_points.txt +0 -0
  35. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/top_level.txt +0 -0
@@ -34,8 +34,10 @@ from nucliadb.common.external_index_providers.base import ExternalIndexManager
 from nucliadb.common.external_index_providers.manager import (
     get_external_index_manager,
 )
+from nucliadb.common.maindb.utils import get_driver
 from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
+from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
 from nucliadb.migrator.settings import settings
 from nucliadb_protos import utils_pb2, writer_pb2
 from nucliadb_telemetry import errors
@@ -45,6 +47,7 @@ from .utils import (
     get_resource,
     get_rollover_resource_index_message,
     index_resource_to_shard,
+    wait_for_nidx,
 )
 
 logger = logging.getLogger(__name__)
@@ -254,6 +257,7 @@ async def index_to_rollover_index(
             for rid in resource_ids
         ]
         await asyncio.gather(*batch)
+        await wait_for_indexing_to_catch_up(app_context)
 
     async with datamanagers.with_transaction() as txn:
         state.resources_indexed = True
@@ -262,6 +266,22 @@ async def index_to_rollover_index(
         await txn.commit()
 
 
+async def wait_for_indexing_to_catch_up(app_context: ApplicationContext):
+    try:
+        app_context.nats_manager
+    except AssertionError:
+        logger.warning("Nats manager not initialized. Cannot wait for indexing to catch up")
+        return
+    max_pending = 1000
+    while True:
+        try:
+            await wait_for_nidx(app_context.nats_manager, max_wait_seconds=60, max_pending=max_pending)
+            return
+        except asyncio.TimeoutError:
+            logger.warning(f"Nidx is behind more than {max_pending} messages. Throttling rollover.")
+            await asyncio.sleep(30)
+
+
 async def _index_resource_to_rollover_index(
     app_context: ApplicationContext,
     rollover_shards: writer_pb2.Shards,
@@ -415,6 +435,15 @@ async def cutover_shards(app_context: ApplicationContext, kbid: str) -> None:
 
         await txn.commit()
 
+    # For KBs with pre-warm enabled, we must configure the new shards. There may
+    # be some small delay between this call and the shards being actually
+    # prewarmed, but rollovers are quite unusual and we prefer this rather than
+    # prewarming old and new shards at the same time
+    kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
+    if kb_config is not None and kb_config.prewarm_enabled:
+        driver = get_driver()
+        await KnowledgeBox.configure_shards(driver, kbid, prewarm=True)
+
 
 async def validate_indexed_data(
     app_context: ApplicationContext, kbid: str, external: Optional[ExternalIndexManager] = None
@@ -32,6 +32,7 @@ from nucliadb.common.cluster.settings import settings
 from nucliadb.ingest.orm import index_message
 from nucliadb.ingest.orm.resource import Resource
 from nucliadb_protos import writer_pb2
+from nucliadb_utils.nats import NatsConnectionManager
 from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
 
 if TYPE_CHECKING:  # pragma: no cover
@@ -125,3 +126,28 @@ async def delete_resource_from_shard(
     partition = partitioning.generate_partition(kbid, resource_id)
 
     await sm.delete_resource(shard, resource_id, 0, str(partition), kbid)
+
+
+async def get_nats_consumer_pending_messages(
+    nats_manager: NatsConnectionManager, *, stream: str, consumer: str
+) -> int:
+    # get raw js client
+    js = nats_manager.js
+    consumer_info = await js.consumer_info(stream, consumer)
+    return consumer_info.num_pending
+
+
+async def wait_for_nidx(
+    nats_manager: NatsConnectionManager,
+    max_pending: int,
+    poll_interval_seconds: int = 5,
+    max_wait_seconds: int = 60,
+):
+    async with asyncio.timeout(max_wait_seconds):  # type: ignore
+        while True:
+            pending = await get_nats_consumer_pending_messages(
+                nats_manager, stream="nidx", consumer="nidx"
+            )
+            if pending < max_pending:
+                return
+            await asyncio.sleep(poll_interval_seconds)
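
wait_for_nidx bounds its poll loop with asyncio.timeout (Python 3.11+) and keeps reading the pending-message count of the `nidx` consumer on the `nidx` JetStream stream until it falls below max_pending, raising TimeoutError otherwise. A minimal standalone sketch of the same backlog check using the nats-py client directly (the server URL is an assumption; the stream and consumer names are the ones used above):

# Sketch: read a JetStream consumer's backlog with nats-py.
# The NATS URL is an assumption; "nidx"/"nidx" mirror the stream/consumer above.
import asyncio
import nats

async def main() -> None:
    nc = await nats.connect("nats://localhost:4222")
    js = nc.jetstream()
    info = await js.consumer_info("nidx", "nidx")
    print(f"pending messages: {info.num_pending}")
    await nc.close()

asyncio.run(main())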
@@ -42,6 +42,7 @@ from typing_extensions import Concatenate, ParamSpec
 
 from nucliadb.common.maindb.driver import Transaction
 
+from . import cluster as cluster_dm
 from . import kb as kb_dm
 from . import labels as labels_dm
 from . import resources as resources_dm
@@ -73,6 +74,10 @@ def rw_txn_wrap(fun: Callable[Concatenate[Transaction, P], Awaitable[T]]) -> Cal
     return wrapper
 
 
+class cluster:
+    get_kb_shards = ro_txn_wrap(cluster_dm.get_kb_shards)
+
+
 class kb:
     exists_kb = ro_txn_wrap(kb_dm.exists_kb)
     get_config = ro_txn_wrap(kb_dm.get_config)
@@ -83,6 +88,7 @@ class resources:
     get_resource_uuid_from_slug = ro_txn_wrap(resources_dm.get_resource_uuid_from_slug)
     resource_exists = ro_txn_wrap(resources_dm.resource_exists)
     slug_exists = ro_txn_wrap(resources_dm.slug_exists)
+    get_all_field_ids = ro_txn_wrap(resources_dm.get_all_field_ids)
 
 
 class labelset:
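
The atomic wrappers open and close their own transaction, so the new cluster.get_kb_shards accessor gives callers a one-liner shard lookup; KnowledgeBox.configure_shards further down in this diff relies on it. A hedged usage sketch:

# Sketch: ro_txn_wrap opens a read-only transaction internally,
# so no explicit transaction handling is needed at the call site.
from nucliadb.common import datamanagers

async def list_nidx_shards(kbid: str) -> list[str]:
    shards = await datamanagers.atomic.cluster.get_kb_shards(kbid=kbid)
    if shards is None:
        return []
    return [shard.nidx_shard_id for shard in shards.shards]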
@@ -52,37 +52,20 @@ from nucliadb_models.filters import (
 )
 
 # Filters that end up as a facet
-FacetFilter = Union[
-    OriginTag,
-    Label,
-    ResourceMimetype,
-    FieldMimetype,
-    Entity,
-    Language,
-    OriginMetadata,
-    OriginPath,
-    Generated,
-    Kind,
-    OriginCollaborator,
-    OriginSource,
-    Status,
-]
-# In Python 3.9 we cannot do isinstance against an union
-# Once we support only 3.10+, we can remove this
-FacetFilterTypes = (
-    OriginTag,
-    Label,
-    ResourceMimetype,
-    FieldMimetype,
-    Entity,
-    Language,
-    OriginMetadata,
-    OriginPath,
-    Generated,
-    Kind,
-    OriginCollaborator,
-    OriginSource,
-    Status,
+FacetFilter = (
+    OriginTag
+    | Label
+    | ResourceMimetype
+    | FieldMimetype
+    | Entity
+    | Language
+    | OriginMetadata
+    | OriginPath
+    | Generated
+    | Kind
+    | OriginCollaborator
+    | OriginSource
+    | Status
 )
 
 
@@ -131,7 +114,7 @@ async def parse_expression(
             f.date.since.FromDatetime(expr.since)
         if expr.until:
             f.date.until.FromDatetime(expr.until)
-    elif isinstance(expr, FacetFilterTypes):
+    elif isinstance(expr, FacetFilter):
         f.facet.facet = facet_from_filter(expr)
     else:
         # This is a trick so mypy generates an error if this branch can be reached,
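
The simplification works because, since Python 3.10 (PEP 604), isinstance accepts a union type directly, so the duplicated FacetFilterTypes tuple is no longer needed. A minimal illustration:

# Since Python 3.10, isinstance() accepts X | Y unions at runtime (PEP 604),
# so one union alias serves both annotations and isinstance checks.
Number = int | float

def is_number(value: object) -> bool:
    return isinstance(value, Number)

assert is_number(3) and is_number(2.5) and not is_number("3")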
@@ -140,9 +140,8 @@ async def start_shard_creator() -> Callable[[], Awaitable[None]]:
     driver = await setup_driver()
     pubsub = await get_pubsub()
     assert pubsub is not None, "Pubsub is not configured"
-    storage = await get_storage(service_name=SERVICE_NAME)
 
-    shard_creator = ShardCreatorHandler(driver=driver, storage=storage, pubsub=pubsub)
+    shard_creator = ShardCreatorHandler(driver=driver, pubsub=pubsub)
     await shard_creator.initialize()
 
     return shard_creator.finalize
@@ -25,14 +25,14 @@ from typing import Any
 
 from nidx_protos import nodereader_pb2, noderesources_pb2
 
-from nucliadb.common import locking
+from nucliadb.common import datamanagers, locking
+from nucliadb.common.cluster.settings import settings
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.maindb.driver import Driver
 from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb_protos import writer_pb2
 from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
-from nucliadb_utils.storages.storage import Storage
 
 from . import metrics
 from .utils import DelayedTaskHandler
@@ -52,12 +52,10 @@ class ShardCreatorHandler:
         self,
         *,
         driver: Driver,
-        storage: Storage,
         pubsub: PubSubDriver,
         check_delay: float = 10.0,
     ):
         self.driver = driver
-        self.storage = storage
         self.pubsub = pubsub
         self.shard_manager = get_shard_manager()
         self.task_handler = DelayedTaskHandler(check_delay)
@@ -111,4 +109,17 @@ class ShardCreatorHandler:
                 shard_id=noderesources_pb2.ShardId(id=current_shard.nidx_shard_id)
             )  # type: ignore
         )
-        await self.shard_manager.maybe_create_new_shard(kbid, shard.paragraphs)
+
+        if not should_create_new_shard(shard.paragraphs):
+            return
+
+        logger.info({"message": "Adding shard", "kbid": kbid})
+        async with datamanagers.with_rw_transaction() as txn:
+            kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
+            prewarm = kb_config is not None and kb_config.prewarm_enabled
+            await self.shard_manager.create_shard_by_kbid(txn, kbid, prewarm_enabled=prewarm)
+            await txn.commit()
+
+
+def should_create_new_shard(num_paragraphs: int) -> bool:
+    return num_paragraphs > settings.max_shard_paragraphs
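
Pulling the threshold check out into the module-level should_create_new_shard makes it trivial to exercise in isolation. A hedged test sketch, not taken from the package's test suite (assumes pytest's monkeypatch fixture; the threshold value is illustrative):

# Sketch of a unit test for the new helper.
from nucliadb.ingest.consumer import shard_creator

def test_should_create_new_shard(monkeypatch):
    monkeypatch.setattr(shard_creator.settings, "max_shard_paragraphs", 1000)
    assert shard_creator.should_create_new_shard(1001) is True
    assert shard_creator.should_create_new_shard(999) is False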
@@ -47,10 +47,8 @@ from nucliadb_protos.resources_pb2 import (
 )
 from nucliadb_protos.utils_pb2 import ExtractedText, VectorObject
 from nucliadb_protos.writer_pb2 import Error, FieldStatus
-from nucliadb_utils import const
 from nucliadb_utils.storages.exceptions import CouldNotCopyNotFound
 from nucliadb_utils.storages.storage import Storage, StorageField
-from nucliadb_utils.utilities import has_feature
 
 logger = logging.getLogger(__name__)
 
@@ -224,21 +222,6 @@ class Field(Generic[PbType]):
     ) -> None:
         # Try delete vectors
         sf = self._get_extracted_vectors_storage_field(vectorset, storage_key_kind)
-
-        if has_feature(const.Features.DEBUG_MISSING_VECTORS):
-            # This is a very chatty log. It is just a temporary hint while debugging an issue.
-            logger.info(
-                "Deleting vectors from storage",
-                extra={
-                    "kbid": self.kbid,
-                    "rid": self.resource.uuid,
-                    "field": f"{self.type}/{self.id}",
-                    "vectorset": vectorset,
-                    "storage_key_kind": storage_key_kind,
-                    "key": sf.key,
-                    "bucket": sf.bucket,
-                },
-            )
         try:
             await self.storage.delete_upload(sf.key, sf.bucket)
         except KeyError:
@@ -24,7 +24,7 @@ from uuid import uuid4
 
 from grpc import StatusCode
 from grpc.aio import AioRpcError
-from nidx_protos import noderesources_pb2
+from nidx_protos import nidx_pb2, noderesources_pb2
 
 from nucliadb.common import datamanagers
 from nucliadb.common.cluster.exceptions import ShardNotFound
@@ -108,6 +108,7 @@ class KnowledgeBox:
         external_index_provider: CreateExternalIndexProviderMetadata = CreateExternalIndexProviderMetadata(),
         hidden_resources_enabled: bool = False,
         hidden_resources_hide_on_creation: bool = False,
+        prewarm_enabled: bool = False,
     ) -> tuple[str, str]:
         """Creates a new knowledge box and return its id and slug."""
 
@@ -194,6 +195,7 @@
                 migration_version=get_latest_version(),
                 hidden_resources_enabled=hidden_resources_enabled,
                 hidden_resources_hide_on_creation=hidden_resources_hide_on_creation,
+                prewarm_enabled=prewarm_enabled,
             )
             config.external_index_provider.CopyFrom(stored_external_index_provider)
             await datamanagers.kb.set_config(txn, kbid=kbid, config=config)
@@ -220,7 +222,7 @@
             shard_manager = get_shard_manager()
             # XXX creating a shard is a slow IO operation that requires a write
             # txn to be open!
-            await shard_manager.create_shard_by_kbid(txn, kbid)
+            await shard_manager.create_shard_by_kbid(txn, kbid, prewarm_enabled=prewarm_enabled)
            # shards don't need a rollback as they will be eventually purged
 
             await txn.commit()
@@ -243,39 +245,86 @@
     @classmethod
     async def update(
         cls,
-        txn: Transaction,
-        uuid: str,
+        driver: Driver,
+        kbid: str,
+        *,
         slug: Optional[str] = None,
-        config: Optional[KnowledgeBoxConfig] = None,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
+        migration_version: Optional[int] = None,
+        external_index_provider: Optional[StoredExternalIndexProviderMetadata] = None,
+        hidden_resources_enabled: Optional[bool] = None,
+        hidden_resources_hide_on_creation: Optional[bool] = None,
+        prewarm_enabled: Optional[bool] = None,
     ) -> str:
-        exist = await datamanagers.kb.get_config(txn, kbid=uuid, for_update=True)
-        if not exist:
-            raise datamanagers.exceptions.KnowledgeBoxNotFound()
-
-        if slug:
-            await txn.delete(datamanagers.kb.KB_SLUGS.format(slug=exist.slug))
-            await txn.set(
-                datamanagers.kb.KB_SLUGS.format(slug=slug),
-                uuid.encode(),
-            )
-            if config:
-                config.slug = slug
-            else:
-                exist.slug = slug
+        async with driver.rw_transaction() as txn:
+            stored = await datamanagers.kb.get_config(txn, kbid=kbid, for_update=True)
+            if not stored:
+                raise datamanagers.exceptions.KnowledgeBoxNotFound()
+
+            if slug:
+                await txn.delete(datamanagers.kb.KB_SLUGS.format(slug=stored.slug))
+                await txn.set(
+                    datamanagers.kb.KB_SLUGS.format(slug=slug),
+                    kbid.encode(),
+                )
+                stored.slug = slug
 
-        if config and exist != config:
-            exist.MergeFrom(config)
-            exist.hidden_resources_enabled = config.hidden_resources_enabled
-            exist.hidden_resources_hide_on_creation = config.hidden_resources_hide_on_creation
+            if title is not None:
+                stored.title = title
+            if description is not None:
+                stored.description = description
 
-        if exist.hidden_resources_hide_on_creation and not exist.hidden_resources_enabled:
-            raise KnowledgeBoxCreationError(
-                "Cannot hide new resources if the hidden resources feature is disabled"
-            )
+            if migration_version is not None:
+                stored.migration_version = migration_version
+
+            if external_index_provider is not None:
+                stored.external_index_provider.MergeFrom(external_index_provider)
 
-        await datamanagers.kb.set_config(txn, kbid=uuid, config=exist)
+            if hidden_resources_enabled is not None:
+                stored.hidden_resources_enabled = hidden_resources_enabled
+            if hidden_resources_hide_on_creation is not None:
+                stored.hidden_resources_hide_on_creation = hidden_resources_hide_on_creation
+
+            update_nidx_prewarm = None
+            if prewarm_enabled is not None:
+                if stored.prewarm_enabled != prewarm_enabled:
+                    update_nidx_prewarm = prewarm_enabled
+                stored.prewarm_enabled = prewarm_enabled
+
+            if stored.hidden_resources_hide_on_creation and not stored.hidden_resources_enabled:
+                raise KnowledgeBoxCreationError(
+                    "Cannot hide new resources if the hidden resources feature is disabled"
+                )
 
-        return uuid
+            await datamanagers.kb.set_config(txn, kbid=kbid, config=stored)
+
+            await txn.commit()
+
+        if update_nidx_prewarm is not None:
+            await cls.configure_shards(driver, kbid, prewarm=update_nidx_prewarm)
+
+        return kbid
+
+    @classmethod
+    async def configure_shards(cls, driver: Driver, kbid: str, *, prewarm: bool):
+        shards_obj = await datamanagers.atomic.cluster.get_kb_shards(kbid=kbid)
+        if shards_obj is None:
+            logger.warning(f"Shards not found for KB while updating pre-warm flag", extra={"kbid": kbid})
+            return
+
+        nidx_shard_ids = [shard.nidx_shard_id for shard in shards_obj.shards]
+
+        nidx_api = get_nidx_api_client()
+        if nidx_api is not None and len(nidx_shard_ids) > 0:
+            configs = [
+                nidx_pb2.ShardConfig(
+                    shard_id=shard_id,
+                    prewarm_enabled=prewarm,
+                )
+                for shard_id in nidx_shard_ids
+            ]
+            await nidx_api.ConfigureShards(nidx_pb2.ShardsConfig(configs=configs))
 
     @classmethod
     async def delete(cls, driver: Driver, kbid: str):
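
KnowledgeBox.update now receives the driver and explicit keyword arguments instead of an open transaction plus a KnowledgeBoxConfig proto; it opens and commits its own read-write transaction, and when the pre-warm flag actually changes it calls configure_shards on the KB's existing nidx shards. A hedged call sketch based on the new signature (driver acquisition via get_driver matches the import added in rollover.py above):

# Sketch: only the passed keywords are touched; flipping prewarm_enabled
# also reconfigures the existing nidx shards via configure_shards().
from nucliadb.common.maindb.utils import get_driver
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox

async def enable_prewarm(kbid: str) -> None:
    driver = get_driver()
    await KnowledgeBox.update(
        driver,
        kbid,
        title="My knowledge box",  # optional, updated only because it is provided
        prewarm_enabled=True,      # triggers ConfigureShards when the value changes
    )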
@@ -446,22 +446,27 @@
         # a resource was move to another shard while it was being indexed
         shard_id = await datamanagers.resources.get_resource_shard_id(txn, kbid=kbid, rid=uuid)
 
-        shard = None
-        if shard_id is not None:
-            # Resource already has a shard assigned
-            shard = await kb.get_resource_shard(shard_id)
-            if shard is None:
-                raise AttributeError("Shard not available")
-        else:
-            # It's a new resource, get KB's current active shard to place new resource on
-            shard = await self.index_node_shard_manager.get_current_active_shard(txn, kbid)
-            if shard is None:
-                # No current shard available, create a new one
-                shard = await self.index_node_shard_manager.create_shard_by_kbid(txn, kbid)
-            await datamanagers.resources.set_resource_shard_id(
-                txn, kbid=kbid, rid=uuid, shard=shard.shard
-            )
-        return shard
+        shard = None
+        if shard_id is not None:
+            # Resource already has a shard assigned
+            shard = await kb.get_resource_shard(shard_id)
+            if shard is None:
+                raise AttributeError("Shard not available")
+        else:
+            # It's a new resource, get KB's current active shard to place new resource on
+            shard = await self.index_node_shard_manager.get_current_active_shard(txn, kbid)
+            if shard is None:
+                # No current shard available, create a new one
+                async with locking.distributed_lock(locking.NEW_SHARD_LOCK.format(kbid=kbid)):
+                    kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
+                    prewarm = kb_config is not None and kb_config.prewarm_enabled
+                    shard = await self.index_node_shard_manager.create_shard_by_kbid(
+                        txn, kbid, prewarm_enabled=prewarm
+                    )
+            await datamanagers.resources.set_resource_shard_id(
+                txn, kbid=kbid, rid=uuid, shard=shard.shard
+            )
+        return shard
 
     @processor_observer.wrap({"type": "index_resource"})
     async def index_resource(
@@ -126,6 +126,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
                 external_index_provider=request.external_index_provider,
                 hidden_resources_enabled=request.hidden_resources_enabled,
                 hidden_resources_hide_on_creation=request.hidden_resources_hide_on_creation,
+                prewarm_enabled=request.prewarm_enabled,
             )
 
         except KnowledgeBoxConflict:
@@ -167,11 +168,17 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
             )
 
         try:
-            async with self.driver.rw_transaction() as txn:
-                kbid = await KnowledgeBoxORM.update(
-                    txn, uuid=request.uuid, slug=request.slug, config=request.config
-                )
-                await txn.commit()
+            kbid = await KnowledgeBoxORM.update(
+                self.driver,
+                kbid=request.uuid,
+                slug=request.slug,
+                title=request.config.title or None,
+                description=request.config.description or None,
+                external_index_provider=request.config.external_index_provider or None,
+                hidden_resources_enabled=request.config.hidden_resources_enabled,
+                hidden_resources_hide_on_creation=request.config.hidden_resources_hide_on_creation,
+                prewarm_enabled=request.config.prewarm_enabled,
+            )
         except KnowledgeBoxNotFound:
             return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.NOTFOUND)
         except Exception:
@@ -77,13 +77,7 @@ class MigrationsDataManager:
         return KnowledgeBoxInfo(current_version=kb_config.migration_version)
 
     async def update_kb_info(self, *, kbid: str, current_version: int) -> None:
-        async with self.driver.rw_transaction() as txn:
-            kb_config = await datamanagers.kb.get_config(txn, kbid=kbid, for_update=True)
-            if kb_config is None:
-                raise Exception(f"KB {kbid} does not exist")
-            kb_config.migration_version = current_version
-            await KnowledgeBoxORM.update(txn, kbid, config=kb_config)
-            await txn.commit()
+        await KnowledgeBoxORM.update(self.driver, kbid, migration_version=current_version)
 
     async def get_global_info(self) -> GlobalInfo:
         async with self.driver.ro_transaction() as txn:
@@ -19,6 +19,7 @@
 #
 import asyncio
 import importlib.metadata
+from itertools import batched  # type: ignore
 from typing import AsyncGenerator
 
 from nucliadb.common import datamanagers
@@ -233,7 +234,7 @@ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
             fields.extend((await resource.get_fields(force=True)).values())
 
         logger.info(f"Purging {len(fields)} fields for vectorset {vectorset}", extra={"kbid": kbid})
-        for fields_batch in batchify(fields, 20):
+        for fields_batch in batched(fields, n=20):
             tasks = []
             for field in fields_batch:
                 if purge_payload.storage_key_kind == VectorSetConfig.StorageKeyKind.UNSET:
@@ -317,9 +318,3 @@ def run() -> int: # pragma: no cover
     setup_logging()
     errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
     return asyncio.run(main())
-
-
-def batchify(iterable, n=1):
-    """Yield successive n-sized chunks from iterable."""
-    for i in range(0, len(iterable), n):
-        yield iterable[i : i + n]
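
The hand-rolled batchify helper is dropped in favor of itertools.batched, available since Python 3.12. The semantics are slightly different: batched accepts any iterable (not just sequences) and yields tuples instead of list slices, with the last tuple shorter when the input does not divide evenly. For example:

# itertools.batched (Python 3.12+) yields successive n-sized tuples.
from itertools import batched

print(list(batched(range(7), 3)))
# [(0, 1, 2), (3, 4, 5), (6,)]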
@@ -128,6 +128,27 @@ async def get_schema_for_configuration_updates(
     )
 
 
+@api.get(
+    path=f"/{KB_PREFIX}/{{kbid}}/generative_providers",
+    status_code=200,
+    summary="Available models for a knowledge box",
+    description="Get all available models for a knowledge box grouped by provider",
+    response_model=None,
+    tags=["Models"],
+)
+@requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
+@version(1)
+async def get_models_group_by_providers(
+    request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
+):
+    return await learning_config_proxy(
+        request,
+        "GET",
+        f"/generative_providers/{kbid}",
+        headers={"account-id": x_nucliadb_account},
+    )
+
+
 @api.get(
     path=f"/nua/schema",
     status_code=200,
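
The new reader endpoint is a thin proxy to the learning-config service. A hedged client-side sketch with httpx (the host, port, /api/v1 prefix and X-NUCLIADB-ROLES header follow the usual NucliaDB layout and are assumptions, not part of this diff):

# Sketch: list available generative models grouped by provider for a KB.
# Host, port, URL prefix and auth header are assumptions.
import httpx

def get_generative_providers(kbid: str) -> dict:
    resp = httpx.get(
        f"http://localhost:8080/api/v1/kb/{kbid}/generative_providers",
        headers={"X-NUCLIADB-ROLES": "READER"},
    )
    resp.raise_for_status()
    return resp.json()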
@@ -46,7 +46,6 @@ from nucliadb_models.search import (
     KnowledgeboxFindResults,
     NucliaDBClientType,
     RankFusionName,
-    Reranker,
     RerankerName,
     ResourceProperties,
     SearchParamDefaults,
@@ -127,11 +126,10 @@
     extracted: list[ExtractedDataTypeName] = fastapi_query(SearchParamDefaults.extracted),
     with_duplicates: bool = fastapi_query(SearchParamDefaults.with_duplicates),
     with_synonyms: bool = fastapi_query(SearchParamDefaults.with_synonyms),
-    autofilter: bool = fastapi_query(SearchParamDefaults.autofilter),
     security_groups: list[str] = fastapi_query(SearchParamDefaults.security_groups),
     show_hidden: bool = fastapi_query(SearchParamDefaults.show_hidden),
     rank_fusion: RankFusionName = fastapi_query(SearchParamDefaults.rank_fusion),
-    reranker: Union[RerankerName, Reranker] = fastapi_query(SearchParamDefaults.reranker),
+    reranker: RerankerName = fastapi_query(SearchParamDefaults.reranker),
     search_configuration: Optional[str] = Query(
         default=None,
         description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
@@ -166,7 +164,6 @@
         extracted=extracted,
         with_duplicates=with_duplicates,
         with_synonyms=with_synonyms,
-        autofilter=autofilter,
         security=security,
         show_hidden=show_hidden,
         rank_fusion=rank_fusion,
@@ -28,7 +28,8 @@ from nucliadb.search.api.v1.resource.utils import get_resource_uuid_by_slug
 from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_SLUG_PREFIX, api
 from nucliadb_models.resource import NucliaDBRoles
 from nucliadb_models.search import AskRequest, NucliaDBClientType, SyncAskResponse
-from nucliadb_utils.authentication import requires
+from nucliadb_models.security import RequestSecurity
+from nucliadb_utils.authentication import NucliaUser, requires
 
 from ..ask import create_ask_response
 
@@ -58,6 +59,15 @@ async def resource_ask_endpoint_by_uuid(
         "This is slower and requires waiting for entire answer to be ready.",
     ),
 ) -> Union[StreamingResponse, HTTPClientError, Response]:
+    current_user: NucliaUser = request.user
+    # If present, security groups from AuthorizationBackend overrides any
+    # security group of the payload
+    if current_user.security_groups:
+        if item.security is None:
+            item.security = RequestSecurity(groups=current_user.security_groups)
+        else:
+            item.security.groups = current_user.security_groups
+
     return await create_ask_response(
         kbid=kbid,
         ask_request=item,
@@ -98,6 +108,16 @@ async def resource_ask_endpoint_by_slug(
     resource_id = await get_resource_uuid_by_slug(kbid, slug)
     if resource_id is None:
         return HTTPClientError(status_code=404, detail="Resource not found")
+
+    current_user: NucliaUser = request.user
+    # If present, security groups from AuthorizationBackend overrides any
+    # security group of the payload
+    if current_user.security_groups:
+        if item.security is None:
+            item.security = RequestSecurity(groups=current_user.security_groups)
+        else:
+            item.security.groups = current_user.security_groups
+
     return await create_ask_response(
         kbid=kbid,
         ask_request=item,
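
The same block is added to both the by-uuid and by-slug variants of the resource ask endpoint: when the authenticated user carries security groups, they take precedence over whatever the request payload declares. Read as a single helper, the duplicated logic looks like this (the helper name is hypothetical, not part of nucliadb):

# Sketch of the shared logic from both endpoints; apply_user_security_groups
# is a hypothetical name used only for illustration.
from nucliadb_models.search import AskRequest
from nucliadb_models.security import RequestSecurity
from nucliadb_utils.authentication import NucliaUser

def apply_user_security_groups(item: AskRequest, user: NucliaUser) -> None:
    # Groups resolved by the AuthorizationBackend win over the payload.
    if user.security_groups:
        if item.security is None:
            item.security = RequestSecurity(groups=user.security_groups)
        else:
            item.security.groups = user.security_groups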