nucliadb 6.2.0.post2675__py3-none-any.whl → 6.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. migrations/0028_extracted_vectors_reference.py +61 -0
  2. migrations/0029_backfill_field_status.py +149 -0
  3. migrations/0030_label_deduplication.py +60 -0
  4. nucliadb/common/cluster/manager.py +41 -331
  5. nucliadb/common/cluster/rebalance.py +2 -2
  6. nucliadb/common/cluster/rollover.py +12 -71
  7. nucliadb/common/cluster/settings.py +3 -0
  8. nucliadb/common/cluster/standalone/utils.py +0 -43
  9. nucliadb/common/cluster/utils.py +0 -16
  10. nucliadb/common/counters.py +1 -0
  11. nucliadb/common/datamanagers/fields.py +48 -7
  12. nucliadb/common/datamanagers/vectorsets.py +11 -2
  13. nucliadb/common/external_index_providers/base.py +2 -1
  14. nucliadb/common/external_index_providers/pinecone.py +3 -5
  15. nucliadb/common/ids.py +18 -4
  16. nucliadb/common/models_utils/from_proto.py +479 -0
  17. nucliadb/common/models_utils/to_proto.py +60 -0
  18. nucliadb/common/nidx.py +76 -37
  19. nucliadb/export_import/models.py +3 -3
  20. nucliadb/health.py +0 -7
  21. nucliadb/ingest/app.py +0 -8
  22. nucliadb/ingest/consumer/auditing.py +1 -1
  23. nucliadb/ingest/consumer/shard_creator.py +1 -1
  24. nucliadb/ingest/fields/base.py +83 -21
  25. nucliadb/ingest/orm/brain.py +55 -56
  26. nucliadb/ingest/orm/broker_message.py +12 -2
  27. nucliadb/ingest/orm/entities.py +6 -17
  28. nucliadb/ingest/orm/knowledgebox.py +44 -22
  29. nucliadb/ingest/orm/processor/data_augmentation.py +7 -29
  30. nucliadb/ingest/orm/processor/processor.py +5 -2
  31. nucliadb/ingest/orm/resource.py +222 -413
  32. nucliadb/ingest/processing.py +8 -2
  33. nucliadb/ingest/serialize.py +77 -46
  34. nucliadb/ingest/service/writer.py +2 -56
  35. nucliadb/ingest/settings.py +1 -4
  36. nucliadb/learning_proxy.py +6 -4
  37. nucliadb/purge/__init__.py +102 -12
  38. nucliadb/purge/orphan_shards.py +6 -4
  39. nucliadb/reader/api/models.py +3 -3
  40. nucliadb/reader/api/v1/__init__.py +1 -0
  41. nucliadb/reader/api/v1/download.py +2 -2
  42. nucliadb/reader/api/v1/knowledgebox.py +3 -3
  43. nucliadb/reader/api/v1/resource.py +23 -12
  44. nucliadb/reader/api/v1/services.py +4 -4
  45. nucliadb/reader/api/v1/vectorsets.py +48 -0
  46. nucliadb/search/api/v1/ask.py +11 -1
  47. nucliadb/search/api/v1/feedback.py +3 -3
  48. nucliadb/search/api/v1/knowledgebox.py +8 -13
  49. nucliadb/search/api/v1/search.py +3 -2
  50. nucliadb/search/api/v1/suggest.py +0 -2
  51. nucliadb/search/predict.py +6 -4
  52. nucliadb/search/requesters/utils.py +1 -2
  53. nucliadb/search/search/chat/ask.py +77 -13
  54. nucliadb/search/search/chat/prompt.py +16 -5
  55. nucliadb/search/search/chat/query.py +74 -34
  56. nucliadb/search/search/exceptions.py +2 -7
  57. nucliadb/search/search/find.py +9 -5
  58. nucliadb/search/search/find_merge.py +10 -4
  59. nucliadb/search/search/graph_strategy.py +884 -0
  60. nucliadb/search/search/hydrator.py +6 -0
  61. nucliadb/search/search/merge.py +79 -24
  62. nucliadb/search/search/query.py +74 -245
  63. nucliadb/search/search/query_parser/exceptions.py +11 -1
  64. nucliadb/search/search/query_parser/fetcher.py +405 -0
  65. nucliadb/search/search/query_parser/models.py +0 -3
  66. nucliadb/search/search/query_parser/parser.py +22 -21
  67. nucliadb/search/search/rerankers.py +1 -42
  68. nucliadb/search/search/shards.py +19 -0
  69. nucliadb/standalone/api_router.py +2 -14
  70. nucliadb/standalone/settings.py +4 -0
  71. nucliadb/train/generators/field_streaming.py +7 -3
  72. nucliadb/train/lifecycle.py +3 -6
  73. nucliadb/train/nodes.py +14 -12
  74. nucliadb/train/resource.py +380 -0
  75. nucliadb/writer/api/constants.py +20 -16
  76. nucliadb/writer/api/v1/__init__.py +1 -0
  77. nucliadb/writer/api/v1/export_import.py +1 -1
  78. nucliadb/writer/api/v1/field.py +13 -7
  79. nucliadb/writer/api/v1/knowledgebox.py +3 -46
  80. nucliadb/writer/api/v1/resource.py +20 -13
  81. nucliadb/writer/api/v1/services.py +10 -1
  82. nucliadb/writer/api/v1/upload.py +61 -34
  83. nucliadb/writer/{vectorsets.py → api/v1/vectorsets.py} +99 -47
  84. nucliadb/writer/back_pressure.py +17 -46
  85. nucliadb/writer/resource/basic.py +9 -7
  86. nucliadb/writer/resource/field.py +42 -9
  87. nucliadb/writer/settings.py +2 -2
  88. nucliadb/writer/tus/gcs.py +11 -10
  89. {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/METADATA +11 -14
  90. {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/RECORD +94 -96
  91. {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/WHEEL +1 -1
  92. nucliadb/common/cluster/discovery/base.py +0 -178
  93. nucliadb/common/cluster/discovery/k8s.py +0 -301
  94. nucliadb/common/cluster/discovery/manual.py +0 -57
  95. nucliadb/common/cluster/discovery/single.py +0 -51
  96. nucliadb/common/cluster/discovery/types.py +0 -32
  97. nucliadb/common/cluster/discovery/utils.py +0 -67
  98. nucliadb/common/cluster/standalone/grpc_node_binding.py +0 -349
  99. nucliadb/common/cluster/standalone/index_node.py +0 -123
  100. nucliadb/common/cluster/standalone/service.py +0 -84
  101. nucliadb/standalone/introspect.py +0 -208
  102. nucliadb-6.2.0.post2675.dist-info/zip-safe +0 -1
  103. /nucliadb/common/{cluster/discovery → models_utils}/__init__.py +0 -0
  104. {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/entry_points.txt +0 -0
  105. {nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union
22
22
  from pydantic import BaseModel
23
23
 
24
24
  import nucliadb_models as models
25
- from nucliadb_models.common import FIELD_TYPES_MAP, FieldTypeName
25
+ from nucliadb_models.common import FieldTypeName
26
26
  from nucliadb_models.resource import (
27
27
  ConversationFieldExtractedData,
28
28
  Error,
@@ -52,10 +52,10 @@ class ResourceField(BaseModel):
52
52
  value: ValueType = None
53
53
  extracted: Optional[ExtractedDataType] = None
54
54
  error: Optional[Error] = None
55
+ status: Optional[str] = None
56
+ errors: Optional[list[Error]] = None
55
57
 
56
58
 
57
- FIELD_NAMES_TO_PB_TYPE_MAP = {v: k for k, v in FIELD_TYPES_MAP.items()}
58
-
59
59
  FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: dict[FieldTypeName, Any] = {
60
60
  FieldTypeName.TEXT: TextFieldExtractedData,
61
61
  FieldTypeName.FILE: FileFieldExtractedData,
@@ -23,4 +23,5 @@ from . import knowledgebox # noqa
23
23
  from . import learning_config # noqa
24
24
  from . import resource # noqa
25
25
  from . import services # noqa
26
+ from . import vectorsets # noqa
26
27
  from .router import api # noqa
@@ -29,9 +29,9 @@ from starlette.datastructures import Headers
29
29
  from starlette.responses import StreamingResponse
30
30
 
31
31
  from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR
32
+ from nucliadb.common.models_utils import to_proto
32
33
  from nucliadb.ingest.serialize import get_resource_uuid_by_slug
33
34
  from nucliadb.reader import SERVICE_NAME, logger
34
- from nucliadb.reader.api.models import FIELD_NAMES_TO_PB_TYPE_MAP
35
35
  from nucliadb_models.common import FieldTypeName
36
36
  from nucliadb_models.resource import NucliaDBRoles
37
37
  from nucliadb_utils.authentication import requires_one
@@ -97,7 +97,7 @@ async def _download_extract_file(
97
97
 
98
98
  storage = await get_storage(service_name=SERVICE_NAME)
99
99
 
100
- pb_field_type = FIELD_NAMES_TO_PB_TYPE_MAP[field_type]
100
+ pb_field_type = to_proto.field_type_name(field_type)
101
101
  field_type_letter = FIELD_TYPE_PB_TO_STR[pb_field_type]
102
102
 
103
103
  sf = storage.file_extracted(kbid, rid, field_type_letter, field_id, download_field)
@@ -23,9 +23,9 @@ from starlette.requests import Request
23
23
 
24
24
  from nucliadb.common import datamanagers
25
25
  from nucliadb.common.maindb.utils import get_driver
26
+ from nucliadb.common.models_utils import from_proto
26
27
  from nucliadb.reader.api.v1.router import KB_PREFIX, KBS_PREFIX, api
27
28
  from nucliadb_models.resource import (
28
- KnowledgeBoxConfig,
29
29
  KnowledgeBoxList,
30
30
  KnowledgeBoxObj,
31
31
  KnowledgeBoxObjSummary,
@@ -72,7 +72,7 @@ async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
72
72
  return KnowledgeBoxObj(
73
73
  uuid=kbid,
74
74
  slug=kb_config.slug,
75
- config=KnowledgeBoxConfig.from_message(kb_config),
75
+ config=from_proto.knowledgebox_config(kb_config),
76
76
  )
77
77
 
78
78
 
@@ -99,5 +99,5 @@ async def get_kb_by_slug(request: Request, slug: str) -> KnowledgeBoxObj:
99
99
  return KnowledgeBoxObj(
100
100
  uuid=kbid,
101
101
  slug=kb_config.slug,
102
- config=KnowledgeBoxConfig.from_message(kb_config),
102
+ config=from_proto.knowledgebox_config(kb_config),
103
103
  )
@@ -22,9 +22,9 @@ from typing import Optional, Union
22
22
  from fastapi import Header, HTTPException, Query, Request, Response
23
23
  from fastapi_versioning import version
24
24
 
25
- import nucliadb_models as models
26
25
  from nucliadb.common.datamanagers.resources import KB_RESOURCE_SLUG_BASE
27
26
  from nucliadb.common.maindb.utils import get_driver
27
+ from nucliadb.common.models_utils import from_proto, to_proto
28
28
  from nucliadb.ingest.fields.conversation import Conversation
29
29
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as ORMKnowledgeBox
30
30
  from nucliadb.ingest.orm.resource import Resource as ORMResource
@@ -37,7 +37,6 @@ from nucliadb.reader import SERVICE_NAME
37
37
  from nucliadb.reader.api import DEFAULT_RESOURCE_LIST_PAGE_SIZE
38
38
  from nucliadb.reader.api.models import (
39
39
  FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP,
40
- FIELD_NAMES_TO_PB_TYPE_MAP,
41
40
  ResourceField,
42
41
  )
43
42
  from nucliadb.reader.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
@@ -53,6 +52,7 @@ from nucliadb_models.resource import (
53
52
  )
54
53
  from nucliadb_models.search import ResourceProperties
55
54
  from nucliadb_protos import resources_pb2
55
+ from nucliadb_protos.writer_pb2 import FieldStatus
56
56
  from nucliadb_telemetry import errors
57
57
  from nucliadb_utils.authentication import requires, requires_one
58
58
  from nucliadb_utils.utilities import get_audit, get_storage
@@ -334,9 +334,7 @@ async def _get_resource_field(
334
334
  ) -> Response:
335
335
  storage = await get_storage(service_name=SERVICE_NAME)
336
336
  driver = get_driver()
337
-
338
- pb_field_id = FIELD_NAMES_TO_PB_TYPE_MAP[field_type]
339
-
337
+ pb_field_id = to_proto.field_type_name(field_type)
340
338
  async with driver.transaction() as txn:
341
339
  kb = ORMKnowledgeBox(txn, storage, kbid)
342
340
 
@@ -358,15 +356,15 @@ async def _get_resource_field(
358
356
 
359
357
  if isinstance(value, resources_pb2.FieldText):
360
358
  value = await field.get_value()
361
- resource_field.value = models.FieldText.from_message(value)
359
+ resource_field.value = from_proto.field_text(value)
362
360
 
363
361
  if isinstance(value, resources_pb2.FieldFile):
364
362
  value = await field.get_value()
365
- resource_field.value = models.FieldFile.from_message(value)
363
+ resource_field.value = from_proto.field_file(value)
366
364
 
367
365
  if isinstance(value, resources_pb2.FieldLink):
368
366
  value = await field.get_value()
369
- resource_field.value = models.FieldLink.from_message(value)
367
+ resource_field.value = from_proto.field_link(value)
370
368
 
371
369
  if isinstance(field, Conversation):
372
370
  if page == "first":
@@ -379,7 +377,7 @@ async def _get_resource_field(
379
377
 
380
378
  value = await field.get_value(page=page_to_fetch)
381
379
  if value is not None:
382
- resource_field.value = models.Conversation.from_message(value)
380
+ resource_field.value = from_proto.conversation(value)
383
381
 
384
382
  if ResourceFieldProperties.EXTRACTED in show and extracted:
385
383
  resource_field.extracted = FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP[field_type]()
@@ -391,9 +389,22 @@ async def _get_resource_field(
391
389
  )
392
390
 
393
391
  if ResourceFieldProperties.ERROR in show:
394
- error = await field.get_error()
395
- if error is not None:
396
- resource_field.error = Error(body=error.error, code=error.code)
392
+ status = await field.get_status()
393
+ if status is None:
394
+ status = FieldStatus()
395
+ resource_field.status = status.Status.Name(status.status)
396
+ if status.errors:
397
+ resource_field.errors = []
398
+ for error in status.errors:
399
+ resource_field.errors.append(
400
+ Error(
401
+ body=error.source_error.error,
402
+ code=error.source_error.code,
403
+ code_str=error.source_error.ErrorCode.Name(error.source_error.code),
404
+ created=error.created.ToDatetime(),
405
+ )
406
+ )
407
+ resource_field.error = resource_field.errors[-1]
397
408
 
398
409
  return Response(
399
410
  content=resource_field.model_dump_json(exclude_unset=True, by_alias=True),
@@ -32,6 +32,7 @@ from nucliadb.common.context.fastapi import get_app_context
32
32
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
33
33
  from nucliadb.common.http_clients import processing
34
34
  from nucliadb.common.maindb.utils import get_driver
35
+ from nucliadb.common.models_utils import from_proto
35
36
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
36
37
  from nucliadb.models.responses import HTTPClientError
37
38
  from nucliadb.reader import SERVICE_NAME
@@ -39,7 +40,6 @@ from nucliadb.reader.api.v1.router import KB_PREFIX, api
39
40
  from nucliadb.reader.reader.notifications import kb_notifications_stream
40
41
  from nucliadb_models.entities import (
41
42
  EntitiesGroup,
42
- EntitiesGroupSummary,
43
43
  KnowledgeBoxEntities,
44
44
  )
45
45
  from nucliadb_models.labels import KnowledgeBoxLabels, LabelSet
@@ -86,7 +86,7 @@ async def list_entities_groups(kbid: str):
86
86
  if entities_groups.status == ListEntitiesGroupsResponse.Status.OK:
87
87
  response = KnowledgeBoxEntities(uuid=kbid)
88
88
  for key, eg_summary in entities_groups.groups.items():
89
- entities_group = EntitiesGroupSummary.from_message(eg_summary)
89
+ entities_group = from_proto.entities_group_summary(eg_summary)
90
90
  response.groups[key] = entities_group
91
91
  return response
92
92
  elif entities_groups.status == ListEntitiesGroupsResponse.Status.NOTFOUND:
@@ -114,7 +114,7 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
114
114
 
115
115
  kbobj: GetEntitiesGroupResponse = await ingest.GetEntitiesGroup(l_request) # type: ignore
116
116
  if kbobj.status == GetEntitiesGroupResponse.Status.OK:
117
- response = EntitiesGroup.from_message(kbobj.group)
117
+ response = from_proto.entities_group(kbobj.group)
118
118
  return response
119
119
  elif kbobj.status == GetEntitiesGroupResponse.Status.KB_NOT_FOUND:
120
120
  raise HTTPException(status_code=404, detail=f"Knowledge Box '{kbid}' does not exist")
@@ -208,7 +208,7 @@ async def get_custom_synonyms(request: Request, kbid: str):
208
208
  if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
209
209
  raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
210
210
  synonyms = await datamanagers.atomic.synonyms.get(kbid=kbid) or Synonyms()
211
- return KnowledgeBoxSynonyms.from_message(synonyms)
211
+ return from_proto.kb_synonyms(synonyms)
212
212
 
213
213
 
214
214
  @api.get(
@@ -0,0 +1,48 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ from fastapi_versioning import version
21
+ from starlette.requests import Request
22
+
23
+ from nucliadb.common import datamanagers
24
+ from nucliadb.reader.api.v1.router import KB_PREFIX, api
25
+ from nucliadb_models.resource import (
26
+ NucliaDBRoles,
27
+ )
28
+ from nucliadb_models.vectorsets import VectorSetList, VectorSetListItem
29
+ from nucliadb_utils.authentication import requires_one
30
+
31
+
32
+ @api.get(
33
+ f"/{KB_PREFIX}/{{kbid}}/vectorsets",
34
+ status_code=200,
35
+ summary="List vector sets",
36
+ response_model=VectorSetList,
37
+ tags=["Vector Sets"],
38
+ # TODO: remove when the feature is mature
39
+ include_in_schema=False,
40
+ )
41
+ @requires_one([NucliaDBRoles.READER])
42
+ @version(1)
43
+ async def list_vectorsets(request: Request, kbid: str) -> VectorSetList:
44
+ vectorsets = []
45
+ async with datamanagers.with_ro_transaction() as txn:
46
+ async for vid, _ in datamanagers.vectorsets.iter(txn, kbid=kbid):
47
+ vectorsets.append(VectorSetListItem(id=vid))
48
+ return VectorSetList(vectorsets=vectorsets)
@@ -36,7 +36,8 @@ from nucliadb_models.search import (
36
36
  SyncAskResponse,
37
37
  parse_max_tokens,
38
38
  )
39
- from nucliadb_utils.authentication import requires
39
+ from nucliadb_models.security import RequestSecurity
40
+ from nucliadb_utils.authentication import NucliaUser, requires
40
41
 
41
42
 
42
43
  @api.post(
@@ -62,6 +63,15 @@ async def ask_knowledgebox_endpoint(
62
63
  "This is slower and requires waiting for entire answer to be ready.",
63
64
  ),
64
65
  ) -> Union[StreamingResponse, HTTPClientError, Response]:
66
+ current_user: NucliaUser = request.user
67
+ # If present, security groups from AuthorizationBackend overrides any
68
+ # security group of the payload
69
+ if current_user.security_groups:
70
+ if item.security is None:
71
+ item.security = RequestSecurity(groups=current_user.security_groups)
72
+ else:
73
+ item.security.groups = current_user.security_groups
74
+
65
75
  return await create_ask_response(
66
76
  kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for, x_synchronous
67
77
  )
@@ -18,10 +18,10 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
-
22
21
  from fastapi import Header, Request, Response
23
22
  from fastapi_versioning import version
24
23
 
24
+ from nucliadb.common.models_utils import to_proto
25
25
  from nucliadb.models.responses import HTTPClientError
26
26
  from nucliadb.search import logger
27
27
  from nucliadb.search.api.v1.router import KB_PREFIX, api
@@ -56,11 +56,11 @@ async def send_feedback_endpoint(
56
56
  audit.feedback(
57
57
  kbid=kbid,
58
58
  user=x_nucliadb_user,
59
- client_type=x_ndb_client.to_proto(),
59
+ client_type=to_proto.client_type(x_ndb_client),
60
60
  origin=x_forwarded_for,
61
61
  learning_id=item.ident,
62
62
  good=item.good,
63
- task=item.task.to_proto(),
63
+ task=to_proto.feedback_task(item.task),
64
64
  feedback=item.feedback,
65
65
  text_block_id=item.text_block_id,
66
66
  )
@@ -32,6 +32,7 @@ from nucliadb.common.cluster.utils import get_shard_manager
32
32
  from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
33
33
  from nucliadb.common.counters import IndexCounts
34
34
  from nucliadb.common.external_index_providers.manager import get_external_index_manager
35
+ from nucliadb.common.models_utils import from_proto
35
36
  from nucliadb.search import logger
36
37
  from nucliadb.search.api.v1.router import KB_PREFIX, api
37
38
  from nucliadb.search.api.v1.utils import fastapi_query
@@ -47,9 +48,7 @@ from nucliadb_protos.noderesources_pb2 import Shard
47
48
  from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject
48
49
  from nucliadb_protos.writer_pb2 import Shards
49
50
  from nucliadb_telemetry import errors
50
- from nucliadb_utils import const
51
51
  from nucliadb_utils.authentication import requires, requires_one
52
- from nucliadb_utils.utilities import has_feature
53
52
 
54
53
  MAX_PARAGRAPHS_FOR_SMALL_KB = 250_000
55
54
 
@@ -73,7 +72,7 @@ async def knowledgebox_shards(request: Request, kbid: str) -> KnowledgeboxShards
73
72
  status_code=404,
74
73
  detail="The knowledgebox or its shards configuration is missing",
75
74
  )
76
- return KnowledgeboxShards.from_message(shards)
75
+ return from_proto.kb_shards(shards)
77
76
 
78
77
 
79
78
  @api.get(
@@ -124,8 +123,9 @@ async def _kb_counters(
124
123
  counters.sentences = index_counts.sentences
125
124
  is_small_kb = index_counts.paragraphs < MAX_PARAGRAPHS_FOR_SMALL_KB
126
125
  resource_count = await get_resources_count(kbid, force_calculate=is_small_kb)
127
- # TODO: Find a way to query the fields count from the external index provider or use the catalog
126
+ # TODO: Find a way to query the fields count and size from the external index provider or use the catalog
128
127
  counters.resources = counters.fields = resource_count
128
+ counters.index_size = counters.paragraphs * AVG_PARAGRAPH_SIZE_BYTES
129
129
  else:
130
130
  node_index_counts, queried_shards = await get_node_index_counts(kbid)
131
131
  counters.fields = node_index_counts.fields
@@ -134,7 +134,7 @@ async def _kb_counters(
134
134
  is_small_kb = node_index_counts.paragraphs < MAX_PARAGRAPHS_FOR_SMALL_KB
135
135
  resource_count = await get_resources_count(kbid, force_calculate=is_small_kb)
136
136
  counters.resources = resource_count
137
- counters.index_size = counters.paragraphs * AVG_PARAGRAPH_SIZE_BYTES
137
+ counters.index_size = node_index_counts.size_bytes
138
138
  if debug and queried_shards is not None:
139
139
  counters.shards = queried_shards
140
140
  return counters
@@ -165,9 +165,7 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
165
165
  queried_shards = []
166
166
  for shard_object in shard_groups:
167
167
  try:
168
- node, shard_id = choose_node(
169
- shard_object, use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": kbid})
170
- )
168
+ node, shard_id = choose_node(shard_object)
171
169
  except KeyError:
172
170
  raise HTTPException(
173
171
  status_code=500,
@@ -205,11 +203,7 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
205
203
  if results is None:
206
204
  raise HTTPException(status_code=503, detail=f"No shards found")
207
205
 
208
- counts = IndexCounts(
209
- fields=0,
210
- paragraphs=0,
211
- sentences=0,
212
- )
206
+ counts = IndexCounts(fields=0, paragraphs=0, sentences=0, size_bytes=0)
213
207
  for shard in results:
214
208
  if isinstance(shard, Exception):
215
209
  logger.error("Error getting shard info", exc_info=shard)
@@ -218,4 +212,5 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
218
212
  counts.fields += shard.fields
219
213
  counts.paragraphs += shard.paragraphs
220
214
  counts.sentences += shard.sentences
215
+ counts.size_bytes += shard.size_bytes
221
216
  return counts, queried_shards
@@ -27,6 +27,7 @@ from fastapi_versioning import version
27
27
  from pydantic import ValidationError
28
28
 
29
29
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
30
+ from nucliadb.common.models_utils import to_proto
30
31
  from nucliadb.models.responses import HTTPClientError
31
32
  from nucliadb.search import predict
32
33
  from nucliadb.search.api.v1.router import KB_PREFIX, api
@@ -292,7 +293,7 @@ async def search(
292
293
  hidden=await filter_hidden_resources(kbid, item.show_hidden),
293
294
  rephrase_prompt=item.rephrase_prompt,
294
295
  )
295
- pb_query, incomplete_results, autofilters = await query_parser.parse()
296
+ pb_query, incomplete_results, autofilters, _ = await query_parser.parse()
296
297
 
297
298
  results, query_incomplete_results, queried_nodes = await node_query(
298
299
  kbid, Method.SEARCH, pb_query, target_shard_replicas=item.shards
@@ -318,7 +319,7 @@ async def search(
318
319
  audit.search(
319
320
  kbid,
320
321
  x_nucliadb_user,
321
- x_ndb_client.to_proto(),
322
+ to_proto.client_type(x_ndb_client),
322
323
  x_forwarded_for,
323
324
  pb_query,
324
325
  time() - start_time,
@@ -151,8 +151,6 @@ async def suggest(
151
151
  search_results = await merge_suggest_results(
152
152
  results,
153
153
  kbid=kbid,
154
- show=show,
155
- field_type_filter=field_type_filter,
156
154
  highlight=highlight,
157
155
  )
158
156
 
@@ -21,7 +21,7 @@ import json
21
21
  import os
22
22
  import random
23
23
  from enum import Enum
24
- from typing import Any, AsyncIterator, Optional
24
+ from typing import Any, AsyncGenerator, Optional
25
25
  from unittest.mock import AsyncMock, Mock
26
26
 
27
27
  import aiohttp
@@ -121,12 +121,14 @@ class AnswerStatusCode(str, Enum):
121
121
  SUCCESS = "0"
122
122
  ERROR = "-1"
123
123
  NO_CONTEXT = "-2"
124
+ NO_RETRIEVAL_DATA = "-3"
124
125
 
125
126
  def prettify(self) -> str:
126
127
  return {
127
128
  AnswerStatusCode.SUCCESS: "success",
128
129
  AnswerStatusCode.ERROR: "error",
129
130
  AnswerStatusCode.NO_CONTEXT: "no_context",
131
+ AnswerStatusCode.NO_RETRIEVAL_DATA: "no_retrieval_data",
130
132
  }[self]
131
133
 
132
134
 
@@ -266,7 +268,7 @@ class PredictEngine:
266
268
  @predict_observer.wrap({"type": "chat_ndjson"})
267
269
  async def chat_query_ndjson(
268
270
  self, kbid: str, item: ChatModel
269
- ) -> tuple[str, str, AsyncIterator[GenerativeChunk]]:
271
+ ) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
270
272
  """
271
273
  Chat query using the new stream format
272
274
  Format specs: https://github.com/ndjson/ndjson-spec
@@ -442,7 +444,7 @@ class DummyPredictEngine(PredictEngine):
442
444
 
443
445
  async def chat_query_ndjson(
444
446
  self, kbid: str, item: ChatModel
445
- ) -> tuple[str, str, AsyncIterator[GenerativeChunk]]:
447
+ ) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
446
448
  self.calls.append(("chat_query_ndjson", item))
447
449
 
448
450
  async def generate():
@@ -553,7 +555,7 @@ def get_answer_generator(response: aiohttp.ClientResponse):
553
555
 
554
556
  def get_chat_ndjson_generator(
555
557
  response: aiohttp.ClientResponse,
556
- ) -> AsyncIterator[GenerativeChunk]:
558
+ ) -> AsyncGenerator[GenerativeChunk, None]:
557
559
  async def _parse_generative_chunks(gen):
558
560
  async for chunk in gen:
559
561
  try:
@@ -123,7 +123,6 @@ async def node_query(
123
123
  try:
124
124
  node, shard_id = cluster_manager.choose_node(
125
125
  shard_obj,
126
- use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": kbid}),
127
126
  use_read_replica_nodes=use_read_replica_nodes,
128
127
  target_shard_replicas=target_shard_replicas,
129
128
  )
@@ -224,7 +223,7 @@ def validate_node_query_results(results: list[Any]) -> Optional[HTTPException]:
224
223
  )
225
224
  else:
226
225
  errors.capture_exception(result)
227
- logger.exception("Error while querying shard data", exc_info=result)
226
+ logger.exception(f"Error while querying shard data {result}", exc_info=result)
228
227
 
229
228
  return HTTPException(status_code=status_code, detail=reason)
230
229