nucliadb 6.3.7.post4116__py3-none-any.whl → 6.3.7.post4119__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. migrations/0017_multiple_writable_shards.py +16 -13
  2. migrations/0025_assign_models_to_kbs_v2.py +3 -6
  3. nucliadb/common/cluster/base.py +6 -4
  4. nucliadb/common/cluster/grpc_node_dummy.py +5 -4
  5. nucliadb/common/cluster/manager.py +7 -9
  6. nucliadb/common/cluster/rebalance.py +2 -1
  7. nucliadb/common/cluster/rollover.py +2 -2
  8. nucliadb/common/cluster/utils.py +2 -1
  9. nucliadb/common/datamanagers/rollover.py +2 -3
  10. nucliadb/common/external_index_providers/base.py +2 -2
  11. nucliadb/common/external_index_providers/pinecone.py +2 -2
  12. nucliadb/common/nidx.py +3 -3
  13. nucliadb/common/vector_index_config.py +39 -0
  14. nucliadb/ingest/consumer/auditing.py +3 -1
  15. nucliadb/ingest/consumer/shard_creator.py +3 -1
  16. nucliadb/ingest/orm/brain.py +10 -9
  17. nucliadb/ingest/orm/brain_v2.py +10 -9
  18. nucliadb/ingest/orm/entities.py +8 -7
  19. nucliadb/ingest/orm/index_message.py +2 -1
  20. nucliadb/ingest/orm/knowledgebox.py +4 -3
  21. nucliadb/ingest/orm/processor/pgcatalog.py +2 -1
  22. nucliadb/ingest/orm/processor/processor.py +2 -3
  23. nucliadb/metrics_exporter.py +2 -1
  24. nucliadb/search/api/v1/knowledgebox.py +1 -1
  25. nucliadb/search/requesters/utils.py +8 -8
  26. nucliadb/search/search/chat/query.py +5 -4
  27. nucliadb/search/search/fetch.py +2 -1
  28. nucliadb/search/search/find_merge.py +9 -8
  29. nucliadb/search/search/graph_merge.py +2 -1
  30. nucliadb/search/search/graph_strategy.py +1 -1
  31. nucliadb/search/search/merge.py +12 -11
  32. nucliadb/search/search/query.py +4 -2
  33. nucliadb/search/search/query_parser/filter_expression.py +2 -1
  34. nucliadb/search/search/query_parser/models.py +2 -1
  35. nucliadb/search/search/query_parser/old_filters.py +2 -1
  36. nucliadb/search/search/query_parser/parsers/find.py +2 -1
  37. nucliadb/search/search/query_parser/parsers/graph.py +3 -1
  38. nucliadb/search/search/query_parser/parsers/search.py +3 -1
  39. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +4 -2
  40. nucliadb/search/search/shards.py +4 -4
  41. nucliadb/train/generators/field_classifier.py +2 -1
  42. nucliadb/train/generators/field_streaming.py +2 -1
  43. nucliadb/train/generators/paragraph_classifier.py +1 -1
  44. nucliadb/train/generators/paragraph_streaming.py +2 -1
  45. nucliadb/train/generators/question_answer_streaming.py +2 -1
  46. nucliadb/train/generators/sentence_classifier.py +1 -1
  47. nucliadb/train/generators/token_classifier.py +2 -1
  48. nucliadb/writer/back_pressure.py +0 -24
  49. {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4119.dist-info}/METADATA +6 -6
  50. {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4119.dist-info}/RECORD +53 -52
  51. {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4119.dist-info}/WHEEL +0 -0
  52. {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4119.dist-info}/entry_points.txt +0 -0
  53. {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4119.dist-info}/top_level.txt +0 -0
@@ -31,7 +31,6 @@ future multiple writable shards will be possible.
31
31
 
32
32
  import logging
33
33
 
34
- from nucliadb.common import datamanagers
35
34
  from nucliadb.migrator.context import ExecutionContext
36
35
 
37
36
  logger = logging.getLogger(__name__)
@@ -41,18 +40,22 @@ async def migrate(context: ExecutionContext) -> None: ...
41
40
 
42
41
 
43
42
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
44
- async with context.kv_driver.transaction() as txn:
45
- shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
46
- if shards is None:
47
- logger.error("KB without shards", extra={"kbid": kbid})
48
- return
43
+ pass
49
44
 
50
- for shard_object in shards.shards:
51
- shard_object.read_only = True
52
- shards.shards[shards.actual].read_only = False
45
+ # No longer relevant with nidx
53
46
 
54
- # just ensure we're writing it correctly
55
- assert [shard_object.read_only for shard_object in shards.shards].count(False) == 1
47
+ # async with context.kv_driver.transaction() as txn:
48
+ # shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
49
+ # if shards is None:
50
+ # logger.error("KB without shards", extra={"kbid": kbid})
51
+ # return
56
52
 
57
- await datamanagers.cluster.update_kb_shards(txn, kbid=kbid, shards=shards)
58
- await txn.commit()
53
+ # for shard_object in shards.shards:
54
+ # shard_object.read_only = True
55
+ # shards.shards[shards.actual].read_only = False
56
+
57
+ # # just ensure we're writing it correctly
58
+ # assert [shard_object.read_only for shard_object in shards.shards].count(False) == 1
59
+
60
+ # await datamanagers.cluster.update_kb_shards(txn, kbid=kbid, shards=shards)
61
+ # await txn.commit()
@@ -38,10 +38,7 @@ import logging
38
38
  from nucliadb import learning_proxy
39
39
  from nucliadb.common import datamanagers
40
40
  from nucliadb.migrator.context import ExecutionContext
41
- from nucliadb_protos import (
42
- knowledgebox_pb2,
43
- nodewriter_pb2,
44
- )
41
+ from nucliadb_protos import knowledgebox_pb2
45
42
 
46
43
  logger = logging.getLogger(__name__)
47
44
 
@@ -97,10 +94,10 @@ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
97
94
 
98
95
  default_vectorset = knowledgebox_pb2.VectorSetConfig(
99
96
  vectorset_id=vectorset_id,
100
- vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
97
+ vectorset_index_config=knowledgebox_pb2.VectorIndexConfig(
101
98
  vector_dimension=maindb_vector_dimension,
102
99
  similarity=maindb_similarity,
103
- vector_type=nodewriter_pb2.VectorType.DENSE_F32, # we only support this for now
100
+ vector_type=knowledgebox_pb2.VectorType.DENSE_F32, # we only support this for now
104
101
  normalize_vectors=maindb_normalize_vectors,
105
102
  ),
106
103
  matryoshka_dimensions=maindb_matryoshka_dimensions,
@@ -20,15 +20,17 @@
20
20
  from abc import ABCMeta, abstractmethod
21
21
  from typing import AsyncIterator
22
22
 
23
- from nucliadb_protos import nodereader_pb2, noderesources_pb2, utils_pb2
24
- from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
25
- from nucliadb_protos.nodewriter_pb2 import (
23
+ from nidx_protos import nodereader_pb2, noderesources_pb2
24
+ from nidx_protos.nodereader_pb2_grpc import NodeReaderStub
25
+ from nidx_protos.nodewriter_pb2 import (
26
26
  NewShardRequest,
27
27
  NewVectorSetRequest,
28
28
  OpStatus,
29
29
  VectorIndexConfig,
30
30
  )
31
- from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
31
+ from nidx_protos.nodewriter_pb2_grpc import NodeWriterStub
32
+
33
+ from nucliadb_protos import utils_pb2
32
34
 
33
35
 
34
36
  class AbstractIndexNode(metaclass=ABCMeta):
@@ -19,19 +19,20 @@
19
19
  #
20
20
  from typing import Any
21
21
 
22
- from nucliadb_protos.nodereader_pb2 import (
22
+ from nidx_protos.nodereader_pb2 import (
23
23
  EdgeList,
24
24
  RelationEdge,
25
25
  )
26
- from nucliadb_protos.noderesources_pb2 import (
26
+ from nidx_protos.noderesources_pb2 import (
27
27
  EmptyResponse,
28
28
  ShardCreated,
29
29
  ShardId,
30
30
  ShardIds,
31
31
  VectorSetList,
32
32
  )
33
- from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
34
- from nucliadb_protos.nodewriter_pb2 import OpStatus
33
+ from nidx_protos.noderesources_pb2 import Shard as NodeResourcesShard
34
+ from nidx_protos.nodewriter_pb2 import OpStatus
35
+
35
36
  from nucliadb_protos.utils_pb2 import Relation
36
37
 
37
38
 
@@ -22,6 +22,9 @@ import logging
22
22
  import uuid
23
23
  from typing import Any, Awaitable, Callable, Optional
24
24
 
25
+ from nidx_protos import noderesources_pb2, nodewriter_pb2
26
+ from nidx_protos.nodewriter_pb2 import IndexMessage, IndexMessageSource, NewShardRequest, TypeMessage
27
+
25
28
  from nucliadb.common import datamanagers
26
29
  from nucliadb.common.cluster.base import AbstractIndexNode
27
30
  from nucliadb.common.cluster.exceptions import (
@@ -31,13 +34,8 @@ from nucliadb.common.cluster.exceptions import (
31
34
  )
32
35
  from nucliadb.common.maindb.driver import Transaction
33
36
  from nucliadb.common.nidx import get_nidx, get_nidx_api_client, get_nidx_fake_node
34
- from nucliadb_protos import (
35
- knowledgebox_pb2,
36
- noderesources_pb2,
37
- nodewriter_pb2,
38
- writer_pb2,
39
- )
40
- from nucliadb_protos.nodewriter_pb2 import IndexMessage, IndexMessageSource, NewShardRequest, TypeMessage
37
+ from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
38
+ from nucliadb_protos import knowledgebox_pb2, writer_pb2
41
39
  from nucliadb_telemetry import errors
42
40
  from nucliadb_utils.utilities import get_storage
43
41
 
@@ -123,7 +121,7 @@ class KBShardManager:
123
121
  raise ShardsNotFound(msg)
124
122
 
125
123
  vectorsets = {
126
- vectorset_id: vectorset_config.vectorset_index_config
124
+ vectorset_id: nucliadb_index_config_to_nidx(vectorset_config.vectorset_index_config)
127
125
  async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(txn, kbid=kbid)
128
126
  }
129
127
 
@@ -256,7 +254,7 @@ class KBShardManager:
256
254
 
257
255
  async def _create_vectorset(node: AbstractIndexNode, shard_id: str):
258
256
  vectorset_id = config.vectorset_id
259
- index_config = config.vectorset_index_config
257
+ index_config = nucliadb_index_config_to_nidx(config.vectorset_index_config)
260
258
  result = await node.add_vectorset(shard_id, vectorset_id, index_config)
261
259
  if result.status != result.Status.OK:
262
260
  raise NodeError(
@@ -20,11 +20,12 @@
20
20
  import asyncio
21
21
  import logging
22
22
 
23
+ from nidx_protos import nodereader_pb2, noderesources_pb2
24
+
23
25
  from nucliadb.common import datamanagers, locking
24
26
  from nucliadb.common.cluster.manager import choose_node
25
27
  from nucliadb.common.cluster.utils import get_shard_manager
26
28
  from nucliadb.common.context import ApplicationContext
27
- from nucliadb_protos import nodereader_pb2, noderesources_pb2
28
29
  from nucliadb_telemetry import errors
29
30
  from nucliadb_telemetry.logs import setup_logging
30
31
  from nucliadb_telemetry.utils import setup_telemetry
@@ -31,6 +31,7 @@ from nucliadb.common.external_index_providers.manager import (
31
31
  get_external_index_manager,
32
32
  )
33
33
  from nucliadb.common.nidx import get_nidx_fake_node
34
+ from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
34
35
  from nucliadb.migrator.settings import settings
35
36
  from nucliadb_protos import writer_pb2
36
37
  from nucliadb_telemetry import errors
@@ -137,9 +138,8 @@ async def create_rollover_shards(
137
138
  created_shards = []
138
139
  try:
139
140
  for shard in kb_shards.shards:
140
- shard.ClearField("replicas")
141
141
  vectorsets = {
142
- vectorset_id: vectorset_config.vectorset_index_config
142
+ vectorset_id: nucliadb_index_config_to_nidx(vectorset_config.vectorset_index_config)
143
143
  async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(txn, kbid=kbid)
144
144
  }
145
145
 
@@ -21,6 +21,7 @@ import logging
21
21
  from typing import TYPE_CHECKING, Optional, Union
22
22
 
23
23
  import backoff
24
+ from nidx_protos import nodereader_pb2
24
25
 
25
26
  from nucliadb.common import datamanagers
26
27
  from nucliadb.common.cluster.manager import (
@@ -30,7 +31,7 @@ from nucliadb.common.cluster.manager import (
30
31
  from nucliadb.common.cluster.settings import settings
31
32
  from nucliadb.ingest.orm import index_message
32
33
  from nucliadb.ingest.orm.resource import Resource
33
- from nucliadb_protos import nodereader_pb2, writer_pb2
34
+ from nucliadb_protos import writer_pb2
34
35
  from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
35
36
 
36
37
  if TYPE_CHECKING: # pragma: no cover
@@ -79,9 +79,8 @@ async def is_rollover_shard(txn: Transaction, *, kbid: str, shard_id: str) -> bo
79
79
  return False
80
80
 
81
81
  for shard_obj in shards.shards:
82
- for replica_obj in shard_obj.replicas:
83
- if shard_id == replica_obj.shard.id:
84
- return True
82
+ if shard_id == shard_obj.nidx_shard_id:
83
+ return True
85
84
  return False
86
85
 
87
86
 
@@ -22,6 +22,8 @@ import logging
22
22
  from dataclasses import dataclass
23
23
  from typing import Any, Iterator, Optional
24
24
 
25
+ from nidx_protos.nodereader_pb2 import SearchRequest
26
+ from nidx_protos.noderesources_pb2 import Resource
25
27
  from pydantic import BaseModel
26
28
 
27
29
  from nucliadb.common.counters import IndexCounts
@@ -33,8 +35,6 @@ from nucliadb_protos.knowledgebox_pb2 import (
33
35
  CreateExternalIndexProviderMetadata,
34
36
  StoredExternalIndexProviderMetadata,
35
37
  )
36
- from nucliadb_protos.nodereader_pb2 import SearchRequest
37
- from nucliadb_protos.noderesources_pb2 import Resource
38
38
  from nucliadb_protos.utils_pb2 import VectorSimilarity
39
39
  from nucliadb_telemetry.metrics import Observer
40
40
 
@@ -25,6 +25,8 @@ from uuid import uuid4
25
25
 
26
26
  import backoff
27
27
  from cachetools import TTLCache
28
+ from nidx_protos.nodereader_pb2 import FilterExpression, SearchRequest
29
+ from nidx_protos.noderesources_pb2 import IndexParagraph, Resource, VectorSentence
28
30
  from pydantic import BaseModel
29
31
 
30
32
  from nucliadb.common.counters import IndexCounts
@@ -40,8 +42,6 @@ from nucliadb.common.ids import ParagraphId, VectorId
40
42
  from nucliadb_models.search import SCORE_TYPE, TextPosition
41
43
  from nucliadb_protos import knowledgebox_pb2 as kb_pb2
42
44
  from nucliadb_protos import utils_pb2
43
- from nucliadb_protos.nodereader_pb2 import FilterExpression, SearchRequest
44
- from nucliadb_protos.noderesources_pb2 import IndexParagraph, Resource, VectorSentence
45
45
  from nucliadb_telemetry.metrics import Observer
46
46
  from nucliadb_utils.aiopynecone.client import DataPlane, FilterOperator, LogicalOperator
47
47
  from nucliadb_utils.aiopynecone.exceptions import (
nucliadb/common/nidx.py CHANGED
@@ -22,14 +22,14 @@ import os
22
22
  from typing import Optional
23
23
 
24
24
  from nidx_protos.nidx_pb2_grpc import NidxApiStub, NidxIndexerStub, NidxSearcherStub
25
+ from nidx_protos.nodewriter_pb2 import (
26
+ IndexMessage,
27
+ )
25
28
 
26
29
  from nucliadb.common.cluster.base import AbstractIndexNode
27
30
  from nucliadb.common.cluster.settings import settings
28
31
  from nucliadb.ingest.settings import DriverConfig
29
32
  from nucliadb.ingest.settings import settings as ingest_settings
30
- from nucliadb_protos.nodewriter_pb2 import (
31
- IndexMessage,
32
- )
33
33
  from nucliadb_utils import logger
34
34
  from nucliadb_utils.grpc import get_traced_grpc_channel
35
35
  from nucliadb_utils.nats import NatsConnectionManager
@@ -0,0 +1,39 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from nidx_protos import nodewriter_pb2 as Nidx
22
+
23
+ from nucliadb_protos import knowledgebox_pb2 as Nucliadb
24
+
25
+
26
+ def nucliadb_vector_type_to_nidx(nucliadb: Nucliadb.VectorType.ValueType) -> Nidx.VectorType.ValueType:
27
+ if nucliadb == Nucliadb.DENSE_F32:
28
+ return Nidx.DENSE_F32
29
+ else: # pragma: nocover
30
+ raise Exception("Unknown vector type")
31
+
32
+
33
+ def nucliadb_index_config_to_nidx(nucliadb: Nucliadb.VectorIndexConfig) -> Nidx.VectorIndexConfig:
34
+ return Nidx.VectorIndexConfig(
35
+ normalize_vectors=nucliadb.normalize_vectors,
36
+ similarity=nucliadb.similarity,
37
+ vector_dimension=nucliadb.vector_dimension,
38
+ vector_type=nucliadb_vector_type_to_nidx(nucliadb.vector_type),
39
+ )
@@ -23,12 +23,14 @@ import logging
23
23
  import uuid
24
24
  from functools import partial
25
25
 
26
+ from nidx_protos import nodereader_pb2, noderesources_pb2
27
+
26
28
  from nucliadb.common import datamanagers
27
29
  from nucliadb.common.cluster.exceptions import ShardsNotFound
28
30
  from nucliadb.common.cluster.manager import choose_node
29
31
  from nucliadb.common.cluster.utils import get_shard_manager
30
32
  from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
31
- from nucliadb_protos import audit_pb2, nodereader_pb2, noderesources_pb2, writer_pb2
33
+ from nucliadb_protos import audit_pb2, writer_pb2
32
34
  from nucliadb_utils import const
33
35
  from nucliadb_utils.audit.audit import AuditStorage
34
36
  from nucliadb_utils.cache.pubsub import PubSubDriver
@@ -22,11 +22,13 @@ import logging
22
22
  import uuid
23
23
  from functools import partial
24
24
 
25
+ from nidx_protos import nodereader_pb2, noderesources_pb2
26
+
25
27
  from nucliadb.common import locking
26
28
  from nucliadb.common.cluster.manager import choose_node
27
29
  from nucliadb.common.cluster.utils import get_shard_manager
28
30
  from nucliadb.common.maindb.driver import Driver
29
- from nucliadb_protos import nodereader_pb2, noderesources_pb2, writer_pb2
31
+ from nucliadb_protos import writer_pb2
30
32
  from nucliadb_utils import const
31
33
  from nucliadb_utils.cache.pubsub import PubSubDriver
32
34
  from nucliadb_utils.storages.storage import Storage
@@ -22,21 +22,22 @@ from copy import deepcopy
22
22
  from dataclasses import dataclass
23
23
  from typing import Optional
24
24
 
25
+ from nidx_protos.noderesources_pb2 import IndexParagraph as BrainParagraph
26
+ from nidx_protos.noderesources_pb2 import (
27
+ IndexRelation,
28
+ ParagraphMetadata,
29
+ Representation,
30
+ ResourceID,
31
+ )
32
+ from nidx_protos.noderesources_pb2 import Position as TextPosition
33
+ from nidx_protos.noderesources_pb2 import Resource as PBBrainResource
34
+
25
35
  from nucliadb.common import ids
26
36
  from nucliadb.ingest import logger
27
37
  from nucliadb.ingest.orm.utils import compute_paragraph_key
28
38
  from nucliadb_models.labels import BASE_LABELS, LABEL_HIDDEN, flatten_resource_labels
29
39
  from nucliadb_models.metadata import ResourceProcessingStatus
30
40
  from nucliadb_protos import utils_pb2
31
- from nucliadb_protos.noderesources_pb2 import IndexParagraph as BrainParagraph
32
- from nucliadb_protos.noderesources_pb2 import (
33
- IndexRelation,
34
- ParagraphMetadata,
35
- Representation,
36
- ResourceID,
37
- )
38
- from nucliadb_protos.noderesources_pb2 import Position as TextPosition
39
- from nucliadb_protos.noderesources_pb2 import Resource as PBBrainResource
40
41
  from nucliadb_protos.resources_pb2 import (
41
42
  Basic,
42
43
  ExtractedText,
@@ -22,6 +22,16 @@ from copy import deepcopy
22
22
  from dataclasses import dataclass
23
23
  from typing import Optional
24
24
 
25
+ from nidx_protos.noderesources_pb2 import IndexParagraph as BrainParagraph
26
+ from nidx_protos.noderesources_pb2 import (
27
+ IndexRelation,
28
+ ParagraphMetadata,
29
+ Representation,
30
+ ResourceID,
31
+ )
32
+ from nidx_protos.noderesources_pb2 import Position as TextPosition
33
+ from nidx_protos.noderesources_pb2 import Resource as PBBrainResource
34
+
25
35
  from nucliadb.common import ids
26
36
  from nucliadb.ingest import logger
27
37
  from nucliadb.ingest.orm.metrics import brain_observer as observer
@@ -29,15 +39,6 @@ from nucliadb.ingest.orm.utils import compute_paragraph_key
29
39
  from nucliadb_models.labels import BASE_LABELS, LABEL_HIDDEN, flatten_resource_labels
30
40
  from nucliadb_models.metadata import ResourceProcessingStatus
31
41
  from nucliadb_protos import utils_pb2
32
- from nucliadb_protos.noderesources_pb2 import IndexParagraph as BrainParagraph
33
- from nucliadb_protos.noderesources_pb2 import (
34
- IndexRelation,
35
- ParagraphMetadata,
36
- Representation,
37
- ResourceID,
38
- )
39
- from nucliadb_protos.noderesources_pb2 import Position as TextPosition
40
- from nucliadb_protos.noderesources_pb2 import Resource as PBBrainResource
41
42
  from nucliadb_protos.resources_pb2 import (
42
43
  Basic,
43
44
  ExtractedText,
@@ -21,6 +21,14 @@
21
21
  import asyncio
22
22
  from typing import AsyncGenerator, Optional
23
23
 
24
+ from nidx_protos.nodereader_pb2 import (
25
+ Faceted,
26
+ GraphSearchRequest,
27
+ GraphSearchResponse,
28
+ SearchRequest,
29
+ SearchResponse,
30
+ )
31
+
24
32
  from nucliadb.common import datamanagers
25
33
  from nucliadb.common.cluster.base import AbstractIndexNode
26
34
  from nucliadb.common.cluster.exceptions import (
@@ -43,13 +51,6 @@ from nucliadb_protos.knowledgebox_pb2 import (
43
51
  EntitiesGroupSummary,
44
52
  Entity,
45
53
  )
46
- from nucliadb_protos.nodereader_pb2 import (
47
- Faceted,
48
- GraphSearchRequest,
49
- GraphSearchResponse,
50
- SearchRequest,
51
- SearchResponse,
52
- )
53
54
  from nucliadb_protos.utils_pb2 import RelationNode
54
55
  from nucliadb_protos.writer_pb2 import GetEntitiesResponse
55
56
 
@@ -22,6 +22,8 @@
22
22
  import asyncio
23
23
  from typing import Optional
24
24
 
25
+ from nidx_protos.noderesources_pb2 import Resource as IndexMessage
26
+
25
27
  from nucliadb.common import datamanagers
26
28
  from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
27
29
  from nucliadb.ingest.fields.file import File
@@ -29,7 +31,6 @@ from nucliadb.ingest.orm.brain_v2 import ResourceBrainV2 as ResourceBrain
29
31
  from nucliadb.ingest.orm.metrics import index_message_observer as observer
30
32
  from nucliadb.ingest.orm.resource import Resource, get_file_page_positions
31
33
  from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
32
- from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
33
34
  from nucliadb_protos.resources_pb2 import Basic, FieldID, FieldType
34
35
  from nucliadb_protos.writer_pb2 import BrokerMessage
35
36
  from nucliadb_utils import const
@@ -24,6 +24,7 @@ from uuid import uuid4
24
24
 
25
25
  from grpc import StatusCode
26
26
  from grpc.aio import AioRpcError
27
+ from nidx_protos import noderesources_pb2
27
28
 
28
29
  from nucliadb.common import datamanagers
29
30
  from nucliadb.common.cluster.exceptions import ShardNotFound
@@ -49,7 +50,7 @@ from nucliadb.ingest.orm.metrics import processor_observer
49
50
  from nucliadb.ingest.orm.resource import Resource
50
51
  from nucliadb.ingest.orm.utils import choose_matryoshka_dimension, compute_paragraph_key
51
52
  from nucliadb.migrator.utils import get_latest_version
52
- from nucliadb_protos import knowledgebox_pb2, noderesources_pb2, nodewriter_pb2, writer_pb2
53
+ from nucliadb_protos import knowledgebox_pb2, writer_pb2
53
54
  from nucliadb_protos.knowledgebox_pb2 import (
54
55
  CreateExternalIndexProviderMetadata,
55
56
  ExternalIndexProviderType,
@@ -165,10 +166,10 @@ class KnowledgeBox:
165
166
 
166
167
  vectorset_config = knowledgebox_pb2.VectorSetConfig(
167
168
  vectorset_id=vectorset_id,
168
- vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
169
+ vectorset_index_config=knowledgebox_pb2.VectorIndexConfig(
169
170
  similarity=semantic_model.similarity_function,
170
171
  # XXX: hardcoded value
171
- vector_type=nodewriter_pb2.VectorType.DENSE_F32,
172
+ vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
172
173
  normalize_vectors=len(semantic_model.matryoshka_dimensions) > 0,
173
174
  vector_dimension=dimension,
174
175
  ),
@@ -20,10 +20,11 @@
20
20
 
21
21
  from typing import cast
22
22
 
23
+ from nidx_protos.noderesources_pb2 import Resource as IndexMessage
24
+
23
25
  from nucliadb.common.maindb.driver import Transaction
24
26
  from nucliadb.common.maindb.pg import PGDriver, PGTransaction
25
27
  from nucliadb.common.maindb.utils import get_driver
26
- from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
27
28
  from nucliadb_telemetry import metrics
28
29
 
29
30
  from ..resource import Resource
@@ -24,6 +24,8 @@ from typing import Optional
24
24
  import aiohttp.client_exceptions
25
25
  import nats.errors
26
26
  import nats.js.errors
27
+ from nidx_protos import noderesources_pb2, nodewriter_pb2
28
+ from nidx_protos.noderesources_pb2 import Resource as PBBrainResource
27
29
 
28
30
  from nucliadb.common import datamanagers, locking
29
31
  from nucliadb.common.cluster.settings import settings as cluster_settings
@@ -50,12 +52,9 @@ from nucliadb.ingest.orm.processor.data_augmentation import (
50
52
  from nucliadb.ingest.orm.resource import Resource
51
53
  from nucliadb_protos import (
52
54
  knowledgebox_pb2,
53
- noderesources_pb2,
54
- nodewriter_pb2,
55
55
  resources_pb2,
56
56
  writer_pb2,
57
57
  )
58
- from nucliadb_protos.noderesources_pb2 import Resource as PBBrainResource
59
58
  from nucliadb_telemetry import errors
60
59
  from nucliadb_utils import const
61
60
  from nucliadb_utils.cache.pubsub import PubSubDriver
@@ -22,6 +22,8 @@ from __future__ import annotations
22
22
  import asyncio
23
23
  from typing import AsyncGenerator, Callable, Tuple, cast
24
24
 
25
+ from nidx_protos.noderesources_pb2 import EmptyQuery, NodeMetadata
26
+
25
27
  from nucliadb import logger
26
28
  from nucliadb.common import datamanagers
27
29
  from nucliadb.common.context import ApplicationContext
@@ -29,7 +31,6 @@ from nucliadb.common.maindb.pg import PGDriver
29
31
  from nucliadb.common.maindb.utils import get_driver
30
32
  from nucliadb.common.nidx import get_nidx_api_client
31
33
  from nucliadb.migrator.datamanager import MigrationsDataManager
32
- from nucliadb_protos.noderesources_pb2 import EmptyQuery, NodeMetadata
33
34
  from nucliadb_telemetry import metrics
34
35
  from nucliadb_telemetry.logs import setup_logging
35
36
  from nucliadb_telemetry.utils import setup_telemetry
@@ -24,6 +24,7 @@ from fastapi import HTTPException, Request
24
24
  from fastapi_versioning import version
25
25
  from grpc import StatusCode as GrpcStatusCode
26
26
  from grpc.aio import AioRpcError
27
+ from nidx_protos.noderesources_pb2 import Shard
27
28
 
28
29
  from nucliadb.common import datamanagers
29
30
  from nucliadb.common.cluster.exceptions import ShardsNotFound
@@ -44,7 +45,6 @@ from nucliadb_models.search import (
44
45
  KnowledgeboxCounters,
45
46
  SearchParamDefaults,
46
47
  )
47
- from nucliadb_protos.noderesources_pb2 import Shard
48
48
  from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject
49
49
  from nucliadb_protos.writer_pb2 import Shards
50
50
  from nucliadb_telemetry import errors
@@ -26,6 +26,14 @@ from fastapi import HTTPException
26
26
  from google.protobuf.json_format import MessageToDict
27
27
  from grpc import StatusCode as GrpcStatusCode
28
28
  from grpc.aio import AioRpcError
29
+ from nidx_protos.nodereader_pb2 import (
30
+ GraphSearchRequest,
31
+ GraphSearchResponse,
32
+ SearchRequest,
33
+ SearchResponse,
34
+ SuggestRequest,
35
+ SuggestResponse,
36
+ )
29
37
 
30
38
  from nucliadb.common.cluster import manager as cluster_manager
31
39
  from nucliadb.common.cluster.base import AbstractIndexNode
@@ -38,14 +46,6 @@ from nucliadb.search.search.shards import (
38
46
  suggest_shard,
39
47
  )
40
48
  from nucliadb.search.settings import settings
41
- from nucliadb_protos.nodereader_pb2 import (
42
- GraphSearchRequest,
43
- GraphSearchResponse,
44
- SearchRequest,
45
- SearchResponse,
46
- SuggestRequest,
47
- SuggestResponse,
48
- )
49
49
  from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject
50
50
  from nucliadb_telemetry import errors
51
51
 
@@ -20,6 +20,11 @@
20
20
  import asyncio
21
21
  from typing import Iterable, Optional, Union
22
22
 
23
+ from nidx_protos.nodereader_pb2 import (
24
+ GraphSearchResponse,
25
+ SearchResponse,
26
+ )
27
+
23
28
  from nucliadb.common.models_utils import to_proto
24
29
  from nucliadb.search import logger
25
30
  from nucliadb.search.predict import AnswerStatusCode
@@ -52,10 +57,6 @@ from nucliadb_models.search import (
52
57
  parse_rephrase_prompt,
53
58
  )
54
59
  from nucliadb_protos import audit_pb2
55
- from nucliadb_protos.nodereader_pb2 import (
56
- GraphSearchResponse,
57
- SearchResponse,
58
- )
59
60
  from nucliadb_protos.utils_pb2 import RelationNode
60
61
  from nucliadb_telemetry.errors import capture_exception
61
62
  from nucliadb_utils.utilities import get_audit
@@ -21,6 +21,8 @@ import asyncio
21
21
  from contextvars import ContextVar
22
22
  from typing import Optional
23
23
 
24
+ from nidx_protos.nodereader_pb2 import DocumentResult, ParagraphResult
25
+
24
26
  from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
25
27
  from nucliadb.common.maindb.utils import get_driver
26
28
  from nucliadb.ingest.orm.resource import Resource as ResourceORM
@@ -30,7 +32,6 @@ from nucliadb.search.search import cache
30
32
  from nucliadb_models.common import FieldTypeName
31
33
  from nucliadb_models.resource import ExtractedDataTypeName, Resource
32
34
  from nucliadb_models.search import ResourceProperties
33
- from nucliadb_protos.nodereader_pb2 import DocumentResult, ParagraphResult
34
35
  from nucliadb_protos.resources_pb2 import Paragraph
35
36
  from nucliadb_utils import const
36
37
  from nucliadb_utils.utilities import has_feature