nucliadb 6.3.7.post4116__py3-none-any.whl → 6.3.7.post4120__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0017_multiple_writable_shards.py +16 -13
- migrations/0025_assign_models_to_kbs_v2.py +3 -6
- nucliadb/common/cluster/base.py +6 -4
- nucliadb/common/cluster/grpc_node_dummy.py +5 -4
- nucliadb/common/cluster/manager.py +7 -9
- nucliadb/common/cluster/rebalance.py +2 -1
- nucliadb/common/cluster/rollover.py +2 -2
- nucliadb/common/cluster/utils.py +2 -1
- nucliadb/common/datamanagers/rollover.py +2 -3
- nucliadb/common/external_index_providers/base.py +2 -2
- nucliadb/common/external_index_providers/pinecone.py +2 -2
- nucliadb/common/nidx.py +3 -3
- nucliadb/common/vector_index_config.py +39 -0
- nucliadb/ingest/consumer/auditing.py +3 -1
- nucliadb/ingest/consumer/shard_creator.py +3 -1
- nucliadb/ingest/fields/text.py +3 -1
- nucliadb/ingest/orm/brain.py +12 -11
- nucliadb/ingest/orm/brain_v2.py +10 -9
- nucliadb/ingest/orm/entities.py +8 -7
- nucliadb/ingest/orm/index_message.py +2 -1
- nucliadb/ingest/orm/knowledgebox.py +4 -3
- nucliadb/ingest/orm/processor/pgcatalog.py +2 -1
- nucliadb/ingest/orm/processor/processor.py +2 -3
- nucliadb/ingest/orm/resource.py +5 -2
- nucliadb/metrics_exporter.py +2 -1
- nucliadb/search/api/v1/knowledgebox.py +1 -1
- nucliadb/search/requesters/utils.py +8 -8
- nucliadb/search/search/chat/query.py +5 -4
- nucliadb/search/search/fetch.py +2 -1
- nucliadb/search/search/find_merge.py +9 -8
- nucliadb/search/search/graph_merge.py +2 -1
- nucliadb/search/search/graph_strategy.py +1 -1
- nucliadb/search/search/merge.py +12 -11
- nucliadb/search/search/query.py +4 -2
- nucliadb/search/search/query_parser/filter_expression.py +2 -1
- nucliadb/search/search/query_parser/models.py +2 -1
- nucliadb/search/search/query_parser/old_filters.py +2 -1
- nucliadb/search/search/query_parser/parsers/find.py +2 -1
- nucliadb/search/search/query_parser/parsers/graph.py +3 -1
- nucliadb/search/search/query_parser/parsers/search.py +3 -1
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +4 -2
- nucliadb/search/search/shards.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -1
- nucliadb/train/generators/field_streaming.py +2 -1
- nucliadb/train/generators/paragraph_classifier.py +1 -1
- nucliadb/train/generators/paragraph_streaming.py +2 -1
- nucliadb/train/generators/question_answer_streaming.py +2 -1
- nucliadb/train/generators/sentence_classifier.py +1 -1
- nucliadb/train/generators/token_classifier.py +2 -1
- nucliadb/writer/back_pressure.py +0 -24
- {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4120.dist-info}/METADATA +6 -6
- {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4120.dist-info}/RECORD +55 -54
- {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4120.dist-info}/WHEEL +0 -0
- {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4120.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.7.post4116.dist-info → nucliadb-6.3.7.post4120.dist-info}/top_level.txt +0 -0
@@ -31,7 +31,6 @@ future multiple writable shards will be possible.
|
|
31
31
|
|
32
32
|
import logging
|
33
33
|
|
34
|
-
from nucliadb.common import datamanagers
|
35
34
|
from nucliadb.migrator.context import ExecutionContext
|
36
35
|
|
37
36
|
logger = logging.getLogger(__name__)
|
@@ -41,18 +40,22 @@ async def migrate(context: ExecutionContext) -> None: ...
|
|
41
40
|
|
42
41
|
|
43
42
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
44
|
-
|
45
|
-
shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
|
46
|
-
if shards is None:
|
47
|
-
logger.error("KB without shards", extra={"kbid": kbid})
|
48
|
-
return
|
43
|
+
pass
|
49
44
|
|
50
|
-
|
51
|
-
shard_object.read_only = True
|
52
|
-
shards.shards[shards.actual].read_only = False
|
45
|
+
# No longer relevant with nidx
|
53
46
|
|
54
|
-
|
55
|
-
|
47
|
+
# async with context.kv_driver.transaction() as txn:
|
48
|
+
# shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
|
49
|
+
# if shards is None:
|
50
|
+
# logger.error("KB without shards", extra={"kbid": kbid})
|
51
|
+
# return
|
56
52
|
|
57
|
-
|
58
|
-
|
53
|
+
# for shard_object in shards.shards:
|
54
|
+
# shard_object.read_only = True
|
55
|
+
# shards.shards[shards.actual].read_only = False
|
56
|
+
|
57
|
+
# # just ensure we're writing it correctly
|
58
|
+
# assert [shard_object.read_only for shard_object in shards.shards].count(False) == 1
|
59
|
+
|
60
|
+
# await datamanagers.cluster.update_kb_shards(txn, kbid=kbid, shards=shards)
|
61
|
+
# await txn.commit()
|
@@ -38,10 +38,7 @@ import logging
|
|
38
38
|
from nucliadb import learning_proxy
|
39
39
|
from nucliadb.common import datamanagers
|
40
40
|
from nucliadb.migrator.context import ExecutionContext
|
41
|
-
from nucliadb_protos import (
|
42
|
-
knowledgebox_pb2,
|
43
|
-
nodewriter_pb2,
|
44
|
-
)
|
41
|
+
from nucliadb_protos import knowledgebox_pb2
|
45
42
|
|
46
43
|
logger = logging.getLogger(__name__)
|
47
44
|
|
@@ -97,10 +94,10 @@ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
|
97
94
|
|
98
95
|
default_vectorset = knowledgebox_pb2.VectorSetConfig(
|
99
96
|
vectorset_id=vectorset_id,
|
100
|
-
vectorset_index_config=
|
97
|
+
vectorset_index_config=knowledgebox_pb2.VectorIndexConfig(
|
101
98
|
vector_dimension=maindb_vector_dimension,
|
102
99
|
similarity=maindb_similarity,
|
103
|
-
vector_type=
|
100
|
+
vector_type=knowledgebox_pb2.VectorType.DENSE_F32, # we only support this for now
|
104
101
|
normalize_vectors=maindb_normalize_vectors,
|
105
102
|
),
|
106
103
|
matryoshka_dimensions=maindb_matryoshka_dimensions,
|
nucliadb/common/cluster/base.py
CHANGED
@@ -20,15 +20,17 @@
|
|
20
20
|
from abc import ABCMeta, abstractmethod
|
21
21
|
from typing import AsyncIterator
|
22
22
|
|
23
|
-
from
|
24
|
-
from
|
25
|
-
from
|
23
|
+
from nidx_protos import nodereader_pb2, noderesources_pb2
|
24
|
+
from nidx_protos.nodereader_pb2_grpc import NodeReaderStub
|
25
|
+
from nidx_protos.nodewriter_pb2 import (
|
26
26
|
NewShardRequest,
|
27
27
|
NewVectorSetRequest,
|
28
28
|
OpStatus,
|
29
29
|
VectorIndexConfig,
|
30
30
|
)
|
31
|
-
from
|
31
|
+
from nidx_protos.nodewriter_pb2_grpc import NodeWriterStub
|
32
|
+
|
33
|
+
from nucliadb_protos import utils_pb2
|
32
34
|
|
33
35
|
|
34
36
|
class AbstractIndexNode(metaclass=ABCMeta):
|
@@ -19,19 +19,20 @@
|
|
19
19
|
#
|
20
20
|
from typing import Any
|
21
21
|
|
22
|
-
from
|
22
|
+
from nidx_protos.nodereader_pb2 import (
|
23
23
|
EdgeList,
|
24
24
|
RelationEdge,
|
25
25
|
)
|
26
|
-
from
|
26
|
+
from nidx_protos.noderesources_pb2 import (
|
27
27
|
EmptyResponse,
|
28
28
|
ShardCreated,
|
29
29
|
ShardId,
|
30
30
|
ShardIds,
|
31
31
|
VectorSetList,
|
32
32
|
)
|
33
|
-
from
|
34
|
-
from
|
33
|
+
from nidx_protos.noderesources_pb2 import Shard as NodeResourcesShard
|
34
|
+
from nidx_protos.nodewriter_pb2 import OpStatus
|
35
|
+
|
35
36
|
from nucliadb_protos.utils_pb2 import Relation
|
36
37
|
|
37
38
|
|
@@ -22,6 +22,9 @@ import logging
|
|
22
22
|
import uuid
|
23
23
|
from typing import Any, Awaitable, Callable, Optional
|
24
24
|
|
25
|
+
from nidx_protos import noderesources_pb2, nodewriter_pb2
|
26
|
+
from nidx_protos.nodewriter_pb2 import IndexMessage, IndexMessageSource, NewShardRequest, TypeMessage
|
27
|
+
|
25
28
|
from nucliadb.common import datamanagers
|
26
29
|
from nucliadb.common.cluster.base import AbstractIndexNode
|
27
30
|
from nucliadb.common.cluster.exceptions import (
|
@@ -31,13 +34,8 @@ from nucliadb.common.cluster.exceptions import (
|
|
31
34
|
)
|
32
35
|
from nucliadb.common.maindb.driver import Transaction
|
33
36
|
from nucliadb.common.nidx import get_nidx, get_nidx_api_client, get_nidx_fake_node
|
34
|
-
from
|
35
|
-
|
36
|
-
noderesources_pb2,
|
37
|
-
nodewriter_pb2,
|
38
|
-
writer_pb2,
|
39
|
-
)
|
40
|
-
from nucliadb_protos.nodewriter_pb2 import IndexMessage, IndexMessageSource, NewShardRequest, TypeMessage
|
37
|
+
from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
|
38
|
+
from nucliadb_protos import knowledgebox_pb2, writer_pb2
|
41
39
|
from nucliadb_telemetry import errors
|
42
40
|
from nucliadb_utils.utilities import get_storage
|
43
41
|
|
@@ -123,7 +121,7 @@ class KBShardManager:
|
|
123
121
|
raise ShardsNotFound(msg)
|
124
122
|
|
125
123
|
vectorsets = {
|
126
|
-
vectorset_id: vectorset_config.vectorset_index_config
|
124
|
+
vectorset_id: nucliadb_index_config_to_nidx(vectorset_config.vectorset_index_config)
|
127
125
|
async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(txn, kbid=kbid)
|
128
126
|
}
|
129
127
|
|
@@ -256,7 +254,7 @@ class KBShardManager:
|
|
256
254
|
|
257
255
|
async def _create_vectorset(node: AbstractIndexNode, shard_id: str):
|
258
256
|
vectorset_id = config.vectorset_id
|
259
|
-
index_config = config.vectorset_index_config
|
257
|
+
index_config = nucliadb_index_config_to_nidx(config.vectorset_index_config)
|
260
258
|
result = await node.add_vectorset(shard_id, vectorset_id, index_config)
|
261
259
|
if result.status != result.Status.OK:
|
262
260
|
raise NodeError(
|
@@ -20,11 +20,12 @@
|
|
20
20
|
import asyncio
|
21
21
|
import logging
|
22
22
|
|
23
|
+
from nidx_protos import nodereader_pb2, noderesources_pb2
|
24
|
+
|
23
25
|
from nucliadb.common import datamanagers, locking
|
24
26
|
from nucliadb.common.cluster.manager import choose_node
|
25
27
|
from nucliadb.common.cluster.utils import get_shard_manager
|
26
28
|
from nucliadb.common.context import ApplicationContext
|
27
|
-
from nucliadb_protos import nodereader_pb2, noderesources_pb2
|
28
29
|
from nucliadb_telemetry import errors
|
29
30
|
from nucliadb_telemetry.logs import setup_logging
|
30
31
|
from nucliadb_telemetry.utils import setup_telemetry
|
@@ -31,6 +31,7 @@ from nucliadb.common.external_index_providers.manager import (
|
|
31
31
|
get_external_index_manager,
|
32
32
|
)
|
33
33
|
from nucliadb.common.nidx import get_nidx_fake_node
|
34
|
+
from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
|
34
35
|
from nucliadb.migrator.settings import settings
|
35
36
|
from nucliadb_protos import writer_pb2
|
36
37
|
from nucliadb_telemetry import errors
|
@@ -137,9 +138,8 @@ async def create_rollover_shards(
|
|
137
138
|
created_shards = []
|
138
139
|
try:
|
139
140
|
for shard in kb_shards.shards:
|
140
|
-
shard.ClearField("replicas")
|
141
141
|
vectorsets = {
|
142
|
-
vectorset_id: vectorset_config.vectorset_index_config
|
142
|
+
vectorset_id: nucliadb_index_config_to_nidx(vectorset_config.vectorset_index_config)
|
143
143
|
async for vectorset_id, vectorset_config in datamanagers.vectorsets.iter(txn, kbid=kbid)
|
144
144
|
}
|
145
145
|
|
nucliadb/common/cluster/utils.py
CHANGED
@@ -21,6 +21,7 @@ import logging
|
|
21
21
|
from typing import TYPE_CHECKING, Optional, Union
|
22
22
|
|
23
23
|
import backoff
|
24
|
+
from nidx_protos import nodereader_pb2
|
24
25
|
|
25
26
|
from nucliadb.common import datamanagers
|
26
27
|
from nucliadb.common.cluster.manager import (
|
@@ -30,7 +31,7 @@ from nucliadb.common.cluster.manager import (
|
|
30
31
|
from nucliadb.common.cluster.settings import settings
|
31
32
|
from nucliadb.ingest.orm import index_message
|
32
33
|
from nucliadb.ingest.orm.resource import Resource
|
33
|
-
from nucliadb_protos import
|
34
|
+
from nucliadb_protos import writer_pb2
|
34
35
|
from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
|
35
36
|
|
36
37
|
if TYPE_CHECKING: # pragma: no cover
|
@@ -79,9 +79,8 @@ async def is_rollover_shard(txn: Transaction, *, kbid: str, shard_id: str) -> bo
|
|
79
79
|
return False
|
80
80
|
|
81
81
|
for shard_obj in shards.shards:
|
82
|
-
|
83
|
-
|
84
|
-
return True
|
82
|
+
if shard_id == shard_obj.nidx_shard_id:
|
83
|
+
return True
|
85
84
|
return False
|
86
85
|
|
87
86
|
|
@@ -22,6 +22,8 @@ import logging
|
|
22
22
|
from dataclasses import dataclass
|
23
23
|
from typing import Any, Iterator, Optional
|
24
24
|
|
25
|
+
from nidx_protos.nodereader_pb2 import SearchRequest
|
26
|
+
from nidx_protos.noderesources_pb2 import Resource
|
25
27
|
from pydantic import BaseModel
|
26
28
|
|
27
29
|
from nucliadb.common.counters import IndexCounts
|
@@ -33,8 +35,6 @@ from nucliadb_protos.knowledgebox_pb2 import (
|
|
33
35
|
CreateExternalIndexProviderMetadata,
|
34
36
|
StoredExternalIndexProviderMetadata,
|
35
37
|
)
|
36
|
-
from nucliadb_protos.nodereader_pb2 import SearchRequest
|
37
|
-
from nucliadb_protos.noderesources_pb2 import Resource
|
38
38
|
from nucliadb_protos.utils_pb2 import VectorSimilarity
|
39
39
|
from nucliadb_telemetry.metrics import Observer
|
40
40
|
|
@@ -25,6 +25,8 @@ from uuid import uuid4
|
|
25
25
|
|
26
26
|
import backoff
|
27
27
|
from cachetools import TTLCache
|
28
|
+
from nidx_protos.nodereader_pb2 import FilterExpression, SearchRequest
|
29
|
+
from nidx_protos.noderesources_pb2 import IndexParagraph, Resource, VectorSentence
|
28
30
|
from pydantic import BaseModel
|
29
31
|
|
30
32
|
from nucliadb.common.counters import IndexCounts
|
@@ -40,8 +42,6 @@ from nucliadb.common.ids import ParagraphId, VectorId
|
|
40
42
|
from nucliadb_models.search import SCORE_TYPE, TextPosition
|
41
43
|
from nucliadb_protos import knowledgebox_pb2 as kb_pb2
|
42
44
|
from nucliadb_protos import utils_pb2
|
43
|
-
from nucliadb_protos.nodereader_pb2 import FilterExpression, SearchRequest
|
44
|
-
from nucliadb_protos.noderesources_pb2 import IndexParagraph, Resource, VectorSentence
|
45
45
|
from nucliadb_telemetry.metrics import Observer
|
46
46
|
from nucliadb_utils.aiopynecone.client import DataPlane, FilterOperator, LogicalOperator
|
47
47
|
from nucliadb_utils.aiopynecone.exceptions import (
|
nucliadb/common/nidx.py
CHANGED
@@ -22,14 +22,14 @@ import os
|
|
22
22
|
from typing import Optional
|
23
23
|
|
24
24
|
from nidx_protos.nidx_pb2_grpc import NidxApiStub, NidxIndexerStub, NidxSearcherStub
|
25
|
+
from nidx_protos.nodewriter_pb2 import (
|
26
|
+
IndexMessage,
|
27
|
+
)
|
25
28
|
|
26
29
|
from nucliadb.common.cluster.base import AbstractIndexNode
|
27
30
|
from nucliadb.common.cluster.settings import settings
|
28
31
|
from nucliadb.ingest.settings import DriverConfig
|
29
32
|
from nucliadb.ingest.settings import settings as ingest_settings
|
30
|
-
from nucliadb_protos.nodewriter_pb2 import (
|
31
|
-
IndexMessage,
|
32
|
-
)
|
33
33
|
from nucliadb_utils import logger
|
34
34
|
from nucliadb_utils.grpc import get_traced_grpc_channel
|
35
35
|
from nucliadb_utils.nats import NatsConnectionManager
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from nidx_protos import nodewriter_pb2 as Nidx
|
22
|
+
|
23
|
+
from nucliadb_protos import knowledgebox_pb2 as Nucliadb
|
24
|
+
|
25
|
+
|
26
|
+
def nucliadb_vector_type_to_nidx(nucliadb: Nucliadb.VectorType.ValueType) -> Nidx.VectorType.ValueType:
|
27
|
+
if nucliadb == Nucliadb.DENSE_F32:
|
28
|
+
return Nidx.DENSE_F32
|
29
|
+
else: # pragma: nocover
|
30
|
+
raise Exception("Unknown vector type")
|
31
|
+
|
32
|
+
|
33
|
+
def nucliadb_index_config_to_nidx(nucliadb: Nucliadb.VectorIndexConfig) -> Nidx.VectorIndexConfig:
|
34
|
+
return Nidx.VectorIndexConfig(
|
35
|
+
normalize_vectors=nucliadb.normalize_vectors,
|
36
|
+
similarity=nucliadb.similarity,
|
37
|
+
vector_dimension=nucliadb.vector_dimension,
|
38
|
+
vector_type=nucliadb_vector_type_to_nidx(nucliadb.vector_type),
|
39
|
+
)
|
@@ -23,12 +23,14 @@ import logging
|
|
23
23
|
import uuid
|
24
24
|
from functools import partial
|
25
25
|
|
26
|
+
from nidx_protos import nodereader_pb2, noderesources_pb2
|
27
|
+
|
26
28
|
from nucliadb.common import datamanagers
|
27
29
|
from nucliadb.common.cluster.exceptions import ShardsNotFound
|
28
30
|
from nucliadb.common.cluster.manager import choose_node
|
29
31
|
from nucliadb.common.cluster.utils import get_shard_manager
|
30
32
|
from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
|
31
|
-
from nucliadb_protos import audit_pb2,
|
33
|
+
from nucliadb_protos import audit_pb2, writer_pb2
|
32
34
|
from nucliadb_utils import const
|
33
35
|
from nucliadb_utils.audit.audit import AuditStorage
|
34
36
|
from nucliadb_utils.cache.pubsub import PubSubDriver
|
@@ -22,11 +22,13 @@ import logging
|
|
22
22
|
import uuid
|
23
23
|
from functools import partial
|
24
24
|
|
25
|
+
from nidx_protos import nodereader_pb2, noderesources_pb2
|
26
|
+
|
25
27
|
from nucliadb.common import locking
|
26
28
|
from nucliadb.common.cluster.manager import choose_node
|
27
29
|
from nucliadb.common.cluster.utils import get_shard_manager
|
28
30
|
from nucliadb.common.maindb.driver import Driver
|
29
|
-
from nucliadb_protos import
|
31
|
+
from nucliadb_protos import writer_pb2
|
30
32
|
from nucliadb_utils import const
|
31
33
|
from nucliadb_utils.cache.pubsub import PubSubDriver
|
32
34
|
from nucliadb_utils.storages.storage import Storage
|
nucliadb/ingest/fields/text.py
CHANGED
@@ -22,6 +22,7 @@ import hashlib
|
|
22
22
|
from typing import Optional
|
23
23
|
|
24
24
|
from nucliadb.ingest.fields.base import Field
|
25
|
+
from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
|
25
26
|
from nucliadb_protos.resources_pb2 import FieldAuthor, FieldText
|
26
27
|
|
27
28
|
|
@@ -32,7 +33,8 @@ class Text(Field[FieldText]):
|
|
32
33
|
|
33
34
|
async def generated_by(self) -> FieldAuthor:
|
34
35
|
value = await self.get_value()
|
35
|
-
|
36
|
+
if value is None:
|
37
|
+
raise FieldAuthorNotFound("Field has no value, can't know who generated it")
|
36
38
|
return value.generated_by
|
37
39
|
|
38
40
|
async def set_value(self, payload: FieldText):
|
nucliadb/ingest/orm/brain.py
CHANGED
@@ -22,21 +22,22 @@ from copy import deepcopy
|
|
22
22
|
from dataclasses import dataclass
|
23
23
|
from typing import Optional
|
24
24
|
|
25
|
+
from nidx_protos.noderesources_pb2 import IndexParagraph as BrainParagraph
|
26
|
+
from nidx_protos.noderesources_pb2 import (
|
27
|
+
IndexRelation,
|
28
|
+
ParagraphMetadata,
|
29
|
+
Representation,
|
30
|
+
ResourceID,
|
31
|
+
)
|
32
|
+
from nidx_protos.noderesources_pb2 import Position as TextPosition
|
33
|
+
from nidx_protos.noderesources_pb2 import Resource as PBBrainResource
|
34
|
+
|
25
35
|
from nucliadb.common import ids
|
26
36
|
from nucliadb.ingest import logger
|
27
37
|
from nucliadb.ingest.orm.utils import compute_paragraph_key
|
28
38
|
from nucliadb_models.labels import BASE_LABELS, LABEL_HIDDEN, flatten_resource_labels
|
29
39
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
30
40
|
from nucliadb_protos import utils_pb2
|
31
|
-
from nucliadb_protos.noderesources_pb2 import IndexParagraph as BrainParagraph
|
32
|
-
from nucliadb_protos.noderesources_pb2 import (
|
33
|
-
IndexRelation,
|
34
|
-
ParagraphMetadata,
|
35
|
-
Representation,
|
36
|
-
ResourceID,
|
37
|
-
)
|
38
|
-
from nucliadb_protos.noderesources_pb2 import Position as TextPosition
|
39
|
-
from nucliadb_protos.noderesources_pb2 import Resource as PBBrainResource
|
40
41
|
from nucliadb_protos.resources_pb2 import (
|
41
42
|
Basic,
|
42
43
|
ExtractedText,
|
@@ -575,7 +576,7 @@ class ResourceBrain:
|
|
575
576
|
field_key: str,
|
576
577
|
metadata: Optional[FieldComputedMetadata],
|
577
578
|
uuid: str,
|
578
|
-
generated_by: FieldAuthor,
|
579
|
+
generated_by: Optional[FieldAuthor],
|
579
580
|
basic_user_metadata: Optional[UserMetadata] = None,
|
580
581
|
basic_user_fieldmetadata: Optional[UserFieldMetadata] = None,
|
581
582
|
):
|
@@ -628,7 +629,7 @@ class ResourceBrain:
|
|
628
629
|
paragraph_annotation.key
|
629
630
|
].labels.append(label)
|
630
631
|
|
631
|
-
if generated_by.WhichOneof("author") == "data_augmentation":
|
632
|
+
if generated_by is not None and generated_by.WhichOneof("author") == "data_augmentation":
|
632
633
|
field_type, field_id = field_key.split("/")
|
633
634
|
da_task_id = ids.extract_data_augmentation_id(field_id)
|
634
635
|
if da_task_id is None: # pragma: nocover
|
nucliadb/ingest/orm/brain_v2.py
CHANGED
@@ -22,6 +22,16 @@ from copy import deepcopy
|
|
22
22
|
from dataclasses import dataclass
|
23
23
|
from typing import Optional
|
24
24
|
|
25
|
+
from nidx_protos.noderesources_pb2 import IndexParagraph as BrainParagraph
|
26
|
+
from nidx_protos.noderesources_pb2 import (
|
27
|
+
IndexRelation,
|
28
|
+
ParagraphMetadata,
|
29
|
+
Representation,
|
30
|
+
ResourceID,
|
31
|
+
)
|
32
|
+
from nidx_protos.noderesources_pb2 import Position as TextPosition
|
33
|
+
from nidx_protos.noderesources_pb2 import Resource as PBBrainResource
|
34
|
+
|
25
35
|
from nucliadb.common import ids
|
26
36
|
from nucliadb.ingest import logger
|
27
37
|
from nucliadb.ingest.orm.metrics import brain_observer as observer
|
@@ -29,15 +39,6 @@ from nucliadb.ingest.orm.utils import compute_paragraph_key
|
|
29
39
|
from nucliadb_models.labels import BASE_LABELS, LABEL_HIDDEN, flatten_resource_labels
|
30
40
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
31
41
|
from nucliadb_protos import utils_pb2
|
32
|
-
from nucliadb_protos.noderesources_pb2 import IndexParagraph as BrainParagraph
|
33
|
-
from nucliadb_protos.noderesources_pb2 import (
|
34
|
-
IndexRelation,
|
35
|
-
ParagraphMetadata,
|
36
|
-
Representation,
|
37
|
-
ResourceID,
|
38
|
-
)
|
39
|
-
from nucliadb_protos.noderesources_pb2 import Position as TextPosition
|
40
|
-
from nucliadb_protos.noderesources_pb2 import Resource as PBBrainResource
|
41
42
|
from nucliadb_protos.resources_pb2 import (
|
42
43
|
Basic,
|
43
44
|
ExtractedText,
|
nucliadb/ingest/orm/entities.py
CHANGED
@@ -21,6 +21,14 @@
|
|
21
21
|
import asyncio
|
22
22
|
from typing import AsyncGenerator, Optional
|
23
23
|
|
24
|
+
from nidx_protos.nodereader_pb2 import (
|
25
|
+
Faceted,
|
26
|
+
GraphSearchRequest,
|
27
|
+
GraphSearchResponse,
|
28
|
+
SearchRequest,
|
29
|
+
SearchResponse,
|
30
|
+
)
|
31
|
+
|
24
32
|
from nucliadb.common import datamanagers
|
25
33
|
from nucliadb.common.cluster.base import AbstractIndexNode
|
26
34
|
from nucliadb.common.cluster.exceptions import (
|
@@ -43,13 +51,6 @@ from nucliadb_protos.knowledgebox_pb2 import (
|
|
43
51
|
EntitiesGroupSummary,
|
44
52
|
Entity,
|
45
53
|
)
|
46
|
-
from nucliadb_protos.nodereader_pb2 import (
|
47
|
-
Faceted,
|
48
|
-
GraphSearchRequest,
|
49
|
-
GraphSearchResponse,
|
50
|
-
SearchRequest,
|
51
|
-
SearchResponse,
|
52
|
-
)
|
53
54
|
from nucliadb_protos.utils_pb2 import RelationNode
|
54
55
|
from nucliadb_protos.writer_pb2 import GetEntitiesResponse
|
55
56
|
|
@@ -22,6 +22,8 @@
|
|
22
22
|
import asyncio
|
23
23
|
from typing import Optional
|
24
24
|
|
25
|
+
from nidx_protos.noderesources_pb2 import Resource as IndexMessage
|
26
|
+
|
25
27
|
from nucliadb.common import datamanagers
|
26
28
|
from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
|
27
29
|
from nucliadb.ingest.fields.file import File
|
@@ -29,7 +31,6 @@ from nucliadb.ingest.orm.brain_v2 import ResourceBrainV2 as ResourceBrain
|
|
29
31
|
from nucliadb.ingest.orm.metrics import index_message_observer as observer
|
30
32
|
from nucliadb.ingest.orm.resource import Resource, get_file_page_positions
|
31
33
|
from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
|
32
|
-
from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
|
33
34
|
from nucliadb_protos.resources_pb2 import Basic, FieldID, FieldType
|
34
35
|
from nucliadb_protos.writer_pb2 import BrokerMessage
|
35
36
|
from nucliadb_utils import const
|
@@ -24,6 +24,7 @@ from uuid import uuid4
|
|
24
24
|
|
25
25
|
from grpc import StatusCode
|
26
26
|
from grpc.aio import AioRpcError
|
27
|
+
from nidx_protos import noderesources_pb2
|
27
28
|
|
28
29
|
from nucliadb.common import datamanagers
|
29
30
|
from nucliadb.common.cluster.exceptions import ShardNotFound
|
@@ -49,7 +50,7 @@ from nucliadb.ingest.orm.metrics import processor_observer
|
|
49
50
|
from nucliadb.ingest.orm.resource import Resource
|
50
51
|
from nucliadb.ingest.orm.utils import choose_matryoshka_dimension, compute_paragraph_key
|
51
52
|
from nucliadb.migrator.utils import get_latest_version
|
52
|
-
from nucliadb_protos import knowledgebox_pb2,
|
53
|
+
from nucliadb_protos import knowledgebox_pb2, writer_pb2
|
53
54
|
from nucliadb_protos.knowledgebox_pb2 import (
|
54
55
|
CreateExternalIndexProviderMetadata,
|
55
56
|
ExternalIndexProviderType,
|
@@ -165,10 +166,10 @@ class KnowledgeBox:
|
|
165
166
|
|
166
167
|
vectorset_config = knowledgebox_pb2.VectorSetConfig(
|
167
168
|
vectorset_id=vectorset_id,
|
168
|
-
vectorset_index_config=
|
169
|
+
vectorset_index_config=knowledgebox_pb2.VectorIndexConfig(
|
169
170
|
similarity=semantic_model.similarity_function,
|
170
171
|
# XXX: hardcoded value
|
171
|
-
vector_type=
|
172
|
+
vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
|
172
173
|
normalize_vectors=len(semantic_model.matryoshka_dimensions) > 0,
|
173
174
|
vector_dimension=dimension,
|
174
175
|
),
|
@@ -20,10 +20,11 @@
|
|
20
20
|
|
21
21
|
from typing import cast
|
22
22
|
|
23
|
+
from nidx_protos.noderesources_pb2 import Resource as IndexMessage
|
24
|
+
|
23
25
|
from nucliadb.common.maindb.driver import Transaction
|
24
26
|
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
25
27
|
from nucliadb.common.maindb.utils import get_driver
|
26
|
-
from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
|
27
28
|
from nucliadb_telemetry import metrics
|
28
29
|
|
29
30
|
from ..resource import Resource
|
@@ -24,6 +24,8 @@ from typing import Optional
|
|
24
24
|
import aiohttp.client_exceptions
|
25
25
|
import nats.errors
|
26
26
|
import nats.js.errors
|
27
|
+
from nidx_protos import noderesources_pb2, nodewriter_pb2
|
28
|
+
from nidx_protos.noderesources_pb2 import Resource as PBBrainResource
|
27
29
|
|
28
30
|
from nucliadb.common import datamanagers, locking
|
29
31
|
from nucliadb.common.cluster.settings import settings as cluster_settings
|
@@ -50,12 +52,9 @@ from nucliadb.ingest.orm.processor.data_augmentation import (
|
|
50
52
|
from nucliadb.ingest.orm.resource import Resource
|
51
53
|
from nucliadb_protos import (
|
52
54
|
knowledgebox_pb2,
|
53
|
-
noderesources_pb2,
|
54
|
-
nodewriter_pb2,
|
55
55
|
resources_pb2,
|
56
56
|
writer_pb2,
|
57
57
|
)
|
58
|
-
from nucliadb_protos.noderesources_pb2 import Resource as PBBrainResource
|
59
58
|
from nucliadb_telemetry import errors
|
60
59
|
from nucliadb_utils import const
|
61
60
|
from nucliadb_utils.cache.pubsub import PubSubDriver
|
nucliadb/ingest/orm/resource.py
CHANGED
@@ -32,6 +32,7 @@ from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR, FieldId
|
|
32
32
|
from nucliadb.common.maindb.driver import Transaction
|
33
33
|
from nucliadb.ingest.fields.base import Field
|
34
34
|
from nucliadb.ingest.fields.conversation import Conversation
|
35
|
+
from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
|
35
36
|
from nucliadb.ingest.fields.file import File
|
36
37
|
from nucliadb.ingest.fields.generic import VALID_GENERIC_FIELDS, Generic
|
37
38
|
from nucliadb.ingest.fields.link import Link
|
@@ -974,8 +975,10 @@ class Resource:
|
|
974
975
|
):
|
975
976
|
valid_user_field_metadata = user_field_metadata
|
976
977
|
break
|
977
|
-
|
978
|
-
|
978
|
+
try:
|
979
|
+
generated_by = await fieldobj.generated_by()
|
980
|
+
except FieldAuthorNotFound:
|
981
|
+
generated_by = None
|
979
982
|
brain.apply_field_labels(
|
980
983
|
fieldkey,
|
981
984
|
extracted_metadata,
|
nucliadb/metrics_exporter.py
CHANGED
@@ -22,6 +22,8 @@ from __future__ import annotations
|
|
22
22
|
import asyncio
|
23
23
|
from typing import AsyncGenerator, Callable, Tuple, cast
|
24
24
|
|
25
|
+
from nidx_protos.noderesources_pb2 import EmptyQuery, NodeMetadata
|
26
|
+
|
25
27
|
from nucliadb import logger
|
26
28
|
from nucliadb.common import datamanagers
|
27
29
|
from nucliadb.common.context import ApplicationContext
|
@@ -29,7 +31,6 @@ from nucliadb.common.maindb.pg import PGDriver
|
|
29
31
|
from nucliadb.common.maindb.utils import get_driver
|
30
32
|
from nucliadb.common.nidx import get_nidx_api_client
|
31
33
|
from nucliadb.migrator.datamanager import MigrationsDataManager
|
32
|
-
from nucliadb_protos.noderesources_pb2 import EmptyQuery, NodeMetadata
|
33
34
|
from nucliadb_telemetry import metrics
|
34
35
|
from nucliadb_telemetry.logs import setup_logging
|
35
36
|
from nucliadb_telemetry.utils import setup_telemetry
|
@@ -24,6 +24,7 @@ from fastapi import HTTPException, Request
|
|
24
24
|
from fastapi_versioning import version
|
25
25
|
from grpc import StatusCode as GrpcStatusCode
|
26
26
|
from grpc.aio import AioRpcError
|
27
|
+
from nidx_protos.noderesources_pb2 import Shard
|
27
28
|
|
28
29
|
from nucliadb.common import datamanagers
|
29
30
|
from nucliadb.common.cluster.exceptions import ShardsNotFound
|
@@ -44,7 +45,6 @@ from nucliadb_models.search import (
|
|
44
45
|
KnowledgeboxCounters,
|
45
46
|
SearchParamDefaults,
|
46
47
|
)
|
47
|
-
from nucliadb_protos.noderesources_pb2 import Shard
|
48
48
|
from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject
|
49
49
|
from nucliadb_protos.writer_pb2 import Shards
|
50
50
|
from nucliadb_telemetry import errors
|
@@ -26,6 +26,14 @@ from fastapi import HTTPException
|
|
26
26
|
from google.protobuf.json_format import MessageToDict
|
27
27
|
from grpc import StatusCode as GrpcStatusCode
|
28
28
|
from grpc.aio import AioRpcError
|
29
|
+
from nidx_protos.nodereader_pb2 import (
|
30
|
+
GraphSearchRequest,
|
31
|
+
GraphSearchResponse,
|
32
|
+
SearchRequest,
|
33
|
+
SearchResponse,
|
34
|
+
SuggestRequest,
|
35
|
+
SuggestResponse,
|
36
|
+
)
|
29
37
|
|
30
38
|
from nucliadb.common.cluster import manager as cluster_manager
|
31
39
|
from nucliadb.common.cluster.base import AbstractIndexNode
|
@@ -38,14 +46,6 @@ from nucliadb.search.search.shards import (
|
|
38
46
|
suggest_shard,
|
39
47
|
)
|
40
48
|
from nucliadb.search.settings import settings
|
41
|
-
from nucliadb_protos.nodereader_pb2 import (
|
42
|
-
GraphSearchRequest,
|
43
|
-
GraphSearchResponse,
|
44
|
-
SearchRequest,
|
45
|
-
SearchResponse,
|
46
|
-
SuggestRequest,
|
47
|
-
SuggestResponse,
|
48
|
-
)
|
49
49
|
from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject
|
50
50
|
from nucliadb_telemetry import errors
|
51
51
|
|