nucliadb 6.7.2.post4862__py3-none-any.whl → 6.9.2.post5282__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- migrations/0016_upgrade_to_paragraphs_v2.py +1 -1
- migrations/0017_multiple_writable_shards.py +1 -1
- migrations/0018_purge_orphan_kbslugs.py +1 -1
- migrations/0019_upgrade_to_paragraphs_v3.py +1 -1
- migrations/0021_overwrite_vectorsets_key.py +1 -1
- migrations/0023_backfill_pg_catalog.py +7 -3
- migrations/0025_assign_models_to_kbs_v2.py +3 -3
- migrations/0027_rollover_texts3.py +1 -1
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +1 -1
- migrations/0032_remove_old_relations.py +1 -1
- migrations/0036_backfill_catalog_slug.py +1 -1
- migrations/0037_backfill_catalog_facets.py +1 -1
- migrations/0038_backfill_catalog_field_labels.py +7 -3
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/backups/create.py +3 -3
- nucliadb/backups/restore.py +3 -3
- nucliadb/common/cache.py +1 -1
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +294 -208
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +3 -19
- nucliadb/common/cluster/rebalance.py +484 -110
- nucliadb/common/cluster/rollover.py +29 -0
- nucliadb/common/cluster/settings.py +1 -1
- nucliadb/common/cluster/utils.py +26 -0
- nucliadb/common/datamanagers/atomic.py +6 -0
- nucliadb/common/datamanagers/utils.py +2 -2
- nucliadb/common/external_index_providers/manager.py +1 -29
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +16 -33
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +4 -0
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +77 -55
- nucliadb/common/locking.py +4 -4
- nucliadb/common/maindb/driver.py +11 -1
- nucliadb/common/maindb/local.py +1 -1
- nucliadb/common/maindb/pg.py +1 -1
- nucliadb/common/nidx.py +19 -1
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +3 -3
- nucliadb/ingest/consumer/pull.py +7 -0
- nucliadb/ingest/consumer/service.py +2 -27
- nucliadb/ingest/consumer/shard_creator.py +17 -6
- nucliadb/ingest/fields/base.py +9 -17
- nucliadb/ingest/fields/conversation.py +47 -1
- nucliadb/ingest/orm/brain_v2.py +21 -3
- nucliadb/ingest/orm/index_message.py +126 -111
- nucliadb/ingest/orm/knowledgebox.py +84 -43
- nucliadb/ingest/orm/processor/auditing.py +1 -1
- nucliadb/ingest/orm/processor/processor.py +95 -149
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +10 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/serialize.py +2 -2
- nucliadb/ingest/service/writer.py +26 -19
- nucliadb/ingest/settings.py +33 -11
- nucliadb/learning_proxy.py +12 -15
- nucliadb/metrics_exporter.py +17 -4
- nucliadb/migrator/datamanager.py +11 -17
- nucliadb/migrator/migrator.py +2 -2
- nucliadb/purge/__init__.py +12 -17
- nucliadb/purge/orphan_shards.py +2 -2
- nucliadb/reader/api/v1/knowledgebox.py +40 -12
- nucliadb/reader/api/v1/learning_config.py +30 -10
- nucliadb/reader/api/v1/resource.py +2 -2
- nucliadb/reader/api/v1/services.py +1 -1
- nucliadb/reader/reader/notifications.py +1 -1
- nucliadb/search/api/v1/__init__.py +1 -0
- nucliadb/search/api/v1/catalog.py +4 -4
- nucliadb/search/api/v1/find.py +1 -4
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/resource/ask.py +21 -1
- nucliadb/search/api/v1/search.py +1 -4
- nucliadb/search/predict.py +9 -2
- nucliadb/search/search/cache.py +1 -20
- nucliadb/search/search/chat/ask.py +50 -8
- nucliadb/search/search/chat/prompt.py +47 -15
- nucliadb/search/search/chat/query.py +8 -1
- nucliadb/search/search/fetch.py +1 -1
- nucliadb/search/search/find.py +1 -6
- nucliadb/search/search/{hydrator.py → hydrator/__init__.py} +5 -4
- nucliadb/search/search/hydrator/fields.py +175 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +307 -0
- nucliadb/search/search/hydrator/resources.py +56 -0
- nucliadb/search/search/metrics.py +16 -0
- nucliadb/search/search/predict_proxy.py +33 -11
- nucliadb/search/search/query.py +0 -23
- nucliadb/search/search/query_parser/fetcher.py +5 -5
- nucliadb/search/search/query_parser/models.py +1 -30
- nucliadb/search/search/query_parser/parsers/ask.py +1 -1
- nucliadb/search/search/query_parser/parsers/catalog.py +4 -7
- nucliadb/search/search/query_parser/parsers/common.py +16 -7
- nucliadb/search/search/query_parser/parsers/find.py +0 -11
- nucliadb/search/search/query_parser/parsers/graph.py +5 -5
- nucliadb/search/search/query_parser/parsers/search.py +0 -11
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +4 -11
- nucliadb/search/search/rerankers.py +1 -1
- nucliadb/search/search/summarize.py +1 -1
- nucliadb/standalone/run.py +3 -0
- nucliadb/tasks/retries.py +4 -4
- nucliadb/train/generators/sentence_classifier.py +2 -8
- nucliadb/train/generators/utils.py +1 -1
- nucliadb/train/nodes.py +4 -4
- nucliadb/train/servicer.py +1 -1
- nucliadb/train/uploader.py +1 -1
- nucliadb/writer/api/v1/field.py +14 -9
- nucliadb/writer/api/v1/knowledgebox.py +15 -52
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +2 -2
- nucliadb/writer/resource/field.py +38 -2
- nucliadb/writer/tus/azure.py +4 -4
- nucliadb/writer/tus/gcs.py +11 -17
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/METADATA +9 -10
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/RECORD +124 -114
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
|
2
|
+
#
|
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
|
5
|
+
#
|
|
6
|
+
# AGPL:
|
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
|
10
|
+
# License, or (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
+
# GNU Affero General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
|
+
#
|
|
20
|
+
from nidx_protos.noderesources_pb2 import Resource as IndexMessage
|
|
21
|
+
|
|
22
|
+
from nucliadb.common.catalog.interface import CatalogResourceData
|
|
23
|
+
from nucliadb.ingest.orm.resource import Resource
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def build_catalog_resource_data(resource: Resource, index_message: IndexMessage) -> CatalogResourceData:
    """Build the data needed to index a resource into the catalog.

    Args:
        resource: Resource whose ``basic`` metadata provides the title, slug,
            creation/modification timestamps and classifications.
        index_message: Index message whose ``labels`` are the starting set of
            catalog labels.

    Returns:
        A ``CatalogResourceData`` carrying the title, slug, timestamps and
        labels of the resource.

    Raises:
        ValueError: If ``resource.basic`` is ``None``.
    """
    basic = resource.basic
    if basic is None:
        raise ValueError("Cannot index into the catalog a resource without basic metadata ")

    created_at = basic.created.ToDatetime()
    # Never report a modification date that precedes the creation date.
    modified_at = max(basic.modified.ToDatetime(), created_at)

    # Do not index cancelled labels
    cancelled_labels = {
        f"/l/{clf.labelset}/{clf.label}"
        for clf in basic.usermetadata.classifications
        if clf.cancelled_by_user
    }

    # Labels from the resource and classification labels from each field
    labels = list(index_message.labels)
    for field_classification in basic.computedmetadata.field_classifications:
        for clf in field_classification.classifications:
            label = f"/l/{clf.labelset}/{clf.label}"
            if label not in cancelled_labels:
                labels.append(label)

    return CatalogResourceData(
        title=basic.title,
        created_at=created_at,
        modified_at=modified_at,
        labels=labels,
        slug=basic.slug,
    )
|
|
@@ -43,8 +43,6 @@ from nucliadb_protos import knowledgebox_pb2, writer_pb2
|
|
|
43
43
|
from nucliadb_telemetry import errors
|
|
44
44
|
from nucliadb_utils.utilities import get_storage
|
|
45
45
|
|
|
46
|
-
from .settings import settings
|
|
47
|
-
|
|
48
46
|
logger = logging.getLogger(__name__)
|
|
49
47
|
|
|
50
48
|
|
|
@@ -113,6 +111,8 @@ class KBShardManager:
|
|
|
113
111
|
self,
|
|
114
112
|
txn: Transaction,
|
|
115
113
|
kbid: str,
|
|
114
|
+
*,
|
|
115
|
+
prewarm_enabled: bool,
|
|
116
116
|
) -> writer_pb2.ShardObject:
|
|
117
117
|
kb_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
|
|
118
118
|
if kb_shards is None:
|
|
@@ -133,6 +133,7 @@ class KBShardManager:
|
|
|
133
133
|
req = NewShardRequest(
|
|
134
134
|
kbid=kbid,
|
|
135
135
|
vectorsets_configs=vectorsets,
|
|
136
|
+
prewarm_enabled=prewarm_enabled,
|
|
136
137
|
)
|
|
137
138
|
|
|
138
139
|
resp = await nidx_api.NewShard(req) # type: ignore
|
|
@@ -232,23 +233,6 @@ class KBShardManager:
|
|
|
232
233
|
indexpb.shard = shard.nidx_shard_id
|
|
233
234
|
await nidx.index(indexpb)
|
|
234
235
|
|
|
235
|
-
def should_create_new_shard(self, num_paragraphs: int) -> bool:
|
|
236
|
-
return num_paragraphs > settings.max_shard_paragraphs
|
|
237
|
-
|
|
238
|
-
async def maybe_create_new_shard(
|
|
239
|
-
self,
|
|
240
|
-
kbid: str,
|
|
241
|
-
num_paragraphs: int,
|
|
242
|
-
):
|
|
243
|
-
if not self.should_create_new_shard(num_paragraphs):
|
|
244
|
-
return
|
|
245
|
-
|
|
246
|
-
logger.info({"message": "Adding shard", "kbid": kbid})
|
|
247
|
-
|
|
248
|
-
async with datamanagers.with_transaction() as txn:
|
|
249
|
-
await self.create_shard_by_kbid(txn, kbid)
|
|
250
|
-
await txn.commit()
|
|
251
|
-
|
|
252
236
|
async def create_vectorset(self, kbid: str, config: knowledgebox_pb2.VectorSetConfig):
|
|
253
237
|
"""Create a new vectorset in all KB shards."""
|
|
254
238
|
|