nucliadb 6.7.2.post4862__py3-none-any.whl → 6.9.2.post5282__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- migrations/0016_upgrade_to_paragraphs_v2.py +1 -1
- migrations/0017_multiple_writable_shards.py +1 -1
- migrations/0018_purge_orphan_kbslugs.py +1 -1
- migrations/0019_upgrade_to_paragraphs_v3.py +1 -1
- migrations/0021_overwrite_vectorsets_key.py +1 -1
- migrations/0023_backfill_pg_catalog.py +7 -3
- migrations/0025_assign_models_to_kbs_v2.py +3 -3
- migrations/0027_rollover_texts3.py +1 -1
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +1 -1
- migrations/0032_remove_old_relations.py +1 -1
- migrations/0036_backfill_catalog_slug.py +1 -1
- migrations/0037_backfill_catalog_facets.py +1 -1
- migrations/0038_backfill_catalog_field_labels.py +7 -3
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/backups/create.py +3 -3
- nucliadb/backups/restore.py +3 -3
- nucliadb/common/cache.py +1 -1
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +294 -208
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +3 -19
- nucliadb/common/cluster/rebalance.py +484 -110
- nucliadb/common/cluster/rollover.py +29 -0
- nucliadb/common/cluster/settings.py +1 -1
- nucliadb/common/cluster/utils.py +26 -0
- nucliadb/common/datamanagers/atomic.py +6 -0
- nucliadb/common/datamanagers/utils.py +2 -2
- nucliadb/common/external_index_providers/manager.py +1 -29
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +16 -33
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +4 -0
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +77 -55
- nucliadb/common/locking.py +4 -4
- nucliadb/common/maindb/driver.py +11 -1
- nucliadb/common/maindb/local.py +1 -1
- nucliadb/common/maindb/pg.py +1 -1
- nucliadb/common/nidx.py +19 -1
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +3 -3
- nucliadb/ingest/consumer/pull.py +7 -0
- nucliadb/ingest/consumer/service.py +2 -27
- nucliadb/ingest/consumer/shard_creator.py +17 -6
- nucliadb/ingest/fields/base.py +9 -17
- nucliadb/ingest/fields/conversation.py +47 -1
- nucliadb/ingest/orm/brain_v2.py +21 -3
- nucliadb/ingest/orm/index_message.py +126 -111
- nucliadb/ingest/orm/knowledgebox.py +84 -43
- nucliadb/ingest/orm/processor/auditing.py +1 -1
- nucliadb/ingest/orm/processor/processor.py +95 -149
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +10 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/serialize.py +2 -2
- nucliadb/ingest/service/writer.py +26 -19
- nucliadb/ingest/settings.py +33 -11
- nucliadb/learning_proxy.py +12 -15
- nucliadb/metrics_exporter.py +17 -4
- nucliadb/migrator/datamanager.py +11 -17
- nucliadb/migrator/migrator.py +2 -2
- nucliadb/purge/__init__.py +12 -17
- nucliadb/purge/orphan_shards.py +2 -2
- nucliadb/reader/api/v1/knowledgebox.py +40 -12
- nucliadb/reader/api/v1/learning_config.py +30 -10
- nucliadb/reader/api/v1/resource.py +2 -2
- nucliadb/reader/api/v1/services.py +1 -1
- nucliadb/reader/reader/notifications.py +1 -1
- nucliadb/search/api/v1/__init__.py +1 -0
- nucliadb/search/api/v1/catalog.py +4 -4
- nucliadb/search/api/v1/find.py +1 -4
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/resource/ask.py +21 -1
- nucliadb/search/api/v1/search.py +1 -4
- nucliadb/search/predict.py +9 -2
- nucliadb/search/search/cache.py +1 -20
- nucliadb/search/search/chat/ask.py +50 -8
- nucliadb/search/search/chat/prompt.py +47 -15
- nucliadb/search/search/chat/query.py +8 -1
- nucliadb/search/search/fetch.py +1 -1
- nucliadb/search/search/find.py +1 -6
- nucliadb/search/search/{hydrator.py → hydrator/__init__.py} +5 -4
- nucliadb/search/search/hydrator/fields.py +175 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +307 -0
- nucliadb/search/search/hydrator/resources.py +56 -0
- nucliadb/search/search/metrics.py +16 -0
- nucliadb/search/search/predict_proxy.py +33 -11
- nucliadb/search/search/query.py +0 -23
- nucliadb/search/search/query_parser/fetcher.py +5 -5
- nucliadb/search/search/query_parser/models.py +1 -30
- nucliadb/search/search/query_parser/parsers/ask.py +1 -1
- nucliadb/search/search/query_parser/parsers/catalog.py +4 -7
- nucliadb/search/search/query_parser/parsers/common.py +16 -7
- nucliadb/search/search/query_parser/parsers/find.py +0 -11
- nucliadb/search/search/query_parser/parsers/graph.py +5 -5
- nucliadb/search/search/query_parser/parsers/search.py +0 -11
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +4 -11
- nucliadb/search/search/rerankers.py +1 -1
- nucliadb/search/search/summarize.py +1 -1
- nucliadb/standalone/run.py +3 -0
- nucliadb/tasks/retries.py +4 -4
- nucliadb/train/generators/sentence_classifier.py +2 -8
- nucliadb/train/generators/utils.py +1 -1
- nucliadb/train/nodes.py +4 -4
- nucliadb/train/servicer.py +1 -1
- nucliadb/train/uploader.py +1 -1
- nucliadb/writer/api/v1/field.py +14 -9
- nucliadb/writer/api/v1/knowledgebox.py +15 -52
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +2 -2
- nucliadb/writer/resource/field.py +38 -2
- nucliadb/writer/tus/azure.py +4 -4
- nucliadb/writer/tus/gcs.py +11 -17
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/METADATA +9 -10
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/RECORD +124 -114
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/top_level.txt +0 -0
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
|
2
|
-
#
|
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
|
5
|
-
#
|
|
6
|
-
# AGPL:
|
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
|
10
|
-
# License, or (at your option) any later version.
|
|
11
|
-
#
|
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
-
# GNU Affero General Public License for more details.
|
|
16
|
-
#
|
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
|
-
#
|
|
20
|
-
|
|
21
|
-
from typing import cast
|
|
22
|
-
|
|
23
|
-
from nidx_protos.noderesources_pb2 import Resource as IndexMessage
|
|
24
|
-
|
|
25
|
-
from nucliadb.common.maindb.driver import Transaction
|
|
26
|
-
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
|
27
|
-
from nucliadb.common.maindb.utils import get_driver
|
|
28
|
-
from nucliadb_telemetry import metrics
|
|
29
|
-
|
|
30
|
-
from ..resource import Resource
|
|
31
|
-
|
|
32
|
-
observer = metrics.Observer("pg_catalog_write", labels={"type": ""})
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def _pg_transaction(txn: Transaction) -> PGTransaction:
|
|
36
|
-
return cast(PGTransaction, txn)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def pgcatalog_enabled(kbid):
|
|
40
|
-
return isinstance(get_driver(), PGDriver)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def extract_facets(labels):
|
|
44
|
-
facets = set()
|
|
45
|
-
for label in labels:
|
|
46
|
-
parts = label.split("/")
|
|
47
|
-
facet = ""
|
|
48
|
-
for part in parts[1:]:
|
|
49
|
-
facet += f"/{part}"
|
|
50
|
-
facets.add(facet)
|
|
51
|
-
return facets
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
@observer.wrap({"type": "update"})
|
|
55
|
-
async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, index_message: IndexMessage):
|
|
56
|
-
if not pgcatalog_enabled(kbid):
|
|
57
|
-
return
|
|
58
|
-
|
|
59
|
-
if resource.basic is None:
|
|
60
|
-
raise ValueError("Cannot index into the catalog a resource without basic metadata ")
|
|
61
|
-
|
|
62
|
-
created_at = resource.basic.created.ToDatetime()
|
|
63
|
-
modified_at = resource.basic.modified.ToDatetime()
|
|
64
|
-
if modified_at < created_at:
|
|
65
|
-
modified_at = created_at
|
|
66
|
-
|
|
67
|
-
async with _pg_transaction(txn).connection.cursor() as cur:
|
|
68
|
-
# Do not index canceled labels
|
|
69
|
-
cancelled_labels = {
|
|
70
|
-
f"/l/{clf.labelset}/{clf.label}"
|
|
71
|
-
for clf in resource.basic.usermetadata.classifications
|
|
72
|
-
if clf.cancelled_by_user
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
# Labels from the resource and classification labels from each field
|
|
76
|
-
labels = [label for label in index_message.labels]
|
|
77
|
-
for classification in resource.basic.computedmetadata.field_classifications:
|
|
78
|
-
for clf in classification.classifications:
|
|
79
|
-
label = f"/l/{clf.labelset}/{clf.label}"
|
|
80
|
-
if label not in cancelled_labels:
|
|
81
|
-
labels.append(label)
|
|
82
|
-
|
|
83
|
-
await cur.execute(
|
|
84
|
-
"""
|
|
85
|
-
INSERT INTO catalog
|
|
86
|
-
(kbid, rid, title, created_at, modified_at, labels, slug)
|
|
87
|
-
VALUES
|
|
88
|
-
(%(kbid)s, %(rid)s, %(title)s, %(created_at)s, %(modified_at)s, %(labels)s, %(slug)s)
|
|
89
|
-
ON CONFLICT (kbid, rid) DO UPDATE SET
|
|
90
|
-
title = excluded.title,
|
|
91
|
-
created_at = excluded.created_at,
|
|
92
|
-
modified_at = excluded.modified_at,
|
|
93
|
-
labels = excluded.labels,
|
|
94
|
-
slug = excluded.slug""",
|
|
95
|
-
{
|
|
96
|
-
"kbid": resource.kb.kbid,
|
|
97
|
-
"rid": resource.uuid,
|
|
98
|
-
"title": resource.basic.title,
|
|
99
|
-
"created_at": created_at,
|
|
100
|
-
"modified_at": modified_at,
|
|
101
|
-
"labels": labels,
|
|
102
|
-
"slug": resource.basic.slug,
|
|
103
|
-
},
|
|
104
|
-
)
|
|
105
|
-
await cur.execute(
|
|
106
|
-
"DELETE FROM catalog_facets WHERE kbid = %(kbid)s AND rid = %(rid)s",
|
|
107
|
-
{
|
|
108
|
-
"kbid": resource.kb.kbid,
|
|
109
|
-
"rid": resource.uuid,
|
|
110
|
-
},
|
|
111
|
-
)
|
|
112
|
-
await cur.execute(
|
|
113
|
-
"INSERT INTO catalog_facets (kbid, rid, facet) SELECT %(kbid)s AS kbid, %(rid)s AS rid, unnest(%(facets)s::text[]) AS facet",
|
|
114
|
-
{
|
|
115
|
-
"kbid": resource.kb.kbid,
|
|
116
|
-
"rid": resource.uuid,
|
|
117
|
-
"facets": list(extract_facets(labels)),
|
|
118
|
-
},
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
@observer.wrap({"type": "delete"})
|
|
123
|
-
async def pgcatalog_delete(txn: Transaction, kbid: str, rid: str):
|
|
124
|
-
if not pgcatalog_enabled(kbid):
|
|
125
|
-
return
|
|
126
|
-
async with _pg_transaction(txn).connection.cursor() as cur:
|
|
127
|
-
await cur.execute(
|
|
128
|
-
"DELETE FROM catalog where kbid = %(kbid)s AND rid = %(rid)s", {"kbid": kbid, "rid": rid}
|
|
129
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|