nucliadb 6.7.2.post4874-py3-none-any.whl → 6.10.0.post5705-py3-none-any.whl
This diff compares two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/search/api/v1/augment.py (new file)
@@ -0,0 +1,585 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import asyncio
+from typing import cast
+
+from fastapi import Header, Request
+from fastapi_versioning import version
+
+from nucliadb.common.ids import FieldId, ParagraphId
+from nucliadb.models.internal import augment as internal_augment
+from nucliadb.models.internal.augment import (
+    Augment,
+    Augmented,
+    ConversationAnswerOrAfter,
+    ConversationAttachments,
+    ConversationAugment,
+    ConversationProp,
+    ConversationSelector,
+    ConversationText,
+    DeepResourceAugment,
+    FieldAugment,
+    FieldClassificationLabels,
+    FieldEntities,
+    FieldProp,
+    FieldText,
+    FileAugment,
+    FileProp,
+    FileThumbnail,
+    FullSelector,
+    MessageSelector,
+    Metadata,
+    Paragraph,
+    ParagraphAugment,
+    ParagraphImage,
+    ParagraphPage,
+    ParagraphPosition,
+    ParagraphProp,
+    ParagraphTable,
+    ParagraphText,
+    RelatedParagraphs,
+    ResourceAugment,
+    ResourceClassificationLabels,
+    ResourceProp,
+    ResourceSummary,
+    ResourceTitle,
+    WindowSelector,
+)
+from nucliadb.search.api.v1.router import KB_PREFIX, api
+from nucliadb.search.augmentor import augmentor
+from nucliadb.search.search.cache import request_caches
+from nucliadb_models.augment import (
+    AugmentedConversationField,
+    AugmentedConversationMessage,
+    AugmentedField,
+    AugmentedFileField,
+    AugmentedParagraph,
+    AugmentedResource,
+    AugmentParagraphs,
+    AugmentRequest,
+    AugmentResources,
+    AugmentResponse,
+)
+from nucliadb_models.common import FieldTypeName
+from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
+from nucliadb_models.search import NucliaDBClientType, ResourceProperties
+from nucliadb_utils.authentication import requires
+
+
+@api.post(
+    f"/{KB_PREFIX}/{{kbid}}/augment",
+    status_code=200,
+    description="Augment data on a Knowledge Box",
+    include_in_schema=False,
+    tags=["Augment"],
+)
+@requires(NucliaDBRoles.READER)
+@version(1)
+async def _augment_endpoint(
+    request: Request,
+    kbid: str,
+    item: AugmentRequest,
+    x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
+    x_nucliadb_user: str = Header(""),
+    x_forwarded_for: str = Header(""),
+) -> AugmentResponse:
+    return await augment_endpoint(kbid, item)
+
+
+async def augment_endpoint(kbid: str, item: AugmentRequest) -> AugmentResponse:
+    augmentations = parse_first_augments(item)
+
+    if len(augmentations) == 0:
+        return AugmentResponse(resources={}, fields={}, paragraphs={})
+
+    with request_caches():
+        max_ops = asyncio.Semaphore(50)
+
+        first_augmented = await augmentor.augment(kbid, augmentations, concurrency_control=max_ops)
+        response = build_augment_response(item, first_augmented)
+
+        # 2nd round trip to augmentor
+        #
+        # There are some augmentations that require some augmented content to be
+        # able to keep augmenting, as neighbour paragraphs.
+        #
+        # However, as many data is already cached (when using cache), this
+        # second round should be orders of magnitude faster than the first round.
+        #
+        augmentations = parse_second_augments(item, first_augmented)
+        if len(augmentations) > 0:
+            second_augmented = await augmentor.augment(kbid, augmentations, concurrency_control=max_ops)
+            merge_second_augment(item, response, second_augmented)
+
+    return response
+
+
+def parse_first_augments(item: AugmentRequest) -> list[Augment]:
+    """Parse an augment request and return a list of internal augments to
+    fulfill as much as the requested information as it can.
+
+    Notice there are augments that will require a 2nd round trip to the
+    augmentor, e.g., neighbouring paragraphs. This makes code a bit more
+    convoluted but avoids synchronization between augments, as many paragraphs
+    could lead to the same neighbours.
+
+    """
+    augmentations: list[Augment] = []
+
+    if item.resources is not None:
+        for resource_augment in item.resources:
+            show, extracted, resource_select = parse_deep_resource_augment(resource_augment)
+            if resource_augment.field_type_filter is None:
+                field_type_filter = list(FieldTypeName)
+            else:
+                field_type_filter = resource_augment.field_type_filter
+
+            if show:
+                augmentations.append(
+                    DeepResourceAugment(
+                        given=resource_augment.given,
+                        show=show,
+                        extracted=extracted,
+                        field_type_filter=field_type_filter,
+                    )
+                )
+            if resource_select:
+                augmentations.append(
+                    ResourceAugment(
+                        given=resource_augment.given,  # type: ignore[arg-type]
+                        select=resource_select,
+                    )
+                )
+
+            if resource_augment.fields is not None:
+                # Augment resource fields with an optional field filter
+                field_select: list[FieldProp] = []
+                if resource_augment.fields.text:
+                    field_select.append(FieldText())
+                if resource_augment.fields.classification_labels:
+                    field_select.append(FieldClassificationLabels())
+
+                augmentations.append(
+                    FieldAugment(
+                        given=resource_augment.given,  # type: ignore[arg-type]
+                        select=field_select,  # type: ignore[arg-type]
+                        filter=resource_augment.fields.filters,
+                    )
+                )
+
+    if item.fields is not None:
+        for field_augment in item.fields:
+            given = [FieldId.from_string(id) for id in field_augment.given]
+            select: list[FieldProp] = []
+            if field_augment.text:
+                select.append(FieldText())
+            if field_augment.entities:
+                select.append(FieldEntities())
+            if field_augment.classification_labels:
+                select.append(FieldClassificationLabels())
+
+            if len(select) > 0:
+                augmentations.append(
+                    FieldAugment(
+                        given=given,
+                        select=select,
+                    )
+                )
+
+            file_select: list[FileProp] = []
+            if field_augment.file_thumbnail:
+                file_select.append(FileThumbnail())
+
+            if len(file_select) > 0:
+                augmentations.append(
+                    FileAugment(
+                        given=given,  # type: ignore
+                        select=file_select,
+                    )
+                )
+
+            conversation_select: list[ConversationProp] = []
+            selector: ConversationSelector
+
+            if field_augment.full_conversation:
+                selector = FullSelector()
+                conversation_select.append(ConversationText(selector=selector))
+                if (
+                    field_augment.conversation_text_attachments
+                    or field_augment.conversation_image_attachments
+                ):
+                    conversation_select.append(ConversationAttachments(selector=selector))
+
+            elif field_augment.max_conversation_messages is not None:
+                # we want to always get the first conversation and the window
+                # requested by the user
+                first_selector = MessageSelector(index="first")
+                window_selector = WindowSelector(size=field_augment.max_conversation_messages)
+                conversation_select.append(ConversationText(selector=first_selector))
+                conversation_select.append(ConversationText(selector=window_selector))
+                if (
+                    field_augment.conversation_text_attachments
+                    or field_augment.conversation_image_attachments
+                ):
+                    conversation_select.append(ConversationAttachments(selector=first_selector))
+                    conversation_select.append(ConversationAttachments(selector=window_selector))
+
+            if field_augment.conversation_answer_or_messages_after:
+                conversation_select.append(ConversationAnswerOrAfter())
+
+            if len(conversation_select) > 0:
+                augmentations.append(
+                    ConversationAugment(
+                        given=given,  # type: ignore
+                        select=conversation_select,
+                    )
+                )
+
+    if item.paragraphs is not None:
+        for paragraph_augment in item.paragraphs:
+            paragraphs_to_augment, paragraph_selector = parse_paragraph_augment(paragraph_augment)
+            augmentations.append(
+                ParagraphAugment(
+                    given=paragraphs_to_augment,
+                    select=paragraph_selector,
+                )
+            )
+
+    return augmentations
+
+
+def parse_deep_resource_augment(
+    item: AugmentResources,
+) -> tuple[list[ResourceProperties], list[ExtractedDataTypeName], list[ResourceProp]]:
+    show = []
+    if item.basic:
+        show.append(ResourceProperties.BASIC)
+    if item.origin:
+        show.append(ResourceProperties.ORIGIN)
+    if item.extra:
+        show.append(ResourceProperties.EXTRA)
+    if item.relations:
+        show.append(ResourceProperties.RELATIONS)
+    if item.values:
+        show.append(ResourceProperties.VALUES)
+    if item.errors:
+        show.append(ResourceProperties.ERRORS)
+    if item.security:
+        show.append(ResourceProperties.SECURITY)
+
+    extracted = []
+    if item.extracted_text:
+        extracted.append(ExtractedDataTypeName.TEXT)
+    if item.extracted_metadata:
+        extracted.append(ExtractedDataTypeName.METADATA)
+    if item.extracted_shortened_metadata:
+        extracted.append(ExtractedDataTypeName.SHORTENED_METADATA)
+    if item.extracted_large_metadata:
+        extracted.append(ExtractedDataTypeName.LARGE_METADATA)
+    if item.extracted_vector:
+        extracted.append(ExtractedDataTypeName.VECTOR)
+    if item.extracted_link:
+        extracted.append(ExtractedDataTypeName.LINK)
+    if item.extracted_file:
+        extracted.append(ExtractedDataTypeName.FILE)
+    if item.extracted_qa:
+        extracted.append(ExtractedDataTypeName.QA)
+
+    if len(extracted) > 0:
+        show.append(ResourceProperties.EXTRACTED)
+
+    select: list[ResourceProp] = []
+    if item.title:
+        select.append(ResourceTitle())
+    if item.summary:
+        select.append(ResourceSummary())
+    if item.classification_labels:
+        select.append(ResourceClassificationLabels())
+
+    return (
+        show,
+        extracted,
+        select,
+    )
+
+
+def parse_paragraph_augment(item: AugmentParagraphs) -> tuple[list[Paragraph], list[ParagraphProp]]:
+    paragraphs_to_augment = []
+    for paragraph in item.given:
+        try:
+            paragraph_id = ParagraphId.from_string(paragraph.id)
+        except ValueError:
+            # invalid paragraph id, skipping
+            continue
+
+        if paragraph.metadata is None:
+            metadata = None
+        else:
+            metadata = Metadata(
+                is_an_image=paragraph.metadata.is_an_image,
+                is_a_table=paragraph.metadata.is_a_table,
+                source_file=paragraph.metadata.source_file,
+                page=paragraph.metadata.page,
+                in_page_with_visual=paragraph.metadata.in_page_with_visual,
+            )
+
+        paragraphs_to_augment.append(Paragraph(id=paragraph_id, metadata=metadata))
+
+    selector: list[ParagraphProp] = []
+    if item.text:
+        selector.append(ParagraphText())
+    if item.neighbours_before or item.neighbours_after:
+        selector.append(
+            RelatedParagraphs(
+                neighbours_before=item.neighbours_before or 0,
+                neighbours_after=item.neighbours_after or 0,
+            )
+        )
+    if item.source_image:
+        selector.append(ParagraphImage())
+    if item.table_image:
+        selector.append(ParagraphTable(prefer_page_preview=item.table_prefers_page_preview))
+    if item.page_preview_image:
+        selector.append(ParagraphPage(preview=True))
+
+    return paragraphs_to_augment, selector
+
+
+def build_augment_response(item: AugmentRequest, augmented: Augmented) -> AugmentResponse:
+    response = AugmentResponse(
+        resources={},
+        fields={},
+        paragraphs={},
+    )
+
+    # start with deep resources, as they return a Resource object we can merge
+    # with the augmented model
+    for rid, resource_deep in augmented.resources_deep.items():
+        if resource_deep is None:
+            continue
+
+        augmented_resource = AugmentedResource(id=rid)
+        augmented_resource.updated_from(resource_deep)
+        response.resources[rid] = augmented_resource
+
+    # now we can cherry pick properties from the augmented resources and merge
+    # them with the deep ones
+    for rid, resource in augmented.resources.items():
+        if resource is None:
+            continue
+
+        augmented_resource = response.resources.setdefault(rid, AugmentedResource(id=rid))
+
+        # merge resource with deep resources without overwriting
+        augmented_resource.title = augmented_resource.title or resource.title
+        augmented_resource.summary = augmented_resource.summary or resource.summary
+
+        # properties original to the augmented resources (not in deep resource augment)
+        if resource.classification_labels is not None:
+            augmented_resource.classification_labels = {
+                labelset: list(labels) for labelset, labels in resource.classification_labels.items()
+            }
+
+    for field_id, field in augmented.fields.items():
+        if field is None:
+            continue
+
+        # common augments for all fields
+
+        if field.classification_labels is None:
+            classification_labels = None
+        else:
+            classification_labels = {
+                labelset: list(labels) for labelset, labels in field.classification_labels.items()
+            }
+
+        if field.entities is None:
+            entities = None
+        else:
+            entities = {family: list(entity) for family, entity in field.entities.items()}
+
+        if field_id.type in (
+            FieldTypeName.TEXT.abbreviation(),
+            FieldTypeName.LINK.abbreviation(),
+            FieldTypeName.GENERIC.abbreviation(),
+        ):
+            response.fields[field_id.full()] = AugmentedField(
+                text=field.text,  # type: ignore # field is instance of any of the above and has the text property
+                classification_labels=classification_labels,
+                entities=entities,
+            )
+
+        elif field_id.type == FieldTypeName.FILE.abbreviation():
+            field = cast(internal_augment.AugmentedFileField, field)
+            response.fields[field_id.full()] = AugmentedFileField(
+                text=field.text,  # type: ignore # field is instance of any of the above and has the text property
+                classification_labels=classification_labels,
+                entities=entities,
+                thumbnail_image=field.thumbnail_path,
+            )
+
+        elif field_id.type == FieldTypeName.CONVERSATION.abbreviation():
+            field = cast(internal_augment.AugmentedConversationField, field)
+            conversation = AugmentedConversationField(
+                classification_labels=classification_labels,
+                entities=entities,
+            )
+
+            if field.messages is not None:
+                conversation.messages = []
+                for m in field.messages:
+                    if m.attachments is None:
+                        attachments = None
+                    else:
+                        attachments = []
+                        for f in m.attachments:
+                            attachments.append(f.full())
+
+                    conversation.messages.append(
+                        AugmentedConversationMessage(
+                            ident=m.ident,
+                            text=m.text,
+                            attachments=attachments,
+                        )
+                    )
+
+            response.fields[field_id.full()] = conversation
+
+        else:  # pragma: no cover
+            assert False, f"unknown field type: {field_id.type}"
+
+    for paragraph_id, paragraph in augmented.paragraphs.items():
+        if paragraph is None:
+            continue
+
+        augmented_paragraph = AugmentedParagraph()
+        augmented_paragraph.text = paragraph.text
+        if paragraph.related is not None:
+            augmented_paragraph.neighbours_before = list(
+                map(lambda x: x.full(), paragraph.related.neighbours_before)
+            )
+            augmented_paragraph.neighbours_after = list(
+                map(lambda x: x.full(), paragraph.related.neighbours_after)
+            )
+        augmented_paragraph.source_image = paragraph.source_image_path
+        augmented_paragraph.table_image = paragraph.table_image_path
+        augmented_paragraph.page_preview_image = paragraph.page_preview_path
+        response.paragraphs[paragraph_id.full()] = augmented_paragraph
+
+    return response
+
+
+def parse_second_augments(item: AugmentRequest, augmented: Augmented) -> list[Augment]:
+    """Given an augment request an a first augmentation, return a list of
+    augments required to fulfill the requested data.
+
+    """
+    augmentations: list[Augment] = []
+
+    for paragraph_augment in item.paragraphs or []:
+        if paragraph_augment.neighbours_before or paragraph_augment.neighbours_after:
+            neighbours = []
+            for paragraph_id, paragraph in augmented.paragraphs.items():
+                if paragraph.related is not None:
+                    for neighbour_before in paragraph.related.neighbours_before:
+                        neighbours.append(Paragraph(id=neighbour_before, metadata=None))
+                    for neighbour_after in paragraph.related.neighbours_after:
+                        neighbours.append(Paragraph(id=neighbour_after, metadata=None))
+
+            if neighbours:
+                augmentations.append(
+                    ParagraphAugment(
+                        given=neighbours,
+                        select=[
+                            ParagraphText(),
+                            ParagraphPosition(),
+                        ],
+                    )
+                )
+
+    return augmentations
+
+
+def merge_second_augment(item: AugmentRequest, response: AugmentResponse, augmented: Augmented):
+    """Merge in-place augmented data with an existing augment response."""
+
+    if any(
+        (
+            paragraph_augment.neighbours_before or paragraph_augment.neighbours_after
+            for paragraph_augment in item.paragraphs or []
+        )
+    ):
+        # neighbour paragraphs
+
+        new_paragraphs = {}
+        for paragraph_id_str, augmented_paragraph in response.paragraphs.items():
+            before_refs = []
+            for before_id_str in augmented_paragraph.neighbours_before or []:
+                before_id = ParagraphId.from_string(before_id_str)
+
+                if before_id not in augmented.paragraphs:
+                    continue
+                neighbour = augmented.paragraphs[before_id]
+
+                if before_id_str not in response.paragraphs:
+                    if not neighbour.text and not neighbour.position:
+                        continue
+                    # create a new paragraph for the neighbour
+                    new_paragraphs[before_id_str] = AugmentedParagraph(
+                        text=neighbour.text, position=neighbour.position
+                    )
+
+                else:
+                    # merge neighbour with existing paragraph
+                    if not response.paragraphs[before_id_str].text:
+                        response.paragraphs[before_id_str].text = neighbour.text
+
+                before_refs.append(before_id_str)
+
+            after_refs = []
+            for after_id_str in augmented_paragraph.neighbours_after or []:
+                after_id = ParagraphId.from_string(after_id_str)
+
+                if after_id not in augmented.paragraphs:
+                    continue
+                neighbour = augmented.paragraphs[after_id]
+
+                if after_id_str not in response.paragraphs:
+                    if not neighbour.text and not neighbour.position:
+                        continue
+                    # create a new paragraph for the neighbour
+                    new_paragraphs[after_id_str] = AugmentedParagraph(
+                        text=neighbour.text, position=neighbour.position
+                    )
+
+                else:
+                    # merge neighbour with existing paragraph
+                    if not response.paragraphs[after_id_str].text:
+                        response.paragraphs[after_id_str].text = neighbour.text
+
+                after_refs.append(after_id_str)
+
+            # update references to contain only the neighbours that existed in
+            # the response or we added
+            augmented_paragraph.neighbours_before = before_refs
+            augmented_paragraph.neighbours_after = after_refs
+
+        response.paragraphs.update(new_paragraphs)
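The new module above registers an internal `POST /{KB_PREFIX}/{kbid}/augment` route (hidden from the OpenAPI schema) that accepts an `AugmentRequest` with optional `resources`, `fields` and `paragraphs` sections and resolves them in up to two passes through the augmentor. For orientation, here is a minimal, hypothetical sketch of a call: the base URL, knowledge box id, field ids and auth header are placeholders, and the payload shape is inferred from `parse_first_augments` in the diff above rather than from published documentation.

```python
# Hypothetical usage sketch for the new /augment endpoint. Everything here
# (base URL, kbid, field ids) is a placeholder; the payload keys mirror the
# attributes read by parse_first_augments() in the diff above.
import requests  # assumed available; any HTTP client works

BASE_URL = "http://localhost:8080/api"  # assumed standalone deployment
KBID = "00000000-0000-0000-0000-000000000000"  # placeholder knowledge box id

payload = {
    # "fields" augments: ask for the extracted text and classification labels
    # of specific fields, identified by their full field ids.
    "fields": [
        {
            "given": ["<rid>/t/mytext", "<rid>/f/myfile"],  # placeholder ids
            "text": True,
            "classification_labels": True,
            "file_thumbnail": True,  # only meaningful for file fields
        }
    ],
}

resp = requests.post(
    f"{BASE_URL}/v1/kb/{KBID}/augment",
    json=payload,
    # the endpoint is guarded by @requires(NucliaDBRoles.READER)
    headers={"X-NUCLIADB-ROLES": "READER"},
)
resp.raise_for_status()

# AugmentResponse carries three maps: resources, fields and paragraphs
for field_id, augmented_field in resp.json()["fields"].items():
    print(field_id, (augmented_field.get("text") or "")[:80])
```

The two-pass design in `augment_endpoint` exists because neighbour-paragraph augments can only be requested once the first pass has resolved which paragraphs are related; since the work runs inside `request_caches()`, the second pass mostly hits already-cached data.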
nucliadb/search/api/v1/catalog.py
@@ -19,12 +19,12 @@
 #
 import json
 from time import time
-from typing import Optional, Union
 
 from fastapi import Request, Response
 from fastapi_versioning import version
 from pydantic import ValidationError
 
+from nucliadb.common.catalog import catalog_facets, catalog_search
 from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
 from nucliadb.common.exceptions import InvalidQueryError
 from nucliadb.models.responses import HTTPClientError
@@ -33,7 +33,6 @@ from nucliadb.search.api.v1.router import KB_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.merge import fetch_resources
-from nucliadb.search.search.pgcatalog import pgcatalog_facets, pgcatalog_search
 from nucliadb.search.search.query_parser.parsers import parse_catalog
 from nucliadb.search.search.utils import (
     maybe_log_request_payload,
@@ -75,31 +74,28 @@ async def catalog_get(
     response: Response,
     kbid: str,
     query: str = fastapi_query(SearchParamDefaults.query),
-    filter_expression:
+    filter_expression: str | None = fastapi_query(SearchParamDefaults.catalog_filter_expression),
     filters: list[str] = fastapi_query(SearchParamDefaults.filters),
     faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
     sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
-    sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
     sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
     page_number: int = fastapi_query(SearchParamDefaults.catalog_page_number),
     page_size: int = fastapi_query(SearchParamDefaults.catalog_page_size),
-    with_status:
+    with_status: ResourceProcessingStatus | None = fastapi_query(
         SearchParamDefaults.with_status, deprecated="Use filters instead"
     ),
     debug: bool = fastapi_query(SearchParamDefaults.debug, include_in_schema=False),
-    range_creation_start:
-    range_creation_end:
-    range_modification_start:
+    range_creation_start: DateTime | None = fastapi_query(SearchParamDefaults.range_creation_start),
+    range_creation_end: DateTime | None = fastapi_query(SearchParamDefaults.range_creation_end),
+    range_modification_start: DateTime | None = fastapi_query(
         SearchParamDefaults.range_modification_start
     ),
-    range_modification_end:
-
-    ),
-    hidden: Optional[bool] = fastapi_query(SearchParamDefaults.hidden),
+    range_modification_end: DateTime | None = fastapi_query(SearchParamDefaults.range_modification_end),
+    hidden: bool | None = fastapi_query(SearchParamDefaults.hidden),
     show: list[ResourceProperties] = fastapi_query(
         SearchParamDefaults.show, default=[ResourceProperties.BASIC, ResourceProperties.ERRORS]
     ),
-) ->
+) -> CatalogResponse | HTTPClientError:
     try:
         expr = (
             CatalogFilterExpression.model_validate_json(filter_expression) if filter_expression else None
@@ -125,7 +121,7 @@ async def catalog_get(
         show=show,
     )
     if sort_field:
-        item.sort = SortOptions(field=sort_field,
+        item.sort = SortOptions(field=sort_field, order=sort_order)
     return await catalog(kbid, item)
 
 
@@ -144,14 +140,14 @@ async def catalog_post(
     request: Request,
     kbid: str,
     item: CatalogRequest,
-) ->
+) -> CatalogResponse | HTTPClientError:
    return await catalog(kbid, item)
 
 
 async def catalog(
     kbid: str,
     item: CatalogRequest,
-) ->
+) -> HTTPClientError | CatalogResponse:
     """
     Catalog endpoint is a simplified version of the search endpoint, it only
     returns bm25 results on titles and it does not support vector search.
@@ -164,7 +160,7 @@ async def catalog(
         query_parser = await parse_catalog(kbid, item)
 
         catalog_results = CatalogResponse()
-        catalog_results.fulltext = await
+        catalog_results.fulltext = await catalog_search(query_parser)
         catalog_results.resources = await fetch_resources(
             resources=[r.rid for r in catalog_results.fulltext.results],
             kbid=kbid,
@@ -205,7 +201,7 @@
 )
 @requires(NucliaDBRoles.READER)
 @version(1)
-async def
+async def catalog_facets_endpoint(
     request: Request, kbid: str, item: CatalogFacetsRequest
 ) -> CatalogFacetsResponse:
-    return CatalogFacetsResponse(facets=await
+    return CatalogFacetsResponse(facets=await catalog_facets(kbid, item))