nucliadb 6.2.1.post2744__py3-none-any.whl → 6.2.1.post2751__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/models_utils/__init__.py +19 -0
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/ingest/serialize.py +21 -26
- nucliadb/reader/api/models.py +1 -3
- nucliadb/reader/api/v1/download.py +2 -2
- nucliadb/reader/api/v1/knowledgebox.py +3 -3
- nucliadb/reader/api/v1/resource.py +6 -9
- nucliadb/reader/api/v1/services.py +4 -4
- nucliadb/search/api/v1/feedback.py +3 -3
- nucliadb/search/api/v1/knowledgebox.py +2 -1
- nucliadb/search/api/v1/search.py +2 -1
- nucliadb/search/search/chat/prompt.py +3 -2
- nucliadb/search/search/chat/query.py +2 -1
- nucliadb/search/search/find.py +2 -1
- nucliadb/search/search/merge.py +14 -4
- nucliadb/search/search/query.py +10 -2
- nucliadb/writer/api/v1/services.py +2 -1
- nucliadb/writer/resource/basic.py +7 -6
- nucliadb/writer/resource/field.py +4 -7
- {nucliadb-6.2.1.post2744.dist-info → nucliadb-6.2.1.post2751.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post2744.dist-info → nucliadb-6.2.1.post2751.dist-info}/RECORD +26 -23
- {nucliadb-6.2.1.post2744.dist-info → nucliadb-6.2.1.post2751.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post2744.dist-info → nucliadb-6.2.1.post2751.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post2744.dist-info → nucliadb-6.2.1.post2751.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post2744.dist-info → nucliadb-6.2.1.post2751.dist-info}/zip-safe +0 -0
nucliadb/ingest/serialize.py
CHANGED
@@ -23,13 +23,14 @@ from typing import Optional
|
|
23
23
|
import nucliadb_models as models
|
24
24
|
from nucliadb.common.maindb.driver import Transaction
|
25
25
|
from nucliadb.common.maindb.utils import get_driver
|
26
|
+
from nucliadb.common.models_utils import from_proto
|
26
27
|
from nucliadb.ingest.fields.base import Field
|
27
28
|
from nucliadb.ingest.fields.conversation import Conversation
|
28
29
|
from nucliadb.ingest.fields.file import File
|
29
30
|
from nucliadb.ingest.fields.link import Link
|
30
31
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
31
32
|
from nucliadb.ingest.orm.resource import Resource as ORMResource
|
32
|
-
from nucliadb_models.common import
|
33
|
+
from nucliadb_models.common import FieldTypeName
|
33
34
|
from nucliadb_models.resource import (
|
34
35
|
ConversationFieldData,
|
35
36
|
ConversationFieldExtractedData,
|
@@ -64,7 +65,7 @@ async def set_resource_field_extracted_data(
|
|
64
65
|
if ExtractedDataTypeName.TEXT in wanted_extracted_data:
|
65
66
|
data_et = await field.get_extracted_text()
|
66
67
|
if data_et is not None:
|
67
|
-
field_data.text =
|
68
|
+
field_data.text = from_proto.extracted_text(data_et)
|
68
69
|
|
69
70
|
metadata_wanted = ExtractedDataTypeName.METADATA in wanted_extracted_data
|
70
71
|
shortened_metadata_wanted = ExtractedDataTypeName.SHORTENED_METADATA in wanted_extracted_data
|
@@ -72,24 +73,24 @@ async def set_resource_field_extracted_data(
|
|
72
73
|
data_fcm = await field.get_field_metadata()
|
73
74
|
|
74
75
|
if data_fcm is not None:
|
75
|
-
field_data.metadata =
|
76
|
+
field_data.metadata = from_proto.field_computed_metadata(
|
76
77
|
data_fcm, shortened=shortened_metadata_wanted and not metadata_wanted
|
77
78
|
)
|
78
79
|
|
79
80
|
if ExtractedDataTypeName.LARGE_METADATA in wanted_extracted_data:
|
80
81
|
data_lcm = await field.get_large_field_metadata()
|
81
82
|
if data_lcm is not None:
|
82
|
-
field_data.large_metadata =
|
83
|
+
field_data.large_metadata = from_proto.large_computed_metadata(data_lcm)
|
83
84
|
|
84
85
|
if ExtractedDataTypeName.VECTOR in wanted_extracted_data:
|
85
86
|
data_vec = await field.get_vectors()
|
86
87
|
if data_vec is not None:
|
87
|
-
field_data.vectors =
|
88
|
+
field_data.vectors = from_proto.vector_object(data_vec)
|
88
89
|
|
89
90
|
if ExtractedDataTypeName.QA in wanted_extracted_data:
|
90
91
|
qa = await field.get_question_answers()
|
91
92
|
if qa is not None:
|
92
|
-
field_data.question_answers =
|
93
|
+
field_data.question_answers = from_proto.field_question_answers(qa)
|
93
94
|
|
94
95
|
if (
|
95
96
|
isinstance(field, File)
|
@@ -98,7 +99,7 @@ async def set_resource_field_extracted_data(
|
|
98
99
|
):
|
99
100
|
data_fed = await field.get_file_extracted_data()
|
100
101
|
if data_fed is not None:
|
101
|
-
field_data.file =
|
102
|
+
field_data.file = from_proto.file_extracted_data(data_fed)
|
102
103
|
|
103
104
|
if (
|
104
105
|
isinstance(field, Link)
|
@@ -107,7 +108,7 @@ async def set_resource_field_extracted_data(
|
|
107
108
|
):
|
108
109
|
data_led = await field.get_link_extracted_data()
|
109
110
|
if data_led is not None:
|
110
|
-
field_data.link =
|
111
|
+
field_data.link = from_proto.link_extracted_data(data_led)
|
111
112
|
|
112
113
|
|
113
114
|
async def serialize(
|
@@ -174,14 +175,12 @@ async def managed_serialize(
|
|
174
175
|
else None
|
175
176
|
)
|
176
177
|
|
177
|
-
resource.metadata =
|
178
|
-
resource.usermetadata =
|
178
|
+
resource.metadata = from_proto.metadata(orm_resource.basic.metadata)
|
179
|
+
resource.usermetadata = from_proto.user_metadata(orm_resource.basic.usermetadata)
|
179
180
|
resource.fieldmetadata = [
|
180
|
-
|
181
|
+
from_proto.user_field_metadata(fm) for fm in orm_resource.basic.fieldmetadata
|
181
182
|
]
|
182
|
-
resource.computedmetadata =
|
183
|
-
orm_resource.basic.computedmetadata
|
184
|
-
)
|
183
|
+
resource.computedmetadata = from_proto.computed_metadata(orm_resource.basic.computedmetadata)
|
185
184
|
|
186
185
|
resource.last_seqid = orm_resource.basic.last_seqid
|
187
186
|
|
@@ -195,18 +194,18 @@ async def managed_serialize(
|
|
195
194
|
await orm_resource.get_relations()
|
196
195
|
if orm_resource.relations is not None:
|
197
196
|
resource.relations = [
|
198
|
-
|
197
|
+
from_proto.relation(relation) for relation in orm_resource.relations.relations
|
199
198
|
]
|
200
199
|
|
201
200
|
if ResourceProperties.ORIGIN in show:
|
202
201
|
await orm_resource.get_origin()
|
203
202
|
if orm_resource.origin is not None:
|
204
|
-
resource.origin =
|
203
|
+
resource.origin = from_proto.origin(orm_resource.origin)
|
205
204
|
|
206
205
|
if ResourceProperties.EXTRA in show:
|
207
206
|
await orm_resource.get_extra()
|
208
207
|
if orm_resource.extra is not None:
|
209
|
-
resource.extra =
|
208
|
+
resource.extra = from_proto.extra(orm_resource.extra)
|
210
209
|
|
211
210
|
include_errors = ResourceProperties.ERRORS in show
|
212
211
|
|
@@ -221,7 +220,7 @@ async def managed_serialize(
|
|
221
220
|
await orm_resource.get_fields()
|
222
221
|
resource.data = ResourceData()
|
223
222
|
for (field_type, _), field in orm_resource.fields.items():
|
224
|
-
field_type_name =
|
223
|
+
field_type_name = from_proto.field_type_name(field_type)
|
225
224
|
if field_type_name not in field_type_filter:
|
226
225
|
continue
|
227
226
|
|
@@ -236,9 +235,7 @@ async def managed_serialize(
|
|
236
235
|
if field.id not in resource.data.texts:
|
237
236
|
resource.data.texts[field.id] = TextFieldData()
|
238
237
|
if include_value:
|
239
|
-
serialized_value = (
|
240
|
-
models.FieldText.from_message(value) if value is not None else None
|
241
|
-
)
|
238
|
+
serialized_value = from_proto.field_text(value) if value is not None else None
|
242
239
|
resource.data.texts[field.id].value = serialized_value
|
243
240
|
if include_errors:
|
244
241
|
error = await field.get_error()
|
@@ -259,7 +256,7 @@ async def managed_serialize(
|
|
259
256
|
resource.data.files[field.id] = FileFieldData()
|
260
257
|
if include_value:
|
261
258
|
if value is not None:
|
262
|
-
resource.data.files[field.id].value =
|
259
|
+
resource.data.files[field.id].value = from_proto.field_file(value)
|
263
260
|
else:
|
264
261
|
resource.data.files[field.id].value = None
|
265
262
|
|
@@ -282,7 +279,7 @@ async def managed_serialize(
|
|
282
279
|
if field.id not in resource.data.links:
|
283
280
|
resource.data.links[field.id] = LinkFieldData()
|
284
281
|
if include_value and value is not None:
|
285
|
-
resource.data.links[field.id].value =
|
282
|
+
resource.data.links[field.id].value = from_proto.field_link(value)
|
286
283
|
|
287
284
|
if include_errors:
|
288
285
|
error = await field.get_error()
|
@@ -310,9 +307,7 @@ async def managed_serialize(
|
|
310
307
|
)
|
311
308
|
if include_value and isinstance(field, Conversation):
|
312
309
|
value = await field.get_metadata()
|
313
|
-
resource.data.conversations[field.id].value =
|
314
|
-
value
|
315
|
-
)
|
310
|
+
resource.data.conversations[field.id].value = from_proto.field_conversation(value)
|
316
311
|
if include_extracted_data:
|
317
312
|
resource.data.conversations[field.id].extracted = ConversationFieldExtractedData()
|
318
313
|
await set_resource_field_extracted_data(
|
nucliadb/reader/api/models.py
CHANGED
@@ -22,7 +22,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union
|
|
22
22
|
from pydantic import BaseModel
|
23
23
|
|
24
24
|
import nucliadb_models as models
|
25
|
-
from nucliadb_models.common import FIELD_TYPES_MAP, FieldTypeName
|
25
|
+
from nucliadb_models.common import FieldTypeName
|
26
26
|
from nucliadb_models.resource import (
|
27
27
|
ConversationFieldExtractedData,
|
28
28
|
Error,
|
@@ -54,8 +54,6 @@ class ResourceField(BaseModel):
|
|
54
54
|
error: Optional[Error] = None
|
55
55
|
|
56
56
|
|
57
|
-
FIELD_NAMES_TO_PB_TYPE_MAP = {v: k for k, v in FIELD_TYPES_MAP.items()}
|
58
|
-
|
59
57
|
FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: dict[FieldTypeName, Any] = {
|
60
58
|
FieldTypeName.TEXT: TextFieldExtractedData,
|
61
59
|
FieldTypeName.FILE: FileFieldExtractedData,
|
nucliadb/reader/api/v1/download.py
CHANGED
@@ -29,9 +29,9 @@ from starlette.datastructures import Headers
|
|
29
29
|
from starlette.responses import StreamingResponse
|
30
30
|
|
31
31
|
from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR
|
32
|
+
from nucliadb.common.models_utils import to_proto
|
32
33
|
from nucliadb.ingest.serialize import get_resource_uuid_by_slug
|
33
34
|
from nucliadb.reader import SERVICE_NAME, logger
|
34
|
-
from nucliadb.reader.api.models import FIELD_NAMES_TO_PB_TYPE_MAP
|
35
35
|
from nucliadb_models.common import FieldTypeName
|
36
36
|
from nucliadb_models.resource import NucliaDBRoles
|
37
37
|
from nucliadb_utils.authentication import requires_one
|
@@ -97,7 +97,7 @@ async def _download_extract_file(
|
|
97
97
|
|
98
98
|
storage = await get_storage(service_name=SERVICE_NAME)
|
99
99
|
|
100
|
-
pb_field_type =
|
100
|
+
pb_field_type = to_proto.field_type_name(field_type)
|
101
101
|
field_type_letter = FIELD_TYPE_PB_TO_STR[pb_field_type]
|
102
102
|
|
103
103
|
sf = storage.file_extracted(kbid, rid, field_type_letter, field_id, download_field)
|
nucliadb/reader/api/v1/knowledgebox.py
CHANGED
@@ -23,9 +23,9 @@ from starlette.requests import Request
|
|
23
23
|
|
24
24
|
from nucliadb.common import datamanagers
|
25
25
|
from nucliadb.common.maindb.utils import get_driver
|
26
|
+
from nucliadb.common.models_utils import from_proto
|
26
27
|
from nucliadb.reader.api.v1.router import KB_PREFIX, KBS_PREFIX, api
|
27
28
|
from nucliadb_models.resource import (
|
28
|
-
KnowledgeBoxConfig,
|
29
29
|
KnowledgeBoxList,
|
30
30
|
KnowledgeBoxObj,
|
31
31
|
KnowledgeBoxObjSummary,
|
@@ -72,7 +72,7 @@ async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
|
72
72
|
return KnowledgeBoxObj(
|
73
73
|
uuid=kbid,
|
74
74
|
slug=kb_config.slug,
|
75
|
-
config=
|
75
|
+
config=from_proto.knowledgebox_config(kb_config),
|
76
76
|
)
|
77
77
|
|
78
78
|
|
@@ -99,5 +99,5 @@ async def get_kb_by_slug(request: Request, slug: str) -> KnowledgeBoxObj:
|
|
99
99
|
return KnowledgeBoxObj(
|
100
100
|
uuid=kbid,
|
101
101
|
slug=kb_config.slug,
|
102
|
-
config=
|
102
|
+
config=from_proto.knowledgebox_config(kb_config),
|
103
103
|
)
|
nucliadb/reader/api/v1/resource.py
CHANGED
@@ -22,9 +22,9 @@ from typing import Optional, Union
|
|
22
22
|
from fastapi import Header, HTTPException, Query, Request, Response
|
23
23
|
from fastapi_versioning import version
|
24
24
|
|
25
|
-
import nucliadb_models as models
|
26
25
|
from nucliadb.common.datamanagers.resources import KB_RESOURCE_SLUG_BASE
|
27
26
|
from nucliadb.common.maindb.utils import get_driver
|
27
|
+
from nucliadb.common.models_utils import from_proto, to_proto
|
28
28
|
from nucliadb.ingest.fields.conversation import Conversation
|
29
29
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as ORMKnowledgeBox
|
30
30
|
from nucliadb.ingest.orm.resource import Resource as ORMResource
|
@@ -37,7 +37,6 @@ from nucliadb.reader import SERVICE_NAME
|
|
37
37
|
from nucliadb.reader.api import DEFAULT_RESOURCE_LIST_PAGE_SIZE
|
38
38
|
from nucliadb.reader.api.models import (
|
39
39
|
FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP,
|
40
|
-
FIELD_NAMES_TO_PB_TYPE_MAP,
|
41
40
|
ResourceField,
|
42
41
|
)
|
43
42
|
from nucliadb.reader.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
|
@@ -334,9 +333,7 @@ async def _get_resource_field(
|
|
334
333
|
) -> Response:
|
335
334
|
storage = await get_storage(service_name=SERVICE_NAME)
|
336
335
|
driver = get_driver()
|
337
|
-
|
338
|
-
pb_field_id = FIELD_NAMES_TO_PB_TYPE_MAP[field_type]
|
339
|
-
|
336
|
+
pb_field_id = to_proto.field_type_name(field_type)
|
340
337
|
async with driver.transaction() as txn:
|
341
338
|
kb = ORMKnowledgeBox(txn, storage, kbid)
|
342
339
|
|
@@ -358,15 +355,15 @@ async def _get_resource_field(
|
|
358
355
|
|
359
356
|
if isinstance(value, resources_pb2.FieldText):
|
360
357
|
value = await field.get_value()
|
361
|
-
resource_field.value =
|
358
|
+
resource_field.value = from_proto.field_text(value)
|
362
359
|
|
363
360
|
if isinstance(value, resources_pb2.FieldFile):
|
364
361
|
value = await field.get_value()
|
365
|
-
resource_field.value =
|
362
|
+
resource_field.value = from_proto.field_file(value)
|
366
363
|
|
367
364
|
if isinstance(value, resources_pb2.FieldLink):
|
368
365
|
value = await field.get_value()
|
369
|
-
resource_field.value =
|
366
|
+
resource_field.value = from_proto.field_link(value)
|
370
367
|
|
371
368
|
if isinstance(field, Conversation):
|
372
369
|
if page == "first":
|
@@ -379,7 +376,7 @@ async def _get_resource_field(
|
|
379
376
|
|
380
377
|
value = await field.get_value(page=page_to_fetch)
|
381
378
|
if value is not None:
|
382
|
-
resource_field.value =
|
379
|
+
resource_field.value = from_proto.conversation(value)
|
383
380
|
|
384
381
|
if ResourceFieldProperties.EXTRACTED in show and extracted:
|
385
382
|
resource_field.extracted = FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP[field_type]()
|
nucliadb/reader/api/v1/services.py
CHANGED
@@ -32,6 +32,7 @@ from nucliadb.common.context.fastapi import get_app_context
|
|
32
32
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
33
33
|
from nucliadb.common.http_clients import processing
|
34
34
|
from nucliadb.common.maindb.utils import get_driver
|
35
|
+
from nucliadb.common.models_utils import from_proto
|
35
36
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
36
37
|
from nucliadb.models.responses import HTTPClientError
|
37
38
|
from nucliadb.reader import SERVICE_NAME
|
@@ -39,7 +40,6 @@ from nucliadb.reader.api.v1.router import KB_PREFIX, api
|
|
39
40
|
from nucliadb.reader.reader.notifications import kb_notifications_stream
|
40
41
|
from nucliadb_models.entities import (
|
41
42
|
EntitiesGroup,
|
42
|
-
EntitiesGroupSummary,
|
43
43
|
KnowledgeBoxEntities,
|
44
44
|
)
|
45
45
|
from nucliadb_models.labels import KnowledgeBoxLabels, LabelSet
|
@@ -86,7 +86,7 @@ async def list_entities_groups(kbid: str):
|
|
86
86
|
if entities_groups.status == ListEntitiesGroupsResponse.Status.OK:
|
87
87
|
response = KnowledgeBoxEntities(uuid=kbid)
|
88
88
|
for key, eg_summary in entities_groups.groups.items():
|
89
|
-
entities_group =
|
89
|
+
entities_group = from_proto.entities_group_summary(eg_summary)
|
90
90
|
response.groups[key] = entities_group
|
91
91
|
return response
|
92
92
|
elif entities_groups.status == ListEntitiesGroupsResponse.Status.NOTFOUND:
|
@@ -114,7 +114,7 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
|
|
114
114
|
|
115
115
|
kbobj: GetEntitiesGroupResponse = await ingest.GetEntitiesGroup(l_request) # type: ignore
|
116
116
|
if kbobj.status == GetEntitiesGroupResponse.Status.OK:
|
117
|
-
response =
|
117
|
+
response = from_proto.entities_group(kbobj.group)
|
118
118
|
return response
|
119
119
|
elif kbobj.status == GetEntitiesGroupResponse.Status.KB_NOT_FOUND:
|
120
120
|
raise HTTPException(status_code=404, detail=f"Knowledge Box '{kbid}' does not exist")
|
@@ -208,7 +208,7 @@ async def get_custom_synonyms(request: Request, kbid: str):
|
|
208
208
|
if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
|
209
209
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
210
210
|
synonyms = await datamanagers.atomic.synonyms.get(kbid=kbid) or Synonyms()
|
211
|
-
return
|
211
|
+
return from_proto.kb_synonyms(synonyms)
|
212
212
|
|
213
213
|
|
214
214
|
@api.get(
|
nucliadb/search/api/v1/feedback.py
CHANGED
@@ -18,10 +18,10 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
|
22
21
|
from fastapi import Header, Request, Response
|
23
22
|
from fastapi_versioning import version
|
24
23
|
|
24
|
+
from nucliadb.common.models_utils import to_proto
|
25
25
|
from nucliadb.models.responses import HTTPClientError
|
26
26
|
from nucliadb.search import logger
|
27
27
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
@@ -56,11 +56,11 @@ async def send_feedback_endpoint(
|
|
56
56
|
audit.feedback(
|
57
57
|
kbid=kbid,
|
58
58
|
user=x_nucliadb_user,
|
59
|
-
client_type=
|
59
|
+
client_type=to_proto.client_type(x_ndb_client),
|
60
60
|
origin=x_forwarded_for,
|
61
61
|
learning_id=item.ident,
|
62
62
|
good=item.good,
|
63
|
-
task=item.task
|
63
|
+
task=to_proto.feedback_task(item.task),
|
64
64
|
feedback=item.feedback,
|
65
65
|
text_block_id=item.text_block_id,
|
66
66
|
)
|
nucliadb/search/api/v1/knowledgebox.py
CHANGED
@@ -32,6 +32,7 @@ from nucliadb.common.cluster.utils import get_shard_manager
|
|
32
32
|
from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
|
33
33
|
from nucliadb.common.counters import IndexCounts
|
34
34
|
from nucliadb.common.external_index_providers.manager import get_external_index_manager
|
35
|
+
from nucliadb.common.models_utils import from_proto
|
35
36
|
from nucliadb.search import logger
|
36
37
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
37
38
|
from nucliadb.search.api.v1.utils import fastapi_query
|
@@ -73,7 +74,7 @@ async def knowledgebox_shards(request: Request, kbid: str) -> KnowledgeboxShards
|
|
73
74
|
status_code=404,
|
74
75
|
detail="The knowledgebox or its shards configuration is missing",
|
75
76
|
)
|
76
|
-
return
|
77
|
+
return from_proto.kb_shards(shards)
|
77
78
|
|
78
79
|
|
79
80
|
@api.get(
|
nucliadb/search/api/v1/search.py
CHANGED
@@ -27,6 +27,7 @@ from fastapi_versioning import version
|
|
27
27
|
from pydantic import ValidationError
|
28
28
|
|
29
29
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
30
|
+
from nucliadb.common.models_utils import to_proto
|
30
31
|
from nucliadb.models.responses import HTTPClientError
|
31
32
|
from nucliadb.search import predict
|
32
33
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
@@ -318,7 +319,7 @@ async def search(
|
|
318
319
|
audit.search(
|
319
320
|
kbid,
|
320
321
|
x_nucliadb_user,
|
321
|
-
|
322
|
+
to_proto.client_type(x_ndb_client),
|
322
323
|
x_forwarded_for,
|
323
324
|
pb_query,
|
324
325
|
time() - start_time,
|
nucliadb/search/search/chat/prompt.py
CHANGED
@@ -28,6 +28,7 @@ from pydantic import BaseModel
|
|
28
28
|
|
29
29
|
from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB, FieldId, ParagraphId
|
30
30
|
from nucliadb.common.maindb.utils import get_driver
|
31
|
+
from nucliadb.common.models_utils import from_proto
|
31
32
|
from nucliadb.ingest.fields.base import Field
|
32
33
|
from nucliadb.ingest.fields.conversation import Conversation
|
33
34
|
from nucliadb.ingest.fields.file import File
|
@@ -346,7 +347,7 @@ async def extend_prompt_context_with_origin_metadata(context, kbid, text_block_i
|
|
346
347
|
if resource is not None:
|
347
348
|
pb_origin = await resource.get_origin()
|
348
349
|
if pb_origin is not None:
|
349
|
-
origin =
|
350
|
+
origin = from_proto.origin(pb_origin)
|
350
351
|
return rid, origin
|
351
352
|
|
352
353
|
rids = {tb_id.rid for tb_id in text_block_ids}
|
@@ -433,7 +434,7 @@ async def extend_prompt_context_with_extra_metadata(context, kbid, text_block_id
|
|
433
434
|
if resource is not None:
|
434
435
|
pb_extra = await resource.get_extra()
|
435
436
|
if pb_extra is not None:
|
436
|
-
extra =
|
437
|
+
extra = from_proto.extra(pb_extra)
|
437
438
|
return rid, extra
|
438
439
|
|
439
440
|
rids = {tb_id.rid for tb_id in text_block_ids}
|
nucliadb/search/search/chat/query.py
CHANGED
@@ -20,6 +20,7 @@
|
|
20
20
|
import asyncio
|
21
21
|
from typing import Optional
|
22
22
|
|
23
|
+
from nucliadb.common.models_utils import to_proto
|
23
24
|
from nucliadb.search import logger
|
24
25
|
from nucliadb.search.predict import AnswerStatusCode
|
25
26
|
from nucliadb.search.requesters.utils import Method, node_query
|
@@ -278,7 +279,7 @@ def maybe_audit_chat(
|
|
278
279
|
audit.chat(
|
279
280
|
kbid,
|
280
281
|
user_id,
|
281
|
-
client_type
|
282
|
+
to_proto.client_type(client_type),
|
282
283
|
origin,
|
283
284
|
question=user_query,
|
284
285
|
generative_answer_time=generative_answer_time,
|
nucliadb/search/search/find.py
CHANGED
@@ -24,6 +24,7 @@ from typing import Optional
|
|
24
24
|
|
25
25
|
from nucliadb.common.external_index_providers.base import ExternalIndexManager
|
26
26
|
from nucliadb.common.external_index_providers.manager import get_external_index_manager
|
27
|
+
from nucliadb.common.models_utils import to_proto
|
27
28
|
from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
|
28
29
|
from nucliadb.search.search.find_merge import (
|
29
30
|
build_find_response,
|
@@ -136,7 +137,7 @@ async def _index_node_retrieval(
|
|
136
137
|
audit.search(
|
137
138
|
kbid,
|
138
139
|
x_nucliadb_user,
|
139
|
-
|
140
|
+
to_proto.client_type(x_ndb_client),
|
140
141
|
x_forwarded_for,
|
141
142
|
pb_query,
|
142
143
|
search_time,
|
nucliadb/search/search/merge.py
CHANGED
@@ -23,6 +23,7 @@ import math
|
|
23
23
|
from typing import Any, Optional, Set, Union
|
24
24
|
|
25
25
|
from nucliadb.common.ids import FieldId, ParagraphId
|
26
|
+
from nucliadb.common.models_utils.from_proto import RelationTypePbMap
|
26
27
|
from nucliadb.search.search import cache
|
27
28
|
from nucliadb.search.search.cut import cut_page
|
28
29
|
from nucliadb.search.search.fetch import (
|
@@ -33,11 +34,11 @@ from nucliadb.search.search.fetch import (
|
|
33
34
|
)
|
34
35
|
from nucliadb_models.common import FieldTypeName
|
35
36
|
from nucliadb_models.labels import translate_system_to_alias_label
|
36
|
-
from nucliadb_models.metadata import RelationTypePbMap
|
37
37
|
from nucliadb_models.resource import ExtractedDataTypeName
|
38
38
|
from nucliadb_models.search import (
|
39
39
|
DirectionalRelation,
|
40
40
|
EntitySubgraph,
|
41
|
+
EntityType,
|
41
42
|
KnowledgeboxSearchResults,
|
42
43
|
KnowledgeboxSuggestResults,
|
43
44
|
MinScore,
|
@@ -46,7 +47,6 @@ from nucliadb_models.search import (
|
|
46
47
|
RelatedEntities,
|
47
48
|
RelatedEntity,
|
48
49
|
RelationDirection,
|
49
|
-
RelationNodeTypeMap,
|
50
50
|
Relations,
|
51
51
|
ResourceProperties,
|
52
52
|
ResourceResult,
|
@@ -71,6 +71,7 @@ from nucliadb_protos.nodereader_pb2 import (
|
|
71
71
|
SuggestResponse,
|
72
72
|
VectorSearchResponse,
|
73
73
|
)
|
74
|
+
from nucliadb_protos.utils_pb2 import RelationNode
|
74
75
|
|
75
76
|
from .metrics import merge_observer
|
76
77
|
from .paragraphs import get_paragraph_text, get_text_sentence
|
@@ -81,6 +82,15 @@ TitleScore = str
|
|
81
82
|
SortValue = Union[Bm25Score, TimestampScore, TitleScore]
|
82
83
|
|
83
84
|
|
85
|
+
def relation_node_type_to_entity_type(node_type: RelationNode.NodeType.ValueType) -> EntityType:
|
86
|
+
return {
|
87
|
+
RelationNode.NodeType.ENTITY: EntityType.ENTITY,
|
88
|
+
RelationNode.NodeType.LABEL: EntityType.LABEL,
|
89
|
+
RelationNode.NodeType.RESOURCE: EntityType.RESOURCE,
|
90
|
+
RelationNode.NodeType.USER: EntityType.USER,
|
91
|
+
}[node_type]
|
92
|
+
|
93
|
+
|
84
94
|
def sort_results_by_score(results: Union[list[ParagraphResult], list[DocumentResult]]):
|
85
95
|
results.sort(key=lambda x: (x.score.bm25, x.score.booster), reverse=True)
|
86
96
|
|
@@ -457,7 +467,7 @@ def _merge_relations_results(
|
|
457
467
|
relations.entities[origin.value].related_to.append(
|
458
468
|
DirectionalRelation(
|
459
469
|
entity=destination.value,
|
460
|
-
entity_type=
|
470
|
+
entity_type=relation_node_type_to_entity_type(destination.ntype),
|
461
471
|
relation=relation_type,
|
462
472
|
relation_label=relation_label,
|
463
473
|
direction=RelationDirection.OUT,
|
@@ -467,7 +477,7 @@ def _merge_relations_results(
|
|
467
477
|
relations.entities[destination.value].related_to.append(
|
468
478
|
DirectionalRelation(
|
469
479
|
entity=origin.value,
|
470
|
-
entity_type=
|
480
|
+
entity_type=relation_node_type_to_entity_type(origin.ntype),
|
471
481
|
relation=relation_type,
|
472
482
|
relation_label=relation_label,
|
473
483
|
direction=RelationDirection.IN,
|
nucliadb/search/search/query.py
CHANGED
@@ -57,7 +57,6 @@ from nucliadb_models.search import (
|
|
57
57
|
MinScore,
|
58
58
|
SearchOptions,
|
59
59
|
SortField,
|
60
|
-
SortFieldMap,
|
61
60
|
SortOptions,
|
62
61
|
SortOrder,
|
63
62
|
SortOrderMap,
|
@@ -388,7 +387,7 @@ class QueryParser:
|
|
388
387
|
else:
|
389
388
|
request.result_per_page = self.top_k
|
390
389
|
|
391
|
-
sort_field =
|
390
|
+
sort_field = get_sort_field_proto(self.sort.field) if self.sort else None
|
392
391
|
if sort_field is not None:
|
393
392
|
request.order.sort_by = sort_field
|
394
393
|
request.order.type = SortOrderMap[self.sort.order] # type: ignore
|
@@ -910,3 +909,12 @@ async def get_matryoshka_dimension(kbid: str, vectorset: Optional[str]) -> Optio
|
|
910
909
|
matryoshka_dimension = vectorset_config.vectorset_index_config.vector_dimension
|
911
910
|
|
912
911
|
return matryoshka_dimension
|
912
|
+
|
913
|
+
|
914
|
+
def get_sort_field_proto(obj: SortField) -> Optional[nodereader_pb2.OrderBy.OrderField.ValueType]:
|
915
|
+
return {
|
916
|
+
SortField.SCORE: None,
|
917
|
+
SortField.CREATED: nodereader_pb2.OrderBy.OrderField.CREATED,
|
918
|
+
SortField.MODIFIED: nodereader_pb2.OrderBy.OrderField.MODIFIED,
|
919
|
+
SortField.TITLE: None,
|
920
|
+
}[obj]
|
nucliadb/writer/api/v1/services.py
CHANGED
@@ -23,6 +23,7 @@ from starlette.requests import Request
|
|
23
23
|
|
24
24
|
from nucliadb.common import datamanagers
|
25
25
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
26
|
+
from nucliadb.common.models_utils import to_proto
|
26
27
|
from nucliadb.models.responses import (
|
27
28
|
HTTPConflict,
|
28
29
|
HTTPInternalServerError,
|
@@ -240,7 +241,7 @@ async def delete_labelset(kbid: str, labelset_id: str):
|
|
240
241
|
async def set_custom_synonyms(request: Request, kbid: str, item: KnowledgeBoxSynonyms):
|
241
242
|
if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
|
242
243
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
243
|
-
synonyms =
|
244
|
+
synonyms = to_proto.kb_synonyms(item)
|
244
245
|
await datamanagers.atomic.synonyms.set(kbid=kbid, synonyms=synonyms)
|
245
246
|
return Response(status_code=204)
|
246
247
|
|
nucliadb/writer/resource/basic.py
CHANGED
@@ -22,17 +22,19 @@ from typing import Optional
|
|
22
22
|
|
23
23
|
from fastapi import HTTPException
|
24
24
|
|
25
|
+
from nucliadb.common.models_utils import to_proto
|
26
|
+
from nucliadb.common.models_utils.from_proto import (
|
27
|
+
RelationNodeTypeMap,
|
28
|
+
RelationTypeMap,
|
29
|
+
)
|
25
30
|
from nucliadb.ingest.orm.utils import set_title
|
26
31
|
from nucliadb.ingest.processing import PushPayload
|
27
|
-
from nucliadb_models.common import FIELD_TYPES_MAP_REVERSE
|
28
32
|
from nucliadb_models.content_types import GENERIC_MIME_TYPE
|
29
33
|
from nucliadb_models.file import FileField
|
30
34
|
from nucliadb_models.link import LinkField
|
31
35
|
from nucliadb_models.metadata import (
|
32
36
|
ParagraphAnnotation,
|
33
37
|
QuestionAnswerAnnotation,
|
34
|
-
RelationNodeTypeMap,
|
35
|
-
RelationTypeMap,
|
36
38
|
)
|
37
39
|
from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
|
38
40
|
from nucliadb_models.writer import (
|
@@ -144,9 +146,8 @@ def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess
|
|
144
146
|
userfieldmetadata.question_answers.append(qa_annotation_pb)
|
145
147
|
|
146
148
|
userfieldmetadata.field.field = fieldmetadata.field.field
|
147
|
-
|
148
|
-
|
149
|
-
]
|
149
|
+
|
150
|
+
userfieldmetadata.field.field_type = to_proto.field_type(fieldmetadata.field.field_type)
|
150
151
|
|
151
152
|
bm.basic.fieldmetadata.append(userfieldmetadata)
|
152
153
|
|
nucliadb/writer/resource/field.py
CHANGED
@@ -23,16 +23,13 @@ from typing import Optional, Union
|
|
23
23
|
from google.protobuf.json_format import MessageToDict
|
24
24
|
|
25
25
|
import nucliadb_models as models
|
26
|
+
from nucliadb.common.models_utils import from_proto, to_proto
|
26
27
|
from nucliadb.ingest.fields.conversation import Conversation
|
27
28
|
from nucliadb.ingest.orm.resource import Resource as ORMResource
|
28
29
|
from nucliadb.ingest.processing import PushPayload
|
29
30
|
from nucliadb.writer import SERVICE_NAME
|
30
31
|
from nucliadb.writer.utilities import get_processing
|
31
|
-
from nucliadb_models.common import (
|
32
|
-
FIELD_TYPES_MAP,
|
33
|
-
FIELD_TYPES_MAP_REVERSE,
|
34
|
-
FieldTypeName,
|
35
|
-
)
|
32
|
+
from nucliadb_models.common import FieldTypeName
|
36
33
|
from nucliadb_models.content_types import GENERIC_MIME_TYPE
|
37
34
|
from nucliadb_models.conversation import PushConversation
|
38
35
|
from nucliadb_models.writer import (
|
@@ -83,7 +80,7 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
|
|
83
80
|
storage = await get_storage(service_name=SERVICE_NAME)
|
84
81
|
await resource.get_fields()
|
85
82
|
for (field_type, field_id), field in resource.fields.items():
|
86
|
-
field_type_name =
|
83
|
+
field_type_name = from_proto.field_type_name(field_type)
|
87
84
|
|
88
85
|
if field_type_name not in {
|
89
86
|
FieldTypeName.TEXT,
|
@@ -338,7 +335,7 @@ async def parse_conversation_field(
|
|
338
335
|
cm.content.attachments_fields.extend(
|
339
336
|
[
|
340
337
|
resources_pb2.FieldRef(
|
341
|
-
field_type=
|
338
|
+
field_type=to_proto.field_type_name(attachment.field_type),
|
342
339
|
field_id=attachment.field_id,
|
343
340
|
split=attachment.split if attachment.split is not None else "",
|
344
341
|
)
|
{nucliadb-6.2.1.post2744.dist-info → nucliadb-6.2.1.post2751.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post2744
|
3
|
+
Version: 6.2.1.post2751
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2751
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2751
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post2751
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post2751
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nucliadb-node-binding>=2.26.0
|
31
31
|
Requires-Dist: nuclia-models>=0.24.2
|