nucliadb-models 6.9.2.post5276__py3-none-any.whl → 6.9.6.post5453__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
1
+ # Copyright 2025 Bosutech XXI S.L.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+
16
+ from pydantic import BaseModel
17
+
18
+ from nucliadb_models.common import FieldTypeName
19
+ from nucliadb_models.resource import ExtractedDataTypeName, Resource
20
+ from nucliadb_models.search import Image, ResourceProperties, SearchParamDefaults
21
+
22
# Paragraph ids are handled as plain strings in this module.
ParagraphId = str


# Data returned for a single paragraph. Every field is optional and defaults
# to None.
class AugmentedParagraph(BaseModel):
    text: str | None = None

    # Mappings keyed by paragraph id with string values
    # (presumably the text of the surrounding paragraphs -- TODO confirm).
    neighbours_before: dict[ParagraphId, str] | None = None
    neighbours_after: dict[ParagraphId, str] | None = None

    image: Image | None = None
32
+
33
+
34
# Data attached to a field: a single optional image, None by default.
class AugmentedField(BaseModel):
    page_preview_image: Image | None = None
36
+
37
+
38
# Resource subclass that can overwrite its own fields from another Resource.
# Used as the value type of AugmentResponse.resources.
class AugmentedResource(Resource):
    def updated_from(self, origin: Resource) -> None:
        """Copy every declared model field of ``origin`` onto this instance.

        Field names are taken from the model class of ``origin``. Values are
        assigned as-is (the same objects, no copy is made), so existing
        values on ``self`` are overwritten.

        Args:
            origin: Resource whose field values are copied onto ``self``.
        """
        # Read `model_fields` from the class, not the instance: instance
        # access is deprecated since Pydantic 2.11.
        for key in type(origin).model_fields:
            setattr(self, key, getattr(origin, key))
42
+
43
+
44
# Resource part of an augment request.
class AugmentResources(BaseModel):
    # Required list of strings (presumably resource ids -- TODO confirm).
    given: list[str]

    # The three options below reuse the field definitions declared in
    # SearchParamDefaults.
    show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
    extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
    field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
    # TODO: field name filter, "da" field prefix filter
    # ("da" presumably stands for data augmentation -- confirm)
51
+
52
+
53
# A single paragraph reference inside an augment request; carries only the
# paragraph id.
class AugmentParagraph(BaseModel):
    id: ParagraphId
55
+
56
+
57
# Paragraph part of an augment request: the paragraphs to work on plus
# switches selecting what is returned for them.
class AugmentParagraphs(BaseModel):
    given: list[AugmentParagraph]

    # Enabled by default (presumably toggles returning the paragraph text
    # -- TODO confirm).
    text: bool = True

    # Both default to 0 (presumably the number of neighbouring paragraphs to
    # include on each side -- TODO confirm).
    neighbours_before: int = 0
    neighbours_after: int = 0

    # For a paragraph that was extracted from an image: return an image.
    source_image: bool = False

    # For a paragraph that was extracted from a table: return the table image.
    table_image: bool = False

    # When table image is enabled, return the page preview instead of the
    # table image.
    table_prefers_page_preview: bool = False

    # For a paragraph that comes from a page: return the page preview image.
    page_preview_image: bool = False
76
+
77
+
78
# Augment request body: both the resources and the paragraphs sections are
# required.
class AugmentRequest(BaseModel):
    resources: AugmentResources
    paragraphs: AugmentParagraphs
81
+
82
+
83
# Augment response body: two string-keyed mappings, one holding
# AugmentedResource values and one holding AugmentedParagraph values.
class AugmentResponse(BaseModel):
    resources: dict[str, AugmentedResource]
    paragraphs: dict[str, AugmentedParagraph]
@@ -86,7 +86,7 @@ class FieldConversation(BaseModel):
86
86
 
87
87
 
88
88
  class InputMessageContent(BaseModel):
89
- text: str = Field(max_length=10 * 1024)
89
+ text: str = Field()
90
90
  format: MessageFormat = MessageFormat.PLAIN
91
91
  attachments: List[FileB64] = Field(default=[], max_length=50)
92
92
  attachments_fields: List[FieldRef] = Field(default=[], max_length=50)
@@ -129,7 +129,6 @@ class InputConversationField(BaseModel):
129
129
  messages: List[InputMessage] = Field(
130
130
  default_factory=list,
131
131
  description="List of messages in the conversation field. Each message must have a unique ident. A single conversation can contain up to 51,200 messages. You can add up to 2,048 messages per request.",
132
- max_length=2048,
133
132
  )
134
133
  extract_strategy: Optional[str] = Field(
135
134
  default=None,
@@ -246,10 +246,11 @@ class Hydration(BaseModel, extra="forbid"):
246
246
  ParagraphId = Annotated[
247
247
  str,
248
248
  StringConstraints(
249
- pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+/[0-9]+-[0-9]+$",
250
- min_length=32 + 1 + 1 + 1 + 1 + 1 + 3,
249
+ # resource-uuid/field-type/field-id/[split-id/]paragraph-id
250
+ pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+(/[^/]{1,128})?/[0-9]+-[0-9]+$",
251
+ min_length=32 + 1 + 1 + 1 + 1 + 0 + 0 + 1 + 3,
251
252
  # max field id of 250 and 10 digit paragraphs. More than enough
252
- max_length=32 + 1 + 1 + 1 + 250 + 1 + 21,
253
+ max_length=32 + 1 + 1 + 1 + 250 + 1 + 128 + 1 + 21,
253
254
  ),
254
255
  ]
255
256
 
nucliadb_models/search.py CHANGED
@@ -347,10 +347,12 @@ SortOrderMap = {
347
347
 
348
348
  class SortOptions(BaseModel):
349
349
  field: SortField
350
- limit: Optional[int] = Field(None, gt=0)
351
350
  order: SortOrder = SortOrder.DESC
352
351
 
353
352
 
353
+ MAX_RANK_FUSION_WINDOW = 500
354
+
355
+
354
356
  class RankFusionName(str, Enum):
355
357
  RECIPROCAL_RANK_FUSION = "rrf"
356
358
 
@@ -380,7 +382,7 @@ class ReciprocalRankFusion(_BaseRankFusion):
380
382
  )
381
383
  window: Optional[int] = Field(
382
384
  default=None,
383
- le=500,
385
+ le=MAX_RANK_FUSION_WINDOW,
384
386
  title="RRF window",
385
387
  description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time", # noqa: E501
386
388
  )
@@ -503,10 +505,18 @@ class SearchParamDefaults:
503
505
  )
504
506
  top_k = ParamDefault(
505
507
  default=20,
508
+ gt=-1,
506
509
  le=200,
507
510
  title="Top k",
508
511
  description="The number of results search should return. The maximum number of results allowed is 200.",
509
512
  )
513
+ offset = ParamDefault(
514
+ default=0,
515
+ gt=-1,
516
+ le=1000,
517
+ title="Results offset",
518
+ description="The number of results to skip, starting from the beginning in sort order. Used for pagination. It can only be used with the keyword and fulltext indexes.",
519
+ )
510
520
  highlight = ParamDefault(
511
521
  default=False,
512
522
  title="Highlight",
@@ -532,12 +542,6 @@ class SearchParamDefaults:
532
542
  title="Sort order",
533
543
  description="Order to sort results with",
534
544
  )
535
- sort_limit = ParamDefault(
536
- default=None,
537
- title="Sort limit",
538
- description="",
539
- gt=0,
540
- )
541
545
  sort_field = ParamDefault(
542
546
  default=None,
543
547
  title="Sort field",
@@ -938,12 +942,32 @@ class SearchRequest(BaseSearchRequest):
938
942
  )
939
943
  faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
940
944
  sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
945
+ offset: int = SearchParamDefaults.offset.to_pydantic_field()
941
946
 
942
947
  @field_validator("faceted")
943
948
  @classmethod
944
949
  def nested_facets_not_supported(cls, facets):
945
950
  return validate_facets(facets)
946
951
 
952
@model_validator(mode="after")
def offset_sort_only_on_keyword_indexes(self):
    """Reject offset and non-score sorting when a non-keyword index is requested.

    Raises:
        ValueError: if the semantic or relations feature is requested
            together with a positive offset, or with a sort whose field is
            not the score.
    """
    non_keyword_indexes = {SearchOptions.SEMANTIC, SearchOptions.RELATIONS}
    # Nothing to check when neither the semantic nor the relations feature
    # was requested.
    if non_keyword_indexes.isdisjoint(self.features):
        return self

    if self.offset > 0:
        raise ValueError("offset cannot be used with the semantic or relations index")

    sort_options = self.sort
    if sort_options and sort_options.field != SortField.SCORE:
        raise ValueError("sort by date cannot be used with the semantic or relations index")

    return self
962
+
963
@field_validator("sort", mode="after")
@classmethod
def sorting_by_title_not_supported(cls, value: Optional[SortOptions]) -> Optional[SortOptions]:
    """Return ``value`` unchanged unless it requests sorting by title.

    Raises:
        ValueError: if ``value`` is set and its field is ``SortField.TITLE``.
    """
    # A missing sort, or a sort on any field other than the title, passes through.
    if not value or value.field != SortField.TITLE:
        return value

    raise ValueError("sorting by title not supported in /search")
970
+
947
971
 
948
972
  class Author(str, Enum):
949
973
  NUCLIA = "NUCLIA"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb_models
3
- Version: 6.9.2.post5276
3
+ Version: 6.9.6.post5453
4
4
  Author-email: Nuclia <nucliadb@nuclia.com>
5
5
  License-Expression: Apache-2.0
6
6
  Project-URL: Homepage, https://nuclia.com
@@ -1,15 +1,16 @@
1
1
  nucliadb_models/__init__.py,sha256=3y8-htogKuCZcbhaUZdSjTeEjUSeec9aRWyL8AlKCyM,1077
2
+ nucliadb_models/augment.py,sha256=vAtFh4D4eC4nvfwaRTlfeuAMOL9Z9TFZnUNiRAMasss,2543
2
3
  nucliadb_models/common.py,sha256=2dtKG4ZNi9p-yoNY76Uvyu1SlMeNYpH-MnuU3Q6w9Js,8169
3
4
  nucliadb_models/configuration.py,sha256=BBrJsNjP324Cw_5J3dBrGwvpkHQYbXEo3TUaI9IqAOg,2449
4
5
  nucliadb_models/content_types.py,sha256=36Ga-iGf4ivCqgtXC7imFgegrwHB117s9eqP62JtGv0,3456
5
- nucliadb_models/conversation.py,sha256=Ts-h7gMf18rw9tbjF2X__vNtMshHRDqbLua_VXb1qOE,5126
6
+ nucliadb_models/conversation.py,sha256=k9bKhkDiqhqmdrDfDPNoUfG7-2H_-KAyuOnETd8zV0E,5081
6
7
  nucliadb_models/entities.py,sha256=i-7Y8qmFRRTih5zw0ajv1U_iiXexe66M3TK8hUikQZk,2356
7
8
  nucliadb_models/export_import.py,sha256=mNm9IArOLnC6TLupkwqVFhxD5d08mpIVOVFneECv8UA,1073
8
9
  nucliadb_models/external_index_providers.py,sha256=pL3leo4MkuJOnKlU1Sg6GT_mnK_VUBxGui-RPmDYVWU,1126
9
10
  nucliadb_models/extracted.py,sha256=Owz7LC3le3Dvau3TtRiO8NY84meOf6IxN-RrOqqpMPs,5593
10
11
  nucliadb_models/file.py,sha256=tXtgB9c7i2ADsnJ7HdbXyroAmXadGvOeA49htBh7BZo,2263
11
12
  nucliadb_models/filters.py,sha256=NQI2-4AFzzJuZy8NeY3jXlTbbU5wxiwMCP-5DrD-7lE,14759
12
- nucliadb_models/hydration.py,sha256=7SFnAcTQRE9etVccpph6aA1AUqsHVwkzT4YF6Uzl0Gs,14262
13
+ nucliadb_models/hydration.py,sha256=SlAzraJE6DX0uOpZWxu2k_9-ikYorsj0t8xwsWSBQZY,14363
13
14
  nucliadb_models/labels.py,sha256=9zqRgkpZuX3kUPwsTTgCH7JyOWK7dM5pwyuHJR86YdU,3949
14
15
  nucliadb_models/link.py,sha256=PF5hHLwdOed5TMBTxtokkgWtMh1bFnORZjybh0NwVCw,2526
15
16
  nucliadb_models/metadata.py,sha256=OOKGy_83NtlG1QKQZEwMuwu4wbVEe7P30Y2QvnGSDto,8933
@@ -17,7 +18,7 @@ nucliadb_models/notifications.py,sha256=mna8-AoD_29Wds0Thl0AF0zpERnJmYGLZX1w1fUo
17
18
  nucliadb_models/processing.py,sha256=nhKuHQjqCdb9zJVkYGPTLub23tK9e_lwL5OCDVymZjY,719
18
19
  nucliadb_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
20
  nucliadb_models/resource.py,sha256=RzCos0QRgSMkaV-p7EoceSmt7UTzt9G9be5BKF-iGrQ,9021
20
- nucliadb_models/search.py,sha256=gQEXJ9bXXcxswr7aOzvBeGIQlrq5TgRWIqTxKEbSoCE,96409
21
+ nucliadb_models/search.py,sha256=_vn3pDXcK4iwiCfim3BtlD5EaQAeXoxl2IfNDsrKesA,97514
21
22
  nucliadb_models/security.py,sha256=opxaDLfvk3aU0sjesK0jGrYLx5h4YCwlKKN0moYs_ig,1150
22
23
  nucliadb_models/synonyms.py,sha256=afbaVqSQSxGLwi2PusVaLSRpkOtA5AZmWOKd1f4nl2E,690
23
24
  nucliadb_models/text.py,sha256=60bxZnOjRHnDdezR8VfR3AZsXTOwePFPs2BKB8wxBak,3414
@@ -33,7 +34,7 @@ nucliadb_models/graph/responses.py,sha256=Sdq8OgFAL1YT-1lJyLLrkqcScvj7YTEqAUwQ-k
33
34
  nucliadb_models/internal/__init__.py,sha256=zG33bUz1rHFPtvqQPWn4rDwBJt3FJodGuQYD45quiQg,583
34
35
  nucliadb_models/internal/predict.py,sha256=Pnx6MmLfK65eExe1XnVxqmSlvMwdowewwks9BOEoqMw,2029
35
36
  nucliadb_models/internal/shards.py,sha256=__y1OZtWGiNcPQEWfSFOj8yw458WGi7mM4vZe0K-L1Y,1691
36
- nucliadb_models-6.9.2.post5276.dist-info/METADATA,sha256=-kzs6LaR18FXQmyL87mIkJuOTwmGhctfqrU9Rn1AGuY,745
37
- nucliadb_models-6.9.2.post5276.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
38
- nucliadb_models-6.9.2.post5276.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
39
- nucliadb_models-6.9.2.post5276.dist-info/RECORD,,
37
+ nucliadb_models-6.9.6.post5453.dist-info/METADATA,sha256=x-3GFIapOwe9kpigwte5HkdfaNqPk-iL7CuE_NM3PAE,745
38
+ nucliadb_models-6.9.6.post5453.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
+ nucliadb_models-6.9.6.post5453.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
40
+ nucliadb_models-6.9.6.post5453.dist-info/RECORD,,