nucliadb 6.3.0.post3417__py3-none-any.whl → 6.3.0.post3422__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,11 +17,13 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ import json
20
21
  from time import time
21
22
  from typing import Optional, Union
22
23
 
23
24
  from fastapi import Request, Response
24
25
  from fastapi_versioning import version
26
+ from pydantic import ValidationError
25
27
 
26
28
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
27
29
  from nucliadb.common.maindb.pg import PGDriver
@@ -34,11 +36,12 @@ from nucliadb.search.search import cache
34
36
  from nucliadb.search.search.exceptions import InvalidQueryError
35
37
  from nucliadb.search.search.merge import fetch_resources
36
38
  from nucliadb.search.search.pgcatalog import pgcatalog_search
37
- from nucliadb.search.search.query_parser.parser import parse_catalog
39
+ from nucliadb.search.search.query_parser.catalog import parse_catalog
38
40
  from nucliadb.search.search.utils import (
39
41
  maybe_log_request_payload,
40
42
  )
41
43
  from nucliadb_models.common import FieldTypeName
44
+ from nucliadb_models.filters import CatalogFilterExpression
42
45
  from nucliadb_models.metadata import ResourceProcessingStatus
43
46
  from nucliadb_models.resource import NucliaDBRoles
44
47
  from nucliadb_models.search import (
@@ -72,6 +75,9 @@ async def catalog_get(
72
75
  response: Response,
73
76
  kbid: str,
74
77
  query: str = fastapi_query(SearchParamDefaults.query),
78
+ filter_expression: Optional[str] = fastapi_query(
79
+ SearchParamDefaults.catalog_filter_expression, include_in_schema=False
80
+ ),
75
81
  filters: list[str] = fastapi_query(SearchParamDefaults.filters),
76
82
  faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
77
83
  sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
@@ -93,8 +99,17 @@ async def catalog_get(
93
99
  ),
94
100
  hidden: Optional[bool] = fastapi_query(SearchParamDefaults.hidden),
95
101
  ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
102
+ try:
103
+ expr = (
104
+ CatalogFilterExpression.model_validate_json(filter_expression) if filter_expression else None
105
+ )
106
+ except ValidationError as exc:
107
+ detail = json.loads(exc.json())
108
+ return HTTPClientError(status_code=422, detail=detail)
109
+
96
110
  item = CatalogRequest(
97
111
  query=query,
112
+ filter_expression=expr,
98
113
  filters=filters,
99
114
  faceted=faceted,
100
115
  page_number=page_number,
@@ -147,7 +162,7 @@ async def catalog(
147
162
  start_time = time()
148
163
  try:
149
164
  with cache.request_caches():
150
- query_parser = parse_catalog(kbid, item)
165
+ query_parser = await parse_catalog(kbid, item)
151
166
 
152
167
  catalog_results = CatalogResponse()
153
168
  catalog_results.fulltext = await pgcatalog_search(query_parser)
@@ -35,7 +35,7 @@ from nucliadb.search.search.exceptions import InvalidQueryError
35
35
  from nucliadb.search.search.find import find
36
36
  from nucliadb.search.search.utils import maybe_log_request_payload, min_score_from_query_params
37
37
  from nucliadb_models.common import FieldTypeName
38
- from nucliadb_models.filter import FilterExpression
38
+ from nucliadb_models.filters import FilterExpression
39
39
  from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
40
40
  from nucliadb_models.search import (
41
41
  FindRequest,
@@ -32,7 +32,7 @@ from nucliadb.search.search import cache
32
32
  from nucliadb.search.search.exceptions import InvalidQueryError
33
33
  from nucliadb.search.search.merge import merge_paragraphs_results
34
34
  from nucliadb.search.search.query import paragraph_query_to_pb
35
- from nucliadb_models.filter import FilterExpression
35
+ from nucliadb_models.filters import FilterExpression
36
36
  from nucliadb_models.resource import NucliaDBRoles
37
37
  from nucliadb_models.search import (
38
38
  NucliaDBClientType,
@@ -45,7 +45,7 @@ from nucliadb.search.search.utils import (
45
45
  should_disable_vector_search,
46
46
  )
47
47
  from nucliadb_models.common import FieldTypeName
48
- from nucliadb_models.filter import FilterExpression
48
+ from nucliadb_models.filters import FilterExpression
49
49
  from nucliadb_models.metadata import ResourceProcessingStatus
50
50
  from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
51
51
  from nucliadb_models.search import (
@@ -35,7 +35,7 @@ from nucliadb.search.search.merge import merge_suggest_results
35
35
  from nucliadb.search.search.query import suggest_query_to_pb
36
36
  from nucliadb.search.search.utils import filter_hidden_resources
37
37
  from nucliadb_models.common import FieldTypeName
38
- from nucliadb_models.filter import FilterExpression
38
+ from nucliadb_models.filters import FilterExpression
39
39
  from nucliadb_models.resource import NucliaDBRoles
40
40
  from nucliadb_models.search import (
41
41
  KnowledgeboxSuggestResults,
@@ -32,7 +32,7 @@ from nucliadb.search.search.metrics import RAGMetrics
32
32
  from nucliadb.search.search.query import QueryParser
33
33
  from nucliadb.search.settings import settings
34
34
  from nucliadb.search.utilities import get_predict
35
- from nucliadb_models import filter
35
+ from nucliadb_models import filters
36
36
  from nucliadb_models.search import (
37
37
  AskRequest,
38
38
  ChatContextMessage,
@@ -159,17 +159,17 @@ def add_resource_filter(request: Union[FindRequest, AskRequest], resources: list
159
159
 
160
160
  if request.filter_expression is not None:
161
161
  if len(resources) > 1:
162
- resource_filter: filter.FieldFilterExpression = filter.Or.model_validate(
163
- {"or": [filter.Resource(prop="resource", id=rid) for rid in resources]}
162
+ resource_filter: filters.FieldFilterExpression = filters.Or.model_validate(
163
+ {"or": [filters.Resource(prop="resource", id=rid) for rid in resources]}
164
164
  )
165
165
  else:
166
- resource_filter = filter.Resource(prop="resource", id=resources[0])
166
+ resource_filter = filters.Resource(prop="resource", id=resources[0])
167
167
 
168
168
  # Add to filter expression if set
169
169
  if request.filter_expression.field is None:
170
170
  request.filter_expression.field = resource_filter
171
171
  else:
172
- request.filter_expression.field = filter.And.model_validate(
172
+ request.filter_expression.field = filters.And.model_validate(
173
173
  {"and": [request.filter_expression.field, resource_filter]}
174
174
  )
175
175
  else:
@@ -20,15 +20,14 @@
20
20
 
21
21
  import logging
22
22
  from collections import defaultdict
23
- from typing import Any, cast
23
+ from typing import Any, Literal, Union, cast
24
24
 
25
25
  from psycopg.rows import dict_row
26
26
 
27
27
  from nucliadb.common.maindb.pg import PGDriver
28
28
  from nucliadb.common.maindb.utils import get_driver
29
- from nucliadb.search.search.query_parser.models import CatalogQuery
29
+ from nucliadb.search.search.query_parser.models import CatalogExpression, CatalogQuery
30
30
  from nucliadb_models.labels import translate_system_to_alias_label
31
- from nucliadb_models.metadata import ResourceProcessingStatus
32
31
  from nucliadb_models.search import (
33
32
  ResourceResult,
34
33
  Resources,
@@ -43,40 +42,73 @@ observer = metrics.Observer("pg_catalog_search", labels={"op": ""})
43
42
  logger = logging.getLogger(__name__)
44
43
 
45
44
 
46
def _filter_operands(operands: list[CatalogExpression]) -> tuple[list[str], list[CatalogExpression]]:
    """Partition boolean operands into plain facets and everything else.

    Returns a pair: the facet strings of every operand whose ``facet`` is set
    (truthy), and the operands without a facet. Both keep the input order.
    """
    facet_values = [operand.facet for operand in operands if operand.facet]
    remaining = [operand for operand in operands if not operand.facet]
    return facet_values, remaining
57
55
 
58
56
 
59
def _convert_filter(expr: CatalogExpression, filter_params: dict[str, Any]) -> str:
    """Translate a catalog expression node into a SQL boolean fragment.

    The first populated field of ``expr`` decides the translation, checked in
    this order: ``bool_and``, ``bool_or``, ``bool_not``, ``date``, ``facet``,
    ``resource_id``.

    Args:
        expr: expression node to translate.
        filter_params: query parameters, updated in place. New values are
            stored under ``param<N>``, N being the dict size at bind time.

    Returns:
        The SQL fragment, or an empty string when no field of ``expr`` is set.
    """
    if expr.bool_and:
        return _convert_boolean_op(expr.bool_and, "and", filter_params)
    if expr.bool_or:
        return _convert_boolean_op(expr.bool_or, "or", filter_params)
    if expr.bool_not:
        return f"(NOT {_convert_filter(expr.bool_not, filter_params)})"
    if expr.date:
        return _convert_date_filter(expr.date, filter_params)
    if expr.facet:
        param_name = f"param{len(filter_params)}"
        # `@>` compares arrays, so the single facet is wrapped in a list
        filter_params[param_name] = [expr.facet]
        return f"extract_facets(labels) @> %({param_name})s"
    if expr.resource_id:
        param_name = f"param{len(filter_params)}"
        # Scalar equality: bind the id itself rather than a one-element list
        filter_params[param_name] = expr.resource_id
        return f"rid = %({param_name})s"
    return ""
76
+
77
+
78
def _convert_boolean_op(
    operands: list[CatalogExpression],
    op: Union[Literal["and"], Literal["or"]],
    filter_params: dict[str, Any],
) -> str:
    """Render an AND/OR over ``operands`` as a parenthesised SQL fragment.

    All plain facet operands are folded into one array comparison against
    ``extract_facets(labels)`` (``@>`` for "and", ``&&`` for "or"). The other
    operands are converted one by one with ``_convert_filter``. Bound values
    are added to ``filter_params`` in place.
    """
    facets, others = _filter_operands(operands)

    clauses: list[str] = []
    if facets:
        placeholder = f"param{len(filter_params)}"
        filter_params[placeholder] = facets
        array_op = "@>" if op == "and" else "&&"
        clauses.append(f"extract_facets(labels) {array_op} %({placeholder})s")
    clauses.extend(_convert_filter(other, filter_params) for other in others)

    joiner = f" {op.upper()} "
    return f"({joiner.join(clauses)})"
93
+
94
+
95
def _convert_date_filter(date: CatalogExpression.Date, filter_params: dict[str, Any]) -> str:
    """Render a date condition on ``date.field`` as a SQL fragment.

    Args:
        date: column name plus optional ``since`` / ``until`` bounds.
        filter_params: query parameters, updated in place. Each bound is
            stored under ``param<N>``, N being the dict size at bind time.

    Returns:
        ``<field> BETWEEN since AND until`` when both bounds are set,
        ``<field> > since`` or ``<field> < until`` when only one is.

    Raises:
        ValueError: if neither bound is set.
    """

    def bind(value: Any) -> str:
        # Register the value under the next free ``param<N>`` name
        name = f"param{len(filter_params)}"
        filter_params[name] = value
        return name

    if date.since and date.until:
        since_name = bind(date.since)
        until_name = bind(date.until)
        # NOTE(review): BETWEEN includes both bounds, while the single-bound
        # branches below are strict -- confirm the asymmetry is intended
        return f"{date.field} BETWEEN %({since_name})s AND %({until_name})s"
    if date.since:
        return f"{date.field} > %({bind(date.since)})s"
    if date.until:
        return f"{date.field} < %({bind(date.until)})s"
    raise ValueError("Invalid date operator")
80
112
 
81
113
 
82
114
  def _prepare_query(catalog_query: CatalogQuery):
@@ -92,24 +124,8 @@ def _prepare_query(catalog_query: CatalogQuery):
92
124
  )
93
125
  filter_params["query"] = catalog_query.query
94
126
 
95
- if catalog_query.filters.creation.after:
96
- filter_sql.append("created_at > %(created_at_start)s")
97
- filter_params["created_at_start"] = catalog_query.filters.creation.after
98
-
99
- if catalog_query.filters.creation.before:
100
- filter_sql.append("created_at < %(created_at_end)s")
101
- filter_params["created_at_end"] = catalog_query.filters.creation.before
102
-
103
- if catalog_query.filters.modification.after:
104
- filter_sql.append("modified_at > %(modified_at_start)s")
105
- filter_params["modified_at_start"] = catalog_query.filters.modification.after
106
-
107
- if catalog_query.filters.modification.before:
108
- filter_sql.append("modified_at < %(modified_at_end)s")
109
- filter_params["modified_at_end"] = catalog_query.filters.modification.before
110
-
111
- if catalog_query.filters.labels:
112
- filter_sql.append(_convert_filter(catalog_query.filters.labels, filter_params))
127
+ if catalog_query.filters:
128
+ filter_sql.append(_convert_filter(catalog_query.filters, filter_params))
113
129
 
114
130
  order_sql = ""
115
131
  if catalog_query.sort:
@@ -130,13 +146,6 @@ def _prepare_query(catalog_query: CatalogQuery):
130
146
 
131
147
  order_sql = f" ORDER BY {order_field} {order_dir}"
132
148
 
133
- if catalog_query.filters.with_status:
134
- filter_sql.append("labels && %(status)s")
135
- if catalog_query.filters.with_status == ResourceProcessingStatus.PROCESSED:
136
- filter_params["status"] = ["/n/s/PROCESSED", "/n/s/ERROR"]
137
- else:
138
- filter_params["status"] = ["/n/s/PENDING"]
139
-
140
149
  return (
141
150
  f"SELECT * FROM catalog WHERE {' AND '.join(filter_sql)}{order_sql}",
142
151
  filter_params,
@@ -39,7 +39,7 @@ from nucliadb.search.search.rank_fusion import (
39
39
  from nucliadb.search.search.rerankers import (
40
40
  Reranker,
41
41
  )
42
- from nucliadb_models.filter import FilterExpression
42
+ from nucliadb_models.filters import FilterExpression
43
43
  from nucliadb_models.internal.predict import QueryInfo
44
44
  from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
45
45
  from nucliadb_models.metadata import ResourceProcessingStatus
@@ -0,0 +1,190 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+
22
+ from nucliadb.common import datamanagers
23
+ from nucliadb.search.search.exceptions import InvalidQueryError
24
+ from nucliadb.search.search.filters import translate_label
25
+ from nucliadb.search.search.query_parser.models import (
26
+ CatalogExpression,
27
+ CatalogQuery,
28
+ )
29
+ from nucliadb_models import search as search_models
30
+ from nucliadb_models.filters import (
31
+ And,
32
+ DateCreated,
33
+ DateModified,
34
+ Not,
35
+ Or,
36
+ Resource,
37
+ ResourceFilterExpression,
38
+ )
39
+ from nucliadb_models.labels import LABEL_HIDDEN
40
+ from nucliadb_models.metadata import ResourceProcessingStatus
41
+ from nucliadb_models.search import (
42
+ SortField,
43
+ SortOptions,
44
+ SortOrder,
45
+ )
46
+
47
+ from .filter_expression import FacetFilterTypes, facet_from_filter
48
+
49
+
50
async def parse_catalog(kbid: str, item: search_models.CatalogRequest) -> CatalogQuery:
    """Build the internal catalog query for a catalog request.

    The legacy filter parameters (``filters``, the creation/modification
    ranges and ``with_status``) cannot be combined with ``filter_expression``.
    When ``hidden`` is given, the hidden-label condition is AND-ed on top of
    whichever filter was built. Without an explicit sort, results are ordered
    by creation date, descending.

    Raises:
        InvalidQueryError: if legacy filters and ``filter_expression`` are
            both present.
    """
    uses_legacy_filters = any(
        (
            item.filters,
            item.range_creation_start is not None,
            item.range_creation_end is not None,
            item.range_modification_start is not None,
            item.range_modification_end is not None,
            item.with_status is not None,
        )
    )
    if uses_legacy_filters and item.filter_expression is not None:
        raise InvalidQueryError("filter_expression", "Cannot mix old filters with filter_expression")

    if uses_legacy_filters:
        catalog_expr = parse_old_filters(item)
    elif item.filter_expression:
        catalog_expr = await parse_filter_expression(item.filter_expression.resource, kbid)
    else:
        catalog_expr = None

    if item.hidden is not None:
        hidden_label = CatalogExpression(facet=LABEL_HIDDEN)
        # hidden=True keeps only hidden resources, hidden=False excludes them
        hidden_filter = hidden_label if item.hidden else CatalogExpression(bool_not=hidden_label)
        if catalog_expr is None:
            catalog_expr = hidden_filter
        else:
            catalog_expr = CatalogExpression(bool_and=[catalog_expr, hidden_filter])

    # By default we sort by creation date (most recent first)
    sort = (
        item.sort
        if item.sort is not None
        else SortOptions(field=SortField.CREATED, order=SortOrder.DESC, limit=None)
    )

    return CatalogQuery(
        kbid=kbid,
        query=item.query,
        filters=catalog_expr,
        sort=sort,
        faceted=item.faceted,
        page_number=item.page_number,
        page_size=item.page_size,
    )
98
+
99
+
100
def parse_old_filters(item: search_models.CatalogRequest) -> CatalogExpression:
    """Convert the legacy catalog filter parameters into one AND expression.

    The clauses are collected in this order: one per entry of
    ``item.filters``, then the creation range, the modification range and the
    processing status. Every label goes through ``translate_label``.
    """

    def facets_of(labels):
        # One facet expression per label, each passed through translate_label
        return [CatalogExpression(facet=translate_label(label)) for label in labels]

    clauses = []

    for entry in item.filters or []:
        if isinstance(entry, str):
            clause = CatalogExpression(facet=translate_label(entry))
        elif entry.all:
            clause = CatalogExpression(bool_and=facets_of(entry.all))
        elif entry.any:
            clause = CatalogExpression(bool_or=facets_of(entry.any))
        elif entry.none:
            clause = CatalogExpression(bool_not=CatalogExpression(bool_or=facets_of(entry.none)))
        elif entry.not_all:
            clause = CatalogExpression(bool_not=CatalogExpression(bool_and=facets_of(entry.not_all)))
        else:
            # Nothing set on this filter entry: it contributes no clause
            continue
        clauses.append(clause)

    if item.range_creation_start or item.range_creation_end:
        created = CatalogExpression.Date(
            field="created_at",
            since=item.range_creation_start,
            until=item.range_creation_end,
        )
        clauses.append(CatalogExpression(date=created))

    if item.range_modification_start or item.range_modification_end:
        modified = CatalogExpression.Date(
            field="modified_at",
            since=item.range_modification_start,
            until=item.range_modification_end,
        )
        clauses.append(CatalogExpression(date=modified))

    status = item.with_status
    if status == ResourceProcessingStatus.PROCESSED:
        # PROCESSED matches resources labelled either processed or errored
        finished = [
            CatalogExpression(facet="/n/s/PROCESSED"),
            CatalogExpression(facet="/n/s/ERROR"),
        ]
        clauses.append(CatalogExpression(bool_or=finished))
    elif status:
        # Any other status is matched against the pending label
        clauses.append(CatalogExpression(facet="/n/s/PENDING"))

    return CatalogExpression(bool_and=clauses)
155
+
156
+
157
async def parse_filter_expression(expr: ResourceFilterExpression, kbid: str) -> CatalogExpression:
    """Recursively convert a resource filter expression into a CatalogExpression.

    ``And`` / ``Or`` / ``Not`` nodes are converted operand by operand. A
    ``Resource`` filter without an id has its slug resolved to a resource
    uuid within ``kbid``.

    Raises:
        InvalidQueryError: if a resource slug cannot be resolved.
    """
    if isinstance(expr, And):
        return CatalogExpression(
            bool_and=[await parse_filter_expression(operand, kbid) for operand in expr.operands]
        )
    if isinstance(expr, Or):
        return CatalogExpression(
            bool_or=[await parse_filter_expression(operand, kbid) for operand in expr.operands]
        )
    if isinstance(expr, Not):
        return CatalogExpression(bool_not=await parse_filter_expression(expr.operand, kbid))
    if isinstance(expr, Resource):
        if expr.id:
            return CatalogExpression(resource_id=expr.id)
        rid = await datamanagers.atomic.resources.get_resource_uuid_from_slug(kbid=kbid, slug=expr.slug)
        if rid is None:
            raise InvalidQueryError("slug", f"Cannot find slug {expr.slug}")
        return CatalogExpression(resource_id=rid)
    if isinstance(expr, DateCreated):
        return CatalogExpression(
            date=CatalogExpression.Date(field="created_at", since=expr.since, until=expr.until)
        )
    if isinstance(expr, DateModified):
        return CatalogExpression(
            date=CatalogExpression.Date(field="modified_at", since=expr.since, until=expr.until)
        )
    if isinstance(expr, FacetFilterTypes):
        return CatalogExpression(facet=facet_from_filter(expr))

    # This is a trick so mypy generates an error if this point can be reached,
    # that is, if we are missing some ifs
    _a: int = "a"
    return CatalogExpression()
@@ -23,7 +23,7 @@ from typing import Union
23
23
  from nucliadb.common import datamanagers
24
24
  from nucliadb.common.ids import FIELD_TYPE_NAME_TO_STR
25
25
  from nucliadb.search.search.exceptions import InvalidQueryError
26
- from nucliadb_models.filter import (
26
+ from nucliadb_models.filters import (
27
27
  And,
28
28
  DateCreated,
29
29
  DateModified,
@@ -38,15 +38,52 @@ from nucliadb_models.filter import (
38
38
  Language,
39
39
  Not,
40
40
  Or,
41
+ OriginCollaborator,
41
42
  OriginMetadata,
42
43
  OriginPath,
44
+ OriginSource,
43
45
  OriginTag,
44
46
  ParagraphFilterExpression,
45
47
  Resource,
46
48
  ResourceMimetype,
49
+ Status,
47
50
  )
48
51
  from nucliadb_protos.nodereader_pb2 import FilterExpression as PBFilterExpression
49
52
 
53
# Filters that end up as a facet.
# Used as the static type of the argument of facet_from_filter().
FacetFilter = Union[
    OriginTag,
    Label,
    ResourceMimetype,
    FieldMimetype,
    Entity,
    Language,
    OriginMetadata,
    OriginPath,
    Generated,
    Kind,
    OriginCollaborator,
    OriginSource,
    Status,
]
# In Python 3.9 we cannot do isinstance against a union, so the same classes
# are repeated here as a tuple for runtime checks. Keep both listings in sync.
# Once we support only 3.10+, we can remove this
FacetFilterTypes = (
    OriginTag,
    Label,
    ResourceMimetype,
    FieldMimetype,
    Entity,
    Language,
    OriginMetadata,
    OriginPath,
    Generated,
    Kind,
    OriginCollaborator,
    OriginSource,
    Status,
)
86
+
50
87
 
51
88
  async def parse_expression(
52
89
  expr: Union[FieldFilterExpression, ParagraphFilterExpression],
@@ -90,47 +127,64 @@ async def parse_expression(
90
127
  f.date.since.FromDatetime(expr.since)
91
128
  if expr.until:
92
129
  f.date.until.FromDatetime(expr.until)
93
- elif isinstance(expr, OriginTag):
94
- f.facet.facet = f"/t/{expr.tag}"
130
+ elif isinstance(expr, FacetFilterTypes):
131
+ f.facet.facet = facet_from_filter(expr)
132
+ else:
133
+ # This is a trick so mypy generates an error if this branch can be reached,
134
+ # that is, if we are missing some ifs
135
+ _a: int = "a"
136
+
137
+ return f
138
+
139
+
140
def facet_from_filter(expr: FacetFilter) -> str:
    """Return the facet path string that represents a facet-style filter.

    Each filter class maps to its own path prefix. Optional sub-values (label,
    mimetype subtype, entity value, metadata value, data augmentation task)
    are appended as an extra path segment only when set.

    Raises:
        TypeError: if ``expr`` is not one of the supported filter classes.
    """
    if isinstance(expr, OriginTag):
        facet = f"/t/{expr.tag}"
    elif isinstance(expr, Label):
        facet = f"/l/{expr.labelset}"
        if expr.label:
            facet += f"/{expr.label}"
    elif isinstance(expr, ResourceMimetype):
        facet = f"/n/i/{expr.type}"
        if expr.subtype:
            facet += f"/{expr.subtype}"
    elif isinstance(expr, FieldMimetype):
        facet = f"/mt/{expr.type}"
        if expr.subtype:
            facet += f"/{expr.subtype}"
    elif isinstance(expr, Entity):
        facet = f"/e/{expr.subtype}"
        if expr.value:
            facet += f"/{expr.value}"
    elif isinstance(expr, Language):
        if expr.only_primary:
            facet = f"/s/p/{expr.language}"
        else:
            facet = f"/s/s/{expr.language}"
    elif isinstance(expr, OriginMetadata):
        facet = f"/m/{expr.field}"
        if expr.value:
            facet += f"/{expr.value}"
    elif isinstance(expr, OriginPath):
        facet = f"/p/{expr.prefix}"
    elif isinstance(expr, Generated):
        facet = "/g/da"
        if expr.da_task:
            facet += f"/{expr.da_task}"
    elif isinstance(expr, Kind):
        facet = f"/k/{expr.kind.lower()}"
    elif isinstance(expr, OriginCollaborator):
        facet = f"/u/o/{expr.collaborator}"
    elif isinstance(expr, OriginSource):
        facet = f"/u/s/{expr.id}"
    elif isinstance(expr, Status):
        facet = f"/n/s/{expr.status.value}"
    else:
        # This is a trick so mypy generates an error if this branch can be reached,
        # that is, if we are missing some ifs
        _a: int = "a"
        # At runtime, fail with a clear message instead of falling through to
        # `return facet` with the variable unbound
        raise TypeError(f"Unsupported facet filter type: {type(expr).__name__}")

    return facet
134
188
 
135
189
 
136
190
  def add_and_expression(dest: PBFilterExpression, add: PBFilterExpression):
@@ -20,7 +20,7 @@
20
20
 
21
21
  from dataclasses import dataclass
22
22
  from datetime import datetime
23
- from typing import Any, Optional
23
+ from typing import Literal, Optional, Union
24
24
 
25
25
  from pydantic import (
26
26
  BaseModel,
@@ -77,21 +77,26 @@ class UnitRetrieval:
77
77
 
78
78
 
79
79
  ### Catalog
80
@dataclass
class CatalogExpression:
    """Node of a catalog filter expression tree.

    Every field defaults to None. NOTE(review): presumably exactly one field
    is set per node -- confirm against the code that builds these.
    """

    @dataclass
    class Date:
        """Date condition on one of the two supported timestamp columns."""

        # Column the condition applies to
        field: Union[Literal["created_at"], Literal["modified_at"]]
        # Optional bounds of the range; both are typed as possibly None
        since: Optional[datetime]
        until: Optional[datetime]

    # Boolean combinators over nested expressions
    bool_and: Optional[list["CatalogExpression"]] = None
    bool_or: Optional[list["CatalogExpression"]] = None
    bool_not: Optional["CatalogExpression"] = None
    # Leaf conditions: a date range, a facet string or a resource id
    date: Optional[Date] = None
    facet: Optional[str] = None
    resource_id: Optional[str] = None
89
94
 
90
95
 
91
96
  class CatalogQuery(BaseModel):
92
97
  kbid: str
93
98
  query: str
94
- filters: CatalogFilters
99
+ filters: Optional[CatalogExpression]
95
100
  sort: search_models.SortOptions
96
101
  faceted: list[str]
97
102
  page_size: int
@@ -18,19 +18,11 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- from typing import Any
22
21
 
23
22
  from pydantic import ValidationError
24
23
 
25
- from nucliadb.search.search.filters import (
26
- convert_to_node_filters,
27
- translate_label_filters,
28
- )
29
24
  from nucliadb.search.search.query_parser.exceptions import InternalParserError
30
25
  from nucliadb.search.search.query_parser.models import (
31
- CatalogFilters,
32
- CatalogQuery,
33
- DateTimeFilter,
34
26
  NoopReranker,
35
27
  PredictReranker,
36
28
  RankFusion,
@@ -39,13 +31,8 @@ from nucliadb.search.search.query_parser.models import (
39
31
  UnitRetrieval,
40
32
  )
41
33
  from nucliadb_models import search as search_models
42
- from nucliadb_models.labels import LABEL_HIDDEN
43
34
  from nucliadb_models.search import (
44
- Filter,
45
35
  FindRequest,
46
- SortField,
47
- SortOptions,
48
- SortOrder,
49
36
  )
50
37
 
51
38
 
@@ -138,47 +125,3 @@ class _FindParser:
138
125
  raise InternalParserError(f"Unknown reranker {self.item.reranker}")
139
126
 
140
127
  return reranking
141
-
142
-
143
- def parse_catalog(kbid: str, item: search_models.CatalogRequest) -> CatalogQuery:
144
- filters = item.filters
145
-
146
- if item.hidden is not None:
147
- if item.hidden:
148
- filters.append(Filter(all=[LABEL_HIDDEN])) # type: ignore
149
- else:
150
- filters.append(Filter(none=[LABEL_HIDDEN])) # type: ignore
151
-
152
- label_filters: dict[str, Any] = convert_to_node_filters(item.filters)
153
- if len(label_filters) > 0:
154
- label_filters = translate_label_filters(label_filters)
155
-
156
- sort = item.sort
157
- if sort is None:
158
- # By default we sort by creation date (most recent first)
159
- sort = SortOptions(
160
- field=SortField.CREATED,
161
- order=SortOrder.DESC,
162
- limit=None,
163
- )
164
-
165
- return CatalogQuery(
166
- kbid=kbid,
167
- query=item.query,
168
- filters=CatalogFilters(
169
- labels=label_filters,
170
- creation=DateTimeFilter(
171
- after=item.range_creation_start,
172
- before=item.range_creation_end,
173
- ),
174
- modification=DateTimeFilter(
175
- after=item.range_modification_start,
176
- before=item.range_modification_end,
177
- ),
178
- with_status=item.with_status,
179
- ),
180
- sort=sort,
181
- faceted=item.faceted,
182
- page_number=item.page_number,
183
- page_size=item.page_size,
184
- )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.3.0.post3417
3
+ Version: 6.3.0.post3422
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.0.post3417
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.0.post3417
25
- Requires-Dist: nucliadb-protos>=6.3.0.post3417
26
- Requires-Dist: nucliadb-models>=6.3.0.post3417
27
- Requires-Dist: nidx-protos>=6.3.0.post3417
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.0.post3422
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.0.post3422
25
+ Requires-Dist: nucliadb-protos>=6.3.0.post3422
26
+ Requires-Dist: nucliadb-models>=6.3.0.post3422
27
+ Requires-Dist: nidx-protos>=6.3.0.post3422
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -192,20 +192,20 @@ nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,
192
192
  nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
193
193
  nucliadb/search/api/v1/__init__.py,sha256=8w6VhZ5rbzX1xLSXr336d2IE-O0dQiv-ba6UYdRKnHA,1325
194
194
  nucliadb/search/api/v1/ask.py,sha256=F2dR3-swb3Xz8MfZPYL3G65KY2R_mgef4YVBbu8kLi4,4352
195
- nucliadb/search/api/v1/catalog.py,sha256=TF19WN-qgZZLkqBwVH5xNsMxYTrmdEflPvy7qft_4lE,7010
195
+ nucliadb/search/api/v1/catalog.py,sha256=LIoLknBAXIRwrJA3TCPoVIoYZZx41R1eMP7yR7x_f_Y,7579
196
196
  nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
197
- nucliadb/search/api/v1/find.py,sha256=2_YX0p6Y8dYVYy99OB0cgPlre13P_JfWmE07MnbVwlE,9627
197
+ nucliadb/search/api/v1/find.py,sha256=l2dRg0eYngq52vyn9_z9iK7bdO7ufHQDnJWBZgMVrqY,9628
198
198
  nucliadb/search/api/v1/knowledgebox.py,sha256=rWhx3PYWryingu19qwwFDbVvVYynq5Ky23FSlzmTutQ,8721
199
199
  nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
200
200
  nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
201
- nucliadb/search/api/v1/search.py,sha256=DLXxh2FRXmLnZIIXaSLT7XaNoY2GZJTkpcduLTDyVW4,14023
202
- nucliadb/search/api/v1/suggest.py,sha256=urnA8rXLTcT2-Yulw2_43Ow8cAPWQEe5bxgDI9gNNZM,6505
201
+ nucliadb/search/api/v1/search.py,sha256=tv9WHdoXKcU0HQYjajh2PoG-IjlFxPOl2hyQkug2kco,14024
202
+ nucliadb/search/api/v1/suggest.py,sha256=Pwyxyk3Vu7aKU8vl2_rKhuE40ngnjZwAXS1rAilPDtM,6506
203
203
  nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
204
204
  nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
205
205
  nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
206
206
  nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
207
207
  nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=fqqRCd8Wc9GciS5P98lcnihvTKStsZYYtOU-T1bc-6E,4771
208
- nucliadb/search/api/v1/resource/search.py,sha256=-GY84XiiGd2vyJtunWvZMdBhZU0O0giO-ISo-HMntBo,5227
208
+ nucliadb/search/api/v1/resource/search.py,sha256=s_si6iilqhmopEQ5GG5c7C_4QV3X8QneQyS5zP0d22I,5228
209
209
  nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
210
210
  nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
211
211
  nucliadb/search/requesters/utils.py,sha256=qL81UVPNgBftUMLpcxIYVr7ILsMqpKCo-9SY2EvAaXw,6681
@@ -223,9 +223,9 @@ nucliadb/search/search/ingestion_agents.py,sha256=NeJr4EEX-bvFFMGvXOOwLv8uU7NuQ-
223
223
  nucliadb/search/search/merge.py,sha256=i_PTBFRqC5iTTziOMEltxLIlmokIou5hjjgR4BnoLBE,22635
224
224
  nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUHoiUM,2872
225
225
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
226
- nucliadb/search/search/pgcatalog.py,sha256=hNZJtbke0kFz9ygz70gMMJz9NxhRPznefCad83yQPd4,8887
226
+ nucliadb/search/search/pgcatalog.py,sha256=V1NYLEUSXHpWmgcPIo1HS2riK_HDXSi-uykJjSoOOrE,9033
227
227
  nucliadb/search/search/predict_proxy.py,sha256=IFI3v_ODz2_UU1XZnyaD391fE7-2C0npSmj_HmDvzS4,3123
228
- nucliadb/search/search/query.py,sha256=j7NdAXQXSytWvf_tyqpG-Fz4AhVGI-Y5i7NPm3s7-PI,29980
228
+ nucliadb/search/search/query.py,sha256=S6kvvTNIgOaqsPEisasgbcfsI7ax5E1G5RJwp7-a9jA,29981
229
229
  nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
230
230
  nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
231
231
  nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
@@ -236,14 +236,15 @@ nucliadb/search/search/chat/ask.py,sha256=HPHM97s1dxbHvugoWZj6aP8vL4gzHjjtwoDvzU
236
236
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
237
237
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
238
238
  nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
239
- nucliadb/search/search/chat/query.py,sha256=kMhtUOuVBkWbOaZnpzAUi32oRc5QnXKmlo2LFoozKWc,16382
239
+ nucliadb/search/search/chat/query.py,sha256=2QhVzvX12zLHOpVZ5MlBflqAauyCBl6dojhRGdm_6qU,16388
240
240
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
241
+ nucliadb/search/search/query_parser/catalog.py,sha256=EX6nDKH2qpMuuc7Ff0R_Ad78R4hj0JUDZp0ifUe1rrY,6963
241
242
  nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
242
243
  nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
243
- nucliadb/search/search/query_parser/filter_expression.py,sha256=wdqqQzdMe5nMS4jZh0HT21ppSd5ur50jyZRjzjGI7o4,5123
244
- nucliadb/search/search/query_parser/models.py,sha256=2iWuTcH24RDF8xokgXr0j5qbMoURQ1TFyqJIYs16LqU,2283
244
+ nucliadb/search/search/query_parser/filter_expression.py,sha256=jtQ3P0Hk7fyMgvf30wD1Ly7H-irPKDTr8DZ6yvI-brk,6211
245
+ nucliadb/search/search/query_parser/models.py,sha256=VHDuyJlU2OLZN1usrQX53TZbPmWhzMeVYY0BiYNFzak,2464
245
246
  nucliadb/search/search/query_parser/old_filters.py,sha256=-zbfN-RsXoj_DRjh3Lfp-wShwFXgkISawzVptVzja-A,9071
246
- nucliadb/search/search/query_parser/parser.py,sha256=m6meq5QQO_ofdtbrvEORsZLjxURWfRR0dINrgDXmYRg,6323
247
+ nucliadb/search/search/query_parser/parser.py,sha256=9TwkSNna3s-lCQIqBoSJzm6YbXdu8VIHJUan8M4ysfE,4667
247
248
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
248
249
  nucliadb/standalone/api_router.py,sha256=4-g-eEq27nL6vKCLRCoV0Pxf-L273N-eHeEX2vI9qgg,6215
249
250
  nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
@@ -339,8 +340,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
339
340
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
340
341
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
341
342
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
342
- nucliadb-6.3.0.post3417.dist-info/METADATA,sha256=Nya_zwsp_AAX_HD3Qf6w7Hwd5UpeppIBUeCi9qqCnro,4291
343
- nucliadb-6.3.0.post3417.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
344
- nucliadb-6.3.0.post3417.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
345
- nucliadb-6.3.0.post3417.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
346
- nucliadb-6.3.0.post3417.dist-info/RECORD,,
343
+ nucliadb-6.3.0.post3422.dist-info/METADATA,sha256=n6nkft0APRv4XuFn-IXurpMz8EaM6KXnm81xk63NvEg,4291
344
+ nucliadb-6.3.0.post3422.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
345
+ nucliadb-6.3.0.post3422.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
346
+ nucliadb-6.3.0.post3422.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
347
+ nucliadb-6.3.0.post3422.dist-info/RECORD,,