nucliadb 6.5.0.post4426__py3-none-any.whl → 6.5.0.post4484__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,19 +22,15 @@ import logging
22
22
  from collections import defaultdict
23
23
  from typing import Any, Literal, Union, cast
24
24
 
25
- from psycopg.rows import dict_row
25
+ from psycopg import AsyncCursor, sql
26
+ from psycopg.rows import DictRow, dict_row
26
27
 
27
28
  from nucliadb.common.maindb.pg import PGDriver
28
29
  from nucliadb.common.maindb.utils import get_driver
29
30
  from nucliadb.search.search.query_parser.models import CatalogExpression, CatalogQuery
30
31
  from nucliadb_models import search as search_models
31
32
  from nucliadb_models.labels import translate_system_to_alias_label
32
- from nucliadb_models.search import (
33
- ResourceResult,
34
- Resources,
35
- SortField,
36
- SortOrder,
37
- )
33
+ from nucliadb_models.search import CatalogFacetsRequest, ResourceResult, Resources, SortField, SortOrder
38
34
  from nucliadb_telemetry import metrics
39
35
 
40
36
  from .filters import translate_label
@@ -55,65 +51,87 @@ def _filter_operands(operands: list[CatalogExpression]) -> tuple[list[str], list
55
51
  return facets, nonfacets
56
52
 
57
53
 
58
def _convert_filter(expr: CatalogExpression, filter_params: dict[str, Any]) -> sql.Composable:
    """Translate a catalog filter expression into a composable SQL condition.

    Values are never inlined in the SQL text: each one is stored in
    ``filter_params`` under a fresh ``param<N>`` key (N being the current size
    of the dict) and referenced through a named placeholder.

    Returns an empty SQL fragment when none of the expression fields is set.
    """
    if expr.bool_and:
        return _convert_boolean_op(expr.bool_and, "and", filter_params)

    if expr.bool_or:
        return _convert_boolean_op(expr.bool_or, "or", filter_params)

    if expr.bool_not:
        negated = _convert_filter(expr.bool_not, filter_params)
        return sql.SQL("(NOT {})").format(negated)

    if expr.date:
        return _convert_date_filter(expr.date, filter_params)

    if expr.facet:
        key = f"param{len(filter_params)}"
        filter_params[key] = [expr.facet]
        placeholder = sql.Placeholder(key)
        if expr.facet == "/n/s/PROCESSED":
            # PROCESSED is known to be a full label, so it can be matched directly
            # against `labels` and use the smaller labels index. It is present in
            # most catalog entries, so PG is unlikely to use any index for it and
            # would otherwise fall back to running extract_facets, which can be slow.
            return sql.SQL("labels @> {}").format(placeholder)
        return sql.SQL("extract_facets(labels) @> {}").format(placeholder)

    if expr.resource_id:
        key = f"param{len(filter_params)}"
        filter_params[key] = [expr.resource_id]
        return sql.SQL("rid = {}").format(sql.Placeholder(key))

    # Nothing set in the expression: contribute no condition
    return sql.SQL("")
77
79
 
78
80
 
79
81
def _convert_boolean_op(
    operands: list[CatalogExpression],
    op: Union[Literal["and"], Literal["or"]],
    filter_params: dict[str, Any],
) -> sql.Composable:
    """Build a parenthesised AND/OR condition from a list of operands.

    All facet operands are collapsed into a single array comparison
    (``@>`` for "and", ``&&`` for "or"); every other operand is converted
    with `_convert_filter` and joined using the upper-cased operator.
    Values are registered in ``filter_params`` as ``param<N>`` entries.
    """
    facets, nonfacets = _filter_operands(operands)
    conditions: list[sql.Composable] = []

    if facets:
        key = f"param{len(filter_params)}"
        filter_params[key] = facets
        placeholder = sql.Placeholder(key)
        if facets == ["/n/s/PROCESSED"]:
            # PROCESSED is known to be a full label, so it can be matched directly
            # against `labels` and use the smaller labels index. It is present in
            # most catalog entries, so PG is unlikely to use any index for it and
            # would otherwise fall back to running extract_facets, which can be slow.
            conditions.append(sql.SQL("labels @> {}").format(placeholder))
        else:
            array_op = sql.SQL("@>" if op == "and" else "&&")
            conditions.append(
                sql.SQL("extract_facets(labels) {} {}").format(array_op, placeholder)
            )

    # Converted in order, since each conversion may add new entries to filter_params
    conditions.extend(_convert_filter(operand, filter_params) for operand in nonfacets)

    connector = sql.SQL(f" {op.upper()} ")
    return sql.SQL("({})").format(connector.join(conditions))
94
104
 
95
105
 
96
def _convert_date_filter(date: CatalogExpression.Date, filter_params: dict[str, Any]) -> sql.Composable:
    """Build the SQL condition for a date range filter.

    Produces ``BETWEEN`` when both bounds are set, ``>`` when only ``since``
    is set and ``<`` when only ``until`` is set. The column name is quoted as
    an identifier and the bounds are registered in ``filter_params`` as
    ``param<N>`` entries.

    Raises:
        ValueError: if neither ``since`` nor ``until`` is set.
    """

    def bind(value: Any) -> sql.Placeholder:
        # Register the value under the next free param name and return its placeholder
        name = f"param{len(filter_params)}"
        filter_params[name] = value
        return sql.Placeholder(name)

    has_since = bool(date.since)
    has_until = bool(date.until)

    if has_since and has_until:
        # `since` must be bound first so the parameter numbering stays stable
        since = bind(date.since)
        until = bind(date.until)
        return sql.SQL("{field} BETWEEN {since} AND {until}").format(
            field=sql.Identifier(date.field),
            since=since,
            until=until,
        )

    if has_since:
        since = bind(date.since)
        return sql.SQL("{field} > {since}").format(field=sql.Identifier(date.field), since=since)

    if has_until:
        until = bind(date.until)
        return sql.SQL("{field} < {until}").format(field=sql.Identifier(date.field), until=until)

    raise ValueError("Invalid date operator")
113
131
 
114
132
 
115
- def _prepare_query_filters(catalog_query: CatalogQuery) -> tuple[str, dict[str, Any]]:
116
- filter_sql = ["kbid = %(kbid)s"]
133
+ def _prepare_query_filters(catalog_query: CatalogQuery) -> tuple[sql.Composable, dict[str, Any]]:
134
+ filter_sql: list[sql.Composable] = [sql.SQL("kbid = %(kbid)s")]
117
135
  filter_params: dict[str, Any] = {"kbid": catalog_query.kbid}
118
136
 
119
137
  if catalog_query.query and catalog_query.query.query:
@@ -123,47 +141,50 @@ def _prepare_query_filters(catalog_query: CatalogQuery) -> tuple[str, dict[str,
123
141
  filter_sql.append(_convert_filter(catalog_query.filters, filter_params))
124
142
 
125
143
  return (
126
- f"SELECT * FROM catalog WHERE {' AND '.join(filter_sql)}",
144
+ sql.SQL("SELECT * FROM catalog WHERE {}").format(sql.SQL(" AND ").join(filter_sql)),
127
145
  filter_params,
128
146
  )
129
147
 
130
148
 
131
def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, Any]) -> sql.Composable:
    """Build the SQL condition for the text query of a catalog search.

    The (possibly wildcard-decorated) search text is stored in
    ``params["query"]`` and referenced from the returned fragment through the
    ``%(query)s`` placeholder.
    """
    mode = query.match

    if mode == search_models.CatalogQueryMatch.Exact:
        params["query"] = query.query
        column = sql.Identifier(query.field.value)
        return sql.SQL("{} = %(query)s").format(column)

    if mode == search_models.CatalogQueryMatch.StartsWith:
        params["query"] = query.query + "%"
        column = sql.Identifier(query.field.value)
        if query.field == search_models.CatalogQueryField.Title:
            # Insensitive search supported by pg_trgm for title
            return sql.SQL("{} ILIKE %(query)s").format(column)
        # Sensitive search for slug (btree does not support ILIKE and slugs are all lowercase anyway)
        return sql.SQL("{} LIKE %(query)s").format(column)

    # The rest of operators are only supported by title
    if mode == search_models.CatalogQueryMatch.Words:
        # Tokenization happens inside the SQL server (to keep the index updated). It could be
        # moved to the python code at update/query time if it ever becomes a problem, but for
        # now a single regex executed per query is not a problem.
        params["query"] = query.query
        return sql.SQL(
            "regexp_split_to_array(lower(title), '\\W') @> regexp_split_to_array(lower(%(query)s), '\\W')"
        )

    if mode == search_models.CatalogQueryMatch.Fuzzy:
        params["query"] = query.query
        # Note: the operator is %>, we use %%> for psycopg escaping
        return sql.SQL("title %%> %(query)s")

    if mode == search_models.CatalogQueryMatch.EndsWith:
        params["query"] = "%" + query.query
        return sql.SQL("title ILIKE %(query)s")

    if mode == search_models.CatalogQueryMatch.Contains:
        params["query"] = "%" + query.query + "%"
        return sql.SQL("title ILIKE %(query)s")

    # This is a trick so mypy generates an error if this point can be reached,
    # that is, if we are missing some match type above
    _a: int = "a"  # pragma: nocover
    return sql.SQL("")  # pragma: nocover
164
185
 
165
186
 
166
- def _prepare_query(catalog_query: CatalogQuery) -> tuple[str, dict[str, Any]]:
187
+ def _prepare_query(catalog_query: CatalogQuery) -> tuple[sql.Composed, dict[str, Any]]:
167
188
  # Base query with all the filters
168
189
  query, filter_params = _prepare_query_filters(catalog_query)
169
190
 
@@ -184,11 +205,11 @@ def _prepare_query(catalog_query: CatalogQuery) -> tuple[str, dict[str, Any]]:
184
205
  else:
185
206
  order_dir = "DESC"
186
207
 
187
- query += f" ORDER BY {order_field} {order_dir}"
208
+ query += sql.SQL(" ORDER BY {} {}").format(sql.Identifier(order_field), sql.SQL(order_dir))
188
209
 
189
210
  # Pagination
190
211
  offset = catalog_query.page_size * catalog_query.page_number
191
- query += f" LIMIT %(page_size)s OFFSET %(offset)s"
212
+ query += sql.SQL(" LIMIT %(page_size)s OFFSET %(offset)s")
192
213
  filter_params["page_size"] = catalog_query.page_size
193
214
  filter_params["offset"] = offset
194
215
 
@@ -213,40 +234,18 @@ async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
213
234
  tmp_facets: dict[str, dict[str, int]] = {
214
235
  translate_label(f): defaultdict(int) for f in catalog_query.faceted
215
236
  }
216
- facet_filters = " OR ".join(f"label LIKE '{f}/%%'" for f in tmp_facets.keys())
217
- for facet in tmp_facets.keys():
218
- if not (
219
- facet.startswith("/n/s") or facet.startswith("/n/i") or facet.startswith("/l")
220
- ):
221
- logger.warning(
222
- f"Unexpected facet used at catalog: {facet}, kbid={catalog_query.kbid}"
223
- )
224
-
225
- await cur.execute(
226
- f"SELECT label, COUNT(*) FROM (SELECT unnest(labels) AS label FROM ({query}) fc) nl WHERE ({facet_filters}) GROUP BY 1 ORDER BY 1",
227
- query_params,
228
- )
229
-
230
- for row in await cur.fetchall():
231
- label = row["label"]
232
- label_parts = label.split("/")
233
- parent = "/".join(label_parts[:-1])
234
- count = row["count"]
235
- if parent in tmp_facets:
236
- tmp_facets[parent][translate_system_to_alias_label(label)] = count
237
237
 
238
- # No need to get recursive because our facets are at most 3 levels deep (e.g: /l/set/label)
239
- if len(label_parts) >= 3:
240
- grandparent = "/".join(label_parts[:-2])
241
- if grandparent in tmp_facets:
242
- tmp_facets[grandparent][translate_system_to_alias_label(parent)] += count
238
+ if catalog_query.filters is None:
239
+ await _faceted_search_unfiltered(cur, catalog_query, tmp_facets)
240
+ else:
241
+ await _faceted_search_filtered(cur, catalog_query, tmp_facets, query, query_params)
243
242
 
244
243
  facets = {translate_system_to_alias_label(k): v for k, v in tmp_facets.items()}
245
244
 
246
245
  # Totals
247
246
  with observer({"op": "totals"}):
248
247
  await cur.execute(
249
- f"SELECT COUNT(*) FROM ({query}) fc",
248
+ sql.SQL("SELECT COUNT(*) FROM ({}) fc").format(query),
250
249
  query_params,
251
250
  )
252
251
  total = (await cur.fetchone())["count"] # type: ignore
@@ -276,3 +275,115 @@ async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
276
275
  next_page=(catalog_query.page_size * catalog_query.page_number + len(data) < total),
277
276
  min_score=0,
278
277
  )
278
+
279
+
280
async def _faceted_search_unfiltered(
    cur: AsyncCursor[DictRow], catalog_query: CatalogQuery, tmp_facets: dict[str, dict[str, int]]
) -> None:
    """Fill ``tmp_facets`` with facet counts read from the ``catalog_facets`` table.

    Used when the catalog query has no filters, so counts can be computed for
    the whole knowledge box without going through the filtered catalog query.

    Args:
        cur: cursor used to run the query; rows are read by column name.
        catalog_query: only its ``kbid`` is used here.
        tmp_facets: mapping of requested facet prefix -> counts. It is updated
            in place with ``{child facet: count}`` for the direct children of
            each requested prefix.
    """
    if not tmp_facets:
        # Nothing was requested: no row could ever be kept, and the strict
        # filter below would otherwise be built from an empty list of conditions.
        return

    facet_params: dict[str, Any] = {}
    facet_sql: sql.Composable
    if len(tmp_facets) <= 5:
        # Asking for few facets, strictly filter to what we need in the query
        prefixes_sql = []
        for cnt, prefix in enumerate(tmp_facets):
            # Keep facets starting with "<prefix>/" whose remainder (everything after
            # the first len(prefix) + 1 characters) contains no further "/",
            # i.e. only the direct children of the prefix.
            prefixes_sql.append(
                sql.SQL("(facet LIKE {} AND POSITION('/' IN RIGHT(facet, {})) = 0)").format(
                    sql.Placeholder(f"facet_{cnt}"), sql.Placeholder(f"facet_len_{cnt}")
                )
            )
            facet_params[f"facet_{cnt}"] = f"{prefix}/%"
            facet_params[f"facet_len_{cnt}"] = -(len(prefix) + 1)
        # The OR group must be parenthesised: AND binds tighter than OR, so without
        # the parentheses the kbid restriction would only apply to the first prefix
        # and facets from other knowledge boxes would be counted.
        facet_sql = sql.SQL("AND ({})").format(sql.SQL(" OR ").join(prefixes_sql))
    elif all(facet.startswith(("/l", "/n/i")) for facet in tmp_facets):
        # Special case for the catalog query, which can have many facets asked for
        # Filter for the categories (icon and labels) in the query, filter the rest in the code below
        facet_sql = sql.SQL("AND (facet LIKE '/l/%%' OR facet like '/n/i/%%')")
    else:
        # Worst case: ask for all facets and filter here. This is faster than applying lots of filters
        facet_sql = sql.SQL("")

    await cur.execute(
        sql.SQL(
            "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s {} GROUP BY facet"
        ).format(facet_sql),
        {"kbid": catalog_query.kbid, **facet_params},
    )

    # Only keep the facets we asked for
    for row in await cur.fetchall():
        facet = row["facet"]
        facet_parts = facet.split("/")
        parent = "/".join(facet_parts[:-1])
        if parent in tmp_facets:
            tmp_facets[parent][translate_system_to_alias_label(facet)] = row["count"]
319
+
320
+
321
async def _faceted_search_filtered(
    cur: AsyncCursor[DictRow],
    catalog_query: CatalogQuery,
    tmp_facets: dict[str, dict[str, int]],
    query: sql.Composable,
    query_params: dict[str, Any],
):
    """Fill ``tmp_facets`` with label counts computed over the filtered catalog query.

    The labels of every row returned by ``query`` are unnested and counted,
    keeping only those under one of the requested facet prefixes. Counts are
    written in place into ``tmp_facets``, for the parent of each label and,
    accumulated, for its grandparent.
    """
    label_filters = []
    label_params = {}
    for idx, prefix in enumerate(tmp_facets):
        if not prefix.startswith(("/n/s", "/n/i", "/l")):
            logger.warning(f"Unexpected facet used at catalog: {prefix}, kbid={catalog_query.kbid}")
        name = f"facet_{idx}"
        label_filters.append(sql.SQL("label LIKE {}").format(sql.Placeholder(name)))
        label_params[name] = f"{prefix}/%"

    statement = sql.SQL(
        "SELECT label, COUNT(*) FROM (SELECT unnest(labels) AS label FROM ({query}) fc) nl WHERE ({facet_filters}) GROUP BY 1 ORDER BY 1"
    ).format(query=query, facet_filters=sql.SQL(" OR ").join(label_filters))
    await cur.execute(statement, {**query_params, **label_params})

    for row in await cur.fetchall():
        label = row["label"]
        count = row["count"]
        parts = label.split("/")

        parent = "/".join(parts[:-1])
        if parent in tmp_facets:
            tmp_facets[parent][translate_system_to_alias_label(label)] = count

        # No need to get recursive because our facets are at most 3 levels deep (e.g: /l/set/label)
        if len(parts) < 3:
            continue
        grandparent = "/".join(parts[:-2])
        if grandparent in tmp_facets:
            # Accumulates, so the inner mapping must supply a default for missing keys
            tmp_facets[grandparent][translate_system_to_alias_label(parent)] += count
358
+
359
+
360
@observer.wrap({"op": "catalog_facets"})
async def pgcatalog_facets(kbid: str, request: CatalogFacetsRequest) -> dict[str, int]:
    """Return facet counts for a knowledge box from the ``catalog_facets`` table.

    Args:
        kbid: knowledge box whose facets are counted.
        request: its ``prefixes`` restrict the result to facets starting with
            one of the given prefixes; a prefix with a ``depth`` additionally
            limits how many levels below the prefix are returned. With no
            prefixes, every facet of the knowledge box is counted.

    Returns:
        A mapping built from the ``(facet, count)`` rows returned by the query.
    """
    async with _pg_driver()._get_connection() as conn, conn.cursor() as cur:
        prefix_filters: list[sql.Composable] = []
        prefix_params: dict[str, Any] = {}
        for cnt, prefix in enumerate(request.prefixes):
            prefix_sql = sql.SQL("facet LIKE {}").format(sql.Placeholder(f"prefix{cnt}"))
            prefix_params[f"prefix{cnt}"] = f"{prefix.prefix}%"
            if prefix.depth is not None:
                # Limit depth by requiring the "/"-separated part just past the
                # allowed depth to be empty
                prefix_parts = len(prefix.prefix.split("/"))
                depth_sql = sql.SQL("SPLIT_PART(facet, '/', {}) = ''").format(
                    sql.Placeholder(f"depth{cnt}")
                )
                prefix_params[f"depth{cnt}"] = prefix_parts + prefix.depth + 1
                prefix_sql = sql.SQL("({} AND {})").format(prefix_sql, depth_sql)
            prefix_filters.append(prefix_sql)

        filter_sql: sql.Composable
        if prefix_filters:
            # The OR group must be parenthesised: AND binds tighter than OR, so without
            # the parentheses the kbid restriction would only apply to the first prefix
            # and facets from other knowledge boxes would be counted.
            filter_sql = sql.SQL("AND ({})").format(sql.SQL(" OR ").join(prefix_filters))
        else:
            filter_sql = sql.SQL("")

        await cur.execute(
            sql.SQL(
                "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s {} GROUP BY facet"
            ).format(filter_sql),
            {"kbid": kbid, **prefix_params},
        )
        return {k: v for k, v in await cur.fetchall()}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.5.0.post4426
3
+ Version: 6.5.0.post4484
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4426
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4426
24
- Requires-Dist: nucliadb-protos>=6.5.0.post4426
25
- Requires-Dist: nucliadb-models>=6.5.0.post4426
26
- Requires-Dist: nidx-protos>=6.5.0.post4426
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4484
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4484
24
+ Requires-Dist: nucliadb-protos>=6.5.0.post4484
25
+ Requires-Dist: nucliadb-models>=6.5.0.post4484
26
+ Requires-Dist: nidx-protos>=6.5.0.post4484
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.24.2
29
29
  Requires-Dist: uvicorn[standard]
@@ -35,8 +35,8 @@ Requires-Dist: aiofiles>=0.8.0
35
35
  Requires-Dist: psutil>=5.9.7
36
36
  Requires-Dist: types-psutil>=5.9.5.17
37
37
  Requires-Dist: types-aiofiles>=0.8.3
38
- Requires-Dist: protobuf>=5
39
- Requires-Dist: types-protobuf>=5
38
+ Requires-Dist: protobuf<6,>=5
39
+ Requires-Dist: types-protobuf<6,>=5
40
40
  Requires-Dist: grpcio>=1.71.0
41
41
  Requires-Dist: grpcio-health-checking>=1.71.0
42
42
  Requires-Dist: grpcio-channelz>=1.71.0
@@ -32,14 +32,17 @@ migrations/0033_rollover_nidx_relation_2.py,sha256=9etpqNLVS3PA14qIdsdhorReZxenD
32
32
  migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQj8qu1z2XkFc,1452
33
33
  migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
34
34
  migrations/0036_backfill_catalog_slug.py,sha256=mizRM-HfPswKq4iEmqofu4kIT6Gd97ruT3qhb257vZk,2954
35
+ migrations/0037_backfill_catalog_facets.py,sha256=KAf3VKbKePw7ykDnJi47LyJ7pK1JwYkwMxrsXUnbt9g,2788
35
36
  migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
36
- migrations/pg/0001_bootstrap.py,sha256=Fsqkeof50m7fKiJN05kmNEMwiKDlOrAgcAS5sLLkutA,1256
37
+ migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
37
38
  migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
38
39
  migrations/pg/0003_catalog_kbid_index.py,sha256=uKq_vtnuf73GVf0mtl2rhzdk_czAoEU1UdiVKVZpA0M,1044
39
40
  migrations/pg/0004_catalog_facets.py,sha256=FJFASHjfEHG3sNve9BP2HnnLO4xr7dnR6Qpctnmt4LE,2180
40
41
  migrations/pg/0005_purge_tasks_index.py,sha256=3mtyFgpcK0QQ_NONYay7V9xICijCLNkyTPuoc0PBjRg,1139
41
42
  migrations/pg/0006_catalog_title_indexes.py,sha256=n2OGxwE4oeCwHAYaxBkja4t10BmwTjZ2IoCyOdjEBSc,1710
42
43
  migrations/pg/0007_catalog_slug.py,sha256=mArzZCBO-RD5DkWxRIyDKgEzrnAcis1TOGvSNUe7Kgg,1150
44
+ migrations/pg/0008_catalog_facets.py,sha256=dxIUdHJHtI_Gyk2dpP7tjHEnL2iPzAufi6ajYm2FVMI,1595
45
+ migrations/pg/0009_extract_facets_safety.py,sha256=k9Appx7ipp3wDyLy70qgw9oLjN7N6BEadE-N5Fhan-4,1066
43
46
  migrations/pg/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
44
47
  nucliadb/__init__.py,sha256=_abCmDJ_0ku483Os4UAjPX7Nywm39cQgAV_DiyjsKeQ,891
45
48
  nucliadb/health.py,sha256=UIxxA4oms4HIsCRZM_SZsdkIZIlgzmOxw-qSHLlWuak,3465
@@ -144,7 +147,7 @@ nucliadb/ingest/consumer/service.py,sha256=8AD41mMN7EUeUtk4ZNy14zfvxzwmVjIX6Mwe0
144
147
  nucliadb/ingest/consumer/shard_creator.py,sha256=w0smEu01FU_2cjZnsfBRNqT_Ntho11X17zTMST-vKbc,4359
145
148
  nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
146
149
  nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
147
- nucliadb/ingest/fields/base.py,sha256=YMG1xjem9kilHrnUttyj1qFotKtWHqVKXWbnIsymWYE,22397
150
+ nucliadb/ingest/fields/base.py,sha256=vYLGB-8SRYnFIHSZBSm20iXZDAzlwuBiJQC8s3BQv6w,22958
148
151
  nucliadb/ingest/fields/conversation.py,sha256=0tVpHLvi3UmuO98puimBJUpPXv3qEOpqlWVXVYvz9Vw,7082
149
152
  nucliadb/ingest/fields/exceptions.py,sha256=sZBk21BSrXFdOdo1qUdCAyD-9YMYakSLdn4_WdIPCIQ,1217
150
153
  nucliadb/ingest/fields/file.py,sha256=1v4jLg3balUua2VmSV8hHkAwPFShTUCOzufZvIUQcQw,4740
@@ -159,12 +162,12 @@ nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmB
159
162
  nucliadb/ingest/orm/index_message.py,sha256=DWMTHJoVamUbK8opKl5csDvxfgz7c2j7phG1Ut4yIxk,15724
160
163
  nucliadb/ingest/orm/knowledgebox.py,sha256=_rkeTMIXMhR64gbYtZpFHoUHghV2DTJ2lUBqZsoqC_4,23898
161
164
  nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
162
- nucliadb/ingest/orm/resource.py,sha256=hGELQgnzK2wIWgD478bR5OiVDyAxHn6WrFSq2YuHANU,36896
165
+ nucliadb/ingest/orm/resource.py,sha256=OZEdoaaP56VaybuAbUHexGRMmM9C8-S0340jIHqamcQ,37177
163
166
  nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,2693
164
167
  nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
165
168
  nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
166
169
  nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
167
- nucliadb/ingest/orm/processor/pgcatalog.py,sha256=Zh6s0gj_bwDKPBXSs61jlMKJ6XP-dLnPGbrMGD6RHcM,3195
170
+ nucliadb/ingest/orm/processor/pgcatalog.py,sha256=GpzQv0_iWTHbM90J0rAz_QIh_TMv1XbghyDgs8tk_8M,4014
168
171
  nucliadb/ingest/orm/processor/processor.py,sha256=jaEBwbv--WyoC8zcdxWAyF0dAzVA5crVDJl56Bqv1eI,31444
169
172
  nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
170
173
  nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
@@ -218,7 +221,7 @@ nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,
218
221
  nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
219
222
  nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
220
223
  nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o,5337
221
- nucliadb/search/api/v1/catalog.py,sha256=3SqLgwFkFFY8x-xBruHQaZ0EGpf7oKbSj-_PnobV68E,7747
224
+ nucliadb/search/api/v1/catalog.py,sha256=7yyG46Zsaqvuut9Da-LTl0KcWgo7n5lbEhiTXslyvwM,7865
222
225
  nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
223
226
  nucliadb/search/api/v1/find.py,sha256=iMjyq4y0JOMC_x1B8kUfVdkCoc9G9Ark58kPLLY4HDw,10824
224
227
  nucliadb/search/api/v1/graph.py,sha256=gthqxCOn9biE6D6s93jRGLglk0ono8U7OyS390kWiI8,4178
@@ -251,7 +254,7 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
251
254
  nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
252
255
  nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
253
256
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
254
- nucliadb/search/search/pgcatalog.py,sha256=QtgArjoM-dW_B1oO0aXqp5au7GlLG8jAct9jevUHatw,10997
257
+ nucliadb/search/search/pgcatalog.py,sha256=O_nRjSJf1Qc-XorVwcNlsDOftzy_zQLLfagkjU4YmSA,16718
255
258
  nucliadb/search/search/predict_proxy.py,sha256=cuD_sfM3RLdEoQaanRz0CflO6nKVGGKPzoFA17shb_w,8647
256
259
  nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
257
260
  nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
@@ -260,10 +263,10 @@ nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8
260
263
  nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
261
264
  nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
262
265
  nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
263
- nucliadb/search/search/chat/ask.py,sha256=GFxUh6KvqbidXmtvzgA7trVwF9xNPLcPDCD4IlqvTmI,37839
266
+ nucliadb/search/search/chat/ask.py,sha256=Ehntai2jrFCVQDkp1OoiYuaOxac8UcKrk1Cil5UWgEE,38230
264
267
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
265
268
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
266
- nucliadb/search/search/chat/prompt.py,sha256=e8C7_MPr6Cn3nJHA4hWpeW3629KVI1ZUQA_wZf9Kiu4,48503
269
+ nucliadb/search/search/chat/prompt.py,sha256=Avi-5wXozAKESJU6WwjLwNhqe4uO-YbSW_W2K2gzUgE,51837
267
270
  nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
268
271
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
269
272
  nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
@@ -372,8 +375,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
372
375
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
373
376
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
374
377
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
375
- nucliadb-6.5.0.post4426.dist-info/METADATA,sha256=fRo_rQ3D5zAGctuqOfk22MzKACI4nZ8mijFy-JSGaT0,4152
376
- nucliadb-6.5.0.post4426.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
377
- nucliadb-6.5.0.post4426.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
378
- nucliadb-6.5.0.post4426.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
379
- nucliadb-6.5.0.post4426.dist-info/RECORD,,
378
+ nucliadb-6.5.0.post4484.dist-info/METADATA,sha256=y8M01mfplPZbIOiU6x2TeRcEfSLACTZfXYbay2-LSGQ,4158
379
+ nucliadb-6.5.0.post4484.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
+ nucliadb-6.5.0.post4484.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
+ nucliadb-6.5.0.post4484.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
+ nucliadb-6.5.0.post4484.dist-info/RECORD,,