nucliadb 6.5.0.post4420__py3-none-any.whl → 6.5.0.post4476__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0036_backfill_catalog_slug.py +81 -0
- migrations/0037_backfill_catalog_facets.py +74 -0
- migrations/pg/0001_bootstrap.py +1 -1
- migrations/pg/0007_catalog_slug.py +31 -0
- migrations/pg/0008_catalog_facets.py +43 -0
- migrations/pg/0009_extract_facets_safety.py +26 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +31 -3
- nucliadb/search/api/v1/catalog.py +14 -8
- nucliadb/search/search/pgcatalog.py +174 -63
- {nucliadb-6.5.0.post4420.dist-info → nucliadb-6.5.0.post4476.dist-info}/METADATA +8 -8
- {nucliadb-6.5.0.post4420.dist-info → nucliadb-6.5.0.post4476.dist-info}/RECORD +14 -9
- {nucliadb-6.5.0.post4420.dist-info → nucliadb-6.5.0.post4476.dist-info}/WHEEL +0 -0
- {nucliadb-6.5.0.post4420.dist-info → nucliadb-6.5.0.post4476.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.5.0.post4420.dist-info → nucliadb-6.5.0.post4476.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #36
|
22
|
+
|
23
|
+
Backfill catalog slug field
|
24
|
+
|
25
|
+
"""
|
26
|
+
|
27
|
+
import logging
|
28
|
+
from typing import cast
|
29
|
+
|
30
|
+
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
31
|
+
from nucliadb.migrator.context import ExecutionContext
|
32
|
+
|
33
|
+
logger = logging.getLogger(__name__)
|
34
|
+
|
35
|
+
|
36
|
+
async def migrate(context: ExecutionContext) -> None:
|
37
|
+
driver = cast(PGDriver, context.kv_driver)
|
38
|
+
|
39
|
+
BATCH_SIZE = 10_000
|
40
|
+
async with driver.transaction() as txn:
|
41
|
+
txn = cast(PGTransaction, txn)
|
42
|
+
start_key = ""
|
43
|
+
while True:
|
44
|
+
async with txn.connection.cursor() as cur:
|
45
|
+
logger.info(f"Filling {BATCH_SIZE} catalog slugs from {start_key}")
|
46
|
+
# Get a batch of slugs from the resource table
|
47
|
+
await cur.execute(
|
48
|
+
"""
|
49
|
+
CREATE TEMPORARY TABLE tmp_0036_backfill_catalog ON COMMIT DROP AS
|
50
|
+
SELECT
|
51
|
+
key,
|
52
|
+
SPLIT_PART(key, '/', 3)::UUID AS kbid,
|
53
|
+
SPLIT_PART(key, '/', 5) AS slug,
|
54
|
+
ENCODE(value, 'escape')::UUID AS rid
|
55
|
+
FROM resources
|
56
|
+
WHERE key ~ '^/kbs/[^/]+/s/.*'
|
57
|
+
AND key > %s
|
58
|
+
ORDER BY key
|
59
|
+
LIMIT %s
|
60
|
+
""",
|
61
|
+
(start_key, BATCH_SIZE),
|
62
|
+
)
|
63
|
+
|
64
|
+
# Set the key for next iteration
|
65
|
+
await cur.execute("SELECT MAX(key) FROM tmp_0036_backfill_catalog")
|
66
|
+
start_key = (await cur.fetchone())[0] # type: ignore
|
67
|
+
if start_key is None:
|
68
|
+
break
|
69
|
+
|
70
|
+
# Update the catalog with the slugs
|
71
|
+
await cur.execute(
|
72
|
+
"""
|
73
|
+
UPDATE catalog c SET slug = tmp.slug
|
74
|
+
FROM tmp_0036_backfill_catalog tmp
|
75
|
+
WHERE c.kbid = tmp.kbid AND c.rid = tmp.rid
|
76
|
+
"""
|
77
|
+
)
|
78
|
+
await txn.commit()
|
79
|
+
|
80
|
+
|
81
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #37
|
22
|
+
|
23
|
+
Backfill catalog facets
|
24
|
+
|
25
|
+
"""
|
26
|
+
|
27
|
+
import logging
|
28
|
+
from typing import cast
|
29
|
+
|
30
|
+
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
31
|
+
from nucliadb.migrator.context import ExecutionContext
|
32
|
+
|
33
|
+
logger = logging.getLogger(__name__)
|
34
|
+
|
35
|
+
|
36
|
+
async def migrate(context: ExecutionContext) -> None:
|
37
|
+
driver = cast(PGDriver, context.kv_driver)
|
38
|
+
|
39
|
+
BATCH_SIZE = 1_000
|
40
|
+
async with driver.transaction() as txn:
|
41
|
+
txn = cast(PGTransaction, txn)
|
42
|
+
start_kbid = "00000000000000000000000000000000"
|
43
|
+
start_rid = "00000000000000000000000000000000"
|
44
|
+
while True:
|
45
|
+
async with txn.connection.cursor() as cur:
|
46
|
+
logger.info(f"Filling {BATCH_SIZE} catalog facets from {start_kbid}, {start_rid}")
|
47
|
+
# Get a batch of facets from the catalog table
|
48
|
+
await cur.execute(
|
49
|
+
"""
|
50
|
+
WITH i AS (
|
51
|
+
INSERT INTO catalog_facets (kbid, rid, facet)
|
52
|
+
SELECT kbid, rid, unnest(extract_facets(labels)) FROM (
|
53
|
+
SELECT * FROM catalog
|
54
|
+
WHERE (kbid = %(kbid)s AND rid > %(rid)s) OR kbid > %(kbid)s
|
55
|
+
ORDER BY kbid, rid
|
56
|
+
LIMIT %(batch)s
|
57
|
+
) rs
|
58
|
+
RETURNING kbid, rid
|
59
|
+
)
|
60
|
+
SELECT kbid, rid FROM i ORDER BY kbid DESC, rid DESC LIMIT 1;
|
61
|
+
""",
|
62
|
+
{"kbid": start_kbid, "rid": start_rid, "batch": BATCH_SIZE},
|
63
|
+
)
|
64
|
+
|
65
|
+
# Set the key for next iteration
|
66
|
+
results = await cur.fetchone() # type: ignore
|
67
|
+
if results is None:
|
68
|
+
break
|
69
|
+
(start_kbid, start_rid) = results
|
70
|
+
|
71
|
+
await txn.commit()
|
72
|
+
|
73
|
+
|
74
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
|
migrations/pg/0001_bootstrap.py
CHANGED
@@ -26,7 +26,7 @@ async def migrate(txn: PGTransaction) -> None:
|
|
26
26
|
# IF NOT EXISTS just for compatibility with older install predating the migration system
|
27
27
|
await cur.execute("""
|
28
28
|
CREATE TABLE IF NOT EXISTS resources (
|
29
|
-
key TEXT PRIMARY KEY,
|
29
|
+
key TEXT COLLATE ucs_basic PRIMARY KEY,
|
30
30
|
value BYTEA
|
31
31
|
);
|
32
32
|
""")
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from nucliadb.common.maindb.pg import PGTransaction
|
22
|
+
|
23
|
+
|
24
|
+
async def migrate(txn: PGTransaction) -> None:
|
25
|
+
async with txn.connection.cursor() as cur:
|
26
|
+
await cur.execute(
|
27
|
+
"""
|
28
|
+
ALTER TABLE catalog ADD COLUMN slug TEXT;
|
29
|
+
CREATE INDEX ON catalog(slug);
|
30
|
+
"""
|
31
|
+
)
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from nucliadb.common.maindb.pg import PGTransaction
|
22
|
+
|
23
|
+
|
24
|
+
async def migrate(txn: PGTransaction) -> None:
|
25
|
+
async with txn.connection.cursor() as cur:
|
26
|
+
await cur.execute(
|
27
|
+
"""
|
28
|
+
CREATE TABLE catalog_facets (
|
29
|
+
id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
|
30
|
+
kbid UUID,
|
31
|
+
rid UUID,
|
32
|
+
facet TEXT COLLATE ucs_basic,
|
33
|
+
|
34
|
+
FOREIGN KEY (kbid, rid) REFERENCES catalog (kbid, rid) ON DELETE CASCADE
|
35
|
+
);
|
36
|
+
|
37
|
+
-- For FK checks
|
38
|
+
CREATE INDEX ON catalog_facets(kbid, rid);
|
39
|
+
|
40
|
+
-- Best for per-facet aggregation, also used by search with facet filter
|
41
|
+
CREATE INDEX ON catalog_facets(kbid, facet);
|
42
|
+
"""
|
43
|
+
)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from nucliadb.common.maindb.pg import PGTransaction
|
22
|
+
|
23
|
+
|
24
|
+
async def migrate(txn: PGTransaction) -> None:
|
25
|
+
async with txn.connection.cursor() as cur:
|
26
|
+
await cur.execute("ALTER FUNCTION extract_facets(text[]) PARALLEL SAFE;")
|
@@ -40,6 +40,17 @@ def pgcatalog_enabled(kbid):
|
|
40
40
|
return isinstance(get_driver(), PGDriver)
|
41
41
|
|
42
42
|
|
43
|
+
def extract_facets(labels):
|
44
|
+
facets = set()
|
45
|
+
for label in labels:
|
46
|
+
parts = label.split("/")
|
47
|
+
facet = ""
|
48
|
+
for part in parts[1:]:
|
49
|
+
facet += f"/{part}"
|
50
|
+
facets.add(facet)
|
51
|
+
return facets
|
52
|
+
|
53
|
+
|
43
54
|
@observer.wrap({"type": "update"})
|
44
55
|
async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, index_message: IndexMessage):
|
45
56
|
if not pgcatalog_enabled(kbid):
|
@@ -57,14 +68,15 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
|
|
57
68
|
await cur.execute(
|
58
69
|
"""
|
59
70
|
INSERT INTO catalog
|
60
|
-
(kbid, rid, title, created_at, modified_at, labels)
|
71
|
+
(kbid, rid, title, created_at, modified_at, labels, slug)
|
61
72
|
VALUES
|
62
|
-
(%(kbid)s, %(rid)s, %(title)s, %(created_at)s, %(modified_at)s, %(labels)s)
|
73
|
+
(%(kbid)s, %(rid)s, %(title)s, %(created_at)s, %(modified_at)s, %(labels)s, %(slug)s)
|
63
74
|
ON CONFLICT (kbid, rid) DO UPDATE SET
|
64
75
|
title = excluded.title,
|
65
76
|
created_at = excluded.created_at,
|
66
77
|
modified_at = excluded.modified_at,
|
67
|
-
labels = excluded.labels
|
78
|
+
labels = excluded.labels,
|
79
|
+
slug = excluded.slug""",
|
68
80
|
{
|
69
81
|
"kbid": resource.kb.kbid,
|
70
82
|
"rid": resource.uuid,
|
@@ -72,6 +84,22 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
|
|
72
84
|
"created_at": created_at,
|
73
85
|
"modified_at": modified_at,
|
74
86
|
"labels": list(index_message.labels),
|
87
|
+
"slug": resource.basic.slug,
|
88
|
+
},
|
89
|
+
)
|
90
|
+
await cur.execute(
|
91
|
+
"DELETE FROM catalog_facets WHERE kbid = %(kbid)s AND rid = %(rid)s",
|
92
|
+
{
|
93
|
+
"kbid": resource.kb.kbid,
|
94
|
+
"rid": resource.uuid,
|
95
|
+
},
|
96
|
+
)
|
97
|
+
await cur.execute(
|
98
|
+
"INSERT INTO catalog_facets (kbid, rid, facet) SELECT %(kbid)s AS kbid, %(rid)s AS rid, unnest(%(facets)s::text[]) AS facet",
|
99
|
+
{
|
100
|
+
"kbid": resource.kb.kbid,
|
101
|
+
"rid": resource.uuid,
|
102
|
+
"facets": list(extract_facets(index_message.labels)),
|
75
103
|
},
|
76
104
|
)
|
77
105
|
|
@@ -27,15 +27,13 @@ from pydantic import ValidationError
|
|
27
27
|
|
28
28
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
29
29
|
from nucliadb.common.exceptions import InvalidQueryError
|
30
|
-
from nucliadb.common.maindb.pg import PGDriver
|
31
|
-
from nucliadb.common.maindb.utils import get_driver
|
32
30
|
from nucliadb.models.responses import HTTPClientError
|
33
31
|
from nucliadb.search import logger
|
34
32
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
35
33
|
from nucliadb.search.api.v1.utils import fastapi_query
|
36
34
|
from nucliadb.search.search import cache
|
37
35
|
from nucliadb.search.search.merge import fetch_resources
|
38
|
-
from nucliadb.search.search.pgcatalog import pgcatalog_search
|
36
|
+
from nucliadb.search.search.pgcatalog import pgcatalog_facets, pgcatalog_search
|
39
37
|
from nucliadb.search.search.query_parser.parsers import parse_catalog
|
40
38
|
from nucliadb.search.search.utils import (
|
41
39
|
maybe_log_request_payload,
|
@@ -45,6 +43,7 @@ from nucliadb_models.filters import CatalogFilterExpression
|
|
45
43
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
46
44
|
from nucliadb_models.resource import NucliaDBRoles
|
47
45
|
from nucliadb_models.search import (
|
46
|
+
CatalogFacetsRequest,
|
48
47
|
CatalogRequest,
|
49
48
|
CatalogResponse,
|
50
49
|
KnowledgeboxSearchResults,
|
@@ -157,9 +156,6 @@ async def catalog(
|
|
157
156
|
returns bm25 results on titles and it does not support vector search.
|
158
157
|
It is useful for listing resources in a knowledge box.
|
159
158
|
"""
|
160
|
-
if not pgcatalog_enabled(): # pragma: no cover
|
161
|
-
return HTTPClientError(status_code=501, detail="PG driver is needed for catalog search")
|
162
|
-
|
163
159
|
maybe_log_request_payload(kbid, "/catalog", item)
|
164
160
|
start_time = time()
|
165
161
|
try:
|
@@ -196,5 +192,15 @@ async def catalog(
|
|
196
192
|
)
|
197
193
|
|
198
194
|
|
199
|
-
|
200
|
-
|
195
|
+
@api.post(
|
196
|
+
f"/{KB_PREFIX}/{{kbid}}/catalog/facets",
|
197
|
+
status_code=200,
|
198
|
+
response_model=dict[str, int],
|
199
|
+
response_model_exclude_unset=True,
|
200
|
+
tags=["Search"],
|
201
|
+
include_in_schema=False,
|
202
|
+
)
|
203
|
+
@requires(NucliaDBRoles.READER)
|
204
|
+
@version(1)
|
205
|
+
async def catalog_facets(request: Request, kbid: str, item: CatalogFacetsRequest) -> dict[str, int]:
|
206
|
+
return await pgcatalog_facets(kbid, item)
|
@@ -22,19 +22,15 @@ import logging
|
|
22
22
|
from collections import defaultdict
|
23
23
|
from typing import Any, Literal, Union, cast
|
24
24
|
|
25
|
-
from psycopg
|
25
|
+
from psycopg import AsyncCursor, sql
|
26
|
+
from psycopg.rows import DictRow, dict_row
|
26
27
|
|
27
28
|
from nucliadb.common.maindb.pg import PGDriver
|
28
29
|
from nucliadb.common.maindb.utils import get_driver
|
29
30
|
from nucliadb.search.search.query_parser.models import CatalogExpression, CatalogQuery
|
30
31
|
from nucliadb_models import search as search_models
|
31
32
|
from nucliadb_models.labels import translate_system_to_alias_label
|
32
|
-
from nucliadb_models.search import
|
33
|
-
ResourceResult,
|
34
|
-
Resources,
|
35
|
-
SortField,
|
36
|
-
SortOrder,
|
37
|
-
)
|
33
|
+
from nucliadb_models.search import CatalogFacetsRequest, ResourceResult, Resources, SortField, SortOrder
|
38
34
|
from nucliadb_telemetry import metrics
|
39
35
|
|
40
36
|
from .filters import translate_label
|
@@ -55,65 +51,87 @@ def _filter_operands(operands: list[CatalogExpression]) -> tuple[list[str], list
|
|
55
51
|
return facets, nonfacets
|
56
52
|
|
57
53
|
|
58
|
-
def _convert_filter(expr: CatalogExpression, filter_params: dict[str, Any]) ->
|
54
|
+
def _convert_filter(expr: CatalogExpression, filter_params: dict[str, Any]) -> sql.Composable:
|
59
55
|
if expr.bool_and:
|
60
56
|
return _convert_boolean_op(expr.bool_and, "and", filter_params)
|
61
57
|
elif expr.bool_or:
|
62
58
|
return _convert_boolean_op(expr.bool_or, "or", filter_params)
|
63
59
|
elif expr.bool_not:
|
64
|
-
return
|
60
|
+
return sql.SQL("(NOT {})").format(_convert_filter(expr.bool_not, filter_params))
|
65
61
|
elif expr.date:
|
66
62
|
return _convert_date_filter(expr.date, filter_params)
|
67
63
|
elif expr.facet:
|
68
64
|
param_name = f"param{len(filter_params)}"
|
69
65
|
filter_params[param_name] = [expr.facet]
|
70
|
-
|
66
|
+
if expr.facet == "/n/s/PROCESSED":
|
67
|
+
# Optimization for the most common case, we know PROCESSED is a full label and can use the smaller labels index
|
68
|
+
# This is needed because PROCESSED is present in most catalog entries and PG is unlikely to use any index
|
69
|
+
# for it, falling back to executing the extract_facets function which can be slow
|
70
|
+
return sql.SQL("labels @> {}").format(sql.Placeholder(param_name))
|
71
|
+
else:
|
72
|
+
return sql.SQL("extract_facets(labels) @> {}").format(sql.Placeholder(param_name))
|
71
73
|
elif expr.resource_id:
|
72
74
|
param_name = f"param{len(filter_params)}"
|
73
75
|
filter_params[param_name] = [expr.resource_id]
|
74
|
-
return
|
76
|
+
return sql.SQL("rid = {}").format(sql.Placeholder(param_name))
|
75
77
|
else:
|
76
|
-
return ""
|
78
|
+
return sql.SQL("")
|
77
79
|
|
78
80
|
|
79
81
|
def _convert_boolean_op(
|
80
82
|
operands: list[CatalogExpression],
|
81
83
|
op: Union[Literal["and"], Literal["or"]],
|
82
84
|
filter_params: dict[str, Any],
|
83
|
-
) ->
|
84
|
-
array_op = "@>" if op == "and" else "&&"
|
85
|
-
sql = []
|
85
|
+
) -> sql.Composable:
|
86
|
+
array_op = sql.SQL("@>" if op == "and" else "&&")
|
87
|
+
operands_sql: list[sql.Composable] = []
|
86
88
|
facets, nonfacets = _filter_operands(operands)
|
87
89
|
if facets:
|
88
90
|
param_name = f"param{len(filter_params)}"
|
91
|
+
if facets == ["/n/s/PROCESSED"]:
|
92
|
+
# Optimization for the most common case, we know PROCESSED is a full label and can use the smaller labels index
|
93
|
+
# This is needed because PROCESSED is present in most catalog entries and PG is unlikely to use any index
|
94
|
+
# for it, falling back to executing the extract_facets function which can be slow
|
95
|
+
operands_sql.append(sql.SQL("labels @> {}").format(sql.Placeholder(param_name)))
|
96
|
+
else:
|
97
|
+
operands_sql.append(
|
98
|
+
sql.SQL("extract_facets(labels) {} {}").format(array_op, sql.Placeholder(param_name))
|
99
|
+
)
|
89
100
|
filter_params[param_name] = facets
|
90
|
-
sql.append(f"extract_facets(labels) {array_op} %({param_name})s")
|
91
101
|
for nonfacet in nonfacets:
|
92
|
-
|
93
|
-
return "("
|
102
|
+
operands_sql.append(_convert_filter(nonfacet, filter_params))
|
103
|
+
return sql.SQL("({})").format(sql.SQL(f" {op.upper()} ").join(operands_sql))
|
94
104
|
|
95
105
|
|
96
|
-
def _convert_date_filter(date: CatalogExpression.Date, filter_params: dict[str, Any]) ->
|
106
|
+
def _convert_date_filter(date: CatalogExpression.Date, filter_params: dict[str, Any]) -> sql.Composable:
|
97
107
|
if date.since and date.until:
|
98
108
|
since_name = f"param{len(filter_params)}"
|
99
109
|
filter_params[since_name] = date.since
|
100
110
|
until_name = f"param{len(filter_params)}"
|
101
111
|
filter_params[until_name] = date.until
|
102
|
-
return
|
112
|
+
return sql.SQL("{field} BETWEEN {since} AND {until}").format(
|
113
|
+
field=sql.Identifier(date.field),
|
114
|
+
since=sql.Placeholder(since_name),
|
115
|
+
until=sql.Placeholder(until_name),
|
116
|
+
)
|
103
117
|
elif date.since:
|
104
118
|
since_name = f"param{len(filter_params)}"
|
105
119
|
filter_params[since_name] = date.since
|
106
|
-
return
|
120
|
+
return sql.SQL("{field} > {since}").format(
|
121
|
+
field=sql.Identifier(date.field), since=sql.Placeholder(since_name)
|
122
|
+
)
|
107
123
|
elif date.until:
|
108
124
|
until_name = f"param{len(filter_params)}"
|
109
125
|
filter_params[until_name] = date.until
|
110
|
-
return
|
126
|
+
return sql.SQL("{field} < {until}").format(
|
127
|
+
field=sql.Identifier(date.field), until=sql.Placeholder(until_name)
|
128
|
+
)
|
111
129
|
else:
|
112
130
|
raise ValueError(f"Invalid date operator")
|
113
131
|
|
114
132
|
|
115
|
-
def _prepare_query_filters(catalog_query: CatalogQuery) -> tuple[
|
116
|
-
filter_sql = ["kbid = %(kbid)s"]
|
133
|
+
def _prepare_query_filters(catalog_query: CatalogQuery) -> tuple[sql.Composable, dict[str, Any]]:
|
134
|
+
filter_sql: list[sql.Composable] = [sql.SQL("kbid = %(kbid)s")]
|
117
135
|
filter_params: dict[str, Any] = {"kbid": catalog_query.kbid}
|
118
136
|
|
119
137
|
if catalog_query.query and catalog_query.query.query:
|
@@ -123,47 +141,50 @@ def _prepare_query_filters(catalog_query: CatalogQuery) -> tuple[str, dict[str,
|
|
123
141
|
filter_sql.append(_convert_filter(catalog_query.filters, filter_params))
|
124
142
|
|
125
143
|
return (
|
126
|
-
|
144
|
+
sql.SQL("SELECT * FROM catalog WHERE {}").format(sql.SQL(" AND ").join(filter_sql)),
|
127
145
|
filter_params,
|
128
146
|
)
|
129
147
|
|
130
148
|
|
131
|
-
def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, Any]) ->
|
149
|
+
def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, Any]) -> sql.Composable:
|
132
150
|
if query.match == search_models.CatalogQueryMatch.Exact:
|
133
151
|
params["query"] = query.query
|
134
|
-
return
|
152
|
+
return sql.SQL("{} = %(query)s").format(sql.Identifier(query.field.value))
|
135
153
|
elif query.match == search_models.CatalogQueryMatch.StartsWith:
|
136
154
|
params["query"] = query.query + "%"
|
137
155
|
if query.field == search_models.CatalogQueryField.Title:
|
138
156
|
# Insensitive search supported by pg_trgm for title
|
139
|
-
return
|
157
|
+
return sql.SQL("{} ILIKE %(query)s").format(sql.Identifier(query.field.value))
|
140
158
|
else:
|
141
159
|
# Sensitive search for slug (btree does not support ILIKE and slugs are all lowercase anyway)
|
142
|
-
return
|
160
|
+
return sql.SQL("{} LIKE %(query)s").format(sql.Identifier(query.field.value))
|
143
161
|
# The rest of operators only supported by title
|
144
162
|
elif query.match == search_models.CatalogQueryMatch.Words:
|
145
163
|
# This is doing tokenization inside the SQL server (to keep the index updated). We could move it to
|
146
164
|
# the python code at update/query time if it ever becomes a problem but for now, a single regex
|
147
165
|
# executed per query is not a problem.
|
148
166
|
params["query"] = query.query
|
149
|
-
return
|
167
|
+
return sql.SQL(
|
168
|
+
"regexp_split_to_array(lower(title), '\\W') @> regexp_split_to_array(lower(%(query)s), '\\W')"
|
169
|
+
)
|
150
170
|
elif query.match == search_models.CatalogQueryMatch.Fuzzy:
|
151
171
|
params["query"] = query.query
|
152
172
|
# Note: the operator is %>, We use %%> for psycopg escaping
|
153
|
-
return "title %%> %(query)s"
|
173
|
+
return sql.SQL("title %%> %(query)s")
|
154
174
|
elif query.match == search_models.CatalogQueryMatch.EndsWith:
|
155
175
|
params["query"] = "%" + query.query
|
156
|
-
return "title ILIKE %(query)s"
|
176
|
+
return sql.SQL("title ILIKE %(query)s")
|
157
177
|
elif query.match == search_models.CatalogQueryMatch.Contains:
|
158
178
|
params["query"] = "%" + query.query + "%"
|
159
|
-
return "title ILIKE %(query)s"
|
179
|
+
return sql.SQL("title ILIKE %(query)s")
|
160
180
|
else: # pragma: nocover
|
161
181
|
# This is a trick so mypy generates an error if this branch can be reached,
|
162
182
|
# that is, if we are missing some ifs
|
163
183
|
_a: int = "a"
|
184
|
+
return sql.SQL("")
|
164
185
|
|
165
186
|
|
166
|
-
def _prepare_query(catalog_query: CatalogQuery) -> tuple[
|
187
|
+
def _prepare_query(catalog_query: CatalogQuery) -> tuple[sql.Composed, dict[str, Any]]:
|
167
188
|
# Base query with all the filters
|
168
189
|
query, filter_params = _prepare_query_filters(catalog_query)
|
169
190
|
|
@@ -184,11 +205,11 @@ def _prepare_query(catalog_query: CatalogQuery) -> tuple[str, dict[str, Any]]:
|
|
184
205
|
else:
|
185
206
|
order_dir = "DESC"
|
186
207
|
|
187
|
-
query +=
|
208
|
+
query += sql.SQL(" ORDER BY {} {}").format(sql.Identifier(order_field), sql.SQL(order_dir))
|
188
209
|
|
189
210
|
# Pagination
|
190
211
|
offset = catalog_query.page_size * catalog_query.page_number
|
191
|
-
query +=
|
212
|
+
query += sql.SQL(" LIMIT %(page_size)s OFFSET %(offset)s")
|
192
213
|
filter_params["page_size"] = catalog_query.page_size
|
193
214
|
filter_params["offset"] = offset
|
194
215
|
|
@@ -213,40 +234,18 @@ async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
|
|
213
234
|
tmp_facets: dict[str, dict[str, int]] = {
|
214
235
|
translate_label(f): defaultdict(int) for f in catalog_query.faceted
|
215
236
|
}
|
216
|
-
facet_filters = " OR ".join(f"label LIKE '{f}/%%'" for f in tmp_facets.keys())
|
217
|
-
for facet in tmp_facets.keys():
|
218
|
-
if not (
|
219
|
-
facet.startswith("/n/s") or facet.startswith("/n/i") or facet.startswith("/l")
|
220
|
-
):
|
221
|
-
logger.warning(
|
222
|
-
f"Unexpected facet used at catalog: {facet}, kbid={catalog_query.kbid}"
|
223
|
-
)
|
224
|
-
|
225
|
-
await cur.execute(
|
226
|
-
f"SELECT label, COUNT(*) FROM (SELECT unnest(labels) AS label FROM ({query}) fc) nl WHERE ({facet_filters}) GROUP BY 1 ORDER BY 1",
|
227
|
-
query_params,
|
228
|
-
)
|
229
|
-
|
230
|
-
for row in await cur.fetchall():
|
231
|
-
label = row["label"]
|
232
|
-
label_parts = label.split("/")
|
233
|
-
parent = "/".join(label_parts[:-1])
|
234
|
-
count = row["count"]
|
235
|
-
if parent in tmp_facets:
|
236
|
-
tmp_facets[parent][translate_system_to_alias_label(label)] = count
|
237
237
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
tmp_facets[grandparent][translate_system_to_alias_label(parent)] += count
|
238
|
+
if catalog_query.filters is None:
|
239
|
+
await _faceted_search_unfiltered(cur, catalog_query, tmp_facets)
|
240
|
+
else:
|
241
|
+
await _faceted_search_filtered(cur, catalog_query, tmp_facets, query, query_params)
|
243
242
|
|
244
243
|
facets = {translate_system_to_alias_label(k): v for k, v in tmp_facets.items()}
|
245
244
|
|
246
245
|
# Totals
|
247
246
|
with observer({"op": "totals"}):
|
248
247
|
await cur.execute(
|
249
|
-
|
248
|
+
sql.SQL("SELECT COUNT(*) FROM ({}) fc").format(query),
|
250
249
|
query_params,
|
251
250
|
)
|
252
251
|
total = (await cur.fetchone())["count"] # type: ignore
|
@@ -276,3 +275,115 @@ async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
|
|
276
275
|
next_page=(catalog_query.page_size * catalog_query.page_number + len(data) < total),
|
277
276
|
min_score=0,
|
278
277
|
)
|
278
|
+
|
279
|
+
|
280
|
+
async def _faceted_search_unfiltered(
|
281
|
+
cur: AsyncCursor[DictRow], catalog_query: CatalogQuery, tmp_facets: dict[str, dict[str, int]]
|
282
|
+
):
|
283
|
+
facet_params: dict[str, Any] = {}
|
284
|
+
facet_sql: sql.Composable
|
285
|
+
if len(tmp_facets) <= 5:
|
286
|
+
# Asking for few facets, strictly filter to what we need in the query
|
287
|
+
prefixes_sql = []
|
288
|
+
for cnt, prefix in enumerate(tmp_facets.keys()):
|
289
|
+
prefixes_sql.append(
|
290
|
+
sql.SQL("(facet LIKE {} AND POSITION('/' IN RIGHT(facet, {})) = 0)").format(
|
291
|
+
sql.Placeholder(f"facet_{cnt}"), sql.Placeholder(f"facet_len_{cnt}")
|
292
|
+
)
|
293
|
+
)
|
294
|
+
facet_params[f"facet_{cnt}"] = f"{prefix}/%"
|
295
|
+
facet_params[f"facet_len_{cnt}"] = -(len(prefix) + 1)
|
296
|
+
facet_sql = sql.SQL("AND {}").format(sql.SQL(" OR ").join(prefixes_sql))
|
297
|
+
elif all((facet.startswith("/l") or facet.startswith("/n/i") for facet in tmp_facets.keys())):
|
298
|
+
# Special case for the catalog query, which can have many facets asked for
|
299
|
+
# Filter for the categories (icon and labels) in the query, filter the rest in the code below
|
300
|
+
facet_sql = sql.SQL("AND (facet LIKE '/l/%%' OR facet like '/n/i/%%')")
|
301
|
+
else:
|
302
|
+
# Worst case: ask for all facets and filter here. This is faster than applying lots of filters
|
303
|
+
facet_sql = sql.SQL("")
|
304
|
+
|
305
|
+
await cur.execute(
|
306
|
+
sql.SQL(
|
307
|
+
"SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s {} GROUP BY facet"
|
308
|
+
).format(facet_sql),
|
309
|
+
{"kbid": catalog_query.kbid, **facet_params},
|
310
|
+
)
|
311
|
+
|
312
|
+
# Only keep the facets we asked for
|
313
|
+
for row in await cur.fetchall():
|
314
|
+
facet = row["facet"]
|
315
|
+
facet_parts = facet.split("/")
|
316
|
+
parent = "/".join(facet_parts[:-1])
|
317
|
+
if parent in tmp_facets:
|
318
|
+
tmp_facets[parent][translate_system_to_alias_label(facet)] = row["count"]
|
319
|
+
|
320
|
+
|
321
|
+
async def _faceted_search_filtered(
|
322
|
+
cur: AsyncCursor[DictRow],
|
323
|
+
catalog_query: CatalogQuery,
|
324
|
+
tmp_facets: dict[str, dict[str, int]],
|
325
|
+
query: sql.Composable,
|
326
|
+
query_params: dict[str, Any],
|
327
|
+
):
|
328
|
+
facet_params = {}
|
329
|
+
facet_filters = []
|
330
|
+
for cnt, facet in enumerate(tmp_facets.keys()):
|
331
|
+
facet_filters.append(sql.SQL("label LIKE {}").format(sql.Placeholder(f"facet_{cnt}")))
|
332
|
+
facet_params[f"facet_{cnt}"] = f"{facet}/%"
|
333
|
+
|
334
|
+
for facet in tmp_facets.keys():
|
335
|
+
if not (facet.startswith("/n/s") or facet.startswith("/n/i") or facet.startswith("/l")):
|
336
|
+
logger.warning(f"Unexpected facet used at catalog: {facet}, kbid={catalog_query.kbid}")
|
337
|
+
|
338
|
+
await cur.execute(
|
339
|
+
sql.SQL(
|
340
|
+
"SELECT label, COUNT(*) FROM (SELECT unnest(labels) AS label FROM ({query}) fc) nl WHERE ({facet_filters}) GROUP BY 1 ORDER BY 1"
|
341
|
+
).format(query=query, facet_filters=sql.SQL(" OR ").join(facet_filters)),
|
342
|
+
{**query_params, **facet_params},
|
343
|
+
)
|
344
|
+
|
345
|
+
for row in await cur.fetchall():
|
346
|
+
label = row["label"]
|
347
|
+
label_parts = label.split("/")
|
348
|
+
parent = "/".join(label_parts[:-1])
|
349
|
+
count = row["count"]
|
350
|
+
if parent in tmp_facets:
|
351
|
+
tmp_facets[parent][translate_system_to_alias_label(label)] = count
|
352
|
+
|
353
|
+
# No need to get recursive because our facets are at most 3 levels deep (e.g: /l/set/label)
|
354
|
+
if len(label_parts) >= 3:
|
355
|
+
grandparent = "/".join(label_parts[:-2])
|
356
|
+
if grandparent in tmp_facets:
|
357
|
+
tmp_facets[grandparent][translate_system_to_alias_label(parent)] += count
|
358
|
+
|
359
|
+
|
360
|
+
@observer.wrap({"op": "catalog_facets"})
async def pgcatalog_facets(kbid: str, request: CatalogFacetsRequest) -> dict[str, int]:
    """Return the resource count per facet for a knowledge box.

    When the request carries prefixes, results are restricted to facets
    matching any of them; each prefix may also limit how many levels below
    the prefix are returned via its optional `depth`.
    """
    async with _pg_driver()._get_connection() as conn, conn.cursor() as cur:
        filters: list[sql.Composable] = []
        params: dict[str, Any] = {"kbid": kbid}

        for idx, prefix in enumerate(request.prefixes):
            clause = sql.SQL("facet LIKE {}").format(sql.Placeholder(f"prefix{idx}"))
            params[f"prefix{idx}"] = f"{prefix.prefix}%"

            if prefix.depth is not None:
                # A facet at most `depth` levels below the prefix has fewer
                # '/'-separated segments than (prefix segments + depth + 1),
                # i.e. SPLIT_PART at that position is empty.
                base_segments = len(prefix.prefix.split("/"))
                depth_clause = sql.SQL("SPLIT_PART(facet, '/', {}) = ''").format(
                    sql.Placeholder(f"depth{idx}")
                )
                params[f"depth{idx}"] = base_segments + prefix.depth + 1
                clause = sql.SQL("({} AND {})").format(clause, depth_clause)

            filters.append(clause)

        where_extra: sql.Composable = (
            sql.SQL("AND {}").format(sql.SQL(" OR ").join(filters)) if filters else sql.SQL("")
        )

        await cur.execute(
            sql.SQL(
                "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s {} GROUP BY facet"
            ).format(where_extra),
            params,
        )
        return dict(await cur.fetchall())
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.5.0.
|
3
|
+
Version: 6.5.0.post4476
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: <4,>=3.9
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.5.0.
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.
|
24
|
-
Requires-Dist: nucliadb-protos>=6.5.0.
|
25
|
-
Requires-Dist: nucliadb-models>=6.5.0.
|
26
|
-
Requires-Dist: nidx-protos>=6.5.0.
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4476
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4476
|
24
|
+
Requires-Dist: nucliadb-protos>=6.5.0.post4476
|
25
|
+
Requires-Dist: nucliadb-models>=6.5.0.post4476
|
26
|
+
Requires-Dist: nidx-protos>=6.5.0.post4476
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
28
28
|
Requires-Dist: nuclia-models>=0.24.2
|
29
29
|
Requires-Dist: uvicorn[standard]
|
@@ -35,8 +35,8 @@ Requires-Dist: aiofiles>=0.8.0
|
|
35
35
|
Requires-Dist: psutil>=5.9.7
|
36
36
|
Requires-Dist: types-psutil>=5.9.5.17
|
37
37
|
Requires-Dist: types-aiofiles>=0.8.3
|
38
|
-
Requires-Dist: protobuf
|
39
|
-
Requires-Dist: types-protobuf
|
38
|
+
Requires-Dist: protobuf<6,>=5
|
39
|
+
Requires-Dist: types-protobuf<6,>=5
|
40
40
|
Requires-Dist: grpcio>=1.71.0
|
41
41
|
Requires-Dist: grpcio-health-checking>=1.71.0
|
42
42
|
Requires-Dist: grpcio-channelz>=1.71.0
|
@@ -31,13 +31,18 @@ migrations/0032_remove_old_relations.py,sha256=ZaswhmRRsLgw6DVYVdT7cP-gdBf4X3PL9
|
|
31
31
|
migrations/0033_rollover_nidx_relation_2.py,sha256=9etpqNLVS3PA14qIdsdhorReZxenDaBl-IJNN2AK_Fg,1340
|
32
32
|
migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQj8qu1z2XkFc,1452
|
33
33
|
migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
|
34
|
+
migrations/0036_backfill_catalog_slug.py,sha256=mizRM-HfPswKq4iEmqofu4kIT6Gd97ruT3qhb257vZk,2954
|
35
|
+
migrations/0037_backfill_catalog_facets.py,sha256=KAf3VKbKePw7ykDnJi47LyJ7pK1JwYkwMxrsXUnbt9g,2788
|
34
36
|
migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
35
|
-
migrations/pg/0001_bootstrap.py,sha256=
|
37
|
+
migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
|
36
38
|
migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
|
37
39
|
migrations/pg/0003_catalog_kbid_index.py,sha256=uKq_vtnuf73GVf0mtl2rhzdk_czAoEU1UdiVKVZpA0M,1044
|
38
40
|
migrations/pg/0004_catalog_facets.py,sha256=FJFASHjfEHG3sNve9BP2HnnLO4xr7dnR6Qpctnmt4LE,2180
|
39
41
|
migrations/pg/0005_purge_tasks_index.py,sha256=3mtyFgpcK0QQ_NONYay7V9xICijCLNkyTPuoc0PBjRg,1139
|
40
42
|
migrations/pg/0006_catalog_title_indexes.py,sha256=n2OGxwE4oeCwHAYaxBkja4t10BmwTjZ2IoCyOdjEBSc,1710
|
43
|
+
migrations/pg/0007_catalog_slug.py,sha256=mArzZCBO-RD5DkWxRIyDKgEzrnAcis1TOGvSNUe7Kgg,1150
|
44
|
+
migrations/pg/0008_catalog_facets.py,sha256=dxIUdHJHtI_Gyk2dpP7tjHEnL2iPzAufi6ajYm2FVMI,1595
|
45
|
+
migrations/pg/0009_extract_facets_safety.py,sha256=k9Appx7ipp3wDyLy70qgw9oLjN7N6BEadE-N5Fhan-4,1066
|
41
46
|
migrations/pg/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
42
47
|
nucliadb/__init__.py,sha256=_abCmDJ_0ku483Os4UAjPX7Nywm39cQgAV_DiyjsKeQ,891
|
43
48
|
nucliadb/health.py,sha256=UIxxA4oms4HIsCRZM_SZsdkIZIlgzmOxw-qSHLlWuak,3465
|
@@ -162,7 +167,7 @@ nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,
|
|
162
167
|
nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
|
163
168
|
nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
|
164
169
|
nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
|
165
|
-
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=
|
170
|
+
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=GpzQv0_iWTHbM90J0rAz_QIh_TMv1XbghyDgs8tk_8M,4014
|
166
171
|
nucliadb/ingest/orm/processor/processor.py,sha256=jaEBwbv--WyoC8zcdxWAyF0dAzVA5crVDJl56Bqv1eI,31444
|
167
172
|
nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
|
168
173
|
nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
|
@@ -216,7 +221,7 @@ nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,
|
|
216
221
|
nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
217
222
|
nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
|
218
223
|
nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o,5337
|
219
|
-
nucliadb/search/api/v1/catalog.py,sha256=
|
224
|
+
nucliadb/search/api/v1/catalog.py,sha256=7yyG46Zsaqvuut9Da-LTl0KcWgo7n5lbEhiTXslyvwM,7865
|
220
225
|
nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
|
221
226
|
nucliadb/search/api/v1/find.py,sha256=iMjyq4y0JOMC_x1B8kUfVdkCoc9G9Ark58kPLLY4HDw,10824
|
222
227
|
nucliadb/search/api/v1/graph.py,sha256=gthqxCOn9biE6D6s93jRGLglk0ono8U7OyS390kWiI8,4178
|
@@ -249,7 +254,7 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
|
|
249
254
|
nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
|
250
255
|
nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
|
251
256
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
252
|
-
nucliadb/search/search/pgcatalog.py,sha256=
|
257
|
+
nucliadb/search/search/pgcatalog.py,sha256=O_nRjSJf1Qc-XorVwcNlsDOftzy_zQLLfagkjU4YmSA,16718
|
253
258
|
nucliadb/search/search/predict_proxy.py,sha256=cuD_sfM3RLdEoQaanRz0CflO6nKVGGKPzoFA17shb_w,8647
|
254
259
|
nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
|
255
260
|
nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
|
@@ -370,8 +375,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
370
375
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
371
376
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
372
377
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
373
|
-
nucliadb-6.5.0.
|
374
|
-
nucliadb-6.5.0.
|
375
|
-
nucliadb-6.5.0.
|
376
|
-
nucliadb-6.5.0.
|
377
|
-
nucliadb-6.5.0.
|
378
|
+
nucliadb-6.5.0.post4476.dist-info/METADATA,sha256=ysG9rsv_jshf_4lJLNHXGBHLm8Br-jWbUKDgRymc9jY,4158
|
379
|
+
nucliadb-6.5.0.post4476.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
380
|
+
nucliadb-6.5.0.post4476.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
381
|
+
nucliadb-6.5.0.post4476.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
382
|
+
nucliadb-6.5.0.post4476.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|