nucliadb 6.7.2.post4889__py3-none-any.whl → 6.7.2.post4911__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- migrations/0023_backfill_pg_catalog.py +6 -2
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0038_backfill_catalog_field_labels.py +6 -2
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +294 -208
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/filter_expression.py +1 -1
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/ingest/orm/brain_v2.py +1 -1
- nucliadb/ingest/orm/processor/processor.py +4 -5
- nucliadb/ingest/settings.py +8 -0
- nucliadb/search/api/v1/catalog.py +4 -4
- nucliadb/search/search/query_parser/models.py +1 -29
- nucliadb/search/search/query_parser/parsers/ask.py +1 -1
- nucliadb/search/search/query_parser/parsers/catalog.py +2 -5
- nucliadb/search/search/query_parser/parsers/graph.py +5 -5
- nucliadb/search/search/rerankers.py +1 -1
- {nucliadb-6.7.2.post4889.dist-info → nucliadb-6.7.2.post4911.dist-info}/METADATA +6 -6
- {nucliadb-6.7.2.post4889.dist-info → nucliadb-6.7.2.post4911.dist-info}/RECORD +24 -21
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- {nucliadb-6.7.2.post4889.dist-info → nucliadb-6.7.2.post4911.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4889.dist-info → nucliadb-6.7.2.post4911.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4889.dist-info → nucliadb-6.7.2.post4911.dist-info}/top_level.txt +0 -0
|
@@ -28,6 +28,7 @@ from nidx_protos import noderesources_pb2, nodewriter_pb2
|
|
|
28
28
|
from nidx_protos.noderesources_pb2 import Resource as PBBrainResource
|
|
29
29
|
|
|
30
30
|
from nucliadb.common import datamanagers, locking
|
|
31
|
+
from nucliadb.common.catalog import catalog_delete, catalog_update
|
|
31
32
|
from nucliadb.common.cluster.settings import settings as cluster_settings
|
|
32
33
|
from nucliadb.common.cluster.utils import get_shard_manager
|
|
33
34
|
from nucliadb.common.external_index_providers.base import ExternalIndexManager
|
|
@@ -61,8 +62,6 @@ from nucliadb_utils.cache.pubsub import PubSubDriver
|
|
|
61
62
|
from nucliadb_utils.storages.storage import Storage
|
|
62
63
|
from nucliadb_utils.utilities import get_storage, has_feature
|
|
63
64
|
|
|
64
|
-
from .pgcatalog import pgcatalog_delete, pgcatalog_update
|
|
65
|
-
|
|
66
65
|
logger = logging.getLogger("ingest-processor")
|
|
67
66
|
|
|
68
67
|
MESSAGE_TO_NOTIFICATION_SOURCE = {
|
|
@@ -227,7 +226,8 @@ class Processor:
|
|
|
227
226
|
shard = await kb.get_resource_shard(shard_id)
|
|
228
227
|
if shard is None:
|
|
229
228
|
raise AttributeError("Shard not available")
|
|
230
|
-
|
|
229
|
+
|
|
230
|
+
await catalog_delete(txn, message.kbid, uuid)
|
|
231
231
|
external_index_manager = await get_external_index_manager(kbid=message.kbid)
|
|
232
232
|
if external_index_manager is not None:
|
|
233
233
|
await self.external_index_delete_resource(external_index_manager, uuid)
|
|
@@ -374,8 +374,7 @@ class Processor:
|
|
|
374
374
|
index_message.labels.remove(current_status[0])
|
|
375
375
|
index_message.labels.append("/n/s/ERROR")
|
|
376
376
|
|
|
377
|
-
await pgcatalog_update(txn, kbid, resource, index_message)
|
|
378
|
-
|
|
377
|
+
await catalog_update(txn, kbid, resource, index_message)
|
|
379
378
|
if transaction_check:
|
|
380
379
|
await sequence_manager.set_last_seqid(txn, partition, seqid)
|
|
381
380
|
await txn.commit()
|
nucliadb/ingest/settings.py
CHANGED
|
@@ -67,6 +67,11 @@ class DriverSettings(BaseSettings):
|
|
|
67
67
|
)
|
|
68
68
|
|
|
69
69
|
|
|
70
|
+
class CatalogConfig(Enum):
|
|
71
|
+
UNSET = "unset"
|
|
72
|
+
PG = "pg"
|
|
73
|
+
|
|
74
|
+
|
|
70
75
|
# For use during migration from pull v1 to pull v2
|
|
71
76
|
class ProcessingPullMode(Enum):
|
|
72
77
|
OFF = "off"
|
|
@@ -75,6 +80,9 @@ class ProcessingPullMode(Enum):
|
|
|
75
80
|
|
|
76
81
|
|
|
77
82
|
class Settings(DriverSettings):
|
|
83
|
+
# Catalog settings
|
|
84
|
+
catalog: CatalogConfig = Field(default=CatalogConfig.PG, description="Catalog backend")
|
|
85
|
+
|
|
78
86
|
# Pull worker settings
|
|
79
87
|
pull_time_error_backoff: int = 30
|
|
80
88
|
pull_api_timeout: int = 60
|
|
@@ -25,6 +25,7 @@ from fastapi import Request, Response
|
|
|
25
25
|
from fastapi_versioning import version
|
|
26
26
|
from pydantic import ValidationError
|
|
27
27
|
|
|
28
|
+
from nucliadb.common.catalog import catalog_facets, catalog_search
|
|
28
29
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
|
29
30
|
from nucliadb.common.exceptions import InvalidQueryError
|
|
30
31
|
from nucliadb.models.responses import HTTPClientError
|
|
@@ -33,7 +34,6 @@ from nucliadb.search.api.v1.router import KB_PREFIX, api
|
|
|
33
34
|
from nucliadb.search.api.v1.utils import fastapi_query
|
|
34
35
|
from nucliadb.search.search import cache
|
|
35
36
|
from nucliadb.search.search.merge import fetch_resources
|
|
36
|
-
from nucliadb.search.search.pgcatalog import pgcatalog_facets, pgcatalog_search
|
|
37
37
|
from nucliadb.search.search.query_parser.parsers import parse_catalog
|
|
38
38
|
from nucliadb.search.search.utils import (
|
|
39
39
|
maybe_log_request_payload,
|
|
@@ -164,7 +164,7 @@ async def catalog(
|
|
|
164
164
|
query_parser = await parse_catalog(kbid, item)
|
|
165
165
|
|
|
166
166
|
catalog_results = CatalogResponse()
|
|
167
|
-
catalog_results.fulltext = await pgcatalog_search(query_parser)
|
|
167
|
+
catalog_results.fulltext = await catalog_search(query_parser)
|
|
168
168
|
catalog_results.resources = await fetch_resources(
|
|
169
169
|
resources=[r.rid for r in catalog_results.fulltext.results],
|
|
170
170
|
kbid=kbid,
|
|
@@ -205,7 +205,7 @@ async def catalog(
|
|
|
205
205
|
)
|
|
206
206
|
@requires(NucliaDBRoles.READER)
|
|
207
207
|
@version(1)
|
|
208
|
-
async def
|
|
208
|
+
async def catalog_facets_endpoint(
|
|
209
209
|
request: Request, kbid: str, item: CatalogFacetsRequest
|
|
210
210
|
) -> CatalogFacetsResponse:
|
|
211
|
-
return CatalogFacetsResponse(facets=await pgcatalog_facets(kbid, item))
|
|
211
|
+
return CatalogFacetsResponse(facets=await catalog_facets(kbid, item))
|
|
@@ -17,9 +17,8 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from dataclasses import dataclass
|
|
21
20
|
from datetime import datetime
|
|
22
|
-
from typing import Literal, Optional, Union
|
|
21
|
+
from typing import Optional, Union
|
|
23
22
|
|
|
24
23
|
from nidx_protos import nodereader_pb2
|
|
25
24
|
from pydantic import BaseModel, ConfigDict, Field
|
|
@@ -153,33 +152,6 @@ class ParsedQuery(BaseModel):
|
|
|
153
152
|
generation: Optional[Generation] = None
|
|
154
153
|
|
|
155
154
|
|
|
156
|
-
### Catalog
|
|
157
|
-
@dataclass
|
|
158
|
-
class CatalogExpression:
|
|
159
|
-
@dataclass
|
|
160
|
-
class Date:
|
|
161
|
-
field: Union[Literal["created_at"], Literal["modified_at"]]
|
|
162
|
-
since: Optional[datetime]
|
|
163
|
-
until: Optional[datetime]
|
|
164
|
-
|
|
165
|
-
bool_and: Optional[list["CatalogExpression"]] = None
|
|
166
|
-
bool_or: Optional[list["CatalogExpression"]] = None
|
|
167
|
-
bool_not: Optional["CatalogExpression"] = None
|
|
168
|
-
date: Optional[Date] = None
|
|
169
|
-
facet: Optional[str] = None
|
|
170
|
-
resource_id: Optional[str] = None
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
class CatalogQuery(BaseModel):
|
|
174
|
-
kbid: str
|
|
175
|
-
query: Optional[search_models.CatalogQuery]
|
|
176
|
-
filters: Optional[CatalogExpression]
|
|
177
|
-
sort: search_models.SortOptions
|
|
178
|
-
faceted: list[str]
|
|
179
|
-
page_size: int
|
|
180
|
-
page_number: int
|
|
181
|
-
|
|
182
|
-
|
|
183
155
|
### Graph
|
|
184
156
|
|
|
185
157
|
|
|
@@ -63,7 +63,7 @@ class _AskParser:
|
|
|
63
63
|
)
|
|
64
64
|
elif isinstance(self.item.max_tokens, MaxTokens):
|
|
65
65
|
max_tokens = self.item.max_tokens
|
|
66
|
-
else: # pragma:
|
|
66
|
+
else: # pragma: no cover
|
|
67
67
|
# This is a trick so mypy generates an error if this branch can be reached,
|
|
68
68
|
# that is, if we are missing some ifs
|
|
69
69
|
_a: int = "a"
|
|
@@ -19,13 +19,10 @@
|
|
|
19
19
|
#
|
|
20
20
|
|
|
21
21
|
from nucliadb.common import datamanagers
|
|
22
|
+
from nucliadb.common.catalog.interface import CatalogExpression, CatalogQuery
|
|
22
23
|
from nucliadb.common.exceptions import InvalidQueryError
|
|
23
24
|
from nucliadb.common.filter_expression import FacetFilterTypes, facet_from_filter
|
|
24
25
|
from nucliadb.search.search.filters import translate_label
|
|
25
|
-
from nucliadb.search.search.query_parser.models import (
|
|
26
|
-
CatalogExpression,
|
|
27
|
-
CatalogQuery,
|
|
28
|
-
)
|
|
29
26
|
from nucliadb_models import search as search_models
|
|
30
27
|
from nucliadb_models.filters import (
|
|
31
28
|
And,
|
|
@@ -185,7 +182,7 @@ async def parse_filter_expression(expr: ResourceFilterExpression, kbid: str) ->
|
|
|
185
182
|
if rid is None:
|
|
186
183
|
raise InvalidQueryError("slug", f"Cannot find slug {expr.slug}")
|
|
187
184
|
cat.resource_id = rid
|
|
188
|
-
else: # pragma:
|
|
185
|
+
else: # pragma: no cover
|
|
189
186
|
# Cannot happen due to model validation
|
|
190
187
|
raise ValueError("Resource needs id or slug")
|
|
191
188
|
elif isinstance(expr, DateCreated):
|
|
@@ -153,7 +153,7 @@ def parse_path_query(expr: graph_requests.GraphPathQuery) -> nodereader_pb2.Grap
|
|
|
153
153
|
elif isinstance(expr, graph_requests.Generated):
|
|
154
154
|
_set_generated_to_pb(expr, pb)
|
|
155
155
|
|
|
156
|
-
else: # pragma:
|
|
156
|
+
else: # pragma: no cover
|
|
157
157
|
# This is a trick so mypy generates an error if this branch can be reached,
|
|
158
158
|
# that is, if we are missing some ifs
|
|
159
159
|
_a: int = "a"
|
|
@@ -182,7 +182,7 @@ def _parse_node_query(expr: graph_requests.GraphNodesQuery) -> nodereader_pb2.Gr
|
|
|
182
182
|
elif isinstance(expr, graph_requests.Generated):
|
|
183
183
|
_set_generated_to_pb(expr, pb)
|
|
184
184
|
|
|
185
|
-
else: # pragma:
|
|
185
|
+
else: # pragma: no cover
|
|
186
186
|
# This is a trick so mypy generates an error if this branch can be reached,
|
|
187
187
|
# that is, if we are missing some ifs
|
|
188
188
|
_a: int = "a"
|
|
@@ -212,7 +212,7 @@ def _parse_relation_query(
|
|
|
212
212
|
elif isinstance(expr, graph_requests.Generated):
|
|
213
213
|
_set_generated_to_pb(expr, pb)
|
|
214
214
|
|
|
215
|
-
else: # pragma:
|
|
215
|
+
else: # pragma: no cover
|
|
216
216
|
# This is a trick so mypy generates an error if this branch can be reached,
|
|
217
217
|
# that is, if we are missing some ifs
|
|
218
218
|
_a: int = "a"
|
|
@@ -230,7 +230,7 @@ def _set_node_to_pb(node: graph_requests.GraphNode, pb: nodereader_pb2.GraphQuer
|
|
|
230
230
|
pb.fuzzy.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.PREFIX
|
|
231
231
|
pb.fuzzy.distance = 1
|
|
232
232
|
|
|
233
|
-
else: # pragma:
|
|
233
|
+
else: # pragma: no cover
|
|
234
234
|
# This is a trick so mypy generates an error if this branch can be reached,
|
|
235
235
|
# that is, if we are missing some ifs
|
|
236
236
|
_a: int = "a"
|
|
@@ -263,7 +263,7 @@ def _set_generated_to_pb(generated: graph_requests.Generated, pb: nodereader_pb2
|
|
|
263
263
|
|
|
264
264
|
pb.facet.facet = facet
|
|
265
265
|
|
|
266
|
-
else: # pragma:
|
|
266
|
+
else: # pragma: no cover
|
|
267
267
|
# This is a trick so mypy generates an error if this branch can be reached,
|
|
268
268
|
# that is, if we are missing some ifs
|
|
269
269
|
_a: int = "a"
|
|
@@ -181,7 +181,7 @@ def get_reranker(reranker: parser_models.Reranker) -> Reranker:
|
|
|
181
181
|
elif isinstance(reranker, parser_models.PredictReranker):
|
|
182
182
|
algorithm = PredictReranker(reranker.window)
|
|
183
183
|
|
|
184
|
-
else: # pragma:
|
|
184
|
+
else: # pragma: no cover
|
|
185
185
|
# This is a trick so mypy generates an error if this branch can be reached,
|
|
186
186
|
# that is, if we are missing some ifs
|
|
187
187
|
_a: int = "a"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nucliadb
|
|
3
|
-
Version: 6.7.2.post4889
|
|
3
|
+
Version: 6.7.2.post4911
|
|
4
4
|
Summary: NucliaDB
|
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
20
|
Requires-Python: <4,>=3.9
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4889
|
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4889
|
|
24
|
-
Requires-Dist: nucliadb-protos>=6.7.2.post4889
|
|
25
|
-
Requires-Dist: nucliadb-models>=6.7.2.post4889
|
|
26
|
-
Requires-Dist: nidx-protos>=6.7.2.post4889
|
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4911
|
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4911
|
|
24
|
+
Requires-Dist: nucliadb-protos>=6.7.2.post4911
|
|
25
|
+
Requires-Dist: nucliadb-models>=6.7.2.post4911
|
|
26
|
+
Requires-Dist: nidx-protos>=6.7.2.post4911
|
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
|
28
28
|
Requires-Dist: nuclia-models>=0.46.0
|
|
29
29
|
Requires-Dist: uvicorn[standard]
|
|
@@ -19,11 +19,11 @@ migrations/0019_upgrade_to_paragraphs_v3.py,sha256=XVuku1QLvbzTi9Akzh3a72qohXilh
|
|
|
19
19
|
migrations/0020_drain_nodes_from_cluster.py,sha256=BeECAI0T8u14M2U5USl1fFNcsfmdMerNhisolYQN_eA,1411
|
|
20
20
|
migrations/0021_overwrite_vectorsets_key.py,sha256=lXNkMsa_DWEceci29c0tDO4Y4WsJQZS6p26Sh-Ya9Mg,1586
|
|
21
21
|
migrations/0022_fix_paragraph_deletion_bug.py,sha256=-tH342VXF-8xwc_h3P1cYaUtTT1wHSGf7ZoeVEpnaYs,1422
|
|
22
|
-
migrations/0023_backfill_pg_catalog.py,sha256=
|
|
22
|
+
migrations/0023_backfill_pg_catalog.py,sha256=cg9pCVwOMIadpC2p2D0iX7ciuEhFETM7ex5KoT8X0sM,3212
|
|
23
23
|
migrations/0025_assign_models_to_kbs_v2.py,sha256=YqzXvsru08gBsbRjuhgc3ttoEANwxaWkYeFw62J7nn0,4634
|
|
24
24
|
migrations/0026_fix_high_cardinality_content_types.py,sha256=BsbBkvZDzjRHQfoouZNNtHA1xMxTKm8wOVnp_WAS9j4,2322
|
|
25
25
|
migrations/0027_rollover_texts3.py,sha256=EWoUloOwv4alEqr1-DUA3tb5KLC5Vza-bBQDfZqy5jI,2818
|
|
26
|
-
migrations/0028_extracted_vectors_reference.py,sha256=
|
|
26
|
+
migrations/0028_extracted_vectors_reference.py,sha256=HIykGAWMt5FfIM6QyNznLNsQ--3LrzXdM5oiXQg2q0c,2393
|
|
27
27
|
migrations/0029_backfill_field_status.py,sha256=W3BWa3KsstDt2xundn0ED6FoG9sqRh7Y9IDjP1yTrLw,5651
|
|
28
28
|
migrations/0030_label_deduplication.py,sha256=y14TxtCMi3-TBMz_eZoyyPDHNlZb29taJujlDuHumsA,2008
|
|
29
29
|
migrations/0031_languages_deduplication.py,sha256=o6va6lP3oTRT1uSzp5MIhHHBFbhCxSZ-oNlXXpiAdUo,2340
|
|
@@ -33,7 +33,7 @@ migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQ
|
|
|
33
33
|
migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
|
|
34
34
|
migrations/0036_backfill_catalog_slug.py,sha256=toYqxH_EfUFqoVn_cOdR5Fg8bWZU5BoFMfPBSf74LKU,2957
|
|
35
35
|
migrations/0037_backfill_catalog_facets.py,sha256=IH7H4OZ4tzws6xEh7Qro0bPDHDYOoVViEUj-JwPPe1U,2791
|
|
36
|
-
migrations/0038_backfill_catalog_field_labels.py,sha256=
|
|
36
|
+
migrations/0038_backfill_catalog_field_labels.py,sha256=F519nYngJDb1Mtwf-OQpweDPWKPxAlqdxy5E-DyQrhA,3492
|
|
37
37
|
migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
38
38
|
migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
|
|
39
39
|
migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
|
|
@@ -65,16 +65,21 @@ nucliadb/common/cache.py,sha256=FmMVPoxmKGODvOCyO_pxEHjAV3sYeefW9Jeh1cVLTU0,6538
|
|
|
65
65
|
nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
|
|
66
66
|
nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
|
|
67
67
|
nucliadb/common/exceptions.py,sha256=_PJk_NfAhZBFBvmgAfvsJKZ9KuRt5Y1cNsH3-cXE07w,1120
|
|
68
|
-
nucliadb/common/filter_expression.py,sha256
|
|
68
|
+
nucliadb/common/filter_expression.py,sha256=-6buKY1SCVYpkrG_60Ui3ebSDWnXeF_xVmBCrNipoII,6569
|
|
69
69
|
nucliadb/common/ids.py,sha256=4QjoIofes_vtKj2HsFWZf8VVIVWXxdkYtLpx1n618Us,8239
|
|
70
70
|
nucliadb/common/locking.py,sha256=eZG47mI1OPnKbxSd95qa6jDXBhUoxVBIuSjxoEuBRWE,5894
|
|
71
71
|
nucliadb/common/nidx.py,sha256=4WHcEkLdH54S2C0FcLwHvcQXMiRj2lZ7zqOEbD8eEM8,9189
|
|
72
|
-
nucliadb/common/vector_index_config.py,sha256=
|
|
72
|
+
nucliadb/common/vector_index_config.py,sha256=DQrlraTWE5uUn68l9s10d3wobNeVtbP-ANEQmUfSWyo,1553
|
|
73
73
|
nucliadb/common/back_pressure/__init__.py,sha256=paAcAZcfGRTyURF9lnn3vX0vcwakTEVswG_xcdGBH-U,928
|
|
74
74
|
nucliadb/common/back_pressure/cache.py,sha256=ANvXglWzI5naAD6N4E_fNi17qS6KNyAhjLeh6WlZZ84,2931
|
|
75
75
|
nucliadb/common/back_pressure/materializer.py,sha256=bXUalaaTMdrltm23ezkoymcRPJl7Ha8RVTj7xdVfHgQ,11468
|
|
76
76
|
nucliadb/common/back_pressure/settings.py,sha256=3qNOzbI0KC6LMy-wMilXRSBfZu6CCpGHod26MTgAZ2o,3082
|
|
77
77
|
nucliadb/common/back_pressure/utils.py,sha256=aZeP1XSkdgaRgZC76yR9Kje3511ZUCp7KB-XzcvhMYY,2018
|
|
78
|
+
nucliadb/common/catalog/__init__.py,sha256=6ZTYyXuUcvUH4Z97CT-lBvy8OCKnA5Hkq7yHPp4lJOw,3250
|
|
79
|
+
nucliadb/common/catalog/dummy.py,sha256=-LA29hSHm2XPTXjOTV2g5w4yNZm9XvGzIyO2he7rJDw,1490
|
|
80
|
+
nucliadb/common/catalog/interface.py,sha256=IBgvlOEvZ46szcAc6FpIb3Fr0pelGVvjOzRDkA1tC10,3281
|
|
81
|
+
nucliadb/common/catalog/pg.py,sha256=hRdiR3RDGyZnjAB0dWu4PZBgAfiZLjVIh08nCK34Nl8,20193
|
|
82
|
+
nucliadb/common/catalog/utils.py,sha256=lQLTe3rN_ra0CiNjTQIiIaU4lhx5beRkx1Va10ZBk_Y,2222
|
|
78
83
|
nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
79
84
|
nucliadb/common/cluster/exceptions.py,sha256=t7v_l93t44l2tQpdQXgO_w-c4YZRcaayOz1A2i0w4RQ,1258
|
|
80
85
|
nucliadb/common/cluster/grpc_node_dummy.py,sha256=JkufazWzMA4KFEU8EBkMbiiDW4C8lLcRhiiCxP7aCQY,2949
|
|
@@ -136,7 +141,7 @@ nucliadb/ingest/partitions.py,sha256=c1OWrFWgadNtvghY3Fl-xlurdyV5hZpVJPEoRAsBt1k
|
|
|
136
141
|
nucliadb/ingest/processing.py,sha256=IKXMZXIPuuojKQiXR2T5-5NwMvmUnIQIhBXUGgzyFFo,21551
|
|
137
142
|
nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
143
|
nucliadb/ingest/serialize.py,sha256=hiddxbV5gxVk8uY8-Q1AEq2DhJx5fOBP34zq5ONGgcs,16240
|
|
139
|
-
nucliadb/ingest/settings.py,sha256=
|
|
144
|
+
nucliadb/ingest/settings.py,sha256=DouvoeZxu4uLgEIDLCLKNrzu8SLZpgQxTB--Ly6pRJI,4448
|
|
140
145
|
nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
|
|
141
146
|
nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
142
147
|
nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
|
|
@@ -156,7 +161,7 @@ nucliadb/ingest/fields/generic.py,sha256=elgtqv15aJUq3zY7X_g0bli_2BpcwPArVvzhe54
|
|
|
156
161
|
nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJs,4172
|
|
157
162
|
nucliadb/ingest/fields/text.py,sha256=2grxo8twWbpXEd_iwUMBw9q0dWorVmlPONmY5d1ThwQ,1684
|
|
158
163
|
nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
159
|
-
nucliadb/ingest/orm/brain_v2.py,sha256=
|
|
164
|
+
nucliadb/ingest/orm/brain_v2.py,sha256=8MAo1N_nhoGy73TvKBuaw-NaMxIanRPCNttw6dFY4jk,33647
|
|
160
165
|
nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
|
|
161
166
|
nucliadb/ingest/orm/entities.py,sha256=kXyeF6XOpFKhEsGLcY-GLIk21Exp0cJst4XQQ9jJoug,14791
|
|
162
167
|
nucliadb/ingest/orm/exceptions.py,sha256=gsp7TtVNQPiIEh-zf_UEJClwuFU0iu-5vzj0OrKMScg,1550
|
|
@@ -168,8 +173,7 @@ nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,
|
|
|
168
173
|
nucliadb/ingest/orm/processor/__init__.py,sha256=xhDNKCxY0XNOlIVKEtM8QT75vDUkJIt7K-_VgGbbOQU,904
|
|
169
174
|
nucliadb/ingest/orm/processor/auditing.py,sha256=gxn5v30KVaH0TnIjo715mWjzKGJ-DMviElEXJG9BNN4,4612
|
|
170
175
|
nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
|
|
171
|
-
nucliadb/ingest/orm/processor/
|
|
172
|
-
nucliadb/ingest/orm/processor/processor.py,sha256=vCLJ3e9ByW80aPFsA1lAFSjyDihULPVId1OQGyC7huw,33841
|
|
176
|
+
nucliadb/ingest/orm/processor/processor.py,sha256=CPOf5KZgTcKLO6LxkFia2LueskcKBlv5bTifnTOZ1KE,33845
|
|
173
177
|
nucliadb/ingest/orm/processor/sequence_manager.py,sha256=kUH0bCuM6NqpA0xSwfyb9igig3Btu57pc8VYnKggqx4,1693
|
|
174
178
|
nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
|
|
175
179
|
nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
@@ -222,7 +226,7 @@ nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,
|
|
|
222
226
|
nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
223
227
|
nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
|
|
224
228
|
nucliadb/search/api/v1/ask.py,sha256=hZUnk1opZuXp1IwTiingSatlUefg2CZ9r_Z9sUwZMaU,5698
|
|
225
|
-
nucliadb/search/api/v1/catalog.py,sha256=
|
|
229
|
+
nucliadb/search/api/v1/catalog.py,sha256=zMflTu9UKfvuEO_u4Et33Q2kSni0TIk2E5t-_Ad5BXM,8069
|
|
226
230
|
nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
|
|
227
231
|
nucliadb/search/api/v1/find.py,sha256=j6mxEyxjlLnZSqCT_N2LmOJlytsm1vkY4KFFmJRrtP8,10904
|
|
228
232
|
nucliadb/search/api/v1/graph.py,sha256=gthqxCOn9biE6D6s93jRGLglk0ono8U7OyS390kWiI8,4178
|
|
@@ -255,11 +259,10 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
|
|
|
255
259
|
nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
|
|
256
260
|
nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
|
|
257
261
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
|
258
|
-
nucliadb/search/search/pgcatalog.py,sha256=0n_gDihZZhqrDLRHvHzS3IESvMRTcU6YShqizQMyE_Y,16807
|
|
259
262
|
nucliadb/search/search/predict_proxy.py,sha256=Df8F5K-oS4TIXJc_y8UDViJTo7st5L0kMgxYPFZ39Vk,8806
|
|
260
263
|
nucliadb/search/search/query.py,sha256=lYCesbUv-B7IyVFQoCCurcxl_Azc5nq3jtVQJ9tk1Ao,11552
|
|
261
264
|
nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
|
|
262
|
-
nucliadb/search/search/rerankers.py,sha256=
|
|
265
|
+
nucliadb/search/search/rerankers.py,sha256=2LNC0I28EXriffMuBlOYzjQq0vCTjpCxaK29f852n3s,7473
|
|
263
266
|
nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8PC8,2636
|
|
264
267
|
nucliadb/search/search/summarize.py,sha256=3lLdwsM28W505bKvmK7JLXmz7kcjd8Hp70LQs391ofY,5087
|
|
265
268
|
nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
|
|
@@ -272,14 +275,14 @@ nucliadb/search/search/chat/query.py,sha256=AhOPMf68p2BRjKz7CdkcUIDMANtxr00oGt42
|
|
|
272
275
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
273
276
|
nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
|
|
274
277
|
nucliadb/search/search/query_parser/fetcher.py,sha256=0Eg_7x9BaAQ1AuTK6NXQMUoGFAXIZiMRurR32tydeNM,17198
|
|
275
|
-
nucliadb/search/search/query_parser/models.py,sha256=
|
|
278
|
+
nucliadb/search/search/query_parser/models.py,sha256=ARJYv88bj-u1G4-JxLKNbaz8gwC5sIyeZx_9Aai3KKI,4302
|
|
276
279
|
nucliadb/search/search/query_parser/old_filters.py,sha256=GsU3T3-WiSPvjucP7evHkshzAWZOli8qsuXChvWRCY0,9092
|
|
277
280
|
nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
|
|
278
|
-
nucliadb/search/search/query_parser/parsers/ask.py,sha256=
|
|
279
|
-
nucliadb/search/search/query_parser/parsers/catalog.py,sha256=
|
|
281
|
+
nucliadb/search/search/query_parser/parsers/ask.py,sha256=ySa3lBhUuammIchJvj7xodeGIYGkR0uyLnHuOtLfWE8,2810
|
|
282
|
+
nucliadb/search/search/query_parser/parsers/catalog.py,sha256=DErVfWJ9a_F5a8Qbht1WY4Lm8r3ui9Y3C7oJMOOZOng,7474
|
|
280
283
|
nucliadb/search/search/query_parser/parsers/common.py,sha256=jbQweWVufngbobr99qpHh1iiaGICOC6-e9AV33x0-Gk,6594
|
|
281
284
|
nucliadb/search/search/query_parser/parsers/find.py,sha256=4xQwa0BxNucenUrW_iZ2jCGd15Dm4AKS_B91BE8sDi4,12773
|
|
282
|
-
nucliadb/search/search/query_parser/parsers/graph.py,sha256=
|
|
285
|
+
nucliadb/search/search/query_parser/parsers/graph.py,sha256=s7nCB7ly_4BZWds-8zce1R-r2fHSiEhAK8P-eL14wTk,9390
|
|
283
286
|
nucliadb/search/search/query_parser/parsers/search.py,sha256=78KSJ9t3I7nFVY2Qk2fMw2P1RHUdGRsWzBf59FdAeTA,10503
|
|
284
287
|
nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=xiOQ7_X6MkcZs3W_0DjdVfyk-G1AY6RBx3oG5hsq7ig,11455
|
|
285
288
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
@@ -376,8 +379,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
|
376
379
|
nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
|
|
377
380
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
|
378
381
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
|
379
|
-
nucliadb-6.7.2.
|
|
380
|
-
nucliadb-6.7.2.
|
|
381
|
-
nucliadb-6.7.2.
|
|
382
|
-
nucliadb-6.7.2.
|
|
383
|
-
nucliadb-6.7.2.
|
|
382
|
+
nucliadb-6.7.2.post4911.dist-info/METADATA,sha256=sWlsMPT1JH0UwpkUlpsNhj5RYdZyyd77MzrsmFh7fcc,4158
|
|
383
|
+
nucliadb-6.7.2.post4911.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
384
|
+
nucliadb-6.7.2.post4911.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
|
385
|
+
nucliadb-6.7.2.post4911.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
|
386
|
+
nucliadb-6.7.2.post4911.dist-info/RECORD,,
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
|
2
|
-
#
|
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
|
5
|
-
#
|
|
6
|
-
# AGPL:
|
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
|
10
|
-
# License, or (at your option) any later version.
|
|
11
|
-
#
|
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
-
# GNU Affero General Public License for more details.
|
|
16
|
-
#
|
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
|
-
#
|
|
20
|
-
|
|
21
|
-
from typing import cast
|
|
22
|
-
|
|
23
|
-
from nidx_protos.noderesources_pb2 import Resource as IndexMessage
|
|
24
|
-
|
|
25
|
-
from nucliadb.common.maindb.driver import Transaction
|
|
26
|
-
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
|
27
|
-
from nucliadb.common.maindb.utils import get_driver
|
|
28
|
-
from nucliadb_telemetry import metrics
|
|
29
|
-
|
|
30
|
-
from ..resource import Resource
|
|
31
|
-
|
|
32
|
-
observer = metrics.Observer("pg_catalog_write", labels={"type": ""})
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def _pg_transaction(txn: Transaction) -> PGTransaction:
|
|
36
|
-
return cast(PGTransaction, txn)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def pgcatalog_enabled(kbid):
|
|
40
|
-
return isinstance(get_driver(), PGDriver)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def extract_facets(labels):
|
|
44
|
-
facets = set()
|
|
45
|
-
for label in labels:
|
|
46
|
-
parts = label.split("/")
|
|
47
|
-
facet = ""
|
|
48
|
-
for part in parts[1:]:
|
|
49
|
-
facet += f"/{part}"
|
|
50
|
-
facets.add(facet)
|
|
51
|
-
return facets
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
@observer.wrap({"type": "update"})
|
|
55
|
-
async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, index_message: IndexMessage):
|
|
56
|
-
if not pgcatalog_enabled(kbid):
|
|
57
|
-
return
|
|
58
|
-
|
|
59
|
-
if resource.basic is None:
|
|
60
|
-
raise ValueError("Cannot index into the catalog a resource without basic metadata ")
|
|
61
|
-
|
|
62
|
-
created_at = resource.basic.created.ToDatetime()
|
|
63
|
-
modified_at = resource.basic.modified.ToDatetime()
|
|
64
|
-
if modified_at < created_at:
|
|
65
|
-
modified_at = created_at
|
|
66
|
-
|
|
67
|
-
async with _pg_transaction(txn).connection.cursor() as cur:
|
|
68
|
-
# Do not index canceled labels
|
|
69
|
-
cancelled_labels = {
|
|
70
|
-
f"/l/{clf.labelset}/{clf.label}"
|
|
71
|
-
for clf in resource.basic.usermetadata.classifications
|
|
72
|
-
if clf.cancelled_by_user
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
# Labels from the resource and classification labels from each field
|
|
76
|
-
labels = [label for label in index_message.labels]
|
|
77
|
-
for classification in resource.basic.computedmetadata.field_classifications:
|
|
78
|
-
for clf in classification.classifications:
|
|
79
|
-
label = f"/l/{clf.labelset}/{clf.label}"
|
|
80
|
-
if label not in cancelled_labels:
|
|
81
|
-
labels.append(label)
|
|
82
|
-
|
|
83
|
-
await cur.execute(
|
|
84
|
-
"""
|
|
85
|
-
INSERT INTO catalog
|
|
86
|
-
(kbid, rid, title, created_at, modified_at, labels, slug)
|
|
87
|
-
VALUES
|
|
88
|
-
(%(kbid)s, %(rid)s, %(title)s, %(created_at)s, %(modified_at)s, %(labels)s, %(slug)s)
|
|
89
|
-
ON CONFLICT (kbid, rid) DO UPDATE SET
|
|
90
|
-
title = excluded.title,
|
|
91
|
-
created_at = excluded.created_at,
|
|
92
|
-
modified_at = excluded.modified_at,
|
|
93
|
-
labels = excluded.labels,
|
|
94
|
-
slug = excluded.slug""",
|
|
95
|
-
{
|
|
96
|
-
"kbid": resource.kb.kbid,
|
|
97
|
-
"rid": resource.uuid,
|
|
98
|
-
"title": resource.basic.title,
|
|
99
|
-
"created_at": created_at,
|
|
100
|
-
"modified_at": modified_at,
|
|
101
|
-
"labels": labels,
|
|
102
|
-
"slug": resource.basic.slug,
|
|
103
|
-
},
|
|
104
|
-
)
|
|
105
|
-
await cur.execute(
|
|
106
|
-
"DELETE FROM catalog_facets WHERE kbid = %(kbid)s AND rid = %(rid)s",
|
|
107
|
-
{
|
|
108
|
-
"kbid": resource.kb.kbid,
|
|
109
|
-
"rid": resource.uuid,
|
|
110
|
-
},
|
|
111
|
-
)
|
|
112
|
-
await cur.execute(
|
|
113
|
-
"INSERT INTO catalog_facets (kbid, rid, facet) SELECT %(kbid)s AS kbid, %(rid)s AS rid, unnest(%(facets)s::text[]) AS facet",
|
|
114
|
-
{
|
|
115
|
-
"kbid": resource.kb.kbid,
|
|
116
|
-
"rid": resource.uuid,
|
|
117
|
-
"facets": list(extract_facets(labels)),
|
|
118
|
-
},
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
@observer.wrap({"type": "delete"})
|
|
123
|
-
async def pgcatalog_delete(txn: Transaction, kbid: str, rid: str):
|
|
124
|
-
if not pgcatalog_enabled(kbid):
|
|
125
|
-
return
|
|
126
|
-
async with _pg_transaction(txn).connection.cursor() as cur:
|
|
127
|
-
await cur.execute(
|
|
128
|
-
"DELETE FROM catalog where kbid = %(kbid)s AND rid = %(rid)s", {"kbid": kbid, "rid": rid}
|
|
129
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|