nucliadb 6.7.2.post4889__py3-none-any.whl → 6.7.2.post4908__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb might be problematic. Click here for more details.

@@ -28,6 +28,7 @@ from nidx_protos import noderesources_pb2, nodewriter_pb2
28
28
  from nidx_protos.noderesources_pb2 import Resource as PBBrainResource
29
29
 
30
30
  from nucliadb.common import datamanagers, locking
31
+ from nucliadb.common.catalog import catalog_delete, catalog_update
31
32
  from nucliadb.common.cluster.settings import settings as cluster_settings
32
33
  from nucliadb.common.cluster.utils import get_shard_manager
33
34
  from nucliadb.common.external_index_providers.base import ExternalIndexManager
@@ -61,8 +62,6 @@ from nucliadb_utils.cache.pubsub import PubSubDriver
61
62
  from nucliadb_utils.storages.storage import Storage
62
63
  from nucliadb_utils.utilities import get_storage, has_feature
63
64
 
64
- from .pgcatalog import pgcatalog_delete, pgcatalog_update
65
-
66
65
  logger = logging.getLogger("ingest-processor")
67
66
 
68
67
  MESSAGE_TO_NOTIFICATION_SOURCE = {
@@ -227,7 +226,8 @@ class Processor:
227
226
  shard = await kb.get_resource_shard(shard_id)
228
227
  if shard is None:
229
228
  raise AttributeError("Shard not available")
230
- await pgcatalog_delete(txn, message.kbid, uuid)
229
+
230
+ await catalog_delete(txn, message.kbid, uuid)
231
231
  external_index_manager = await get_external_index_manager(kbid=message.kbid)
232
232
  if external_index_manager is not None:
233
233
  await self.external_index_delete_resource(external_index_manager, uuid)
@@ -374,8 +374,7 @@ class Processor:
374
374
  index_message.labels.remove(current_status[0])
375
375
  index_message.labels.append("/n/s/ERROR")
376
376
 
377
- await pgcatalog_update(txn, kbid, resource, index_message)
378
-
377
+ await catalog_update(txn, kbid, resource, index_message)
379
378
  if transaction_check:
380
379
  await sequence_manager.set_last_seqid(txn, partition, seqid)
381
380
  await txn.commit()
@@ -67,6 +67,11 @@ class DriverSettings(BaseSettings):
67
67
  )
68
68
 
69
69
 
70
+ class CatalogConfig(Enum):
71
+ UNSET = "unset"
72
+ PG = "pg"
73
+
74
+
70
75
  # For use during migration from pull v1 to pull v2
71
76
  class ProcessingPullMode(Enum):
72
77
  OFF = "off"
@@ -75,6 +80,9 @@ class ProcessingPullMode(Enum):
75
80
 
76
81
 
77
82
  class Settings(DriverSettings):
83
+ # Catalog settings
84
+ catalog: CatalogConfig = Field(default=CatalogConfig.PG, description="Catalog backend")
85
+
78
86
  # Pull worker settings
79
87
  pull_time_error_backoff: int = 30
80
88
  pull_api_timeout: int = 60
@@ -25,6 +25,7 @@ from fastapi import Request, Response
25
25
  from fastapi_versioning import version
26
26
  from pydantic import ValidationError
27
27
 
28
+ from nucliadb.common.catalog import catalog_facets, catalog_search
28
29
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
29
30
  from nucliadb.common.exceptions import InvalidQueryError
30
31
  from nucliadb.models.responses import HTTPClientError
@@ -33,7 +34,6 @@ from nucliadb.search.api.v1.router import KB_PREFIX, api
33
34
  from nucliadb.search.api.v1.utils import fastapi_query
34
35
  from nucliadb.search.search import cache
35
36
  from nucliadb.search.search.merge import fetch_resources
36
- from nucliadb.search.search.pgcatalog import pgcatalog_facets, pgcatalog_search
37
37
  from nucliadb.search.search.query_parser.parsers import parse_catalog
38
38
  from nucliadb.search.search.utils import (
39
39
  maybe_log_request_payload,
@@ -164,7 +164,7 @@ async def catalog(
164
164
  query_parser = await parse_catalog(kbid, item)
165
165
 
166
166
  catalog_results = CatalogResponse()
167
- catalog_results.fulltext = await pgcatalog_search(query_parser)
167
+ catalog_results.fulltext = await catalog_search(query_parser)
168
168
  catalog_results.resources = await fetch_resources(
169
169
  resources=[r.rid for r in catalog_results.fulltext.results],
170
170
  kbid=kbid,
@@ -205,7 +205,7 @@ async def catalog(
205
205
  )
206
206
  @requires(NucliaDBRoles.READER)
207
207
  @version(1)
208
- async def catalog_facets(
208
+ async def catalog_facets_endpoint(
209
209
  request: Request, kbid: str, item: CatalogFacetsRequest
210
210
  ) -> CatalogFacetsResponse:
211
- return CatalogFacetsResponse(facets=await pgcatalog_facets(kbid, item))
211
+ return CatalogFacetsResponse(facets=await catalog_facets(kbid, item))
@@ -17,9 +17,8 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from dataclasses import dataclass
21
20
  from datetime import datetime
22
- from typing import Literal, Optional, Union
21
+ from typing import Optional, Union
23
22
 
24
23
  from nidx_protos import nodereader_pb2
25
24
  from pydantic import BaseModel, ConfigDict, Field
@@ -153,33 +152,6 @@ class ParsedQuery(BaseModel):
153
152
  generation: Optional[Generation] = None
154
153
 
155
154
 
156
- ### Catalog
157
- @dataclass
158
- class CatalogExpression:
159
- @dataclass
160
- class Date:
161
- field: Union[Literal["created_at"], Literal["modified_at"]]
162
- since: Optional[datetime]
163
- until: Optional[datetime]
164
-
165
- bool_and: Optional[list["CatalogExpression"]] = None
166
- bool_or: Optional[list["CatalogExpression"]] = None
167
- bool_not: Optional["CatalogExpression"] = None
168
- date: Optional[Date] = None
169
- facet: Optional[str] = None
170
- resource_id: Optional[str] = None
171
-
172
-
173
- class CatalogQuery(BaseModel):
174
- kbid: str
175
- query: Optional[search_models.CatalogQuery]
176
- filters: Optional[CatalogExpression]
177
- sort: search_models.SortOptions
178
- faceted: list[str]
179
- page_size: int
180
- page_number: int
181
-
182
-
183
155
  ### Graph
184
156
 
185
157
 
@@ -63,7 +63,7 @@ class _AskParser:
63
63
  )
64
64
  elif isinstance(self.item.max_tokens, MaxTokens):
65
65
  max_tokens = self.item.max_tokens
66
- else: # pragma: nocover
66
+ else: # pragma: no cover
67
67
  # This is a trick so mypy generates an error if this branch can be reached,
68
68
  # that is, if we are missing some ifs
69
69
  _a: int = "a"
@@ -19,13 +19,10 @@
19
19
  #
20
20
 
21
21
  from nucliadb.common import datamanagers
22
+ from nucliadb.common.catalog.interface import CatalogExpression, CatalogQuery
22
23
  from nucliadb.common.exceptions import InvalidQueryError
23
24
  from nucliadb.common.filter_expression import FacetFilterTypes, facet_from_filter
24
25
  from nucliadb.search.search.filters import translate_label
25
- from nucliadb.search.search.query_parser.models import (
26
- CatalogExpression,
27
- CatalogQuery,
28
- )
29
26
  from nucliadb_models import search as search_models
30
27
  from nucliadb_models.filters import (
31
28
  And,
@@ -185,7 +182,7 @@ async def parse_filter_expression(expr: ResourceFilterExpression, kbid: str) ->
185
182
  if rid is None:
186
183
  raise InvalidQueryError("slug", f"Cannot find slug {expr.slug}")
187
184
  cat.resource_id = rid
188
- else: # pragma: nocover
185
+ else: # pragma: no cover
189
186
  # Cannot happen due to model validation
190
187
  raise ValueError("Resource needs id or slug")
191
188
  elif isinstance(expr, DateCreated):
@@ -153,7 +153,7 @@ def parse_path_query(expr: graph_requests.GraphPathQuery) -> nodereader_pb2.Grap
153
153
  elif isinstance(expr, graph_requests.Generated):
154
154
  _set_generated_to_pb(expr, pb)
155
155
 
156
- else: # pragma: nocover
156
+ else: # pragma: no cover
157
157
  # This is a trick so mypy generates an error if this branch can be reached,
158
158
  # that is, if we are missing some ifs
159
159
  _a: int = "a"
@@ -182,7 +182,7 @@ def _parse_node_query(expr: graph_requests.GraphNodesQuery) -> nodereader_pb2.Gr
182
182
  elif isinstance(expr, graph_requests.Generated):
183
183
  _set_generated_to_pb(expr, pb)
184
184
 
185
- else: # pragma: nocover
185
+ else: # pragma: no cover
186
186
  # This is a trick so mypy generates an error if this branch can be reached,
187
187
  # that is, if we are missing some ifs
188
188
  _a: int = "a"
@@ -212,7 +212,7 @@ def _parse_relation_query(
212
212
  elif isinstance(expr, graph_requests.Generated):
213
213
  _set_generated_to_pb(expr, pb)
214
214
 
215
- else: # pragma: nocover
215
+ else: # pragma: no cover
216
216
  # This is a trick so mypy generates an error if this branch can be reached,
217
217
  # that is, if we are missing some ifs
218
218
  _a: int = "a"
@@ -230,7 +230,7 @@ def _set_node_to_pb(node: graph_requests.GraphNode, pb: nodereader_pb2.GraphQuer
230
230
  pb.fuzzy.kind = nodereader_pb2.GraphQuery.Node.MatchLocation.PREFIX
231
231
  pb.fuzzy.distance = 1
232
232
 
233
- else: # pragma: nocover
233
+ else: # pragma: no cover
234
234
  # This is a trick so mypy generates an error if this branch can be reached,
235
235
  # that is, if we are missing some ifs
236
236
  _a: int = "a"
@@ -263,7 +263,7 @@ def _set_generated_to_pb(generated: graph_requests.Generated, pb: nodereader_pb2
263
263
 
264
264
  pb.facet.facet = facet
265
265
 
266
- else: # pragma: nocover
266
+ else: # pragma: no cover
267
267
  # This is a trick so mypy generates an error if this branch can be reached,
268
268
  # that is, if we are missing some ifs
269
269
  _a: int = "a"
@@ -181,7 +181,7 @@ def get_reranker(reranker: parser_models.Reranker) -> Reranker:
181
181
  elif isinstance(reranker, parser_models.PredictReranker):
182
182
  algorithm = PredictReranker(reranker.window)
183
183
 
184
- else: # pragma: nocover
184
+ else: # pragma: no cover
185
185
  # This is a trick so mypy generates an error if this branch can be reached,
186
186
  # that is, if we are missing some ifs
187
187
  _a: int = "a"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.7.2.post4889
3
+ Version: 6.7.2.post4908
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4889
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4889
24
- Requires-Dist: nucliadb-protos>=6.7.2.post4889
25
- Requires-Dist: nucliadb-models>=6.7.2.post4889
26
- Requires-Dist: nidx-protos>=6.7.2.post4889
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4908
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4908
24
+ Requires-Dist: nucliadb-protos>=6.7.2.post4908
25
+ Requires-Dist: nucliadb-models>=6.7.2.post4908
26
+ Requires-Dist: nidx-protos>=6.7.2.post4908
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.46.0
29
29
  Requires-Dist: uvicorn[standard]
@@ -19,11 +19,11 @@ migrations/0019_upgrade_to_paragraphs_v3.py,sha256=XVuku1QLvbzTi9Akzh3a72qohXilh
19
19
  migrations/0020_drain_nodes_from_cluster.py,sha256=BeECAI0T8u14M2U5USl1fFNcsfmdMerNhisolYQN_eA,1411
20
20
  migrations/0021_overwrite_vectorsets_key.py,sha256=lXNkMsa_DWEceci29c0tDO4Y4WsJQZS6p26Sh-Ya9Mg,1586
21
21
  migrations/0022_fix_paragraph_deletion_bug.py,sha256=-tH342VXF-8xwc_h3P1cYaUtTT1wHSGf7ZoeVEpnaYs,1422
22
- migrations/0023_backfill_pg_catalog.py,sha256=bBSDpU55trAthgtvVrpRheZAlcnGbVb468AsNkfJ6hk,3105
22
+ migrations/0023_backfill_pg_catalog.py,sha256=cg9pCVwOMIadpC2p2D0iX7ciuEhFETM7ex5KoT8X0sM,3212
23
23
  migrations/0025_assign_models_to_kbs_v2.py,sha256=YqzXvsru08gBsbRjuhgc3ttoEANwxaWkYeFw62J7nn0,4634
24
24
  migrations/0026_fix_high_cardinality_content_types.py,sha256=BsbBkvZDzjRHQfoouZNNtHA1xMxTKm8wOVnp_WAS9j4,2322
25
25
  migrations/0027_rollover_texts3.py,sha256=EWoUloOwv4alEqr1-DUA3tb5KLC5Vza-bBQDfZqy5jI,2818
26
- migrations/0028_extracted_vectors_reference.py,sha256=49DHCIlBpjofU8cYVHTdWv0EBIlnPTWV2WCezf0rJUo,2392
26
+ migrations/0028_extracted_vectors_reference.py,sha256=HIykGAWMt5FfIM6QyNznLNsQ--3LrzXdM5oiXQg2q0c,2393
27
27
  migrations/0029_backfill_field_status.py,sha256=W3BWa3KsstDt2xundn0ED6FoG9sqRh7Y9IDjP1yTrLw,5651
28
28
  migrations/0030_label_deduplication.py,sha256=y14TxtCMi3-TBMz_eZoyyPDHNlZb29taJujlDuHumsA,2008
29
29
  migrations/0031_languages_deduplication.py,sha256=o6va6lP3oTRT1uSzp5MIhHHBFbhCxSZ-oNlXXpiAdUo,2340
@@ -33,7 +33,7 @@ migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQ
33
33
  migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
34
34
  migrations/0036_backfill_catalog_slug.py,sha256=toYqxH_EfUFqoVn_cOdR5Fg8bWZU5BoFMfPBSf74LKU,2957
35
35
  migrations/0037_backfill_catalog_facets.py,sha256=IH7H4OZ4tzws6xEh7Qro0bPDHDYOoVViEUj-JwPPe1U,2791
36
- migrations/0038_backfill_catalog_field_labels.py,sha256=PUkgghFiaCSuGYOiOZqp7p352-C4mi7DNMAZ-B-S0SE,3385
36
+ migrations/0038_backfill_catalog_field_labels.py,sha256=F519nYngJDb1Mtwf-OQpweDPWKPxAlqdxy5E-DyQrhA,3492
37
37
  migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
38
38
  migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
39
39
  migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
@@ -65,16 +65,21 @@ nucliadb/common/cache.py,sha256=FmMVPoxmKGODvOCyO_pxEHjAV3sYeefW9Jeh1cVLTU0,6538
65
65
  nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
66
66
  nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
67
67
  nucliadb/common/exceptions.py,sha256=_PJk_NfAhZBFBvmgAfvsJKZ9KuRt5Y1cNsH3-cXE07w,1120
68
- nucliadb/common/filter_expression.py,sha256=aRbGfg2pci7sUo1KgSqcm72Eu-3Ordy8knnW6DYId5o,6568
68
+ nucliadb/common/filter_expression.py,sha256=-6buKY1SCVYpkrG_60Ui3ebSDWnXeF_xVmBCrNipoII,6569
69
69
  nucliadb/common/ids.py,sha256=4QjoIofes_vtKj2HsFWZf8VVIVWXxdkYtLpx1n618Us,8239
70
70
  nucliadb/common/locking.py,sha256=eZG47mI1OPnKbxSd95qa6jDXBhUoxVBIuSjxoEuBRWE,5894
71
71
  nucliadb/common/nidx.py,sha256=4WHcEkLdH54S2C0FcLwHvcQXMiRj2lZ7zqOEbD8eEM8,9189
72
- nucliadb/common/vector_index_config.py,sha256=LqGwhrDCp1q1vBow3scd1Chhr4GLYjYnGL72FKvOYYc,1552
72
+ nucliadb/common/vector_index_config.py,sha256=DQrlraTWE5uUn68l9s10d3wobNeVtbP-ANEQmUfSWyo,1553
73
73
  nucliadb/common/back_pressure/__init__.py,sha256=paAcAZcfGRTyURF9lnn3vX0vcwakTEVswG_xcdGBH-U,928
74
74
  nucliadb/common/back_pressure/cache.py,sha256=ANvXglWzI5naAD6N4E_fNi17qS6KNyAhjLeh6WlZZ84,2931
75
75
  nucliadb/common/back_pressure/materializer.py,sha256=bXUalaaTMdrltm23ezkoymcRPJl7Ha8RVTj7xdVfHgQ,11468
76
76
  nucliadb/common/back_pressure/settings.py,sha256=3qNOzbI0KC6LMy-wMilXRSBfZu6CCpGHod26MTgAZ2o,3082
77
77
  nucliadb/common/back_pressure/utils.py,sha256=aZeP1XSkdgaRgZC76yR9Kje3511ZUCp7KB-XzcvhMYY,2018
78
+ nucliadb/common/catalog/__init__.py,sha256=6ZTYyXuUcvUH4Z97CT-lBvy8OCKnA5Hkq7yHPp4lJOw,3250
79
+ nucliadb/common/catalog/dummy.py,sha256=-LA29hSHm2XPTXjOTV2g5w4yNZm9XvGzIyO2he7rJDw,1490
80
+ nucliadb/common/catalog/interface.py,sha256=IBgvlOEvZ46szcAc6FpIb3Fr0pelGVvjOzRDkA1tC10,3281
81
+ nucliadb/common/catalog/pg.py,sha256=hRdiR3RDGyZnjAB0dWu4PZBgAfiZLjVIh08nCK34Nl8,20193
82
+ nucliadb/common/catalog/utils.py,sha256=lQLTe3rN_ra0CiNjTQIiIaU4lhx5beRkx1Va10ZBk_Y,2222
78
83
  nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
79
84
  nucliadb/common/cluster/exceptions.py,sha256=t7v_l93t44l2tQpdQXgO_w-c4YZRcaayOz1A2i0w4RQ,1258
80
85
  nucliadb/common/cluster/grpc_node_dummy.py,sha256=JkufazWzMA4KFEU8EBkMbiiDW4C8lLcRhiiCxP7aCQY,2949
@@ -136,7 +141,7 @@ nucliadb/ingest/partitions.py,sha256=c1OWrFWgadNtvghY3Fl-xlurdyV5hZpVJPEoRAsBt1k
136
141
  nucliadb/ingest/processing.py,sha256=IKXMZXIPuuojKQiXR2T5-5NwMvmUnIQIhBXUGgzyFFo,21551
137
142
  nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
143
  nucliadb/ingest/serialize.py,sha256=hiddxbV5gxVk8uY8-Q1AEq2DhJx5fOBP34zq5ONGgcs,16240
139
- nucliadb/ingest/settings.py,sha256=8OJMjVVbI3OWIbZLrXBqpB79zHbbLkCSb9VJA0IzRss,4269
144
+ nucliadb/ingest/settings.py,sha256=DouvoeZxu4uLgEIDLCLKNrzu8SLZpgQxTB--Ly6pRJI,4448
140
145
  nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
141
146
  nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
142
147
  nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
@@ -156,7 +161,7 @@ nucliadb/ingest/fields/generic.py,sha256=elgtqv15aJUq3zY7X_g0bli_2BpcwPArVvzhe54
156
161
  nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJs,4172
157
162
  nucliadb/ingest/fields/text.py,sha256=2grxo8twWbpXEd_iwUMBw9q0dWorVmlPONmY5d1ThwQ,1684
158
163
  nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
159
- nucliadb/ingest/orm/brain_v2.py,sha256=AhDjTmHAYPJe1xbhQGMgoqqCPMMaeEHKZcQPzB0HKrE,33646
164
+ nucliadb/ingest/orm/brain_v2.py,sha256=8MAo1N_nhoGy73TvKBuaw-NaMxIanRPCNttw6dFY4jk,33647
160
165
  nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
161
166
  nucliadb/ingest/orm/entities.py,sha256=kXyeF6XOpFKhEsGLcY-GLIk21Exp0cJst4XQQ9jJoug,14791
162
167
  nucliadb/ingest/orm/exceptions.py,sha256=gsp7TtVNQPiIEh-zf_UEJClwuFU0iu-5vzj0OrKMScg,1550
@@ -168,8 +173,7 @@ nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,
168
173
  nucliadb/ingest/orm/processor/__init__.py,sha256=xhDNKCxY0XNOlIVKEtM8QT75vDUkJIt7K-_VgGbbOQU,904
169
174
  nucliadb/ingest/orm/processor/auditing.py,sha256=gxn5v30KVaH0TnIjo715mWjzKGJ-DMviElEXJG9BNN4,4612
170
175
  nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
171
- nucliadb/ingest/orm/processor/pgcatalog.py,sha256=VPQ_Evme7xmmGoQ45zt0Am0yPkaD4hxN1r5rEaVt6s8,4633
172
- nucliadb/ingest/orm/processor/processor.py,sha256=vCLJ3e9ByW80aPFsA1lAFSjyDihULPVId1OQGyC7huw,33841
176
+ nucliadb/ingest/orm/processor/processor.py,sha256=CPOf5KZgTcKLO6LxkFia2LueskcKBlv5bTifnTOZ1KE,33845
173
177
  nucliadb/ingest/orm/processor/sequence_manager.py,sha256=kUH0bCuM6NqpA0xSwfyb9igig3Btu57pc8VYnKggqx4,1693
174
178
  nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
175
179
  nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -222,7 +226,7 @@ nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,
222
226
  nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
223
227
  nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
224
228
  nucliadb/search/api/v1/ask.py,sha256=hZUnk1opZuXp1IwTiingSatlUefg2CZ9r_Z9sUwZMaU,5698
225
- nucliadb/search/api/v1/catalog.py,sha256=5ZY3d8sVia1traUxVS0Q4aQJmgcOuXzbxis_uY4ulE4,8077
229
+ nucliadb/search/api/v1/catalog.py,sha256=zMflTu9UKfvuEO_u4Et33Q2kSni0TIk2E5t-_Ad5BXM,8069
226
230
  nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
227
231
  nucliadb/search/api/v1/find.py,sha256=j6mxEyxjlLnZSqCT_N2LmOJlytsm1vkY4KFFmJRrtP8,10904
228
232
  nucliadb/search/api/v1/graph.py,sha256=gthqxCOn9biE6D6s93jRGLglk0ono8U7OyS390kWiI8,4178
@@ -255,11 +259,10 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
255
259
  nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
256
260
  nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
257
261
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
258
- nucliadb/search/search/pgcatalog.py,sha256=0n_gDihZZhqrDLRHvHzS3IESvMRTcU6YShqizQMyE_Y,16807
259
262
  nucliadb/search/search/predict_proxy.py,sha256=Df8F5K-oS4TIXJc_y8UDViJTo7st5L0kMgxYPFZ39Vk,8806
260
263
  nucliadb/search/search/query.py,sha256=lYCesbUv-B7IyVFQoCCurcxl_Azc5nq3jtVQJ9tk1Ao,11552
261
264
  nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
262
- nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
265
+ nucliadb/search/search/rerankers.py,sha256=2LNC0I28EXriffMuBlOYzjQq0vCTjpCxaK29f852n3s,7473
263
266
  nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8PC8,2636
264
267
  nucliadb/search/search/summarize.py,sha256=3lLdwsM28W505bKvmK7JLXmz7kcjd8Hp70LQs391ofY,5087
265
268
  nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
@@ -272,14 +275,14 @@ nucliadb/search/search/chat/query.py,sha256=AhOPMf68p2BRjKz7CdkcUIDMANtxr00oGt42
272
275
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
273
276
  nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
274
277
  nucliadb/search/search/query_parser/fetcher.py,sha256=0Eg_7x9BaAQ1AuTK6NXQMUoGFAXIZiMRurR32tydeNM,17198
275
- nucliadb/search/search/query_parser/models.py,sha256=kAslqX_-zaIdUpcpdNU2a5uQPQh7LC605qWLZ4aZ5T4,5064
278
+ nucliadb/search/search/query_parser/models.py,sha256=ARJYv88bj-u1G4-JxLKNbaz8gwC5sIyeZx_9Aai3KKI,4302
276
279
  nucliadb/search/search/query_parser/old_filters.py,sha256=GsU3T3-WiSPvjucP7evHkshzAWZOli8qsuXChvWRCY0,9092
277
280
  nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
278
- nucliadb/search/search/query_parser/parsers/ask.py,sha256=mc4Rx7jLM101tRZYOxEQC-txO5C-fDJid_oMAtZTRug,2809
279
- nucliadb/search/search/query_parser/parsers/catalog.py,sha256=JuDiBL2wdjAuEFEPo0e2nQ4VqWjF3FXakT0ziZk3Oes,7495
281
+ nucliadb/search/search/query_parser/parsers/ask.py,sha256=ySa3lBhUuammIchJvj7xodeGIYGkR0uyLnHuOtLfWE8,2810
282
+ nucliadb/search/search/query_parser/parsers/catalog.py,sha256=DErVfWJ9a_F5a8Qbht1WY4Lm8r3ui9Y3C7oJMOOZOng,7474
280
283
  nucliadb/search/search/query_parser/parsers/common.py,sha256=jbQweWVufngbobr99qpHh1iiaGICOC6-e9AV33x0-Gk,6594
281
284
  nucliadb/search/search/query_parser/parsers/find.py,sha256=4xQwa0BxNucenUrW_iZ2jCGd15Dm4AKS_B91BE8sDi4,12773
282
- nucliadb/search/search/query_parser/parsers/graph.py,sha256=zyqdUg5Afmhb2_-hvj9FUCaoLh026MUP1fgY2j-lD7c,9385
285
+ nucliadb/search/search/query_parser/parsers/graph.py,sha256=s7nCB7ly_4BZWds-8zce1R-r2fHSiEhAK8P-eL14wTk,9390
283
286
  nucliadb/search/search/query_parser/parsers/search.py,sha256=78KSJ9t3I7nFVY2Qk2fMw2P1RHUdGRsWzBf59FdAeTA,10503
284
287
  nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=xiOQ7_X6MkcZs3W_0DjdVfyk-G1AY6RBx3oG5hsq7ig,11455
285
288
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -376,8 +379,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
376
379
  nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
377
380
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
378
381
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
379
- nucliadb-6.7.2.post4889.dist-info/METADATA,sha256=n5UTXqF3fg0n3X1orrwnWa9yaPykhsM2ntFhyJm-Z8E,4158
380
- nucliadb-6.7.2.post4889.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
381
- nucliadb-6.7.2.post4889.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
382
- nucliadb-6.7.2.post4889.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
383
- nucliadb-6.7.2.post4889.dist-info/RECORD,,
382
+ nucliadb-6.7.2.post4908.dist-info/METADATA,sha256=-hdK9V9pdxcHO7mkQqqnfX3XYOR7RqrzKlgdVL4Tj7k,4158
383
+ nucliadb-6.7.2.post4908.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
384
+ nucliadb-6.7.2.post4908.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
385
+ nucliadb-6.7.2.post4908.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
386
+ nucliadb-6.7.2.post4908.dist-info/RECORD,,
@@ -1,129 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from typing import cast
22
-
23
- from nidx_protos.noderesources_pb2 import Resource as IndexMessage
24
-
25
- from nucliadb.common.maindb.driver import Transaction
26
- from nucliadb.common.maindb.pg import PGDriver, PGTransaction
27
- from nucliadb.common.maindb.utils import get_driver
28
- from nucliadb_telemetry import metrics
29
-
30
- from ..resource import Resource
31
-
32
- observer = metrics.Observer("pg_catalog_write", labels={"type": ""})
33
-
34
-
35
- def _pg_transaction(txn: Transaction) -> PGTransaction:
36
- return cast(PGTransaction, txn)
37
-
38
-
39
- def pgcatalog_enabled(kbid):
40
- return isinstance(get_driver(), PGDriver)
41
-
42
-
43
- def extract_facets(labels):
44
- facets = set()
45
- for label in labels:
46
- parts = label.split("/")
47
- facet = ""
48
- for part in parts[1:]:
49
- facet += f"/{part}"
50
- facets.add(facet)
51
- return facets
52
-
53
-
54
- @observer.wrap({"type": "update"})
55
- async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, index_message: IndexMessage):
56
- if not pgcatalog_enabled(kbid):
57
- return
58
-
59
- if resource.basic is None:
60
- raise ValueError("Cannot index into the catalog a resource without basic metadata ")
61
-
62
- created_at = resource.basic.created.ToDatetime()
63
- modified_at = resource.basic.modified.ToDatetime()
64
- if modified_at < created_at:
65
- modified_at = created_at
66
-
67
- async with _pg_transaction(txn).connection.cursor() as cur:
68
- # Do not index canceled labels
69
- cancelled_labels = {
70
- f"/l/{clf.labelset}/{clf.label}"
71
- for clf in resource.basic.usermetadata.classifications
72
- if clf.cancelled_by_user
73
- }
74
-
75
- # Labels from the resource and classification labels from each field
76
- labels = [label for label in index_message.labels]
77
- for classification in resource.basic.computedmetadata.field_classifications:
78
- for clf in classification.classifications:
79
- label = f"/l/{clf.labelset}/{clf.label}"
80
- if label not in cancelled_labels:
81
- labels.append(label)
82
-
83
- await cur.execute(
84
- """
85
- INSERT INTO catalog
86
- (kbid, rid, title, created_at, modified_at, labels, slug)
87
- VALUES
88
- (%(kbid)s, %(rid)s, %(title)s, %(created_at)s, %(modified_at)s, %(labels)s, %(slug)s)
89
- ON CONFLICT (kbid, rid) DO UPDATE SET
90
- title = excluded.title,
91
- created_at = excluded.created_at,
92
- modified_at = excluded.modified_at,
93
- labels = excluded.labels,
94
- slug = excluded.slug""",
95
- {
96
- "kbid": resource.kb.kbid,
97
- "rid": resource.uuid,
98
- "title": resource.basic.title,
99
- "created_at": created_at,
100
- "modified_at": modified_at,
101
- "labels": labels,
102
- "slug": resource.basic.slug,
103
- },
104
- )
105
- await cur.execute(
106
- "DELETE FROM catalog_facets WHERE kbid = %(kbid)s AND rid = %(rid)s",
107
- {
108
- "kbid": resource.kb.kbid,
109
- "rid": resource.uuid,
110
- },
111
- )
112
- await cur.execute(
113
- "INSERT INTO catalog_facets (kbid, rid, facet) SELECT %(kbid)s AS kbid, %(rid)s AS rid, unnest(%(facets)s::text[]) AS facet",
114
- {
115
- "kbid": resource.kb.kbid,
116
- "rid": resource.uuid,
117
- "facets": list(extract_facets(labels)),
118
- },
119
- )
120
-
121
-
122
- @observer.wrap({"type": "delete"})
123
- async def pgcatalog_delete(txn: Transaction, kbid: str, rid: str):
124
- if not pgcatalog_enabled(kbid):
125
- return
126
- async with _pg_transaction(txn).connection.cursor() as cur:
127
- await cur.execute(
128
- "DELETE FROM catalog where kbid = %(kbid)s AND rid = %(rid)s", {"kbid": kbid, "rid": rid}
129
- )