nucliadb 6.6.1.post4596__py3-none-any.whl → 6.6.1.post4601__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #38
22
+
23
+ Backfill the catalog with labels from fields metadata
24
+
25
+ """
26
+
27
+ import logging
28
+ from typing import cast
29
+
30
+ from nucliadb.common import datamanagers
31
+ from nucliadb.common.maindb.pg import PGDriver, PGTransaction
32
+ from nucliadb.ingest.orm.index_message import get_resource_index_message
33
+ from nucliadb.ingest.orm.processor.pgcatalog import pgcatalog_update
34
+ from nucliadb.migrator.context import ExecutionContext
35
+ from nucliadb_protos import resources_pb2
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
+ async def migrate(context: ExecutionContext) -> None: ...
41
+
42
+
43
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
44
+ if not isinstance(context.kv_driver, PGDriver):
45
+ return
46
+
47
+ BATCH_SIZE = 100
48
+ async with context.kv_driver.transaction() as txn:
49
+ txn = cast(PGTransaction, txn)
50
+ start = ""
51
+ while True:
52
+ async with txn.connection.cursor() as cur:
53
+ # Get list of resources except those already in the catalog
54
+ await cur.execute(
55
+ """
56
+ SELECT key, value FROM resources
57
+ WHERE key ~ ('^/kbs/' || %s || '/r/[^/]*$')
58
+ AND key > %s
59
+ ORDER BY key
60
+ LIMIT %s""",
61
+ (kbid, start, BATCH_SIZE),
62
+ )
63
+
64
+ to_index = []
65
+ rows = await cur.fetchall()
66
+ if len(rows) == 0:
67
+ return
68
+ for key, basic_pb in rows:
69
+ start = key
70
+
71
+ # Only reindex resources with labels in field computed metadata
72
+ basic = resources_pb2.Basic()
73
+ basic.ParseFromString(basic_pb)
74
+ if basic.computedmetadata.field_classifications:
75
+ to_index.append(key)
76
+
77
+ logger.info(f"Reindexing {len(to_index)} catalog entries from {start}")
78
+ # Index each resource
79
+ for key in to_index:
80
+ rid = key.split("/")[4]
81
+ resource = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
82
+ if resource is None:
83
+ logger.warning(f"Could not load resource {rid} for kbid {kbid}")
84
+ continue
85
+
86
+ index_message = await get_resource_index_message(resource, reindex=False)
87
+ await pgcatalog_update(txn, kbid, resource, index_message)
88
+
89
+ if to_index:
90
+ await txn.commit()
@@ -65,12 +65,20 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
65
65
  modified_at = created_at
66
66
 
67
67
  async with _pg_transaction(txn).connection.cursor() as cur:
68
+ # Do not index canceled labels
69
+ cancelled_labels = {
70
+ f"/l/{clf.labelset}/{clf.label}"
71
+ for clf in resource.basic.usermetadata.classifications
72
+ if clf.cancelled_by_user
73
+ }
74
+
68
75
  # Labels from the resource and classification labels from each field
69
76
  labels = [label for label in index_message.labels]
70
- for field in (await resource.get_fields()).values():
71
- meta = await field.get_field_metadata()
72
- if meta:
73
- labels += [f"/l/{c.labelset}/{c.label}" for c in meta.metadata.classifications]
77
+ for classification in resource.basic.computedmetadata.field_classifications:
78
+ for clf in classification.classifications:
79
+ label = f"/l/{clf.labelset}/{clf.label}"
80
+ if label not in cancelled_labels:
81
+ labels.append(label)
74
82
 
75
83
  await cur.execute(
76
84
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.6.1.post4596
3
+ Version: 6.6.1.post4601
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post4596
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post4596
24
- Requires-Dist: nucliadb-protos>=6.6.1.post4596
25
- Requires-Dist: nucliadb-models>=6.6.1.post4596
26
- Requires-Dist: nidx-protos>=6.6.1.post4596
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post4601
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post4601
24
+ Requires-Dist: nucliadb-protos>=6.6.1.post4601
25
+ Requires-Dist: nucliadb-models>=6.6.1.post4601
26
+ Requires-Dist: nidx-protos>=6.6.1.post4601
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.43.0
29
29
  Requires-Dist: uvicorn[standard]
@@ -33,6 +33,7 @@ migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQ
33
33
  migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
34
34
  migrations/0036_backfill_catalog_slug.py,sha256=mizRM-HfPswKq4iEmqofu4kIT6Gd97ruT3qhb257vZk,2954
35
35
  migrations/0037_backfill_catalog_facets.py,sha256=KAf3VKbKePw7ykDnJi47LyJ7pK1JwYkwMxrsXUnbt9g,2788
36
+ migrations/0038_backfill_catalog_field_labels.py,sha256=EKJwJfU0p1nDq7s71CpGhaX4t1iD2d1ZCzTmLcUAhDs,3382
36
37
  migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
37
38
  migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
38
39
  migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
@@ -167,7 +168,7 @@ nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,
167
168
  nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
168
169
  nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
169
170
  nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
170
- nucliadb/ingest/orm/processor/pgcatalog.py,sha256=f0_bV_5qMGXMG7iLK1DnVwi_pXwFvezfdt3aJ0fZfaQ,4347
171
+ nucliadb/ingest/orm/processor/pgcatalog.py,sha256=VPQ_Evme7xmmGoQ45zt0Am0yPkaD4hxN1r5rEaVt6s8,4633
171
172
  nucliadb/ingest/orm/processor/processor.py,sha256=jaEBwbv--WyoC8zcdxWAyF0dAzVA5crVDJl56Bqv1eI,31444
172
173
  nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
173
174
  nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
@@ -375,8 +376,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
375
376
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
376
377
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
377
378
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
378
- nucliadb-6.6.1.post4596.dist-info/METADATA,sha256=BGQonZKHKd6s_8MEWZ2NMWLh1cw9lLxHES6QWnoVQww,4158
379
- nucliadb-6.6.1.post4596.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
- nucliadb-6.6.1.post4596.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
- nucliadb-6.6.1.post4596.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
- nucliadb-6.6.1.post4596.dist-info/RECORD,,
379
+ nucliadb-6.6.1.post4601.dist-info/METADATA,sha256=sIQKpJ7uabOsctChT2-wqcLap668DGEKKG6v_PVYAJ4,4158
380
+ nucliadb-6.6.1.post4601.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
381
+ nucliadb-6.6.1.post4601.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
382
+ nucliadb-6.6.1.post4601.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
383
+ nucliadb-6.6.1.post4601.dist-info/RECORD,,