nucliadb 6.6.1.post4596__py3-none-any.whl → 6.6.1.post4601__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0038_backfill_catalog_field_labels.py +90 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +12 -4
- {nucliadb-6.6.1.post4596.dist-info → nucliadb-6.6.1.post4601.dist-info}/METADATA +6 -6
- {nucliadb-6.6.1.post4596.dist-info → nucliadb-6.6.1.post4601.dist-info}/RECORD +7 -6
- {nucliadb-6.6.1.post4596.dist-info → nucliadb-6.6.1.post4601.dist-info}/WHEEL +0 -0
- {nucliadb-6.6.1.post4596.dist-info → nucliadb-6.6.1.post4601.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.6.1.post4596.dist-info → nucliadb-6.6.1.post4601.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #38
|
22
|
+
|
23
|
+
Backfill the catalog with labels from fields metadata
|
24
|
+
|
25
|
+
"""
|
26
|
+
|
27
|
+
import logging
|
28
|
+
from typing import cast
|
29
|
+
|
30
|
+
from nucliadb.common import datamanagers
|
31
|
+
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
32
|
+
from nucliadb.ingest.orm.index_message import get_resource_index_message
|
33
|
+
from nucliadb.ingest.orm.processor.pgcatalog import pgcatalog_update
|
34
|
+
from nucliadb.migrator.context import ExecutionContext
|
35
|
+
from nucliadb_protos import resources_pb2
|
36
|
+
|
37
|
+
logger = logging.getLogger(__name__)
|
38
|
+
|
39
|
+
|
40
|
+
async def migrate(context: ExecutionContext) -> None:
    """Global migration step: intentionally does nothing.

    This migration has no cluster-wide work; the per-KB work is implemented
    in ``migrate_kb``.
    """
|
41
|
+
|
42
|
+
|
43
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Re-run the catalog update for resources of ``kbid`` that have field classifications.

    Rows are read from the ``resources`` table in key order, ``BATCH_SIZE`` at
    a time. Every row whose ``Basic`` protobuf has a non-empty
    ``computedmetadata.field_classifications`` is selected; each selected
    resource is loaded and handed to ``pgcatalog_update``. The transaction is
    committed after every page that selected at least one resource.

    Returns immediately, without touching anything, when the KV driver is not
    a ``PGDriver``.
    """
    if not isinstance(context.kv_driver, PGDriver):
        return

    BATCH_SIZE = 100
    page_query = """
        SELECT key, value FROM resources
        WHERE key ~ ('^/kbs/' || %s || '/r/[^/]*$')
        AND key > %s
        ORDER BY key
        LIMIT %s"""

    def has_field_labels(serialized_basic) -> bool:
        # Only resources with labels in field computed metadata need reindexing.
        basic = resources_pb2.Basic()
        basic.ParseFromString(serialized_basic)
        return bool(basic.computedmetadata.field_classifications)

    async with context.kv_driver.transaction() as txn:
        txn = cast(PGTransaction, txn)
        start = ""
        while True:
            async with txn.connection.cursor() as cursor:
                # Next page of this KB's resource rows, resuming after the
                # last key already seen (``key > start``).
                await cursor.execute(page_query, (kbid, start, BATCH_SIZE))
                page = await cursor.fetchall()

            if not page:
                return

            # The following page resumes after the last key of this one.
            start, _ = page[-1]
            pending = [key for key, raw_basic in page if has_field_labels(raw_basic)]

            logger.info(f"Reindexing {len(pending)} catalog entries from {start}")
            for key in pending:
                # Keys matched by the query look like /kbs/<kbid>/r/<rid>, so
                # the rid is the fifth "/"-separated piece.
                rid = key.split("/")[4]
                resource = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
                if resource is None:
                    logger.warning(f"Could not load resource {rid} for kbid {kbid}")
                    continue

                index_message = await get_resource_index_message(resource, reindex=False)
                await pgcatalog_update(txn, kbid, resource, index_message)

            if pending:
                await txn.commit()
|
@@ -65,12 +65,20 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
|
|
65
65
|
modified_at = created_at
|
66
66
|
|
67
67
|
async with _pg_transaction(txn).connection.cursor() as cur:
|
68
|
+
# Do not index canceled labels
|
69
|
+
cancelled_labels = {
|
70
|
+
f"/l/{clf.labelset}/{clf.label}"
|
71
|
+
for clf in resource.basic.usermetadata.classifications
|
72
|
+
if clf.cancelled_by_user
|
73
|
+
}
|
74
|
+
|
68
75
|
# Labels from the resource and classification labels from each field
|
69
76
|
labels = [label for label in index_message.labels]
|
70
|
-
for
|
71
|
-
|
72
|
-
|
73
|
-
|
77
|
+
for classification in resource.basic.computedmetadata.field_classifications:
|
78
|
+
for clf in classification.classifications:
|
79
|
+
label = f"/l/{clf.labelset}/{clf.label}"
|
80
|
+
if label not in cancelled_labels:
|
81
|
+
labels.append(label)
|
74
82
|
|
75
83
|
await cur.execute(
|
76
84
|
"""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.6.1.post4596
|
3
|
+
Version: 6.6.1.post4601
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: <4,>=3.9
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post4596
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post4596
|
24
|
-
Requires-Dist: nucliadb-protos>=6.6.1.post4596
|
25
|
-
Requires-Dist: nucliadb-models>=6.6.1.post4596
|
26
|
-
Requires-Dist: nidx-protos>=6.6.1.post4596
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post4601
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post4601
|
24
|
+
Requires-Dist: nucliadb-protos>=6.6.1.post4601
|
25
|
+
Requires-Dist: nucliadb-models>=6.6.1.post4601
|
26
|
+
Requires-Dist: nidx-protos>=6.6.1.post4601
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
28
28
|
Requires-Dist: nuclia-models>=0.43.0
|
29
29
|
Requires-Dist: uvicorn[standard]
|
@@ -33,6 +33,7 @@ migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQ
|
|
33
33
|
migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
|
34
34
|
migrations/0036_backfill_catalog_slug.py,sha256=mizRM-HfPswKq4iEmqofu4kIT6Gd97ruT3qhb257vZk,2954
|
35
35
|
migrations/0037_backfill_catalog_facets.py,sha256=KAf3VKbKePw7ykDnJi47LyJ7pK1JwYkwMxrsXUnbt9g,2788
|
36
|
+
migrations/0038_backfill_catalog_field_labels.py,sha256=EKJwJfU0p1nDq7s71CpGhaX4t1iD2d1ZCzTmLcUAhDs,3382
|
36
37
|
migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
37
38
|
migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
|
38
39
|
migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
|
@@ -167,7 +168,7 @@ nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,
|
|
167
168
|
nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
|
168
169
|
nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
|
169
170
|
nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
|
170
|
-
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=
|
171
|
+
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=VPQ_Evme7xmmGoQ45zt0Am0yPkaD4hxN1r5rEaVt6s8,4633
|
171
172
|
nucliadb/ingest/orm/processor/processor.py,sha256=jaEBwbv--WyoC8zcdxWAyF0dAzVA5crVDJl56Bqv1eI,31444
|
172
173
|
nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
|
173
174
|
nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
|
@@ -375,8 +376,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
375
376
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
376
377
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
377
378
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
378
|
-
nucliadb-6.6.1.
|
379
|
-
nucliadb-6.6.1.
|
380
|
-
nucliadb-6.6.1.
|
381
|
-
nucliadb-6.6.1.
|
382
|
-
nucliadb-6.6.1.
|
379
|
+
nucliadb-6.6.1.post4601.dist-info/METADATA,sha256=sIQKpJ7uabOsctChT2-wqcLap668DGEKKG6v_PVYAJ4,4158
|
380
|
+
nucliadb-6.6.1.post4601.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
381
|
+
nucliadb-6.6.1.post4601.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
382
|
+
nucliadb-6.6.1.post4601.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
383
|
+
nucliadb-6.6.1.post4601.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|