nucliadb 6.2.1.post3382__py3-none-any.whl → 6.2.1.post3395__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0031_languages_deduplication.py +60 -0
- nucliadb/ingest/orm/resource.py +10 -0
- {nucliadb-6.2.1.post3382.dist-info → nucliadb-6.2.1.post3395.dist-info}/METADATA +6 -6
- {nucliadb-6.2.1.post3382.dist-info → nucliadb-6.2.1.post3395.dist-info}/RECORD +7 -6
- {nucliadb-6.2.1.post3382.dist-info → nucliadb-6.2.1.post3395.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3382.dist-info → nucliadb-6.2.1.post3395.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3382.dist-info → nucliadb-6.2.1.post3395.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,60 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #31
|
22
|
+
|
23
|
+
At some point we had a bug that allowed repeated languages to be stored in the basic metadata.
|
24
|
+
This migration aims to fix that by removing the duplicates.
|
25
|
+
|
26
|
+
"""
|
27
|
+
|
28
|
+
import logging
|
29
|
+
|
30
|
+
from nucliadb.common import datamanagers
|
31
|
+
from nucliadb.migrator.context import ExecutionContext
|
32
|
+
|
33
|
+
logger = logging.getLogger(__name__)
|
34
|
+
|
35
|
+
|
36
|
+
# Global migration entry point. The body is `...`, so it does nothing here;
# the per-knowledge-box work of this migration is done in `migrate_kb`.
async def migrate(context: ExecutionContext) -> None: ...
|
37
|
+
|
38
|
+
|
39
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Find resources of a knowledge box with repeated languages and fix them.

    Every resource id of ``kbid`` is visited; its basic metadata is read
    through a single read-only transaction. Resources whose language list
    contains repeated entries are handed to ``fix_resource``.

    Args:
        context: Migration execution context (not used by this function).
        kbid: Identifier of the knowledge box to scan.
    """
    resources = datamanagers.resources
    async with datamanagers.with_ro_transaction() as ro_txn:
        async for resource_id in resources.iterate_resource_ids(kbid=kbid):
            basic = await resources.get_basic(ro_txn, kbid=kbid, rid=resource_id)
            if basic is None:
                # No basic stored for this id: nothing to inspect.
                continue
            languages = basic.metadata.languages
            if len(languages) == len(set(languages)):
                # Every language appears once: the resource is already clean.
                continue
            await fix_resource(kbid=kbid, rid=resource_id)
|
48
|
+
|
49
|
+
|
50
|
+
async def fix_resource(kbid: str, rid: str) -> None:
    """Rewrite a resource's basic metadata so each language is listed once.

    The basic is re-read inside its own read-write transaction, the
    ``languages`` field is replaced by its de-duplicated content, and the
    result is stored and committed.

    Args:
        kbid: Identifier of the knowledge box that owns the resource.
        rid: Identifier of the resource to fix.
    """
    async with datamanagers.with_rw_transaction() as txn:
        basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=rid)
        if basic is None:
            # Nothing stored for this resource id: nothing to fix.
            return
        logger.info("Fixing duplicate languages", extra={"kbid": kbid, "rid": rid})
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would store the languages in an arbitrary order that can differ
        # from one run to the next.
        unique_langs = list(dict.fromkeys(basic.metadata.languages))
        basic.metadata.ClearField("languages")
        basic.metadata.languages.extend(unique_langs)
        await datamanagers.resources.set_basic(txn, kbid=kbid, rid=rid, basic=basic)
        await txn.commit()
|
nucliadb/ingest/orm/resource.py
CHANGED
@@ -176,6 +176,16 @@ class Resource:
|
|
176
176
|
|
177
177
|
self.basic.MergeFrom(payload)
|
178
178
|
|
179
|
+
# Prevent duplicated languages
|
180
|
+
unique_languages = set(self.basic.metadata.languages)
|
181
|
+
self.basic.metadata.ClearField("languages")
|
182
|
+
self.basic.metadata.languages.extend(unique_languages)
|
183
|
+
|
184
|
+
# Prevent duplicated labels
|
185
|
+
unique_labels = set(self.basic.labels)
|
186
|
+
self.basic.ClearField("labels")
|
187
|
+
self.basic.labels.extend(unique_labels)
|
188
|
+
|
179
189
|
self.set_processing_status(self.basic, payload)
|
180
190
|
|
181
191
|
# We force the usermetadata classification to be the one defined
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post3382
|
3
|
+
Version: 6.2.1.post3395
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3382
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3382
|
25
|
-
Requires-Dist: nucliadb-protos>=6.2.1.post3382
|
26
|
-
Requires-Dist: nucliadb-models>=6.2.1.post3382
|
27
|
-
Requires-Dist: nidx-protos>=6.2.1.post3382
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3395
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3395
|
25
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post3395
|
26
|
+
Requires-Dist: nucliadb-models>=6.2.1.post3395
|
27
|
+
Requires-Dist: nidx-protos>=6.2.1.post3395
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -26,6 +26,7 @@ migrations/0027_rollover_texts3.py,sha256=UQDaMOayVuqDisf82NDrPStoEVveHvdjkSmzbI
|
|
26
26
|
migrations/0028_extracted_vectors_reference.py,sha256=49DHCIlBpjofU8cYVHTdWv0EBIlnPTWV2WCezf0rJUo,2392
|
27
27
|
migrations/0029_backfill_field_status.py,sha256=QWF69n1da9lpRnbEpgbqPjSQ-Wfn6rMC7Enz6bBYGt4,5663
|
28
28
|
migrations/0030_label_deduplication.py,sha256=y14TxtCMi3-TBMz_eZoyyPDHNlZb29taJujlDuHumsA,2008
|
29
|
+
migrations/0031_languages_deduplication.py,sha256=o6va6lP3oTRT1uSzp5MIhHHBFbhCxSZ-oNlXXpiAdUo,2340
|
29
30
|
migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
30
31
|
migrations/pg/0001_bootstrap.py,sha256=Fsqkeof50m7fKiJN05kmNEMwiKDlOrAgcAS5sLLkutA,1256
|
31
32
|
migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
|
@@ -133,7 +134,7 @@ nucliadb/ingest/orm/entities.py,sha256=3_n6lKhBy2GsdmNmkh0_mvxP8md20OZsbtTNEmfJ8
|
|
133
134
|
nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
|
134
135
|
nucliadb/ingest/orm/knowledgebox.py,sha256=IGOPvBR1qXqDxE5DeiOdYCLdPgjzOVVpsASJ2zYvWwQ,23651
|
135
136
|
nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
|
136
|
-
nucliadb/ingest/orm/resource.py,sha256=
|
137
|
+
nucliadb/ingest/orm/resource.py,sha256=YEhTn3VB5IHOx6eOrEIMx0wlBBDO1WZOb-fhA494YSM,44640
|
137
138
|
nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
|
138
139
|
nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
|
139
140
|
nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
|
@@ -337,8 +338,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
337
338
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
338
339
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
339
340
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
340
|
-
nucliadb-6.2.1.
|
341
|
-
nucliadb-6.2.1.
|
342
|
-
nucliadb-6.2.1.
|
343
|
-
nucliadb-6.2.1.
|
344
|
-
nucliadb-6.2.1.
|
341
|
+
nucliadb-6.2.1.post3395.dist-info/METADATA,sha256=dK45Sc6ekVyUZED7nXwb3j74ycmcL-hyQexGDV2hqxA,4291
|
342
|
+
nucliadb-6.2.1.post3395.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
343
|
+
nucliadb-6.2.1.post3395.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
344
|
+
nucliadb-6.2.1.post3395.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
345
|
+
nucliadb-6.2.1.post3395.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|