nucliadb 6.2.1.post3385__py3-none-any.whl → 6.2.1.post3395__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #31
22
+
23
+ At some point we had a bug that allowed repeated languages to be stored in a resource's basic metadata.
24
+ This migration aims to fix that by removing the duplicates.
25
+
26
+ """
27
+
28
+ import logging
29
+
30
+ from nucliadb.common import datamanagers
31
+ from nucliadb.migrator.context import ExecutionContext
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
async def migrate(context: ExecutionContext) -> None:
    """Global migration step: intentionally does nothing.

    `context` is accepted but not used; the body is empty.
    """
37
+
38
+
39
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Find resources under `kbid` with repeated languages and fix them.

    Every resource id yielded for `kbid` has its basic read through a single
    read-only transaction. When the languages listed in the basic metadata
    contain at least one repeated entry, the resource is handed to
    `fix_resource`. `context` is accepted but not used here.
    """
    async with datamanagers.with_ro_transaction() as rs_txn:
        async for rid in datamanagers.resources.iterate_resource_ids(kbid=kbid):
            basic = await datamanagers.resources.get_basic(rs_txn, kbid=kbid, rid=rid)
            if basic is None:
                # No basic stored for this resource id: nothing to inspect.
                continue
            languages = basic.metadata.languages
            if len(languages) == len(set(languages)):
                # Every language appears exactly once: already clean.
                continue
            await fix_resource(kbid=kbid, rid=rid)
48
+
49
+
50
async def fix_resource(kbid: str, rid: str) -> None:
    """Rewrite a resource's basic so that each language is listed only once.

    The basic is re-read inside a dedicated read-write transaction instead of
    reusing whatever copy the caller inspected. If the basic is missing, or
    no longer contains repeated languages, nothing is written.

    Args:
        kbid: forwarded to the datamanagers calls to locate the resource.
        rid: id of the resource whose basic is rewritten.
    """
    async with datamanagers.with_rw_transaction() as txn:
        basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=rid)
        if basic is None:
            return
        # dict.fromkeys keeps the first occurrence of each language in its
        # original position; a plain set() would reorder them arbitrarily.
        unique_langs = list(dict.fromkeys(basic.metadata.languages))
        if len(unique_langs) == len(basic.metadata.languages):
            # Nothing is duplicated in the copy read by this transaction,
            # so skip the redundant write and commit.
            return
        logger.info("Fixing duplicate languages", extra={"kbid": kbid, "rid": rid})
        basic.metadata.ClearField("languages")
        basic.metadata.languages.extend(unique_langs)
        await datamanagers.resources.set_basic(txn, kbid=kbid, rid=rid, basic=basic)
        await txn.commit()
@@ -176,6 +176,16 @@ class Resource:
176
176
 
177
177
  self.basic.MergeFrom(payload)
178
178
 
179
+ # Prevent duplicated languages
180
+ unique_languages = set(self.basic.metadata.languages)
181
+ self.basic.metadata.ClearField("languages")
182
+ self.basic.metadata.languages.extend(unique_languages)
183
+
184
+ # Prevent duplicated labels
185
+ unique_labels = set(self.basic.labels)
186
+ self.basic.ClearField("labels")
187
+ self.basic.labels.extend(unique_labels)
188
+
179
189
  self.set_processing_status(self.basic, payload)
180
190
 
181
191
  # We force the usermetadata classification to be the one defined
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3385
3
+ Version: 6.2.1.post3395
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3385
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3385
25
- Requires-Dist: nucliadb-protos>=6.2.1.post3385
26
- Requires-Dist: nucliadb-models>=6.2.1.post3385
27
- Requires-Dist: nidx-protos>=6.2.1.post3385
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3395
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3395
25
+ Requires-Dist: nucliadb-protos>=6.2.1.post3395
26
+ Requires-Dist: nucliadb-models>=6.2.1.post3395
27
+ Requires-Dist: nidx-protos>=6.2.1.post3395
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -26,6 +26,7 @@ migrations/0027_rollover_texts3.py,sha256=UQDaMOayVuqDisf82NDrPStoEVveHvdjkSmzbI
26
26
  migrations/0028_extracted_vectors_reference.py,sha256=49DHCIlBpjofU8cYVHTdWv0EBIlnPTWV2WCezf0rJUo,2392
27
27
  migrations/0029_backfill_field_status.py,sha256=QWF69n1da9lpRnbEpgbqPjSQ-Wfn6rMC7Enz6bBYGt4,5663
28
28
  migrations/0030_label_deduplication.py,sha256=y14TxtCMi3-TBMz_eZoyyPDHNlZb29taJujlDuHumsA,2008
29
+ migrations/0031_languages_deduplication.py,sha256=o6va6lP3oTRT1uSzp5MIhHHBFbhCxSZ-oNlXXpiAdUo,2340
29
30
  migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
30
31
  migrations/pg/0001_bootstrap.py,sha256=Fsqkeof50m7fKiJN05kmNEMwiKDlOrAgcAS5sLLkutA,1256
31
32
  migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
@@ -133,7 +134,7 @@ nucliadb/ingest/orm/entities.py,sha256=3_n6lKhBy2GsdmNmkh0_mvxP8md20OZsbtTNEmfJ8
133
134
  nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
134
135
  nucliadb/ingest/orm/knowledgebox.py,sha256=IGOPvBR1qXqDxE5DeiOdYCLdPgjzOVVpsASJ2zYvWwQ,23651
135
136
  nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
136
- nucliadb/ingest/orm/resource.py,sha256=9J1O9VW2ZNTEbJ9Fv0J3yIGR5Gf_FFJJjiBzO0SeRXE,44219
137
+ nucliadb/ingest/orm/resource.py,sha256=YEhTn3VB5IHOx6eOrEIMx0wlBBDO1WZOb-fhA494YSM,44640
137
138
  nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
138
139
  nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
139
140
  nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
@@ -337,8 +338,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
337
338
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
338
339
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
339
340
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
340
- nucliadb-6.2.1.post3385.dist-info/METADATA,sha256=9yWSfqPtCZGTZuIyyp0ieqekuoXpw7xCqq4TejYH29M,4291
341
- nucliadb-6.2.1.post3385.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
342
- nucliadb-6.2.1.post3385.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
343
- nucliadb-6.2.1.post3385.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
344
- nucliadb-6.2.1.post3385.dist-info/RECORD,,
341
+ nucliadb-6.2.1.post3395.dist-info/METADATA,sha256=dK45Sc6ekVyUZED7nXwb3j74ycmcL-hyQexGDV2hqxA,4291
342
+ nucliadb-6.2.1.post3395.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
343
+ nucliadb-6.2.1.post3395.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
344
+ nucliadb-6.2.1.post3395.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
345
+ nucliadb-6.2.1.post3395.dist-info/RECORD,,