nucliadb 6.2.1.post2715__py3-none-any.whl → 6.2.1.post2720__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/export_import/models.py +3 -3
- nucliadb/ingest/orm/brain.py +37 -38
- nucliadb/ingest/settings.py +1 -1
- {nucliadb-6.2.1.post2715.dist-info → nucliadb-6.2.1.post2720.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post2715.dist-info → nucliadb-6.2.1.post2720.dist-info}/RECORD +9 -9
- {nucliadb-6.2.1.post2715.dist-info → nucliadb-6.2.1.post2720.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post2715.dist-info → nucliadb-6.2.1.post2720.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post2715.dist-info → nucliadb-6.2.1.post2720.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post2715.dist-info → nucliadb-6.2.1.post2720.dist-info}/zip-safe +0 -0
nucliadb/export_import/models.py
CHANGED
@@ -17,7 +17,7 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
|
20
|
+
import datetime
|
21
21
|
from enum import Enum
|
22
22
|
from typing import Any
|
23
23
|
|
@@ -57,8 +57,8 @@ class Metadata(BaseModel):
|
|
57
57
|
task: TaskMetadata = TaskMetadata(status=Status.SCHEDULED)
|
58
58
|
total: int = 0
|
59
59
|
processed: int = 0
|
60
|
-
created: datetime = datetime.
|
61
|
-
modified: datetime = datetime.
|
60
|
+
created: datetime.datetime = datetime.datetime.now(datetime.timezone.utc)
|
61
|
+
modified: datetime.datetime = datetime.datetime.now(datetime.timezone.utc)
|
62
62
|
|
63
63
|
|
64
64
|
class ExportMetadata(Metadata):
|
nucliadb/ingest/orm/brain.py
CHANGED
@@ -490,22 +490,29 @@ class ResourceBrain:
|
|
490
490
|
):
|
491
491
|
if metadata.mime_type != "":
|
492
492
|
labels["mt"].add(metadata.mime_type)
|
493
|
+
|
494
|
+
base_classification_relation = Relation(
|
495
|
+
relation=Relation.ABOUT,
|
496
|
+
source=relation_node_document,
|
497
|
+
to=RelationNode(
|
498
|
+
ntype=RelationNode.NodeType.LABEL,
|
499
|
+
),
|
500
|
+
)
|
493
501
|
for classification in metadata.classifications:
|
494
502
|
label = f"{classification.labelset}/{classification.label}"
|
495
503
|
if label not in user_canceled_labels:
|
496
504
|
labels["l"].add(label)
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
)
|
501
|
-
|
502
|
-
Relation(
|
503
|
-
relation=Relation.ABOUT,
|
504
|
-
source=relation_node_document,
|
505
|
-
to=relation_node_label,
|
506
|
-
)
|
507
|
-
)
|
505
|
+
relation = Relation()
|
506
|
+
relation.CopyFrom(base_classification_relation)
|
507
|
+
relation.to.value = label
|
508
|
+
self.brain.relations.append(relation)
|
509
|
+
|
508
510
|
# Data Augmentation + Processor entities
|
511
|
+
base_entity_relation = Relation(
|
512
|
+
relation=Relation.ENTITY,
|
513
|
+
source=relation_node_document,
|
514
|
+
to=RelationNode(ntype=RelationNode.NodeType.ENTITY),
|
515
|
+
)
|
509
516
|
use_legacy_entities = True
|
510
517
|
for data_augmentation_task_id, entities in metadata.entities.items():
|
511
518
|
# If we received the entities from the processor here, we don't want to use the legacy entities
|
@@ -521,38 +528,30 @@ class ResourceBrain:
|
|
521
528
|
labels["e"].add(
|
522
529
|
f"{entity_label}/{entity_text}"
|
523
530
|
) # Add data_augmentation_task_id as a prefix?
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
)
|
529
|
-
rel = Relation(
|
530
|
-
relation=Relation.ENTITY,
|
531
|
-
source=relation_node_document,
|
532
|
-
to=relation_node_entity,
|
533
|
-
)
|
534
|
-
self.brain.relations.append(rel)
|
531
|
+
relation = Relation()
|
532
|
+
relation.CopyFrom(base_entity_relation)
|
533
|
+
relation.to.value = entity_text
|
534
|
+
relation.to.subtype = entity_label
|
535
|
+
self.brain.relations.append(relation)
|
535
536
|
|
536
537
|
# Legacy processor entities
|
537
538
|
# TODO: Remove once processor doesn't use this anymore and remove the positions and ner fields from the message
|
539
|
+
def _parse_entity(klass_entity: str) -> tuple[str, str]:
|
540
|
+
try:
|
541
|
+
klass, entity = klass_entity.split("/", 1)
|
542
|
+
return klass, entity
|
543
|
+
except ValueError:
|
544
|
+
raise AttributeError(f"Entity should be with type {klass_entity}")
|
545
|
+
|
538
546
|
if use_legacy_entities:
|
539
|
-
for klass_entity
|
547
|
+
for klass_entity in metadata.positions.keys():
|
540
548
|
labels["e"].add(klass_entity)
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
relation_node_entity = RelationNode(
|
548
|
-
value=entity, ntype=RelationNode.NodeType.ENTITY, subtype=klass
|
549
|
-
)
|
550
|
-
rel = Relation(
|
551
|
-
relation=Relation.ENTITY,
|
552
|
-
source=relation_node_document,
|
553
|
-
to=relation_node_entity,
|
554
|
-
)
|
555
|
-
self.brain.relations.append(rel)
|
549
|
+
klass, entity = _parse_entity(klass_entity)
|
550
|
+
relation = Relation()
|
551
|
+
relation.CopyFrom(base_entity_relation)
|
552
|
+
relation.to.value = entity
|
553
|
+
relation.to.subtype = klass
|
554
|
+
self.brain.relations.append(relation)
|
556
555
|
|
557
556
|
def apply_field_labels(
|
558
557
|
self,
|
nucliadb/ingest/settings.py
CHANGED
@@ -77,7 +77,7 @@ class Settings(DriverSettings):
|
|
77
77
|
total_replicas: int = 1 # number of ingest processor replicas in the cluster
|
78
78
|
nuclia_partitions: int = 50
|
79
79
|
|
80
|
-
max_receive_message_length: int =
|
80
|
+
max_receive_message_length: int = 500 # In MB
|
81
81
|
|
82
82
|
# Search query timeouts
|
83
83
|
relation_search_timeout: float = 10.0
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.
|
3
|
+
Version: 6.2.1.post2720
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2720
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2720
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post2720
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post2720
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nucliadb-node-binding>=2.26.0
|
31
31
|
Requires-Dist: nuclia-models>=0.24.2
|
@@ -101,7 +101,7 @@ nucliadb/export_import/datamanager.py,sha256=b9Vhf-WqJ8HosTdNpKXlGj-Vi7MHyMoPxL0
|
|
101
101
|
nucliadb/export_import/exceptions.py,sha256=Dw8WqfG4r6MPJc5TPfbjMvCgXXWTcTOecGHRVU1h3kM,1949
|
102
102
|
nucliadb/export_import/exporter.py,sha256=kgbW-B7FNW7mlc9rBVEfwkkFTqD58TWSTDe9zkmEnBc,7098
|
103
103
|
nucliadb/export_import/importer.py,sha256=v5cq9Nn8c2zrY_K_00mydR52f8mdFxR7tLdtNLQ0qvk,4229
|
104
|
-
nucliadb/export_import/models.py,sha256=
|
104
|
+
nucliadb/export_import/models.py,sha256=dbjScNkiMRv4X3Ktudy1JRliD25bfoDTy3JmEZgQSCc,2121
|
105
105
|
nucliadb/export_import/tasks.py,sha256=fpCBeFYPReyLIdk38LDM9Tpnw_VczeMrobT4n1RAIp4,2507
|
106
106
|
nucliadb/export_import/utils.py,sha256=zrNrkkc9i3uT-R6Ju4J_0WNrzayln3KuQFCz-_qIaIA,19613
|
107
107
|
nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
|
@@ -111,7 +111,7 @@ nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE
|
|
111
111
|
nucliadb/ingest/processing.py,sha256=0iWEgJVdcOHudwY8Uz9vXRdDoUznM3_WGmviY8FQWT0,20276
|
112
112
|
nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
113
113
|
nucliadb/ingest/serialize.py,sha256=GSDfrO4JLm-QLKw8LJ7TD1JFcXXvwm-ugXzbCfGh3Fk,15492
|
114
|
-
nucliadb/ingest/settings.py,sha256=
|
114
|
+
nucliadb/ingest/settings.py,sha256=0B-wQNa8FLqtNcQgRzh-fuIuGptM816XHcbH1NQKfmE,3050
|
115
115
|
nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
|
116
116
|
nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
117
117
|
nucliadb/ingest/consumer/auditing.py,sha256=EJoqRRr4dk2eUMK0GOY6b9xHO0YLQ0LjoP_xZBLACZo,7280
|
@@ -131,7 +131,7 @@ nucliadb/ingest/fields/generic.py,sha256=elgtqv15aJUq3zY7X_g0bli_2BpcwPArVvzhe54
|
|
131
131
|
nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJs,4172
|
132
132
|
nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOFw,1594
|
133
133
|
nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
134
|
-
nucliadb/ingest/orm/brain.py,sha256=
|
134
|
+
nucliadb/ingest/orm/brain.py,sha256=5n1m2ysTC5dlzOUL2cBa27wCLG9wDqfe85wK7id80AI,28437
|
135
135
|
nucliadb/ingest/orm/broker_message.py,sha256=JYYUJIZEL_EqovQuw6u-FmEkjyoYlxIXJq9hFekOiks,6441
|
136
136
|
nucliadb/ingest/orm/entities.py,sha256=2PslT1FZ6yCvJtjR0UpKTSzxJrtS-C_gZx4ZTWHunTc,15759
|
137
137
|
nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
|
@@ -332,9 +332,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
332
332
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
333
333
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
334
334
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
335
|
-
nucliadb-6.2.1.
|
336
|
-
nucliadb-6.2.1.
|
337
|
-
nucliadb-6.2.1.
|
338
|
-
nucliadb-6.2.1.
|
339
|
-
nucliadb-6.2.1.
|
340
|
-
nucliadb-6.2.1.
|
335
|
+
nucliadb-6.2.1.post2720.dist-info/METADATA,sha256=3VJn78tBFIRKgQm1EtUtv44xJS9T8cPTUK3LuUuYwgg,4429
|
336
|
+
nucliadb-6.2.1.post2720.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
337
|
+
nucliadb-6.2.1.post2720.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
338
|
+
nucliadb-6.2.1.post2720.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
339
|
+
nucliadb-6.2.1.post2720.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
340
|
+
nucliadb-6.2.1.post2720.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|