ingestify 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestify/__init__.py +1 -1
- ingestify/domain/models/ingestion/ingestion_job.py +1 -1
- ingestify/domain/models/resources/dataset_resource.py +6 -3
- ingestify/infra/store/dataset/sqlalchemy/repository.py +7 -2
- ingestify/tests/test_engine.py +27 -5
- {ingestify-0.9.2.dist-info → ingestify-0.9.4.dist-info}/METADATA +1 -1
- {ingestify-0.9.2.dist-info → ingestify-0.9.4.dist-info}/RECORD +10 -10
- {ingestify-0.9.2.dist-info → ingestify-0.9.4.dist-info}/WHEEL +0 -0
- {ingestify-0.9.2.dist-info → ingestify-0.9.4.dist-info}/entry_points.txt +0 -0
- {ingestify-0.9.2.dist-info → ingestify-0.9.4.dist-info}/top_level.txt +0 -0
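ingestify/__init__.py
CHANGED
[+1 -1 hunk for this file is not present in this capture]
ingestify/domain/models/ingestion/ingestion_job.py
CHANGED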
|
@@ -137,7 +137,7 @@ class UpdateDatasetTask(Task):
|
|
|
137
137
|
for file_id, file_resource in self.dataset_resource.files.items()
|
|
138
138
|
}
|
|
139
139
|
|
|
140
|
-
self.dataset_resource.run_post_load_files(files)
|
|
140
|
+
self.dataset_resource.run_post_load_files(files, self.dataset)
|
|
141
141
|
|
|
142
142
|
try:
|
|
143
143
|
revision = self.store.update_dataset(
|
|
ingestify/domain/models/resources/dataset_resource.py
CHANGED
|
@@ -3,6 +3,7 @@ from typing import Optional, Callable, Any, Protocol, TYPE_CHECKING, Dict # noq
|
|
|
3
3
|
from pydantic import Field
|
|
4
4
|
|
|
5
5
|
from ingestify.domain.models.base import BaseModel
|
|
6
|
+
from ingestify.domain.models.dataset import Dataset
|
|
6
7
|
from ingestify.domain.models.dataset.dataset_state import DatasetState
|
|
7
8
|
from ingestify.exceptions import DuplicateFile
|
|
8
9
|
|
|
@@ -51,17 +52,19 @@ class DatasetResource(BaseModel):
|
|
|
51
52
|
state: DatasetState = Field(default_factory=lambda: DatasetState.COMPLETE)
|
|
52
53
|
files: dict[str, FileResource] = Field(default_factory=dict)
|
|
53
54
|
post_load_files: Optional[
|
|
54
|
-
Callable[["DatasetResource", Dict[str, DraftFile]], None]
|
|
55
|
+
Callable[["DatasetResource", Dict[str, DraftFile], Optional[Dataset]], None]
|
|
55
56
|
] = None
|
|
56
57
|
|
|
57
|
-
def run_post_load_files(
|
|
58
|
+
def run_post_load_files(
|
|
59
|
+
self, files: Dict[str, DraftFile], existing_dataset: Optional[Dataset] = None
|
|
60
|
+
):
|
|
58
61
|
"""Hook to modify dataset attributes based on loaded file content.
|
|
59
62
|
|
|
60
63
|
Useful for setting state based on file content, e.g., keep state=SCHEDULED
|
|
61
64
|
when files contain '{}', change to COMPLETE when they contain actual data.
|
|
62
65
|
"""
|
|
63
66
|
if self.post_load_files:
|
|
64
|
-
self.post_load_files(self, files)
|
|
67
|
+
self.post_load_files(self, files, existing_dataset)
|
|
65
68
|
|
|
66
69
|
def add_file(
|
|
67
70
|
self,
|
|
ingestify/infra/store/dataset/sqlalchemy/repository.py
CHANGED
|
@@ -470,9 +470,14 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
|
|
|
470
470
|
dataset_ids = [row.dataset_id for row in dataset_query]
|
|
471
471
|
datasets = self._load_datasets(dataset_ids)
|
|
472
472
|
|
|
473
|
+
last_modified_values = [
|
|
474
|
+
dataset.last_modified_at
|
|
475
|
+
for dataset in datasets
|
|
476
|
+
if dataset.last_modified_at is not None
|
|
477
|
+
]
|
|
473
478
|
dataset_collection_metadata = DatasetCollectionMetadata(
|
|
474
|
-
last_modified=max(
|
|
475
|
-
if
|
|
479
|
+
last_modified=max(last_modified_values)
|
|
480
|
+
if last_modified_values
|
|
476
481
|
else None,
|
|
477
482
|
row_count=len(datasets),
|
|
478
483
|
)
|
ingestify/tests/test_engine.py
CHANGED
|
@@ -460,7 +460,9 @@ def test_dev_engine():
|
|
|
460
460
|
assert datasets.first().name == "Test Dataset"
|
|
461
461
|
|
|
462
462
|
|
|
463
|
-
def post_load_hook(
|
|
463
|
+
def post_load_hook(
|
|
464
|
+
dataset_resource: DatasetResource, files: dict[str, DraftFile], existing_dataset
|
|
465
|
+
):
|
|
464
466
|
# Change state to COMPLETE if file content is not '{}'
|
|
465
467
|
for file in files.values():
|
|
466
468
|
if file.size > 2:
|
|
@@ -526,12 +528,32 @@ def test_post_load_files_hook(config_file):
|
|
|
526
528
|
def test_force_save_creates_revision(config_file):
|
|
527
529
|
"""Test that datasets get a revision even when no files are persisted."""
|
|
528
530
|
engine = get_engine(config_file, "main")
|
|
531
|
+
|
|
532
|
+
# Create one dataset with files and one without
|
|
533
|
+
add_ingestion_plan(
|
|
534
|
+
engine, SimpleFakeSource("fake-source"), competition_id=1, season_id=2
|
|
535
|
+
)
|
|
529
536
|
add_ingestion_plan(
|
|
530
|
-
engine, NoFilesSource("fake-source"), competition_id=1, season_id=
|
|
537
|
+
engine, NoFilesSource("fake-source"), competition_id=1, season_id=3
|
|
531
538
|
)
|
|
532
539
|
|
|
533
540
|
engine.load()
|
|
534
|
-
dataset = engine.store.get_dataset_collection().first()
|
|
535
541
|
|
|
536
|
-
|
|
537
|
-
|
|
542
|
+
# This should not fail even though one dataset has no last_modified_at
|
|
543
|
+
datasets = engine.store.get_dataset_collection()
|
|
544
|
+
assert len(datasets) == 2
|
|
545
|
+
|
|
546
|
+
# Verify the dataset without files still has a revision
|
|
547
|
+
dataset_without_files = engine.store.get_dataset_collection(season_id=3).first()
|
|
548
|
+
assert len(dataset_without_files.revisions) == 1
|
|
549
|
+
assert len(dataset_without_files.current_revision.modified_files) == 0
|
|
550
|
+
|
|
551
|
+
dataset_with_last_modified = engine.store.get_dataset_collection(
|
|
552
|
+
season_id=2
|
|
553
|
+
).first()
|
|
554
|
+
|
|
555
|
+
dataset_without_files = engine.store.get_dataset_collection(metadata_only=True)
|
|
556
|
+
assert (
|
|
557
|
+
dataset_without_files.metadata.last_modified
|
|
558
|
+
== dataset_with_last_modified.last_modified_at
|
|
559
|
+
)
|
|
{ingestify-0.9.2.dist-info → ingestify-0.9.4.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
ingestify/__init__.py,sha256=
|
|
1
|
+
ingestify/__init__.py,sha256=LJSJBoIR12MN0q1qS4q-tYq9_yc0n67c-hKzVgYyuUo,336
|
|
2
2
|
ingestify/cmdline.py,sha256=Rs1_lSKSIJrcygH5fvtOGicOl_e0sZYW7deqp4_jGbY,6233
|
|
3
3
|
ingestify/exceptions.py,sha256=izRzaLQmMy-4P8ZqGqVZyf4k6LFYOYqwYLuRaUH8BJw,187
|
|
4
4
|
ingestify/main.py,sha256=mMXDNzSl1dzN03BUiS97uP3XwFMdgadxP0hJlONsZ_g,15789
|
|
@@ -39,11 +39,11 @@ ingestify/domain/models/event/event_bus.py,sha256=feVXsbBcRNkbWYvXbmz-Yi9-3R690y
|
|
|
39
39
|
ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
|
|
40
40
|
ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
|
|
41
41
|
ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
-
ingestify/domain/models/ingestion/ingestion_job.py,sha256=
|
|
42
|
+
ingestify/domain/models/ingestion/ingestion_job.py,sha256=3Icbgte5QDbho7bk7mPATsflWAtL95wOWa0WKtd8hEE,15588
|
|
43
43
|
ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=7dmkWEvE7lMSv1ILWcSvys1bUGuGe_s-YbOFC6eYMBI,4794
|
|
44
44
|
ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
|
|
45
45
|
ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
|
|
46
|
-
ingestify/domain/models/resources/dataset_resource.py,sha256=
|
|
46
|
+
ingestify/domain/models/resources/dataset_resource.py,sha256=UJYd3UVY-NP2PF0oYAiGp1aOuyxKfLJZKJE5Lxd44yY,3745
|
|
47
47
|
ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
|
|
48
48
|
ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
|
|
49
49
|
ingestify/domain/models/task/task.py,sha256=OwLZQi9GGe0O8m1dKvJdN2Rham5oilI49KyKc5uV20A,161
|
|
@@ -66,7 +66,7 @@ ingestify/infra/source/statsbomb/match.py,sha256=8Zpdys6-bB_ral2AmjGKhF4BnXW3F0Y
|
|
|
66
66
|
ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
|
|
67
67
|
ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
68
|
ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
|
|
69
|
-
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=
|
|
69
|
+
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=2z_a1ICIokx5yUGz1Jn-DBvFzK1pryAjPYLQTP3pxAw,24447
|
|
70
70
|
ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=1FewCsN7jdk1ITzL_neOwJWtHD03NxCS9E8dhZcz4oY,12236
|
|
71
71
|
ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
|
|
72
72
|
ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
|
|
@@ -75,14 +75,14 @@ ingestify/infra/store/file/s3_file_repository.py,sha256=tz_EZ_gun7W2qJMlI3j_R03i
|
|
|
75
75
|
ingestify/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
76
|
ingestify/tests/conftest.py,sha256=Cr768nLMWUfIP6FMR7aFhUCY4uQ9Tz_bXOq0udpiDEE,411
|
|
77
77
|
ingestify/tests/test_auto_ingest.py,sha256=coMOzJBTbeDwUSYDLnqmkaCXpG-6WQeKqf_nj9XFiA4,14502
|
|
78
|
-
ingestify/tests/test_engine.py,sha256=
|
|
78
|
+
ingestify/tests/test_engine.py,sha256=iOjAoPvJSdR1WCbNjTSTnzFMVdusi_MZWN6InUqi9Ew,17558
|
|
79
79
|
ingestify/tests/test_events.py,sha256=A1f8H4HRyn52SWo3wV_MgSeb6IbT_lNi9wWAK8EGsK4,7806
|
|
80
80
|
ingestify/tests/test_file_cache.py,sha256=Xbh_VLLDH-KQXE3MeujDeOjjYYbAnjGR6wsHwMInKco,3049
|
|
81
81
|
ingestify/tests/test_pagination.py,sha256=uAKDMsM6fYSa4NcAlXDllu2y-8lnh0AclhPZ5MWJKn8,5539
|
|
82
82
|
ingestify/tests/test_store_version.py,sha256=4czUG8LtaGxgjW4trw7BzYJA8blQp3-HM8w-7HjqFl0,2508
|
|
83
83
|
ingestify/tests/test_table_prefix.py,sha256=6N42T6hfulqTlsUlrwhNmZ-TK-ZOt4U8Jx9NxKyLS4I,2844
|
|
84
|
-
ingestify-0.9.
|
|
85
|
-
ingestify-0.9.
|
|
86
|
-
ingestify-0.9.
|
|
87
|
-
ingestify-0.9.
|
|
88
|
-
ingestify-0.9.
|
|
84
|
+
ingestify-0.9.4.dist-info/METADATA,sha256=sIvu3pk8p4F2cpNDHA7RQV8fmRfr01Ifc_DsIcM3PL4,8263
|
|
85
|
+
ingestify-0.9.4.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
86
|
+
ingestify-0.9.4.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
|
|
87
|
+
ingestify-0.9.4.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
|
|
88
|
+
ingestify-0.9.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|