ingestify 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestify/__init__.py +1 -1
- ingestify/application/dataset_store.py +5 -2
- ingestify/domain/models/dataset/revision.py +3 -1
- ingestify/domain/models/task/task_summary.py +1 -3
- ingestify/infra/fetch/http.py +3 -1
- ingestify/infra/store/dataset/sqlalchemy/repository.py +9 -7
- ingestify/tests/test_engine.py +36 -0
- {ingestify-0.9.1.dist-info → ingestify-0.9.2.dist-info}/METADATA +1 -1
- {ingestify-0.9.1.dist-info → ingestify-0.9.2.dist-info}/RECORD +12 -12
- {ingestify-0.9.1.dist-info → ingestify-0.9.2.dist-info}/WHEEL +0 -0
- {ingestify-0.9.1.dist-info → ingestify-0.9.2.dist-info}/entry_points.txt +0 -0
- {ingestify-0.9.1.dist-info → ingestify-0.9.2.dist-info}/top_level.txt +0 -0
ingestify/__init__.py
CHANGED
|
@@ -383,6 +383,7 @@ class DatasetStore:
|
|
|
383
383
|
files: Dict[str, DraftFile],
|
|
384
384
|
revision_source: RevisionSource,
|
|
385
385
|
description: str = "Update",
|
|
386
|
+
force_save: bool = False,
|
|
386
387
|
):
|
|
387
388
|
"""
|
|
388
389
|
Create new revision first, so FileRepository can use
|
|
@@ -392,7 +393,7 @@ class DatasetStore:
|
|
|
392
393
|
created_at = utcnow()
|
|
393
394
|
|
|
394
395
|
persisted_files_ = self._persist_files(dataset, revision_id, files)
|
|
395
|
-
if persisted_files_:
|
|
396
|
+
if persisted_files_ or force_save:
|
|
396
397
|
# It can happen an API tells us data is changed, but it was not changed. In this case
|
|
397
398
|
# we decide to ignore it.
|
|
398
399
|
# Make sure there are files changed before creating a new revision
|
|
@@ -487,7 +488,9 @@ class DatasetStore:
|
|
|
487
488
|
updated_at=now,
|
|
488
489
|
last_modified_at=None, # Not known at this moment
|
|
489
490
|
)
|
|
490
|
-
revision = self.add_revision(dataset, files, revision_source, description)
|
|
491
|
+
revision = self.add_revision(
|
|
492
|
+
dataset, files, revision_source, description, force_save=True
|
|
493
|
+
)
|
|
491
494
|
|
|
492
495
|
self.dispatch(DatasetCreated(dataset=dataset))
|
|
493
496
|
return revision
|
|
@@ -38,7 +38,9 @@ class Revision(BaseModel):
|
|
|
38
38
|
|
|
39
39
|
@property
|
|
40
40
|
def last_modified_at(self):
|
|
41
|
-
|
|
41
|
+
if self.modified_files:
|
|
42
|
+
return max(file.modified_at for file in self.modified_files)
|
|
43
|
+
return None
|
|
42
44
|
|
|
43
45
|
@property
|
|
44
46
|
def modified_files_map(self) -> Dict[str, File]:
|
|
@@ -86,9 +86,7 @@ class TaskSummary(BaseModel, HasTiming):
|
|
|
86
86
|
if revision:
|
|
87
87
|
self.persisted_file_count = len(revision.modified_files)
|
|
88
88
|
self.bytes_retrieved = sum(file.size for file in revision.modified_files)
|
|
89
|
-
self.last_modified = max(
|
|
90
|
-
file.modified_at for file in revision.modified_files
|
|
91
|
-
)
|
|
89
|
+
self.last_modified = revision.last_modified_at
|
|
92
90
|
else:
|
|
93
91
|
self.state = TaskState.FINISHED_IGNORED
|
|
94
92
|
|
ingestify/infra/fetch/http.py
CHANGED
|
@@ -58,7 +58,9 @@ def retrieve_http(
|
|
|
58
58
|
)
|
|
59
59
|
# else:
|
|
60
60
|
# print(f"{current_file.modified_at=} {last_modified=}")
|
|
61
|
-
headers["if-modified-since"] = format_datetime(
|
|
61
|
+
headers["if-modified-since"] = format_datetime(
|
|
62
|
+
current_file.modified_at, usegmt=True
|
|
63
|
+
)
|
|
62
64
|
headers["if-none-match"] = current_file.tag
|
|
63
65
|
|
|
64
66
|
http_kwargs = {}
|
|
@@ -375,7 +375,9 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
|
|
|
375
375
|
dataset_ids_cte.c.dataset_id == self.revision_table.c.dataset_id,
|
|
376
376
|
)
|
|
377
377
|
)
|
|
378
|
-
.order_by(
|
|
378
|
+
.order_by(
|
|
379
|
+
self.revision_table.c.dataset_id, self.revision_table.c.revision_id
|
|
380
|
+
)
|
|
379
381
|
)
|
|
380
382
|
|
|
381
383
|
for dataset_id, revisions in itertools.groupby(
|
|
@@ -560,22 +562,22 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
|
|
|
560
562
|
try:
|
|
561
563
|
# Delete modified files related to the dataset
|
|
562
564
|
connection.execute(
|
|
563
|
-
file_table.delete().where(
|
|
564
|
-
file_table.c.dataset_id == dataset.dataset_id
|
|
565
|
+
self.file_table.delete().where(
|
|
566
|
+
self.file_table.c.dataset_id == dataset.dataset_id
|
|
565
567
|
)
|
|
566
568
|
)
|
|
567
569
|
|
|
568
570
|
# Delete revisions related to the dataset
|
|
569
571
|
connection.execute(
|
|
570
|
-
revision_table.delete().where(
|
|
571
|
-
revision_table.c.dataset_id == dataset.dataset_id
|
|
572
|
+
self.revision_table.delete().where(
|
|
573
|
+
self.revision_table.c.dataset_id == dataset.dataset_id
|
|
572
574
|
)
|
|
573
575
|
)
|
|
574
576
|
|
|
575
577
|
# Delete the dataset itself
|
|
576
578
|
connection.execute(
|
|
577
|
-
dataset_table.delete().where(
|
|
578
|
-
dataset_table.c.dataset_id == dataset.dataset_id
|
|
579
|
+
self.dataset_table.delete().where(
|
|
580
|
+
self.dataset_table.c.dataset_id == dataset.dataset_id
|
|
579
581
|
)
|
|
580
582
|
)
|
|
581
583
|
|
ingestify/tests/test_engine.py
CHANGED
|
@@ -251,6 +251,28 @@ class FailingJobSource(Source):
|
|
|
251
251
|
raise Exception("some failure")
|
|
252
252
|
|
|
253
253
|
|
|
254
|
+
class NoFilesSource(Source):
|
|
255
|
+
provider = "fake"
|
|
256
|
+
|
|
257
|
+
def find_datasets(
|
|
258
|
+
self,
|
|
259
|
+
dataset_type: str,
|
|
260
|
+
data_spec_versions: DataSpecVersionCollection,
|
|
261
|
+
dataset_collection_metadata: DatasetCollectionMetadata,
|
|
262
|
+
competition_id,
|
|
263
|
+
season_id,
|
|
264
|
+
**kwargs,
|
|
265
|
+
):
|
|
266
|
+
yield DatasetResource(
|
|
267
|
+
dataset_resource_id=dict(
|
|
268
|
+
competition_id=competition_id, season_id=season_id, match_id=1
|
|
269
|
+
),
|
|
270
|
+
provider="fake",
|
|
271
|
+
dataset_type="match",
|
|
272
|
+
name="Dataset Without Files",
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
|
|
254
276
|
def test_engine(config_file):
|
|
255
277
|
engine = get_engine(config_file, "main")
|
|
256
278
|
|
|
@@ -499,3 +521,17 @@ def test_post_load_files_hook(config_file):
|
|
|
499
521
|
engine.load()
|
|
500
522
|
dataset2 = engine.store.get_dataset_collection().first()
|
|
501
523
|
assert dataset2.state == DatasetState.COMPLETE
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def test_force_save_creates_revision(config_file):
|
|
527
|
+
"""Test that datasets get a revision even when no files are persisted."""
|
|
528
|
+
engine = get_engine(config_file, "main")
|
|
529
|
+
add_ingestion_plan(
|
|
530
|
+
engine, NoFilesSource("fake-source"), competition_id=1, season_id=2
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
engine.load()
|
|
534
|
+
dataset = engine.store.get_dataset_collection().first()
|
|
535
|
+
|
|
536
|
+
assert len(dataset.revisions) == 1
|
|
537
|
+
assert len(dataset.current_revision.modified_files) == 0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
ingestify/__init__.py,sha256=
|
|
1
|
+
ingestify/__init__.py,sha256=zFlvArmYGeebwjI5szpq8FLAYq8kpDb0ri9uh-5Yjrs,336
|
|
2
2
|
ingestify/cmdline.py,sha256=Rs1_lSKSIJrcygH5fvtOGicOl_e0sZYW7deqp4_jGbY,6233
|
|
3
3
|
ingestify/exceptions.py,sha256=izRzaLQmMy-4P8ZqGqVZyf4k6LFYOYqwYLuRaUH8BJw,187
|
|
4
4
|
ingestify/main.py,sha256=mMXDNzSl1dzN03BUiS97uP3XwFMdgadxP0hJlONsZ_g,15789
|
|
@@ -6,7 +6,7 @@ ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
|
|
|
6
6
|
ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
|
|
7
7
|
ingestify/utils.py,sha256=tsoo-GgeSrwK161WCqW793BAm5bjvnGwI8yGgLTJ1lk,6486
|
|
8
8
|
ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
ingestify/application/dataset_store.py,sha256=
|
|
9
|
+
ingestify/application/dataset_store.py,sha256=9JKx260vA07Pv9hpW_1R2S2DbXcMyQsirvDpPRakyWE,20440
|
|
10
10
|
ingestify/application/ingestion_engine.py,sha256=we16yiDS9QGOlAUiP1vidDycihjWK3B2jo64uqKmrXE,11246
|
|
11
11
|
ingestify/application/loader.py,sha256=K99ZJuHMEJFO6CIlxoyHKGSQtXw63JgOYu3moUD6sR0,13400
|
|
12
12
|
ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
|
|
@@ -29,7 +29,7 @@ ingestify/domain/models/dataset/file.py,sha256=cXDjSw19HRMCGFpVN4u1oejxE1V8SMQpt
|
|
|
29
29
|
ingestify/domain/models/dataset/file_collection.py,sha256=yaQmqFlmbajLCkU5QnjgqCvKzvVEZJrXVvinx5UGHcM,1193
|
|
30
30
|
ingestify/domain/models/dataset/file_repository.py,sha256=9EQprch9isAH2pbK7e7tfOKl6ulip4Ij1kBCTbO_rTc,1721
|
|
31
31
|
ingestify/domain/models/dataset/identifier.py,sha256=EJYsxt0OS_43Y989DZQq8U9NjwmtvnHGYGMe6-hOBlI,575
|
|
32
|
-
ingestify/domain/models/dataset/revision.py,sha256=
|
|
32
|
+
ingestify/domain/models/dataset/revision.py,sha256=KK-RPQgRVgNWJcjpfYPt9r6tk_0Z4FfT6oM2QuLWlIA,2105
|
|
33
33
|
ingestify/domain/models/dataset/selector.py,sha256=qGRA22gDAHhjDAhMWzOjZPz3Rrs1V-DZ32z75NARoTQ,1448
|
|
34
34
|
ingestify/domain/models/event/__init__.py,sha256=OdPTpE9bj5QqdGmrYqRTLPX1f-LR9GWJYlGMPPEsuL8,138
|
|
35
35
|
ingestify/domain/models/event/_old_event.py,sha256=RktgCAj9SMdtqkAc_bOwoghEb2Z6m4r5_xWXin9wqx4,472
|
|
@@ -47,14 +47,14 @@ ingestify/domain/models/resources/dataset_resource.py,sha256=zhTCM4bX6Wf4iWG2g8_
|
|
|
47
47
|
ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
|
|
48
48
|
ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
|
|
49
49
|
ingestify/domain/models/task/task.py,sha256=OwLZQi9GGe0O8m1dKvJdN2Rham5oilI49KyKc5uV20A,161
|
|
50
|
-
ingestify/domain/models/task/task_summary.py,sha256=
|
|
50
|
+
ingestify/domain/models/task/task_summary.py,sha256=T6fvAxsxq861gKj6DFbo6WMt7GJtg_fhQ5pbujqCskk,3119
|
|
51
51
|
ingestify/domain/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
52
|
ingestify/domain/services/identifier_key_transformer.py,sha256=y4GS9u9Ej1MO2jUhAxWbifp0mrE_MqTHvVVcoQzSKb4,4034
|
|
53
53
|
ingestify/domain/services/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
54
|
ingestify/domain/services/transformers/kloppy_to_pandas.py,sha256=NcN6nTBGVn9gz-_hWZJTMcduS1Gg7EM4X95Cqxi1QIM,809
|
|
55
55
|
ingestify/infra/__init__.py,sha256=V0hpLzPVTcOHRVh0gguF6FT30YIgEOUd5v87xUHkfZ4,88
|
|
56
56
|
ingestify/infra/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
|
-
ingestify/infra/fetch/http.py,sha256=
|
|
57
|
+
ingestify/infra/fetch/http.py,sha256=_ZuRsVplpya8GdhNl7SqVBXOtCnAU3V5Umt0vtaxGQk,4644
|
|
58
58
|
ingestify/infra/serialization/__init__.py,sha256=UqXWJmKTp7Mi58ZyDASGguPFlqdVWVUbm_sg9GWx9eI,702
|
|
59
59
|
ingestify/infra/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
60
|
ingestify/infra/sink/postgresql.py,sha256=SxuM3LntfYcpCriUpqJhMvgAf0s9cohXf6WkxSEDYDY,1816
|
|
@@ -66,7 +66,7 @@ ingestify/infra/source/statsbomb/match.py,sha256=8Zpdys6-bB_ral2AmjGKhF4BnXW3F0Y
|
|
|
66
66
|
ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
|
|
67
67
|
ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
68
|
ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
|
|
69
|
-
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=
|
|
69
|
+
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=v-JrX09zqheUklevXryev2_H1m0flYxJQFQBkZhVS24,24255
|
|
70
70
|
ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=1FewCsN7jdk1ITzL_neOwJWtHD03NxCS9E8dhZcz4oY,12236
|
|
71
71
|
ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
|
|
72
72
|
ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
|
|
@@ -75,14 +75,14 @@ ingestify/infra/store/file/s3_file_repository.py,sha256=tz_EZ_gun7W2qJMlI3j_R03i
|
|
|
75
75
|
ingestify/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
76
|
ingestify/tests/conftest.py,sha256=Cr768nLMWUfIP6FMR7aFhUCY4uQ9Tz_bXOq0udpiDEE,411
|
|
77
77
|
ingestify/tests/test_auto_ingest.py,sha256=coMOzJBTbeDwUSYDLnqmkaCXpG-6WQeKqf_nj9XFiA4,14502
|
|
78
|
-
ingestify/tests/test_engine.py,sha256=
|
|
78
|
+
ingestify/tests/test_engine.py,sha256=ooqy4EjDSBFo3aHIlnqjokj9r8PnNcYxzl0rhPc0fVo,16783
|
|
79
79
|
ingestify/tests/test_events.py,sha256=A1f8H4HRyn52SWo3wV_MgSeb6IbT_lNi9wWAK8EGsK4,7806
|
|
80
80
|
ingestify/tests/test_file_cache.py,sha256=Xbh_VLLDH-KQXE3MeujDeOjjYYbAnjGR6wsHwMInKco,3049
|
|
81
81
|
ingestify/tests/test_pagination.py,sha256=uAKDMsM6fYSa4NcAlXDllu2y-8lnh0AclhPZ5MWJKn8,5539
|
|
82
82
|
ingestify/tests/test_store_version.py,sha256=4czUG8LtaGxgjW4trw7BzYJA8blQp3-HM8w-7HjqFl0,2508
|
|
83
83
|
ingestify/tests/test_table_prefix.py,sha256=6N42T6hfulqTlsUlrwhNmZ-TK-ZOt4U8Jx9NxKyLS4I,2844
|
|
84
|
-
ingestify-0.9.
|
|
85
|
-
ingestify-0.9.
|
|
86
|
-
ingestify-0.9.
|
|
87
|
-
ingestify-0.9.
|
|
88
|
-
ingestify-0.9.
|
|
84
|
+
ingestify-0.9.2.dist-info/METADATA,sha256=wbLlX2aaVowBJRKKOvz817ipB48gZmMeE48EcJgMycs,8263
|
|
85
|
+
ingestify-0.9.2.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
86
|
+
ingestify-0.9.2.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
|
|
87
|
+
ingestify-0.9.2.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
|
|
88
|
+
ingestify-0.9.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|