ingestify 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ingestify/__init__.py CHANGED
@@ -9,4 +9,4 @@ if not __INGESTIFY_SETUP__:
9
9
  from .source_base import Source, DatasetResource
10
10
  from .main import debug_source
11
11
 
12
- __version__ = "0.9.2"
12
+ __version__ = "0.9.4"
@@ -137,7 +137,7 @@ class UpdateDatasetTask(Task):
137
137
  for file_id, file_resource in self.dataset_resource.files.items()
138
138
  }
139
139
 
140
- self.dataset_resource.run_post_load_files(files)
140
+ self.dataset_resource.run_post_load_files(files, self.dataset)
141
141
 
142
142
  try:
143
143
  revision = self.store.update_dataset(
@@ -3,6 +3,7 @@ from typing import Optional, Callable, Any, Protocol, TYPE_CHECKING, Dict # noq
3
3
  from pydantic import Field
4
4
 
5
5
  from ingestify.domain.models.base import BaseModel
6
+ from ingestify.domain.models.dataset import Dataset
6
7
  from ingestify.domain.models.dataset.dataset_state import DatasetState
7
8
  from ingestify.exceptions import DuplicateFile
8
9
 
@@ -51,17 +52,19 @@ class DatasetResource(BaseModel):
51
52
  state: DatasetState = Field(default_factory=lambda: DatasetState.COMPLETE)
52
53
  files: dict[str, FileResource] = Field(default_factory=dict)
53
54
  post_load_files: Optional[
54
- Callable[["DatasetResource", Dict[str, DraftFile]], None]
55
+ Callable[["DatasetResource", Dict[str, DraftFile], Optional[Dataset]], None]
55
56
  ] = None
56
57
 
57
- def run_post_load_files(self, files: Dict[str, DraftFile]):
58
+ def run_post_load_files(
59
+ self, files: Dict[str, DraftFile], existing_dataset: Optional[Dataset] = None
60
+ ):
58
61
  """Hook to modify dataset attributes based on loaded file content.
59
62
 
60
63
  Useful for setting state based on file content, e.g., keep state=SCHEDULED
61
64
  when files contain '{}', change to COMPLETE when they contain actual data.
62
65
  """
63
66
  if self.post_load_files:
64
- self.post_load_files(self, files)
67
+ self.post_load_files(self, files, existing_dataset)
65
68
 
66
69
  def add_file(
67
70
  self,
@@ -470,9 +470,14 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
470
470
  dataset_ids = [row.dataset_id for row in dataset_query]
471
471
  datasets = self._load_datasets(dataset_ids)
472
472
 
473
+ last_modified_values = [
474
+ dataset.last_modified_at
475
+ for dataset in datasets
476
+ if dataset.last_modified_at is not None
477
+ ]
473
478
  dataset_collection_metadata = DatasetCollectionMetadata(
474
- last_modified=max(dataset.last_modified_at for dataset in datasets)
475
- if datasets
479
+ last_modified=max(last_modified_values)
480
+ if last_modified_values
476
481
  else None,
477
482
  row_count=len(datasets),
478
483
  )
@@ -460,7 +460,9 @@ def test_dev_engine():
460
460
  assert datasets.first().name == "Test Dataset"
461
461
 
462
462
 
463
- def post_load_hook(dataset_resource: DatasetResource, files: dict[str, DraftFile]):
463
+ def post_load_hook(
464
+ dataset_resource: DatasetResource, files: dict[str, DraftFile], existing_dataset
465
+ ):
464
466
  # Change state to COMPLETE if file content is not '{}'
465
467
  for file in files.values():
466
468
  if file.size > 2:
@@ -526,12 +528,32 @@ def test_post_load_files_hook(config_file):
526
528
  def test_force_save_creates_revision(config_file):
527
529
  """Test that datasets get a revision even when no files are persisted."""
528
530
  engine = get_engine(config_file, "main")
531
+
532
+ # Create one dataset with files and one without
533
+ add_ingestion_plan(
534
+ engine, SimpleFakeSource("fake-source"), competition_id=1, season_id=2
535
+ )
529
536
  add_ingestion_plan(
530
- engine, NoFilesSource("fake-source"), competition_id=1, season_id=2
537
+ engine, NoFilesSource("fake-source"), competition_id=1, season_id=3
531
538
  )
532
539
 
533
540
  engine.load()
534
- dataset = engine.store.get_dataset_collection().first()
535
541
 
536
- assert len(dataset.revisions) == 1
537
- assert len(dataset.current_revision.modified_files) == 0
542
+ # This should not fail even though one dataset has no last_modified_at
543
+ datasets = engine.store.get_dataset_collection()
544
+ assert len(datasets) == 2
545
+
546
+ # Verify the dataset without files still has a revision
547
+ dataset_without_files = engine.store.get_dataset_collection(season_id=3).first()
548
+ assert len(dataset_without_files.revisions) == 1
549
+ assert len(dataset_without_files.current_revision.modified_files) == 0
550
+
551
+ dataset_with_last_modified = engine.store.get_dataset_collection(
552
+ season_id=2
553
+ ).first()
554
+
555
+ dataset_without_files = engine.store.get_dataset_collection(metadata_only=True)
556
+ assert (
557
+ dataset_without_files.metadata.last_modified
558
+ == dataset_with_last_modified.last_modified_at
559
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestify
3
- Version: 0.9.2
3
+ Version: 0.9.4
4
4
  Summary: Data Ingestion Framework
5
5
  Author: Koen Vossen
6
6
  Author-email: info@koenvossen.nl
@@ -1,4 +1,4 @@
1
- ingestify/__init__.py,sha256=zFlvArmYGeebwjI5szpq8FLAYq8kpDb0ri9uh-5Yjrs,336
1
+ ingestify/__init__.py,sha256=LJSJBoIR12MN0q1qS4q-tYq9_yc0n67c-hKzVgYyuUo,336
2
2
  ingestify/cmdline.py,sha256=Rs1_lSKSIJrcygH5fvtOGicOl_e0sZYW7deqp4_jGbY,6233
3
3
  ingestify/exceptions.py,sha256=izRzaLQmMy-4P8ZqGqVZyf4k6LFYOYqwYLuRaUH8BJw,187
4
4
  ingestify/main.py,sha256=mMXDNzSl1dzN03BUiS97uP3XwFMdgadxP0hJlONsZ_g,15789
@@ -39,11 +39,11 @@ ingestify/domain/models/event/event_bus.py,sha256=feVXsbBcRNkbWYvXbmz-Yi9-3R690y
39
39
  ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
40
40
  ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
41
41
  ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- ingestify/domain/models/ingestion/ingestion_job.py,sha256=Ou8v_FXDNnbrzPjHYiLoXMEg7ZRNFPjK1BMk9DY7L2E,15574
42
+ ingestify/domain/models/ingestion/ingestion_job.py,sha256=3Icbgte5QDbho7bk7mPATsflWAtL95wOWa0WKtd8hEE,15588
43
43
  ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=7dmkWEvE7lMSv1ILWcSvys1bUGuGe_s-YbOFC6eYMBI,4794
44
44
  ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
45
45
  ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
46
- ingestify/domain/models/resources/dataset_resource.py,sha256=zhTCM4bX6Wf4iWG2g8_SBx2U05YFxTxiZK3f6EqwD6I,3598
46
+ ingestify/domain/models/resources/dataset_resource.py,sha256=UJYd3UVY-NP2PF0oYAiGp1aOuyxKfLJZKJE5Lxd44yY,3745
47
47
  ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
48
48
  ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
49
49
  ingestify/domain/models/task/task.py,sha256=OwLZQi9GGe0O8m1dKvJdN2Rham5oilI49KyKc5uV20A,161
@@ -66,7 +66,7 @@ ingestify/infra/source/statsbomb/match.py,sha256=8Zpdys6-bB_ral2AmjGKhF4BnXW3F0Y
66
66
  ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
67
67
  ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
68
  ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
69
- ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=v-JrX09zqheUklevXryev2_H1m0flYxJQFQBkZhVS24,24255
69
+ ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=2z_a1ICIokx5yUGz1Jn-DBvFzK1pryAjPYLQTP3pxAw,24447
70
70
  ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=1FewCsN7jdk1ITzL_neOwJWtHD03NxCS9E8dhZcz4oY,12236
71
71
  ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
72
72
  ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
@@ -75,14 +75,14 @@ ingestify/infra/store/file/s3_file_repository.py,sha256=tz_EZ_gun7W2qJMlI3j_R03i
75
75
  ingestify/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  ingestify/tests/conftest.py,sha256=Cr768nLMWUfIP6FMR7aFhUCY4uQ9Tz_bXOq0udpiDEE,411
77
77
  ingestify/tests/test_auto_ingest.py,sha256=coMOzJBTbeDwUSYDLnqmkaCXpG-6WQeKqf_nj9XFiA4,14502
78
- ingestify/tests/test_engine.py,sha256=ooqy4EjDSBFo3aHIlnqjokj9r8PnNcYxzl0rhPc0fVo,16783
78
+ ingestify/tests/test_engine.py,sha256=iOjAoPvJSdR1WCbNjTSTnzFMVdusi_MZWN6InUqi9Ew,17558
79
79
  ingestify/tests/test_events.py,sha256=A1f8H4HRyn52SWo3wV_MgSeb6IbT_lNi9wWAK8EGsK4,7806
80
80
  ingestify/tests/test_file_cache.py,sha256=Xbh_VLLDH-KQXE3MeujDeOjjYYbAnjGR6wsHwMInKco,3049
81
81
  ingestify/tests/test_pagination.py,sha256=uAKDMsM6fYSa4NcAlXDllu2y-8lnh0AclhPZ5MWJKn8,5539
82
82
  ingestify/tests/test_store_version.py,sha256=4czUG8LtaGxgjW4trw7BzYJA8blQp3-HM8w-7HjqFl0,2508
83
83
  ingestify/tests/test_table_prefix.py,sha256=6N42T6hfulqTlsUlrwhNmZ-TK-ZOt4U8Jx9NxKyLS4I,2844
84
- ingestify-0.9.2.dist-info/METADATA,sha256=wbLlX2aaVowBJRKKOvz817ipB48gZmMeE48EcJgMycs,8263
85
- ingestify-0.9.2.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
86
- ingestify-0.9.2.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
87
- ingestify-0.9.2.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
88
- ingestify-0.9.2.dist-info/RECORD,,
84
+ ingestify-0.9.4.dist-info/METADATA,sha256=sIvu3pk8p4F2cpNDHA7RQV8fmRfr01Ifc_DsIcM3PL4,8263
85
+ ingestify-0.9.4.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
86
+ ingestify-0.9.4.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
87
+ ingestify-0.9.4.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
88
+ ingestify-0.9.4.dist-info/RECORD,,