ingestify 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestify/__init__.py +2 -1
- ingestify/application/ingestion_engine.py +3 -0
- ingestify/application/loader.py +12 -2
- ingestify/domain/models/dataset/dataset_state.py +1 -0
- ingestify/domain/models/dataset/file.py +6 -0
- ingestify/domain/models/ingestion/ingestion_job.py +5 -1
- ingestify/domain/models/resources/dataset_resource.py +13 -1
- ingestify/infra/fetch/http.py +3 -3
- ingestify/infra/store/dataset/sqlalchemy/repository.py +90 -50
- ingestify/infra/store/dataset/sqlalchemy/tables.py +191 -174
- ingestify/main.py +189 -5
- ingestify/tests/__init__.py +0 -0
- ingestify/tests/conftest.py +17 -0
- ingestify/tests/test_auto_ingest.py +418 -0
- ingestify/tests/test_engine.py +501 -0
- ingestify/tests/test_events.py +201 -0
- ingestify/tests/test_file_cache.py +98 -0
- ingestify/tests/test_pagination.py +162 -0
- ingestify/tests/test_store_version.py +73 -0
- ingestify/tests/test_table_prefix.py +78 -0
- {ingestify-0.7.0.dist-info → ingestify-0.9.0.dist-info}/METADATA +59 -5
- {ingestify-0.7.0.dist-info → ingestify-0.9.0.dist-info}/RECORD +25 -16
- {ingestify-0.7.0.dist-info → ingestify-0.9.0.dist-info}/WHEEL +1 -1
- {ingestify-0.7.0.dist-info → ingestify-0.9.0.dist-info}/entry_points.txt +0 -0
- {ingestify-0.7.0.dist-info → ingestify-0.9.0.dist-info}/top_level.txt +0 -0
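
Each of the new test modules below receives a `config_file` pytest fixture defined in the new `ingestify/tests/conftest.py` (+17 lines), whose contents are not shown in this diff. A minimal sketch of what such a fixture could look like, assuming a SQLite metadata store and local file storage under a temp directory (hypothetical, not the actual conftest.py):

```python
# Hypothetical sketch of the config_file fixture; the real conftest.py is not shown in this diff.
import pytest
import yaml


@pytest.fixture
def config_file(tmp_path):
    # Assumed shape, mirroring the config dict used in test_table_prefix.py:
    # one "main" engine with SQLite metadata and file storage under tmp_path.
    config = {
        "main": {
            "metadata_url": f"sqlite:///{tmp_path / 'catalog.db'}",
            "file_url": f"file://{tmp_path / 'files'}",
            "default_bucket": "main",
        }
    }
    path = tmp_path / "config.yaml"
    path.write_text(yaml.dump(config))
    return str(path)
```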
ingestify/tests/test_file_cache.py (new file)
@@ -0,0 +1,98 @@
+import pytest
+from io import BytesIO
+from unittest.mock import patch
+from datetime import datetime, timezone
+
+from ingestify.main import get_engine
+from ingestify.domain import Dataset, Identifier, Revision, File
+from ingestify.domain.models.dataset.revision import RevisionSource, SourceType
+
+
+def test_file_cache(config_file):
+    """Test file caching with the with_file_cache context manager."""
+    # Get engine from the fixture
+    engine = get_engine(config_file, "main")
+    store = engine.store
+
+    # Create a timestamp for test data
+    now = datetime.now(timezone.utc)
+
+    # Create a test file
+    test_file = File(
+        file_id="test_file_id",
+        data_feed_key="test_file",
+        tag="test_tag",
+        data_serialization_format="txt",
+        storage_path="test/path",
+        storage_size=100,
+        storage_compression_method="none",
+        created_at=now,
+        modified_at=now,
+        size=100,
+        content_type="text/plain",
+        data_spec_version="v1",
+    )
+
+    # Create a test revision with the file
+    revision = Revision(
+        revision_id=1,
+        created_at=now,
+        description="Test revision",
+        modified_files=[test_file],
+        source={"source_type": SourceType.MANUAL, "source_id": "test"},
+    )
+
+    # Create a test dataset with the revision
+    dataset = Dataset(
+        bucket="test-bucket",
+        dataset_id="test-dataset",
+        name="Test Dataset",
+        state="COMPLETE",
+        identifier=Identifier(test_id=1),
+        dataset_type="test",
+        provider="test-provider",
+        metadata={},
+        created_at=now,
+        updated_at=now,
+        last_modified_at=now,
+        revisions=[revision],
+    )
+
+    # Create a simple pass-through reader function to replace the gzip reader
+    def simple_reader(stream):
+        return stream
+
+    # Mock both the file repository and the _prepare_read_stream method
+    with patch.object(
+        store.file_repository, "load_content"
+    ) as mock_load_content, patch.object(
+        store, "_prepare_read_stream"
+    ) as mock_prepare_read_stream:
+
+        # Set up the mocks
+        mock_load_content.return_value = BytesIO(b"test content")
+        mock_prepare_read_stream.return_value = (simple_reader, "")
+
+        # Test without caching - should load files twice
+        store.load_files(dataset)
+        store.load_files(dataset)
+
+        # Should have called load_content twice (without caching)
+        assert mock_load_content.call_count == 2
+
+        # Reset the mock
+        mock_load_content.reset_mock()
+
+        # Test with caching - should load files only once
+        with store.with_file_cache():
+            store.load_files(dataset)
+            store.load_files(dataset)
+
+        # Should have called load_content only once (with caching)
+        assert mock_load_content.call_count == 1
+
+        # After exiting context, caching should be disabled
+        store.load_files(dataset)
+
+        # Should have called load_content again
+        assert mock_load_content.call_count == 2
ingestify/tests/test_pagination.py (new file)
@@ -0,0 +1,162 @@
+import pytest
+from datetime import datetime, timedelta
+import pytz
+
+from ingestify.domain import Dataset, Identifier, DatasetState
+from ingestify.main import get_engine
+
+
+def test_iter_dataset_collection_batches(config_file):
+    """Test iteration over datasets with batches using iter_dataset_collection_batches."""
+    # Get engine from the fixture
+    engine = get_engine(config_file, "main")
+    store = engine.store
+    bucket = store.bucket
+
+    # Create 30 datasets with different creation times
+    now = datetime.now(pytz.utc)
+
+    # Save datasets with ascending created_at timestamps
+    for i in range(30):
+        dataset = Dataset(
+            bucket=bucket,
+            dataset_id=f"dataset-{i}",
+            name=f"Dataset {i}",
+            state="COMPLETE",
+            identifier=Identifier(test_id=i),
+            dataset_type="test",
+            provider="test-provider",
+            metadata={},
+            created_at=now
+            + timedelta(minutes=i),  # Each dataset created 1 minute apart
+            updated_at=now + timedelta(minutes=i),
+            last_modified_at=now + timedelta(minutes=i),
+        )
+        store.dataset_repository.save(bucket, dataset)
+
+    # Test iteration with small batch_size (yields individual datasets)
+    dataset_ids = []
+    for dataset in store.iter_dataset_collection_batches(
+        dataset_type="test",
+        provider="test-provider",
+        batch_size=5,  # Small batch size to force multiple batches
+    ):
+        dataset_ids.append(dataset.dataset_id)
+
+    # Should get all 30 datasets
+    assert len(dataset_ids) == 30
+
+    # Make sure we have all datasets from 0 to 29
+    expected_ids = [f"dataset-{i}" for i in range(30)]
+    assert set(dataset_ids) == set(expected_ids)
+
+    # Test iteration yielding entire DatasetCollection objects
+    collections = []
+    for collection in store.iter_dataset_collection_batches(
+        dataset_type="test",
+        provider="test-provider",
+        batch_size=5,  # Small batch size to force multiple batches
+        yield_dataset_collection=True,
+    ):
+        collections.append(collection)
+
+    # Should have 6 collections (30 datasets / 5 per batch = 6 batches)
+    assert len(collections) == 6
+
+    # Verify total dataset count across all collections
+    total_datasets = sum(len(collection) for collection in collections)
+    assert total_datasets == 30
+
+    # Test iteration with a filter that returns fewer results
+    filtered_dataset_ids = []
+    for dataset in store.iter_dataset_collection_batches(
+        dataset_type="test",
+        provider="test-provider",
+        test_id=5,  # Only get dataset with test_id=5
+        batch_size=10,
+    ):
+        filtered_dataset_ids.append(dataset.dataset_id)
+
+    assert len(filtered_dataset_ids) == 1
+    assert filtered_dataset_ids[0] == "dataset-5"
+
+
+def test_dataset_state_filter(config_file):
+    """Test filtering datasets by state."""
+    # Get engine from the fixture
+    engine = get_engine(config_file, "main")
+    store = engine.store
+    bucket = store.bucket
+
+    now = datetime.now(pytz.utc)
+
+    # Create datasets with different states
+    states = [
+        DatasetState.COMPLETE,
+        DatasetState.PARTIAL,
+        DatasetState.SCHEDULED,
+        DatasetState.MISSING,
+    ]
+    for i in range(12):  # 3 datasets per state
+        state = states[i % 4]
+        dataset = Dataset(
+            bucket=bucket,
+            dataset_id=f"state-test-{i}",
+            name=f"State Test {i}",
+            state=state,
+            identifier=Identifier(test_id=i),
+            dataset_type="state-test",
+            provider="test-provider",
+            metadata={},
+            created_at=now + timedelta(minutes=i),
+            updated_at=now + timedelta(minutes=i),
+            last_modified_at=now + timedelta(minutes=i),
+        )
+        store.dataset_repository.save(bucket, dataset)
+
+    # Test filtering by a single state using enum
+    complete_datasets = store.get_dataset_collection(
+        dataset_type="state-test", dataset_state=DatasetState.COMPLETE
+    )
+    assert len(complete_datasets) == 3
+
+    # Test filtering by a single state using string
+    partial_datasets = store.get_dataset_collection(
+        dataset_type="state-test", dataset_state="PARTIAL"
+    )
+    assert len(partial_datasets) == 3
+
+    # Test filtering by multiple states using a list of enums
+    mixed_datasets = store.get_dataset_collection(
+        dataset_type="state-test",
+        dataset_state=[
+            DatasetState.COMPLETE,
+            DatasetState.SCHEDULED,
+            DatasetState.MISSING,
+        ],
+    )
+    assert len(mixed_datasets) == 9
+
+    # Test filtering by multiple states using a list of strings
+    mixed_datasets_strings = store.get_dataset_collection(
+        dataset_type="state-test", dataset_state=["COMPLETE", "SCHEDULED"]
+    )
+    assert len(mixed_datasets_strings) == 6
+
+    # Test case-insensitivity
+    lowercase_state_datasets = store.get_dataset_collection(
+        dataset_type="state-test", dataset_state="complete"
+    )
+    assert len(lowercase_state_datasets) == 3
+
+    # Test with iter_dataset_collection
+    scheduled_dataset_ids = []
+    for dataset in store.iter_dataset_collection_batches(
+        dataset_type="state-test",
+        dataset_state=DatasetState.SCHEDULED,
+        batch_size=2,  # Small batch size to test pagination with filters
+    ):
+        scheduled_dataset_ids.append(dataset.dataset_id)
+        assert dataset.state == DatasetState.SCHEDULED
+
+    assert len(scheduled_dataset_ids) == 3
ingestify/tests/test_store_version.py (new file)
@@ -0,0 +1,73 @@
+import pytest
+from unittest.mock import patch
+
+from ingestify.main import get_engine
+
+
+def test_store_version_tracking_new_store(config_file):
+    """Test that a new store gets initialized with the current version."""
+    with patch("ingestify.__version__", "1.0.0"):
+        engine = get_engine(config_file)
+
+        # Check that version was stored
+        stored_version = engine.store.dataset_repository.get_store_version()
+        assert stored_version == "1.0.0"
+
+
+def test_store_version_tracking_existing_store_same_version(config_file):
+    """Test that an existing store with same version doesn't cause issues."""
+    with patch("ingestify.__version__", "1.0.0"):
+        # Initialize store first time
+        engine1 = get_engine(config_file)
+        store1 = engine1.store
+
+        # Open store again with same version
+        engine2 = get_engine(config_file)
+        store2 = engine2.store
+
+        # Version should still be stored correctly
+        stored_version = store2.dataset_repository.get_store_version()
+        assert stored_version == "1.0.0"
+
+
+def test_store_version_tracking_version_mismatch(config_file, caplog):
+    """Test that version mismatch is logged as warning."""
+    # Initialize store with version 1.0.0
+    with patch("ingestify.__version__", "1.0.0"):
+        engine1 = get_engine(config_file)
+        store1 = engine1.store
+
+        stored_version = store1.dataset_repository.get_store_version()
+        assert stored_version == "1.0.0"
+
+    # Open store with different version
+    with patch("ingestify.__version__", "2.0.0"):
+        engine2 = get_engine(config_file)
+        store2 = engine2.store
+
+        # Version should still be the original one
+        stored_version = store2.dataset_repository.get_store_version()
+        assert stored_version == "1.0.0"
+
+        # Should have logged a warning about version mismatch
+        assert "Store version mismatch" in caplog.text
+        assert "stored=1.0.0, current=2.0.0" in caplog.text
+
+
+def test_store_version_methods(config_file):
+    """Test the repository version methods directly."""
+    engine = get_engine(config_file)
+    repo = engine.store.dataset_repository
+
+    from ingestify import __version__
+
+    # Initially the real version is stored
+    assert repo.get_store_version() == __version__
+
+    # Set a version
+    repo.set_store_version("1.2.3")
+    assert repo.get_store_version() == "1.2.3"
+
+    # Update version
+    repo.set_store_version("1.2.4")
+    assert repo.get_store_version() == "1.2.4"
ingestify/tests/test_table_prefix.py (new file)
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""End-to-end test for table_prefix configuration"""
+import tempfile
+import yaml
+from pathlib import Path
+from sqlalchemy import inspect
+
+from ingestify.main import get_datastore
+
+
+def test_table_prefix_from_config():
+    """Test that metadata_options.table_prefix is correctly applied from config"""
+    temp_dir = Path(tempfile.mkdtemp())
+
+    # Test 1: Config without metadata_options (default behavior)
+    config_no_prefix = {
+        "main": {
+            "metadata_url": f"sqlite:///{temp_dir / 'no_prefix.db'}",
+            "file_url": f"file://{temp_dir / 'files'}",
+            "default_bucket": "main",
+        }
+    }
+    config_path_no_prefix = temp_dir / "config_no_prefix.yaml"
+    config_path_no_prefix.write_text(yaml.dump(config_no_prefix))
+
+    store_no_prefix = get_datastore(str(config_path_no_prefix))
+    inspector = inspect(store_no_prefix.dataset_repository.session_provider.engine)
+    tables = inspector.get_table_names()
+
+    assert "dataset" in tables
+    assert "revision" in tables
+    assert "file" in tables
+    assert store_no_prefix.dataset_repository.dataset_table.name == "dataset"
+
+    # Test 2: Config with metadata_options.table_prefix
+    config_with_prefix = {
+        "main": {
+            "metadata_url": f"sqlite:///{temp_dir / 'with_prefix.db'}",
+            "file_url": f"file://{temp_dir / 'files'}",
+            "default_bucket": "main",
+            "metadata_options": {"table_prefix": "prod_"},
+        }
+    }
+    config_path_with_prefix = temp_dir / "config_with_prefix.yaml"
+    config_path_with_prefix.write_text(yaml.dump(config_with_prefix))
+
+    store_with_prefix = get_datastore(str(config_path_with_prefix))
+    inspector_prefixed = inspect(
+        store_with_prefix.dataset_repository.session_provider.engine
+    )
+    tables_prefixed = inspector_prefixed.get_table_names()
+
+    assert "prod_dataset" in tables_prefixed
+    assert "prod_revision" in tables_prefixed
+    assert "prod_file" in tables_prefixed
+    assert "prod_ingestion_job_summary" in tables_prefixed
+    assert "prod_task_summary" in tables_prefixed
+    assert "prod_store_version" in tables_prefixed
+    assert store_with_prefix.dataset_repository.dataset_table.name == "prod_dataset"
+
+    # Verify foreign keys reference prefixed tables
+    revision_fks = inspector_prefixed.get_foreign_keys("prod_revision")
+    assert revision_fks[0]["referred_table"] == "prod_dataset"
+
+    file_fks = inspector_prefixed.get_foreign_keys("prod_file")
+    assert file_fks[0]["referred_table"] == "prod_revision"
+
+    task_fks = inspector_prefixed.get_foreign_keys("prod_task_summary")
+    assert task_fks[0]["referred_table"] == "prod_ingestion_job_summary"
+
+    import shutil
+
+    shutil.rmtree(temp_dir)
+
+
+if __name__ == "__main__":
+    test_table_prefix_from_config()
+    print("✓ All tests passed")
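
test_table_prefix.py builds its configuration as a Python dict and writes it out with `yaml.dump`. For reference, the equivalent `config.yaml` with the `metadata_options.table_prefix` setting would look roughly like this (paths are placeholders):

```yaml
main:
  metadata_url: "sqlite:///path/to/with_prefix.db"
  file_url: "file:///path/to/files"
  default_bucket: "main"
  metadata_options:
    table_prefix: "prod_"
```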
{ingestify-0.7.0.dist-info → ingestify-0.9.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: ingestify
-Version: 0.7.0
+Version: 0.9.0
 Summary: Data Ingestion Framework
 Author: Koen Vossen
 Author-email: info@koenvossen.nl
@@ -10,12 +10,20 @@ Requires-Dist: requests<3,>=2.0.0
 Requires-Dist: SQLAlchemy<3,>=2
 Requires-Dist: click>=8
 Requires-Dist: python-dotenv
-Requires-Dist:
+Requires-Dist: pyaml_env
 Requires-Dist: boto3
 Requires-Dist: pydantic>=2.0.0
 Provides-Extra: test
 Requires-Dist: pytest<7,>=6.2.5; extra == "test"
 Requires-Dist: pytz; extra == "test"
+Dynamic: author
+Dynamic: author-email
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: license
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: summary
 
 # Ingestify
 
@@ -77,6 +85,43 @@ Ingestify fixes that by building **your own data lake** of untouched provider fi
 pip install ingestify # or: pip install git+https://github.com/PySport/ingestify.git
 ```
 
+### Developing a new Source
+
+When developing a new `Source`, use the `debug_source()` helper for rapid iteration:
+
+```python
+from ingestify import Source, debug_source
+
+class MyCustomSource(Source):
+    provider = "my_provider"
+
+    def __init__(self, name: str, api_key: str):
+        super().__init__(name)
+        self.api_key = api_key
+
+    def find_datasets(self, dataset_type, data_spec_versions, **kwargs):
+        # Your source implementation
+        ...
+
+# Quick debug - runs full ingestion with temp storage
+if __name__ == "__main__":
+    source = MyCustomSource(name="test", api_key="...")
+
+    debug_source(
+        source,
+        dataset_type="match",
+        data_spec_versions={"events": "v1"},
+    )
+```
+
+The `debug_source()` helper:
+- ✅ Creates an ephemeral dev engine with temp storage
+- ✅ Configures logging automatically
+- ✅ Runs the full ingestion cycle
+- ✅ Shows storage location and results
+
+Perfect for testing your source before adding it to production config!
+
 ### Minimal `config.yaml`
 
 ```yaml
@@ -175,8 +220,16 @@ pip install kloppy
 ```
 
 ```python
+import logging, sys
+
 from ingestify.main import get_engine
 
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    stream=sys.stderr,
+)
+
 engine = get_engine(
     metadata_url="sqlite:///database_open_data/catalog.db",
     file_url="file://database_open_data/files/"
@@ -188,12 +241,13 @@ dataset_iter = engine.iter_datasets(
 
     provider="statsbomb",
     dataset_type="match",
-    competition_id=43,
-    season_id=281
+    competition_id=43, # "FIFA World Cup"
+    #season_id=281
 )
 
 for dataset in dataset_iter:
     kloppy_dataset = engine.load_dataset_with_kloppy(dataset)
+    logging.info(f"Loaded {kloppy_dataset}")
 ```
 
 
{ingestify-0.7.0.dist-info → ingestify-0.9.0.dist-info}/RECORD
@@ -1,14 +1,14 @@
-ingestify/__init__.py,sha256=
+ingestify/__init__.py,sha256=fVzksB6rrJmEm-2P5DvT2JbShYaTZ2nKdbbZf8KabC4,336
 ingestify/cmdline.py,sha256=Rs1_lSKSIJrcygH5fvtOGicOl_e0sZYW7deqp4_jGbY,6233
 ingestify/exceptions.py,sha256=izRzaLQmMy-4P8ZqGqVZyf4k6LFYOYqwYLuRaUH8BJw,187
-ingestify/main.py,sha256=
+ingestify/main.py,sha256=mMXDNzSl1dzN03BUiS97uP3XwFMdgadxP0hJlONsZ_g,15789
 ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
 ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
 ingestify/utils.py,sha256=tsoo-GgeSrwK161WCqW793BAm5bjvnGwI8yGgLTJ1lk,6486
 ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestify/application/dataset_store.py,sha256=GP6wGjVirefEn6hlqWIkOBqdELad9L_mmTpdHdzj18M,20353
-ingestify/application/ingestion_engine.py,sha256=
-ingestify/application/loader.py,sha256=
+ingestify/application/ingestion_engine.py,sha256=we16yiDS9QGOlAUiP1vidDycihjWK3B2jo64uqKmrXE,11246
+ingestify/application/loader.py,sha256=K99ZJuHMEJFO6CIlxoyHKGSQtXw63JgOYu3moUD6sR0,13400
 ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
 ingestify/domain/__init__.py,sha256=M7_fVTJjQUx53P4UQUPhowRKPKsIIjx4JYZL1yjHKsM,46
 ingestify/domain/models/__init__.py,sha256=WuKS34uiR1EwyczKujBHYGupqseJP-U2P5IQS4kpsA8,838
@@ -23,9 +23,9 @@ ingestify/domain/models/dataset/collection.py,sha256=YKGQv6hqm88MYlNp2c47CoWysyN
 ingestify/domain/models/dataset/collection_metadata.py,sha256=aWY6O3_JLj_jKfVfUTjmi3-E4heBmmmtqX81vhdzr0I,498
 ingestify/domain/models/dataset/dataset.py,sha256=OiP03nY0-m06y2GTrs_m-RiZE8HwypIHRwSqoM_DNnQ,4049
 ingestify/domain/models/dataset/dataset_repository.py,sha256=bf3F_1cKw0CvUberD3FMROE8iowAmYefnD4L6aPB39k,989
-ingestify/domain/models/dataset/dataset_state.py,sha256=
+ingestify/domain/models/dataset/dataset_state.py,sha256=AHJSoCXGVJeBe0eyFJMfvLdAZuf82xjdReCyCWZFlSY,348
 ingestify/domain/models/dataset/events.py,sha256=M8jrHWCm9iXapAy3xjvZZtiiOxXDnfefBixiMwkas24,786
-ingestify/domain/models/dataset/file.py,sha256=
+ingestify/domain/models/dataset/file.py,sha256=cXDjSw19HRMCGFpVN4u1oejxE1V8SMQptfNVDVixj6o,4464
 ingestify/domain/models/dataset/file_collection.py,sha256=yaQmqFlmbajLCkU5QnjgqCvKzvVEZJrXVvinx5UGHcM,1193
 ingestify/domain/models/dataset/file_repository.py,sha256=9EQprch9isAH2pbK7e7tfOKl6ulip4Ij1kBCTbO_rTc,1721
 ingestify/domain/models/dataset/identifier.py,sha256=EJYsxt0OS_43Y989DZQq8U9NjwmtvnHGYGMe6-hOBlI,575
@@ -39,11 +39,11 @@ ingestify/domain/models/event/event_bus.py,sha256=feVXsbBcRNkbWYvXbmz-Yi9-3R690y
 ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
 ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
 ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ingestify/domain/models/ingestion/ingestion_job.py,sha256=
+ingestify/domain/models/ingestion/ingestion_job.py,sha256=Ou8v_FXDNnbrzPjHYiLoXMEg7ZRNFPjK1BMk9DY7L2E,15574
 ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=7dmkWEvE7lMSv1ILWcSvys1bUGuGe_s-YbOFC6eYMBI,4794
 ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
 ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
-ingestify/domain/models/resources/dataset_resource.py,sha256=
+ingestify/domain/models/resources/dataset_resource.py,sha256=zhTCM4bX6Wf4iWG2g8_SBx2U05YFxTxiZK3f6EqwD6I,3598
 ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
 ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
 ingestify/domain/models/task/task.py,sha256=OwLZQi9GGe0O8m1dKvJdN2Rham5oilI49KyKc5uV20A,161
@@ -54,7 +54,7 @@ ingestify/domain/services/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
 ingestify/domain/services/transformers/kloppy_to_pandas.py,sha256=NcN6nTBGVn9gz-_hWZJTMcduS1Gg7EM4X95Cqxi1QIM,809
 ingestify/infra/__init__.py,sha256=V0hpLzPVTcOHRVh0gguF6FT30YIgEOUd5v87xUHkfZ4,88
 ingestify/infra/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ingestify/infra/fetch/http.py,sha256=
+ingestify/infra/fetch/http.py,sha256=oaERHk-0Azu3T2-r5gHPuC9qvmD4cPURGY02q3GoI00,4647
 ingestify/infra/serialization/__init__.py,sha256=UqXWJmKTp7Mi58ZyDASGguPFlqdVWVUbm_sg9GWx9eI,702
 ingestify/infra/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestify/infra/sink/postgresql.py,sha256=SxuM3LntfYcpCriUpqJhMvgAf0s9cohXf6WkxSEDYDY,1816
@@ -66,14 +66,23 @@ ingestify/infra/source/statsbomb/match.py,sha256=8Zpdys6-bB_ral2AmjGKhF4BnXW3F0Y
 ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
 ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
-ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=
-ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=
+ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=6WPHKxA6UhgzO3P4Sdbq7W14tTMxisC8Js3MZilPoNc,24160
+ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=1FewCsN7jdk1ITzL_neOwJWtHD03NxCS9E8dhZcz4oY,12236
 ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
 ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
 ingestify/infra/store/file/local_file_repository.py,sha256=1hhLqds5LlppJq2QBB0oN0Q98j6aXreCtYQYz3Q1P8g,819
 ingestify/infra/store/file/s3_file_repository.py,sha256=tz_EZ_gun7W2qJMlI3j_R03iKBZlJSDcG7AUJ1JkdpE,1501
-ingestify
-ingestify
-ingestify
-ingestify
-ingestify
+ingestify/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ingestify/tests/conftest.py,sha256=Cr768nLMWUfIP6FMR7aFhUCY4uQ9Tz_bXOq0udpiDEE,411
+ingestify/tests/test_auto_ingest.py,sha256=coMOzJBTbeDwUSYDLnqmkaCXpG-6WQeKqf_nj9XFiA4,14502
+ingestify/tests/test_engine.py,sha256=x3_o6M3satos029Er84ptbzxRKoBw5KB0am2KJSQ16Q,15738
+ingestify/tests/test_events.py,sha256=A1f8H4HRyn52SWo3wV_MgSeb6IbT_lNi9wWAK8EGsK4,7806
+ingestify/tests/test_file_cache.py,sha256=Xbh_VLLDH-KQXE3MeujDeOjjYYbAnjGR6wsHwMInKco,3049
+ingestify/tests/test_pagination.py,sha256=uAKDMsM6fYSa4NcAlXDllu2y-8lnh0AclhPZ5MWJKn8,5539
+ingestify/tests/test_store_version.py,sha256=4czUG8LtaGxgjW4trw7BzYJA8blQp3-HM8w-7HjqFl0,2508
+ingestify/tests/test_table_prefix.py,sha256=6N42T6hfulqTlsUlrwhNmZ-TK-ZOt4U8Jx9NxKyLS4I,2844
+ingestify-0.9.0.dist-info/METADATA,sha256=1j8178-ZiJrZb3CkWSiKU4rlBfRnmSMdAaVytjrKc9w,8263
+ingestify-0.9.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+ingestify-0.9.0.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
+ingestify-0.9.0.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
+ingestify-0.9.0.dist-info/RECORD,,

File without changes
File without changes