ingestify 0.6.4__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestify/__init__.py +2 -1
- ingestify/application/dataset_store.py +228 -11
- ingestify/application/ingestion_engine.py +232 -7
- ingestify/application/loader.py +163 -28
- ingestify/cmdline.py +0 -48
- ingestify/domain/models/__init__.py +2 -0
- ingestify/domain/models/dataset/collection.py +0 -9
- ingestify/domain/models/dataset/dataset_repository.py +4 -0
- ingestify/domain/models/dataset/dataset_state.py +5 -0
- ingestify/domain/models/dataset/events.py +13 -0
- ingestify/domain/models/dataset/file.py +7 -1
- ingestify/domain/models/dataset/selector.py +8 -1
- ingestify/domain/models/event/event_bus.py +16 -1
- ingestify/domain/models/ingestion/ingestion_job.py +23 -4
- ingestify/domain/models/resources/dataset_resource.py +0 -1
- ingestify/infra/source/statsbomb/base.py +36 -0
- ingestify/infra/source/statsbomb/match.py +137 -0
- ingestify/infra/source/statsbomb_github.py +46 -44
- ingestify/infra/store/dataset/sqlalchemy/repository.py +77 -10
- ingestify/infra/store/dataset/sqlalchemy/tables.py +10 -0
- ingestify/main.py +190 -10
- ingestify/utils.py +2 -32
- ingestify-0.8.0.dist-info/METADATA +257 -0
- {ingestify-0.6.4.dist-info → ingestify-0.8.0.dist-info}/RECORD +28 -36
- ingestify/infra/source/wyscout.py +0 -175
- ingestify/static/templates/statsbomb_github/config.yaml.jinja2 +0 -19
- ingestify/static/templates/statsbomb_github/database/README.md +0 -1
- ingestify/static/templates/statsbomb_github/query.py +0 -14
- ingestify/static/templates/wyscout/.env +0 -5
- ingestify/static/templates/wyscout/.gitignore +0 -2
- ingestify/static/templates/wyscout/README.md +0 -0
- ingestify/static/templates/wyscout/config.yaml.jinja2 +0 -18
- ingestify/static/templates/wyscout/database/README.md +0 -1
- ingestify/static/templates/wyscout/query.py +0 -14
- ingestify-0.6.4.dist-info/METADATA +0 -266
- /ingestify/{static/templates/statsbomb_github/README.md → infra/source/statsbomb/__init__.py} +0 -0
- {ingestify-0.6.4.dist-info → ingestify-0.8.0.dist-info}/WHEEL +0 -0
- {ingestify-0.6.4.dist-info → ingestify-0.8.0.dist-info}/entry_points.txt +0 -0
- {ingestify-0.6.4.dist-info → ingestify-0.8.0.dist-info}/top_level.txt +0 -0
ingestify/infra/source/statsbomb/match.py
ADDED
@@ -0,0 +1,137 @@
+from datetime import datetime
+
+from ingestify import DatasetResource
+from ingestify.domain.models.dataset.dataset import DatasetState
+
+from .base import StatsBombBaseAPI
+
+
+class StatsBombMatchAPI(StatsBombBaseAPI):
+    def discover_selectors(self, dataset_type: str):
+        assert dataset_type == "match"
+
+        competitions = self.get(data_spec_version="v4", path="competitions")
+
+        def get_last_modified(competition):
+            if not competition["match_updated"]:
+                return None
+
+            last_modified = datetime.fromisoformat(
+                competition["match_updated"] + "+00:00"
+            )
+            if competition["match_updated_360"]:
+                last_modified = max(
+                    last_modified,
+                    datetime.fromisoformat(competition["match_updated_360"] + "+00:00"),
+                )
+            return last_modified
+
+        return [
+            dict(
+                competition_id=competition["competition_id"],
+                season_id=competition["season_id"],
+                # Passing the LastModified for an entire competition allows Ingestify to entirely skip
+                # this Selector based on a datetime based check. Dataset comparison won't happen. When the
+                # DataSpecVersion is changed, but LastModified isn't changed on the Source, new files ARE NOT ingested!
+                _last_modified=get_last_modified(competition),
+            )
+            for competition in competitions
+        ]
+
+    def find_datasets(
+        self,
+        dataset_type: str,
+        competition_id: str,
+        season_id: str,
+        match_id: str = None,
+        data_spec_versions=None,
+        dataset_collection_metadata=None,
+    ):
+        assert dataset_type == "match"
+
+        match_data_spec_version = data_spec_versions.get_version("match")
+
+        matches = self.get(
+            path=f"competitions/{competition_id}/seasons/{season_id}/matches",
+            data_spec_version=match_data_spec_version,
+        )
+
+        for match in matches:
+            if match_id:
+                if match["match_id"] != match_id:
+                    continue
+
+            last_modified = datetime.fromisoformat(match["last_updated"] + "+00:00")
+
+            if match["collection_status"] == "Complete":
+                if match["match_status"] == "available":
+                    state = DatasetState.COMPLETE
+                else:
+                    # This could be "processing"
+                    state = DatasetState.PARTIAL
+            else:
+                state = DatasetState.SCHEDULED
+
+            name = (
+                f"{match['match_date']} / "
+                f"{match['home_team']['home_team_name']} - {match['away_team']['away_team_name']}"
+            )
+
+            dataset_resource = DatasetResource(
+                dataset_resource_id=dict(
+                    competition_id=competition_id,
+                    season_id=season_id,
+                    match_id=match["match_id"],
+                ),
+                dataset_type=dataset_type,
+                provider=self.provider,
+                name=name,
+                metadata=match,
+                state=state,
+            )
+
+            dataset_resource.add_file(
+                last_modified=last_modified,
+                data_feed_key="match",
+                data_spec_version=match_data_spec_version,
+                json_content=match,
+            )
+
+            if state.is_complete:
+                name += f" / {match['home_score']}-{match['away_score']}"
+
+                for data_feed_key in ["lineups", "events"]:
+                    for data_spec_version in data_spec_versions[data_feed_key]:
+                        dataset_resource.add_file(
+                            # Note: the LastModified value can be incorrect when only match Metadata (match file)
+                            # is changed. Use it anyway for indication. Ingestify will also use the
+                            # Dataset.last_modified_at value to determine if a file should be refetched
+                            last_modified=last_modified,
+                            data_feed_key=data_feed_key,
+                            data_spec_version=data_spec_version,
+                            url=self.get_url(
+                                data_feed_key, data_spec_version, match["match_id"]
+                            ),
+                            http_options=dict(auth=(self.username, self.password)),
+                            data_serialization_format="json",
+                        )
+
+                if (
+                    match["last_updated_360"]
+                    and match["match_status_360"] == "available"
+                ):
+                    for data_spec_version in data_spec_versions.get("360-frames", []):
+                        dataset_resource.add_file(
+                            last_modified=datetime.fromisoformat(
+                                match["last_updated_360"] + "+00:00"
+                            ),
+                            data_feed_key="360-frames",
+                            data_spec_version=data_spec_version,
+                            url=self.get_url(
+                                "360-frames", data_spec_version, match["match_id"]
+                            ),
+                            http_options=dict(auth=(self.username, self.password)),
+                            data_serialization_format="json",
+                        )
+
+            yield dataset_resource
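The new find_datasets() derives a DatasetState from two StatsBomb status fields (collection_status and match_status). A minimal standalone sketch of that mapping, using the states named in the hunk above (the resolve_state helper is hypothetical, not part of the package):

    from enum import Enum

    class DatasetState(Enum):
        SCHEDULED = "scheduled"
        PARTIAL = "partial"
        COMPLETE = "complete"

    def resolve_state(match: dict) -> DatasetState:
        # Mirrors the branching in StatsBombMatchAPI.find_datasets above
        if match["collection_status"] == "Complete":
            if match["match_status"] == "available":
                return DatasetState.COMPLETE
            return DatasetState.PARTIAL  # e.g. still "processing"
        return DatasetState.SCHEDULED

    assert resolve_state(
        {"collection_status": "Complete", "match_status": "available"}
    ) is DatasetState.COMPLETE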
ingestify/infra/source/statsbomb_github.py
CHANGED
@@ -21,6 +21,7 @@ class StatsbombGithub(Source):
             dict(
                 competition_id=competition["competition_id"],
                 season_id=competition["season_id"],
+                _name=f"{competition['competition_name']} - {competition['season_name']}",
             )
             for competition in competitions
         ]
@@ -53,53 +54,54 @@ class StatsbombGithub(Source):
             name = (
                 f"{match['match_date']} / "
                 f"{match['home_team']['home_team_name']} - {match['away_team']['away_team_name']}"
+                f" / {match['home_score']}-{match['away_score']}"
             )

-            dataset_resource = DatasetResource(
-                dataset_resource_id=dict(
-                    competition_id=competition_id,
-                    season_id=season_id,
-                    match_id=match["match_id"],
-                ),
-                dataset_type=dataset_type,
-                provider=self.provider,
-                name=name,
-                metadata=match,
-                state=state,
+            dataset_resource = (
+                DatasetResource(
+                    dataset_resource_id=dict(
+                        competition_id=competition_id,
+                        season_id=season_id,
+                        match_id=match["match_id"],
+                    ),
+                    dataset_type=dataset_type,
+                    provider=self.provider,
+                    name=name,
+                    metadata=match,
+                    state=state,
+                )
+                .add_file(
+                    last_modified=last_modified,
+                    data_feed_key="match",
+                    data_spec_version=DATA_SPEC_VERSION,
+                    json_content=match,
+                )
+                .add_file(
+                    last_modified=last_modified,
+                    data_feed_key="lineups",
+                    data_spec_version=DATA_SPEC_VERSION,
+                    url=f"{BASE_URL}/lineups/{match['match_id']}.json",
+                    data_serialization_format="json",
+                )
+                .add_file(
+                    last_modified=last_modified,
+                    data_feed_key="events",
+                    data_spec_version=DATA_SPEC_VERSION,
+                    url=f"{BASE_URL}/events/{match['match_id']}.json",
+                    data_serialization_format="json",
+                )
             )

-
-
-
-
-
-
-
-
-
-
-
-                dataset_resource.add_file(
-                    last_modified=last_modified,
-                    data_feed_key=data_feed_key,
-                    data_spec_version=DATA_SPEC_VERSION,
-                    url=f"{BASE_URL}/{data_feed_key}/{match['match_id']}.json",
-                    data_serialization_format="json",
-                )
-
-            if (
-                match["last_updated_360"]
-                and match["match_status_360"] == "available"
-            ):
-                dataset_resource.add_file(
-                    last_modified=datetime.fromisoformat(
-                        match["last_updated_360"] + "+00:00"
-                    ),
-                    data_feed_key="360-frames",
-                    data_spec_version=DATA_SPEC_VERSION,
-                    url=f"{BASE_URL}/three-sixty/{match['match_id']}.json",
-                    data_serialization_format="json",
-                    http_options={"ignore_not_found": True},
-                )
+            if match["last_updated_360"] and match["match_status_360"] == "available":
+                dataset_resource.add_file(
+                    last_modified=datetime.fromisoformat(
+                        match["last_updated_360"] + "+00:00"
+                    ),
+                    data_feed_key="360-frames",
+                    data_spec_version=DATA_SPEC_VERSION,
+                    url=f"{BASE_URL}/three-sixty/{match['match_id']}.json",
+                    data_serialization_format="json",
+                    http_options={"ignore_not_found": True},
+                )

             yield dataset_resource
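The chained construction above only works if add_file() returns the resource it was called on. A self-contained toy version of that fluent pattern (the Resource class is illustrative only, not ingestify's actual DatasetResource):

    class Resource:
        def __init__(self, name: str):
            self.name = name
            self.files = []

        def add_file(self, **kwargs) -> "Resource":
            self.files.append(kwargs)
            return self  # returning self is what enables chaining

    resource = (
        Resource("match-12345")
        .add_file(data_feed_key="match")
        .add_file(data_feed_key="lineups")
        .add_file(data_feed_key="events")
    )
    assert len(resource.files) == 3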
ingestify/infra/store/dataset/sqlalchemy/repository.py
CHANGED
@@ -29,6 +29,7 @@ from ingestify.domain.models import (
     Dataset,
     DatasetCollection,
     DatasetRepository,
+    DatasetState,
     Selector,
 )
 from ingestify.domain.models.dataset.collection_metadata import (
@@ -46,6 +47,7 @@ from .tables import (
     revision_table,
     ingestion_job_summary_table,
     task_summary_table,
+    store_version_table,
 )

 logger = logging.getLogger(__name__)
@@ -159,6 +161,10 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         entities: list[dict],
         immutable_rows: bool = False,
     ):
+        if not entities:
+            # Nothing to do
+            return
+
         dialect = self.dialect.name
         if dialect == "mysql":
             from sqlalchemy.dialects.mysql import insert
@@ -230,6 +236,7 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         provider: Optional[str] = None,
         dataset_id: Optional[Union[str, List[str]]] = None,
         selector: Optional[Union[Selector, List[Selector]]] = None,
+        dataset_state: Optional[List[DatasetState]] = None,
     ):
         if dataset_id is not None:
             if isinstance(dataset_id, list):
@@ -306,6 +313,8 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
             query = query.filter(dataset_table.c.dataset_type == dataset_type)
         if provider:
             query = query.filter(dataset_table.c.provider == provider)
+        if dataset_state:
+            query = query.filter(dataset_table.c.state.in_(dataset_state))

         return query

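The new dataset_state filter relies on SQLAlchemy's Column.in_() construct. A minimal runnable sketch against an in-memory SQLite database (the schema and plain-string states are a stand-in for ingestify's real dataset table and DatasetState type):

    from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, select

    metadata = MetaData()
    datasets = Table(
        "dataset",
        metadata,
        Column("dataset_id", Integer, primary_key=True),
        Column("state", String(32)),
    )

    engine = create_engine("sqlite:///:memory:")
    metadata.create_all(engine)
    with engine.begin() as conn:
        conn.execute(
            datasets.insert(),
            [{"dataset_id": 1, "state": "COMPLETE"}, {"dataset_id": 2, "state": "SCHEDULED"}],
        )
        # Same shape as: query.filter(dataset_table.c.state.in_(dataset_state))
        query = select(datasets.c.dataset_id).where(
            datasets.c.state.in_(["COMPLETE", "PARTIAL"])
        )
        assert [row.dataset_id for row in conn.execute(query)] == [1]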
@@ -395,6 +404,9 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         dataset_id: Optional[Union[str, List[str]]] = None,
         selector: Optional[Union[Selector, List[Selector]]] = None,
         metadata_only: bool = False,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        dataset_state: Optional[List[DatasetState]] = None,
     ) -> DatasetCollection:
         def apply_query_filter(query):
             return self._filter_query(
@@ -404,15 +416,23 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
                 provider=provider,
                 dataset_id=dataset_id,
                 selector=selector,
+                dataset_state=dataset_state,
             )

         with self.session:
             # Use a contextmanager to make sure it's closed afterwards

             if not metadata_only:
+                # Apply sorting by created_at in ascending order
                 dataset_query = apply_query_filter(
                     self.session.query(dataset_table.c.dataset_id)
-                )
+                ).order_by(dataset_table.c.created_at.asc())
+
+                # Apply pagination if both page and page_size are provided
+                if page is not None and page_size is not None:
+                    offset = (page - 1) * page_size
+                    dataset_query = dataset_query.offset(offset).limit(page_size)
+
                 self._debug_query(dataset_query)
                 dataset_ids = [row.dataset_id for row in dataset_query]
                 datasets = self._load_datasets(dataset_ids)
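The pagination added above is the standard 1-based page to OFFSET/LIMIT translation. In isolation:

    def page_window(page: int, page_size: int) -> tuple:
        offset = (page - 1) * page_size  # same arithmetic as the hunk above
        return offset, page_size         # maps to .offset(...).limit(...)

    assert page_window(1, 50) == (0, 50)    # page 1 starts at row 0
    assert page_window(3, 50) == (100, 50)  # page 3 skips two full pages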
@@ -503,19 +523,25 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         with self.connect() as connection:
             try:
                 # Delete modified files related to the dataset
-
-                file_table.
-
+                connection.execute(
+                    file_table.delete().where(
+                        file_table.c.dataset_id == dataset.dataset_id
+                    )
+                )

                 # Delete revisions related to the dataset
-
-                revision_table.
-
+                connection.execute(
+                    revision_table.delete().where(
+                        revision_table.c.dataset_id == dataset.dataset_id
+                    )
+                )

                 # Delete the dataset itself
-
-                dataset_table.
-
+                connection.execute(
+                    dataset_table.delete().where(
+                        dataset_table.c.dataset_id == dataset.dataset_id
+                    )
+                )

                 connection.commit()
             except Exception:
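The deletes above follow the usual child-rows-first order: files and revisions go before the dataset row itself. A runnable SQLAlchemy Core sketch of the same table.delete().where(...) pattern, with a stand-in schema:

    from sqlalchemy import Column, Integer, MetaData, Table, create_engine

    metadata = MetaData()
    dataset_t = Table("dataset", metadata, Column("dataset_id", Integer, primary_key=True))
    file_t = Table(
        "file",
        metadata,
        Column("id", Integer, primary_key=True),
        Column("dataset_id", Integer),
    )

    engine = create_engine("sqlite:///:memory:")
    metadata.create_all(engine)
    with engine.begin() as conn:
        conn.execute(dataset_t.insert(), [{"dataset_id": 1}])
        conn.execute(file_t.insert(), [{"id": 10, "dataset_id": 1}])
        # Children first, then the parent row, as in the diff
        conn.execute(file_t.delete().where(file_t.c.dataset_id == 1))
        conn.execute(dataset_t.delete().where(dataset_t.c.dataset_id == 1))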
@@ -606,3 +632,44 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
             )
         )
         return ingestion_job_summaries
+
+    def get_store_version(self) -> Optional[str]:
+        """Get the current Ingestify version stored for this store."""
+        with self.session:
+            row = self.session.query(store_version_table.c.ingestify_version).first()
+            return row.ingestify_version if row else None
+
+    def set_store_version(self, version: str):
+        """Set the Ingestify version for this store."""
+        from ingestify.utils import utcnow
+
+        now = utcnow()
+        entity = {
+            "id": 1,
+            "ingestify_version": version,
+            "created_at": now,
+            "updated_at": now,
+        }
+
+        with self.connect() as connection:
+            try:
+                self._upsert(connection, store_version_table, [entity])
+                connection.commit()
+            except Exception:
+                connection.rollback()
+                raise
+
+    def ensure_compatible_version(self, current_version: str):
+        """Ensure the store is compatible with the current Ingestify version."""
+        stored_version = self.get_store_version()
+
+        if stored_version is None:
+            # First time setup - store the current version
+            self.set_store_version(current_version)
+            logger.info(f"Initialized store with Ingestify version {current_version}")
+        elif stored_version != current_version:
+            # Version mismatch - for now just log, future: trigger migration
+            logger.warning(
+                f"Store version mismatch: stored={stored_version}, current={current_version}. "
+                f"Future versions may require migration."
+            )
ingestify/infra/store/dataset/sqlalchemy/tables.py
CHANGED
@@ -84,6 +84,7 @@ class PathString(TypeDecorator):


 class DatasetStateString(TypeDecorator):
+    cache_ok = True
     impl = String(255)

     def process_bind_param(self, value: DatasetState, dialect):
@@ -318,6 +319,15 @@ task_summary_table = Table(
     # Column("state", RevisionStateString, default=RevisionState.PENDING_VALIDATION),
     # Column("source", JSONType()),
 )
+
+store_version_table = Table(
+    "store_version",
+    metadata,
+    Column("id", Integer, primary_key=True, default=1),
+    Column("ingestify_version", String(255), nullable=False),
+    Column("created_at", TZDateTime(6), nullable=False),
+    Column("updated_at", TZDateTime(6), nullable=False),
+)
 #
 #
 # mapper_registry = registry()
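store_version is a single-row table: a fixed primary key (id=1) combined with _upsert means repeated set_store_version() calls overwrite one row instead of appending. A sketch of the same idea using SQLAlchemy's SQLite upsert (_upsert selects a dialect-specific insert at runtime; only the mysql branch is visible in an earlier hunk, and timestamps are omitted here for brevity):

    from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine
    from sqlalchemy.dialects.sqlite import insert

    metadata = MetaData()
    store_version = Table(
        "store_version",
        metadata,
        Column("id", Integer, primary_key=True),
        Column("ingestify_version", String(255), nullable=False),
    )

    engine = create_engine("sqlite:///:memory:")
    metadata.create_all(engine)
    stmt = insert(store_version).values(id=1, ingestify_version="0.8.0")
    stmt = stmt.on_conflict_do_update(
        index_elements=["id"],
        set_={"ingestify_version": stmt.excluded.ingestify_version},
    )
    with engine.begin() as conn:
        conn.execute(stmt)  # first run inserts the row
        conn.execute(stmt)  # later runs overwrite it in place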
ingestify/main.py
CHANGED
@@ -138,12 +138,16 @@ def get_remote_datastore(url: str, bucket: str, **kwargs) -> DatasetStore:

 def get_source_cls(key: str) -> Type[Source]:
     if key.startswith("ingestify."):
-        _, type_ = key.split(".")
+        _, type_ = key.split(".", maxsplit=1)
         if type_ == "wyscout":
             from ingestify.infra.source.wyscout import Wyscout

             return Wyscout

+        elif type_ == "statsbomb.match":
+            from ingestify.infra.source.statsbomb.match import StatsBombMatchAPI
+
+            return StatsBombMatchAPI
         elif type_ == "statsbomb_github":
             from ingestify.infra.source.statsbomb_github import StatsbombGithub

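The maxsplit change is what makes the new dotted source key work; with the old split(".") the key would no longer unpack into two names:

    key = "ingestify.statsbomb.match"

    _, type_ = key.split(".", maxsplit=1)
    assert type_ == "statsbomb.match"

    # key.split(".") yields ["ingestify", "statsbomb", "match"], so the
    # two-target assignment would raise "too many values to unpack".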
@@ -183,15 +187,36 @@ def get_event_subscriber_cls(key: str) -> Type[Subscriber]:


 def get_engine(
-    config_file
+    config_file: Optional[str] = None,
+    bucket: Optional[str] = None,
+    disable_events: bool = False,
+    metadata_url: Optional[str] = None,
+    file_url: Optional[str] = None,
 ) -> IngestionEngine:
-    config = parse_config(config_file, default_value="")
-
-    logger.info("Initializing sources")
     sources = {}
-
-
-
+
+    if not config_file:
+        if not metadata_url or not file_url:
+            raise ValueError(
+                f"You must specify metadata_url and file_url in case you don't use a config_file"
+            )
+
+        config = {
+            "main": {
+                "metadata_url": metadata_url,
+                "file_url": file_url,
+                "default_bucket": bucket or "main",
+            }
+        }
+    elif not config_file:
+        raise ValueError("You must specify a config file")
+    else:
+        config = parse_config(config_file, default_value="")
+
+    logger.info("Initializing sources")
+    sys.path.append(os.path.dirname(config_file))
+    for name, source_args in config.get("sources", {}).items():
+        sources[name] = build_source(name=name, source_args=source_args)

     logger.info("Initializing IngestionEngine")
     store = get_dataset_store_by_urls(
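Based on the signature added above, get_engine() can now be called without a YAML config by passing storage URLs directly. A hedged usage sketch (the URLs are placeholders; only the parameter names come from this diff):

    from ingestify.main import get_engine

    engine = get_engine(
        metadata_url="sqlite:///./database.db",   # SQLAlchemy URL for dataset metadata
        file_url="file:///tmp/ingestify-files",   # where fetched files are stored
        bucket="main",
        disable_events=True,
    )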
@@ -244,13 +269,168 @@ def get_engine(
         # but makes it easier later one where we loop over selectors.
         selectors = [Selector.build({}, data_spec_versions=data_spec_versions)]

-
+        ingestion_plan_ = IngestionPlan(
             source=sources[ingestion_plan["source"]],
             dataset_type=ingestion_plan["dataset_type"],
             selectors=selectors,
             fetch_policy=fetch_policy,
             data_spec_versions=data_spec_versions,
         )
-        ingestion_engine.add_ingestion_plan(
+        ingestion_engine.add_ingestion_plan(ingestion_plan_)

     return ingestion_engine
+
+
+def get_dev_engine(
+    source: Source,
+    dataset_type: str,
+    data_spec_versions: dict,
+    ephemeral: bool = True,
+    configure_logging: bool = True,
+    dev_dir: Optional[str] = None,
+) -> IngestionEngine:
+    """
+    Quick development helper - creates an engine with minimal setup.
+
+    Args:
+        source: The source to test
+        dataset_type: Dataset type to ingest
+        data_spec_versions: Dict like {"hops": "v1"}
+        ephemeral: If True, uses temp dir that gets cleaned. If False, uses persistent /tmp storage.
+        configure_logging: If True, configures basic logging (default: True)
+        dev_dir: Optional custom directory for data storage (overrides ephemeral)
+
+    Returns:
+        IngestionEngine configured for development
+
+    Example:
+        >>> source = MySource(name="test", ...)
+        >>> engine = get_dev_engine(source, "hops", {"hops": "v1"})
+        >>> engine.run()
+        >>>
+        >>> # Access the datasets
+        >>> datasets = engine.store.get_dataset_collection()
+        >>> print(f"Ingested {len(datasets)} datasets")
+    """
+    import tempfile
+    from pathlib import Path
+
+    if configure_logging:
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        )
+
+    if dev_dir:
+        # Use provided directory
+        dev_dir = Path(dev_dir)
+    elif ephemeral:
+        # Use temp directory that will be cleaned up
+        import uuid
+
+        dev_dir = Path(tempfile.gettempdir()) / f"ingestify-dev-{uuid.uuid4().hex[:8]}"
+    else:
+        # Use persistent directory
+        dev_dir = Path(tempfile.gettempdir()) / "ingestify-dev"
+
+    dev_dir.mkdir(parents=True, exist_ok=True)
+    metadata_url = f"sqlite:///{dev_dir / 'database.db'}"
+    file_url = f"file://{dev_dir}"
+
+    logger.info(f"Dev mode: storing data in {dev_dir}")
+
+    engine = get_engine(
+        metadata_url=metadata_url,
+        file_url=file_url,
+        bucket="main",
+        disable_events=True,
+    )
+
+    data_spec_versions_obj = DataSpecVersionCollection.from_dict(data_spec_versions)
+
+    engine.add_ingestion_plan(
+        IngestionPlan(
+            source=source,
+            dataset_type=dataset_type,
+            selectors=[Selector.build({}, data_spec_versions=data_spec_versions_obj)],
+            fetch_policy=FetchPolicy(),
+            data_spec_versions=data_spec_versions_obj,
+        )
+    )
+
+    return engine
+
+
+def debug_source(
+    source: Source,
+    *,
+    dataset_type: str,
+    data_spec_versions: dict,
+    ephemeral: bool = True,
+    configure_logging: bool = True,
+    dev_dir: Optional[str] = None,
+    **kwargs,
+) -> IngestionEngine:
+    """
+    Debug helper - creates a dev engine, runs ingestion, and shows results.
+
+    This is a convenience wrapper around get_dev_engine() that does everything:
+    creates the engine, runs ingestion, and displays results.
+
+    Args:
+        source: The source to debug
+        dataset_type: Dataset type (e.g., "match")
+        data_spec_versions: Dict like {"match": "v1"} - explicit, no defaults!
+        ephemeral: If True, uses temp dir. If False, uses persistent /tmp storage.
+        configure_logging: If True, configures basic logging (default: True)
+        dev_dir: Optional custom directory for data storage (overrides ephemeral)
+        **kwargs: Selector arguments. For sources with discover_selectors(), these
+            filter discovered selectors. Otherwise passed to find_datasets().
+
+    Returns:
+        IngestionEngine: The engine used for ingestion (for further inspection)
+
+    Example:
+        >>> # Simple source without discover_selectors
+        >>> source = StatsBombHOPSS3(name="test", s3_bucket="my-bucket", s3_prefix="HOPS")
+        >>> engine = debug_source(source, dataset_type="hops", data_spec_versions={"hops": "v1"})
+
+        >>> # Source with discover_selectors - discovers all competitions
+        >>> source = StatsBombMatchAPI(name="test", ...)
+        >>> engine = debug_source(
+        ...     source,
+        ...     dataset_type="match",
+        ...     data_spec_versions={"match": "v6"}
+        ... )

+        >>> # Filter discovered selectors
+        >>> engine = debug_source(
+        ...     source,
+        ...     dataset_type="match",
+        ...     data_spec_versions={"match": "v6"},
+        ...     competition_id=46  # Filters to specific competition
+        ... )
+    """
+    logger.info(f"Debug mode for source: {source.name}")
+
+    engine = get_dev_engine(
+        source=source,
+        dataset_type=dataset_type,
+        data_spec_versions=data_spec_versions,
+        ephemeral=ephemeral,
+        configure_logging=configure_logging,
+        dev_dir=dev_dir,
+    )
+
+    # Run ingestion
+    # Empty selector {} automatically triggers discover_selectors() if available
+    # kwargs filter discovered selectors or are passed to find_datasets()
+    engine.run(**kwargs)
+
+    # Show results
+    datasets = engine.store.get_dataset_collection()
+    logger.info("=" * 60)
+    logger.info(f"✓ Ingestion complete: {len(datasets)} dataset(s)")
+    logger.info("=" * 60)
+
+    return engine