ingestify 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ingestify-0.3.0 → ingestify-0.3.2}/PKG-INFO +2 -2
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/__init__.py +1 -1
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/application/dataset_store.py +1 -3
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/file.py +1 -1
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/ingestion/ingestion_job_summary.py +1 -1
- ingestify-0.3.2/ingestify/infra/store/file/dummy_file_repository.py +31 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/main.py +5 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify.egg-info/PKG-INFO +2 -2
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify.egg-info/SOURCES.txt +1 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/setup.py +1 -1
- {ingestify-0.3.0 → ingestify-0.3.2}/README.md +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/application/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/application/ingestion_engine.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/application/loader.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/application/secrets_manager.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/cmdline.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/base.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/data_spec_version_collection.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/collection.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/collection_metadata.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/dataset.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/dataset_repository.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/dataset_state.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/events.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/file_collection.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/file_repository.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/identifier.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/revision.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/dataset/selector.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/event/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/event/_old_event.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/event/dispatcher.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/event/domain_event.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/event/event_bus.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/event/publisher.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/event/subscriber.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/fetch_policy.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/ingestion/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/ingestion/ingestion_job.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/ingestion/ingestion_plan.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/resources/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/resources/dataset_resource.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/sink.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/source.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/task/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/task/set.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/task/task.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/task/task_summary.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/timing.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/services/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/services/identifier_key_transformer.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/services/transformers/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/services/transformers/kloppy_to_pandas.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/exceptions.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/fetch/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/fetch/http.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/serialization/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/sink/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/sink/postgresql.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/source/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/source/statsbomb_github.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/source/wyscout.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/store/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/store/dataset/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/store/dataset/sqlalchemy/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/store/dataset/sqlalchemy/mapping.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/store/dataset/sqlalchemy/repository.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/store/file/__init__.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/store/file/local_file_repository.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/infra/store/file/s3_file_repository.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/server.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/source_base.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/statsbomb_github/README.md +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/statsbomb_github/config.yaml.jinja2 +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/statsbomb_github/database/README.md +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/statsbomb_github/query.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/wyscout/.env +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/wyscout/.gitignore +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/wyscout/README.md +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/wyscout/config.yaml.jinja2 +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/wyscout/database/README.md +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/wyscout/query.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify/utils.py +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify.egg-info/dependency_links.txt +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify.egg-info/entry_points.txt +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify.egg-info/requires.txt +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/ingestify.egg-info/top_level.txt +0 -0
- {ingestify-0.3.0 → ingestify-0.3.2}/setup.cfg +0 -0
|
@@ -294,9 +294,7 @@ class DatasetStore:
|
|
|
294
294
|
|
|
295
295
|
def get_stream(file_):
|
|
296
296
|
return reader(
|
|
297
|
-
self.file_repository.load_content(
|
|
298
|
-
bucket=self.bucket, storage_path=file_.storage_path
|
|
299
|
-
)
|
|
297
|
+
self.file_repository.load_content(storage_path=file_.storage_path)
|
|
300
298
|
)
|
|
301
299
|
|
|
302
300
|
loaded_file = LoadedFile(
|
|
@@ -116,7 +116,7 @@ class LoadedFile(BaseModel):
|
|
|
116
116
|
data_serialization_format: Optional[str] # Example: 'json'
|
|
117
117
|
storage_compression_method: Optional[str] # Example: 'gzip'
|
|
118
118
|
storage_path: Path
|
|
119
|
-
_stream: Union[BinaryIO, Callable[[], Awaitable[BinaryIO]]]
|
|
119
|
+
_stream: Union[BinaryIO, BytesIO, Callable[[], Awaitable[Union[BinaryIO, BytesIO]]]]
|
|
120
120
|
revision_id: Optional[int] = None # This can be used when a Revision is squashed
|
|
121
121
|
|
|
122
122
|
def load_stream(self):
|
{ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/models/ingestion/ingestion_job_summary.py
RENAMED
|
@@ -112,7 +112,7 @@ class IngestionJobSummary(BaseModel):
|
|
|
112
112
|
|
|
113
113
|
print(f" - Failed tasks: {self.failed_tasks}")
|
|
114
114
|
print(f" - Successful tasks: {self.successful_tasks}")
|
|
115
|
-
print(f" - Successful ignored tasks: {self.
|
|
115
|
+
print(f" - Successful ignored tasks: {self.ignored_successful_tasks}")
|
|
116
116
|
print(f" - Skipped datasets: {self.skipped_datasets}")
|
|
117
117
|
print("--------------------")
|
|
118
118
|
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import BinaryIO
|
|
4
|
+
|
|
5
|
+
from ingestify.domain.models import Dataset, FileRepository
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DummyFileRepository(FileRepository):
|
|
12
|
+
@classmethod
|
|
13
|
+
def supports(cls, url: str) -> bool:
|
|
14
|
+
return url.startswith("dummy://")
|
|
15
|
+
|
|
16
|
+
def save_content(
|
|
17
|
+
self,
|
|
18
|
+
bucket: str,
|
|
19
|
+
dataset: Dataset,
|
|
20
|
+
revision_id: int,
|
|
21
|
+
filename: str,
|
|
22
|
+
stream: BinaryIO,
|
|
23
|
+
) -> Path:
|
|
24
|
+
path = self.get_write_path(bucket, dataset, revision_id, filename)
|
|
25
|
+
|
|
26
|
+
logger.info(f"Dummy save content to {path}")
|
|
27
|
+
|
|
28
|
+
return path
|
|
29
|
+
|
|
30
|
+
def load_content(self, storage_path: str) -> BinaryIO:
|
|
31
|
+
return BinaryIO()
|
|
@@ -26,6 +26,7 @@ from ingestify.infra.store.dataset.sqlalchemy import SqlAlchemyDatasetRepository
|
|
|
26
26
|
from ingestify.infra.store.dataset.sqlalchemy.repository import (
|
|
27
27
|
SqlAlchemySessionProvider,
|
|
28
28
|
)
|
|
29
|
+
from ingestify.infra.store.file.dummy_file_repository import DummyFileRepository
|
|
29
30
|
|
|
30
31
|
logger = logging.getLogger(__name__)
|
|
31
32
|
|
|
@@ -70,6 +71,10 @@ def build_file_repository(file_url: str, identifier_transformer) -> FileReposito
|
|
|
70
71
|
repository = LocalFileRepository(
|
|
71
72
|
url=file_url, identifier_transformer=identifier_transformer
|
|
72
73
|
)
|
|
74
|
+
elif file_url.startswith("dummy://"):
|
|
75
|
+
repository = DummyFileRepository(
|
|
76
|
+
url=file_url, identifier_transformer=identifier_transformer
|
|
77
|
+
)
|
|
73
78
|
else:
|
|
74
79
|
raise Exception(f"Cannot find repository to handle file {file_url}")
|
|
75
80
|
|
|
@@ -75,6 +75,7 @@ ingestify/infra/store/dataset/sqlalchemy/__init__.py
|
|
|
75
75
|
ingestify/infra/store/dataset/sqlalchemy/mapping.py
|
|
76
76
|
ingestify/infra/store/dataset/sqlalchemy/repository.py
|
|
77
77
|
ingestify/infra/store/file/__init__.py
|
|
78
|
+
ingestify/infra/store/file/dummy_file_repository.py
|
|
78
79
|
ingestify/infra/store/file/local_file_repository.py
|
|
79
80
|
ingestify/infra/store/file/s3_file_repository.py
|
|
80
81
|
ingestify/static/templates/statsbomb_github/README.md
|
|
@@ -26,7 +26,7 @@ def setup_package():
|
|
|
26
26
|
author="Koen Vossen",
|
|
27
27
|
author_email="info@koenvossen.nl",
|
|
28
28
|
license="AGPL",
|
|
29
|
-
description="
|
|
29
|
+
description="Data Ingestion Framework",
|
|
30
30
|
long_description=readme,
|
|
31
31
|
long_description_content_type="text/markdown",
|
|
32
32
|
packages=setuptools.find_packages(exclude=["tests"]),
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ingestify-0.3.0 → ingestify-0.3.2}/ingestify/domain/services/transformers/kloppy_to_pandas.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/statsbomb_github/config.yaml.jinja2
RENAMED
|
File without changes
|
{ingestify-0.3.0 → ingestify-0.3.2}/ingestify/static/templates/statsbomb_github/database/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|