ingestify 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ingestify/__init__.py CHANGED
@@ -8,4 +8,4 @@ if not __INGESTIFY_SETUP__:
8
8
  from .infra import retrieve_http
9
9
  from .source_base import Source, DatasetResource
10
10
 
11
- __version__ = "0.3.0"
11
+ __version__ = "0.3.2"
@@ -294,9 +294,7 @@ class DatasetStore:
294
294
 
295
295
  def get_stream(file_):
296
296
  return reader(
297
- self.file_repository.load_content(
298
- bucket=self.bucket, storage_path=file_.storage_path
299
- )
297
+ self.file_repository.load_content(storage_path=file_.storage_path)
300
298
  )
301
299
 
302
300
  loaded_file = LoadedFile(
@@ -116,7 +116,7 @@ class LoadedFile(BaseModel):
116
116
  data_serialization_format: Optional[str] # Example: 'json'
117
117
  storage_compression_method: Optional[str] # Example: 'gzip'
118
118
  storage_path: Path
119
- _stream: Union[BinaryIO, Callable[[], Awaitable[BinaryIO]]]
119
+ _stream: Union[BinaryIO, BytesIO, Callable[[], Awaitable[Union[BinaryIO, BytesIO]]]]
120
120
  revision_id: Optional[int] = None # This can be used when a Revision is squashed
121
121
 
122
122
  def load_stream(self):
@@ -112,7 +112,7 @@ class IngestionJobSummary(BaseModel):
112
112
 
113
113
  print(f" - Failed tasks: {self.failed_tasks}")
114
114
  print(f" - Successful tasks: {self.successful_tasks}")
115
- print(f" - Successful ignored tasks: {self.successful_tasks}")
115
+ print(f" - Successful ignored tasks: {self.ignored_successful_tasks}")
116
116
  print(f" - Skipped datasets: {self.skipped_datasets}")
117
117
  print("--------------------")
118
118
 
@@ -0,0 +1,31 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import BinaryIO
4
+
5
+ from ingestify.domain.models import Dataset, FileRepository
6
+
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class DummyFileRepository(FileRepository):
12
+ @classmethod
13
+ def supports(cls, url: str) -> bool:
14
+ return url.startswith("dummy://")
15
+
16
+ def save_content(
17
+ self,
18
+ bucket: str,
19
+ dataset: Dataset,
20
+ revision_id: int,
21
+ filename: str,
22
+ stream: BinaryIO,
23
+ ) -> Path:
24
+ path = self.get_write_path(bucket, dataset, revision_id, filename)
25
+
26
+ logger.info(f"Dummy save content to {path}")
27
+
28
+ return path
29
+
30
+ def load_content(self, storage_path: str) -> BinaryIO:
31
+ return BinaryIO()
ingestify/main.py CHANGED
@@ -26,6 +26,7 @@ from ingestify.infra.store.dataset.sqlalchemy import SqlAlchemyDatasetRepository
26
26
  from ingestify.infra.store.dataset.sqlalchemy.repository import (
27
27
  SqlAlchemySessionProvider,
28
28
  )
29
+ from ingestify.infra.store.file.dummy_file_repository import DummyFileRepository
29
30
 
30
31
  logger = logging.getLogger(__name__)
31
32
 
@@ -70,6 +71,10 @@ def build_file_repository(file_url: str, identifier_transformer) -> FileReposito
70
71
  repository = LocalFileRepository(
71
72
  url=file_url, identifier_transformer=identifier_transformer
72
73
  )
74
+ elif file_url.startswith("dummy://"):
75
+ repository = DummyFileRepository(
76
+ url=file_url, identifier_transformer=identifier_transformer
77
+ )
73
78
  else:
74
79
  raise Exception(f"Cannot find repository to handle file {file_url}")
75
80
 
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ingestify
3
- Version: 0.3.0
4
- Summary: Standardizing soccer tracking- and event data
3
+ Version: 0.3.2
4
+ Summary: Data Ingestion Framework
5
5
  Author: Koen Vossen
6
6
  Author-email: info@koenvossen.nl
7
7
  License: AGPL
@@ -1,12 +1,12 @@
1
- ingestify/__init__.py,sha256=DnPPEtJT32gAPuUKXgIsqUE4fIvc6QA96vrcKr6nz6A,301
1
+ ingestify/__init__.py,sha256=2tHAJjPSk4iRGc9COnhyklt1-vjSeOYzUOOXHj7HxEA,301
2
2
  ingestify/cmdline.py,sha256=bIuyPgGEw4wIglNzpG9zp7TsJozsP8NSVsCe4eAyWUg,7189
3
3
  ingestify/exceptions.py,sha256=wMMuajl4AkQRfW60TLN7btJmQaH8-lUczXyW_2g9kOU,143
4
- ingestify/main.py,sha256=0sTNoLcS7euOavIAviQIMTolRnXsvOvNbmFdXgXgxhE,8516
4
+ ingestify/main.py,sha256=Xr0VbGgstPO7doDX18xqk4lBb4W2sbGWtQuXZaARsHA,8763
5
5
  ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
6
6
  ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
7
7
  ingestify/utils.py,sha256=HETGhAoUlutLG0cQR63nac2JbFei9gnktDHeBQoYWfU,5692
8
8
  ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- ingestify/application/dataset_store.py,sha256=6xMHa_ShyPOyegIKl2xwmRl3BlV5i21z95cpKW3oARw,11712
9
+ ingestify/application/dataset_store.py,sha256=5CZ2v_fjqhNyC8JdjE9O4huTwy3WtHhawyc8Gw4DeJ4,11646
10
10
  ingestify/application/ingestion_engine.py,sha256=PtMjKMpvfqB802G5zfKLzyamdH7qFOXl3x6_97y8w60,2288
11
11
  ingestify/application/loader.py,sha256=v8ZcpMDEml9k_uFPFqT4WaCjXED_OIpAr7g0Pz5Hp6Y,7153
12
12
  ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
@@ -25,7 +25,7 @@ ingestify/domain/models/dataset/dataset.py,sha256=ReL50BXNaJVU29OB5_9CQEI7BekWsg
25
25
  ingestify/domain/models/dataset/dataset_repository.py,sha256=kUjiqW58kOUOli1gZCLR5xw4dBX0bqI1UJsf16hgNsQ,812
26
26
  ingestify/domain/models/dataset/dataset_state.py,sha256=O95mea5N34HDXw7XsYzxHna4FVk_T-ZNUDezkvt7VzY,220
27
27
  ingestify/domain/models/dataset/events.py,sha256=58VacQejQt-WPh9BywP4st5McauM3gXBQo0kaDnSekY,481
28
- ingestify/domain/models/dataset/file.py,sha256=nuoZI9GI5OysYwWCCyNsHMlm1Z9A1GbEKd38jvBzJ4E,4119
28
+ ingestify/domain/models/dataset/file.py,sha256=1oj03zKdkO_9F85LuDcihbB0Kr3suf12KZNGHpVo3w0,4144
29
29
  ingestify/domain/models/dataset/file_collection.py,sha256=yaQmqFlmbajLCkU5QnjgqCvKzvVEZJrXVvinx5UGHcM,1193
30
30
  ingestify/domain/models/dataset/file_repository.py,sha256=9EQprch9isAH2pbK7e7tfOKl6ulip4Ij1kBCTbO_rTc,1721
31
31
  ingestify/domain/models/dataset/identifier.py,sha256=EJYsxt0OS_43Y989DZQq8U9NjwmtvnHGYGMe6-hOBlI,575
@@ -40,7 +40,7 @@ ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDY
40
40
  ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
41
41
  ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  ingestify/domain/models/ingestion/ingestion_job.py,sha256=U6B62c7NGeHBAjmKhgOa4uHeul34xyR66WtWaPSRNTU,12276
43
- ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=1l9O3QJkYLs74HhrwAijwNEriPMwHN9OFG64Iz4z3uI,4262
43
+ ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=e8wULXsKAGNGrOV4dyiRcFlAfYCWcLa2iqJKNMwirlk,4270
44
44
  ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
45
45
  ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
46
46
  ingestify/domain/models/resources/dataset_resource.py,sha256=NRnN029ct3P_Eg2d9Unb1t7A12Ksv_emBGhoe9DpPwM,3118
@@ -67,6 +67,7 @@ ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOO
67
67
  ingestify/infra/store/dataset/sqlalchemy/mapping.py,sha256=UlEIfNusSOEWOxPi_ORrdLSylbi6-TO1qwEmcrBLwog,9447
68
68
  ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=-eSR_F9tS9Hd3JNEpoJoDAb5RY38rFaKLMI3eBedjx8,7068
69
69
  ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
70
+ ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
70
71
  ingestify/infra/store/file/local_file_repository.py,sha256=1hhLqds5LlppJq2QBB0oN0Q98j6aXreCtYQYz3Q1P8g,819
71
72
  ingestify/infra/store/file/s3_file_repository.py,sha256=_sekV1rfEbwIaSGhKRnFQlj92E9qNgONiwXt6ZLCyGg,1188
72
73
  ingestify/static/templates/statsbomb_github/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -79,8 +80,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
79
80
  ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
80
81
  ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
81
82
  ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
82
- ingestify-0.3.0.dist-info/METADATA,sha256=-QlChdV6OYWkqSyXUmkQTG4deBliRsSmmZMTWKeURnI,18853
83
- ingestify-0.3.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
84
- ingestify-0.3.0.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
85
- ingestify-0.3.0.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
86
- ingestify-0.3.0.dist-info/RECORD,,
83
+ ingestify-0.3.2.dist-info/METADATA,sha256=xV0iGbmGSRU0NUhOt-Q6-x3yeAyFZwzLc12js8ut3No,18832
84
+ ingestify-0.3.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
85
+ ingestify-0.3.2.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
86
+ ingestify-0.3.2.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
87
+ ingestify-0.3.2.dist-info/RECORD,,