ingestify 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ingestify/__init__.py CHANGED
@@ -8,4 +8,4 @@ if not __INGESTIFY_SETUP__:
8
8
  from .infra import retrieve_http
9
9
  from .source_base import Source, DatasetResource
10
10
 
11
- __version__ = "0.3.1"
11
+ __version__ = "0.3.3"
@@ -294,9 +294,7 @@ class DatasetStore:
294
294
 
295
295
  def get_stream(file_):
296
296
  return reader(
297
- self.file_repository.load_content(
298
- bucket=self.bucket, storage_path=file_.storage_path
299
- )
297
+ self.file_repository.load_content(storage_path=file_.storage_path)
300
298
  )
301
299
 
302
300
  loaded_file = LoadedFile(
@@ -116,7 +116,7 @@ class LoadedFile(BaseModel):
116
116
  data_serialization_format: Optional[str] # Example: 'json'
117
117
  storage_compression_method: Optional[str] # Example: 'gzip'
118
118
  storage_path: Path
119
- _stream: Union[BinaryIO, Callable[[], Awaitable[BinaryIO]]]
119
+ _stream: Union[BinaryIO, BytesIO, Callable[[], Awaitable[Union[BinaryIO, BytesIO]]]]
120
120
  revision_id: Optional[int] = None # This can be used when a Revision is squashed
121
121
 
122
122
  def load_stream(self):
@@ -112,7 +112,7 @@ class IngestionJobSummary(BaseModel):
112
112
 
113
113
  print(f" - Failed tasks: {self.failed_tasks}")
114
114
  print(f" - Successful tasks: {self.successful_tasks}")
115
- print(f" - Successful ignored tasks: {self.successful_tasks}")
115
+ print(f" - Successful ignored tasks: {self.ignored_successful_tasks}")
116
116
  print(f" - Skipped datasets: {self.skipped_datasets}")
117
117
  print("--------------------")
118
118
 
@@ -289,7 +289,7 @@ task_summary_table = Table(
289
289
  ForeignKey("ingestion_job_summary.ingestion_job_summary_id"),
290
290
  primary_key=True,
291
291
  ),
292
- Column("task_id", Integer, primary_key=True),
292
+ Column("task_id", String(255), primary_key=True),
293
293
  Column("started_at", TZDateTime(6)),
294
294
  Column("ended_at", TZDateTime(6)),
295
295
  Column("operation", OperationString),
@@ -17,7 +17,11 @@ class S3FileRepository(FileRepository):
17
17
  return self._s3
18
18
 
19
19
  def __getstate__(self):
20
- return {"base_dir": self.base_dir, "_s3": None}
20
+ return {
21
+ "base_dir": self.base_dir,
22
+ "_s3": None,
23
+ "identifier_transformer": self.identifier_transformer,
24
+ }
21
25
 
22
26
  def save_content(
23
27
  self,
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ingestify
3
- Version: 0.3.1
4
- Summary: Standardizing soccer tracking- and event data
3
+ Version: 0.3.3
4
+ Summary: Data Ingestion Framework
5
5
  Author: Koen Vossen
6
6
  Author-email: info@koenvossen.nl
7
7
  License: AGPL
@@ -1,4 +1,4 @@
1
- ingestify/__init__.py,sha256=8y7U8yZ0adoZcmOnba3KaElfLIy3c9Md6GFYPOrt8xc,301
1
+ ingestify/__init__.py,sha256=skDa1VfOP7IslAz1tXtfTAwPzohhFlzwGkD_1wV8m50,301
2
2
  ingestify/cmdline.py,sha256=bIuyPgGEw4wIglNzpG9zp7TsJozsP8NSVsCe4eAyWUg,7189
3
3
  ingestify/exceptions.py,sha256=wMMuajl4AkQRfW60TLN7btJmQaH8-lUczXyW_2g9kOU,143
4
4
  ingestify/main.py,sha256=Xr0VbGgstPO7doDX18xqk4lBb4W2sbGWtQuXZaARsHA,8763
@@ -6,7 +6,7 @@ ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
6
6
  ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
7
7
  ingestify/utils.py,sha256=HETGhAoUlutLG0cQR63nac2JbFei9gnktDHeBQoYWfU,5692
8
8
  ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- ingestify/application/dataset_store.py,sha256=6xMHa_ShyPOyegIKl2xwmRl3BlV5i21z95cpKW3oARw,11712
9
+ ingestify/application/dataset_store.py,sha256=5CZ2v_fjqhNyC8JdjE9O4huTwy3WtHhawyc8Gw4DeJ4,11646
10
10
  ingestify/application/ingestion_engine.py,sha256=PtMjKMpvfqB802G5zfKLzyamdH7qFOXl3x6_97y8w60,2288
11
11
  ingestify/application/loader.py,sha256=v8ZcpMDEml9k_uFPFqT4WaCjXED_OIpAr7g0Pz5Hp6Y,7153
12
12
  ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
@@ -25,7 +25,7 @@ ingestify/domain/models/dataset/dataset.py,sha256=ReL50BXNaJVU29OB5_9CQEI7BekWsg
25
25
  ingestify/domain/models/dataset/dataset_repository.py,sha256=kUjiqW58kOUOli1gZCLR5xw4dBX0bqI1UJsf16hgNsQ,812
26
26
  ingestify/domain/models/dataset/dataset_state.py,sha256=O95mea5N34HDXw7XsYzxHna4FVk_T-ZNUDezkvt7VzY,220
27
27
  ingestify/domain/models/dataset/events.py,sha256=58VacQejQt-WPh9BywP4st5McauM3gXBQo0kaDnSekY,481
28
- ingestify/domain/models/dataset/file.py,sha256=nuoZI9GI5OysYwWCCyNsHMlm1Z9A1GbEKd38jvBzJ4E,4119
28
+ ingestify/domain/models/dataset/file.py,sha256=1oj03zKdkO_9F85LuDcihbB0Kr3suf12KZNGHpVo3w0,4144
29
29
  ingestify/domain/models/dataset/file_collection.py,sha256=yaQmqFlmbajLCkU5QnjgqCvKzvVEZJrXVvinx5UGHcM,1193
30
30
  ingestify/domain/models/dataset/file_repository.py,sha256=9EQprch9isAH2pbK7e7tfOKl6ulip4Ij1kBCTbO_rTc,1721
31
31
  ingestify/domain/models/dataset/identifier.py,sha256=EJYsxt0OS_43Y989DZQq8U9NjwmtvnHGYGMe6-hOBlI,575
@@ -40,7 +40,7 @@ ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDY
40
40
  ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
41
41
  ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  ingestify/domain/models/ingestion/ingestion_job.py,sha256=U6B62c7NGeHBAjmKhgOa4uHeul34xyR66WtWaPSRNTU,12276
43
- ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=1l9O3QJkYLs74HhrwAijwNEriPMwHN9OFG64Iz4z3uI,4262
43
+ ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=e8wULXsKAGNGrOV4dyiRcFlAfYCWcLa2iqJKNMwirlk,4270
44
44
  ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
45
45
  ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
46
46
  ingestify/domain/models/resources/dataset_resource.py,sha256=NRnN029ct3P_Eg2d9Unb1t7A12Ksv_emBGhoe9DpPwM,3118
@@ -64,12 +64,12 @@ ingestify/infra/source/wyscout.py,sha256=DxCzdkzYpVRHTfV9GpF8pe3FzwIk-WHYUlea6nO
64
64
  ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
65
65
  ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
66
  ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
67
- ingestify/infra/store/dataset/sqlalchemy/mapping.py,sha256=UlEIfNusSOEWOxPi_ORrdLSylbi6-TO1qwEmcrBLwog,9447
67
+ ingestify/infra/store/dataset/sqlalchemy/mapping.py,sha256=pyqxy7LAyRK2Mt6knaKYozXN07oNPYztU9x4DGDQD0U,9451
68
68
  ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=-eSR_F9tS9Hd3JNEpoJoDAb5RY38rFaKLMI3eBedjx8,7068
69
69
  ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
70
70
  ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
71
71
  ingestify/infra/store/file/local_file_repository.py,sha256=1hhLqds5LlppJq2QBB0oN0Q98j6aXreCtYQYz3Q1P8g,819
72
- ingestify/infra/store/file/s3_file_repository.py,sha256=_sekV1rfEbwIaSGhKRnFQlj92E9qNgONiwXt6ZLCyGg,1188
72
+ ingestify/infra/store/file/s3_file_repository.py,sha256=Zu7j3qqeQhKi9Lx8UQRKZ2g1vT0h0OucOaHjq0uZpFs,1290
73
73
  ingestify/static/templates/statsbomb_github/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
74
  ingestify/static/templates/statsbomb_github/config.yaml.jinja2,sha256=_gAuAipfBL3ddLacyS1IBP5JluvPS2vmrb8GGaFtcUM,386
75
75
  ingestify/static/templates/statsbomb_github/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
@@ -80,8 +80,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
80
80
  ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
81
81
  ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
82
82
  ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
83
- ingestify-0.3.1.dist-info/METADATA,sha256=ddM76vlnc0jHh_b79Mxc9fHW5L38O2Aque3rhcKv5pI,18853
84
- ingestify-0.3.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
85
- ingestify-0.3.1.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
86
- ingestify-0.3.1.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
87
- ingestify-0.3.1.dist-info/RECORD,,
83
+ ingestify-0.3.3.dist-info/METADATA,sha256=ln_MGBlqm4wgPBldNv_VofJ4snw981jv667X4JOylmY,18832
84
+ ingestify-0.3.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
85
+ ingestify-0.3.3.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
86
+ ingestify-0.3.3.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
87
+ ingestify-0.3.3.dist-info/RECORD,,