ingestify 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestify/__init__.py +1 -1
- ingestify/application/dataset_store.py +4 -4
- ingestify/application/ingestion_engine.py +7 -2
- ingestify/application/loader.py +14 -1
- ingestify/cmdline.py +20 -2
- ingestify/domain/models/base.py +2 -19
- ingestify/domain/models/dataset/collection_metadata.py +2 -1
- ingestify/domain/models/dataset/dataset.py +18 -1
- ingestify/domain/models/dataset/file.py +5 -5
- ingestify/domain/models/dataset/revision.py +6 -2
- ingestify/domain/models/ingestion/ingestion_job.py +69 -45
- ingestify/domain/models/ingestion/ingestion_job_summary.py +48 -40
- ingestify/domain/models/task/task_summary.py +11 -32
- ingestify/exceptions.py +4 -0
- ingestify/infra/serialization/__init__.py +15 -43
- ingestify/infra/store/dataset/sqlalchemy/repository.py +263 -19
- ingestify/infra/store/dataset/sqlalchemy/{mapping.py → tables.py} +103 -79
- ingestify/utils.py +48 -16
- {ingestify-0.3.3.dist-info → ingestify-0.4.0.dist-info}/METADATA +16 -16
- {ingestify-0.3.3.dist-info → ingestify-0.4.0.dist-info}/RECORD +23 -23
- {ingestify-0.3.3.dist-info → ingestify-0.4.0.dist-info}/WHEEL +0 -0
- {ingestify-0.3.3.dist-info → ingestify-0.4.0.dist-info}/entry_points.txt +0 -0
- {ingestify-0.3.3.dist-info → ingestify-0.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
from dataclasses import is_dataclass, asdict
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
from typing import Optional
|
|
5
4
|
|
|
@@ -15,33 +14,31 @@ from sqlalchemy import (
|
|
|
15
14
|
String,
|
|
16
15
|
Table,
|
|
17
16
|
TypeDecorator,
|
|
18
|
-
Boolean,
|
|
19
17
|
)
|
|
20
|
-
from sqlalchemy.orm import registry, relationship
|
|
21
18
|
|
|
22
|
-
from
|
|
23
|
-
|
|
19
|
+
from sqlalchemy.dialects.postgresql import JSONB
|
|
20
|
+
|
|
21
|
+
from ingestify.domain import Identifier, DataSpecVersionCollection, Selector
|
|
24
22
|
from ingestify.domain.models.dataset.dataset import DatasetState
|
|
25
|
-
from ingestify.domain.models.ingestion.ingestion_job_summary import
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
from ingestify.domain.models.task.task_summary import TaskSummary, Operation, TaskStatus
|
|
23
|
+
from ingestify.domain.models.ingestion.ingestion_job_summary import IngestionJobState
|
|
24
|
+
|
|
25
|
+
from ingestify.domain.models.task.task_summary import Operation, TaskState
|
|
29
26
|
from ingestify.domain.models.timing import Timing
|
|
30
27
|
from ingestify.domain.models.dataset.revision import RevisionState
|
|
31
28
|
|
|
32
29
|
|
|
33
|
-
def JSONType(serializer=None, deserializer=None):
|
|
30
|
+
def JSONType(serializer=None, deserializer=None, base_type=JSON):
|
|
34
31
|
class _JsonType(TypeDecorator):
|
|
35
32
|
cache_ok = True
|
|
36
|
-
impl =
|
|
33
|
+
impl = base_type
|
|
37
34
|
|
|
38
35
|
def process_bind_param(self, value, dialect):
|
|
39
|
-
if serializer is not None:
|
|
36
|
+
if serializer and value is not None:
|
|
40
37
|
return serializer(value)
|
|
41
38
|
return value
|
|
42
39
|
|
|
43
40
|
def process_result_value(self, value, dialect):
|
|
44
|
-
if deserializer is not None:
|
|
41
|
+
if deserializer and value is not None:
|
|
45
42
|
return deserializer(value)
|
|
46
43
|
return value
|
|
47
44
|
|
|
@@ -106,7 +103,7 @@ class RevisionStateString(TypeDecorator):
|
|
|
106
103
|
|
|
107
104
|
def process_result_value(self, value, dialect):
|
|
108
105
|
if not value:
|
|
109
|
-
return
|
|
106
|
+
return RevisionState.PENDING_VALIDATION
|
|
110
107
|
|
|
111
108
|
return RevisionState[value]
|
|
112
109
|
|
|
@@ -124,20 +121,31 @@ class OperationString(TypeDecorator):
|
|
|
124
121
|
return Operation[value]
|
|
125
122
|
|
|
126
123
|
|
|
127
|
-
class
|
|
124
|
+
class TaskStateString(TypeDecorator):
|
|
128
125
|
impl = String(255)
|
|
129
126
|
|
|
130
|
-
def process_bind_param(self, value:
|
|
127
|
+
def process_bind_param(self, value: TaskState, dialect):
|
|
131
128
|
return value.value
|
|
132
129
|
|
|
133
130
|
def process_result_value(self, value, dialect):
|
|
134
131
|
if not value:
|
|
135
132
|
return value
|
|
136
133
|
|
|
137
|
-
return
|
|
134
|
+
return TaskState[value]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class IngestionJobStateString(TypeDecorator):
|
|
138
|
+
impl = String(255)
|
|
139
|
+
|
|
140
|
+
def process_bind_param(self, value: IngestionJobState, dialect):
|
|
141
|
+
return value.value
|
|
142
|
+
|
|
143
|
+
def process_result_value(self, value, dialect):
|
|
144
|
+
if not value:
|
|
145
|
+
return value
|
|
138
146
|
|
|
147
|
+
return IngestionJobState[value]
|
|
139
148
|
|
|
140
|
-
mapper_registry = registry()
|
|
141
149
|
|
|
142
150
|
metadata = MetaData()
|
|
143
151
|
|
|
@@ -146,14 +154,19 @@ dataset_table = Table(
|
|
|
146
154
|
metadata,
|
|
147
155
|
Column("bucket", String(255), default=None),
|
|
148
156
|
Column("dataset_id", String(255), primary_key=True),
|
|
149
|
-
Column("provider", String(255)),
|
|
150
|
-
Column("dataset_type", String(255)),
|
|
157
|
+
Column("provider", String(255), index=True),
|
|
158
|
+
Column("dataset_type", String(255), index=True),
|
|
151
159
|
Column("state", DatasetStateString),
|
|
152
160
|
Column("name", String(255)),
|
|
153
|
-
Column(
|
|
161
|
+
Column(
|
|
162
|
+
"identifier",
|
|
163
|
+
# Use JSONB when available
|
|
164
|
+
JSON().with_variant(JSONB(), "postgresql"),
|
|
165
|
+
),
|
|
154
166
|
Column("metadata", JSON),
|
|
155
167
|
Column("created_at", TZDateTime(6)),
|
|
156
168
|
Column("updated_at", TZDateTime(6)),
|
|
169
|
+
Column("last_modified_at", TZDateTime(6)),
|
|
157
170
|
)
|
|
158
171
|
|
|
159
172
|
revision_table = Table(
|
|
@@ -193,40 +206,7 @@ file_table = Table(
|
|
|
193
206
|
),
|
|
194
207
|
)
|
|
195
208
|
|
|
196
|
-
|
|
197
|
-
mapper_registry.map_imperatively(
|
|
198
|
-
Dataset,
|
|
199
|
-
dataset_table,
|
|
200
|
-
properties={
|
|
201
|
-
"revisions": relationship(
|
|
202
|
-
Revision,
|
|
203
|
-
backref="dataset",
|
|
204
|
-
order_by=revision_table.c.revision_id,
|
|
205
|
-
lazy="selectin",
|
|
206
|
-
cascade="all, delete-orphan",
|
|
207
|
-
),
|
|
208
|
-
},
|
|
209
|
-
)
|
|
210
|
-
|
|
211
|
-
mapper_registry.map_imperatively(
|
|
212
|
-
Revision,
|
|
213
|
-
revision_table,
|
|
214
|
-
properties={
|
|
215
|
-
"modified_files": relationship(
|
|
216
|
-
File,
|
|
217
|
-
order_by=file_table.c.file_id,
|
|
218
|
-
primaryjoin="and_(Revision.revision_id==File.revision_id, Revision.dataset_id==File.dataset_id)",
|
|
219
|
-
lazy="selectin",
|
|
220
|
-
cascade="all, delete-orphan",
|
|
221
|
-
)
|
|
222
|
-
},
|
|
223
|
-
)
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
mapper_registry.map_imperatively(File, file_table)
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
ingestion_job_summary = Table(
|
|
209
|
+
ingestion_job_summary_table = Table(
|
|
230
210
|
"ingestion_job_summary",
|
|
231
211
|
metadata,
|
|
232
212
|
Column("ingestion_job_summary_id", String(255), primary_key=True),
|
|
@@ -238,18 +218,25 @@ ingestion_job_summary = Table(
|
|
|
238
218
|
Column(
|
|
239
219
|
"data_spec_versions",
|
|
240
220
|
JSONType(
|
|
241
|
-
serializer=lambda data_spec_versions:
|
|
221
|
+
serializer=lambda data_spec_versions: {
|
|
222
|
+
key: list(value) for key, value in data_spec_versions.items()
|
|
223
|
+
},
|
|
242
224
|
deserializer=lambda data_spec_versions: DataSpecVersionCollection.from_dict(
|
|
243
225
|
data_spec_versions
|
|
244
226
|
),
|
|
245
227
|
),
|
|
246
228
|
),
|
|
247
229
|
Column(
|
|
248
|
-
"selector",
|
|
230
|
+
"selector",
|
|
231
|
+
JSONType(
|
|
232
|
+
serializer=lambda selector: selector.filtered_attributes,
|
|
233
|
+
deserializer=lambda selector: Selector(**selector),
|
|
234
|
+
),
|
|
249
235
|
),
|
|
250
236
|
Column("started_at", TZDateTime(6)),
|
|
251
|
-
Column("
|
|
237
|
+
Column("ended_at", TZDateTime(6)),
|
|
252
238
|
# Some task counters
|
|
239
|
+
Column("state", IngestionJobStateString),
|
|
253
240
|
Column("successful_tasks", Integer),
|
|
254
241
|
Column("ignored_successful_tasks", Integer),
|
|
255
242
|
Column("skipped_datasets", Integer),
|
|
@@ -258,7 +245,10 @@ ingestion_job_summary = Table(
|
|
|
258
245
|
"timings",
|
|
259
246
|
JSONType(
|
|
260
247
|
serializer=lambda timings: [
|
|
261
|
-
|
|
248
|
+
# Timing is probably already a dictionary. Load it into Timing first, so it can be dumped
|
|
249
|
+
# in json mode
|
|
250
|
+
Timing.model_validate(timing).model_dump(mode="json")
|
|
251
|
+
for timing in timings
|
|
262
252
|
],
|
|
263
253
|
deserializer=lambda timings: [
|
|
264
254
|
Timing.model_validate(timing) for timing in timings
|
|
@@ -299,12 +289,13 @@ task_summary_table = Table(
|
|
|
299
289
|
Column("persisted_file_count", Integer),
|
|
300
290
|
Column("bytes_retrieved", Integer),
|
|
301
291
|
Column("last_modified", TZDateTime(6)),
|
|
302
|
-
Column("
|
|
292
|
+
Column("state", TaskStateString),
|
|
303
293
|
Column(
|
|
304
294
|
"timings",
|
|
305
295
|
JSONType(
|
|
306
296
|
serializer=lambda timings: [
|
|
307
|
-
timing.model_dump(mode="json")
|
|
297
|
+
Timing.model_validate(timing).model_dump(mode="json")
|
|
298
|
+
for timing in timings
|
|
308
299
|
],
|
|
309
300
|
deserializer=lambda timings: [
|
|
310
301
|
Timing.model_validate(timing) for timing in timings
|
|
@@ -316,21 +307,54 @@ task_summary_table = Table(
|
|
|
316
307
|
# Column("state", RevisionStateString, default=RevisionState.PENDING_VALIDATION),
|
|
317
308
|
# Column("source", JSONType()),
|
|
318
309
|
)
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
mapper_registry
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
)
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
310
|
+
#
|
|
311
|
+
#
|
|
312
|
+
# mapper_registry = registry()
|
|
313
|
+
#
|
|
314
|
+
# mapper_registry.map_imperatively(
|
|
315
|
+
# Dataset,
|
|
316
|
+
# dataset_table,
|
|
317
|
+
# properties={
|
|
318
|
+
# "revisions": relationship(
|
|
319
|
+
# Revision,
|
|
320
|
+
# backref="dataset",
|
|
321
|
+
# order_by=revision_table.c.revision_id,
|
|
322
|
+
# lazy="selectin",
|
|
323
|
+
# cascade="all, delete-orphan",
|
|
324
|
+
# ),
|
|
325
|
+
# },
|
|
326
|
+
# )
|
|
327
|
+
#
|
|
328
|
+
# mapper_registry.map_imperatively(
|
|
329
|
+
# Revision,
|
|
330
|
+
# revision_table,
|
|
331
|
+
# properties={
|
|
332
|
+
# "modified_files": relationship(
|
|
333
|
+
# File,
|
|
334
|
+
# order_by=file_table.c.file_id,
|
|
335
|
+
# primaryjoin="and_(Revision.revision_id==File.revision_id, Revision.dataset_id==File.dataset_id)",
|
|
336
|
+
# lazy="selectin",
|
|
337
|
+
# cascade="all, delete-orphan",
|
|
338
|
+
# )
|
|
339
|
+
# },
|
|
340
|
+
# )
|
|
341
|
+
#
|
|
342
|
+
#
|
|
343
|
+
# mapper_registry.map_imperatively(File, file_table)
|
|
344
|
+
#
|
|
345
|
+
# mapper_registry.map_imperatively(
|
|
346
|
+
# IngestionJobSummary,
|
|
347
|
+
# ingestion_job_summary,
|
|
348
|
+
# properties={
|
|
349
|
+
# "task_summaries": relationship(
|
|
350
|
+
# TaskSummary,
|
|
351
|
+
# backref="ingestion_job_summary",
|
|
352
|
+
# # order_by=task_summary_table.c.revision_id,
|
|
353
|
+
# lazy="selectin",
|
|
354
|
+
# cascade="all, delete-orphan",
|
|
355
|
+
# ),
|
|
356
|
+
# },
|
|
357
|
+
# )
|
|
358
|
+
#
|
|
359
|
+
#
|
|
360
|
+
# mapper_registry.map_imperatively(TaskSummary, task_summary_table)
|
ingestify/utils.py
CHANGED
|
@@ -1,34 +1,23 @@
|
|
|
1
|
-
import abc
|
|
2
|
-
import asyncio
|
|
3
|
-
import inspect
|
|
4
1
|
import logging
|
|
5
2
|
import os
|
|
6
3
|
import time
|
|
7
4
|
import re
|
|
5
|
+
import traceback
|
|
6
|
+
from contextlib import contextmanager
|
|
8
7
|
from multiprocessing import get_context, cpu_count, get_all_start_methods
|
|
9
8
|
|
|
10
9
|
from datetime import datetime, timezone
|
|
11
10
|
from string import Template
|
|
12
|
-
from typing import
|
|
13
|
-
Dict,
|
|
14
|
-
Generic,
|
|
15
|
-
Type,
|
|
16
|
-
TypeVar,
|
|
17
|
-
Tuple,
|
|
18
|
-
Optional,
|
|
19
|
-
Any,
|
|
20
|
-
Callable,
|
|
21
|
-
Awaitable,
|
|
22
|
-
List,
|
|
23
|
-
Iterable,
|
|
24
|
-
)
|
|
11
|
+
from typing import Dict, Tuple, Optional, Any, List
|
|
25
12
|
|
|
26
13
|
import cloudpickle
|
|
14
|
+
from pydantic import Field
|
|
27
15
|
from typing_extensions import Self
|
|
28
16
|
|
|
29
17
|
|
|
30
18
|
from itertools import islice
|
|
31
19
|
|
|
20
|
+
from ingestify.domain.models.timing import Timing
|
|
32
21
|
|
|
33
22
|
logger = logging.getLogger(__name__)
|
|
34
23
|
|
|
@@ -221,3 +210,46 @@ def try_number(s: str):
|
|
|
221
210
|
return float(s)
|
|
222
211
|
except ValueError:
|
|
223
212
|
return s
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class HasTiming:
|
|
216
|
+
"""Mixin to give Pydantic models ability to time actions."""
|
|
217
|
+
|
|
218
|
+
timings: List[Timing] = Field(default_factory=list)
|
|
219
|
+
|
|
220
|
+
@contextmanager
|
|
221
|
+
def record_timing(
|
|
222
|
+
self, description: str, metadata: Optional[dict] = None
|
|
223
|
+
) -> Timing:
|
|
224
|
+
if not metadata:
|
|
225
|
+
metadata = {}
|
|
226
|
+
|
|
227
|
+
start = utcnow()
|
|
228
|
+
try:
|
|
229
|
+
result = None
|
|
230
|
+
yield
|
|
231
|
+
except Exception as e:
|
|
232
|
+
result = {
|
|
233
|
+
"type": type(e).__name__,
|
|
234
|
+
"message": str(e),
|
|
235
|
+
"traceback": traceback.format_exc(),
|
|
236
|
+
}
|
|
237
|
+
raise e
|
|
238
|
+
finally:
|
|
239
|
+
metadata = dict(result=result, **metadata)
|
|
240
|
+
self.timings.append(
|
|
241
|
+
Timing(
|
|
242
|
+
name=description,
|
|
243
|
+
started_at=start,
|
|
244
|
+
ended_at=utcnow(),
|
|
245
|
+
metadata=metadata,
|
|
246
|
+
)
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
def start_timing(self, name):
|
|
250
|
+
start = utcnow()
|
|
251
|
+
|
|
252
|
+
def finish():
|
|
253
|
+
self.timings.append(Timing(name=name, started_at=start, ended_at=utcnow()))
|
|
254
|
+
|
|
255
|
+
return finish
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ingestify
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Data Ingestion Framework
|
|
5
5
|
Author: Koen Vossen
|
|
6
6
|
Author-email: info@koenvossen.nl
|
|
@@ -227,23 +227,23 @@ dataset_collection = store.get_dataset_collection(
|
|
|
227
227
|
store.map(
|
|
228
228
|
lambda dataset: (
|
|
229
229
|
store
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
"*",
|
|
237
|
-
match_id=dataset.
|
|
238
|
-
competition_id=dataset.
|
|
239
|
-
season_id=dataset.
|
|
240
|
-
|
|
230
|
+
|
|
231
|
+
# As it's related to https://github.com/PySport/kloppy the store can load files using kloppy
|
|
232
|
+
.load_with_kloppy(dataset)
|
|
233
|
+
|
|
234
|
+
# Convert it into a polars dataframe using all columns in the original data and some more additional ones
|
|
235
|
+
.to_df(
|
|
236
|
+
"*",
|
|
237
|
+
match_id=dataset.dataset_resource_id.match_id,
|
|
238
|
+
competition_id=dataset.dataset_resource_id.competition_id,
|
|
239
|
+
season_id=dataset.dataset_resource_id.season_id,
|
|
240
|
+
|
|
241
241
|
engine="polars"
|
|
242
242
|
)
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
f"/tmp/files/blaat/{dataset.
|
|
243
|
+
|
|
244
|
+
# Write to parquet format
|
|
245
|
+
.write_parquet(
|
|
246
|
+
f"/tmp/files/blaat/{dataset.dataset_resource_id.match_id}.parquet"
|
|
247
247
|
)
|
|
248
248
|
),
|
|
249
249
|
dataset_collection,
|
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
ingestify/__init__.py,sha256=
|
|
2
|
-
ingestify/cmdline.py,sha256=
|
|
3
|
-
ingestify/exceptions.py,sha256=
|
|
1
|
+
ingestify/__init__.py,sha256=pqbfmiQtpinnyWcferY68eDjMbrSslyzmNBk9XErzak,301
|
|
2
|
+
ingestify/cmdline.py,sha256=oagUe-Jup1SU3s6jVl25f0cSG0wlNYhxFY-gGBwWmr0,7482
|
|
3
|
+
ingestify/exceptions.py,sha256=izRzaLQmMy-4P8ZqGqVZyf4k6LFYOYqwYLuRaUH8BJw,187
|
|
4
4
|
ingestify/main.py,sha256=Xr0VbGgstPO7doDX18xqk4lBb4W2sbGWtQuXZaARsHA,8763
|
|
5
5
|
ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
|
|
6
6
|
ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
|
|
7
|
-
ingestify/utils.py,sha256=
|
|
7
|
+
ingestify/utils.py,sha256=6BqgEZjecLW_anqYP5WrFpi93bmdhF-EdrebEkm59Ds,6806
|
|
8
8
|
ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
ingestify/application/dataset_store.py,sha256=
|
|
10
|
-
ingestify/application/ingestion_engine.py,sha256=
|
|
11
|
-
ingestify/application/loader.py,sha256=
|
|
9
|
+
ingestify/application/dataset_store.py,sha256=JkAb1W0HaUgOwbohKntM4ttyrFXQ7df1uZSu2rbZllY,11680
|
|
10
|
+
ingestify/application/ingestion_engine.py,sha256=4SAmPZDm3e2QA5jZvMrb6xz1eDDshKoSZDWH3TCe4Bo,2372
|
|
11
|
+
ingestify/application/loader.py,sha256=2LpYSHvedsoe5wFsIkQv0xLcKcqtebwVOSPWapAypao,7566
|
|
12
12
|
ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
|
|
13
13
|
ingestify/domain/__init__.py,sha256=M7_fVTJjQUx53P4UQUPhowRKPKsIIjx4JYZL1yjHKsM,46
|
|
14
14
|
ingestify/domain/models/__init__.py,sha256=cjQmdSDFA-saXjdF1mLPNWILFHIFgdj20J_fC5FmFsI,770
|
|
15
|
-
ingestify/domain/models/base.py,sha256=
|
|
15
|
+
ingestify/domain/models/base.py,sha256=4gKbREajxJHlS-VwKoosNtHVupZ4eDLKMqnJ4ib0aS8,184
|
|
16
16
|
ingestify/domain/models/data_spec_version_collection.py,sha256=CAXlO4W2AOOWAPdPAuymqBHnJpiYtkr2z7fYFJ3HSCk,1372
|
|
17
17
|
ingestify/domain/models/fetch_policy.py,sha256=d7K1TzliNJXxqaqzqEOQWLhvgIvmmqhUQEliXvSUcTs,1405
|
|
18
18
|
ingestify/domain/models/sink.py,sha256=OBVfFMpB7puJmHg4q2KYx4qgoAnlmX8xKWYnPi8a9pc,178
|
|
@@ -20,16 +20,16 @@ ingestify/domain/models/source.py,sha256=sB3aqr2LfjIbtw7ODJpHnPj3RUeo7gYmTU7MXvf
|
|
|
20
20
|
ingestify/domain/models/timing.py,sha256=TvvH6Szo61CD8wCP7Awyc45CXga5lKqvoW2U-0TRHlA,388
|
|
21
21
|
ingestify/domain/models/dataset/__init__.py,sha256=i1kswluvWjw0xn4OUByRt7yeRvNHu1mauevv-Vmayx4,630
|
|
22
22
|
ingestify/domain/models/dataset/collection.py,sha256=E2utQ6oyaFFrfQFMiwP9J_I7Wm21z0sRvE4Zc3QEs20,1310
|
|
23
|
-
ingestify/domain/models/dataset/collection_metadata.py,sha256=
|
|
24
|
-
ingestify/domain/models/dataset/dataset.py,sha256=
|
|
23
|
+
ingestify/domain/models/dataset/collection_metadata.py,sha256=u2H3XZ-6NMfuAcVD_mb7jEc1IkS1MgQahDIe5CWHjtc,458
|
|
24
|
+
ingestify/domain/models/dataset/dataset.py,sha256=STew8_zCBro_x_u03JrjMvq8yqUDaNndlOlolf9osdM,3332
|
|
25
25
|
ingestify/domain/models/dataset/dataset_repository.py,sha256=kUjiqW58kOUOli1gZCLR5xw4dBX0bqI1UJsf16hgNsQ,812
|
|
26
26
|
ingestify/domain/models/dataset/dataset_state.py,sha256=O95mea5N34HDXw7XsYzxHna4FVk_T-ZNUDezkvt7VzY,220
|
|
27
27
|
ingestify/domain/models/dataset/events.py,sha256=58VacQejQt-WPh9BywP4st5McauM3gXBQo0kaDnSekY,481
|
|
28
|
-
ingestify/domain/models/dataset/file.py,sha256=
|
|
28
|
+
ingestify/domain/models/dataset/file.py,sha256=1Thdv6A1YmC1UfutaRf2q3FGHQYO0SWEptCxur6Ahfs,4144
|
|
29
29
|
ingestify/domain/models/dataset/file_collection.py,sha256=yaQmqFlmbajLCkU5QnjgqCvKzvVEZJrXVvinx5UGHcM,1193
|
|
30
30
|
ingestify/domain/models/dataset/file_repository.py,sha256=9EQprch9isAH2pbK7e7tfOKl6ulip4Ij1kBCTbO_rTc,1721
|
|
31
31
|
ingestify/domain/models/dataset/identifier.py,sha256=EJYsxt0OS_43Y989DZQq8U9NjwmtvnHGYGMe6-hOBlI,575
|
|
32
|
-
ingestify/domain/models/dataset/revision.py,sha256=
|
|
32
|
+
ingestify/domain/models/dataset/revision.py,sha256=jBjMqYXDbvt_VAIwL_db09jcH4W8JPRKsXJb4JCniuM,1447
|
|
33
33
|
ingestify/domain/models/dataset/selector.py,sha256=kEGpU8pIyjZ0zwE9n2uo_NY5xrNanWiTTgapyMAUEsw,1039
|
|
34
34
|
ingestify/domain/models/event/__init__.py,sha256=OdPTpE9bj5QqdGmrYqRTLPX1f-LR9GWJYlGMPPEsuL8,138
|
|
35
35
|
ingestify/domain/models/event/_old_event.py,sha256=RktgCAj9SMdtqkAc_bOwoghEb2Z6m4r5_xWXin9wqx4,472
|
|
@@ -39,15 +39,15 @@ ingestify/domain/models/event/event_bus.py,sha256=iseourbCwdUg-ODM5bM_u6cageJmce
|
|
|
39
39
|
ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
|
|
40
40
|
ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
|
|
41
41
|
ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
-
ingestify/domain/models/ingestion/ingestion_job.py,sha256=
|
|
43
|
-
ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=
|
|
42
|
+
ingestify/domain/models/ingestion/ingestion_job.py,sha256=H9jfbbWFZw73nxMOW0480LgSHV-o4sA5IcvpUZmFpS4,13140
|
|
43
|
+
ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=Xvmtu0BwE9C7FxBl6D8tN49I6--E_RngcMfWeme4DPA,4499
|
|
44
44
|
ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
|
|
45
45
|
ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
|
|
46
46
|
ingestify/domain/models/resources/dataset_resource.py,sha256=NRnN029ct3P_Eg2d9Unb1t7A12Ksv_emBGhoe9DpPwM,3118
|
|
47
47
|
ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
|
|
48
48
|
ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
|
|
49
49
|
ingestify/domain/models/task/task.py,sha256=OwLZQi9GGe0O8m1dKvJdN2Rham5oilI49KyKc5uV20A,161
|
|
50
|
-
ingestify/domain/models/task/task_summary.py,sha256=
|
|
50
|
+
ingestify/domain/models/task/task_summary.py,sha256=T9BSGhOZjKCPfym34VUdBXuMy0o6E832GAI4WMtjGao,3181
|
|
51
51
|
ingestify/domain/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
52
|
ingestify/domain/services/identifier_key_transformer.py,sha256=y4GS9u9Ej1MO2jUhAxWbifp0mrE_MqTHvVVcoQzSKb4,4034
|
|
53
53
|
ingestify/domain/services/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -55,7 +55,7 @@ ingestify/domain/services/transformers/kloppy_to_pandas.py,sha256=NcN6nTBGVn9gz-
|
|
|
55
55
|
ingestify/infra/__init__.py,sha256=V0hpLzPVTcOHRVh0gguF6FT30YIgEOUd5v87xUHkfZ4,88
|
|
56
56
|
ingestify/infra/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
57
|
ingestify/infra/fetch/http.py,sha256=ldaXy6alBbI9z63H97lXfYZNT0ZCBkTac1W6-acNjjY,4127
|
|
58
|
-
ingestify/infra/serialization/__init__.py,sha256=
|
|
58
|
+
ingestify/infra/serialization/__init__.py,sha256=UqXWJmKTp7Mi58ZyDASGguPFlqdVWVUbm_sg9GWx9eI,702
|
|
59
59
|
ingestify/infra/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
60
|
ingestify/infra/sink/postgresql.py,sha256=SxuM3LntfYcpCriUpqJhMvgAf0s9cohXf6WkxSEDYDY,1816
|
|
61
61
|
ingestify/infra/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -64,8 +64,8 @@ ingestify/infra/source/wyscout.py,sha256=DxCzdkzYpVRHTfV9GpF8pe3FzwIk-WHYUlea6nO
|
|
|
64
64
|
ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
|
|
65
65
|
ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
66
|
ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
|
|
67
|
-
ingestify/infra/store/dataset/sqlalchemy/
|
|
68
|
-
ingestify/infra/store/dataset/sqlalchemy/
|
|
67
|
+
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=R7OJVC_dh5hGhaCvU_Ixyfb16Xyd-hxvm7n0zJ6KxDk,15857
|
|
68
|
+
ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=FEMEkBiefozEoWCYMQnc4DD3ZsDeg3KaFs1c58A66ME,10314
|
|
69
69
|
ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
|
|
70
70
|
ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
|
|
71
71
|
ingestify/infra/store/file/local_file_repository.py,sha256=1hhLqds5LlppJq2QBB0oN0Q98j6aXreCtYQYz3Q1P8g,819
|
|
@@ -80,8 +80,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
80
80
|
ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
|
|
81
81
|
ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
|
|
82
82
|
ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
|
|
83
|
-
ingestify-0.
|
|
84
|
-
ingestify-0.
|
|
85
|
-
ingestify-0.
|
|
86
|
-
ingestify-0.
|
|
87
|
-
ingestify-0.
|
|
83
|
+
ingestify-0.4.0.dist-info/METADATA,sha256=t02z0kStKbk2lECODDJieRK4Ev1TDKR0Z6G2DpUeeGM,18854
|
|
84
|
+
ingestify-0.4.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
85
|
+
ingestify-0.4.0.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
|
|
86
|
+
ingestify-0.4.0.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
|
|
87
|
+
ingestify-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|