ingestify 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestify/__init__.py +1 -1
- ingestify/application/dataset_store.py +44 -24
- ingestify/application/ingestion_engine.py +3 -3
- ingestify/application/loader.py +67 -237
- ingestify/domain/models/__init__.py +1 -6
- ingestify/domain/models/base.py +22 -0
- ingestify/domain/models/data_spec_version_collection.py +6 -0
- ingestify/domain/models/dataset/__init__.py +3 -5
- ingestify/domain/models/dataset/dataset.py +15 -32
- ingestify/domain/models/dataset/dataset_repository.py +1 -15
- ingestify/domain/models/dataset/dataset_state.py +11 -0
- ingestify/domain/models/dataset/events.py +6 -16
- ingestify/domain/models/dataset/file.py +21 -34
- ingestify/domain/models/dataset/file_collection.py +3 -1
- ingestify/domain/models/dataset/file_repository.py +1 -10
- ingestify/domain/models/dataset/revision.py +26 -3
- ingestify/domain/models/event/domain_event.py +8 -4
- ingestify/domain/models/ingestion/__init__.py +0 -0
- ingestify/domain/models/ingestion/ingestion_job.py +292 -0
- ingestify/domain/models/ingestion/ingestion_job_summary.py +106 -0
- ingestify/domain/models/{extract_job.py → ingestion/ingestion_plan.py} +4 -4
- ingestify/domain/models/resources/dataset_resource.py +29 -37
- ingestify/domain/models/sink.py +1 -8
- ingestify/domain/models/task/task.py +3 -1
- ingestify/domain/models/task/task_summary.py +118 -0
- ingestify/domain/models/timing.py +16 -0
- ingestify/infra/fetch/http.py +5 -0
- ingestify/infra/source/statsbomb_github.py +67 -54
- ingestify/infra/store/dataset/__init__.py +0 -2
- ingestify/infra/store/dataset/sqlalchemy/mapping.py +184 -4
- ingestify/infra/store/dataset/sqlalchemy/repository.py +24 -22
- ingestify/main.py +42 -22
- ingestify/utils.py +15 -78
- {ingestify-0.1.3.dist-info → ingestify-0.2.0.dist-info}/METADATA +2 -1
- {ingestify-0.1.3.dist-info → ingestify-0.2.0.dist-info}/RECORD +38 -32
- {ingestify-0.1.3.dist-info → ingestify-0.2.0.dist-info}/WHEEL +1 -1
- ingestify/infra/store/dataset/local_dataset_repository.py +0 -73
- {ingestify-0.1.3.dist-info → ingestify-0.2.0.dist-info}/entry_points.txt +0 -0
- {ingestify-0.1.3.dist-info → ingestify-0.2.0.dist-info}/top_level.txt +0 -0
ingestify/utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import abc
|
|
2
|
+
import asyncio
|
|
2
3
|
import inspect
|
|
3
4
|
import logging
|
|
4
5
|
import os
|
|
@@ -8,7 +9,19 @@ from multiprocessing import get_context, cpu_count, get_all_start_methods
|
|
|
8
9
|
|
|
9
10
|
from datetime import datetime, timezone
|
|
10
11
|
from string import Template
|
|
11
|
-
from typing import
|
|
12
|
+
from typing import (
|
|
13
|
+
Dict,
|
|
14
|
+
Generic,
|
|
15
|
+
Type,
|
|
16
|
+
TypeVar,
|
|
17
|
+
Tuple,
|
|
18
|
+
Optional,
|
|
19
|
+
Any,
|
|
20
|
+
Callable,
|
|
21
|
+
Awaitable,
|
|
22
|
+
List,
|
|
23
|
+
Iterable,
|
|
24
|
+
)
|
|
12
25
|
|
|
13
26
|
import cloudpickle
|
|
14
27
|
from typing_extensions import Self
|
|
@@ -39,83 +52,6 @@ def sanitize_exception_message(exception_message):
|
|
|
39
52
|
return sanitized_message
|
|
40
53
|
|
|
41
54
|
|
|
42
|
-
class ComponentRegistry:
|
|
43
|
-
def __init__(self):
|
|
44
|
-
self.__registered_components = {}
|
|
45
|
-
|
|
46
|
-
class _Registered(abc.ABCMeta):
|
|
47
|
-
def __new__(mcs, cls_name, bases, class_dict):
|
|
48
|
-
class_dict["name"] = cls_name
|
|
49
|
-
component_cls = super(_Registered, mcs).__new__(
|
|
50
|
-
mcs, cls_name, bases, class_dict
|
|
51
|
-
)
|
|
52
|
-
if not inspect.isabstract(component_cls):
|
|
53
|
-
self.register_component(cls_name, component_cls)
|
|
54
|
-
else:
|
|
55
|
-
if bases[0] != abc.ABC:
|
|
56
|
-
raise Exception(
|
|
57
|
-
f"Class '{cls_name}' seems to be an concrete class, but missing some abstract methods"
|
|
58
|
-
)
|
|
59
|
-
return component_cls
|
|
60
|
-
|
|
61
|
-
self.__metaclass = _Registered
|
|
62
|
-
|
|
63
|
-
@property
|
|
64
|
-
def metaclass(self):
|
|
65
|
-
return self.__metaclass
|
|
66
|
-
|
|
67
|
-
def register_component(self, cls_name, component_cls):
|
|
68
|
-
self.__registered_components[cls_name] = component_cls
|
|
69
|
-
|
|
70
|
-
def get_component(self, cls_name: str):
|
|
71
|
-
return self.__registered_components[cls_name]
|
|
72
|
-
|
|
73
|
-
def get_supporting_component(self, **kwargs) -> str:
|
|
74
|
-
for cls_name, class_ in self.__registered_components.items():
|
|
75
|
-
if not hasattr(class_, "supports"):
|
|
76
|
-
raise Exception(
|
|
77
|
-
f"Class '{cls_name}' does not implemented a 'supports' classmethod. "
|
|
78
|
-
f"This is required when using 'get_supporting_component'."
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
if class_.supports(**kwargs):
|
|
82
|
-
return cls_name
|
|
83
|
-
|
|
84
|
-
kwargs_str = sanitize_exception_message(str(kwargs))
|
|
85
|
-
raise Exception(f"No supporting class found for {kwargs_str}")
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
T = TypeVar("T")
|
|
89
|
-
R = TypeVar("R")
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
class ComponentFactory(Generic[T]):
|
|
93
|
-
def __init__(self, registry: ComponentRegistry):
|
|
94
|
-
self.registry = registry
|
|
95
|
-
|
|
96
|
-
@classmethod
|
|
97
|
-
def build_factory(
|
|
98
|
-
cls, component_cls: Type[R], registry: ComponentRegistry
|
|
99
|
-
) -> "ComponentFactory[R]":
|
|
100
|
-
return cls[component_cls](registry)
|
|
101
|
-
|
|
102
|
-
def build(self, cls_name, **kwargs) -> T:
|
|
103
|
-
component_cls = self.registry.get_component(cls_name)
|
|
104
|
-
try:
|
|
105
|
-
return component_cls.from_dict(**kwargs)
|
|
106
|
-
except AttributeError:
|
|
107
|
-
pass
|
|
108
|
-
try:
|
|
109
|
-
return component_cls(**kwargs)
|
|
110
|
-
except TypeError as e:
|
|
111
|
-
raise e
|
|
112
|
-
# raise TypeError(f"Could not initialize {cls_name}")
|
|
113
|
-
|
|
114
|
-
def build_if_supports(self, **kwargs) -> T:
|
|
115
|
-
cls_name = self.registry.get_supporting_component(**kwargs)
|
|
116
|
-
return self.build(cls_name, **kwargs)
|
|
117
|
-
|
|
118
|
-
|
|
119
55
|
def key_from_dict(d: dict) -> str:
|
|
120
56
|
return "/".join([f"{k}={v}" for k, v in sorted(d.items()) if not k.startswith("_")])
|
|
121
57
|
|
|
@@ -270,6 +206,7 @@ class TaskExecutor:
|
|
|
270
206
|
logger.info(
|
|
271
207
|
f"Finished {len(res)} tasks in {took:.1f} seconds. {(len(res)/took):.1f} tasks/sec"
|
|
272
208
|
)
|
|
209
|
+
return res
|
|
273
210
|
|
|
274
211
|
def join(self):
|
|
275
212
|
self.pool.close()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ingestify
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Standardizing soccer tracking- and event data
|
|
5
5
|
Author: Koen Vossen
|
|
6
6
|
Author-email: info@koenvossen.nl
|
|
@@ -16,6 +16,7 @@ Requires-Dist: python-dotenv
|
|
|
16
16
|
Requires-Dist: pyaml-env
|
|
17
17
|
Requires-Dist: boto3
|
|
18
18
|
Requires-Dist: pytz
|
|
19
|
+
Requires-Dist: pydantic>=2.0.0
|
|
19
20
|
Provides-Extra: test
|
|
20
21
|
Requires-Dist: pytest<7,>=6.2.5; extra == "test"
|
|
21
22
|
|
|
@@ -1,64 +1,70 @@
|
|
|
1
|
-
ingestify/__init__.py,sha256=
|
|
1
|
+
ingestify/__init__.py,sha256=rzYt6rUUedAUB4VDxDENn6bzWpACW34yfbQKVjTzgQg,301
|
|
2
2
|
ingestify/cmdline.py,sha256=bIuyPgGEw4wIglNzpG9zp7TsJozsP8NSVsCe4eAyWUg,7189
|
|
3
3
|
ingestify/exceptions.py,sha256=wMMuajl4AkQRfW60TLN7btJmQaH8-lUczXyW_2g9kOU,143
|
|
4
|
-
ingestify/main.py,sha256=
|
|
4
|
+
ingestify/main.py,sha256=Lo8bCwOz3AOeO1pSTYhd7VjSZ8tcc9eSz0GLlwyy6DI,7632
|
|
5
5
|
ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
|
|
6
6
|
ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
|
|
7
|
-
ingestify/utils.py,sha256=
|
|
7
|
+
ingestify/utils.py,sha256=HETGhAoUlutLG0cQR63nac2JbFei9gnktDHeBQoYWfU,5692
|
|
8
8
|
ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
ingestify/application/dataset_store.py,sha256=
|
|
10
|
-
ingestify/application/ingestion_engine.py,sha256=
|
|
11
|
-
ingestify/application/loader.py,sha256=
|
|
9
|
+
ingestify/application/dataset_store.py,sha256=LccTpvsMWCIV0ewzS5sIXKk2kaQcZhnXGFT8Eao3U3Q,12074
|
|
10
|
+
ingestify/application/ingestion_engine.py,sha256=PtMjKMpvfqB802G5zfKLzyamdH7qFOXl3x6_97y8w60,2288
|
|
11
|
+
ingestify/application/loader.py,sha256=nqLKtwu48mJVumB9BtgTv79soCOtW9pzg-pvTvc66bc,7031
|
|
12
12
|
ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
|
|
13
13
|
ingestify/domain/__init__.py,sha256=M7_fVTJjQUx53P4UQUPhowRKPKsIIjx4JYZL1yjHKsM,46
|
|
14
|
-
ingestify/domain/models/__init__.py,sha256=
|
|
15
|
-
ingestify/domain/models/
|
|
16
|
-
ingestify/domain/models/
|
|
14
|
+
ingestify/domain/models/__init__.py,sha256=cjQmdSDFA-saXjdF1mLPNWILFHIFgdj20J_fC5FmFsI,770
|
|
15
|
+
ingestify/domain/models/base.py,sha256=6jzzIqSkH1mPsXZ2OTXMj09S_IlvMOrOBHBJyWAKEjE,555
|
|
16
|
+
ingestify/domain/models/data_spec_version_collection.py,sha256=CAXlO4W2AOOWAPdPAuymqBHnJpiYtkr2z7fYFJ3HSCk,1372
|
|
17
17
|
ingestify/domain/models/fetch_policy.py,sha256=d7K1TzliNJXxqaqzqEOQWLhvgIvmmqhUQEliXvSUcTs,1405
|
|
18
|
-
ingestify/domain/models/sink.py,sha256=
|
|
18
|
+
ingestify/domain/models/sink.py,sha256=OBVfFMpB7puJmHg4q2KYx4qgoAnlmX8xKWYnPi8a9pc,178
|
|
19
19
|
ingestify/domain/models/source.py,sha256=sB3aqr2LfjIbtw7ODJpHnPj3RUeo7gYmTU7MXvfaYg4,973
|
|
20
|
-
ingestify/domain/models/
|
|
20
|
+
ingestify/domain/models/timing.py,sha256=TvvH6Szo61CD8wCP7Awyc45CXga5lKqvoW2U-0TRHlA,388
|
|
21
|
+
ingestify/domain/models/dataset/__init__.py,sha256=i1kswluvWjw0xn4OUByRt7yeRvNHu1mauevv-Vmayx4,630
|
|
21
22
|
ingestify/domain/models/dataset/collection.py,sha256=E2utQ6oyaFFrfQFMiwP9J_I7Wm21z0sRvE4Zc3QEs20,1310
|
|
22
23
|
ingestify/domain/models/dataset/collection_metadata.py,sha256=gI5cb9M0QRsheIr2jA71wOyWfI5lGx5ES2Qw7rbDIoA,371
|
|
23
|
-
ingestify/domain/models/dataset/dataset.py,sha256=
|
|
24
|
-
ingestify/domain/models/dataset/dataset_repository.py,sha256=
|
|
25
|
-
ingestify/domain/models/dataset/
|
|
26
|
-
ingestify/domain/models/dataset/
|
|
27
|
-
ingestify/domain/models/dataset/
|
|
28
|
-
ingestify/domain/models/dataset/
|
|
24
|
+
ingestify/domain/models/dataset/dataset.py,sha256=ReL50BXNaJVU29OB5_9CQEI7BekWsgi1t3AR7e5jENc,2743
|
|
25
|
+
ingestify/domain/models/dataset/dataset_repository.py,sha256=kUjiqW58kOUOli1gZCLR5xw4dBX0bqI1UJsf16hgNsQ,812
|
|
26
|
+
ingestify/domain/models/dataset/dataset_state.py,sha256=O95mea5N34HDXw7XsYzxHna4FVk_T-ZNUDezkvt7VzY,220
|
|
27
|
+
ingestify/domain/models/dataset/events.py,sha256=58VacQejQt-WPh9BywP4st5McauM3gXBQo0kaDnSekY,481
|
|
28
|
+
ingestify/domain/models/dataset/file.py,sha256=nuoZI9GI5OysYwWCCyNsHMlm1Z9A1GbEKd38jvBzJ4E,4119
|
|
29
|
+
ingestify/domain/models/dataset/file_collection.py,sha256=yaQmqFlmbajLCkU5QnjgqCvKzvVEZJrXVvinx5UGHcM,1193
|
|
30
|
+
ingestify/domain/models/dataset/file_repository.py,sha256=ntzLiWZleZQFmrVsFvDSwfbOT86WtAXLbqgA8HlV56Q,1248
|
|
29
31
|
ingestify/domain/models/dataset/identifier.py,sha256=EJYsxt0OS_43Y989DZQq8U9NjwmtvnHGYGMe6-hOBlI,575
|
|
30
|
-
ingestify/domain/models/dataset/revision.py,sha256=
|
|
32
|
+
ingestify/domain/models/dataset/revision.py,sha256=O_1HG2S2EmYdWqI2K282S_D-d6IhRh_f4Q3wV8MEhkk,1311
|
|
31
33
|
ingestify/domain/models/dataset/selector.py,sha256=kEGpU8pIyjZ0zwE9n2uo_NY5xrNanWiTTgapyMAUEsw,1039
|
|
32
34
|
ingestify/domain/models/event/__init__.py,sha256=OdPTpE9bj5QqdGmrYqRTLPX1f-LR9GWJYlGMPPEsuL8,138
|
|
33
35
|
ingestify/domain/models/event/_old_event.py,sha256=RktgCAj9SMdtqkAc_bOwoghEb2Z6m4r5_xWXin9wqx4,472
|
|
34
36
|
ingestify/domain/models/event/dispatcher.py,sha256=5WnyUJ7Qzr612btAtl1dMG9JBXDPcsBLyLmW6H7Q1zk,154
|
|
35
|
-
ingestify/domain/models/event/domain_event.py,sha256=
|
|
37
|
+
ingestify/domain/models/event/domain_event.py,sha256=OR6va417j2lisRr0gjQZ9rshAtlys5sVu7KU-W0r0xA,316
|
|
36
38
|
ingestify/domain/models/event/event_bus.py,sha256=iseourbCwdUg-ODM5bM_u6cageJmceWLstOxiP3-2qU,576
|
|
37
39
|
ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
|
|
38
40
|
ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
|
|
41
|
+
ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
+
ingestify/domain/models/ingestion/ingestion_job.py,sha256=GnBQVnTU3FdKdSElXEISUrQz-orGIHchnNAo20Qg0DY,11511
|
|
43
|
+
ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=YygBv0GgU396HRe-exQqW2QmitBEnAh2VG_xkW3wdyQ,3645
|
|
44
|
+
ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
|
|
39
45
|
ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
|
|
40
|
-
ingestify/domain/models/resources/dataset_resource.py,sha256=
|
|
46
|
+
ingestify/domain/models/resources/dataset_resource.py,sha256=NRnN029ct3P_Eg2d9Unb1t7A12Ksv_emBGhoe9DpPwM,3118
|
|
41
47
|
ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
|
|
42
48
|
ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
|
|
43
|
-
ingestify/domain/models/task/task.py,sha256=
|
|
49
|
+
ingestify/domain/models/task/task.py,sha256=OwLZQi9GGe0O8m1dKvJdN2Rham5oilI49KyKc5uV20A,161
|
|
50
|
+
ingestify/domain/models/task/task_summary.py,sha256=ovzqKPstngRVzVA_JboQMluq5uQjKVJDsWNNcfcadhU,3774
|
|
44
51
|
ingestify/domain/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
52
|
ingestify/domain/services/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
53
|
ingestify/domain/services/transformers/kloppy_to_pandas.py,sha256=NcN6nTBGVn9gz-_hWZJTMcduS1Gg7EM4X95Cqxi1QIM,809
|
|
47
54
|
ingestify/infra/__init__.py,sha256=V0hpLzPVTcOHRVh0gguF6FT30YIgEOUd5v87xUHkfZ4,88
|
|
48
55
|
ingestify/infra/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
ingestify/infra/fetch/http.py,sha256=
|
|
56
|
+
ingestify/infra/fetch/http.py,sha256=ldaXy6alBbI9z63H97lXfYZNT0ZCBkTac1W6-acNjjY,4127
|
|
50
57
|
ingestify/infra/serialization/__init__.py,sha256=LwfmRoO4qykZkJZXxVPSKpwoVIkg9qzXa7Egut9JjL4,1772
|
|
51
58
|
ingestify/infra/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
59
|
ingestify/infra/sink/postgresql.py,sha256=SxuM3LntfYcpCriUpqJhMvgAf0s9cohXf6WkxSEDYDY,1816
|
|
53
60
|
ingestify/infra/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
-
ingestify/infra/source/statsbomb_github.py,sha256=
|
|
61
|
+
ingestify/infra/source/statsbomb_github.py,sha256=IzzrlIRqkChgJp87yW3ugG1my4g_5uMx_xEnoQLWNss,3543
|
|
55
62
|
ingestify/infra/source/wyscout.py,sha256=DxCzdkzYpVRHTfV9GpF8pe3FzwIk-WHYUlea6nOUGxE,5626
|
|
56
63
|
ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
|
|
57
|
-
ingestify/infra/store/dataset/__init__.py,sha256=
|
|
58
|
-
ingestify/infra/store/dataset/local_dataset_repository.py,sha256=UMgSe1M9u_629V4WyuTJ-QegZJiDczzMo7vkNbNleqA,2064
|
|
64
|
+
ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
65
|
ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
|
|
60
|
-
ingestify/infra/store/dataset/sqlalchemy/mapping.py,sha256
|
|
61
|
-
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=
|
|
66
|
+
ingestify/infra/store/dataset/sqlalchemy/mapping.py,sha256=-iTkC4_YGkkFrIsEZVTW2eoaofj4c7QZFaq7tl1r2G4,9288
|
|
67
|
+
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=O2g7g_clNz43g9gXjjBJZsIGvRTntQ6rJpQeDT8yQ7c,7141
|
|
62
68
|
ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
|
|
63
69
|
ingestify/infra/store/file/local_file_repository.py,sha256=0oIzjjKO5U_7gPXhsBJFUqQBarQTFQS499ZK7HNxMxo,893
|
|
64
70
|
ingestify/infra/store/file/s3_file_repository.py,sha256=txDviBrY9EHn3soqLFvTrjSPkyh548RxUgx4T83j0QY,1331
|
|
@@ -72,8 +78,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
72
78
|
ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
|
|
73
79
|
ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
|
|
74
80
|
ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
|
|
75
|
-
ingestify-0.
|
|
76
|
-
ingestify-0.
|
|
77
|
-
ingestify-0.
|
|
78
|
-
ingestify-0.
|
|
79
|
-
ingestify-0.
|
|
81
|
+
ingestify-0.2.0.dist-info/METADATA,sha256=8974JGisSq9_Q-4M1cFYY_AU5zBW7n_UZ8NKjj_ZBDM,18853
|
|
82
|
+
ingestify-0.2.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
83
|
+
ingestify-0.2.0.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
|
|
84
|
+
ingestify-0.2.0.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
|
|
85
|
+
ingestify-0.2.0.dist-info/RECORD,,
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
import glob
|
|
2
|
-
import os
|
|
3
|
-
import pickle
|
|
4
|
-
import uuid
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Optional
|
|
7
|
-
|
|
8
|
-
from ingestify.domain.models import (
|
|
9
|
-
Dataset,
|
|
10
|
-
DatasetCollection,
|
|
11
|
-
DatasetRepository,
|
|
12
|
-
Selector,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def parse_value(v):
|
|
17
|
-
try:
|
|
18
|
-
return int(v)
|
|
19
|
-
except ValueError:
|
|
20
|
-
return v
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class LocalDatasetRepository(DatasetRepository):
|
|
24
|
-
def destroy(self, dataset: Dataset):
|
|
25
|
-
path = (
|
|
26
|
-
self.base_dir / dataset.identifier.key.replace("/", "__") / "dataset.pickle"
|
|
27
|
-
)
|
|
28
|
-
path.unlink()
|
|
29
|
-
|
|
30
|
-
@classmethod
|
|
31
|
-
def supports(cls, url: str) -> bool:
|
|
32
|
-
return url.startswith("file://")
|
|
33
|
-
|
|
34
|
-
def __init__(self, url: str):
|
|
35
|
-
self.base_dir = Path(url[7:])
|
|
36
|
-
raise DeprecationWarning(
|
|
37
|
-
"This Repository should not be used. Better use SqlAlchemyDatasetRepository with a local sqlite database."
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
def get_dataset_collection(
|
|
41
|
-
self,
|
|
42
|
-
dataset_type: Optional[str] = None,
|
|
43
|
-
provider: Optional[str] = None,
|
|
44
|
-
dataset_id: Optional[str] = None,
|
|
45
|
-
selector: Optional[Selector] = None,
|
|
46
|
-
**kwargs
|
|
47
|
-
) -> DatasetCollection:
|
|
48
|
-
|
|
49
|
-
datasets = []
|
|
50
|
-
for dir_name in glob.glob(str(self.base_dir / "*")):
|
|
51
|
-
attributes = {
|
|
52
|
-
item[0]: parse_value(item[1])
|
|
53
|
-
for item in [
|
|
54
|
-
part.split("=") for part in os.path.basename(dir_name).split("__")
|
|
55
|
-
]
|
|
56
|
-
}
|
|
57
|
-
if not selector or selector.matches(attributes):
|
|
58
|
-
with open(dir_name + "/dataset.pickle", "rb") as fp:
|
|
59
|
-
dataset = pickle.load(fp)
|
|
60
|
-
datasets.append(dataset)
|
|
61
|
-
return DatasetCollection(datasets)
|
|
62
|
-
|
|
63
|
-
def save(self, bucket: str, dataset: Dataset):
|
|
64
|
-
path = (
|
|
65
|
-
self.base_dir / dataset.identifier.key.replace("/", "__") / "dataset.pickle"
|
|
66
|
-
)
|
|
67
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
68
|
-
|
|
69
|
-
with open(path, "wb") as fp:
|
|
70
|
-
pickle.dump(dataset, fp)
|
|
71
|
-
|
|
72
|
-
def next_identity(self):
|
|
73
|
-
return str(uuid.uuid4())
|
|
File without changes
|
|
File without changes
|