ingestify 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. ingestify/__init__.py +1 -1
  2. ingestify/application/dataset_store.py +44 -24
  3. ingestify/application/ingestion_engine.py +3 -3
  4. ingestify/application/loader.py +67 -237
  5. ingestify/cmdline.py +2 -1
  6. ingestify/domain/models/__init__.py +1 -6
  7. ingestify/domain/models/base.py +22 -0
  8. ingestify/domain/models/data_spec_version_collection.py +6 -0
  9. ingestify/domain/models/dataset/__init__.py +3 -5
  10. ingestify/domain/models/dataset/dataset.py +15 -32
  11. ingestify/domain/models/dataset/dataset_repository.py +1 -15
  12. ingestify/domain/models/dataset/dataset_state.py +11 -0
  13. ingestify/domain/models/dataset/events.py +6 -16
  14. ingestify/domain/models/dataset/file.py +21 -34
  15. ingestify/domain/models/dataset/file_collection.py +3 -1
  16. ingestify/domain/models/dataset/file_repository.py +1 -10
  17. ingestify/domain/models/dataset/revision.py +26 -3
  18. ingestify/domain/models/event/domain_event.py +8 -4
  19. ingestify/domain/models/ingestion/__init__.py +0 -0
  20. ingestify/domain/models/ingestion/ingestion_job.py +292 -0
  21. ingestify/domain/models/ingestion/ingestion_job_summary.py +106 -0
  22. ingestify/domain/models/{extract_job.py → ingestion/ingestion_plan.py} +4 -4
  23. ingestify/domain/models/resources/dataset_resource.py +29 -37
  24. ingestify/domain/models/sink.py +1 -8
  25. ingestify/domain/models/task/task.py +3 -1
  26. ingestify/domain/models/task/task_summary.py +118 -0
  27. ingestify/domain/models/timing.py +16 -0
  28. ingestify/infra/fetch/http.py +5 -0
  29. ingestify/infra/source/statsbomb_github.py +67 -54
  30. ingestify/infra/store/dataset/__init__.py +0 -2
  31. ingestify/infra/store/dataset/sqlalchemy/mapping.py +184 -4
  32. ingestify/infra/store/dataset/sqlalchemy/repository.py +24 -22
  33. ingestify/main.py +42 -22
  34. ingestify/utils.py +25 -78
  35. {ingestify-0.1.2.dist-info → ingestify-0.2.0.dist-info}/METADATA +2 -1
  36. {ingestify-0.1.2.dist-info → ingestify-0.2.0.dist-info}/RECORD +39 -33
  37. {ingestify-0.1.2.dist-info → ingestify-0.2.0.dist-info}/WHEEL +1 -1
  38. ingestify/infra/store/dataset/local_dataset_repository.py +0 -73
  39. {ingestify-0.1.2.dist-info → ingestify-0.2.0.dist-info}/entry_points.txt +0 -0
  40. {ingestify-0.1.2.dist-info → ingestify-0.2.0.dist-info}/top_level.txt +0 -0
ingestify/utils.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import abc
2
+ import asyncio
2
3
  import inspect
3
4
  import logging
4
5
  import os
@@ -8,7 +9,19 @@ from multiprocessing import get_context, cpu_count, get_all_start_methods
8
9
 
9
10
  from datetime import datetime, timezone
10
11
  from string import Template
11
- from typing import Dict, Generic, Type, TypeVar, Tuple, Optional, Any
12
+ from typing import (
13
+ Dict,
14
+ Generic,
15
+ Type,
16
+ TypeVar,
17
+ Tuple,
18
+ Optional,
19
+ Any,
20
+ Callable,
21
+ Awaitable,
22
+ List,
23
+ Iterable,
24
+ )
12
25
 
13
26
  import cloudpickle
14
27
  from typing_extensions import Self
@@ -39,83 +52,6 @@ def sanitize_exception_message(exception_message):
39
52
  return sanitized_message
40
53
 
41
54
 
42
- class ComponentRegistry:
43
- def __init__(self):
44
- self.__registered_components = {}
45
-
46
- class _Registered(abc.ABCMeta):
47
- def __new__(mcs, cls_name, bases, class_dict):
48
- class_dict["name"] = cls_name
49
- component_cls = super(_Registered, mcs).__new__(
50
- mcs, cls_name, bases, class_dict
51
- )
52
- if not inspect.isabstract(component_cls):
53
- self.register_component(cls_name, component_cls)
54
- else:
55
- if bases[0] != abc.ABC:
56
- raise Exception(
57
- f"Class '{cls_name}' seems to be an concrete class, but missing some abstract methods"
58
- )
59
- return component_cls
60
-
61
- self.__metaclass = _Registered
62
-
63
- @property
64
- def metaclass(self):
65
- return self.__metaclass
66
-
67
- def register_component(self, cls_name, component_cls):
68
- self.__registered_components[cls_name] = component_cls
69
-
70
- def get_component(self, cls_name: str):
71
- return self.__registered_components[cls_name]
72
-
73
- def get_supporting_component(self, **kwargs) -> str:
74
- for cls_name, class_ in self.__registered_components.items():
75
- if not hasattr(class_, "supports"):
76
- raise Exception(
77
- f"Class '{cls_name}' does not implemented a 'supports' classmethod. "
78
- f"This is required when using 'get_supporting_component'."
79
- )
80
-
81
- if class_.supports(**kwargs):
82
- return cls_name
83
-
84
- kwargs_str = sanitize_exception_message(str(kwargs))
85
- raise Exception(f"No supporting class found for {kwargs_str}")
86
-
87
-
88
- T = TypeVar("T")
89
- R = TypeVar("R")
90
-
91
-
92
- class ComponentFactory(Generic[T]):
93
- def __init__(self, registry: ComponentRegistry):
94
- self.registry = registry
95
-
96
- @classmethod
97
- def build_factory(
98
- cls, component_cls: Type[R], registry: ComponentRegistry
99
- ) -> "ComponentFactory[R]":
100
- return cls[component_cls](registry)
101
-
102
- def build(self, cls_name, **kwargs) -> T:
103
- component_cls = self.registry.get_component(cls_name)
104
- try:
105
- return component_cls.from_dict(**kwargs)
106
- except AttributeError:
107
- pass
108
- try:
109
- return component_cls(**kwargs)
110
- except TypeError as e:
111
- raise e
112
- # raise TypeError(f"Could not initialize {cls_name}")
113
-
114
- def build_if_supports(self, **kwargs) -> T:
115
- cls_name = self.registry.get_supporting_component(**kwargs)
116
- return self.build(cls_name, **kwargs)
117
-
118
-
119
55
  def key_from_dict(d: dict) -> str:
120
56
  return "/".join([f"{k}={v}" for k, v in sorted(d.items()) if not k.startswith("_")])
121
57
 
@@ -270,7 +206,18 @@ class TaskExecutor:
270
206
  logger.info(
271
207
  f"Finished {len(res)} tasks in {took:.1f} seconds. {(len(res)/took):.1f} tasks/sec"
272
208
  )
209
+ return res
273
210
 
274
211
  def join(self):
275
212
  self.pool.close()
276
213
  self.pool.join()
214
+
215
+
216
+ def try_number(s: str):
217
+ try:
218
+ return int(s)
219
+ except ValueError:
220
+ try:
221
+ return float(s)
222
+ except ValueError:
223
+ return s
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ingestify
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Standardizing soccer tracking- and event data
5
5
  Author: Koen Vossen
6
6
  Author-email: info@koenvossen.nl
@@ -16,6 +16,7 @@ Requires-Dist: python-dotenv
16
16
  Requires-Dist: pyaml-env
17
17
  Requires-Dist: boto3
18
18
  Requires-Dist: pytz
19
+ Requires-Dist: pydantic>=2.0.0
19
20
  Provides-Extra: test
20
21
  Requires-Dist: pytest<7,>=6.2.5; extra == "test"
21
22
 
@@ -1,64 +1,70 @@
1
- ingestify/__init__.py,sha256=Jq8NUvWIQpVwqUiHza92T9gPyuEZtcg7sKNJkyG6oCE,301
2
- ingestify/cmdline.py,sha256=gLy79Cq3OnEyoEcI6koWIEbCwvgFZ1E8n3UU1sKS8FM,7143
1
+ ingestify/__init__.py,sha256=rzYt6rUUedAUB4VDxDENn6bzWpACW34yfbQKVjTzgQg,301
2
+ ingestify/cmdline.py,sha256=bIuyPgGEw4wIglNzpG9zp7TsJozsP8NSVsCe4eAyWUg,7189
3
3
  ingestify/exceptions.py,sha256=wMMuajl4AkQRfW60TLN7btJmQaH8-lUczXyW_2g9kOU,143
4
- ingestify/main.py,sha256=YjrAOiGzwurtoDyIf981DSJHHA6IT5q09k3QNzTKCC8,6814
4
+ ingestify/main.py,sha256=Lo8bCwOz3AOeO1pSTYhd7VjSZ8tcc9eSz0GLlwyy6DI,7632
5
5
  ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
6
6
  ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
7
- ingestify/utils.py,sha256=eEHwulqNEb2YTRDrCMVxr6mWZYI6KOcNCAIWFTi74u0,8029
7
+ ingestify/utils.py,sha256=HETGhAoUlutLG0cQR63nac2JbFei9gnktDHeBQoYWfU,5692
8
8
  ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- ingestify/application/dataset_store.py,sha256=NAW-XSvp118Lr2hXZd3qtuQr6VkPdWCLksIwd5MSs30,11489
10
- ingestify/application/ingestion_engine.py,sha256=GYIhb8a9ePkEcNOBPdfu-YawiD7eRZMRlxCA-6g9DRA,2249
11
- ingestify/application/loader.py,sha256=DSdSNFf7WynGsMCoK3iQGiMKkO76fZ_KIOBDEMZK3zU,13495
9
+ ingestify/application/dataset_store.py,sha256=LccTpvsMWCIV0ewzS5sIXKk2kaQcZhnXGFT8Eao3U3Q,12074
10
+ ingestify/application/ingestion_engine.py,sha256=PtMjKMpvfqB802G5zfKLzyamdH7qFOXl3x6_97y8w60,2288
11
+ ingestify/application/loader.py,sha256=nqLKtwu48mJVumB9BtgTv79soCOtW9pzg-pvTvc66bc,7031
12
12
  ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
13
13
  ingestify/domain/__init__.py,sha256=M7_fVTJjQUx53P4UQUPhowRKPKsIIjx4JYZL1yjHKsM,46
14
- ingestify/domain/models/__init__.py,sha256=xHVQZP57ZQYUKwAtbccnDKX89_yTOvBKAtn4XDVbEbY,930
15
- ingestify/domain/models/data_spec_version_collection.py,sha256=qjEM6-gt-Uf5orQlv64P6NJCEdWiUPX2oTZv8cC-KVY,1203
16
- ingestify/domain/models/extract_job.py,sha256=yXrlF2Vt5hxB1Vo9CicpgyW5rjvJaEPfSiMzaAqhqB0,624
14
+ ingestify/domain/models/__init__.py,sha256=cjQmdSDFA-saXjdF1mLPNWILFHIFgdj20J_fC5FmFsI,770
15
+ ingestify/domain/models/base.py,sha256=6jzzIqSkH1mPsXZ2OTXMj09S_IlvMOrOBHBJyWAKEjE,555
16
+ ingestify/domain/models/data_spec_version_collection.py,sha256=CAXlO4W2AOOWAPdPAuymqBHnJpiYtkr2z7fYFJ3HSCk,1372
17
17
  ingestify/domain/models/fetch_policy.py,sha256=d7K1TzliNJXxqaqzqEOQWLhvgIvmmqhUQEliXvSUcTs,1405
18
- ingestify/domain/models/sink.py,sha256=AieqDQ76Vj7WGxCrl3-F93AKe-VBfoPHtMNH28GTQM4,384
18
+ ingestify/domain/models/sink.py,sha256=OBVfFMpB7puJmHg4q2KYx4qgoAnlmX8xKWYnPi8a9pc,178
19
19
  ingestify/domain/models/source.py,sha256=sB3aqr2LfjIbtw7ODJpHnPj3RUeo7gYmTU7MXvfaYg4,973
20
- ingestify/domain/models/dataset/__init__.py,sha256=kSn3XZo0o-D0WzMb2VDxhOXw9Rr9jvS-8fkHdOnrccU,748
20
+ ingestify/domain/models/timing.py,sha256=TvvH6Szo61CD8wCP7Awyc45CXga5lKqvoW2U-0TRHlA,388
21
+ ingestify/domain/models/dataset/__init__.py,sha256=i1kswluvWjw0xn4OUByRt7yeRvNHu1mauevv-Vmayx4,630
21
22
  ingestify/domain/models/dataset/collection.py,sha256=E2utQ6oyaFFrfQFMiwP9J_I7Wm21z0sRvE4Zc3QEs20,1310
22
23
  ingestify/domain/models/dataset/collection_metadata.py,sha256=gI5cb9M0QRsheIr2jA71wOyWfI5lGx5ES2Qw7rbDIoA,371
23
- ingestify/domain/models/dataset/dataset.py,sha256=m0iVJPXd1KOAHbDg7fmY_7MCdrKQaILUekIWUfo5pXI,2893
24
- ingestify/domain/models/dataset/dataset_repository.py,sha256=eiloP5msmDau4WRHee8gA7pLoH_ca2JXAhPx9UecPIA,1185
25
- ingestify/domain/models/dataset/events.py,sha256=x4l_pdzBHbemE_722EyCYXzWy9t8IcTx5j-wNFxWs6o,708
26
- ingestify/domain/models/dataset/file.py,sha256=O-yJom9dr13PaHfmc_4crtSa9B1Q9iruHsnf-m01McU,3943
27
- ingestify/domain/models/dataset/file_collection.py,sha256=V5wh2aSc61UA4HWcHi9PvyQUIUvssDRkaPVe2YR6XwU,1140
28
- ingestify/domain/models/dataset/file_repository.py,sha256=lxf3Dh8e-_67dRspMZHT1DZ79IWW_vlvb3z8lKjypj4,1514
24
+ ingestify/domain/models/dataset/dataset.py,sha256=ReL50BXNaJVU29OB5_9CQEI7BekWsgi1t3AR7e5jENc,2743
25
+ ingestify/domain/models/dataset/dataset_repository.py,sha256=kUjiqW58kOUOli1gZCLR5xw4dBX0bqI1UJsf16hgNsQ,812
26
+ ingestify/domain/models/dataset/dataset_state.py,sha256=O95mea5N34HDXw7XsYzxHna4FVk_T-ZNUDezkvt7VzY,220
27
+ ingestify/domain/models/dataset/events.py,sha256=58VacQejQt-WPh9BywP4st5McauM3gXBQo0kaDnSekY,481
28
+ ingestify/domain/models/dataset/file.py,sha256=nuoZI9GI5OysYwWCCyNsHMlm1Z9A1GbEKd38jvBzJ4E,4119
29
+ ingestify/domain/models/dataset/file_collection.py,sha256=yaQmqFlmbajLCkU5QnjgqCvKzvVEZJrXVvinx5UGHcM,1193
30
+ ingestify/domain/models/dataset/file_repository.py,sha256=ntzLiWZleZQFmrVsFvDSwfbOT86WtAXLbqgA8HlV56Q,1248
29
31
  ingestify/domain/models/dataset/identifier.py,sha256=EJYsxt0OS_43Y989DZQq8U9NjwmtvnHGYGMe6-hOBlI,575
30
- ingestify/domain/models/dataset/revision.py,sha256=fiHnd_mad0iYmNCGswKImUHpauhIf2gW_ukztDFVP48,781
32
+ ingestify/domain/models/dataset/revision.py,sha256=O_1HG2S2EmYdWqI2K282S_D-d6IhRh_f4Q3wV8MEhkk,1311
31
33
  ingestify/domain/models/dataset/selector.py,sha256=kEGpU8pIyjZ0zwE9n2uo_NY5xrNanWiTTgapyMAUEsw,1039
32
34
  ingestify/domain/models/event/__init__.py,sha256=OdPTpE9bj5QqdGmrYqRTLPX1f-LR9GWJYlGMPPEsuL8,138
33
35
  ingestify/domain/models/event/_old_event.py,sha256=RktgCAj9SMdtqkAc_bOwoghEb2Z6m4r5_xWXin9wqx4,472
34
36
  ingestify/domain/models/event/dispatcher.py,sha256=5WnyUJ7Qzr612btAtl1dMG9JBXDPcsBLyLmW6H7Q1zk,154
35
- ingestify/domain/models/event/domain_event.py,sha256=a5nNNwDWSAqou8aSBGIEA6aQOHTOxYyMEUXB91fYUIM,187
37
+ ingestify/domain/models/event/domain_event.py,sha256=OR6va417j2lisRr0gjQZ9rshAtlys5sVu7KU-W0r0xA,316
36
38
  ingestify/domain/models/event/event_bus.py,sha256=iseourbCwdUg-ODM5bM_u6cageJmceWLstOxiP3-2qU,576
37
39
  ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
38
40
  ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
41
+ ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ ingestify/domain/models/ingestion/ingestion_job.py,sha256=GnBQVnTU3FdKdSElXEISUrQz-orGIHchnNAo20Qg0DY,11511
43
+ ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=YygBv0GgU396HRe-exQqW2QmitBEnAh2VG_xkW3wdyQ,3645
44
+ ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
39
45
  ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
40
- ingestify/domain/models/resources/dataset_resource.py,sha256=HH5wMqzoWvcL84GzNa7QL3YsciI757FG4iZu9DbXn_k,3181
46
+ ingestify/domain/models/resources/dataset_resource.py,sha256=NRnN029ct3P_Eg2d9Unb1t7A12Ksv_emBGhoe9DpPwM,3118
41
47
  ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
42
48
  ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
43
- ingestify/domain/models/task/task.py,sha256=R6tEZub-N_Wjl4VjwlPySdFb3L9D7nH4St2CcDzFoKA,107
49
+ ingestify/domain/models/task/task.py,sha256=OwLZQi9GGe0O8m1dKvJdN2Rham5oilI49KyKc5uV20A,161
50
+ ingestify/domain/models/task/task_summary.py,sha256=ovzqKPstngRVzVA_JboQMluq5uQjKVJDsWNNcfcadhU,3774
44
51
  ingestify/domain/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
52
  ingestify/domain/services/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
53
  ingestify/domain/services/transformers/kloppy_to_pandas.py,sha256=NcN6nTBGVn9gz-_hWZJTMcduS1Gg7EM4X95Cqxi1QIM,809
47
54
  ingestify/infra/__init__.py,sha256=V0hpLzPVTcOHRVh0gguF6FT30YIgEOUd5v87xUHkfZ4,88
48
55
  ingestify/infra/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- ingestify/infra/fetch/http.py,sha256=4CcEkwtNzYkPspNIaQIfcthA5yLow0x_M9xpEsoucWw,3982
56
+ ingestify/infra/fetch/http.py,sha256=ldaXy6alBbI9z63H97lXfYZNT0ZCBkTac1W6-acNjjY,4127
50
57
  ingestify/infra/serialization/__init__.py,sha256=LwfmRoO4qykZkJZXxVPSKpwoVIkg9qzXa7Egut9JjL4,1772
51
58
  ingestify/infra/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
59
  ingestify/infra/sink/postgresql.py,sha256=SxuM3LntfYcpCriUpqJhMvgAf0s9cohXf6WkxSEDYDY,1816
53
60
  ingestify/infra/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
- ingestify/infra/source/statsbomb_github.py,sha256=CuHZoJn6fU8ZKQl4f1-gyaVYsmxL6R33n0cbOx1jQmI,2895
61
+ ingestify/infra/source/statsbomb_github.py,sha256=IzzrlIRqkChgJp87yW3ugG1my4g_5uMx_xEnoQLWNss,3543
55
62
  ingestify/infra/source/wyscout.py,sha256=DxCzdkzYpVRHTfV9GpF8pe3FzwIk-WHYUlea6nOUGxE,5626
56
63
  ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
57
- ingestify/infra/store/dataset/__init__.py,sha256=8oVJFiA-IKccrEpiYxAmSc65dfpNut7PYx8PUhylmdU,113
58
- ingestify/infra/store/dataset/local_dataset_repository.py,sha256=UMgSe1M9u_629V4WyuTJ-QegZJiDczzMo7vkNbNleqA,2064
64
+ ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
65
  ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
60
- ingestify/infra/store/dataset/sqlalchemy/mapping.py,sha256=Q7Od3zBnoZgxE5aThdZE93waWeKVut9dstrCnEYb9nc,3981
61
- ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=ynoIVMVD0_w9aa2hFKkcLxRKzJDoET_SNfGHXPIoN40,7067
66
+ ingestify/infra/store/dataset/sqlalchemy/mapping.py,sha256=-iTkC4_YGkkFrIsEZVTW2eoaofj4c7QZFaq7tl1r2G4,9288
67
+ ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=O2g7g_clNz43g9gXjjBJZsIGvRTntQ6rJpQeDT8yQ7c,7141
62
68
  ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
63
69
  ingestify/infra/store/file/local_file_repository.py,sha256=0oIzjjKO5U_7gPXhsBJFUqQBarQTFQS499ZK7HNxMxo,893
64
70
  ingestify/infra/store/file/s3_file_repository.py,sha256=txDviBrY9EHn3soqLFvTrjSPkyh548RxUgx4T83j0QY,1331
@@ -72,8 +78,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
72
78
  ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
73
79
  ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
74
80
  ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
75
- ingestify-0.1.2.dist-info/METADATA,sha256=66WW34koisv72Sqvwn9gpYI0Le1wuwEW_nZorjYc0MY,18822
76
- ingestify-0.1.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
77
- ingestify-0.1.2.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
78
- ingestify-0.1.2.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
79
- ingestify-0.1.2.dist-info/RECORD,,
81
+ ingestify-0.2.0.dist-info/METADATA,sha256=8974JGisSq9_Q-4M1cFYY_AU5zBW7n_UZ8NKjj_ZBDM,18853
82
+ ingestify-0.2.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
83
+ ingestify-0.2.0.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
84
+ ingestify-0.2.0.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
85
+ ingestify-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.44.0)
2
+ Generator: bdist_wheel (0.45.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,73 +0,0 @@
1
- import glob
2
- import os
3
- import pickle
4
- import uuid
5
- from pathlib import Path
6
- from typing import Optional
7
-
8
- from ingestify.domain.models import (
9
- Dataset,
10
- DatasetCollection,
11
- DatasetRepository,
12
- Selector,
13
- )
14
-
15
-
16
- def parse_value(v):
17
- try:
18
- return int(v)
19
- except ValueError:
20
- return v
21
-
22
-
23
- class LocalDatasetRepository(DatasetRepository):
24
- def destroy(self, dataset: Dataset):
25
- path = (
26
- self.base_dir / dataset.identifier.key.replace("/", "__") / "dataset.pickle"
27
- )
28
- path.unlink()
29
-
30
- @classmethod
31
- def supports(cls, url: str) -> bool:
32
- return url.startswith("file://")
33
-
34
- def __init__(self, url: str):
35
- self.base_dir = Path(url[7:])
36
- raise DeprecationWarning(
37
- "This Repository should not be used. Better use SqlAlchemyDatasetRepository with a local sqlite database."
38
- )
39
-
40
- def get_dataset_collection(
41
- self,
42
- dataset_type: Optional[str] = None,
43
- provider: Optional[str] = None,
44
- dataset_id: Optional[str] = None,
45
- selector: Optional[Selector] = None,
46
- **kwargs
47
- ) -> DatasetCollection:
48
-
49
- datasets = []
50
- for dir_name in glob.glob(str(self.base_dir / "*")):
51
- attributes = {
52
- item[0]: parse_value(item[1])
53
- for item in [
54
- part.split("=") for part in os.path.basename(dir_name).split("__")
55
- ]
56
- }
57
- if not selector or selector.matches(attributes):
58
- with open(dir_name + "/dataset.pickle", "rb") as fp:
59
- dataset = pickle.load(fp)
60
- datasets.append(dataset)
61
- return DatasetCollection(datasets)
62
-
63
- def save(self, bucket: str, dataset: Dataset):
64
- path = (
65
- self.base_dir / dataset.identifier.key.replace("/", "__") / "dataset.pickle"
66
- )
67
- path.parent.mkdir(parents=True, exist_ok=True)
68
-
69
- with open(path, "wb") as fp:
70
- pickle.dump(dataset, fp)
71
-
72
- def next_identity(self):
73
- return str(uuid.uuid4())