ingestify 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. ingestify/__init__.py +11 -0
  2. ingestify/application/__init__.py +0 -0
  3. ingestify/application/dataset_store.py +339 -0
  4. ingestify/application/ingestion_engine.py +62 -0
  5. ingestify/application/loader.py +329 -0
  6. ingestify/application/secrets_manager.py +53 -0
  7. ingestify/cmdline.py +283 -0
  8. ingestify/domain/__init__.py +2 -0
  9. ingestify/domain/models/__init__.py +45 -0
  10. ingestify/domain/models/data_spec_version_collection.py +33 -0
  11. ingestify/domain/models/dataset/__init__.py +27 -0
  12. ingestify/domain/models/dataset/collection.py +44 -0
  13. ingestify/domain/models/dataset/collection_metadata.py +13 -0
  14. ingestify/domain/models/dataset/dataset.py +104 -0
  15. ingestify/domain/models/dataset/dataset_repository.py +46 -0
  16. ingestify/domain/models/dataset/events.py +31 -0
  17. ingestify/domain/models/dataset/file.py +146 -0
  18. ingestify/domain/models/dataset/file_collection.py +35 -0
  19. ingestify/domain/models/dataset/file_repository.py +59 -0
  20. ingestify/domain/models/dataset/identifier.py +24 -0
  21. ingestify/domain/models/dataset/revision.py +29 -0
  22. ingestify/domain/models/dataset/selector.py +37 -0
  23. ingestify/domain/models/event/__init__.py +4 -0
  24. ingestify/domain/models/event/_old_event.py +21 -0
  25. ingestify/domain/models/event/dispatcher.py +8 -0
  26. ingestify/domain/models/event/domain_event.py +10 -0
  27. ingestify/domain/models/event/event_bus.py +24 -0
  28. ingestify/domain/models/event/publisher.py +23 -0
  29. ingestify/domain/models/event/subscriber.py +39 -0
  30. ingestify/domain/models/extract_job.py +23 -0
  31. ingestify/domain/models/fetch_policy.py +40 -0
  32. ingestify/domain/models/resources/__init__.py +1 -0
  33. ingestify/domain/models/resources/dataset_resource.py +99 -0
  34. ingestify/domain/models/sink.py +16 -0
  35. ingestify/domain/models/source.py +34 -0
  36. ingestify/domain/models/task/__init__.py +4 -0
  37. ingestify/domain/models/task/set.py +21 -0
  38. ingestify/domain/models/task/task.py +7 -0
  39. ingestify/domain/services/__init__.py +0 -0
  40. ingestify/domain/services/transformers/__init__.py +0 -0
  41. ingestify/domain/services/transformers/kloppy_to_pandas.py +25 -0
  42. ingestify/exceptions.py +10 -0
  43. ingestify/infra/__init__.py +4 -0
  44. ingestify/infra/fetch/__init__.py +0 -0
  45. ingestify/infra/fetch/http.py +100 -0
  46. ingestify/infra/serialization/__init__.py +50 -0
  47. ingestify/infra/sink/__init__.py +0 -0
  48. ingestify/infra/sink/postgresql.py +50 -0
  49. ingestify/infra/source/__init__.py +0 -0
  50. ingestify/infra/source/statsbomb_github.py +92 -0
  51. ingestify/infra/source/wyscout.py +175 -0
  52. ingestify/infra/store/__init__.py +2 -0
  53. ingestify/infra/store/dataset/__init__.py +2 -0
  54. ingestify/infra/store/dataset/local_dataset_repository.py +73 -0
  55. ingestify/infra/store/dataset/sqlalchemy/__init__.py +1 -0
  56. ingestify/infra/store/dataset/sqlalchemy/mapping.py +153 -0
  57. ingestify/infra/store/dataset/sqlalchemy/repository.py +239 -0
  58. ingestify/infra/store/file/__init__.py +2 -0
  59. ingestify/infra/store/file/local_file_repository.py +32 -0
  60. ingestify/infra/store/file/s3_file_repository.py +50 -0
  61. ingestify/main.py +205 -0
  62. ingestify/server.py +78 -0
  63. ingestify/source_base.py +23 -0
  64. ingestify/static/templates/statsbomb_github/README.md +0 -0
  65. ingestify/static/templates/statsbomb_github/config.yaml.jinja2 +19 -0
  66. ingestify/static/templates/statsbomb_github/database/README.md +1 -0
  67. ingestify/static/templates/statsbomb_github/query.py +14 -0
  68. ingestify/static/templates/wyscout/.env +5 -0
  69. ingestify/static/templates/wyscout/.gitignore +2 -0
  70. ingestify/static/templates/wyscout/README.md +0 -0
  71. ingestify/static/templates/wyscout/config.yaml.jinja2 +18 -0
  72. ingestify/static/templates/wyscout/database/README.md +1 -0
  73. ingestify/static/templates/wyscout/query.py +14 -0
  74. ingestify/utils.py +276 -0
  75. ingestify-0.1.0.dist-info/METADATA +265 -0
  76. ingestify-0.1.0.dist-info/RECORD +79 -0
  77. ingestify-0.1.0.dist-info/WHEEL +5 -0
  78. ingestify-0.1.0.dist-info/entry_points.txt +2 -0
  79. ingestify-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,18 @@
1
+ ingestify_version: {{ ingestify_version }}
2
+
3
+ main:
4
+ dataset_url: sqlite:///database/catalog.db
5
+ file_url: file://database/files/
6
+ default_bucket: main
7
+
8
+ sources:
9
+ wyscout:
10
+ type: ingestify.wyscout
11
+ configuration:
12
+ username: !ENV ${WYSCOUT_USERNAME}
13
+ password: !ENV ${WYSCOUT_PASSWORD}
14
+
15
+ extract_jobs:
16
+ - source: wyscout
17
+ selectors:
18
+ - season_id: 188105
@@ -0,0 +1 @@
1
+ # This will contain the database
@@ -0,0 +1,14 @@
1
from ingestify.main import get_datastore


def main():
    """Load every dataset in the configured store and report its event count."""
    store = get_datastore("config.yaml")
    for dataset in store.get_dataset_collection():
        kloppy_dataset = store.load_with_kloppy(dataset)
        print(f"Loaded dataset with {len(kloppy_dataset.records)} events")


if __name__ == "__main__":
    main()
ingestify/utils.py ADDED
@@ -0,0 +1,276 @@
1
+ import abc
2
+ import inspect
3
+ import logging
4
+ import os
5
+ import time
6
+ import re
7
+ from multiprocessing import get_context, cpu_count, get_all_start_methods
8
+
9
+ from datetime import datetime, timezone
10
+ from string import Template
11
+ from typing import Dict, Generic, Type, TypeVar, Tuple, Optional, Any
12
+
13
+ import cloudpickle
14
+ from typing_extensions import Self
15
+
16
+
17
+ from itertools import islice
18
+
19
+
20
logger = logging.getLogger(__name__)


def chunker(it, size):
    """Yield consecutive lists of at most `size` items taken from `it`."""
    source = iter(it)
    batch = list(islice(source, size))
    while batch:
        yield batch
        batch = list(islice(source, size))
27
+
28
+
29
def sanitize_exception_message(exception_message):
    """
    Sanitizes an exception message by removing any sensitive information such as passwords.

    Targets the `user:password@host` section of connection URLs, replacing the
    password with a placeholder.
    """
    # Fix: the previous pattern `:(\w+)@` only matched word characters, so
    # passwords containing symbols (e.g. `p4s!w0rd`) were left unsanitized.
    # Match any run of characters between ':' and '@' that cannot itself
    # contain ':', '@', '/' or whitespace.
    sensitive_info_pattern = r":([^:@/\s]+)@"

    # Replace sensitive information with a placeholder
    sanitized_message = re.sub(sensitive_info_pattern, ":******@", exception_message)

    return sanitized_message
40
+
41
+
42
class ComponentRegistry:
    """Registry of component classes keyed by class name.

    Exposes a metaclass (via `self.metaclass`) that auto-registers every
    concrete class defined with it on this particular registry instance.
    """

    def __init__(self):
        # Maps class name -> component class for every concrete class
        # created with `self.metaclass`.
        self.__registered_components = {}

        class _Registered(abc.ABCMeta):
            # Metaclass defined as a closure over `self` so classes created
            # with it register themselves on this registry instance.
            def __new__(mcs, cls_name, bases, class_dict):
                # Expose the class name as a `name` attribute on the new class.
                class_dict["name"] = cls_name
                component_cls = super(_Registered, mcs).__new__(
                    mcs, cls_name, bases, class_dict
                )
                if not inspect.isabstract(component_cls):
                    self.register_component(cls_name, component_cls)
                else:
                    # Abstract classes are only tolerated when they derive
                    # directly from abc.ABC; otherwise the author likely
                    # intended a concrete class but left abstract methods
                    # unimplemented.
                    # NOTE(review): assumes `bases` is non-empty — a class
                    # defined with no explicit bases would raise IndexError
                    # here; confirm whether that can occur.
                    if bases[0] != abc.ABC:
                        raise Exception(
                            f"Class '{cls_name}' seems to be an concrete class, but missing some abstract methods"
                        )
                return component_cls

        self.__metaclass = _Registered

    @property
    def metaclass(self):
        """Metaclass that auto-registers concrete subclasses on this registry."""
        return self.__metaclass

    def register_component(self, cls_name, component_cls):
        """Register `component_cls` under `cls_name` (overwrites any existing entry)."""
        self.__registered_components[cls_name] = component_cls

    def get_component(self, cls_name: str):
        """Return the registered class for `cls_name`; raises KeyError when unknown."""
        return self.__registered_components[cls_name]

    def get_supporting_component(self, **kwargs) -> str:
        """Return the name of the first registered class whose `supports(**kwargs)` is truthy.

        Raises when a registered class lacks a `supports` classmethod, or when
        no registered class supports the given kwargs.
        """
        for cls_name, class_ in self.__registered_components.items():
            if not hasattr(class_, "supports"):
                raise Exception(
                    f"Class '{cls_name}' does not implemented a 'supports' classmethod. "
                    f"This is required when using 'get_supporting_component'."
                )

            if class_.supports(**kwargs):
                return cls_name

        # Sanitize before embedding kwargs in the error: they may contain
        # credentials (e.g. connection URLs).
        kwargs_str = sanitize_exception_message(str(kwargs))
        raise Exception(f"No supporting class found for {kwargs_str}")
86
+
87
+
88
T = TypeVar("T")
R = TypeVar("R")


class ComponentFactory(Generic[T]):
    """Builds component instances from classes registered in a ComponentRegistry."""

    def __init__(self, registry: "ComponentRegistry"):
        self.registry = registry

    @classmethod
    def build_factory(
        cls, component_cls: Type[R], registry: "ComponentRegistry"
    ) -> "ComponentFactory[R]":
        """Return a factory parameterized for `component_cls`, backed by `registry`."""
        return cls[component_cls](registry)

    def build(self, cls_name, **kwargs) -> T:
        """Instantiate the registered component `cls_name` with `kwargs`.

        Prefers a `from_dict` alternate constructor when the component class
        defines one; otherwise calls the class constructor directly.
        """
        component_cls = self.registry.get_component(cls_name)
        # Fix: the previous `try: from_dict(...) except AttributeError: pass`
        # also swallowed AttributeErrors raised *inside* from_dict, silently
        # falling back to the plain constructor and masking real bugs. Look
        # the alternate constructor up explicitly instead.
        from_dict = getattr(component_cls, "from_dict", None)
        if from_dict is not None:
            return from_dict(**kwargs)
        # Let a TypeError from a signature mismatch propagate unchanged
        # (the old `except TypeError as e: raise e` was a no-op).
        return component_cls(**kwargs)

    def build_if_supports(self, **kwargs) -> T:
        """Build the first registered component whose `supports(**kwargs)` is truthy."""
        cls_name = self.registry.get_supporting_component(**kwargs)
        return self.build(cls_name, **kwargs)
117
+
118
+
119
+ def key_from_dict(d: dict) -> str:
120
+ return "/".join([f"{k}={v}" for k, v in sorted(d.items()) if not k.startswith("_")])
121
+
122
+
123
+ def utcnow() -> datetime:
124
+ return datetime.fromtimestamp(time.time(), timezone.utc)
125
+
126
+
127
+ NOT_SET = object()
128
+
129
+
130
+ class AttributeBag:
131
+ def __init__(self, attributes=NOT_SET, **kwargs):
132
+ if attributes is not NOT_SET:
133
+ self.attributes = attributes
134
+ else:
135
+ self.attributes = kwargs
136
+ self.key = key_from_dict(self.attributes)
137
+
138
+ def __getattr__(self, item):
139
+ if item in self.__dict__:
140
+ return self.__dict__[item]
141
+ if "attributes" in self.__dict__ and item in self.attributes:
142
+ return self.attributes[item]
143
+ raise AttributeError(f"{item} not found")
144
+
145
+ def items(self):
146
+ return self.attributes.items()
147
+
148
+ def format_string(self, string: str):
149
+ return Template(string).substitute(**self.attributes)
150
+
151
+ def matches(self, attributes: Dict) -> bool:
152
+ for k, v in self.attributes.items():
153
+ if attributes.get(k) != v:
154
+ return False
155
+ return True
156
+
157
+ @property
158
+ def filtered_attributes(self):
159
+ return {k: v for k, v in self.attributes.items() if not k.startswith("_")}
160
+
161
+ def __eq__(self, other):
162
+ if isinstance(other, AttributeBag):
163
+ return self.key == other.key
164
+
165
+ def __hash__(self):
166
+ return hash(self.key)
167
+
168
+ def __repr__(self):
169
+ return f"{self.__class__.__name__}({', '.join([f'{k}={v}' for k, v in self.filtered_attributes.items()])})"
170
+
171
+ def __str__(self):
172
+ return "/".join([f"{k}={v}" for k, v in self.filtered_attributes.items()])
173
+
174
+ @classmethod
175
+ def create_from(cls, other: "AttributeBag", **kwargs):
176
+ _args = dict(**other.attributes)
177
+ _args.update(kwargs)
178
+
179
+ return cls(**_args)
180
+
181
+ def split(self, attribute_name: str) -> Tuple[Self, Optional[Any]]:
182
+ return self.attributes.get(attribute_name), self.__class__(
183
+ **{k: v for k, v in self.attributes.items() if k != attribute_name}
184
+ )
185
+
186
+
187
def cloud_unpack_and_call(args):
    """Unpickle a cloudpickled callable and invoke it with its original argument.

    `args` is a `(pickled_function, argument)` pair, the shape produced by the
    pool-based runners in this module.
    """
    pickled_func, original_arg = args
    func = cloudpickle.loads(pickled_func)
    return func(original_arg)
192
+
193
+
194
def map_in_pool(func, iterable, processes=0):
    """Map `func` over `iterable` with a multiprocessing pool and return the results.

    Runs synchronously in-process when INGESTIFY_RUN_EAGER=true. Pool size
    falls back to INGESTIFY_CONCURRENCY, then to the CPU count.
    """
    # TODO: move to cmdline
    if os.environ.get("INGESTIFY_RUN_EAGER") == "true":
        return [func(item) for item in iterable]

    if not processes:
        processes = int(os.environ.get("INGESTIFY_CONCURRENCY", "0"))

    # Prefer fork when the platform offers it; fall back to spawn otherwise.
    start_method = "fork" if "fork" in get_all_start_methods() else "spawn"
    ctx = get_context(start_method)

    # cloudpickle handles lambdas/closures that the stdlib pickler cannot.
    pickled_func = cloudpickle.dumps(func)
    tasks = ((pickled_func, item) for item in iterable)

    with ctx.Pool(processes or cpu_count()) as pool:
        return pool.map(cloud_unpack_and_call, tasks)
213
+
214
+
215
class SyncPool:
    """Pool-compatible stand-in that runs every task synchronously in-process."""

    def map(self, func, iterable):
        return list(map(func, iterable))

    def join(self):
        return True

    def close(self):
        return True
224
+
225
+
226
class DummyPool:
    """Pool-compatible stand-in for dry runs: logs the task count, runs nothing."""

    def map(self, func, iterable):
        task_count = len(list(iterable))
        logger.info(f"DummyPool: not running {task_count} tasks")
        return None

    def join(self):
        return True

    def close(self):
        return True
236
+
237
+
238
class TaskExecutor:
    """Runs tasks on a pool chosen from the execution mode.

    - `dry_run=True` -> DummyPool (tasks are logged but not executed)
    - INGESTIFY_RUN_EAGER=true -> SyncPool (in-process, synchronous)
    - otherwise -> multiprocessing pool sized by `processes`,
      INGESTIFY_CONCURRENCY, or the CPU count.

    Usable as a context manager; exiting closes and joins the pool.
    """

    def __init__(self, processes=0, dry_run: bool = False):
        if dry_run:
            pool = DummyPool()
        elif os.environ.get("INGESTIFY_RUN_EAGER") == "true":
            pool = SyncPool()
        else:
            if not processes:
                processes = int(os.environ.get("INGESTIFY_CONCURRENCY", "0"))

            # Prefer fork when available; fall back to spawn otherwise.
            if "fork" in get_all_start_methods():
                ctx = get_context("fork")
            else:
                ctx = get_context("spawn")

            pool = ctx.Pool(processes or cpu_count())
        self.pool = pool

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.join()

    def run(self, func, iterable):
        """Execute `func` for every item in `iterable` on the pool and return the results.

        Fix: results were previously computed (and logged) but never returned,
        so callers could not access them. Returns None when the pool produced
        no results (e.g. DummyPool during a dry run).
        """
        # cloudpickle handles lambdas/closures that the stdlib pickler cannot.
        wrapped_fn = cloudpickle.dumps(func)
        start_time = time.time()
        res = self.pool.map(
            cloud_unpack_and_call, ((wrapped_fn, item) for item in iterable)
        )
        if res:
            took = time.time() - start_time
            logger.info(
                f"Finished {len(res)} tasks in {took:.1f} seconds. {(len(res)/took):.1f} tasks/sec"
            )
        return res

    def join(self):
        """Close the pool and wait for outstanding work to finish."""
        self.pool.close()
        self.pool.join()
@@ -0,0 +1,265 @@
1
+ Metadata-Version: 2.1
2
+ Name: ingestify
3
+ Version: 0.1.0
4
+ Summary: Standardizing soccer tracking- and event data
5
+ Author: Koen Vossen
6
+ Author-email: info@koenvossen.nl
7
+ License: AGPL
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: requests<3,>=2.0.0
10
+ Requires-Dist: SQLAlchemy
11
+ Requires-Dist: dataclass-factory
12
+ Requires-Dist: cloudpickle
13
+ Requires-Dist: click
14
+ Requires-Dist: jinja2
15
+ Requires-Dist: python-dotenv
16
+ Requires-Dist: pyaml-env
17
+ Requires-Dist: boto3
18
+ Requires-Dist: pytz
19
+ Provides-Extra: test
20
+ Requires-Dist: pytest<7,>=6.2.5; extra == "test"
21
+
22
+ # Ingestify
23
+
24
+ ## Data Management Platform
25
+
26
+ In general a data management platform contains:
27
+ 1. Ingestion of data (Extract from Source into Load into Data Lake)
28
+ 2. Transformation of data (Extract from Data Lake, Transform and Load into Data Warehouse)
29
+ 3. Utilization of data
30
+
31
+ <img src="https://www.getdbt.com/ui/img/blog/what-exactly-is-dbt/1-BogoeTTK1OXFU1hPfUyCFw.png" />
32
+ Source: https://www.getdbt.com/blog/what-exactly-is-dbt/
33
+
34
+ TODO: Improve drawings and explain more
35
+
36
+ ## Ingestify
37
+
38
+ Ingestify focuses on the ingestion of data.
39
+
40
+ ### How does Ingestify work?
41
+
42
+ 1. A `Source` is asked for all available `Datasets` using the `discover_datasets` method
43
+ 2. All available `Datasets` are compared with what's already fetched; a `FetchPolicy` decides whether each one is new or has changed
44
+ 3. A `TaskQueue` is filled with `Tasks` to fetch all missing or stale `Datasets`
45
+
46
+ <img src="https://raw.githubusercontent.com/PySport/ingestify/refs/heads/main/docs/overview.svg" />
47
+
48
+ - [Source](blob/main/ingestify/domain/models/source.py) is the main entrance from Ingestify to external sources. A Source must always define:
49
+ - `discover_datasets` - Creates a list of all available datasets on the Source
50
+ - `fetch_dataset_files` - Fetches a single dataset for a Source
51
+ - [Dataset Store](blob/main/ingestify/application/dataset_store.py) manages the access to the Metadata storage and the file storage. It keeps track of versions, and knows how to load data.
52
+ - [Loader](blob/main/ingestify/application/loader.py) organizes the fetching process. It does this by executing the following steps:
53
+ 1. Ask `Source` for all available datasets for a selector
54
+ 2. Ask `Dataset Store` for all available datasets for a selector
55
+ 3. Determines missing `Datasets`
56
+ 4. Create tasks for data retrieval and puts in `TaskQueue`
57
+ 5. Use multiprocessing to execute all tasks
58
+
59
+ ## Get started
60
+
61
+ ### Install
62
+
63
+ Make sure you have installed the latest version:
64
+ ```bash
65
+ pip install git+https://github.com/PySport/ingestify.git
66
+
67
+ # OR
68
+
69
+ pip install git+ssh://git@github.com/PySport/ingestify.git
70
+ ```
71
+
72
+ ### Using a template
73
+
74
+ Ingestify provides some templates to get started quickly. When using `ingestify init` a new project will be created and example files are copied.
75
+ Currently, Ingestify offers a `statsbomb_github` and `wyscout` template.
76
+
77
+ #### Statsbomb Github
78
+
79
+ This uses https://github.com/statsbomb/open-data as source and syncs some competitions.
80
+
81
+ ```
82
+ bash# ingestify init --template statsbomb_github /tmp/ingestify-test
83
+
84
+ 2023-05-23 08:57:51,250 [INFO] ingestify.cmdline: Initialized project at `/tmp/ingestify-test` with template `statsbomb_github`
85
+ ```
86
+
87
+ #### Wyscout
88
+
89
+ This requires valid Wyscout credentials. The template includes some security best practices, like using a `.env` file for credentials which isn't part of version control.
90
+
91
+ ```
92
+ bash# ingestify init --template wyscout /tmp/ingestify-test
93
+
94
+ 2023-05-23 08:58:18,720 [INFO] ingestify.cmdline: Initialized project at `/tmp/ingestify-test` with template `wyscout`
95
+ ```
96
+
97
+ ### Running Ingestify
98
+
99
+ To actually run Ingestify you first change the current directory to the project directory.
100
+
101
+ Then run:
102
+ ```bash
103
+ bash# ingestify run
104
+
105
+ 2023-05-23 08:59:07,066 [INFO] ingestify.main: Initializing sources
106
+ 2023-05-23 08:59:07,068 [INFO] ingestify.main: Initializing IngestionEngine
107
+ 2023-05-23 08:59:07,086 [INFO] ingestify.main: Determining tasks...
108
+ 2023-05-23 08:59:07,364 [INFO] ingestify.application.loader: Discovered 33 datasets from StatsbombGithub using selector competition_id=11/season_id=42 => 33 tasks. 0 skipped.
109
+ 2023-05-23 08:59:07,625 [INFO] ingestify.application.loader: Discovered 35 datasets from StatsbombGithub using selector competition_id=11/season_id=90 => 35 tasks. 0 skipped.
110
+ 2023-05-23 08:59:07,625 [INFO] ingestify.application.loader: Scheduled 68 tasks. With 10 processes
111
+ 2023-05-23 08:59:07,654 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303516)
112
+ 2023-05-23 08:59:07,654 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303731)
113
+ 2023-05-23 08:59:07,655 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303430)
114
+ 2023-05-23 08:59:07,655 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303504)
115
+ 2023-05-23 08:59:07,655 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303421)
116
+ 2023-05-23 08:59:07,655 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303400)
117
+ 2023-05-23 08:59:07,656 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303664)
118
+ 2023-05-23 08:59:07,656 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303680)
119
+ 2023-05-23 08:59:07,657 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303487)
120
+ 2023-05-23 08:59:07,658 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303615)
121
+ 2023-05-23 08:59:08,419 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303532)
122
+ 2023-05-23 08:59:08,421 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303682)
123
+ 2023-05-23 08:59:08,444 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303451)
124
+ 2023-05-23 08:59:08,462 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303596)
125
+ 2023-05-23 08:59:08,518 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303634)
126
+ 2023-05-23 08:59:08,528 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303479)
127
+ 2023-05-23 08:59:08,541 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303696)
128
+ 2023-05-23 08:59:08,638 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303725)
129
+ 2023-05-23 08:59:08,684 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303600)
130
+ 2023-05-23 08:59:08,962 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303493)
131
+ 2023-05-23 08:59:09,270 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303548)
132
+ 2023-05-23 08:59:09,276 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303674)
133
+ 2023-05-23 08:59:09,292 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303700)
134
+ 2023-05-23 08:59:09,332 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303666)
135
+ 2023-05-23 08:59:09,411 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303377)
136
+ 2023-05-23 08:59:09,462 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303517)
137
+ 2023-05-23 08:59:09,491 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303473)
138
+ 2023-05-23 08:59:09,511 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773631)
139
+ 2023-05-23 08:59:09,726 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773497)
140
+ 2023-05-23 08:59:09,757 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773593)
141
+ 2023-05-23 08:59:09,957 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303652)
142
+ 2023-05-23 08:59:09,999 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303715)
143
+ 2023-05-23 08:59:10,075 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303470)
144
+ 2023-05-23 08:59:10,103 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303707)
145
+ 2023-05-23 08:59:10,188 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773457)
146
+ 2023-05-23 08:59:10,248 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303524)
147
+ 2023-05-23 08:59:10,282 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773665)
148
+ 2023-05-23 08:59:10,411 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=42/match_id=303610)
149
+ 2023-05-23 08:59:10,563 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773466)
150
+ 2023-05-23 08:59:10,711 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773585)
151
+ 2023-05-23 08:59:10,768 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773672)
152
+ 2023-05-23 08:59:10,778 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773565)
153
+ 2023-05-23 08:59:10,867 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773660)
154
+ 2023-05-23 08:59:10,954 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773656)
155
+ 2023-05-23 08:59:10,974 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773586)
156
+ 2023-05-23 08:59:11,026 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773387)
157
+ 2023-05-23 08:59:11,136 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773369)
158
+ 2023-05-23 08:59:11,438 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773552)
159
+ 2023-05-23 08:59:11,515 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773597)
160
+ 2023-05-23 08:59:11,586 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773571)
161
+ 2023-05-23 08:59:11,610 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773587)
162
+ 2023-05-23 08:59:11,690 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773386)
163
+ 2023-05-23 08:59:11,727 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773377)
164
+ 2023-05-23 08:59:11,757 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773372)
165
+ 2023-05-23 08:59:11,899 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3764661)
166
+ 2023-05-23 08:59:11,901 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773695)
167
+ 2023-05-23 08:59:12,006 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773661)
168
+ 2023-05-23 08:59:12,186 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773474)
169
+ 2023-05-23 08:59:12,283 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773523)
170
+ 2023-05-23 08:59:12,339 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773403)
171
+ 2023-05-23 08:59:12,426 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773428)
172
+ 2023-05-23 08:59:12,582 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773415)
173
+ 2023-05-23 08:59:12,583 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773689)
174
+ 2023-05-23 08:59:12,705 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773526)
175
+ 2023-05-23 08:59:13,510 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773477)
176
+ 2023-05-23 08:59:13,538 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3764440)
177
+ 2023-05-23 08:59:13,592 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773625)
178
+ 2023-05-23 08:59:15,017 [INFO] ingestify.application.loader: Running task CreateDatasetTask(StatsbombGithub -> competition_id=11/season_id=90/match_id=3773547)
179
+ 2023-05-23 08:59:15,917 [INFO] ingestify.cmdline: Done
180
+ ```
181
+
182
+ When we run it for the second time:
183
+ ```bash
184
+ bash# ingestify run
185
+
186
+ 2023-05-23 08:59:48,001 [INFO] ingestify.main: Initializing sources
187
+ 2023-05-23 08:59:48,002 [INFO] ingestify.main: Initializing IngestionEngine
188
+ 2023-05-23 08:59:48,006 [INFO] ingestify.main: Determining tasks...
189
+ 2023-05-23 08:59:48,067 [INFO] ingestify.application.loader: Discovered 33 datasets from StatsbombGithub using selector competition_id=11/season_id=42 => 0 tasks. 33 skipped.
190
+ 2023-05-23 08:59:48,118 [INFO] ingestify.application.loader: Discovered 35 datasets from StatsbombGithub using selector competition_id=11/season_id=90 => 0 tasks. 35 skipped.
191
+ 2023-05-23 08:59:48,118 [INFO] ingestify.application.loader: Nothing to do.
192
+ 2023-05-23 08:59:48,119 [INFO] ingestify.cmdline: Done
193
+ ```
194
+
195
+ ## Using the data
196
+
197
+ The project contains a `query.py` file with an example of how to use the data.
198
+
199
+ ```bash
200
+ bash# python query.py
201
+
202
+ Loaded dataset with 3702 events
203
+ Loaded dataset with 3994 events
204
+ Loaded dataset with 3831 events
205
+ Loaded dataset with 3647 events
206
+ Loaded dataset with 4062 events
207
+ Loaded dataset with 4051 events
208
+
209
+ .....
210
+
211
+ ```
212
+
213
+
214
+ How to go from raw data to parquet files:
215
+
216
+ ```python
217
+ from ingestify.main import get_datastore
218
+
219
+ store = get_datastore("config.yaml")
220
+
221
+ dataset_collection = store.get_dataset_collection(
222
+ provider="statsbomb", stage="raw"
223
+ )
224
+
225
+ # Store.map is using multiprocessing by default
226
+ store.map(
227
+ lambda dataset: (
228
+ store
229
+
230
+ # As it's related to https://github.com/PySport/kloppy the store can load files using kloppy
231
+ .load_with_kloppy(dataset)
232
+
233
+ # Convert it into a polars dataframe using all columns in the original data and some more additional ones
234
+ .to_df(
235
+ "*",
236
+ match_id=dataset.identifier.match_id,
237
+ competition_id=dataset.identifier.competition_id,
238
+ season_id=dataset.identifier.season_id,
239
+
240
+ engine="polars"
241
+ )
242
+
243
+ # Write to parquet format
244
+ .write_parquet(
245
+ f"/tmp/files/blaat/{dataset.identifier.match_id}.parquet"
246
+ )
247
+ ),
248
+ dataset_collection,
249
+ )
250
+
251
+ # TODO:
252
+ # - when a file is written in parquet format (on any other format) it should be added as such to the store.
253
+ ```
254
+
255
+
256
+ ## Future work
257
+
258
+ Some future work include:
259
+ - Workflow tools - Run custom workflows using with tools like [Airflow](https://airflow.apache.org/), [Dagster](https://docs.dagster.io/getting-started), [Prefect](https://www.prefect.io/), [DBT](https://www.getdbt.com/)
260
+ - Execution engines - Run tasks on other execution engines like [AWS Lambda](https://aws.amazon.com/lambda/), [Dask](https://www.dask.org/)
261
+ - Lineage - Keep track of lineage with tools like [SQLLineage](https://sqllineage.readthedocs.io/en/latest/index.html)
262
+ - Data quality - Monitor data quality with tools like [Great Expectations](https://docs.greatexpectations.io/docs/tutorials/quickstart/)
263
+ - Event Bus - Automatically publish events to external systems like [AWS Event Bridge](https://aws.amazon.com/eventbridge/), [Azure Event Grid](https://learn.microsoft.com/en-us/azure/event-grid/overview), [Google Cloud Pub/Sub](https://cloud.google.com/pubsub/docs/overview), [Kafka](https://kafka.apache.org/), [RabbitMQ](https://www.rabbitmq.com/)
264
+ - Query Engines - Integrate with query engines to run SQL queries directly on the store using tools like [DuckDB](https://duckdb.org/), [DataBend](https://databend.rs/), [DataFusion](https://arrow.apache.org/datafusion/), [Polars](https://www.pola.rs/), [Spark](https://spark.apache.org/)
265
+ - Streaming Data - Ingest streaming data
@@ -0,0 +1,79 @@
1
+ ingestify/__init__.py,sha256=DdhKleT3RggJUMj5Auq_ImGLmKm-3HHs5Yerx_VsH_w,301
2
+ ingestify/cmdline.py,sha256=gLy79Cq3OnEyoEcI6koWIEbCwvgFZ1E8n3UU1sKS8FM,7143
3
+ ingestify/exceptions.py,sha256=wMMuajl4AkQRfW60TLN7btJmQaH8-lUczXyW_2g9kOU,143
4
+ ingestify/main.py,sha256=YjrAOiGzwurtoDyIf981DSJHHA6IT5q09k3QNzTKCC8,6814
5
+ ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
6
+ ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
7
+ ingestify/utils.py,sha256=eEHwulqNEb2YTRDrCMVxr6mWZYI6KOcNCAIWFTi74u0,8029
8
+ ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ ingestify/application/dataset_store.py,sha256=NAW-XSvp118Lr2hXZd3qtuQr6VkPdWCLksIwd5MSs30,11489
10
+ ingestify/application/ingestion_engine.py,sha256=GYIhb8a9ePkEcNOBPdfu-YawiD7eRZMRlxCA-6g9DRA,2249
11
+ ingestify/application/loader.py,sha256=d7iXmdHN_yhDkEc2MMZ_6BLMEdRz9ChpBMy4yCWvxQo,13317
12
+ ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
13
+ ingestify/domain/__init__.py,sha256=M7_fVTJjQUx53P4UQUPhowRKPKsIIjx4JYZL1yjHKsM,46
14
+ ingestify/domain/models/__init__.py,sha256=xHVQZP57ZQYUKwAtbccnDKX89_yTOvBKAtn4XDVbEbY,930
15
+ ingestify/domain/models/data_spec_version_collection.py,sha256=qjEM6-gt-Uf5orQlv64P6NJCEdWiUPX2oTZv8cC-KVY,1203
16
+ ingestify/domain/models/extract_job.py,sha256=yXrlF2Vt5hxB1Vo9CicpgyW5rjvJaEPfSiMzaAqhqB0,624
17
+ ingestify/domain/models/fetch_policy.py,sha256=d7K1TzliNJXxqaqzqEOQWLhvgIvmmqhUQEliXvSUcTs,1405
18
+ ingestify/domain/models/sink.py,sha256=AieqDQ76Vj7WGxCrl3-F93AKe-VBfoPHtMNH28GTQM4,384
19
+ ingestify/domain/models/source.py,sha256=sB3aqr2LfjIbtw7ODJpHnPj3RUeo7gYmTU7MXvfaYg4,973
20
+ ingestify/domain/models/dataset/__init__.py,sha256=kSn3XZo0o-D0WzMb2VDxhOXw9Rr9jvS-8fkHdOnrccU,748
21
+ ingestify/domain/models/dataset/collection.py,sha256=E2utQ6oyaFFrfQFMiwP9J_I7Wm21z0sRvE4Zc3QEs20,1310
22
+ ingestify/domain/models/dataset/collection_metadata.py,sha256=gI5cb9M0QRsheIr2jA71wOyWfI5lGx5ES2Qw7rbDIoA,371
23
+ ingestify/domain/models/dataset/dataset.py,sha256=m0iVJPXd1KOAHbDg7fmY_7MCdrKQaILUekIWUfo5pXI,2893
24
+ ingestify/domain/models/dataset/dataset_repository.py,sha256=eiloP5msmDau4WRHee8gA7pLoH_ca2JXAhPx9UecPIA,1185
25
+ ingestify/domain/models/dataset/events.py,sha256=x4l_pdzBHbemE_722EyCYXzWy9t8IcTx5j-wNFxWs6o,708
26
+ ingestify/domain/models/dataset/file.py,sha256=O-yJom9dr13PaHfmc_4crtSa9B1Q9iruHsnf-m01McU,3943
27
+ ingestify/domain/models/dataset/file_collection.py,sha256=V5wh2aSc61UA4HWcHi9PvyQUIUvssDRkaPVe2YR6XwU,1140
28
+ ingestify/domain/models/dataset/file_repository.py,sha256=lxf3Dh8e-_67dRspMZHT1DZ79IWW_vlvb3z8lKjypj4,1514
29
+ ingestify/domain/models/dataset/identifier.py,sha256=EJYsxt0OS_43Y989DZQq8U9NjwmtvnHGYGMe6-hOBlI,575
30
+ ingestify/domain/models/dataset/revision.py,sha256=fiHnd_mad0iYmNCGswKImUHpauhIf2gW_ukztDFVP48,781
31
+ ingestify/domain/models/dataset/selector.py,sha256=kEGpU8pIyjZ0zwE9n2uo_NY5xrNanWiTTgapyMAUEsw,1039
32
+ ingestify/domain/models/event/__init__.py,sha256=OdPTpE9bj5QqdGmrYqRTLPX1f-LR9GWJYlGMPPEsuL8,138
33
+ ingestify/domain/models/event/_old_event.py,sha256=RktgCAj9SMdtqkAc_bOwoghEb2Z6m4r5_xWXin9wqx4,472
34
+ ingestify/domain/models/event/dispatcher.py,sha256=5WnyUJ7Qzr612btAtl1dMG9JBXDPcsBLyLmW6H7Q1zk,154
35
+ ingestify/domain/models/event/domain_event.py,sha256=a5nNNwDWSAqou8aSBGIEA6aQOHTOxYyMEUXB91fYUIM,187
36
+ ingestify/domain/models/event/event_bus.py,sha256=iseourbCwdUg-ODM5bM_u6cageJmceWLstOxiP3-2qU,576
37
+ ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
38
+ ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
39
+ ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
40
+ ingestify/domain/models/resources/dataset_resource.py,sha256=g0tu9QZQEdAGR-dRXQPL3ddcbEEGI__pvkDJGoscUTE,3027
41
+ ingestify/domain/models/task/__init__.py,sha256=BdlyIPvE07Xax_IzLgO9DUw0wsz9OZutxnxdDNyRlys,79
42
+ ingestify/domain/models/task/set.py,sha256=04txDYgS5rotXofD9TqChKdW0VZIYshrkfPIpXtlhW4,430
43
+ ingestify/domain/models/task/task.py,sha256=R6tEZub-N_Wjl4VjwlPySdFb3L9D7nH4St2CcDzFoKA,107
44
+ ingestify/domain/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
+ ingestify/domain/services/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ ingestify/domain/services/transformers/kloppy_to_pandas.py,sha256=NcN6nTBGVn9gz-_hWZJTMcduS1Gg7EM4X95Cqxi1QIM,809
47
+ ingestify/infra/__init__.py,sha256=V0hpLzPVTcOHRVh0gguF6FT30YIgEOUd5v87xUHkfZ4,88
48
+ ingestify/infra/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
+ ingestify/infra/fetch/http.py,sha256=gm7x0dACp3sTY1FMlbv8zRoQLZuZgtXmBg3HbhQ0syI,3086
50
+ ingestify/infra/serialization/__init__.py,sha256=LwfmRoO4qykZkJZXxVPSKpwoVIkg9qzXa7Egut9JjL4,1772
51
+ ingestify/infra/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
+ ingestify/infra/sink/postgresql.py,sha256=SxuM3LntfYcpCriUpqJhMvgAf0s9cohXf6WkxSEDYDY,1816
53
+ ingestify/infra/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
+ ingestify/infra/source/statsbomb_github.py,sha256=CuHZoJn6fU8ZKQl4f1-gyaVYsmxL6R33n0cbOx1jQmI,2895
55
+ ingestify/infra/source/wyscout.py,sha256=DxCzdkzYpVRHTfV9GpF8pe3FzwIk-WHYUlea6nOUGxE,5626
56
+ ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
57
+ ingestify/infra/store/dataset/__init__.py,sha256=8oVJFiA-IKccrEpiYxAmSc65dfpNut7PYx8PUhylmdU,113
58
+ ingestify/infra/store/dataset/local_dataset_repository.py,sha256=UMgSe1M9u_629V4WyuTJ-QegZJiDczzMo7vkNbNleqA,2064
59
+ ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
60
+ ingestify/infra/store/dataset/sqlalchemy/mapping.py,sha256=Q7Od3zBnoZgxE5aThdZE93waWeKVut9dstrCnEYb9nc,3981
61
+ ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=ynoIVMVD0_w9aa2hFKkcLxRKzJDoET_SNfGHXPIoN40,7067
62
+ ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
63
+ ingestify/infra/store/file/local_file_repository.py,sha256=0oIzjjKO5U_7gPXhsBJFUqQBarQTFQS499ZK7HNxMxo,893
64
+ ingestify/infra/store/file/s3_file_repository.py,sha256=txDviBrY9EHn3soqLFvTrjSPkyh548RxUgx4T83j0QY,1331
65
+ ingestify/static/templates/statsbomb_github/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
+ ingestify/static/templates/statsbomb_github/config.yaml.jinja2,sha256=_gAuAipfBL3ddLacyS1IBP5JluvPS2vmrb8GGaFtcUM,386
67
+ ingestify/static/templates/statsbomb_github/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
68
+ ingestify/static/templates/statsbomb_github/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
69
+ ingestify/static/templates/wyscout/.env,sha256=o2kfuDC_seZNIqDscPf2Ww5TGiJmLh_DMOUNykGvs8Q,141
70
+ ingestify/static/templates/wyscout/.gitignore,sha256=db0A2IjIeZf5fLLwXKD-bLmC4pETofxm848bljymnNs,13
71
+ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
+ ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
73
+ ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
74
+ ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
75
+ ingestify-0.1.0.dist-info/METADATA,sha256=ryin_4RwMcyvqa4l6nESqysjlutcffHHWwoBvcwU784,18822
76
+ ingestify-0.1.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
77
+ ingestify-0.1.0.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
78
+ ingestify-0.1.0.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
79
+ ingestify-0.1.0.dist-info/RECORD,,