ingestify 0.4.2__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestify/__init__.py +1 -1
- ingestify/domain/models/ingestion/ingestion_job.py +8 -6
- ingestify/domain/models/ingestion/ingestion_job_summary.py +13 -6
- ingestify/infra/store/dataset/sqlalchemy/repository.py +55 -21
- ingestify/infra/store/dataset/sqlalchemy/tables.py +2 -1
- ingestify/infra/store/file/s3_file_repository.py +6 -1
- ingestify/utils.py +38 -28
- {ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/METADATA +1 -1
- {ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/RECORD +12 -12
- {ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/WHEEL +0 -0
- {ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/entry_points.txt +0 -0
- {ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/top_level.txt +0 -0
ingestify/domain/models/ingestion/ingestion_job.py
CHANGED
@@ -24,7 +24,9 @@ from ingestify.utils import TaskExecutor, chunker
 logger = logging.getLogger(__name__)


-DEFAULT_CHUNK_SIZE = 1_000
+# Decrease batch size from 1_000 to 500. The sqlalchemy repository uses
+# a compound select, which breaks at more than 500 select statements
+DEFAULT_CHUNK_SIZE = 500


 def run_task(task):
@@ -287,7 +289,7 @@ class IngestionJob:
             selector=dataset_identifiers,
         )

-
+        skipped_tasks = 0

         task_set = TaskSet()
         for dataset_resource in batch:
@@ -307,7 +309,7 @@ class IngestionJob:
                         )
                     )
                 else:
-
+                    skipped_tasks += 1
             else:
                 if self.ingestion_plan.fetch_policy.should_fetch(dataset_resource):
                     task_set.add(
@@ -317,12 +319,12 @@ class IngestionJob:
                         )
                     )
                 else:
-
+                    skipped_tasks += 1

         if task_set:
             logger.info(
                 f"Discovered {len(dataset_identifiers)} datasets from {self.ingestion_plan.source.__class__.__name__} "
-                f"using selector {self.selector} => {len(task_set)} tasks. {
+                f"using selector {self.selector} => {len(task_set)} tasks. {skipped_tasks} skipped."
             )
             logger.info(f"Running {len(task_set)} tasks")
             ingestion_job_summary.add_task_summaries(
@@ -334,7 +336,7 @@ class IngestionJob:
                 f"using selector {self.selector} => nothing to do"
             )

-        ingestion_job_summary.
+        ingestion_job_summary.increase_skipped_tasks(skipped_tasks)

         if ingestion_job_summary.task_count() >= MAX_TASKS_PER_CHUNK:
             finish_task_timer()
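The new comment pins down why the batch size dropped: every id in a batch becomes one member of a compound SELECT in the SQLAlchemy repository (see the repository.py changes below), and SQLite, for one, caps compound selects at 500 terms by default (SQLITE_MAX_COMPOUND_SELECT). A minimal sketch of the batching behavior, assuming `chunker` yields fixed-size lists; the generator below is an illustrative stand-in, not the actual `ingestify.utils.chunker`:

from itertools import islice
from typing import Iterable, Iterator, List, TypeVar

T = TypeVar("T")

def chunker(iterable: Iterable[T], chunk_size: int = 500) -> Iterator[List[T]]:
    # Hypothetical stand-in for ingestify.utils.chunker: yield fixed-size
    # batches so no single batch exceeds the compound-select limit.
    iterator = iter(iterable)
    while chunk := list(islice(iterator, chunk_size)):
        yield chunk

for batch in chunker(range(1_200)):
    print(len(batch))  # 500, 500, 200
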
ingestify/domain/models/ingestion/ingestion_job_summary.py
CHANGED

@@ -41,7 +41,8 @@ class IngestionJobSummary(BaseModel, HasTiming):
     state: IngestionJobState = IngestionJobState.RUNNING
     task_summaries: List[TaskSummary] = Field(default_factory=list)

-
+    total_tasks: int = 0
+    skipped_tasks: int = 0
     failed_tasks: int = 0
     successful_tasks: int = 0
     ignored_successful_tasks: int = 0
@@ -62,11 +63,11 @@ class IngestionJobSummary(BaseModel, HasTiming):
     def add_task_summaries(self, task_summaries: List[TaskSummary]):
         self.task_summaries.extend(task_summaries)

-    def
-        self.
+    def increase_skipped_tasks(self, skipped_tasks: int):
+        self.skipped_tasks += skipped_tasks

     def task_count(self):
-        return len(self.task_summaries) + self.
+        return len(self.task_summaries) + self.skipped_tasks

     def _set_ended(self):
         self.failed_tasks = len(
@@ -82,6 +83,12 @@ class IngestionJobSummary(BaseModel, HasTiming):
                 if task.state == TaskState.FINISHED_IGNORED
             ]
         )
+        self.total_tasks = (
+            self.failed_tasks
+            + self.successful_tasks
+            + self.ignored_successful_tasks
+            + self.skipped_tasks
+        )
         self.ended_at = utcnow()

         # Only keep failed tasks. Rest isn't interesting
@@ -115,13 +122,13 @@ class IngestionJobSummary(BaseModel, HasTiming):
         for timing in self.timings:
             print(f" - {timing.name}: {format_duration(timing.duration)}")
         print(
-            f" - Tasks: {
+            f" - Tasks: {self.total_tasks} - {(self.total_tasks / self.duration.total_seconds()):.1f} tasks/sec"
         )

         print(f" - Failed tasks: {self.failed_tasks}")
         print(f" - Successful tasks: {self.successful_tasks}")
         print(f" - Successful ignored tasks: {self.ignored_successful_tasks}")
-        print(f" - Skipped datasets: {self.
+        print(f" - Skipped datasets: {self.skipped_tasks}")
         print("********************************")

     def __enter__(self):
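Skipped tasks never run, so they produce no `TaskSummary`; the diff therefore carries them in a separate counter and folds them back in for `task_count()` and `total_tasks`. A stripped-down sketch of that bookkeeping (field names taken from the diff; the real class is a pydantic model with many more fields):

from dataclasses import dataclass, field
from typing import List

@dataclass
class SummarySketch:
    task_summaries: List[str] = field(default_factory=list)  # stand-in for TaskSummary
    skipped_tasks: int = 0

    def increase_skipped_tasks(self, skipped_tasks: int) -> None:
        self.skipped_tasks += skipped_tasks

    def task_count(self) -> int:
        # Executed tasks have summaries; skipped ones exist only as a count.
        return len(self.task_summaries) + self.skipped_tasks

summary = SummarySketch(task_summaries=["t1", "t2"])
summary.increase_skipped_tasks(3)
assert summary.task_count() == 5
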
ingestify/infra/store/dataset/sqlalchemy/repository.py
CHANGED

@@ -19,7 +19,7 @@ from sqlalchemy import (
 )
 from sqlalchemy.engine import make_url
 from sqlalchemy.exc import NoSuchModuleError
-from sqlalchemy.orm import Session, Query
+from sqlalchemy.orm import Session, Query, sessionmaker, scoped_session

 from ingestify.domain import File, Revision
 from ingestify.domain.models import (
@@ -34,6 +34,7 @@ from ingestify.domain.models.dataset.collection_metadata import (
 from ingestify.domain.models.ingestion.ingestion_job_summary import IngestionJobSummary
 from ingestify.domain.models.task.task_summary import TaskSummary
 from ingestify.exceptions import IngestifyError
+from ingestify.utils import get_concurrency

 from .tables import (
     metadata,
@@ -96,17 +97,15 @@ class SqlAlchemySessionProvider:
             self.url,
             # Use the default isolation level, don't need SERIALIZABLE
             # isolation_level="SERIALIZABLE",
+            pool_size=get_concurrency(),  # Maximum number of connections in the pool
+            max_overflow=5,
+            pool_recycle=1800,
+            pool_pre_ping=True,
         )
         self.dialect = self.engine.dialect
-        self.session = Session(bind=self.engine)

-
-
-
-        self.url = url
-        self._init_engine()
-
-        metadata.create_all(self.engine)
+        session_factory = sessionmaker(bind=self.engine)
+        self.session = scoped_session(session_factory)

     def __getstate__(self):
         return {"url": self.url}
@@ -115,6 +114,14 @@ class SqlAlchemySessionProvider:
         self.url = state["url"]
         self._init_engine()

+    def __init__(self, url: str):
+        url = self.fix_url(url)
+
+        self.url = url
+        self._init_engine()
+
+        metadata.create_all(self.engine)
+
     def __del__(self):
         self.close()

@@ -123,16 +130,11 @@ class SqlAlchemySessionProvider:
         self._init_engine()

     def close(self):
-        if hasattr(self, "
-            self.session.close()
+        if hasattr(self, "engine"):
             self.engine.dispose()

     def get(self):
-        return self.session
-
-
-    def in_(column: Column, values):
-        return or_(*[column == value for value in values])
+        return self.session()


 class SqlAlchemyDatasetRepository(DatasetRepository):
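Two related changes here: the engine gets an explicitly sized connection pool, and the single shared `Session` becomes a `scoped_session`, which hands each thread its own session; `get()` now calls the registry to resolve the thread-local instance. A condensed sketch of the pattern, assuming callers run on a thread pool (URL and pool numbers are placeholders, not values from the package):

from sqlalchemy import create_engine, text
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.pool import QueuePool

engine = create_engine(
    "sqlite:///example.db",  # placeholder URL
    poolclass=QueuePool,     # explicit, so the pool_* arguments apply
    pool_size=8,             # roughly get_concurrency() in the diff
    max_overflow=5,          # short bursts may open a few extra connections
    pool_recycle=1800,       # retire connections after 30 minutes
    pool_pre_ping=True,      # validate connections before handing them out
)

session_factory = sessionmaker(bind=engine)
session = scoped_session(session_factory)

def worker():
    # Calling the registry returns the session bound to the current thread,
    # creating one on first use.
    thread_session = session()
    thread_session.execute(text("SELECT 1"))
    session.remove()  # release this thread's session when the work is done

The recycle and pre-ping settings matter once connections sit in a pool across a long-running job; stale connections would otherwise surface as mid-job errors.
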
@@ -188,7 +190,19 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
                 # return an empty DatasetCollection
                 return DatasetCollection()

-
+            dataset_ids_cte = union_all(
+                *[
+                    select(literal(dataset_id).label("dataset_id"))
+                    for dataset_id in set(dataset_id)
+                ]
+            ).cte("dataset_ids")
+
+            query = query.select_from(
+                dataset_table.join(
+                    dataset_ids_cte,
+                    dataset_ids_cte.c.dataset_id == dataset_table.c.dataset_id,
+                )
+            )
         else:
             query = query.filter(dataset_table.c.dataset_id == dataset_id)

@@ -259,15 +273,30 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         if not dataset_ids:
             return []

+        dataset_ids_cte = union_all(
+            *[
+                select(literal(dataset_id).label("dataset_id"))
+                for dataset_id in set(dataset_ids)
+            ]
+        ).cte("dataset_ids")
+
         dataset_rows = list(
-            self.session.query(dataset_table).
-
+            self.session.query(dataset_table).select_from(
+                dataset_table.join(
+                    dataset_ids_cte,
+                    dataset_ids_cte.c.dataset_id == dataset_table.c.dataset_id,
+                )
             )
         )
         revisions_per_dataset = {}
         rows = (
             self.session.query(revision_table)
-            .
+            .select_from(
+                revision_table.join(
+                    dataset_ids_cte,
+                    dataset_ids_cte.c.dataset_id == revision_table.c.dataset_id,
+                )
+            )
             .order_by(revision_table.c.dataset_id)
         )

@@ -279,7 +308,12 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         files_per_revision = {}
         rows = (
             self.session.query(file_table)
-            .
+            .select_from(
+                file_table.join(
+                    dataset_ids_cte,
+                    dataset_ids_cte.c.dataset_id == file_table.c.dataset_id,
+                )
+            )
             .order_by(file_table.c.dataset_id, file_table.c.revision_id)
         )

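All three lookups now materialize the id list as a CTE of literal SELECTs joined against the target table, replacing the removed `in_()` helper that OR-ed one comparison per value. Each id contributes one member to the compound select, which is exactly the 500-statement limit the new DEFAULT_CHUNK_SIZE respects. A self-contained sketch of the technique (table and ids are invented for illustration):

from sqlalchemy import (
    Column, MetaData, String, Table, create_engine, literal, select, union_all,
)

metadata = MetaData()
dataset_table = Table(
    "dataset",
    metadata,
    Column("dataset_id", String, primary_key=True),
    Column("name", String),
)

engine = create_engine("sqlite://")
metadata.create_all(engine)

dataset_ids = ["a", "b", "c"]  # invented ids

# One SELECT of a literal per id, UNION ALL-ed together and wrapped in a CTE...
dataset_ids_cte = union_all(
    *[select(literal(d).label("dataset_id")) for d in set(dataset_ids)]
).cte("dataset_ids")

# ...then joined against the real table instead of a large IN (...) list.
query = select(dataset_table).select_from(
    dataset_table.join(
        dataset_ids_cte,
        dataset_ids_cte.c.dataset_id == dataset_table.c.dataset_id,
    )
)

with engine.connect() as conn:
    rows = conn.execute(query).fetchall()

Joining on a values-style CTE keeps the statement bounded and lets the database treat the id list as a real relation, which tends to plan better than a multi-thousand-term IN clause.
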
ingestify/infra/store/dataset/sqlalchemy/tables.py
CHANGED

@@ -247,9 +247,10 @@ ingestion_job_summary_table = Table(
     Column("ended_at", TZDateTime(6)),
     # Some task counters
     Column("state", IngestionJobStateString),
+    Column("total_tasks", Integer),
     Column("successful_tasks", Integer),
     Column("ignored_successful_tasks", Integer),
-    Column("
+    Column("skipped_tasks", Integer),
     Column("failed_tasks", Integer),
     Column(
         "timings",
ingestify/infra/store/file/s3_file_repository.py
CHANGED

@@ -2,9 +2,11 @@ from pathlib import Path
 from typing import BinaryIO

 import boto3 as boto3
+import botocore.config

 from ingestify.domain import Dataset
 from ingestify.domain.models import FileRepository
+from ingestify.utils import get_concurrency


 class S3FileRepository(FileRepository):
@@ -13,7 +15,10 @@ class S3FileRepository(FileRepository):
     @property
     def s3(self):
         if not self._s3:
-
+            client_config = botocore.config.Config(
+                max_pool_connections=get_concurrency(),
+            )
+            self._s3 = boto3.resource("s3", config=client_config)
         return self._s3

     def __getstate__(self):
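botocore defaults to 10 pooled HTTP connections per client, so once the executor runs more threads than that, S3 calls serialize behind the pool. Matching `max_pool_connections` to the worker count removes that bottleneck. A sketch of the configuration in isolation (bucket and key are invented; credentials are assumed to come from the environment):

import boto3
import botocore.config

# Size the HTTP connection pool to the worker count so concurrent
# uploads and downloads don't queue behind botocore's default of 10.
client_config = botocore.config.Config(max_pool_connections=16)
s3 = boto3.resource("s3", config=client_config)

# Invented bucket/key, for illustration only:
# s3.Bucket("my-bucket").upload_file("local.bin", "remote/key.bin")
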
ingestify/utils.py
CHANGED
@@ -3,6 +3,7 @@ import os
 import time
 import re
 import traceback
+from concurrent.futures import ThreadPoolExecutor
 from contextlib import contextmanager
 from multiprocessing import get_context, cpu_count, get_all_start_methods

@@ -137,59 +138,65 @@ def map_in_pool(func, iterable, processes=0):
     )


-class
+class SyncExecutor:
     def map(self, func, iterable):
         return [func(item) for item in iterable]

-    def
-        return
+    def __enter__(self):
+        return self

-    def
-
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass


-class
+class DummyExecutor:
     def map(self, func, iterable):
         logger.info(f"DummyPool: not running {len(list(iterable))} tasks")
         return None

-    def
-        return
+    def __enter__(self):
+        return self

-    def
-
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass


 class TaskExecutor:
     def __init__(self, processes=0, dry_run: bool = False):
         if dry_run:
-
+            executor = DummyExecutor()
         elif os.environ.get("INGESTIFY_RUN_EAGER") == "true":
-
+            executor = SyncExecutor()
         else:
             if not processes:
-                processes =
+                processes = get_concurrency()
+
+            # if "fork" in get_all_start_methods():
+            #     ctx = get_context("fork")
+            # else:
+            #     ctx = get_context("spawn")

-
-                ctx = get_context("fork")
-            else:
-                ctx = get_context("spawn")
+            # pool = ctx.Pool(processes or cpu_count())

-
-
+            executor = ThreadPoolExecutor(max_workers=processes)
+
+        self.executor = executor

     def __enter__(self):
+        self.executor.__enter__()
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.
+        self.executor.__exit__(exc_type, exc_val, exc_tb)

     def run(self, func, iterable):
-
+        # If multiprocessing
+        # wrapped_fn = cloudpickle.dumps(func)
+        # res = self.executor.map(
+        #     cloud_unpack_and_call, ((wrapped_fn, item) for item in iterable)
+        # )
         start_time = time.time()
-        res = self.
-            cloud_unpack_and_call, ((wrapped_fn, item) for item in iterable)
-        )
+        res = list(self.executor.map(func, iterable))
         if res:
             took = time.time() - start_time
             logger.info(
@@ -197,10 +204,6 @@ class TaskExecutor:
         )
         return res

-    def join(self):
-        self.pool.close()
-        self.pool.join()
-

 def try_number(s: str):
     try:
@@ -253,3 +256,10 @@ class HasTiming:
         self.timings.append(Timing(name=name, started_at=start, ended_at=utcnow()))

         return finish
+
+
+def get_concurrency():
+    concurrency = int(os.environ.get("INGESTIFY_CONCURRENCY", "0"))
+    if not concurrency:
+        concurrency = min(32, (os.cpu_count() or 1) + 4)
+    return concurrency
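The `min(32, (os.cpu_count() or 1) + 4)` fallback mirrors the default worker count `ThreadPoolExecutor` itself uses, and the same `get_concurrency()` value now sizes the database pool and the S3 connection pool, keeping all three in step. A usage sketch of the reworked executor under those assumptions (the task function is invented):

import os

from ingestify.utils import TaskExecutor

def double(item: int) -> int:
    # invented stand-in for a real ingestion task
    return item * 2

os.environ["INGESTIFY_CONCURRENCY"] = "8"  # optional explicit cap

with TaskExecutor() as executor:
    results = executor.run(double, range(10))

print(results)  # [0, 2, 4, ..., 18]
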
{ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/RECORD
CHANGED

@@ -1,10 +1,10 @@
-ingestify/__init__.py,sha256=
+ingestify/__init__.py,sha256=Un08YQgoC1u_2dbvOdtOD59OKsUL78ekru-86GA3zpA,301
 ingestify/cmdline.py,sha256=JcveX6e4i6mJtIllhTuruwbqxyoKIITIWE8kB6byvJU,7721
 ingestify/exceptions.py,sha256=izRzaLQmMy-4P8ZqGqVZyf4k6LFYOYqwYLuRaUH8BJw,187
 ingestify/main.py,sha256=yYKA-4WAk04RdBCGmatsCKiPFQzpyufoG4VzHiWkVtU,8979
 ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
 ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
-ingestify/utils.py,sha256=
+ingestify/utils.py,sha256=EMdG3ZP3bX9DHxHvBLdkYLC3vcEVym7dmpIXQTikI3I,7281
 ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestify/application/dataset_store.py,sha256=JkAb1W0HaUgOwbohKntM4ttyrFXQ7df1uZSu2rbZllY,11680
 ingestify/application/ingestion_engine.py,sha256=4SAmPZDm3e2QA5jZvMrb6xz1eDDshKoSZDWH3TCe4Bo,2372
@@ -39,8 +39,8 @@ ingestify/domain/models/event/event_bus.py,sha256=iseourbCwdUg-ODM5bM_u6cageJmce
 ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
 ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
 ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ingestify/domain/models/ingestion/ingestion_job.py,sha256=
-ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=
+ingestify/domain/models/ingestion/ingestion_job.py,sha256=2Tibe1fKZU84LO_qHniO33ChTcJy3K0YLkVro8CjJPs,13573
+ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=ZEoL8kZfDM_NUYXD4_7Xpmtz6WduN50UcJBgNOxOxrE,4669
 ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
 ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
 ingestify/domain/models/resources/dataset_resource.py,sha256=NRnN029ct3P_Eg2d9Unb1t7A12Ksv_emBGhoe9DpPwM,3118
@@ -64,12 +64,12 @@ ingestify/infra/source/wyscout.py,sha256=DxCzdkzYpVRHTfV9GpF8pe3FzwIk-WHYUlea6nO
 ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
 ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
-ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=
-ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=
+ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=hn4x5tglqxO4EdyiAt_4wnDXmmWU87twHtBmVBRHVSY,18309
+ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=kALM32jbdeZ4Wn9gON-w2WSb5tH1lIWaBFgn5i29qTk,10635
 ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
 ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
 ingestify/infra/store/file/local_file_repository.py,sha256=1hhLqds5LlppJq2QBB0oN0Q98j6aXreCtYQYz3Q1P8g,819
-ingestify/infra/store/file/s3_file_repository.py,sha256=
+ingestify/infra/store/file/s3_file_repository.py,sha256=tz_EZ_gun7W2qJMlI3j_R03iKBZlJSDcG7AUJ1JkdpE,1501
 ingestify/static/templates/statsbomb_github/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestify/static/templates/statsbomb_github/config.yaml.jinja2,sha256=_gAuAipfBL3ddLacyS1IBP5JluvPS2vmrb8GGaFtcUM,386
 ingestify/static/templates/statsbomb_github/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
@@ -80,8 +80,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
 ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
 ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
-ingestify-0.
-ingestify-0.
-ingestify-0.
-ingestify-0.
-ingestify-0.
+ingestify-0.5.1.dist-info/METADATA,sha256=TOqbUz13KxM8v8kR-owtafHvYEXwj5ruaDP_bigoIbI,18854
+ingestify-0.5.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ingestify-0.5.1.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
+ingestify-0.5.1.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
+ingestify-0.5.1.dist-info/RECORD,,
{ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/WHEEL
File without changes

{ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/entry_points.txt
File without changes

{ingestify-0.4.2.dist-info → ingestify-0.5.1.dist-info}/top_level.txt
File without changes