ingestify 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestify/__init__.py +1 -1
- ingestify/domain/models/ingestion/ingestion_job.py +3 -1
- ingestify/infra/store/dataset/sqlalchemy/repository.py +37 -9
- {ingestify-0.5.0.dist-info → ingestify-0.5.1.dist-info}/METADATA +1 -1
- {ingestify-0.5.0.dist-info → ingestify-0.5.1.dist-info}/RECORD +8 -8
- {ingestify-0.5.0.dist-info → ingestify-0.5.1.dist-info}/WHEEL +0 -0
- {ingestify-0.5.0.dist-info → ingestify-0.5.1.dist-info}/entry_points.txt +0 -0
- {ingestify-0.5.0.dist-info → ingestify-0.5.1.dist-info}/top_level.txt +0 -0
ingestify/__init__.py
CHANGED
|
@@ -24,7 +24,9 @@ from ingestify.utils import TaskExecutor, chunker
|
|
|
24
24
|
logger = logging.getLogger(__name__)
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
# Decrease batch size from 1_000 to 500. The sqlalchemy repository uses
|
|
28
|
+
# a compound select, which breaks at more than 500 select statements
|
|
29
|
+
DEFAULT_CHUNK_SIZE = 500
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
def run_task(task):
|
|
@@ -137,10 +137,6 @@ class SqlAlchemySessionProvider:
|
|
|
137
137
|
return self.session()
|
|
138
138
|
|
|
139
139
|
|
|
140
|
-
def in_(column: Column, values):
|
|
141
|
-
return or_(*[column == value for value in values])
|
|
142
|
-
|
|
143
|
-
|
|
144
140
|
class SqlAlchemyDatasetRepository(DatasetRepository):
|
|
145
141
|
def __init__(self, session_provider: SqlAlchemySessionProvider):
|
|
146
142
|
self.session_provider = session_provider
|
|
@@ -194,7 +190,19 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
|
|
|
194
190
|
# return an empty DatasetCollection
|
|
195
191
|
return DatasetCollection()
|
|
196
192
|
|
|
197
|
-
|
|
193
|
+
dataset_ids_cte = union_all(
|
|
194
|
+
*[
|
|
195
|
+
select(literal(dataset_id).label("dataset_id"))
|
|
196
|
+
for dataset_id in set(dataset_id)
|
|
197
|
+
]
|
|
198
|
+
).cte("dataset_ids")
|
|
199
|
+
|
|
200
|
+
query = query.select_from(
|
|
201
|
+
dataset_table.join(
|
|
202
|
+
dataset_ids_cte,
|
|
203
|
+
dataset_ids_cte.c.dataset_id == dataset_table.c.dataset_id,
|
|
204
|
+
)
|
|
205
|
+
)
|
|
198
206
|
else:
|
|
199
207
|
query = query.filter(dataset_table.c.dataset_id == dataset_id)
|
|
200
208
|
|
|
@@ -265,15 +273,30 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
|
|
|
265
273
|
if not dataset_ids:
|
|
266
274
|
return []
|
|
267
275
|
|
|
276
|
+
dataset_ids_cte = union_all(
|
|
277
|
+
*[
|
|
278
|
+
select(literal(dataset_id).label("dataset_id"))
|
|
279
|
+
for dataset_id in set(dataset_ids)
|
|
280
|
+
]
|
|
281
|
+
).cte("dataset_ids")
|
|
282
|
+
|
|
268
283
|
dataset_rows = list(
|
|
269
|
-
self.session.query(dataset_table).
|
|
270
|
-
|
|
284
|
+
self.session.query(dataset_table).select_from(
|
|
285
|
+
dataset_table.join(
|
|
286
|
+
dataset_ids_cte,
|
|
287
|
+
dataset_ids_cte.c.dataset_id == dataset_table.c.dataset_id,
|
|
288
|
+
)
|
|
271
289
|
)
|
|
272
290
|
)
|
|
273
291
|
revisions_per_dataset = {}
|
|
274
292
|
rows = (
|
|
275
293
|
self.session.query(revision_table)
|
|
276
|
-
.
|
|
294
|
+
.select_from(
|
|
295
|
+
revision_table.join(
|
|
296
|
+
dataset_ids_cte,
|
|
297
|
+
dataset_ids_cte.c.dataset_id == revision_table.c.dataset_id,
|
|
298
|
+
)
|
|
299
|
+
)
|
|
277
300
|
.order_by(revision_table.c.dataset_id)
|
|
278
301
|
)
|
|
279
302
|
|
|
@@ -285,7 +308,12 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
|
|
|
285
308
|
files_per_revision = {}
|
|
286
309
|
rows = (
|
|
287
310
|
self.session.query(file_table)
|
|
288
|
-
.
|
|
311
|
+
.select_from(
|
|
312
|
+
file_table.join(
|
|
313
|
+
dataset_ids_cte,
|
|
314
|
+
dataset_ids_cte.c.dataset_id == file_table.c.dataset_id,
|
|
315
|
+
)
|
|
316
|
+
)
|
|
289
317
|
.order_by(file_table.c.dataset_id, file_table.c.revision_id)
|
|
290
318
|
)
|
|
291
319
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
ingestify/__init__.py,sha256=
|
|
1
|
+
ingestify/__init__.py,sha256=Un08YQgoC1u_2dbvOdtOD59OKsUL78ekru-86GA3zpA,301
|
|
2
2
|
ingestify/cmdline.py,sha256=JcveX6e4i6mJtIllhTuruwbqxyoKIITIWE8kB6byvJU,7721
|
|
3
3
|
ingestify/exceptions.py,sha256=izRzaLQmMy-4P8ZqGqVZyf4k6LFYOYqwYLuRaUH8BJw,187
|
|
4
4
|
ingestify/main.py,sha256=yYKA-4WAk04RdBCGmatsCKiPFQzpyufoG4VzHiWkVtU,8979
|
|
@@ -39,7 +39,7 @@ ingestify/domain/models/event/event_bus.py,sha256=iseourbCwdUg-ODM5bM_u6cageJmce
|
|
|
39
39
|
ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
|
|
40
40
|
ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
|
|
41
41
|
ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
-
ingestify/domain/models/ingestion/ingestion_job.py,sha256
|
|
42
|
+
ingestify/domain/models/ingestion/ingestion_job.py,sha256=2Tibe1fKZU84LO_qHniO33ChTcJy3K0YLkVro8CjJPs,13573
|
|
43
43
|
ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=ZEoL8kZfDM_NUYXD4_7Xpmtz6WduN50UcJBgNOxOxrE,4669
|
|
44
44
|
ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
|
|
45
45
|
ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
|
|
@@ -64,7 +64,7 @@ ingestify/infra/source/wyscout.py,sha256=DxCzdkzYpVRHTfV9GpF8pe3FzwIk-WHYUlea6nO
|
|
|
64
64
|
ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
|
|
65
65
|
ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
66
|
ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
|
|
67
|
-
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=
|
|
67
|
+
ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=hn4x5tglqxO4EdyiAt_4wnDXmmWU87twHtBmVBRHVSY,18309
|
|
68
68
|
ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=kALM32jbdeZ4Wn9gON-w2WSb5tH1lIWaBFgn5i29qTk,10635
|
|
69
69
|
ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
|
|
70
70
|
ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
|
|
@@ -80,8 +80,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
80
80
|
ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
|
|
81
81
|
ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
|
|
82
82
|
ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
|
|
83
|
-
ingestify-0.5.
|
|
84
|
-
ingestify-0.5.
|
|
85
|
-
ingestify-0.5.
|
|
86
|
-
ingestify-0.5.
|
|
87
|
-
ingestify-0.5.
|
|
83
|
+
ingestify-0.5.1.dist-info/METADATA,sha256=TOqbUz13KxM8v8kR-owtafHvYEXwj5ruaDP_bigoIbI,18854
|
|
84
|
+
ingestify-0.5.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
85
|
+
ingestify-0.5.1.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
|
|
86
|
+
ingestify-0.5.1.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
|
|
87
|
+
ingestify-0.5.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|