ingestify 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ingestify/__init__.py CHANGED
@@ -8,4 +8,4 @@ if not __INGESTIFY_SETUP__:
     from .infra import retrieve_http
     from .source_base import Source, DatasetResource
 
-__version__ = "0.4.0"
+__version__ = "0.4.2"
ingestify/application/loader.py CHANGED
@@ -35,11 +35,8 @@ class Loader:
         provider: Optional[str] = None,
         source: Optional[str] = None,
     ):
-        # First collect all selectors, before discovering datasets
-        selectors = {}
+        ingestion_plans = []
         for ingestion_plan in self.ingestion_plans:
-            logger.info(f"Determining selectors for {ingestion_plan}")
-
             if provider is not None:
                 if ingestion_plan.source.provider != provider:
                     logger.info(
@@ -54,6 +51,13 @@ class Loader:
                     )
                     continue
 
+            ingestion_plans.append(ingestion_plan)
+
+        # First collect all selectors, before discovering datasets
+        selectors = {}
+        for ingestion_plan in ingestion_plans:
+            logger.info(f"Determining selectors for {ingestion_plan}")
+
             static_selectors = [
                 selector
                 for selector in ingestion_plan.selectors
ingestify/cmdline.py CHANGED
@@ -88,6 +88,14 @@ def cli():
     help="Source - only run tasks for a single source",
     type=str,
 )
+@click.option(
+    "--disable-events",
+    "disable_events",
+    required=False,
+    help="Disable events - disable all event handlers",
+    is_flag=True,
+    type=bool,
+)
 def run(
     config_file: str,
     bucket: Optional[str],
@@ -95,9 +103,10 @@ def run(
     provider: Optional[str],
     source: Optional[str],
     debug: Optional[bool],
+    disable_events: Optional[bool],
 ):
     try:
-        engine = get_engine(config_file, bucket)
+        engine = get_engine(config_file, bucket, disable_events=disable_events)
     except ConfigurationError as e:
         if debug:
             raise
ingestify/domain/models/ingestion/ingestion_job.py CHANGED
@@ -218,7 +218,7 @@ class IngestionJob:
         # Process all items in batches. Yield a IngestionJobSummary per batch
 
         logger.info("Finding metadata")
-        with ingestion_job_summary.record_timing("get_dataset_collection"):
+        with ingestion_job_summary.record_timing("get_dataset_collection_metadata"):
            dataset_collection_metadata = store.get_dataset_collection(
                dataset_type=self.ingestion_plan.dataset_type,
                provider=self.ingestion_plan.source.provider,
@@ -232,6 +232,7 @@ class IngestionJob:
         # 1. The discover_datasets returns a list, and the entire list can be processed at once
         # 2. The discover_datasets returns an iterator of batches, in this case we need to process each batch
         try:
+            logger.info(f"Finding datasets for selector={self.selector}")
             with ingestion_job_summary.record_timing("find_datasets"):
                 dataset_resources = self.ingestion_plan.source.find_datasets(
                     dataset_type=self.ingestion_plan.dataset_type,
@@ -249,6 +250,8 @@ class IngestionJob:
             yield ingestion_job_summary
             return
 
+        logger.info("Starting tasks")
+
         finish_task_timer = ingestion_job_summary.start_timing("tasks")
 
         while True:
@@ -273,13 +276,16 @@ class IngestionJob:
                     for dataset_resource in batch
                 ]
 
-                # Load all available datasets based on the discovered dataset identifiers
-                dataset_collection = store.get_dataset_collection(
-                    dataset_type=self.ingestion_plan.dataset_type,
-                    # Assume all DatasetResources share the same provider
-                    provider=batch[0].provider,
-                    selector=dataset_identifiers,
-                )
+                logger.info(f"Searching for existing Datasets for DatasetResources")
+
+                with ingestion_job_summary.record_timing("get_dataset_collection"):
+                    # Load all available datasets based on the discovered dataset identifiers
+                    dataset_collection = store.get_dataset_collection(
+                        dataset_type=self.ingestion_plan.dataset_type,
+                        # Assume all DatasetResources share the same provider
+                        provider=batch[0].provider,
+                        selector=dataset_identifiers,
+                    )
 
                 skipped_datasets = 0
 
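The record_timing()/start_timing() calls above come from ingestify's HasTiming mixin, whose implementation is not part of this diff. As a rough orientation only, a context-manager timer of that general shape could look like the sketch below; it is an illustrative guess, not ingestify's actual code.

import time
from contextlib import contextmanager


class TimingRecorder:
    """Toy stand-in for a HasTiming-style mixin; collects named durations."""

    def __init__(self):
        self.timings: dict[str, float] = {}

    @contextmanager
    def record_timing(self, name: str):
        # Measure the wall-clock time spent inside the `with` block.
        start = time.monotonic()
        try:
            yield
        finally:
            self.timings[name] = time.monotonic() - start


recorder = TimingRecorder()
with recorder.record_timing("get_dataset_collection_metadata"):
    time.sleep(0.05)  # stand-in for the actual store.get_dataset_collection() call
print(recorder.timings)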
ingestify/domain/models/ingestion/ingestion_job_summary.py CHANGED
@@ -66,7 +66,7 @@ class IngestionJobSummary(BaseModel, HasTiming):
         self.skipped_datasets += skipped_datasets
 
     def task_count(self):
-        return len(self.task_summaries)
+        return len(self.task_summaries) + self.skipped_datasets
 
     def _set_ended(self):
         self.failed_tasks = len(
@@ -106,22 +106,22 @@ class IngestionJobSummary(BaseModel, HasTiming):
             f"\nIngestionJobSummary {self.state.value} in {format_duration(self.duration)}"
         )
         print("********************************")
-        print(f"* - IngestionPlan:")
-        print(f"* Source: {self.source_name}")
-        print(f"* Provider: {self.provider}")
-        print(f"* DatasetType: {self.dataset_type}")
-        print(f"* - Selector: {self.selector}")
-        print(f"* - Timings: ")
+        print(f" - IngestionPlan:")
+        print(f" Source: {self.source_name}")
+        print(f" Provider: {self.provider}")
+        print(f" DatasetType: {self.dataset_type}")
+        print(f" - Selector: {self.selector}")
+        print(f" - Timings: ")
         for timing in self.timings:
-            print(f"* - {timing.name}: {format_duration(timing.duration)}")
+            print(f" - {timing.name}: {format_duration(timing.duration)}")
         print(
-            f"* - Tasks: {len(self.task_summaries)} - {(len(self.task_summaries) / self.duration.total_seconds()):.1f} tasks/sec"
+            f" - Tasks: {len(self.task_summaries)} - {(len(self.task_summaries) / self.duration.total_seconds()):.1f} tasks/sec"
         )
 
-        print(f"* - Failed tasks: {self.failed_tasks}")
-        print(f"* - Successful tasks: {self.successful_tasks}")
-        print(f"* - Successful ignored tasks: {self.ignored_successful_tasks}")
-        print(f"* - Skipped datasets: {self.skipped_datasets}")
+        print(f" - Failed tasks: {self.failed_tasks}")
+        print(f" - Successful tasks: {self.successful_tasks}")
+        print(f" - Successful ignored tasks: {self.ignored_successful_tasks}")
+        print(f" - Skipped datasets: {self.skipped_datasets}")
         print("********************************")
 
     def __enter__(self):
ingestify/infra/store/dataset/sqlalchemy/repository.py CHANGED
@@ -1,32 +1,33 @@
 import itertools
-import json
+import logging
 import uuid
-from collections import defaultdict
 from typing import Optional, Union, List
 
 from sqlalchemy import (
     create_engine,
     func,
     text,
-    tuple_,
     Table,
-    insert,
-    Transaction,
     Connection,
+    union_all,
+    literal,
+    select,
+    and_,
+    Column,
+    or_,
+    Dialect,
 )
 from sqlalchemy.engine import make_url
 from sqlalchemy.exc import NoSuchModuleError
-from sqlalchemy.orm import Session, joinedload
+from sqlalchemy.orm import Session, Query
 
 from ingestify.domain import File, Revision
 from ingestify.domain.models import (
     Dataset,
     DatasetCollection,
     DatasetRepository,
-    Identifier,
     Selector,
 )
-from ingestify.domain.models.base import BaseModel
 from ingestify.domain.models.dataset.collection_metadata import (
     DatasetCollectionMetadata,
 )
@@ -43,6 +44,8 @@ from .tables import (
     task_summary_table,
 )
 
+logger = logging.getLogger(__name__)
+
 
 def parse_value(v):
     try:
@@ -94,6 +97,7 @@ class SqlAlchemySessionProvider:
             # Use the default isolation level, don't need SERIALIZABLE
             # isolation_level="SERIALIZABLE",
         )
+        self.dialect = self.engine.dialect
         self.session = Session(bind=self.engine)
 
     def __init__(self, url: str):
@@ -111,22 +115,26 @@ class SqlAlchemySessionProvider:
         self.url = state["url"]
         self._init_engine()
 
-    def _close_engine(self):
-        if hasattr(self, "session"):
-            self.session.close()
-        self.engine.dispose()
-
     def __del__(self):
-        self._close_engine()
+        self.close()
 
     def reset(self):
-        self._close_engine()
+        self.close()
         self._init_engine()
 
+    def close(self):
+        if hasattr(self, "session"):
+            self.session.close()
+        self.engine.dispose()
+
     def get(self):
         return self.session
 
 
+def in_(column: Column, values):
+    return or_(*[column == value for value in values])
+
+
 class SqlAlchemyDatasetRepository(DatasetRepository):
     def __init__(self, session_provider: SqlAlchemySessionProvider):
         self.session_provider = session_provider
@@ -135,8 +143,12 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
     def session(self):
         return self.session_provider.get()
 
+    @property
+    def dialect(self) -> Dialect:
+        return self.session_provider.dialect
+
     def _upsert(self, connection: Connection, table: Table, entities: list[dict]):
-        dialect = self.session.bind.dialect.name
+        dialect = self.dialect.name
         if dialect == "mysql":
             from sqlalchemy.dialects.mysql import insert
         elif dialect == "postgresql":
@@ -169,11 +181,6 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         dataset_id: Optional[Union[str, List[str]]] = None,
         selector: Optional[Union[Selector, List[Selector]]] = None,
     ):
-        query = query.filter(dataset_table.c.bucket == bucket)
-        if dataset_type:
-            query = query.filter(dataset_table.c.dataset_type == dataset_type)
-        if provider:
-            query = query.filter(dataset_table.c.provider == provider)
         if dataset_id is not None:
             if isinstance(dataset_id, list):
                 if len(dataset_id) == 0:
@@ -181,11 +188,11 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
                     # return an empty DatasetCollection
                     return DatasetCollection()
 
-                query = query.filter(dataset_table.c.dataset_id.in_(dataset_id))
+                query = query.filter(in_(dataset_table.c.dataset_id, dataset_id))
             else:
                 query = query.filter(dataset_table.c.dataset_id == dataset_id)
 
-        dialect = self.session.bind.dialect.name
+        dialect = self.dialect.name
 
         if not isinstance(selector, list):
             where, selector = selector.split("where")
@@ -201,13 +208,22 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         if not selectors:
             raise ValueError("Selectors must contain at least one item")
 
-        keys = list(selectors[0].filtered_attributes.keys())
+        attribute_sets = {
+            tuple(selector.filtered_attributes.items()) for selector in selectors
+        }
+
+        # Define a virtual table using a CTE for all attributes
+        attribute_cte = union_all(
+            *[
+                select(*(literal(value).label(key) for key, value in attr_set))
+                for attr_set in attribute_sets
+            ]
+        ).cte("attributes")
 
-        columns = []
+        keys = list(selectors[0].filtered_attributes.keys())
         first_selector = selectors[0].filtered_attributes
 
-        # Create a query like this:
-        # SELECT * FROM dataset WHERE (column1, column2, column3) IN ((1, 2, 3), (4, 5, 6), (7, 8, 9))
+        join_conditions = []
         for k in keys:
             if dialect == "postgresql":
                 column = dataset_table.c.identifier[k]
@@ -215,40 +231,43 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
                 # Take the value from the first selector to determine the type.
                 # TODO: check all selectors to determine the type
                 v = first_selector[k]
-                if isint(v):
+                if isinstance(v, int):
                     column = column.as_integer()
-                elif isfloat(v):
-                    column = column.as_float()
                 else:
                     column = column.as_string()
             else:
                 column = func.json_extract(dataset_table.c.identifier, f"$.{k}")
-            columns.append(column)
 
-        values = []
-        for selector in selectors:
-            filtered_attributes = selector.filtered_attributes
-            values.append(tuple([filtered_attributes[k] for k in keys]))
+            join_conditions.append(attribute_cte.c[k] == column)
 
-        query = query.filter(tuple_(*columns).in_(values))
+        query = query.select_from(
+            dataset_table.join(attribute_cte, and_(*join_conditions))
+        )
 
         if where:
             query = query.filter(text(where))
+
+        query = query.filter(dataset_table.c.bucket == bucket)
+        if dataset_type:
+            query = query.filter(dataset_table.c.dataset_type == dataset_type)
+        if provider:
+            query = query.filter(dataset_table.c.provider == provider)
+
         return query
 
-    def load_datasets(self, dataset_ids: list[str]) -> list[Dataset]:
+    def _load_datasets(self, dataset_ids: list[str]) -> list[Dataset]:
         if not dataset_ids:
             return []
 
         dataset_rows = list(
            self.session.query(dataset_table).filter(
-                dataset_table.c.dataset_id.in_(dataset_ids)
+                in_(dataset_table.c.dataset_id, dataset_ids)
            )
        )
        revisions_per_dataset = {}
        rows = (
            self.session.query(revision_table)
-            .filter(revision_table.c.dataset_id.in_(dataset_ids))
+            .filter(in_(revision_table.c.dataset_id, dataset_ids))
            .order_by(revision_table.c.dataset_id)
        )
 
@@ -260,7 +279,7 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         files_per_revision = {}
         rows = (
             self.session.query(file_table)
-            .filter(file_table.c.dataset_id.in_(dataset_ids))
+            .filter(in_(file_table.c.dataset_id, dataset_ids))
             .order_by(file_table.c.dataset_id, file_table.c.revision_id)
         )
 
@@ -290,6 +309,12 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
         )
         return datasets
 
+    def _debug_query(self, q: Query):
+        text_ = q.statement.compile(
+            compile_kwargs={"literal_binds": True}, dialect=self.dialect
+        )
+        logger.debug(f"Running query: {text_}")
+
     def get_dataset_collection(
         self,
         bucket: str,
@@ -309,22 +334,40 @@
             selector=selector,
         )
 
-        if not metadata_only:
-            dataset_query = apply_query_filter(
-                self.session.query(dataset_table.c.dataset_id)
-            )
-            dataset_ids = [row.dataset_id for row in dataset_query]
-            datasets = self.load_datasets(dataset_ids)
-        else:
-            datasets = []
+        with self.session:
+            # Use a contextmanager to make sure it's closed afterwards
 
-        metadata_result_row = apply_query_filter(
-            self.session.query(
-                func.max(dataset_table.c.last_modified_at).label("last_modified_at"),
-                func.count().label("row_count"),
-            )
-        ).first()
-        dataset_collection_metadata = DatasetCollectionMetadata(*metadata_result_row)
+            if not metadata_only:
+                dataset_query = apply_query_filter(
+                    self.session.query(dataset_table.c.dataset_id)
+                )
+                self._debug_query(dataset_query)
+                dataset_ids = [row.dataset_id for row in dataset_query]
+                datasets = self._load_datasets(dataset_ids)
+
+                dataset_collection_metadata = DatasetCollectionMetadata(
+                    last_modified=max(dataset.last_modified_at for dataset in datasets)
+                    if datasets
+                    else None,
+                    row_count=len(datasets),
+                )
+            else:
+                datasets = []
+
+                metadata_result_query = apply_query_filter(
+                    self.session.query(
+                        func.max(dataset_table.c.last_modified_at).label(
+                            "last_modified_at"
+                        ),
+                        func.count().label("row_count"),
+                    )
+                )
+
+                self._debug_query(metadata_result_query)
+
+                dataset_collection_metadata = DatasetCollectionMetadata(
+                    *metadata_result_query.first()
+                )
 
         return DatasetCollection(dataset_collection_metadata, datasets)
 
@@ -337,6 +380,9 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
     def connect(self):
         return self.session_provider.engine.connect()
 
+    def __del__(self):
+        self.session_provider.close()
+
     def _save(self, datasets: list[Dataset]):
         """Only do upserts. Never delete. Rows get only deleted when an entire Dataset is removed."""
         datasets_entities = []
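The most substantial change in this file replaces the old tuple-IN filter with a UNION ALL CTE of literal selector values that is joined against JSON-extracted identifier columns. The standalone sketch below shows that technique with plain SQLAlchemy Core; the simplified table, the json_extract() path style, and the attribute names are illustrative assumptions, not ingestify's actual schema.

from sqlalchemy import (
    JSON,
    Column,
    MetaData,
    String,
    Table,
    and_,
    func,
    literal,
    select,
    union_all,
)

metadata = MetaData()
dataset = Table(
    "dataset",
    metadata,
    Column("dataset_id", String, primary_key=True),
    Column("identifier", JSON),  # selector attributes stored as JSON
)

# Each selector is one combination of identifier attributes to match.
selectors = [
    {"competition_id": 11, "season_id": 42},
    {"competition_id": 11, "season_id": 43},
]

# Build a virtual table (CTE) with one row per selector.
attribute_cte = union_all(
    *[
        select(*(literal(value).label(key) for key, value in selector.items()))
        for selector in selectors
    ]
).cte("attributes")

# Join the dataset table against the CTE on the JSON-extracted attributes
# (json_extract() as in the non-PostgreSQL branch of the repository code).
join_conditions = [
    attribute_cte.c[key] == func.json_extract(dataset.c.identifier, f"$.{key}")
    for key in selectors[0]
]
query = select(dataset.c.dataset_id).select_from(
    dataset.join(attribute_cte, and_(*join_conditions))
)
print(query)  # emits the SELECT ... JOIN against the "attributes" CTE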
ingestify/infra/store/dataset/sqlalchemy/tables.py CHANGED
@@ -14,6 +14,7 @@ from sqlalchemy import (
     String,
     Table,
     TypeDecorator,
+    Index,
 )
 
 from sqlalchemy.dialects.postgresql import JSONB
@@ -167,6 +168,15 @@ dataset_table = Table(
     Column("created_at", TZDateTime(6)),
     Column("updated_at", TZDateTime(6)),
     Column("last_modified_at", TZDateTime(6)),
+    # Required for performance querying when there are a lot of Datasets
+    # with the same provider and dataset_type
+    Index(
+        "idx_bucket_type_provider_last_modified",
+        "bucket",
+        "provider",
+        "dataset_type",
+        "last_modified_at",
+    ),
 )
 
 revision_table = Table(
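The new composite index targets the repository's metadata lookup, which filters on bucket, provider and dataset_type and aggregates last_modified_at. The sketch below shows that query shape with a simplified table; the column definitions and the "main"/"statsbomb"/"event" values are placeholders, not values from the package.

from sqlalchemy import Column, DateTime, Index, MetaData, String, Table, func, select

metadata = MetaData()
dataset = Table(
    "dataset",
    metadata,
    Column("bucket", String),
    Column("provider", String),
    Column("dataset_type", String),
    Column("last_modified_at", DateTime),
    # Composite index matching the filter + aggregate below.
    Index(
        "idx_bucket_type_provider_last_modified",
        "bucket",
        "provider",
        "dataset_type",
        "last_modified_at",
    ),
)

stmt = (
    select(
        func.max(dataset.c.last_modified_at).label("last_modified_at"),
        func.count().label("row_count"),
    )
    .where(dataset.c.bucket == "main")
    .where(dataset.c.provider == "statsbomb")
    .where(dataset.c.dataset_type == "event")
)
print(stmt)  # the kind of query the index is meant to keep fast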
ingestify/main.py CHANGED
@@ -182,7 +182,9 @@ def get_event_subscriber_cls(key: str) -> Type[Subscriber]:
     return import_cls(key)
 
 
-def get_engine(config_file, bucket: Optional[str] = None) -> IngestionEngine:
+def get_engine(
+    config_file, bucket: Optional[str] = None, disable_events: bool = False
+) -> IngestionEngine:
     config = parse_config(config_file, default_value="")
 
     logger.info("Initializing sources")
@@ -201,11 +203,16 @@ def get_engine(config_file, bucket: Optional[str] = None) -> IngestionEngine:
 
     # Setup an EventBus and wire some more components
     event_bus = EventBus()
-    publisher = Publisher()
-    for subscriber in config.get("event_subscribers", []):
-        cls = get_event_subscriber_cls(subscriber["type"])
-        publisher.add_subscriber(cls(store))
-    event_bus.register(publisher)
+    if not disable_events:
+        # When we disable all events we don't register any publishers
+        publisher = Publisher()
+        for subscriber in config.get("event_subscribers", []):
+            cls = get_event_subscriber_cls(subscriber["type"])
+            publisher.add_subscriber(cls(store))
+        event_bus.register(publisher)
+    else:
+        logger.info("Disabling all event handlers")
+
     store.set_event_bus(event_bus)
 
     ingestion_engine = IngestionEngine(
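A hedged usage sketch of the new disable_events parameter: with the signature introduced above, a caller can build an engine whose EventBus gets no registered publishers. The config path below is a placeholder, not a file shipped with the package.

from ingestify.main import get_engine

# Build an engine without wiring any event subscribers from the config.
engine = get_engine("config.yaml", disable_events=True)

The same switch is exposed on the command line through the --disable-events flag added in cmdline.py above.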
ingestify-0.4.0.dist-info/METADATA → ingestify-0.4.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ingestify
-Version: 0.4.0
+Version: 0.4.2
 Summary: Data Ingestion Framework
 Author: Koen Vossen
 Author-email: info@koenvossen.nl
ingestify-0.4.0.dist-info/RECORD → ingestify-0.4.2.dist-info/RECORD CHANGED
@@ -1,14 +1,14 @@
-ingestify/__init__.py,sha256=pqbfmiQtpinnyWcferY68eDjMbrSslyzmNBk9XErzak,301
-ingestify/cmdline.py,sha256=oagUe-Jup1SU3s6jVl25f0cSG0wlNYhxFY-gGBwWmr0,7482
+ingestify/__init__.py,sha256=x4r1Cw7NXlEu1lunx4jwI0b3SZ7MhTbWSVlHStDtVaI,301
+ingestify/cmdline.py,sha256=JcveX6e4i6mJtIllhTuruwbqxyoKIITIWE8kB6byvJU,7721
 ingestify/exceptions.py,sha256=izRzaLQmMy-4P8ZqGqVZyf4k6LFYOYqwYLuRaUH8BJw,187
-ingestify/main.py,sha256=Xr0VbGgstPO7doDX18xqk4lBb4W2sbGWtQuXZaARsHA,8763
+ingestify/main.py,sha256=yYKA-4WAk04RdBCGmatsCKiPFQzpyufoG4VzHiWkVtU,8979
 ingestify/server.py,sha256=OVrf_XtpAQIn88MzqQzShXgsA9_jbnqYvD8YPBjn3cs,2413
 ingestify/source_base.py,sha256=GXAFCoT11Zov9M2v-fqQr9gFCXbtVfEIEH32V7r2oE8,382
 ingestify/utils.py,sha256=6BqgEZjecLW_anqYP5WrFpi93bmdhF-EdrebEkm59Ds,6806
 ingestify/application/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestify/application/dataset_store.py,sha256=JkAb1W0HaUgOwbohKntM4ttyrFXQ7df1uZSu2rbZllY,11680
 ingestify/application/ingestion_engine.py,sha256=4SAmPZDm3e2QA5jZvMrb6xz1eDDshKoSZDWH3TCe4Bo,2372
-ingestify/application/loader.py,sha256=2LpYSHvedsoe5wFsIkQv0xLcKcqtebwVOSPWapAypao,7566
+ingestify/application/loader.py,sha256=Lg3qPLaeKOFGheeqqfVeCBEF3cn61oZThgYYHoqfOvQ,7694
 ingestify/application/secrets_manager.py,sha256=5qCbPfUvRGP1Xbq6xPkMfpgYl8uPzF_0NbiKeRY5zxU,1757
 ingestify/domain/__init__.py,sha256=M7_fVTJjQUx53P4UQUPhowRKPKsIIjx4JYZL1yjHKsM,46
 ingestify/domain/models/__init__.py,sha256=cjQmdSDFA-saXjdF1mLPNWILFHIFgdj20J_fC5FmFsI,770
@@ -39,8 +39,8 @@ ingestify/domain/models/event/event_bus.py,sha256=iseourbCwdUg-ODM5bM_u6cageJmce
 ingestify/domain/models/event/publisher.py,sha256=TOAawYYiPQCLR2Gm17LumMEzeapMDYcAYeklLFmwqAY,620
 ingestify/domain/models/event/subscriber.py,sha256=tP1ZFSvpJWKUITnATYekRxJzepz85UY7egBTMiP-dwg,1039
 ingestify/domain/models/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ingestify/domain/models/ingestion/ingestion_job.py,sha256=H9jfbbWFZw73nxMOW0480LgSHV-o4sA5IcvpUZmFpS4,13140
-ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=Xvmtu0BwE9C7FxBl6D8tN49I6--E_RngcMfWeme4DPA,4499
+ingestify/domain/models/ingestion/ingestion_job.py,sha256=Xprxv3SiMrJ5efleEbH2HS6MxZdMqDd7Pw2qp-yiM2U,13452
+ingestify/domain/models/ingestion/ingestion_job_summary.py,sha256=MYd0-IYbEtAp4VWAXLA0xnyat1e52VNOevDZo3M4jg0,4499
 ingestify/domain/models/ingestion/ingestion_plan.py,sha256=KAvITBMQt3zmMFokESQJyp3rMuz1Hxr6msfZK1_faZM,648
 ingestify/domain/models/resources/__init__.py,sha256=ZuY9DPRfwk-aLB3Lj6DYP_NqMkcQfcYjZp4VejTtcbU,46
 ingestify/domain/models/resources/dataset_resource.py,sha256=NRnN029ct3P_Eg2d9Unb1t7A12Ksv_emBGhoe9DpPwM,3118
@@ -64,8 +64,8 @@ ingestify/infra/source/wyscout.py,sha256=DxCzdkzYpVRHTfV9GpF8pe3FzwIk-WHYUlea6nO
 ingestify/infra/store/__init__.py,sha256=3dA6NWfB6FS5SFdQiSlJ0ZghBfnUAUuGIP5Vr4rkCqk,43
 ingestify/infra/store/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestify/infra/store/dataset/sqlalchemy/__init__.py,sha256=Z5JHWGO_hwT6rO-ecMOOAmOKjFFJi449KZvJTQgt6vQ,52
-ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=R7OJVC_dh5hGhaCvU_Ixyfb16Xyd-hxvm7n0zJ6KxDk,15857
-ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=FEMEkBiefozEoWCYMQnc4DD3ZsDeg3KaFs1c58A66ME,10314
+ingestify/infra/store/dataset/sqlalchemy/repository.py,sha256=ope_F-PVkXVo_oiUmsYdbUplC9aUnrTe4anlou-Y-y8,17078
+ingestify/infra/store/dataset/sqlalchemy/tables.py,sha256=OLB1FMElb3gSAnOsKX-oiLl_YVXaVEa6Q29QoHp2okU,10602
 ingestify/infra/store/file/__init__.py,sha256=DuEekZa2pmDuRCFiulbgoGotN0wGv3OrRXSvokY0PhY,104
 ingestify/infra/store/file/dummy_file_repository.py,sha256=azUq9c43Mz9-GWk9j0E97BaqyUKu-ZMrcuaIednLq5E,723
 ingestify/infra/store/file/local_file_repository.py,sha256=1hhLqds5LlppJq2QBB0oN0Q98j6aXreCtYQYz3Q1P8g,819
@@ -80,8 +80,8 @@ ingestify/static/templates/wyscout/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 ingestify/static/templates/wyscout/config.yaml.jinja2,sha256=0zQXuvJVwd0oL2OJsPMZ8sOvRbdfRbieSGLQ44ezmYc,379
 ingestify/static/templates/wyscout/query.py,sha256=wjAOMoKvhX-BzCRqEm1SJp6YAcF8Fsq7ddrOaOpAeOk,364
 ingestify/static/templates/wyscout/database/README.md,sha256=7IuzjKo7Pqkx5wkmOETRZDljVOslqfA3ALuHMONq5dg,32
-ingestify-0.4.0.dist-info/METADATA,sha256=t02z0kStKbk2lECODDJieRK4Ev1TDKR0Z6G2DpUeeGM,18854
-ingestify-0.4.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ingestify-0.4.0.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
-ingestify-0.4.0.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
-ingestify-0.4.0.dist-info/RECORD,,
+ingestify-0.4.2.dist-info/METADATA,sha256=E_if9fF-7cbW-CD3a4aQyinXPCgna-ZEv4mg_sTyl-0,18854
+ingestify-0.4.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ingestify-0.4.2.dist-info/entry_points.txt,sha256=czYYXeX2ul4zdeB6bKlz3HaUF7zyVVcj9E_sRNDisI0,53
+ingestify-0.4.2.dist-info/top_level.txt,sha256=Lwnjgns4KequS7KiicXhh6mLUvcdfjzLyPI4qf_s4A0,10
+ingestify-0.4.2.dist-info/RECORD,,