data-validation-engine 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data_validation_engine-0.6.2.dist-info/METADATA +104 -0
  2. data_validation_engine-0.6.2.dist-info/RECORD +105 -0
  3. data_validation_engine-0.6.2.dist-info/WHEEL +4 -0
  4. data_validation_engine-0.6.2.dist-info/licenses/LICENSE +21 -0
  5. dve/__init__.py +0 -0
  6. dve/common/__init__.py +0 -0
  7. dve/common/error_utils.py +189 -0
  8. dve/core_engine/__init__.py +0 -0
  9. dve/core_engine/backends/__init__.py +1 -0
  10. dve/core_engine/backends/base/__init__.py +1 -0
  11. dve/core_engine/backends/base/auditing.py +618 -0
  12. dve/core_engine/backends/base/backend.py +240 -0
  13. dve/core_engine/backends/base/contract.py +454 -0
  14. dve/core_engine/backends/base/core.py +124 -0
  15. dve/core_engine/backends/base/reader.py +176 -0
  16. dve/core_engine/backends/base/reference_data.py +217 -0
  17. dve/core_engine/backends/base/rules.py +685 -0
  18. dve/core_engine/backends/base/utilities.py +146 -0
  19. dve/core_engine/backends/exceptions.py +311 -0
  20. dve/core_engine/backends/implementations/__init__.py +1 -0
  21. dve/core_engine/backends/implementations/duckdb/__init__.py +26 -0
  22. dve/core_engine/backends/implementations/duckdb/auditing.py +234 -0
  23. dve/core_engine/backends/implementations/duckdb/contract.py +213 -0
  24. dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py +288 -0
  25. dve/core_engine/backends/implementations/duckdb/readers/__init__.py +13 -0
  26. dve/core_engine/backends/implementations/duckdb/readers/csv.py +222 -0
  27. dve/core_engine/backends/implementations/duckdb/readers/json.py +50 -0
  28. dve/core_engine/backends/implementations/duckdb/readers/xml.py +45 -0
  29. dve/core_engine/backends/implementations/duckdb/reference_data.py +49 -0
  30. dve/core_engine/backends/implementations/duckdb/rules.py +534 -0
  31. dve/core_engine/backends/implementations/duckdb/types.py +47 -0
  32. dve/core_engine/backends/implementations/duckdb/utilities.py +41 -0
  33. dve/core_engine/backends/implementations/spark/__init__.py +22 -0
  34. dve/core_engine/backends/implementations/spark/auditing.py +230 -0
  35. dve/core_engine/backends/implementations/spark/backend.py +78 -0
  36. dve/core_engine/backends/implementations/spark/contract.py +241 -0
  37. dve/core_engine/backends/implementations/spark/readers/__init__.py +15 -0
  38. dve/core_engine/backends/implementations/spark/readers/csv.py +77 -0
  39. dve/core_engine/backends/implementations/spark/readers/json.py +66 -0
  40. dve/core_engine/backends/implementations/spark/readers/xml.py +202 -0
  41. dve/core_engine/backends/implementations/spark/reference_data.py +42 -0
  42. dve/core_engine/backends/implementations/spark/rules.py +430 -0
  43. dve/core_engine/backends/implementations/spark/spark_helpers.py +412 -0
  44. dve/core_engine/backends/implementations/spark/types.py +21 -0
  45. dve/core_engine/backends/implementations/spark/utilities.py +144 -0
  46. dve/core_engine/backends/metadata/__init__.py +47 -0
  47. dve/core_engine/backends/metadata/contract.py +80 -0
  48. dve/core_engine/backends/metadata/reporting.py +374 -0
  49. dve/core_engine/backends/metadata/rules.py +737 -0
  50. dve/core_engine/backends/readers/__init__.py +41 -0
  51. dve/core_engine/backends/readers/csv.py +232 -0
  52. dve/core_engine/backends/readers/utilities.py +21 -0
  53. dve/core_engine/backends/readers/xml.py +432 -0
  54. dve/core_engine/backends/readers/xml_linting.py +142 -0
  55. dve/core_engine/backends/types.py +26 -0
  56. dve/core_engine/backends/utilities.py +177 -0
  57. dve/core_engine/configuration/__init__.py +1 -0
  58. dve/core_engine/configuration/base.py +56 -0
  59. dve/core_engine/configuration/v1/__init__.py +351 -0
  60. dve/core_engine/configuration/v1/filters.py +60 -0
  61. dve/core_engine/configuration/v1/rule_stores/__init__.py +1 -0
  62. dve/core_engine/configuration/v1/rule_stores/models.py +57 -0
  63. dve/core_engine/configuration/v1/steps.py +365 -0
  64. dve/core_engine/constants.py +8 -0
  65. dve/core_engine/engine.py +265 -0
  66. dve/core_engine/exceptions.py +29 -0
  67. dve/core_engine/functions/__init__.py +6 -0
  68. dve/core_engine/functions/implementations.py +200 -0
  69. dve/core_engine/loggers.py +57 -0
  70. dve/core_engine/message.py +512 -0
  71. dve/core_engine/models.py +196 -0
  72. dve/core_engine/templating.py +114 -0
  73. dve/core_engine/type_hints.py +255 -0
  74. dve/core_engine/validation.py +160 -0
  75. dve/metadata_parser/__init__.py +2 -0
  76. dve/metadata_parser/domain_types.py +682 -0
  77. dve/metadata_parser/exc.py +44 -0
  78. dve/metadata_parser/function_library.py +64 -0
  79. dve/metadata_parser/function_wrapper.py +201 -0
  80. dve/metadata_parser/model_generator.py +119 -0
  81. dve/metadata_parser/models.py +410 -0
  82. dve/metadata_parser/utilities.py +54 -0
  83. dve/parser/__init__.py +1 -0
  84. dve/parser/exceptions.py +50 -0
  85. dve/parser/file_handling/__init__.py +31 -0
  86. dve/parser/file_handling/helpers.py +29 -0
  87. dve/parser/file_handling/implementations/__init__.py +7 -0
  88. dve/parser/file_handling/implementations/base.py +97 -0
  89. dve/parser/file_handling/implementations/dbfs.py +81 -0
  90. dve/parser/file_handling/implementations/file.py +203 -0
  91. dve/parser/file_handling/implementations/s3.py +371 -0
  92. dve/parser/file_handling/log_handler.py +215 -0
  93. dve/parser/file_handling/service.py +441 -0
  94. dve/parser/file_handling/utilities.py +53 -0
  95. dve/parser/type_hints.py +46 -0
  96. dve/parser/utilities.py +113 -0
  97. dve/pipeline/__init__.py +0 -0
  98. dve/pipeline/duckdb_pipeline.py +56 -0
  99. dve/pipeline/foundry_ddb_pipeline.py +171 -0
  100. dve/pipeline/pipeline.py +935 -0
  101. dve/pipeline/spark_pipeline.py +69 -0
  102. dve/pipeline/utils.py +96 -0
  103. dve/reporting/__init__.py +1 -0
  104. dve/reporting/error_report.py +153 -0
  105. dve/reporting/excel_report.py +319 -0
@@ -0,0 +1,618 @@
1
+ # pylint: disable=C0209
2
+ """Base auditing objects and managers for use of DVE services"""
3
+ import multiprocessing
4
+ import operator
5
+ import threading
6
+ import time
7
+ from abc import abstractmethod
8
+ from collections.abc import Callable, Iterable
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ from dataclasses import dataclass
11
+ from datetime import date, datetime, timedelta
12
+ from multiprocessing import Queue as ProcessQueue
13
+ from queue import Queue as ThreadQueue
14
+ from types import TracebackType
15
+ from typing import Any, ClassVar, Generic, Optional, TypeVar, Union
16
+
17
+ from pydantic import ValidationError, validate_arguments
18
+ from typing_extensions import Literal, get_origin
19
+
20
+ from dve.core_engine.models import (
21
+ AuditRecord,
22
+ ProcessingStatusRecord,
23
+ SubmissionInfo,
24
+ SubmissionStatisticsRecord,
25
+ TransferRecord,
26
+ )
27
+ from dve.core_engine.type_hints import (
28
+ BinaryComparator,
29
+ ExecutorType,
30
+ ProcessingStatus,
31
+ QueueType,
32
+ SubmissionResult,
33
+ )
34
+ from dve.pipeline.utils import SubmissionStatus
35
+
36
+ AuditReturnType = TypeVar("AuditReturnType") # pylint: disable=invalid-name
37
+
38
+
39
@dataclass
class FilterCriteria:
    """Stores information about a filter criteria to be applied to audit records.

    A criteria compares ``field`` against ``comparison_value`` using
    ``operator_``. ``to_sql`` renders it as ``<field> <sql operator> <value>``,
    looking the SQL operator up in ``operator_mapping``.
    """

    field: str
    comparison_value: Any
    operator_: Callable = operator.eq
    # Forward-reference string: the hint is only needed by type checkers, and
    # dataclasses still recognises the ClassVar wrapper (so this is not a field).
    operator_mapping: ClassVar["dict[BinaryComparator, str]"] = {
        operator.eq: "=",
        operator.ne: "!=",
        operator.lt: "<",
        operator.le: "<=",
        operator.gt: ">",
        operator.ge: ">=",
        operator.contains: "in",
    }

    @staticmethod
    def _quote_comp_val(val: Any) -> str:
        """Render a comparison value as SQL text.

        Lists, tuples and sets become a parenthesised, comma separated list of
        ``repr`` values; anything else is wrapped in single quotes.
        """
        if isinstance(val, (list, tuple, set)):
            # str(tuple(val)) would render a single element as "('x',)"; the
            # trailing comma is not valid in a SQL value list, so join by hand.
            # Output for two or more elements is identical to str(tuple(val)).
            return "({})".format(", ".join(repr(item) for item in val))
        return f"'{val}'"

    def to_sql(self) -> str:
        """Create sql expression from filter criteria.

        Raises:
            KeyError: if ``operator_`` has no entry in ``operator_mapping``.
        """
        return "{} {} {}".format(
            self.field,
            self.operator_mapping[self.operator_],
            self._quote_comp_val(self.comparison_value),
        )

    def __str__(self) -> str:
        return self.to_sql()
72
+
73
+
74
@dataclass
class OrderCriteria:
    """Describes one ordering term (a field and a direction) for audit records."""

    field_name: str
    descending: bool = False

    def to_sql(self) -> str:
        """Render the term as SQL: the field name, followed by `` DESC`` when descending."""
        direction = " DESC" if self.descending else ""
        return self.field_name + direction

    def __str__(self) -> str:
        return self.to_sql()
87
+
88
+
89
class BaseAuditor(Generic[AuditReturnType]):
    """Base auditor object.

    Defines the structure that concrete auditors implement so they can be used
    in conjunction with an auditing manager. Every operation except ``schema``
    raises ``NotImplementedError`` here.
    """

    def __init__(self, name: str, record_type: type[AuditRecord]):
        self._name = name
        self._record_type = record_type

    @property
    def schema(self) -> dict[str, type]:
        """Python schema of the auditor: field name -> python type.

        Built from the record type's ``__fields__``; a field whose type is a
        ``Literal`` is reported as ``str``.
        """
        python_schema: dict[str, type] = {}
        for field_name, model_field in self._record_type.__fields__.items():
            declared = model_field.type_
            python_schema[field_name] = str if get_origin(declared) == Literal else declared
        return python_schema

    @staticmethod
    def normalise_filter(filter_condition: FilterCriteria):
        """Convert a filter criteria to the implementation specific format"""
        raise NotImplementedError

    @staticmethod
    def normalise_order(order_condition: OrderCriteria):
        """Convert an order criteria to the implementation specific format"""
        raise NotImplementedError

    @staticmethod
    def normalise_field(field: str):
        """Convert a field name to the implementation specific format"""
        raise NotImplementedError

    @abstractmethod
    def conv_to_records(self, recs: AuditReturnType) -> Iterable[AuditRecord]:
        """Turn the implementation's AuditReturnType into an iterable of pydantic models"""
        raise NotImplementedError

    @abstractmethod
    def conv_to_entity(self, recs: list[AuditRecord]) -> AuditReturnType:
        """Turn a list of pydantic models into an entity for use in pipelines"""
        raise NotImplementedError

    @abstractmethod
    def add_records(self, records: Iterable[dict[str, Any]]):
        """Add audit records to the Auditor"""
        raise NotImplementedError

    @abstractmethod
    def retrieve_records(
        self, filter_criteria: list[FilterCriteria], data: Optional[AuditReturnType] = None
    ) -> AuditReturnType:
        """Retrieve audit records from the Auditor"""
        raise NotImplementedError

    def get_most_recent_records(
        self,
        order_criteria: list[OrderCriteria],
        partition_fields: Optional[list[str]] = None,
        pre_filter_criteria: Optional[list[FilterCriteria]] = None,
    ) -> AuditReturnType:
        """Retrieve the most recent records, as defined by the ordering
        criteria, for each partition combination"""
        raise NotImplementedError
151
+
152
+
153
+ AuditorType = TypeVar("AuditorType", bound=BaseAuditor) # pylint: disable=C0103
154
+ SubmissionMetadata = TypeVar("SubmissionMetadata", bound=SubmissionInfo)
155
+
156
+
157
+ class BaseAuditingManager(
158
+ Generic[AuditorType, AuditReturnType]
159
+ ): # pylint: disable=too-many-public-methods
160
+ """Manager of auditors - controls adding records to and querying from the
161
+ audit objects supplied"""
162
+
163
def __init__(
    self,
    processing_status: AuditorType,
    submission_info: AuditorType,
    submission_statistics: AuditorType,
    transfers: AuditorType,
    pool: Optional[ExecutorType] = None,
):
    """Audit manager to handle writing of audit information to auditors.

    Args:
        processing_status: auditor that stores processing status records.
        submission_info: auditor that stores submission info records.
        submission_statistics: auditor that stores submission statistics records.
        transfers: auditor that stores transfer records.
        pool: optional executor. When given, records are queued and written by
            ``_process_queue`` running on the pool; otherwise writes are made
            directly on the calling thread.
    """
    self._processing_status = processing_status
    self._submission_info = submission_info
    self._submission_statistics = submission_statistics
    self._transfers = transfers
    self.pool = pool
    if self.pool is not None:
        thread = isinstance(self.pool, ThreadPoolExecutor)
        # Thread pools get thread primitives, anything else gets process ones.
        self.queue: QueueType = ThreadQueue() if thread else ProcessQueue()
        self.processing_lock = threading.Lock() if thread else multiprocessing.Lock()
        self.clear_down = False
        # Start the worker last: it reads self.queue and self.processing_lock,
        # so both must exist before the task is handed to the pool.
        self.pool_result = self.pool.submit(self._process_queue)
183
+
184
@abstractmethod
def combine_auditor_information(
    self,
    left: Union[AuditorType, AuditReturnType],
    right: Union[AuditorType, AuditReturnType],
) -> AuditReturnType:
    """Combine the audit information held by ``left`` and ``right`` based on
    submission_id. Left for implementations to provide."""
    raise NotImplementedError
192
+
193
@staticmethod
def conv_to_iterable(recs: Union[AuditorType, AuditReturnType]) -> Iterable[dict[str, Any]]:
    """Turn an auditor / AuditReturnType into an iterable of dictionaries.
    Left for implementations to provide."""
    raise NotImplementedError
197
+
198
@validate_arguments
def add_processing_records(self, processing_records: list[ProcessingStatusRecord]):
    """Write the given records to the processing_status auditor.

    With a pool configured the records are queued for the background writer;
    without one they are added to the auditor directly.
    """
    payload = [dict(record) for record in processing_records]
    if not self.pool:
        return self._processing_status.add_records(records=payload)
    return self._submit(audit_object=self._processing_status, records=payload)
209
+
210
@validate_arguments
def add_submission_statistics_records(self, sub_stats: list[SubmissionStatisticsRecord]):
    """Write the given records to the submission statistics auditor.

    With a pool configured the records are queued for the background writer;
    without one they are added to the auditor directly.
    """
    payload = [dict(record) for record in sub_stats]
    if not self.pool:
        return self._submission_statistics.add_records(records=payload)
    return self._submit(audit_object=self._submission_statistics, records=payload)
219
+
220
@validate_arguments
def add_transfer_records(self, transfer_records: list[TransferRecord]):
    """Write the given records to the transfers auditor.

    With a pool configured the records are queued for the background writer;
    without one they are added to the auditor directly.
    """
    payload = [dict(record) for record in transfer_records]
    if not self.pool:
        return self._transfers.add_records(records=payload)
    return self._submit(audit_object=self._transfers, records=payload)
228
+
229
@validate_arguments
def add_new_submissions(
    self,
    submissions: list[SubmissionMetadata],
    job_run_id: Optional[int] = None,
):
    """Register new submissions.

    For every submission a "received" processing status record and a
    submission info record are written. Both carry the same
    ``time_updated`` / ``date_updated`` values so the two tables line up.
    Returns the submissions that were passed in.
    """
    # One timestamp for the whole batch keeps both auditors aligned.
    stamp: datetime = datetime.now()
    timestamps = {"time_updated": stamp, "date_updated": stamp.date()}

    status_rows: list[dict[str, Any]] = []
    info_rows: list[dict[str, Any]] = []
    for submission in submissions:
        received = ProcessingStatusRecord(
            submission_id=submission.submission_id,
            processing_status="received",
            job_run_id=job_run_id,
            **timestamps,
        )
        status_rows.append({**received.dict()})
        if submission:
            info_rows.append({**dict(submission), **timestamps})

    # Processing status first, then submission info.
    targets = (
        (self._processing_status, status_rows),
        (self._submission_info, info_rows),
    )
    for auditor, rows in targets:
        if self.pool:
            self._submit(audit_object=auditor, records=rows)
        else:
            auditor.add_records(records=rows)
    return submissions
266
+
267
def _submit(self, **kwargs):
    """Queue the keyword arguments, as a single dict, for the background writer."""
    work_item = kwargs
    self.queue.put(work_item)
269
+
270
def _process_queue(self):
    """Background writer: drain ``self.queue`` until a ``None`` sentinel arrives.

    Each queued item is a dict with an ``audit_object`` and the ``records`` to
    add to it. Writes happen while holding ``self.processing_lock``. A failing
    write is logged with its traceback and the loop carries on with the next
    item, so one bad batch does not stop the writer.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)
    while True:
        # get() blocks until an item is available, so no empty()/sleep polling
        # is needed and items are picked up as soon as they are queued.
        item = self.queue.get()
        if item is None:
            break
        try:
            with self.processing_lock:
                item["audit_object"].add_records(item["records"])
        except Exception:  # pylint: disable=broad-except
            logger.exception("Failed to write audit records")
283
+
284
def is_writing(self) -> bool:
    """Check if the audit manager is currently writing data to auditors.

    Returns False when no pool is configured. Otherwise returns True if the
    queue still holds items or the processing lock is currently held.
    """
    if self.pool is None:
        return False
    if isinstance(self.processing_lock, type(threading.Lock())):
        locked = self.processing_lock.locked()
    else:
        # Process locks are probed with a short acquire instead of locked().
        # acquire() signals a timeout by returning False (it does not raise),
        # and release() must only be called when the probe really took the
        # lock - releasing otherwise would free the lock held by the writer.
        acquired = self.processing_lock.acquire(timeout=0.001)  # type: ignore
        if acquired:
            self.processing_lock.release()  # type: ignore
        locked = not acquired

    return not self.queue.empty() or locked
301
+
302
def mark_transform(self, submission_ids: list[str], **kwargs):
    """Record a "file_transformation" processing status for each submission id.

    Extra keyword arguments are passed through to every ProcessingStatusRecord.
    """
    status_records = []
    for sub_id in submission_ids:
        status_records.append(
            ProcessingStatusRecord(
                submission_id=sub_id, processing_status="file_transformation", **kwargs
            )
        )
    return self.add_processing_records(status_records)
313
+
314
def mark_data_contract(self, submission_ids: list[str], **kwargs):
    """Record a "data_contract" processing status for each submission id.

    Extra keyword arguments are passed through to every ProcessingStatusRecord.
    """
    status_records = []
    for sub_id in submission_ids:
        status_records.append(
            ProcessingStatusRecord(
                submission_id=sub_id, processing_status="data_contract", **kwargs
            )
        )
    return self.add_processing_records(status_records)
325
+
326
def mark_business_rules(self, submissions: list[tuple[str, bool]], **kwargs):
    """Record a "business_rules" processing status for each submission.

    ``submissions`` holds ``(submission_id, failed)`` pairs; a truthy ``failed``
    sets the submission result to "validation_failed", otherwise it is None.
    Extra keyword arguments are passed through to every ProcessingStatusRecord.
    """
    status_records = []
    for sub_id, has_failed in submissions:
        outcome = "validation_failed" if has_failed else None
        status_records.append(
            ProcessingStatusRecord(
                submission_id=sub_id,
                processing_status="business_rules",
                submission_result=outcome,
                **kwargs,
            )
        )
    return self.add_processing_records(status_records)
340
+
341
def mark_error_report(
    self,
    submissions: list[tuple[str, SubmissionResult]],
    job_run_id: Optional[int] = None,
):
    """Record an "error_report" processing status for each submission.

    ``submissions`` holds ``(submission_id, submission_result)`` pairs; the
    result and ``job_run_id`` are stored on every record.
    """
    processing_recs: list[ProcessingStatusRecord] = [
        ProcessingStatusRecord(
            submission_id=sub_id,
            processing_status="error_report",
            submission_result=sub_result,
            job_run_id=job_run_id,
        )
        for sub_id, sub_result in submissions
    ]
    return self.add_processing_records(processing_recs)
363
+
364
def mark_finished(self, submissions: list[tuple[str, SubmissionResult]], **kwargs):
    """Record a "success" processing status for each submission.

    ``submissions`` holds ``(submission_id, submission_result)`` pairs. Extra
    keyword arguments are passed through to every ProcessingStatusRecord.
    """
    status_records = []
    for submission_id, result in submissions:
        status_records.append(
            ProcessingStatusRecord(
                submission_id=submission_id,
                processing_status="success",
                submission_result=result,
                **kwargs,
            )
        )
    return self.add_processing_records(status_records)
378
+
379
def mark_failed(self, submissions: list[str], **kwargs):
    """Record a "failed" processing status, with result "processing_failed",
    for each submission id.

    Extra keyword arguments are passed through to every ProcessingStatusRecord.
    """
    status_records = []
    for sub_id in submissions:
        status_records.append(
            ProcessingStatusRecord(
                submission_id=sub_id,
                processing_status="failed",
                submission_result="processing_failed",
                **kwargs,
            )
        )
    return self.add_processing_records(status_records)
392
+
393
def mark_archived(self, submissions: list[str], **kwargs):
    """Record an "archived" processing status for each submission id.

    Extra keyword arguments are passed through to every ProcessingStatusRecord.
    """
    status_records = []
    for sub_id in submissions:
        status_records.append(
            ProcessingStatusRecord(
                submission_id=sub_id, processing_status="archived", **kwargs
            )
        )
    return self.add_processing_records(status_records)
403
+
404
def add_feedback_transfer_ids(self, submissions: list[tuple[str, str]], **kwargs):
    """Store an "error_report" transfer record for each submission.

    ``submissions`` holds ``(submission_id, transfer_id)`` pairs. Extra keyword
    arguments are passed through to every TransferRecord.
    """
    transfer_records = []
    for sub_id, feedback_transfer_id in submissions:
        transfer_records.append(
            TransferRecord(
                submission_id=sub_id,
                report_name="error_report",
                transfer_id=feedback_transfer_id,
                **kwargs,
            )
        )
    return self.add_transfer_records(transfer_records)
417
+
418
def get_latest_processing_records(
    self, filter_criteria: Optional[list[FilterCriteria]] = None
) -> AuditReturnType:
    """Return the most recent processing record per submission_id.

    Records are ordered by ``time_updated`` descending and partitioned by
    ``submission_id``; ``filter_criteria`` is applied as a pre-filter.
    """
    newest_first = [OrderCriteria("time_updated", True)]
    per_submission = ["submission_id"]
    return self._processing_status.get_most_recent_records(
        order_criteria=newest_first,
        partition_fields=per_submission,
        pre_filter_criteria=filter_criteria,
    )
428
+
429
def downstream_pending(
    self,
    status: ProcessingStatus,
    max_concurrency: int = 1,
    run_number: int = 0,
    max_days_old: int = 3,
    statuses_to_include: Optional[list[ProcessingStatus]] = None,
) -> bool:
    """Checks if there are any matching submissions currently pending.

    The statuses looked for are ``status`` plus ``statuses_to_include`` when
    that is given; otherwise every pipeline step up to and including
    ``status``. Only the latest record per submission is considered, and only
    records updated after ``today - max_days_old``. A submission counts for
    this job when its id, read as a hexadecimal number, modulo
    ``max_concurrency`` equals ``run_number``.
    """
    steps: list[ProcessingStatus] = [
        "received",
        "file_transformation",
        "data_contract",
        "business_rules",
        "error_report",
    ]

    downstream: set[ProcessingStatus]
    if not statuses_to_include:
        downstream = set(steps[: steps.index(status) + 1])
    else:
        downstream = {status, *statuses_to_include}

    cutoff = str(date.today() - timedelta(days=max_days_old))
    criteria = [
        FilterCriteria("date_updated", cutoff, operator.gt),
        FilterCriteria("processing_status", downstream, operator.contains),
    ]
    pending = self._processing_status.conv_to_records(
        self._processing_status.retrieve_records(
            filter_criteria=criteria,
            data=self.get_latest_processing_records(),
        )
    )
    pending_ids = [row.submission_id for row in pending]
    # Stops at the first submission that belongs to this run.
    return any(
        int(sub_id, 16) % max_concurrency == run_number  # type: ignore
        for sub_id in pending_ids
    )
474
+
475
def get_submission_info(self, submission_id: str) -> Optional[SubmissionInfo]:
    """Get the first stored submission info record for a submission.

    Returns None when the submission_info auditor has no record with the
    given ``submission_id``.
    """
    matches = self._submission_info.conv_to_records(
        self._submission_info.retrieve_records(
            filter_criteria=[FilterCriteria("submission_id", submission_id)]
        )
    )
    # conv_to_records only promises an Iterable; iter() makes next() valid for
    # list-like results as well as generators.
    return next(iter(matches), None)  # type: ignore
487
+
488
def get_submission_statistics(self, submission_id: str) -> Optional[SubmissionStatisticsRecord]:
    """Get the first submission statistics record for a submission.

    Returns None when the submission_statistics auditor has no record with
    the given ``submission_id``.
    """
    matches = self._submission_statistics.conv_to_records(
        self._submission_statistics.retrieve_records(
            filter_criteria=[FilterCriteria("submission_id", submission_id)]
        )
    )
    # conv_to_records only promises an Iterable; iter() makes next() valid for
    # list-like results as well as generators.
    return next(iter(matches), None)  # type: ignore
500
+
501
def get_submission_status(self, submission_id: str) -> Optional[SubmissionStatus]:
    """Get the latest submission status for a submission.

    Returns None when no processing record exists for ``submission_id``.
    Otherwise returns a SubmissionStatus whose ``processing_failed`` or
    ``validation_failed`` flag is set when the most recent processing record
    carries that submission result, and whose ``number_of_records`` is taken
    from the submission statistics record when one exists.
    """
    latest = self._processing_status.conv_to_records(
        self._processing_status.get_most_recent_records(
            order_criteria=[OrderCriteria("time_updated", True)],
            pre_filter_criteria=[FilterCriteria("submission_id", submission_id)],
        )
    )
    # conv_to_records only promises an Iterable; iter() makes next() valid for
    # list-like results as well as generators.
    processing_rec: Optional[ProcessingStatusRecord] = next(  # type: ignore
        iter(latest), None
    )
    if processing_rec is None:
        return None

    sub_status = SubmissionStatus()
    sub_stats_rec: Optional[SubmissionStatisticsRecord] = self.get_submission_statistics(
        submission_id
    )
    if processing_rec.submission_result == "processing_failed":
        sub_status.processing_failed = True
    if processing_rec.submission_result == "validation_failed":
        sub_status.validation_failed = True
    if sub_stats_rec:
        sub_status.number_of_records = sub_stats_rec.record_count

    return sub_status
527
+
528
def __enter__(self):
    """Use audit table as context manager.

    When a pool is configured and the previous background writer has already
    finished, a fresh queue and lock are created and a new writer is started.
    Returns the manager itself.
    """
    if self.pool and self.pool_result.done():
        thread = isinstance(self.pool, ThreadPoolExecutor)
        self.queue: QueueType = ThreadQueue() if thread else ProcessQueue()
        self.processing_lock = threading.Lock() if thread else multiprocessing.Lock()
        self.clear_down = False
        # Start the worker last so it only ever sees the fresh queue and lock.
        self.pool_result = self.pool.submit(self._process_queue)
    return self
537
+
538
def __exit__(
    self,
    exc_type: Optional[type[Exception]],
    exc_value: Optional[Exception],
    traceback: Optional[TracebackType],
) -> None:
    """Use audit table as context manager.

    With a pool configured, sends the ``None`` sentinel to the background
    writer and waits for it to finish (re-raising anything it raised). Items
    that were queued behind the sentinel are then written directly, because
    no worker is left to consume them. Exceptions from the ``with`` body are
    not suppressed.
    """
    if not self.pool:
        return

    from queue import Empty  # raised by both thread and process queues

    self.queue.put(None)
    self.pool_result.result()

    # The worker has stopped: waiting for the queue to empty would never end,
    # so flush whatever is left on this thread instead.
    while True:
        try:
            item = self.queue.get_nowait()
        except Empty:
            break
        if item is None:
            continue
        with self.processing_lock:
            item["audit_object"].add_records(item["records"])
550
+
551
def _get_status(
    self,
    status: Union[ProcessingStatus, set[ProcessingStatus], list[ProcessingStatus]],
    max_days_old: int,
) -> AuditReturnType:
    """Retrieve processing status records matching ``status``.

    Only records updated after ``today - max_days_old`` are retrieved. A set
    or list of statuses is matched with a "contains" criteria, a single
    status with the default equality criteria.
    """
    cutoff = str(date.today() - timedelta(days=max_days_old))
    if isinstance(status, (set, list)):
        status_criteria = FilterCriteria("processing_status", status, operator.contains)
    else:
        status_criteria = FilterCriteria("processing_status", status)
    criteria = [FilterCriteria("date_updated", cutoff, operator.gt), status_criteria]
    return self._processing_status.retrieve_records(filter_criteria=criteria)
572
+
573
def get_all_file_transformation_submissions(self, max_days_old: int = 3) -> AuditReturnType:
    """Retrieve the "file_transformation" processing records no older than
    ``max_days_old`` days (submissions ready to be parsed)"""
    return self._get_status(status="file_transformation", max_days_old=max_days_old)
576
+
577
def get_all_data_contract_submissions(self, max_days_old: int = 3) -> AuditReturnType:
    """Retrieve the "data_contract" processing records no older than
    ``max_days_old`` days (submissions ready for the data contract)"""
    return self._get_status(status="data_contract", max_days_old=max_days_old)
580
+
581
def get_all_business_rule_submissions(self, max_days_old: int = 3) -> AuditReturnType:
    """Retrieve the "business_rules" processing records no older than
    ``max_days_old`` days (submissions ready for business rules)"""
    return self._get_status(status="business_rules", max_days_old=max_days_old)
584
+
585
def get_all_error_report_submissions(self, max_days_old: int = 3):
    """Gets all the submissions that are ready for error reports to be generated.

    The "error_report" processing records are combined with the submission
    info auditor and each combined row is validated as a SubmissionInfo.

    Returns:
        A ``(processed, dodgy_info)`` tuple: the rows that validated, as
        SubmissionInfo objects, and ``(row, submission_id)`` pairs for the
        rows that did not. The id is None when the row has no
        ``submission_id`` key.
    """
    subs = self._get_status("error_report", max_days_old)

    sub_infos = self.conv_to_iterable(
        self.combine_auditor_information(subs, self._submission_info)
    )

    processed: list[SubmissionInfo] = []
    dodgy_info: list[tuple[dict, Optional[str]]] = []

    for sub_info in sub_infos:
        try:
            processed.append(SubmissionInfo(**sub_info))
        except ValidationError:
            # A row can be invalid precisely because submission_id is missing;
            # .get() keeps one such row from aborting the whole listing.
            dodgy_info.append((sub_info, sub_info.get("submission_id")))

    return processed, dodgy_info
603
+
604
def get_current_processing_info(self, submission_id: str) -> Optional[ProcessingStatusRecord]:
    """Return the most recent processing record (by ``time_updated``) for the
    given submission_id, or None when there is none"""
    auditor = self._processing_status
    try:
        newest = auditor.get_most_recent_records(
            pre_filter_criteria=[FilterCriteria("submission_id", submission_id)],
            order_criteria=[OrderCriteria("time_updated", True)],
        )
        records = iter(auditor.conv_to_records(newest))
        return next(records)  # type: ignore
    except StopIteration:
        return None