data-validation-engine 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. data_validation_engine-0.6.2.dist-info/METADATA +104 -0
  2. data_validation_engine-0.6.2.dist-info/RECORD +105 -0
  3. data_validation_engine-0.6.2.dist-info/WHEEL +4 -0
  4. data_validation_engine-0.6.2.dist-info/licenses/LICENSE +21 -0
  5. dve/__init__.py +0 -0
  6. dve/common/__init__.py +0 -0
  7. dve/common/error_utils.py +189 -0
  8. dve/core_engine/__init__.py +0 -0
  9. dve/core_engine/backends/__init__.py +1 -0
  10. dve/core_engine/backends/base/__init__.py +1 -0
  11. dve/core_engine/backends/base/auditing.py +618 -0
  12. dve/core_engine/backends/base/backend.py +240 -0
  13. dve/core_engine/backends/base/contract.py +454 -0
  14. dve/core_engine/backends/base/core.py +124 -0
  15. dve/core_engine/backends/base/reader.py +176 -0
  16. dve/core_engine/backends/base/reference_data.py +217 -0
  17. dve/core_engine/backends/base/rules.py +685 -0
  18. dve/core_engine/backends/base/utilities.py +146 -0
  19. dve/core_engine/backends/exceptions.py +311 -0
  20. dve/core_engine/backends/implementations/__init__.py +1 -0
  21. dve/core_engine/backends/implementations/duckdb/__init__.py +26 -0
  22. dve/core_engine/backends/implementations/duckdb/auditing.py +234 -0
  23. dve/core_engine/backends/implementations/duckdb/contract.py +213 -0
  24. dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py +288 -0
  25. dve/core_engine/backends/implementations/duckdb/readers/__init__.py +13 -0
  26. dve/core_engine/backends/implementations/duckdb/readers/csv.py +222 -0
  27. dve/core_engine/backends/implementations/duckdb/readers/json.py +50 -0
  28. dve/core_engine/backends/implementations/duckdb/readers/xml.py +45 -0
  29. dve/core_engine/backends/implementations/duckdb/reference_data.py +49 -0
  30. dve/core_engine/backends/implementations/duckdb/rules.py +534 -0
  31. dve/core_engine/backends/implementations/duckdb/types.py +47 -0
  32. dve/core_engine/backends/implementations/duckdb/utilities.py +41 -0
  33. dve/core_engine/backends/implementations/spark/__init__.py +22 -0
  34. dve/core_engine/backends/implementations/spark/auditing.py +230 -0
  35. dve/core_engine/backends/implementations/spark/backend.py +78 -0
  36. dve/core_engine/backends/implementations/spark/contract.py +241 -0
  37. dve/core_engine/backends/implementations/spark/readers/__init__.py +15 -0
  38. dve/core_engine/backends/implementations/spark/readers/csv.py +77 -0
  39. dve/core_engine/backends/implementations/spark/readers/json.py +66 -0
  40. dve/core_engine/backends/implementations/spark/readers/xml.py +202 -0
  41. dve/core_engine/backends/implementations/spark/reference_data.py +42 -0
  42. dve/core_engine/backends/implementations/spark/rules.py +430 -0
  43. dve/core_engine/backends/implementations/spark/spark_helpers.py +412 -0
  44. dve/core_engine/backends/implementations/spark/types.py +21 -0
  45. dve/core_engine/backends/implementations/spark/utilities.py +144 -0
  46. dve/core_engine/backends/metadata/__init__.py +47 -0
  47. dve/core_engine/backends/metadata/contract.py +80 -0
  48. dve/core_engine/backends/metadata/reporting.py +374 -0
  49. dve/core_engine/backends/metadata/rules.py +737 -0
  50. dve/core_engine/backends/readers/__init__.py +41 -0
  51. dve/core_engine/backends/readers/csv.py +232 -0
  52. dve/core_engine/backends/readers/utilities.py +21 -0
  53. dve/core_engine/backends/readers/xml.py +432 -0
  54. dve/core_engine/backends/readers/xml_linting.py +142 -0
  55. dve/core_engine/backends/types.py +26 -0
  56. dve/core_engine/backends/utilities.py +177 -0
  57. dve/core_engine/configuration/__init__.py +1 -0
  58. dve/core_engine/configuration/base.py +56 -0
  59. dve/core_engine/configuration/v1/__init__.py +351 -0
  60. dve/core_engine/configuration/v1/filters.py +60 -0
  61. dve/core_engine/configuration/v1/rule_stores/__init__.py +1 -0
  62. dve/core_engine/configuration/v1/rule_stores/models.py +57 -0
  63. dve/core_engine/configuration/v1/steps.py +365 -0
  64. dve/core_engine/constants.py +8 -0
  65. dve/core_engine/engine.py +265 -0
  66. dve/core_engine/exceptions.py +29 -0
  67. dve/core_engine/functions/__init__.py +6 -0
  68. dve/core_engine/functions/implementations.py +200 -0
  69. dve/core_engine/loggers.py +57 -0
  70. dve/core_engine/message.py +512 -0
  71. dve/core_engine/models.py +196 -0
  72. dve/core_engine/templating.py +114 -0
  73. dve/core_engine/type_hints.py +255 -0
  74. dve/core_engine/validation.py +160 -0
  75. dve/metadata_parser/__init__.py +2 -0
  76. dve/metadata_parser/domain_types.py +682 -0
  77. dve/metadata_parser/exc.py +44 -0
  78. dve/metadata_parser/function_library.py +64 -0
  79. dve/metadata_parser/function_wrapper.py +201 -0
  80. dve/metadata_parser/model_generator.py +119 -0
  81. dve/metadata_parser/models.py +410 -0
  82. dve/metadata_parser/utilities.py +54 -0
  83. dve/parser/__init__.py +1 -0
  84. dve/parser/exceptions.py +50 -0
  85. dve/parser/file_handling/__init__.py +31 -0
  86. dve/parser/file_handling/helpers.py +29 -0
  87. dve/parser/file_handling/implementations/__init__.py +7 -0
  88. dve/parser/file_handling/implementations/base.py +97 -0
  89. dve/parser/file_handling/implementations/dbfs.py +81 -0
  90. dve/parser/file_handling/implementations/file.py +203 -0
  91. dve/parser/file_handling/implementations/s3.py +371 -0
  92. dve/parser/file_handling/log_handler.py +215 -0
  93. dve/parser/file_handling/service.py +441 -0
  94. dve/parser/file_handling/utilities.py +53 -0
  95. dve/parser/type_hints.py +46 -0
  96. dve/parser/utilities.py +113 -0
  97. dve/pipeline/__init__.py +0 -0
  98. dve/pipeline/duckdb_pipeline.py +56 -0
  99. dve/pipeline/foundry_ddb_pipeline.py +171 -0
  100. dve/pipeline/pipeline.py +935 -0
  101. dve/pipeline/spark_pipeline.py +69 -0
  102. dve/pipeline/utils.py +96 -0
  103. dve/reporting/__init__.py +1 -0
  104. dve/reporting/error_report.py +153 -0
  105. dve/reporting/excel_report.py +319 -0
@@ -0,0 +1,53 @@
+ """Utilities for working with files."""
+
+ import tempfile
+ from pathlib import Path
+ from types import TracebackType
+ from typing import Optional
+
+ from dve.parser.exceptions import UnsupportedSchemeError
+ from dve.parser.file_handling.service import is_supported, remove_prefix
+ from dve.parser.type_hints import URI
+
+
+ class TemporaryPrefix:
+     """Like 'TemporaryDirectory', but with support for a URL prefix."""
+
+     def __init__(self, prefix: Optional[URI] = None):
+         """Set up the prefix.
+
+         Args:
+         - `prefix`: the URL prefix to use as temporary storage. This
+           will default to a local temporary folder.
+
+         """
+         if not prefix:
+             prefix = Path(tempfile.mkdtemp()).as_uri()
+         self._prefix = prefix.rstrip("/") + "/"
+
+         # Ensure we have an implementation for this prefix.
+         if not is_supported(self._prefix): # pragma: no cover
+             raise UnsupportedSchemeError(f"No supported implementation for {prefix!r}")
+         self._in_context = False
+
+     @property
+     def prefix(self) -> URI: # pragma: no cover
+         """The URI prefix of the temporary directory."""
+         if not self._in_context:
+             raise ValueError(f"`{self.__class__.__name__}` must be used as context manager")
+         return self._prefix
+
+     def __enter__(self) -> URI:
+         """Enters the context manager and yields the prefix"""
+         self._in_context = True
+         return self._prefix
+
+     def __exit__(
+         self,
+         exc_type: Optional[type[Exception]],
+         exc_value: Optional[Exception],
+         traceback: Optional[TracebackType],
+     ):
+         """Exits the context manager and cleans up the temporary prefix"""
+         self._in_context = False
+         remove_prefix(self._prefix, recursive=True)
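
A minimal usage sketch of `TemporaryPrefix`, based only on the code above. The `fh.joinuri` helper is assumed to join URI segments (as its uses elsewhere in this package suggest), and the file name is a placeholder:

    from dve.parser import file_handling as fh
    from dve.parser.file_handling.utilities import TemporaryPrefix

    # With no argument, a local temporary directory is created and exposed as a
    # file:// URI; on exit the whole prefix is removed recursively.
    with TemporaryPrefix() as prefix:
        staging_uri = fh.joinuri(prefix, "staging.parquet")  # placeholder target
        ...  # write intermediate outputs underneath `prefix`
    # Reading `.prefix` outside the `with` block raises ValueError.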
@@ -0,0 +1,46 @@
+ """Type hints for the parser."""
+
+ from pathlib import Path
+ from typing import Any, Optional, Union
+
+ from typing_extensions import Literal
+
+ PathStr = str
+ """A filesystem path, as a string (cursed)."""
+ URI = str
+ """A URI representing a remote or local resource."""
+ Filename = str
+ """A string representing a filename."""
+ Scheme = str
+ """The scheme attribute of the URI."""
+ Hostname = Optional[str]
+ """The hostname attribute of the URI."""
+ URIPath = str
+ """The path attribute of the URI."""
+ Extension = str
+ """A file extension (e.g. '.csv')."""
+ TextFileOpenMode = Literal["r", "a", "w", "a+"]
+ """An opening mode for a file in text mode."""
+ BinaryFileOpenMode = Literal["ab", "rb", "wb", "ba", "br", "bw"]
+ """An opening mode for a file in binary mode."""
+ FileOpenMode = Union[TextFileOpenMode, BinaryFileOpenMode]
+ """An opening mode for a file."""
+ NodeType = Literal["resource", "directory"]
+ """The type of node in a filesystem."""
+
+ Location = Union[PathStr, Path, URI]
+ """
+ A filesystem or remote location. An annoying, difficult to resolve union
+ (see `parser.file_handling.service.resolve_location`).
+
+ """
+
+ ReaderName = str
+ """A parser name. This must be importable from `parser.readers`"""
+ ReaderArgs = Optional[dict[str, Any]]
+ """Keyword arguments to be passed to the parser's constructor."""
+ FieldName = str
+ """The name of a field within the dataset."""
+
+ SparkXMLMode = Literal["PERMISSIVE", "FAILFAST", "DROPMALFORMED"]
+ """The mode to use when parsing XML files with Spark."""
@@ -0,0 +1,113 @@
+ """Useful helper functions.
+
+ This is functionality which may be useful for multiple readers,
+ but isn't quite universal enough to be in e.g. a base class.
+
+ """
+
+ from collections import defaultdict
+ from collections.abc import Iterable, Iterator
+ from itertools import tee
+ from typing import TypeVar, Union, overload
+
+ from pyspark.sql.types import ArrayType, StringType, StructField, StructType
+
+ T = TypeVar("T")
+ TemplateElement = Union[None, list["TemplateElement"], dict[str, "TemplateElement"]] # type: ignore
+ """The base types used in the template row."""
+ TemplateRow = dict[str, "TemplateElement"] # type: ignore
+ """The type of a template row."""
+
+
+ def peek(iterable: Iterable[T]) -> tuple[T, Iterator[T]]:
+     """Peek the first item from an iterable, returning the first item
+     and an iterator representing the state of the iterable _before_
+     the first item was taken.
+
+     """
+     current, clone = tee(iterable, 2)
+     return next(clone), current
+
+
+ @overload
+ def template_row_to_spark_schema(template_element: TemplateRow) -> StructType: ...
+
+
+ @overload
+ def template_row_to_spark_schema(
+     template_element: TemplateElement,
+ ) -> Union[ArrayType, StringType, StructType]: ...
+
+
+ def template_row_to_spark_schema(template_element):
+     """Get a Spark schema from a template row."""
+     # Should we implement the full logic from dve.core_engine.spark_helpers here?
+     if template_element is None:
+         return StringType()
+     if isinstance(template_element, list):
+         if not template_element:
+             nested_type = None
+         elif len(template_element) == 1:
+             nested_type = template_element[0]
+         else:
+             raise ValueError(f"Nested array longer than 1: {template_element!r}")
+         return ArrayType(template_row_to_spark_schema(nested_type))
+     if not isinstance(template_element, dict):
+         raise TypeError(f"Must be dict, list, or None, got {template_element!r}")
+
+     fields = []
+     for field_name, nested_type in template_element.items():
+         fields.append(StructField(str(field_name), template_row_to_spark_schema(nested_type)))
+     return StructType(fields)
+
+
+ def parse_template_row(field_names: Iterable[str]) -> TemplateRow:
+     """Parse a template row.
+
+     Field names can be separated by level using '.', and wrapping the
+     field name in square brackets indicates that the item is expected
+     to be an array.
+
+     >>> parse_template_row(['name'])
+     {'name': None}
+     >>> parse_template_row(['[name]'])
+     {'name': [None]}
+     >>> parse_template_row(['name', 'name.nested'])
+     {'name': {'nested': None}}
+     >>> parse_template_row(['[name]', 'name.nested'])
+     {'name': [{'nested': None}]}
+     >>> parse_template_row(['name', '[name.nested_list]'])
+     {'name': {'nested_list': [None]}}
+     >>> parse_template_row(['[name]', '[name.nested_list]'])
+     {'name': [{'nested_list': [None]}]}
+
+     """
+     array_levels = set()
+     sub_levels_by_level: dict[str, list[str]] = defaultdict(list)
+
+     for name in field_names:
+         is_array = name.startswith("[")
+         name = name.strip("[]")
+
+         if "." not in name:
+             # Add the key to the defaultdict, if it's not already added
+             sub_levels_by_level[name] # pylint: disable=pointless-statement
+             if is_array:
+                 array_levels.add(name)
+         else:
+             level, sub_level = name.split(".", 1)
+             if is_array:
+                 sub_level = f"[{sub_level}]"
+             sub_levels_by_level[level].append(sub_level)
+
+     row = {}
+     for level, sub_level_names in sub_levels_by_level.items():
+         value: TemplateElement = None
+         if sub_level_names:
+             value = parse_template_row(sub_level_names)
+
+         if level in array_levels:
+             value = [value]
+         row[level] = value
+
+     return row
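
Putting the two helpers together: the dict produced by `parse_template_row` can be fed straight into `template_row_to_spark_schema`. A small sketch based on the doctests above; the field names are purely illustrative:

    from dve.parser.utilities import parse_template_row, template_row_to_spark_schema

    # Dotted names build nesting; [brackets] mark a level as an array.
    template = parse_template_row(["id", "[orders]", "[orders.item]"])
    # -> {'id': None, 'orders': [{'item': [None]}]}

    schema = template_row_to_spark_schema(template)
    # -> StructType with a string 'id' field and an 'orders' array of structs,
    #    each holding an 'item' array; every leaf becomes StringType().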
File without changes
@@ -0,0 +1,56 @@
+ """DuckDB implementation for `Pipeline` object."""
+
+ import logging
+ from typing import Optional
+
+ from duckdb import DuckDBPyConnection, DuckDBPyRelation
+
+ from dve.core_engine.backends.base.reference_data import BaseRefDataLoader
+ from dve.core_engine.backends.implementations.duckdb.auditing import DDBAuditingManager
+ from dve.core_engine.backends.implementations.duckdb.contract import DuckDBDataContract
+ from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import duckdb_get_entity_count
+ from dve.core_engine.backends.implementations.duckdb.rules import DuckDBStepImplementations
+ from dve.core_engine.models import SubmissionInfo
+ from dve.core_engine.type_hints import URI
+ from dve.pipeline.pipeline import BaseDVEPipeline
+
+
+ # pylint: disable=abstract-method
+ @duckdb_get_entity_count
+ class DDBDVEPipeline(BaseDVEPipeline):
+     """
+     Modified Pipeline class for running a DVE Pipeline with DuckDB
+     """
+
+     # pylint: disable=R0913
+     def __init__(
+         self,
+         processed_files_path: URI,
+         audit_tables: DDBAuditingManager,
+         connection: DuckDBPyConnection,
+         rules_path: Optional[URI],
+         submitted_files_path: Optional[URI],
+         reference_data_loader: Optional[type[BaseRefDataLoader]] = None,
+         job_run_id: Optional[int] = None,
+         logger: Optional[logging.Logger] = None,
+     ):
+         self._connection = connection
+         super().__init__(
+             processed_files_path,
+             audit_tables,
+             DuckDBDataContract(connection=self._connection),
+             DuckDBStepImplementations.register_udfs(connection=self._connection),
+             rules_path,
+             submitted_files_path,
+             reference_data_loader,
+             job_run_id,
+             logger,
+         )
+
+     # pylint: disable=arguments-differ
+     def write_file_to_parquet( # type: ignore
+         self, submission_file_uri: URI, submission_info: SubmissionInfo, output: URI
+     ):
+         return super().write_file_to_parquet(
+             submission_file_uri, submission_info, output, DuckDBPyRelation
+         )
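
A hedged construction sketch for `DDBDVEPipeline`, assuming an in-memory DuckDB connection. The `DDBAuditingManager` constructor lives in dve/core_engine/backends/implementations/duckdb/auditing.py and is not shown in this diff, so its arguments are left elided; the paths are placeholders:

    import duckdb

    from dve.core_engine.backends.implementations.duckdb.auditing import DDBAuditingManager
    from dve.pipeline.duckdb_pipeline import DDBDVEPipeline

    connection = duckdb.connect()           # in-memory DuckDB database
    audit_tables = DDBAuditingManager(...)  # constructor arguments not shown in this diff

    pipeline = DDBDVEPipeline(
        processed_files_path="file:///tmp/processed/",   # placeholder URIs
        audit_tables=audit_tables,
        connection=connection,
        rules_path="file:///tmp/rules/",
        submitted_files_path="file:///tmp/submitted/",
    )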
@@ -0,0 +1,171 @@
+ # pylint: disable=W0223
+ """A duckdb pipeline for running on Foundry platform"""
+
+ import shutil
+ from pathlib import Path
+ from typing import Optional
+
+ from dve.common.error_utils import dump_processing_errors
+ from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import (
+     duckdb_get_entity_count,
+     duckdb_write_parquet,
+ )
+ from dve.core_engine.exceptions import CriticalProcessingError
+ from dve.core_engine.models import SubmissionInfo
+ from dve.core_engine.type_hints import URI
+ from dve.parser import file_handling as fh
+ from dve.parser.file_handling.implementations.file import LocalFilesystemImplementation
+ from dve.parser.file_handling.service import _get_implementation
+ from dve.pipeline.duckdb_pipeline import DDBDVEPipeline
+ from dve.pipeline.utils import SubmissionStatus
+
+
+ @duckdb_get_entity_count
+ @duckdb_write_parquet
+ class FoundryDDBPipeline(DDBDVEPipeline):
+     """DuckDB pipeline for running on Foundry Platform"""
+
+     def _move_submission_to_processing_files_path(self, submission_info: SubmissionInfo):
+         """Move submitted file to 'processed_files_path'."""
+         _submitted_file_location = Path(
+             self._submitted_files_path, submission_info.file_name_with_ext # type: ignore
+         )
+         _dest = Path(self.processed_files_path, submission_info.submission_id)
+         _dest.mkdir(parents=True, exist_ok=True)
+         shutil.copy2(_submitted_file_location, _dest)
+
+     def persist_audit_records(self, submission_info: SubmissionInfo) -> URI:
+         """Write out key audit relations to parquet for persisting to datasets"""
+         write_to = fh.joinuri(self.processed_files_path, submission_info.submission_id, "audit/")
+         if isinstance(_get_implementation(write_to), LocalFilesystemImplementation):
+             write_to = fh.file_uri_to_local_path(write_to)
+             write_to.parent.mkdir(parents=True, exist_ok=True)
+             write_to = write_to.as_posix()
+         self.write_parquet( # type: ignore # pylint: disable=E1101
+             self._audit_tables._processing_status.get_relation(), # pylint: disable=W0212
+             fh.joinuri(write_to, "processing_status.parquet"),
+         )
+         self.write_parquet( # type: ignore # pylint: disable=E1101
+             self._audit_tables._submission_statistics.get_relation(), # pylint: disable=W0212
+             fh.joinuri(write_to, "submission_statistics.parquet"),
+         )
+         return write_to
+
+     def file_transformation(
+         self, submission_info: SubmissionInfo
+     ) -> tuple[SubmissionInfo, SubmissionStatus]:
+         try:
+             return super().file_transformation(submission_info)
+         except Exception as exc: # pylint: disable=W0718
+             self._logger.exception("File transformation raised exception:")
+             dump_processing_errors(
+                 fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                 "file_transformation",
+                 [CriticalProcessingError.from_exception(exc)],
+             )
+             self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
+             return submission_info, SubmissionStatus(processing_failed=True)
+
+     def apply_data_contract(
+         self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None
+     ) -> tuple[SubmissionInfo, SubmissionStatus]:
+         try:
+             return super().apply_data_contract(submission_info, submission_status)
+         except Exception as exc: # pylint: disable=W0718
+             self._logger.exception("Apply data contract raised exception:")
+             dump_processing_errors(
+                 fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                 "data_contract",
+                 [CriticalProcessingError.from_exception(exc)],
+             )
+             self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
+             return submission_info, SubmissionStatus(processing_failed=True)
+
+     def apply_business_rules(
+         self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None
+     ):
+         try:
+             return super().apply_business_rules(submission_info, submission_status)
+         except Exception as exc: # pylint: disable=W0718
+             self._logger.exception("Apply business rules raised exception:")
+             dump_processing_errors(
+                 fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                 "business_rules",
+                 [CriticalProcessingError.from_exception(exc)],
+             )
+             self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
+             return submission_info, SubmissionStatus(processing_failed=True)
+
+     def error_report(
+         self, submission_info: SubmissionInfo, submission_status: Optional[SubmissionStatus] = None
+     ):
+         try:
+             return super().error_report(submission_info, submission_status)
+         except Exception as exc: # pylint: disable=W0718
+             self._logger.exception("Error reports raised exception:")
+             sub_stats = None
+             report_uri = None
+             submission_status = submission_status if submission_status else SubmissionStatus()
+             submission_status.processing_failed = True
+             dump_processing_errors(
+                 fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                 "error_report",
+                 [CriticalProcessingError.from_exception(exc)],
+             )
+             self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
+             return submission_info, submission_status, sub_stats, report_uri
+
+     def run_pipeline(
+         self, submission_info: SubmissionInfo
+     ) -> tuple[Optional[URI], Optional[URI], URI]:
+         """Sequential single submission pipeline runner"""
+         try:
+             sub_id: str = submission_info.submission_id
+             report_uri = None
+             if self._submitted_files_path:
+                 self._move_submission_to_processing_files_path(submission_info)
+             self._audit_tables.add_new_submissions(submissions=[submission_info])
+             self._audit_tables.mark_transform(submission_ids=[sub_id])
+             sub_info, sub_status = self.file_transformation(submission_info=submission_info)
+             if not (sub_status.validation_failed or sub_status.processing_failed):
+                 self._audit_tables.mark_data_contract(submission_ids=[sub_id])
+                 sub_info, sub_status = self.apply_data_contract(
+                     submission_info=sub_info, submission_status=sub_status
+                 )
+                 self._audit_tables.mark_business_rules(
+                     submissions=[(sub_id, sub_status.validation_failed)]
+                 )
+                 sub_info, sub_status = self.apply_business_rules(
+                     submission_info=submission_info, submission_status=sub_status
+                 )
+
+             if not sub_status.processing_failed:
+                 self._audit_tables.mark_error_report(
+                     submissions=[(sub_id, sub_status.submission_result)]
+                 )
+                 sub_info, sub_status, sub_stats, report_uri = self.error_report(
+                     submission_info=submission_info, submission_status=sub_status
+                 )
+                 if sub_stats:
+                     self._audit_tables.add_submission_statistics_records(sub_stats=[sub_stats])
+         except Exception as err: # pylint: disable=W0718
+             self._logger.exception(
+                 f"During processing of submission_id: {sub_id}, this exception was raised:"
+             )
+             dump_processing_errors(
+                 fh.joinuri(self.processed_files_path, submission_info.submission_id),
+                 "pipeline",
+                 [CriticalProcessingError.from_exception(err)],
+             )
+             self._audit_tables.mark_failed(submissions=[sub_id])
+         finally:
+             audit_files_uri = self.persist_audit_records(submission_info=submission_info)
+             return (
+                 (
+                     None
+                     if (sub_status.validation_failed or sub_status.processing_failed)
+                     else fh.joinuri(self.processed_files_path, sub_id, "business_rules")
+                 ),
+                 report_uri if report_uri else None,
+                 audit_files_uri,
+             )
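
For reference, `run_pipeline` returns a three-tuple: the URI of the business-rules output (None when validation or processing failed), the error-report URI (or None), and the URI where the audit parquet files were persisted. A hedged sketch of calling it, assuming a `FoundryDDBPipeline` built with the same constructor arguments as the `DDBDVEPipeline` sketch above and an already-built `SubmissionInfo`:

    business_rules_uri, report_uri, audit_uri = pipeline.run_pipeline(submission_info)
    if business_rules_uri is None:
        print(f"Submission failed; report: {report_uri}, audit records: {audit_uri}")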