data-validation-engine 0.7.5__tar.gz → 0.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/PKG-INFO +1 -1
  2. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/pyproject.toml +1 -1
  3. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py +3 -9
  4. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/readers/json.py +7 -2
  5. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/readers/xml.py +5 -4
  6. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/pipeline/duckdb_pipeline.py +1 -0
  7. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/pipeline/pipeline.py +13 -2
  8. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/pipeline/utils.py +10 -3
  9. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/LICENSE +0 -0
  10. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/README.md +0 -0
  11. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/__init__.py +0 -0
  12. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/common/__init__.py +0 -0
  13. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/common/error_utils.py +0 -0
  14. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/__init__.py +0 -0
  15. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/__init__.py +0 -0
  16. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/__init__.py +0 -0
  17. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/auditing.py +0 -0
  18. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/backend.py +0 -0
  19. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/contract.py +0 -0
  20. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/core.py +0 -0
  21. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/reader.py +0 -0
  22. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/reference_data.py +0 -0
  23. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/rules.py +0 -0
  24. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/base/utilities.py +0 -0
  25. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/exceptions.py +0 -0
  26. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/__init__.py +0 -0
  27. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/__init__.py +0 -0
  28. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/auditing.py +0 -0
  29. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/contract.py +0 -0
  30. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py +0 -0
  31. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/readers/__init__.py +0 -0
  32. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/reference_data.py +0 -0
  33. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/rules.py +0 -0
  34. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/types.py +0 -0
  35. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/duckdb/utilities.py +0 -0
  36. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/__init__.py +0 -0
  37. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/auditing.py +0 -0
  38. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/backend.py +0 -0
  39. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/contract.py +0 -0
  40. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/readers/__init__.py +0 -0
  41. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/readers/csv.py +0 -0
  42. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/readers/json.py +0 -0
  43. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/readers/xml.py +0 -0
  44. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/reference_data.py +0 -0
  45. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/rules.py +0 -0
  46. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/spark_helpers.py +0 -0
  47. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/types.py +0 -0
  48. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/implementations/spark/utilities.py +0 -0
  49. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/metadata/__init__.py +0 -0
  50. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/metadata/contract.py +0 -0
  51. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/metadata/reporting.py +0 -0
  52. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/metadata/rules.py +0 -0
  53. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/readers/__init__.py +0 -0
  54. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/readers/csv.py +0 -0
  55. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/readers/utilities.py +0 -0
  56. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/readers/xml.py +0 -0
  57. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/readers/xml_linting.py +0 -0
  58. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/types.py +0 -0
  59. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/backends/utilities.py +0 -0
  60. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/configuration/__init__.py +0 -0
  61. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/configuration/base.py +0 -0
  62. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/configuration/v1/__init__.py +0 -0
  63. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/configuration/v1/filters.py +0 -0
  64. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/configuration/v1/rule_stores/__init__.py +0 -0
  65. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/configuration/v1/rule_stores/models.py +0 -0
  66. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/configuration/v1/steps.py +0 -0
  67. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/constants.py +0 -0
  68. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/engine.py +0 -0
  69. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/exceptions.py +0 -0
  70. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/functions/__init__.py +0 -0
  71. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/functions/implementations.py +0 -0
  72. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/loggers.py +0 -0
  73. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/message.py +0 -0
  74. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/models.py +0 -0
  75. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/templating.py +0 -0
  76. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/type_hints.py +0 -0
  77. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/core_engine/validation.py +0 -0
  78. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/metadata_parser/__init__.py +0 -0
  79. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/metadata_parser/domain_types.py +0 -0
  80. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/metadata_parser/exc.py +0 -0
  81. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/metadata_parser/function_library.py +0 -0
  82. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/metadata_parser/function_wrapper.py +0 -0
  83. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/metadata_parser/model_generator.py +0 -0
  84. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/metadata_parser/models.py +0 -0
  85. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/metadata_parser/utilities.py +0 -0
  86. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/__init__.py +0 -0
  87. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/exceptions.py +0 -0
  88. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/__init__.py +0 -0
  89. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/helpers.py +0 -0
  90. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/implementations/__init__.py +0 -0
  91. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/implementations/base.py +0 -0
  92. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/implementations/dbfs.py +0 -0
  93. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/implementations/file.py +0 -0
  94. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/implementations/s3.py +0 -0
  95. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/log_handler.py +0 -0
  96. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/service.py +0 -0
  97. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/file_handling/utilities.py +0 -0
  98. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/type_hints.py +0 -0
  99. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/parser/utilities.py +0 -0
  100. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/pipeline/__init__.py +0 -0
  101. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/pipeline/foundry_ddb_pipeline.py +0 -0
  102. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/pipeline/spark_pipeline.py +0 -0
  103. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/reporting/__init__.py +0 -0
  104. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/reporting/error_report.py +0 -0
  105. {data_validation_engine-0.7.5 → data_validation_engine-0.7.6}/src/dve/reporting/excel_report.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-validation-engine
3
- Version: 0.7.5
3
+ Version: 0.7.6
4
4
  Summary: `nhs data validation engine` is a framework used to validate data
5
5
  License-Expression: MIT
6
6
  License-File: LICENSE
@@ -24,7 +24,7 @@ Issues = "https://github.com/NHSDigital/data-validation-engine/issues"
24
24
  Changelog = "https://github.com/NHSDigital/data-validation-engine/blob/main/CHANGELOG.md"
25
25
 
26
26
  [tool.poetry]
27
- version = "0.7.5"
27
+ version = "0.7.6"
28
28
  packages = [
29
29
  { include = "dve", from = "src" },
30
30
  ]
@@ -6,13 +6,7 @@ from typing import Any, Optional
6
6
 
7
7
  import duckdb as ddb
8
8
  import polars as pl
9
- from duckdb import (
10
- DuckDBPyConnection,
11
- DuckDBPyRelation,
12
- StarExpression,
13
- default_connection,
14
- read_csv,
15
- )
9
+ from duckdb import DuckDBPyConnection, DuckDBPyRelation, StarExpression, read_csv
16
10
  from pydantic import BaseModel
17
11
 
18
12
  from dve.core_engine.backends.base.reader import BaseFileReader, read_function
@@ -61,7 +55,7 @@ class DuckDBCSVReader(BaseFileReader):
61
55
  self.header = header
62
56
  self.delim = delim
63
57
  self.quotechar = quotechar
64
- self._connection = connection if connection else default_connection
58
+ self._connection = connection if connection else ddb.connect(":memory:")
65
59
  self.field_check = field_check
66
60
  self.field_check_error_code = field_check_error_code
67
61
  self.field_check_error_message = field_check_error_message
@@ -181,7 +175,7 @@ class PolarsToDuckDBCSVReader(DuckDBCSVReader):
181
175
  ] + [pl.col(RECORD_INDEX_COLUMN_NAME)]
182
176
  df = df.select(pl_exprs)
183
177
 
184
- return ddb.sql("SELECT * FROM df")
178
+ return self._connection.sql("SELECT * FROM df")
185
179
 
186
180
 
187
181
  class DuckDBCSVRepeatingHeaderReader(PolarsToDuckDBCSVReader):
@@ -4,7 +4,8 @@
4
4
  from collections.abc import Iterator
5
5
  from typing import Any, Optional
6
6
 
7
- from duckdb import DuckDBPyRelation, read_json
7
+ import duckdb
8
+ from duckdb import DuckDBPyConnection, DuckDBPyRelation
8
9
  from pydantic import BaseModel
9
10
 
10
11
  from dve.core_engine.backends.base.reader import BaseFileReader, read_function
@@ -26,9 +27,11 @@ class DuckDBJSONReader(BaseFileReader):
26
27
  self,
27
28
  *,
28
29
  json_format: Optional[str] = "array",
30
+ connection: Optional[DuckDBPyConnection] = None,
29
31
  **_,
30
32
  ):
31
33
  self._json_format = json_format
34
+ self._connection = duckdb.connect(":memory:") if not connection else connection
32
35
 
33
36
  super().__init__()
34
37
 
@@ -50,5 +53,7 @@ class DuckDBJSONReader(BaseFileReader):
50
53
  }
51
54
 
52
55
  return self.add_record_index(
53
- read_json(resource, columns=ddb_schema, format=self._json_format) # type: ignore
56
+ self._connection.read_json(
57
+ resource, columns=ddb_schema, format=self._json_format # type: ignore
58
+ )
54
59
  )
@@ -3,8 +3,9 @@
3
3
 
4
4
  from typing import Optional
5
5
 
6
+ import duckdb
6
7
  import polars as pl
7
- from duckdb import DuckDBPyConnection, DuckDBPyRelation, default_connection
8
+ from duckdb import DuckDBPyConnection, DuckDBPyRelation
8
9
  from pydantic import BaseModel
9
10
 
10
11
  from dve.core_engine.backends.base.reader import read_function
@@ -24,8 +25,8 @@ from dve.core_engine.type_hints import URI
24
25
  class DuckDBXMLStreamReader(XMLStreamReader):
25
26
  """A reader for XML files"""
26
27
 
27
- def __init__(self, *, ddb_connection: Optional[DuckDBPyConnection] = None, **kwargs):
28
- self.ddb_connection = ddb_connection if ddb_connection else default_connection
28
+ def __init__(self, *, connection: Optional[DuckDBPyConnection] = None, **kwargs):
29
+ self._connection = connection if connection else duckdb.connect(":memory:")
29
30
  super().__init__(**kwargs)
30
31
 
31
32
  @read_function(DuckDBPyRelation)
@@ -49,4 +50,4 @@ class DuckDBXMLStreamReader(XMLStreamReader):
49
50
  data=self.read_to_py_iterator(resource, entity_name, schema), schema=polars_schema
50
51
  )
51
52
  )
52
- return self.ddb_connection.sql("select * from _lazy_frame")
53
+ return self._connection.sql("select * from _lazy_frame")
@@ -45,6 +45,7 @@ class DDBDVEPipeline(BaseDVEPipeline):
45
45
  submitted_files_path,
46
46
  job_run_id,
47
47
  logger,
48
+ {"connection": self._connection},
48
49
  )
49
50
 
50
51
  def init_reference_data_loader(
@@ -9,7 +9,7 @@ from concurrent.futures import Executor, Future, ThreadPoolExecutor
9
9
  from functools import lru_cache
10
10
  from itertools import starmap
11
11
  from threading import Lock
12
- from typing import Optional, Union
12
+ from typing import Any, Optional, Union
13
13
  from uuid import uuid4
14
14
 
15
15
  import polars as pl
@@ -49,6 +49,7 @@ PERMISSIBLE_EXCEPTIONS: tuple[type[Exception]] = (
49
49
  )
50
50
 
51
51
 
52
+ # pylint: disable=R0904
52
53
  class BaseDVEPipeline:
53
54
  """
54
55
  Base class for running a DVE Pipeline either by a given step or a full e2e process.
@@ -64,6 +65,7 @@ class BaseDVEPipeline:
64
65
  submitted_files_path: Optional[URI],
65
66
  job_run_id: Optional[int] = None,
66
67
  logger: Optional[logging.Logger] = None,
68
+ backend_reader_kwargs: Optional[dict[str, Any]] = None,
67
69
  ):
68
70
  self._submitted_files_path = submitted_files_path
69
71
  self._processed_files_path = processed_files_path
@@ -76,6 +78,7 @@ class BaseDVEPipeline:
76
78
  self._summary_lock = Lock()
77
79
  self._rec_tracking_lock = Lock()
78
80
  self._aggregates_lock = Lock()
81
+ self._backend_reader_kwargs = backend_reader_kwargs
79
82
 
80
83
  if self._data_contract:
81
84
  self._data_contract.logger = self._logger
@@ -107,6 +110,12 @@ class BaseDVEPipeline:
107
110
  """The step implementations to apply the business rules to a given dataset"""
108
111
  return self._step_implementations
109
112
 
113
+ @property
114
+ def backend_reader_kwargs(self) -> dict[str, Any] | None:
115
+ """Important required arguments for all readers related to the specific backend
116
+ that can't be specified at the time of writing the config, e.g. a DuckDB connection"""
117
+ return self._backend_reader_kwargs
118
+
110
119
  @staticmethod
111
120
  def get_entity_count(entity: EntityType) -> int:
112
121
  """Get a row count of an entity stored as parquet"""
@@ -203,7 +212,9 @@ class BaseDVEPipeline:
203
212
 
204
213
  for model_name, model in models.items():
205
214
  self._logger.info(f"Transforming {model_name} to stringified parquet")
206
- reader: BaseFileReader = load_reader(dataset, model_name, ext)
215
+ reader: BaseFileReader = load_reader(
216
+ dataset, model_name, ext, self.backend_reader_kwargs
217
+ )
207
218
  try:
208
219
  if not entity_type:
209
220
  reader.write_parquet(
@@ -3,7 +3,7 @@
3
3
 
4
4
  import json
5
5
  from threading import Lock
6
- from typing import Optional
6
+ from typing import Any, Optional
7
7
 
8
8
  from pydantic.main import ModelMetaclass
9
9
  from pyspark.sql import SparkSession
@@ -45,10 +45,17 @@ def load_config(
45
45
  return models, config, dataset
46
46
 
47
47
 
48
- def load_reader(dataset: Dataset, model_name: str, file_extension: str):
48
+ def load_reader(
49
+ dataset: Dataset,
50
+ model_name: str,
51
+ file_extension: str,
52
+ backend_reader_kwargs: Optional[dict[str, Any]] = None,
53
+ ):
49
54
  """Loads the readers for the given feed, model name and file extension"""
50
55
  reader_config = dataset[model_name].reader_config[f".{file_extension.lower()}"]
51
- reader = _READER_REGISTRY[reader_config.reader](**reader_config.kwargs_)
56
+ reader = _READER_REGISTRY[reader_config.reader](
57
+ **reader_config.kwargs_, **backend_reader_kwargs if backend_reader_kwargs else {}
58
+ )
52
59
  return reader
53
60
 
54
61