dataqe-framework 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataqe_framework-0.2.4/src/dataqe_framework.egg-info → dataqe_framework-0.2.6}/PKG-INFO +1 -1
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/pyproject.toml +1 -1
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/__init__.py +1 -1
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/cli.py +1 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/executor.py +57 -36
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/preprocessor.py +101 -3
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6/src/dataqe_framework.egg-info}/PKG-INFO +1 -1
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/LICENSE.txt +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/MANIFEST.in +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/README.md +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/setup.cfg +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/bigquery_client.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/comparison/comparator.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/comparison/threshold.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/config.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/config_loader.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/__init__.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/base_connector.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/bigquery_connector.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/mysql_connector.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/credentials_extractor.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/reporter.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/validator.py +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/SOURCES.txt +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/dependency_links.txt +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/entry_points.txt +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/requires.txt +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/top_level.txt +0 -0
- {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/tests/test_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dataqe-framework
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation
|
|
5
5
|
Author-email: Khadar Shaik <khadarmohiddin.shaik@apree.health>
|
|
6
6
|
Project-URL: Homepage, https://github.com/ShaikKhadarmohiddin/dataqe-framework
|
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "dataqe-framework"
|
|
7
7
|
dynamic = []
|
|
8
|
-
version = "0.2.4"
|
|
8
|
+
version = "0.2.6"
|
|
9
9
|
description = "Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation"
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
requires-python = ">=3.9"
|
|
@@ -113,6 +113,7 @@ def main():
|
|
|
113
113
|
|
|
114
114
|
# Get preprocessor queries path if specified
|
|
115
115
|
preprocessor_queries_path = config_block["other"].get("preprocessor_queries")
|
|
116
|
+
|
|
116
117
|
if preprocessor_queries_path:
|
|
117
118
|
# Resolve relative path if needed
|
|
118
119
|
if not os.path.isabs(preprocessor_queries_path):
|
|
@@ -17,11 +17,51 @@ class ValidationExecutor:
|
|
|
17
17
|
|
|
18
18
|
self.source_connector = None
|
|
19
19
|
self.target_connector = None
|
|
20
|
-
self.preprocessor = None
|
|
20
|
+
self.source_preprocessor = None
|
|
21
|
+
self.target_preprocessor = None
|
|
21
22
|
|
|
22
|
-
# Initialize
|
|
23
|
+
# Initialize preprocessors if path is provided
|
|
23
24
|
if preprocessor_queries_path:
|
|
24
|
-
|
|
25
|
+
# Extract config_query_key from source config (under gcp/mysql/etc)
|
|
26
|
+
src_config = self._extract_preprocessor_config(source_config)
|
|
27
|
+
self.source_preprocessor = QueryPreprocessor(preprocessor_queries_path, src_config)
|
|
28
|
+
|
|
29
|
+
# Extract config_query_key from target config (under gcp/mysql/etc)
|
|
30
|
+
tgt_config = self._extract_preprocessor_config(target_config)
|
|
31
|
+
self.target_preprocessor = QueryPreprocessor(preprocessor_queries_path, tgt_config)
|
|
32
|
+
|
|
33
|
+
def _extract_preprocessor_config(self, config: dict) -> dict:
|
|
34
|
+
"""
|
|
35
|
+
Extract preprocessor config (config_query_key) from database-specific config.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
config: Source or target config block
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
Dictionary with config_query_key if found, empty dict otherwise
|
|
42
|
+
"""
|
|
43
|
+
if not config:
|
|
44
|
+
return {}
|
|
45
|
+
|
|
46
|
+
# Get database type from config
|
|
47
|
+
db_type = config.get("database_type")
|
|
48
|
+
if not db_type:
|
|
49
|
+
return {}
|
|
50
|
+
|
|
51
|
+
# Map database type to config key (gcpbq uses "gcp" config key)
|
|
52
|
+
config_key = "gcp" if db_type == "gcpbq" else db_type
|
|
53
|
+
|
|
54
|
+
# Extract database-specific config (gcp, mysql, etc.)
|
|
55
|
+
db_config = config.get(config_key)
|
|
56
|
+
if not db_config or not isinstance(db_config, dict):
|
|
57
|
+
return {}
|
|
58
|
+
|
|
59
|
+
# Extract config_query_key if present
|
|
60
|
+
config_query_key = db_config.get("config_query_key")
|
|
61
|
+
if config_query_key:
|
|
62
|
+
return {"config_query_key": config_query_key}
|
|
63
|
+
|
|
64
|
+
return {}
|
|
25
65
|
|
|
26
66
|
def setup_connectors(self):
|
|
27
67
|
if self.source_config:
|
|
@@ -59,11 +99,9 @@ class ValidationExecutor:
|
|
|
59
99
|
if "source" in test_config:
|
|
60
100
|
source_query = test_config["source"]["query"]
|
|
61
101
|
|
|
62
|
-
# Process query with preprocessor
|
|
102
|
+
# Process query with source preprocessor (automatic replacement of all release labels)
|
|
63
103
|
source_query = self._process_query_with_preprocessor(
|
|
64
|
-
source_query,
|
|
65
|
-
test_config["source"],
|
|
66
|
-
self.source_connector
|
|
104
|
+
source_query, self.source_connector, self.source_preprocessor
|
|
67
105
|
)
|
|
68
106
|
|
|
69
107
|
source_query_start = datetime.now()
|
|
@@ -75,11 +113,9 @@ class ValidationExecutor:
|
|
|
75
113
|
if "target" in test_config:
|
|
76
114
|
target_query = test_config["target"]["query"]
|
|
77
115
|
|
|
78
|
-
# Process query with preprocessor
|
|
116
|
+
# Process query with target preprocessor (automatic replacement of all release labels)
|
|
79
117
|
target_query = self._process_query_with_preprocessor(
|
|
80
|
-
target_query,
|
|
81
|
-
test_config["target"],
|
|
82
|
-
self.target_connector
|
|
118
|
+
target_query, self.target_connector, self.target_preprocessor
|
|
83
119
|
)
|
|
84
120
|
|
|
85
121
|
target_query_start = datetime.now()
|
|
@@ -128,43 +164,28 @@ class ValidationExecutor:
|
|
|
128
164
|
# assuming single value queries
|
|
129
165
|
return list(result[0].values())[0]
|
|
130
166
|
|
|
131
|
-
def _process_query_with_preprocessor(self, query: str,
|
|
167
|
+
def _process_query_with_preprocessor(self, query: str, connector, preprocessor) -> str:
|
|
132
168
|
"""
|
|
133
|
-
Process query with preprocessor
|
|
169
|
+
Process query with preprocessor to replace all release label placeholders.
|
|
170
|
+
|
|
171
|
+
Automatically replaces all SOURCE_CURR_WEEK and SOURCE_PREV_WEEK placeholders
|
|
172
|
+
without needing per-test configuration.
|
|
134
173
|
|
|
135
174
|
Args:
|
|
136
175
|
query: Original query string
|
|
137
|
-
config_block: Configuration block for source or target (should contain query and optionally config_query_key)
|
|
138
176
|
connector: Database connector to use
|
|
177
|
+
preprocessor: QueryPreprocessor instance (source or target)
|
|
139
178
|
|
|
140
179
|
Returns:
|
|
141
180
|
Processed query (with replacements if applicable) or original query
|
|
142
181
|
"""
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
if not config_query_key:
|
|
147
|
-
# No preprocessor query key, return original query
|
|
182
|
+
if not preprocessor or not connector:
|
|
183
|
+
# Preprocessor not initialized or no connector, return original query
|
|
148
184
|
return query
|
|
149
185
|
|
|
150
|
-
if not self.preprocessor:
|
|
151
|
-
# Preprocessor not initialized, return original query
|
|
152
|
-
logger.warning(
|
|
153
|
-
f"config_query_key '{config_query_key}' specified but preprocessor not initialized"
|
|
154
|
-
)
|
|
155
|
-
return query
|
|
156
|
-
|
|
157
|
-
# Get source_name from config block if provided
|
|
158
|
-
source_name = config_block.get("source_name")
|
|
159
|
-
|
|
160
186
|
try:
|
|
161
|
-
# Process query through preprocessor
|
|
162
|
-
processed_query =
|
|
163
|
-
query,
|
|
164
|
-
config_query_key,
|
|
165
|
-
source_name,
|
|
166
|
-
connector
|
|
167
|
-
)
|
|
187
|
+
# Process query through preprocessor with automatic replacement
|
|
188
|
+
processed_query = preprocessor.replace_release_labels(query, connector)
|
|
168
189
|
return processed_query
|
|
169
190
|
except Exception as e:
|
|
170
191
|
logger.error(f"Error processing query with preprocessor: {str(e)}")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import yaml
|
|
2
2
|
import os
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Dict, Any, Optional
|
|
4
|
+
from typing import Dict, Any, Optional, List
|
|
5
5
|
from dataqe_framework.connectors import get_connector
|
|
6
6
|
|
|
7
7
|
logger = logging.getLogger(__name__)
|
|
@@ -15,17 +15,19 @@ class QueryPreprocessor:
|
|
|
15
15
|
and replaces placeholders in test queries with actual dataset names.
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
|
-
def __init__(self, preprocessor_queries_path: str = None):
|
|
18
|
+
def __init__(self, preprocessor_queries_path: str = None, preprocessor_config: Dict[str, Any] = None):
|
|
19
19
|
"""
|
|
20
20
|
Initialize the QueryPreprocessor.
|
|
21
21
|
|
|
22
22
|
Args:
|
|
23
23
|
preprocessor_queries_path: Path to preprocessor_queries.yml file.
|
|
24
|
-
|
|
24
|
+
preprocessor_config: Configuration dict with config_query_key and other settings.
|
|
25
25
|
"""
|
|
26
26
|
self.preprocessor_queries_path = preprocessor_queries_path
|
|
27
|
+
self.preprocessor_config = preprocessor_config or {}
|
|
27
28
|
self.preprocessor_queries = {}
|
|
28
29
|
self.dataset_mappings = {}
|
|
30
|
+
self.release_labels_cache = None
|
|
29
31
|
|
|
30
32
|
if self.preprocessor_queries_path:
|
|
31
33
|
self._load_preprocessor_queries()
|
|
@@ -98,6 +100,14 @@ class QueryPreprocessor:
|
|
|
98
100
|
}
|
|
99
101
|
|
|
100
102
|
logger.info(f"Generated dataset mappings: {mappings}")
|
|
103
|
+
if mappings:
|
|
104
|
+
logger.info("=" * 60)
|
|
105
|
+
logger.info("PREPROCESSOR QUERY RESULTS:")
|
|
106
|
+
for source, mapping in mappings.items():
|
|
107
|
+
logger.info(f" Source: {source}")
|
|
108
|
+
logger.info(f" Current Release: {mapping.get('current_release')}")
|
|
109
|
+
logger.info(f" Previous Release: {mapping.get('previous_release')}")
|
|
110
|
+
logger.info("=" * 60)
|
|
101
111
|
return mappings
|
|
102
112
|
|
|
103
113
|
except Exception as e:
|
|
@@ -186,3 +196,91 @@ class QueryPreprocessor:
|
|
|
186
196
|
|
|
187
197
|
# Replace placeholders in query
|
|
188
198
|
return self.replace_placeholders_in_query(query, source_name, mappings)
|
|
199
|
+
|
|
200
|
+
def replace_release_labels(self, query: str, connector: Any) -> str:
|
|
201
|
+
"""
|
|
202
|
+
Automatically replace all release label placeholders in query without needing
|
|
203
|
+
source_name or config_query_key specified per query.
|
|
204
|
+
|
|
205
|
+
This method executes the preprocessor query defined in preprocessor_config,
|
|
206
|
+
gets all release label mappings, and replaces all SOURCE_CURR_WEEK and SOURCE_PREV_WEEK
|
|
207
|
+
placeholders in the query.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
query: Original query string with placeholders like SOURCE_CURR_WEEK, SOURCE_PREV_WEEK
|
|
211
|
+
connector: Database connector for executing preprocessor query
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
Processed query with all placeholders replaced by actual dataset names
|
|
215
|
+
"""
|
|
216
|
+
# If no preprocessor config or config_query_key, return original query
|
|
217
|
+
if not self.preprocessor_config or not self.preprocessor_config.get("config_query_key"):
|
|
218
|
+
return query
|
|
219
|
+
|
|
220
|
+
# Get release labels (cache to avoid multiple queries)
|
|
221
|
+
if self.release_labels_cache is None:
|
|
222
|
+
config_query_key = self.preprocessor_config.get("config_query_key")
|
|
223
|
+
release_labels = self.get_dataset_mappings(config_query_key, connector)
|
|
224
|
+
|
|
225
|
+
if not release_labels:
|
|
226
|
+
return query
|
|
227
|
+
|
|
228
|
+
# Convert mappings to list format for easier iteration
|
|
229
|
+
self.release_labels_cache = [
|
|
230
|
+
{
|
|
231
|
+
"source": source,
|
|
232
|
+
"curr_release_label": mapping.get("current_release"),
|
|
233
|
+
"prev_release_label": mapping.get("previous_release")
|
|
234
|
+
}
|
|
235
|
+
for source, mapping in release_labels.items()
|
|
236
|
+
]
|
|
237
|
+
|
|
238
|
+
# Replace all placeholders in query
|
|
239
|
+
return self._replace_all_release_labels(query, self.release_labels_cache)
|
|
240
|
+
|
|
241
|
+
def _replace_all_release_labels(self, query: str, release_labels: List[Dict[str, str]]) -> str:
|
|
242
|
+
"""
|
|
243
|
+
Replace all SOURCE_CURR_WEEK and SOURCE_PREV_WEEK placeholders in query.
|
|
244
|
+
|
|
245
|
+
Args:
|
|
246
|
+
query: Original query string
|
|
247
|
+
release_labels: List of release label mappings
|
|
248
|
+
|
|
249
|
+
Returns:
|
|
250
|
+
Query with all placeholders replaced
|
|
251
|
+
"""
|
|
252
|
+
modified_query = query
|
|
253
|
+
|
|
254
|
+
if not release_labels:
|
|
255
|
+
logger.debug("No release labels to replace")
|
|
256
|
+
return modified_query
|
|
257
|
+
|
|
258
|
+
replacements_made = False
|
|
259
|
+
for label in release_labels:
|
|
260
|
+
source = label.get("source", "").upper()
|
|
261
|
+
curr_label = label.get("curr_release_label")
|
|
262
|
+
prev_label = label.get("prev_release_label")
|
|
263
|
+
|
|
264
|
+
if not source or not curr_label or not prev_label:
|
|
265
|
+
logger.debug(f"Skipping incomplete label for source '{source}'")
|
|
266
|
+
continue
|
|
267
|
+
|
|
268
|
+
# Check if placeholders exist in query before replacing
|
|
269
|
+
curr_placeholder = f"{source}_CURR_WEEK"
|
|
270
|
+
prev_placeholder = f"{source}_PREV_WEEK"
|
|
271
|
+
|
|
272
|
+
if curr_placeholder in modified_query or prev_placeholder in modified_query:
|
|
273
|
+
replacements_made = True
|
|
274
|
+
modified_query = modified_query.replace(curr_placeholder, curr_label).replace(prev_placeholder, prev_label)
|
|
275
|
+
logger.info(
|
|
276
|
+
f"Replaced placeholders for '{source}': "
|
|
277
|
+
f"{curr_placeholder} → {curr_label}, "
|
|
278
|
+
f"{prev_placeholder} → {prev_label}"
|
|
279
|
+
)
|
|
280
|
+
else:
|
|
281
|
+
logger.debug(f"No placeholders found for '{source}'")
|
|
282
|
+
|
|
283
|
+
if not replacements_made:
|
|
284
|
+
logger.info("No placeholder replacements made - query returned unchanged")
|
|
285
|
+
|
|
286
|
+
return modified_query
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dataqe-framework
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation
|
|
5
5
|
Author-email: Khadar Shaik <khadarmohiddin.shaik@apree.health>
|
|
6
6
|
Project-URL: Homepage, https://github.com/ShaikKhadarmohiddin/dataqe-framework
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/comparison/comparator.py
RENAMED
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/comparison/threshold.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/__init__.py
RENAMED
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/base_connector.py
RENAMED
|
File without changes
|
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/mysql_connector.py
RENAMED
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/credentials_extractor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/requires.txt
RENAMED
|
File without changes
|
{dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|