dataqe-framework 0.2.4__tar.gz → 0.2.6__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (29)
  1. {dataqe_framework-0.2.4/src/dataqe_framework.egg-info → dataqe_framework-0.2.6}/PKG-INFO +1 -1
  2. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/pyproject.toml +1 -1
  3. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/__init__.py +1 -1
  4. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/cli.py +1 -0
  5. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/executor.py +57 -36
  6. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/preprocessor.py +101 -3
  7. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6/src/dataqe_framework.egg-info}/PKG-INFO +1 -1
  8. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/LICENSE.txt +0 -0
  9. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/MANIFEST.in +0 -0
  10. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/README.md +0 -0
  11. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/setup.cfg +0 -0
  12. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/bigquery_client.py +0 -0
  13. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/comparison/comparator.py +0 -0
  14. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/comparison/threshold.py +0 -0
  15. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/config.py +0 -0
  16. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/config_loader.py +0 -0
  17. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/__init__.py +0 -0
  18. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/base_connector.py +0 -0
  19. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/bigquery_connector.py +0 -0
  20. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/connectors/mysql_connector.py +0 -0
  21. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/credentials_extractor.py +0 -0
  22. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/reporter.py +0 -0
  23. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework/validator.py +0 -0
  24. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/SOURCES.txt +0 -0
  25. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/dependency_links.txt +0 -0
  26. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/entry_points.txt +0 -0
  27. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/requires.txt +0 -0
  28. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/src/dataqe_framework.egg-info/top_level.txt +0 -0
  29. {dataqe_framework-0.2.4 → dataqe_framework-0.2.6}/tests/test_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataqe-framework
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation
5
5
  Author-email: Khadar Shaik <khadarmohiddin.shaik@apree.health>
6
6
  Project-URL: Homepage, https://github.com/ShaikKhadarmohiddin/dataqe-framework
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "dataqe-framework"
7
7
  dynamic = []
8
- version = "0.2.4"
8
+ version = "0.2.6"
9
9
  description = "Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation"
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.9"
@@ -1,7 +1,7 @@
1
1
  from .validator import DataValidator
2
2
  from .credentials_extractor import CredentialsExtractor
3
3
 
4
- __version__ = "0.2.4"
4
+ __version__ = "0.2.6"
5
5
 
6
6
  __all__ = ["DataValidator", "CredentialsExtractor"]
7
7
 
@@ -113,6 +113,7 @@ def main():
113
113
 
114
114
  # Get preprocessor queries path if specified
115
115
  preprocessor_queries_path = config_block["other"].get("preprocessor_queries")
116
+
116
117
  if preprocessor_queries_path:
117
118
  # Resolve relative path if needed
118
119
  if not os.path.isabs(preprocessor_queries_path):
@@ -17,11 +17,51 @@ class ValidationExecutor:
17
17
 
18
18
  self.source_connector = None
19
19
  self.target_connector = None
20
- self.preprocessor = None
20
+ self.source_preprocessor = None
21
+ self.target_preprocessor = None
21
22
 
22
- # Initialize preprocessor if path is provided
23
+ # Initialize preprocessors if path is provided
23
24
  if preprocessor_queries_path:
24
- self.preprocessor = QueryPreprocessor(preprocessor_queries_path)
25
+ # Extract config_query_key from source config (under gcp/mysql/etc)
26
+ src_config = self._extract_preprocessor_config(source_config)
27
+ self.source_preprocessor = QueryPreprocessor(preprocessor_queries_path, src_config)
28
+
29
+ # Extract config_query_key from target config (under gcp/mysql/etc)
30
+ tgt_config = self._extract_preprocessor_config(target_config)
31
+ self.target_preprocessor = QueryPreprocessor(preprocessor_queries_path, tgt_config)
32
+
33
+ def _extract_preprocessor_config(self, config: dict) -> dict:
34
+ """
35
+ Extract preprocessor config (config_query_key) from database-specific config.
36
+
37
+ Args:
38
+ config: Source or target config block
39
+
40
+ Returns:
41
+ Dictionary with config_query_key if found, empty dict otherwise
42
+ """
43
+ if not config:
44
+ return {}
45
+
46
+ # Get database type from config
47
+ db_type = config.get("database_type")
48
+ if not db_type:
49
+ return {}
50
+
51
+ # Map database type to config key (gcpbq uses "gcp" config key)
52
+ config_key = "gcp" if db_type == "gcpbq" else db_type
53
+
54
+ # Extract database-specific config (gcp, mysql, etc.)
55
+ db_config = config.get(config_key)
56
+ if not db_config or not isinstance(db_config, dict):
57
+ return {}
58
+
59
+ # Extract config_query_key if present
60
+ config_query_key = db_config.get("config_query_key")
61
+ if config_query_key:
62
+ return {"config_query_key": config_query_key}
63
+
64
+ return {}
25
65
 
26
66
  def setup_connectors(self):
27
67
  if self.source_config:
@@ -59,11 +99,9 @@ class ValidationExecutor:
59
99
  if "source" in test_config:
60
100
  source_query = test_config["source"]["query"]
61
101
 
62
- # Process query with preprocessor if config_query_key exists
102
+ # Process query with source preprocessor (automatic replacement of all release labels)
63
103
  source_query = self._process_query_with_preprocessor(
64
- source_query,
65
- test_config["source"],
66
- self.source_connector
104
+ source_query, self.source_connector, self.source_preprocessor
67
105
  )
68
106
 
69
107
  source_query_start = datetime.now()
@@ -75,11 +113,9 @@ class ValidationExecutor:
75
113
  if "target" in test_config:
76
114
  target_query = test_config["target"]["query"]
77
115
 
78
- # Process query with preprocessor if config_query_key exists
116
+ # Process query with target preprocessor (automatic replacement of all release labels)
79
117
  target_query = self._process_query_with_preprocessor(
80
- target_query,
81
- test_config["target"],
82
- self.target_connector
118
+ target_query, self.target_connector, self.target_preprocessor
83
119
  )
84
120
 
85
121
  target_query_start = datetime.now()
@@ -128,43 +164,28 @@ class ValidationExecutor:
128
164
  # assuming single value queries
129
165
  return list(result[0].values())[0]
130
166
 
131
- def _process_query_with_preprocessor(self, query: str, config_block: dict, connector) -> str:
167
+ def _process_query_with_preprocessor(self, query: str, connector, preprocessor) -> str:
132
168
  """
133
- Process query with preprocessor if config_query_key is present in config block.
169
+ Process query with preprocessor to replace all release label placeholders.
170
+
171
+ Automatically replaces all SOURCE_CURR_WEEK and SOURCE_PREV_WEEK placeholders
172
+ without needing per-test configuration.
134
173
 
135
174
  Args:
136
175
  query: Original query string
137
- config_block: Configuration block for source or target (should contain query and optionally config_query_key)
138
176
  connector: Database connector to use
177
+ preprocessor: QueryPreprocessor instance (source or target)
139
178
 
140
179
  Returns:
141
180
  Processed query (with replacements if applicable) or original query
142
181
  """
143
- # Check if config_query_key exists in config block
144
- config_query_key = config_block.get("config_query_key")
145
-
146
- if not config_query_key:
147
- # No preprocessor query key, return original query
182
+ if not preprocessor or not connector:
183
+ # Preprocessor not initialized or no connector, return original query
148
184
  return query
149
185
 
150
- if not self.preprocessor:
151
- # Preprocessor not initialized, return original query
152
- logger.warning(
153
- f"config_query_key '{config_query_key}' specified but preprocessor not initialized"
154
- )
155
- return query
156
-
157
- # Get source_name from config block if provided
158
- source_name = config_block.get("source_name")
159
-
160
186
  try:
161
- # Process query through preprocessor
162
- processed_query = self.preprocessor.process_query(
163
- query,
164
- config_query_key,
165
- source_name,
166
- connector
167
- )
187
+ # Process query through preprocessor with automatic replacement
188
+ processed_query = preprocessor.replace_release_labels(query, connector)
168
189
  return processed_query
169
190
  except Exception as e:
170
191
  logger.error(f"Error processing query with preprocessor: {str(e)}")
@@ -1,7 +1,7 @@
1
1
  import yaml
2
2
  import os
3
3
  import logging
4
- from typing import Dict, Any, Optional
4
+ from typing import Dict, Any, Optional, List
5
5
  from dataqe_framework.connectors import get_connector
6
6
 
7
7
  logger = logging.getLogger(__name__)
@@ -15,17 +15,19 @@ class QueryPreprocessor:
15
15
  and replaces placeholders in test queries with actual dataset names.
16
16
  """
17
17
 
18
- def __init__(self, preprocessor_queries_path: str = None):
18
+ def __init__(self, preprocessor_queries_path: str = None, preprocessor_config: Dict[str, Any] = None):
19
19
  """
20
20
  Initialize the QueryPreprocessor.
21
21
 
22
22
  Args:
23
23
  preprocessor_queries_path: Path to preprocessor_queries.yml file.
24
- If not provided, attempts to load from default location.
24
+ preprocessor_config: Configuration dict with config_query_key and other settings.
25
25
  """
26
26
  self.preprocessor_queries_path = preprocessor_queries_path
27
+ self.preprocessor_config = preprocessor_config or {}
27
28
  self.preprocessor_queries = {}
28
29
  self.dataset_mappings = {}
30
+ self.release_labels_cache = None
29
31
 
30
32
  if self.preprocessor_queries_path:
31
33
  self._load_preprocessor_queries()
@@ -98,6 +100,14 @@ class QueryPreprocessor:
98
100
  }
99
101
 
100
102
  logger.info(f"Generated dataset mappings: {mappings}")
103
+ if mappings:
104
+ logger.info("=" * 60)
105
+ logger.info("PREPROCESSOR QUERY RESULTS:")
106
+ for source, mapping in mappings.items():
107
+ logger.info(f" Source: {source}")
108
+ logger.info(f" Current Release: {mapping.get('current_release')}")
109
+ logger.info(f" Previous Release: {mapping.get('previous_release')}")
110
+ logger.info("=" * 60)
101
111
  return mappings
102
112
 
103
113
  except Exception as e:
@@ -186,3 +196,91 @@ class QueryPreprocessor:
186
196
 
187
197
  # Replace placeholders in query
188
198
  return self.replace_placeholders_in_query(query, source_name, mappings)
199
+
200
+ def replace_release_labels(self, query: str, connector: Any) -> str:
201
+ """
202
+ Automatically replace all release label placeholders in query without needing
203
+ source_name or config_query_key specified per query.
204
+
205
+ This method executes the preprocessor query defined in preprocessor_config,
206
+ gets all release label mappings, and replaces all SOURCE_CURR_WEEK and SOURCE_PREV_WEEK
207
+ placeholders in the query.
208
+
209
+ Args:
210
+ query: Original query string with placeholders like SOURCE_CURR_WEEK, SOURCE_PREV_WEEK
211
+ connector: Database connector for executing preprocessor query
212
+
213
+ Returns:
214
+ Processed query with all placeholders replaced by actual dataset names
215
+ """
216
+ # If no preprocessor config or config_query_key, return original query
217
+ if not self.preprocessor_config or not self.preprocessor_config.get("config_query_key"):
218
+ return query
219
+
220
+ # Get release labels (cache to avoid multiple queries)
221
+ if self.release_labels_cache is None:
222
+ config_query_key = self.preprocessor_config.get("config_query_key")
223
+ release_labels = self.get_dataset_mappings(config_query_key, connector)
224
+
225
+ if not release_labels:
226
+ return query
227
+
228
+ # Convert mappings to list format for easier iteration
229
+ self.release_labels_cache = [
230
+ {
231
+ "source": source,
232
+ "curr_release_label": mapping.get("current_release"),
233
+ "prev_release_label": mapping.get("previous_release")
234
+ }
235
+ for source, mapping in release_labels.items()
236
+ ]
237
+
238
+ # Replace all placeholders in query
239
+ return self._replace_all_release_labels(query, self.release_labels_cache)
240
+
241
+ def _replace_all_release_labels(self, query: str, release_labels: List[Dict[str, str]]) -> str:
242
+ """
243
+ Replace all SOURCE_CURR_WEEK and SOURCE_PREV_WEEK placeholders in query.
244
+
245
+ Args:
246
+ query: Original query string
247
+ release_labels: List of release label mappings
248
+
249
+ Returns:
250
+ Query with all placeholders replaced
251
+ """
252
+ modified_query = query
253
+
254
+ if not release_labels:
255
+ logger.debug("No release labels to replace")
256
+ return modified_query
257
+
258
+ replacements_made = False
259
+ for label in release_labels:
260
+ source = label.get("source", "").upper()
261
+ curr_label = label.get("curr_release_label")
262
+ prev_label = label.get("prev_release_label")
263
+
264
+ if not source or not curr_label or not prev_label:
265
+ logger.debug(f"Skipping incomplete label for source '{source}'")
266
+ continue
267
+
268
+ # Check if placeholders exist in query before replacing
269
+ curr_placeholder = f"{source}_CURR_WEEK"
270
+ prev_placeholder = f"{source}_PREV_WEEK"
271
+
272
+ if curr_placeholder in modified_query or prev_placeholder in modified_query:
273
+ replacements_made = True
274
+ modified_query = modified_query.replace(curr_placeholder, curr_label).replace(prev_placeholder, prev_label)
275
+ logger.info(
276
+ f"Replaced placeholders for '{source}': "
277
+ f"{curr_placeholder} → {curr_label}, "
278
+ f"{prev_placeholder} → {prev_label}"
279
+ )
280
+ else:
281
+ logger.debug(f"No placeholders found for '{source}'")
282
+
283
+ if not replacements_made:
284
+ logger.info("No placeholder replacements made - query returned unchanged")
285
+
286
+ return modified_query
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataqe-framework
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation
5
5
  Author-email: Khadar Shaik <khadarmohiddin.shaik@apree.health>
6
6
  Project-URL: Homepage, https://github.com/ShaikKhadarmohiddin/dataqe-framework