dataqe-framework 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataqe_framework-0.2.0/src/dataqe_framework.egg-info → dataqe_framework-0.2.2}/PKG-INFO +3 -2
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/pyproject.toml +3 -2
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/__init__.py +1 -1
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/bigquery_connector.py +28 -24
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/mysql_connector.py +20 -3
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2/src/dataqe_framework.egg-info}/PKG-INFO +3 -2
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/LICENSE.txt +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/MANIFEST.in +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/README.md +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/setup.cfg +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/bigquery_client.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/cli.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/comparison/comparator.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/comparison/threshold.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/config.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/config_loader.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/__init__.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/base_connector.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/credentials_extractor.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/executor.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/preprocessor.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/reporter.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/validator.py +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/SOURCES.txt +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/dependency_links.txt +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/entry_points.txt +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/requires.txt +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/top_level.txt +0 -0
- {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/tests/test_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dataqe-framework
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation
|
|
5
5
|
Author-email: Khadar Shaik <khadarmohiddin.shaik@apree.health>
|
|
6
6
|
Project-URL: Homepage, https://github.com/ShaikKhadarmohiddin/dataqe-framework
|
|
@@ -8,7 +8,7 @@ Project-URL: Documentation, https://github.com/ShaikKhadarmohiddin/dataqe-framew
|
|
|
8
8
|
Project-URL: Repository, https://github.com/ShaikKhadarmohiddin/dataqe-framework.git
|
|
9
9
|
Project-URL: Issues, https://github.com/ShaikKhadarmohiddin/dataqe-framework/issues
|
|
10
10
|
Keywords: data-validation,data-quality,testing,ETL,migration,mysql,bigquery
|
|
11
|
-
Classifier: Development Status ::
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
12
|
Classifier: Intended Audience :: Developers
|
|
13
13
|
Classifier: Intended Audience :: System Administrators
|
|
14
14
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
21
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
22
|
Classifier: Topic :: Database
|
|
22
23
|
Classifier: Topic :: Software Development :: Testing
|
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "dataqe-framework"
|
|
7
7
|
dynamic = []
|
|
8
|
-
version = "0.2.0"
|
|
8
|
+
version = "0.2.2"
|
|
9
9
|
description = "Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation"
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
requires-python = ">=3.9"
|
|
@@ -22,7 +22,7 @@ keywords = [
|
|
|
22
22
|
"bigquery"
|
|
23
23
|
]
|
|
24
24
|
classifiers = [
|
|
25
|
-
"Development Status ::
|
|
25
|
+
"Development Status :: 5 - Production/Stable",
|
|
26
26
|
"Intended Audience :: Developers",
|
|
27
27
|
"Intended Audience :: System Administrators",
|
|
28
28
|
"License :: OSI Approved :: MIT License",
|
|
@@ -31,6 +31,7 @@ classifiers = [
|
|
|
31
31
|
"Programming Language :: Python :: 3.10",
|
|
32
32
|
"Programming Language :: Python :: 3.11",
|
|
33
33
|
"Programming Language :: Python :: 3.12",
|
|
34
|
+
"Programming Language :: Python :: 3.13",
|
|
34
35
|
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
35
36
|
"Topic :: Database",
|
|
36
37
|
"Topic :: Software Development :: Testing",
|
|
@@ -3,7 +3,12 @@ import logging
|
|
|
3
3
|
from google.cloud import bigquery
|
|
4
4
|
from google.oauth2 import service_account
|
|
5
5
|
from .base_connector import BaseConnector
|
|
6
|
-
|
|
6
|
+
execution_env = os.environ['SPRING_PROFILES_ACTIVE']
|
|
7
|
+
if (execution_env.upper() != "MYLOCAL"):
|
|
8
|
+
import castlight_common_lib.configfunctions as cfg
|
|
9
|
+
config_details = cfg.Config('dataqeteam', [os.environ.get('SPRING_PROFILES_ACTIVE')])
|
|
10
|
+
else:
|
|
11
|
+
config_details = None
|
|
7
12
|
logger = logging.getLogger(__name__)
|
|
8
13
|
|
|
9
14
|
|
|
@@ -21,35 +26,16 @@ class BigQueryConnector(BaseConnector):
|
|
|
21
26
|
Args:
|
|
22
27
|
config: Dictionary containing BigQuery configuration with keys:
|
|
23
28
|
- project_id: GCP project ID
|
|
24
|
-
- dataset_id: BigQuery dataset ID
|
|
25
29
|
- service_account (optional): Service account name for credential lookup
|
|
26
30
|
- credentials_path (optional): Path to service account JSON file
|
|
27
31
|
- location (optional): BigQuery location (default: us-central1)
|
|
28
|
-
- location_map (optional): Dict mapping environment to location
|
|
29
32
|
- infra_core (optional): Infrastructure core name
|
|
30
|
-
-
|
|
33
|
+
- k8_db_details (optional): Details of GCP project_id dataset_id from K8 env
|
|
31
34
|
"""
|
|
32
35
|
self.project_id = config.get("project_id")
|
|
33
|
-
self.
|
|
34
|
-
self.
|
|
35
|
-
self.
|
|
36
|
-
|
|
37
|
-
# Get execution environment
|
|
38
|
-
self.execution_env = os.environ.get("SPRING_PROFILES_ACTIVE", "mylocal").upper()
|
|
39
|
-
|
|
40
|
-
# Get location with fallback to default
|
|
41
|
-
location_map = config.get("location_map", {})
|
|
42
|
-
self.location = location_map.get(
|
|
43
|
-
os.environ.get("SPRING_PROFILES_ACTIVE", "mylocal"),
|
|
44
|
-
config.get("location", "us-central1")
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
# Get infra-core with fallback to default
|
|
48
|
-
infra_core_map = config.get("infra_core_map", {})
|
|
49
|
-
self.infra_core = infra_core_map.get(
|
|
50
|
-
os.environ.get("SPRING_PROFILES_ACTIVE", "mylocal"),
|
|
51
|
-
config.get("infra_core", "infra-core-us-central1")
|
|
52
|
-
)
|
|
36
|
+
self.service_account_name = config.get("service_account", "qe_kube_sa_key")
|
|
37
|
+
self.location = config.get("location", "us-central1")
|
|
38
|
+
self.infra_core = config.get("infra_core", "infra-core-us-central1")
|
|
53
39
|
|
|
54
40
|
# KMS encryption settings for PHI data
|
|
55
41
|
self.use_encryption = config.get("use_encryption", False)
|
|
@@ -58,6 +44,24 @@ class BigQueryConnector(BaseConnector):
|
|
|
58
44
|
self._encryption_config = None
|
|
59
45
|
self._query_job_config = None
|
|
60
46
|
|
|
47
|
+
if "k8_db_details" in config and config_details is not None:
|
|
48
|
+
try:
|
|
49
|
+
project, db_name = config.get("k8_db_details").split('_')
|
|
50
|
+
self.project_id = config_details.data['bigquery'][project]['datasets'][db_name]['project_id']
|
|
51
|
+
self.credentials_path = os.path.join(os.getcwd(), self.project_id.replace('-', '_') + '_sftp_client_secrets.json')
|
|
52
|
+
self.extract_service_account(config_details, self.credentials_path, self.service_account_name)
|
|
53
|
+
except (ValueError, KeyError) as e:
|
|
54
|
+
logger.error(f"Failed to extract Kubernetes configuration: {str(e)}")
|
|
55
|
+
raise ValueError(f"Invalid k8_db_details format or missing configuration: {str(e)}")
|
|
56
|
+
else:
|
|
57
|
+
self.credentials_path = config.get("credentials_path")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def extract_service_account(self, config_details, service_config_file, serv_acct_name):
|
|
61
|
+
with open(service_config_file, "w") as file:
|
|
62
|
+
file.write(config_details.data['gcp'][serv_acct_name])
|
|
63
|
+
# print(config_details.data['gcp'][serv_acct_name])
|
|
64
|
+
|
|
61
65
|
def _setup_encryption(self):
|
|
62
66
|
"""
|
|
63
67
|
Setup KMS encryption configuration for PHI data.
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/mysql_connector.py
RENAMED
|
@@ -2,20 +2,37 @@ import os
|
|
|
2
2
|
import pymysql
|
|
3
3
|
import logging
|
|
4
4
|
from .base_connector import BaseConnector
|
|
5
|
-
|
|
5
|
+
execution_env = os.environ['SPRING_PROFILES_ACTIVE']
|
|
6
|
+
if (execution_env.upper() != "MYLOCAL"):
|
|
7
|
+
import castlight_common_lib.configfunctions as cfg
|
|
8
|
+
config_details = cfg.Config('dataqeteam', [os.environ.get('SPRING_PROFILES_ACTIVE')])
|
|
9
|
+
else:
|
|
10
|
+
config_details = None
|
|
6
11
|
logger = logging.getLogger(__name__)
|
|
7
12
|
|
|
8
13
|
|
|
9
14
|
class MySQLConnector(BaseConnector):
|
|
10
15
|
|
|
11
|
-
def __init__(self, host, port, user, password, database):
|
|
16
|
+
def __init__(self, host=None, port=None, user=None, password=None, database=None, k8_db_details=None):
|
|
12
17
|
self.host = host
|
|
13
18
|
self.port = port
|
|
14
19
|
self.user = user
|
|
15
20
|
self.password = password
|
|
16
21
|
self.database = database
|
|
17
22
|
self.connection = None
|
|
18
|
-
|
|
23
|
+
if k8_db_details is None or config_details is None:
|
|
24
|
+
logger.info(f"Locally MySQLConnector initialized for host={host}, database={database}")
|
|
25
|
+
else:
|
|
26
|
+
try:
|
|
27
|
+
project, db_name = k8_db_details.split('_')
|
|
28
|
+
self.host = config_details.data['mysql'][project][db_name]['db_host']
|
|
29
|
+
self.port = config_details.data['mysql'][project][db_name]['db_port']
|
|
30
|
+
self.user = config_details.data['mysql'][project][db_name]['db_user']
|
|
31
|
+
self.password = config_details.data['mysql'][project][db_name]['db_password']
|
|
32
|
+
self.database = config_details.data['mysql'][project][db_name]['db_name']
|
|
33
|
+
except (ValueError, KeyError) as e:
|
|
34
|
+
logger.error(f"Failed to extract Kubernetes configuration: {str(e)}")
|
|
35
|
+
raise ValueError(f"Invalid k8_db_details format or missing configuration: {str(e)}")
|
|
19
36
|
|
|
20
37
|
def connect(self):
|
|
21
38
|
logger.info(f"Establishing MySQL connection to {self.host}:{self.port}/{self.database}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dataqe-framework
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation
|
|
5
5
|
Author-email: Khadar Shaik <khadarmohiddin.shaik@apree.health>
|
|
6
6
|
Project-URL: Homepage, https://github.com/ShaikKhadarmohiddin/dataqe-framework
|
|
@@ -8,7 +8,7 @@ Project-URL: Documentation, https://github.com/ShaikKhadarmohiddin/dataqe-framew
|
|
|
8
8
|
Project-URL: Repository, https://github.com/ShaikKhadarmohiddin/dataqe-framework.git
|
|
9
9
|
Project-URL: Issues, https://github.com/ShaikKhadarmohiddin/dataqe-framework/issues
|
|
10
10
|
Keywords: data-validation,data-quality,testing,ETL,migration,mysql,bigquery
|
|
11
|
-
Classifier: Development Status ::
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
12
|
Classifier: Intended Audience :: Developers
|
|
13
13
|
Classifier: Intended Audience :: System Administrators
|
|
14
14
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
21
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
22
|
Classifier: Topic :: Database
|
|
22
23
|
Classifier: Topic :: Software Development :: Testing
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/comparison/comparator.py
RENAMED
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/comparison/threshold.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/__init__.py
RENAMED
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/base_connector.py
RENAMED
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/credentials_extractor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/requires.txt
RENAMED
|
File without changes
|
{dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|