dataqe-framework 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {dataqe_framework-0.2.0/src/dataqe_framework.egg-info → dataqe_framework-0.2.2}/PKG-INFO +3 -2
  2. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/pyproject.toml +3 -2
  3. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/__init__.py +1 -1
  4. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/bigquery_connector.py +28 -24
  5. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/mysql_connector.py +20 -3
  6. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2/src/dataqe_framework.egg-info}/PKG-INFO +3 -2
  7. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/LICENSE.txt +0 -0
  8. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/MANIFEST.in +0 -0
  9. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/README.md +0 -0
  10. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/setup.cfg +0 -0
  11. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/bigquery_client.py +0 -0
  12. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/cli.py +0 -0
  13. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/comparison/comparator.py +0 -0
  14. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/comparison/threshold.py +0 -0
  15. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/config.py +0 -0
  16. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/config_loader.py +0 -0
  17. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/__init__.py +0 -0
  18. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/connectors/base_connector.py +0 -0
  19. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/credentials_extractor.py +0 -0
  20. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/executor.py +0 -0
  21. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/preprocessor.py +0 -0
  22. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/reporter.py +0 -0
  23. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework/validator.py +0 -0
  24. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/SOURCES.txt +0 -0
  25. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/dependency_links.txt +0 -0
  26. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/entry_points.txt +0 -0
  27. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/requires.txt +0 -0
  28. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/src/dataqe_framework.egg-info/top_level.txt +0 -0
  29. {dataqe_framework-0.2.0 → dataqe_framework-0.2.2}/tests/test_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataqe-framework
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation
5
5
  Author-email: Khadar Shaik <khadarmohiddin.shaik@apree.health>
6
6
  Project-URL: Homepage, https://github.com/ShaikKhadarmohiddin/dataqe-framework
@@ -8,7 +8,7 @@ Project-URL: Documentation, https://github.com/ShaikKhadarmohiddin/dataqe-framew
8
8
  Project-URL: Repository, https://github.com/ShaikKhadarmohiddin/dataqe-framework.git
9
9
  Project-URL: Issues, https://github.com/ShaikKhadarmohiddin/dataqe-framework/issues
10
10
  Keywords: data-validation,data-quality,testing,ETL,migration,mysql,bigquery
11
- Classifier: Development Status :: 4 - Beta
11
+ Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: Intended Audience :: System Administrators
14
14
  Classifier: License :: OSI Approved :: MIT License
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.9
17
17
  Classifier: Programming Language :: Python :: 3.10
18
18
  Classifier: Programming Language :: Python :: 3.11
19
19
  Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
20
21
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
22
  Classifier: Topic :: Database
22
23
  Classifier: Topic :: Software Development :: Testing
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "dataqe-framework"
7
7
  dynamic = []
8
- version = "0.2.0"
8
+ version = "0.2.2"
9
9
  description = "Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation"
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.9"
@@ -22,7 +22,7 @@ keywords = [
22
22
  "bigquery"
23
23
  ]
24
24
  classifiers = [
25
- "Development Status :: 4 - Beta",
25
+ "Development Status :: 5 - Production/Stable",
26
26
  "Intended Audience :: Developers",
27
27
  "Intended Audience :: System Administrators",
28
28
  "License :: OSI Approved :: MIT License",
@@ -31,6 +31,7 @@ classifiers = [
31
31
  "Programming Language :: Python :: 3.10",
32
32
  "Programming Language :: Python :: 3.11",
33
33
  "Programming Language :: Python :: 3.12",
34
+ "Programming Language :: Python :: 3.13",
34
35
  "Topic :: Software Development :: Libraries :: Python Modules",
35
36
  "Topic :: Database",
36
37
  "Topic :: Software Development :: Testing",
@@ -1,7 +1,7 @@
1
1
  from .validator import DataValidator
2
2
  from .credentials_extractor import CredentialsExtractor
3
3
 
4
- __version__ = "0.2.0"
4
+ __version__ = "0.2.2"
5
5
 
6
6
  __all__ = ["DataValidator", "CredentialsExtractor"]
7
7
 
@@ -3,7 +3,12 @@ import logging
3
3
  from google.cloud import bigquery
4
4
  from google.oauth2 import service_account
5
5
  from .base_connector import BaseConnector
6
-
6
+ execution_env = os.environ['SPRING_PROFILES_ACTIVE']
7
+ if (execution_env.upper() != "MYLOCAL"):
8
+ import castlight_common_lib.configfunctions as cfg
9
+ config_details = cfg.Config('dataqeteam', [os.environ.get('SPRING_PROFILES_ACTIVE')])
10
+ else:
11
+ config_details = None
7
12
  logger = logging.getLogger(__name__)
8
13
 
9
14
 
@@ -21,35 +26,16 @@ class BigQueryConnector(BaseConnector):
21
26
  Args:
22
27
  config: Dictionary containing BigQuery configuration with keys:
23
28
  - project_id: GCP project ID
24
- - dataset_id: BigQuery dataset ID
25
29
  - service_account (optional): Service account name for credential lookup
26
30
  - credentials_path (optional): Path to service account JSON file
27
31
  - location (optional): BigQuery location (default: us-central1)
28
- - location_map (optional): Dict mapping environment to location
29
32
  - infra_core (optional): Infrastructure core name
30
- - infra_core_map (optional): Dict mapping environment to infra-core
33
+ - k8_db_details (optional): Details of GCP project_id dataset_id from K8 env
31
34
  """
32
35
  self.project_id = config.get("project_id")
33
- self.dataset_id = config.get("dataset_id")
34
- self.credentials_path = config.get("credentials_path")
35
- self.service_account_name = config.get("service_account")
36
-
37
- # Get execution environment
38
- self.execution_env = os.environ.get("SPRING_PROFILES_ACTIVE", "mylocal").upper()
39
-
40
- # Get location with fallback to default
41
- location_map = config.get("location_map", {})
42
- self.location = location_map.get(
43
- os.environ.get("SPRING_PROFILES_ACTIVE", "mylocal"),
44
- config.get("location", "us-central1")
45
- )
46
-
47
- # Get infra-core with fallback to default
48
- infra_core_map = config.get("infra_core_map", {})
49
- self.infra_core = infra_core_map.get(
50
- os.environ.get("SPRING_PROFILES_ACTIVE", "mylocal"),
51
- config.get("infra_core", "infra-core-us-central1")
52
- )
36
+ self.service_account_name = config.get("service_account", "qe_kube_sa_key")
37
+ self.location = config.get("location", "us-central1")
38
+ self.infra_core = config.get("infra_core", "infra-core-us-central1")
53
39
 
54
40
  # KMS encryption settings for PHI data
55
41
  self.use_encryption = config.get("use_encryption", False)
@@ -58,6 +44,24 @@ class BigQueryConnector(BaseConnector):
58
44
  self._encryption_config = None
59
45
  self._query_job_config = None
60
46
 
47
+ if "k8_db_details" in config and config_details is not None:
48
+ try:
49
+ project, db_name = config.get("k8_db_details").split('_')
50
+ self.project_id = config_details.data['bigquery'][project]['datasets'][db_name]['project_id']
51
+ self.credentials_path = os.path.join(os.getcwd(), self.project_id.replace('-', '_') + '_sftp_client_secrets.json')
52
+ self.extract_service_account(config_details, self.credentials_path, self.service_account_name)
53
+ except (ValueError, KeyError) as e:
54
+ logger.error(f"Failed to extract Kubernetes configuration: {str(e)}")
55
+ raise ValueError(f"Invalid k8_db_details format or missing configuration: {str(e)}")
56
+ else:
57
+ self.credentials_path = config.get("credentials_path")
58
+
59
+
60
+ def extract_service_account(self, config_details, service_config_file, serv_acct_name):
61
+ with open(service_config_file, "w") as file:
62
+ file.write(config_details.data['gcp'][serv_acct_name])
63
+ # print(config_details.data['gcp'][serv_acct_name])
64
+
61
65
  def _setup_encryption(self):
62
66
  """
63
67
  Setup KMS encryption configuration for PHI data.
@@ -2,20 +2,37 @@ import os
2
2
  import pymysql
3
3
  import logging
4
4
  from .base_connector import BaseConnector
5
-
5
+ execution_env = os.environ['SPRING_PROFILES_ACTIVE']
6
+ if (execution_env.upper() != "MYLOCAL"):
7
+ import castlight_common_lib.configfunctions as cfg
8
+ config_details = cfg.Config('dataqeteam', [os.environ.get('SPRING_PROFILES_ACTIVE')])
9
+ else:
10
+ config_details = None
6
11
  logger = logging.getLogger(__name__)
7
12
 
8
13
 
9
14
  class MySQLConnector(BaseConnector):
10
15
 
11
- def __init__(self, host, port, user, password, database):
16
+ def __init__(self, host=None, port=None, user=None, password=None, database=None, k8_db_details=None):
12
17
  self.host = host
13
18
  self.port = port
14
19
  self.user = user
15
20
  self.password = password
16
21
  self.database = database
17
22
  self.connection = None
18
- logger.info(f"MySQLConnector initialized for host={host}, database={database}")
23
+ if k8_db_details is None or config_details is None:
24
+ logger.info(f"Locally MySQLConnector initialized for host={host}, database={database}")
25
+ else:
26
+ try:
27
+ project, db_name = k8_db_details.split('_')
28
+ self.host = config_details.data['mysql'][project][db_name]['db_host']
29
+ self.port = config_details.data['mysql'][project][db_name]['db_port']
30
+ self.user = config_details.data['mysql'][project][db_name]['db_user']
31
+ self.password = config_details.data['mysql'][project][db_name]['db_password']
32
+ self.database = config_details.data['mysql'][project][db_name]['db_name']
33
+ except (ValueError, KeyError) as e:
34
+ logger.error(f"Failed to extract Kubernetes configuration: {str(e)}")
35
+ raise ValueError(f"Invalid k8_db_details format or missing configuration: {str(e)}")
19
36
 
20
37
  def connect(self):
21
38
  logger.info(f"Establishing MySQL connection to {self.host}:{self.port}/{self.database}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataqe-framework
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Reusable Data Validation Framework for data migration, ETL validation, and cross-database reconciliation
5
5
  Author-email: Khadar Shaik <khadarmohiddin.shaik@apree.health>
6
6
  Project-URL: Homepage, https://github.com/ShaikKhadarmohiddin/dataqe-framework
@@ -8,7 +8,7 @@ Project-URL: Documentation, https://github.com/ShaikKhadarmohiddin/dataqe-framew
8
8
  Project-URL: Repository, https://github.com/ShaikKhadarmohiddin/dataqe-framework.git
9
9
  Project-URL: Issues, https://github.com/ShaikKhadarmohiddin/dataqe-framework/issues
10
10
  Keywords: data-validation,data-quality,testing,ETL,migration,mysql,bigquery
11
- Classifier: Development Status :: 4 - Beta
11
+ Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: Intended Audience :: System Administrators
14
14
  Classifier: License :: OSI Approved :: MIT License
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.9
17
17
  Classifier: Programming Language :: Python :: 3.10
18
18
  Classifier: Programming Language :: Python :: 3.11
19
19
  Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
20
21
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
22
  Classifier: Topic :: Database
22
23
  Classifier: Topic :: Software Development :: Testing