acryl-datahub-gx-plugin 1.3.1.5rc10__py3-none-any.whl → 1.3.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: acryl-datahub-gx-plugin
- Version: 1.3.1.5rc10
+ Version: 1.3.1.6
  Summary: Datahub GX plugin to capture executions and send to Datahub
  Home-page: https://docs.datahub.com/
  License: Apache-2.0
@@ -22,87 +22,87 @@ Classifier: Topic :: Software Development
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  Requires-Dist: great-expectations<1.0.0,>=0.17.15
+ Requires-Dist: pydantic>=2.1.0
+ Requires-Dist: requests
+ Requires-Dist: sqlalchemy<2,>=1.4.39
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.6
  Requires-Dist: traitlets!=5.2.2
  Requires-Dist: requests_file
- Requires-Dist: sqlalchemy<2,>=1.4.39
- Requires-Dist: requests
- Requires-Dist: pydantic>=2.1.0
- Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc10
  Provides-Extra: ignore
  Provides-Extra: dev
- Requires-Dist: requests_file; extra == "dev"
- Requires-Dist: jsonpickle; extra == "dev"
+ Requires-Dist: mypy==1.17.1; extra == "dev"
+ Requires-Dist: types-freezegun; extra == "dev"
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.6; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
+ Requires-Dist: types-setuptools; extra == "dev"
+ Requires-Dist: types-click==0.1.12; extra == "dev"
+ Requires-Dist: types-toml; extra == "dev"
+ Requires-Dist: types-cachetools; extra == "dev"
+ Requires-Dist: types-tabulate; extra == "dev"
  Requires-Dist: pydantic>=2.1.0; extra == "dev"
- Requires-Dist: tox; extra == "dev"
- Requires-Dist: deepdiff!=8.0.0; extra == "dev"
- Requires-Dist: ruff==0.11.7; extra == "dev"
  Requires-Dist: pytest-asyncio>=0.16.0; extra == "dev"
- Requires-Dist: types-PyYAML; extra == "dev"
+ Requires-Dist: types-six; extra == "dev"
+ Requires-Dist: tox; extra == "dev"
+ Requires-Dist: packaging; extra == "dev"
+ Requires-Dist: freezegun; extra == "dev"
  Requires-Dist: types-requests; extra == "dev"
- Requires-Dist: types-cachetools; extra == "dev"
+ Requires-Dist: ruff==0.11.7; extra == "dev"
+ Requires-Dist: types-pytz; extra == "dev"
+ Requires-Dist: types-dataclasses; extra == "dev"
+ Requires-Dist: coverage>=5.1; extra == "dev"
+ Requires-Dist: pytest>=6.2.2; extra == "dev"
+ Requires-Dist: sqlalchemy<2,>=1.4.39; extra == "dev"
  Requires-Dist: sqlalchemy-stubs; extra == "dev"
  Requires-Dist: types-python-dateutil; extra == "dev"
- Requires-Dist: types-dataclasses; extra == "dev"
- Requires-Dist: packaging; extra == "dev"
- Requires-Dist: mypy==1.17.1; extra == "dev"
- Requires-Dist: types-freezegun; extra == "dev"
- Requires-Dist: requests; extra == "dev"
- Requires-Dist: types-pytz; extra == "dev"
- Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
  Requires-Dist: traitlets!=5.2.2; extra == "dev"
- Requires-Dist: types-six; extra == "dev"
- Requires-Dist: types-setuptools; extra == "dev"
+ Requires-Dist: requests_file; extra == "dev"
+ Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
+ Requires-Dist: deepdiff!=8.0.0; extra == "dev"
+ Requires-Dist: requests; extra == "dev"
  Requires-Dist: great-expectations<1.0.0,>=0.17.15; extra == "dev"
- Requires-Dist: types-tabulate; extra == "dev"
- Requires-Dist: pytest>=6.2.2; extra == "dev"
- Requires-Dist: types-toml; extra == "dev"
- Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc10; extra == "dev"
- Requires-Dist: twine; extra == "dev"
- Requires-Dist: freezegun; extra == "dev"
+ Requires-Dist: types-PyYAML; extra == "dev"
+ Requires-Dist: jsonpickle; extra == "dev"
  Requires-Dist: requests-mock; extra == "dev"
- Requires-Dist: sqlalchemy<2,>=1.4.39; extra == "dev"
- Requires-Dist: coverage>=5.1; extra == "dev"
- Requires-Dist: build; extra == "dev"
- Requires-Dist: types-click==0.1.12; extra == "dev"
  Provides-Extra: integration-tests
- Requires-Dist: acryl-datahub[testing-utils]==1.3.1.5rc10; extra == "integration-tests"
- Requires-Dist: requests_file; extra == "integration-tests"
- Requires-Dist: pytest-docker>=1.1.0; extra == "integration-tests"
- Requires-Dist: jsonpickle; extra == "integration-tests"
+ Requires-Dist: mypy==1.17.1; extra == "integration-tests"
+ Requires-Dist: pyspark; extra == "integration-tests"
+ Requires-Dist: types-freezegun; extra == "integration-tests"
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.6; extra == "integration-tests"
+ Requires-Dist: build; extra == "integration-tests"
+ Requires-Dist: twine; extra == "integration-tests"
+ Requires-Dist: types-setuptools; extra == "integration-tests"
+ Requires-Dist: types-click==0.1.12; extra == "integration-tests"
+ Requires-Dist: types-toml; extra == "integration-tests"
+ Requires-Dist: types-cachetools; extra == "integration-tests"
+ Requires-Dist: types-tabulate; extra == "integration-tests"
  Requires-Dist: pydantic>=2.1.0; extra == "integration-tests"
- Requires-Dist: tox; extra == "integration-tests"
- Requires-Dist: deepdiff!=8.0.0; extra == "integration-tests"
- Requires-Dist: ruff==0.11.7; extra == "integration-tests"
  Requires-Dist: pytest-asyncio>=0.16.0; extra == "integration-tests"
- Requires-Dist: types-PyYAML; extra == "integration-tests"
+ Requires-Dist: types-six; extra == "integration-tests"
+ Requires-Dist: tox; extra == "integration-tests"
+ Requires-Dist: packaging; extra == "integration-tests"
+ Requires-Dist: freezegun; extra == "integration-tests"
  Requires-Dist: types-requests; extra == "integration-tests"
- Requires-Dist: psycopg2-binary; extra == "integration-tests"
- Requires-Dist: types-cachetools; extra == "integration-tests"
+ Requires-Dist: ruff==0.11.7; extra == "integration-tests"
+ Requires-Dist: types-pytz; extra == "integration-tests"
+ Requires-Dist: types-dataclasses; extra == "integration-tests"
+ Requires-Dist: coverage>=5.1; extra == "integration-tests"
+ Requires-Dist: pytest>=6.2.2; extra == "integration-tests"
+ Requires-Dist: sqlalchemy<2,>=1.4.39; extra == "integration-tests"
  Requires-Dist: sqlalchemy-stubs; extra == "integration-tests"
  Requires-Dist: types-python-dateutil; extra == "integration-tests"
- Requires-Dist: types-dataclasses; extra == "integration-tests"
- Requires-Dist: packaging; extra == "integration-tests"
- Requires-Dist: mypy==1.17.1; extra == "integration-tests"
- Requires-Dist: types-freezegun; extra == "integration-tests"
- Requires-Dist: requests; extra == "integration-tests"
- Requires-Dist: types-pytz; extra == "integration-tests"
- Requires-Dist: pytest-cov>=2.8.1; extra == "integration-tests"
  Requires-Dist: traitlets!=5.2.2; extra == "integration-tests"
- Requires-Dist: types-six; extra == "integration-tests"
- Requires-Dist: types-setuptools; extra == "integration-tests"
+ Requires-Dist: requests_file; extra == "integration-tests"
+ Requires-Dist: pytest-cov>=2.8.1; extra == "integration-tests"
+ Requires-Dist: deepdiff!=8.0.0; extra == "integration-tests"
+ Requires-Dist: psycopg2-binary; extra == "integration-tests"
+ Requires-Dist: requests; extra == "integration-tests"
+ Requires-Dist: acryl-datahub[testing-utils]==1.3.1.6; extra == "integration-tests"
+ Requires-Dist: pytest-docker>=1.1.0; extra == "integration-tests"
  Requires-Dist: great-expectations<1.0.0,>=0.17.15; extra == "integration-tests"
- Requires-Dist: types-tabulate; extra == "integration-tests"
- Requires-Dist: pytest>=6.2.2; extra == "integration-tests"
- Requires-Dist: types-toml; extra == "integration-tests"
- Requires-Dist: pyspark; extra == "integration-tests"
- Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc10; extra == "integration-tests"
- Requires-Dist: twine; extra == "integration-tests"
- Requires-Dist: freezegun; extra == "integration-tests"
+ Requires-Dist: types-PyYAML; extra == "integration-tests"
+ Requires-Dist: jsonpickle; extra == "integration-tests"
  Requires-Dist: requests-mock; extra == "integration-tests"
- Requires-Dist: sqlalchemy<2,>=1.4.39; extra == "integration-tests"
- Requires-Dist: coverage>=5.1; extra == "integration-tests"
- Requires-Dist: build; extra == "integration-tests"
- Requires-Dist: types-click==0.1.12; extra == "integration-tests"
  Dynamic: classifier
  Dynamic: description
  Dynamic: description-content-type
@@ -0,0 +1,8 @@
+ datahub_gx_plugin/__init__.py,sha256=EAwU2Z4u9abD5xHwcfgwo1xvK8-lSfByM-cp0IMw4mQ,69
+ datahub_gx_plugin/_version.py,sha256=GlHJansOn_-rYbo_oQsFvc5W6jU54Ppp5V4ORtwd-FY,135
+ datahub_gx_plugin/action.py,sha256=d6H0daSN0SrZ46ZfNPdVwldQYk1eJcSryHSC6cWnjM8,40665
+ acryl_datahub_gx_plugin-1.3.1.6.dist-info/METADATA,sha256=RK93bGb4Ku1gqlyLlVX6j1Qn5nkpmLHwF4ceq-sKOas,5639
+ acryl_datahub_gx_plugin-1.3.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub_gx_plugin-1.3.1.6.dist-info/entry_points.txt,sha256=MPipKHfWSerlcRwWt8OP93TPCVxpposDMyILo9cszbM,88
+ acryl_datahub_gx_plugin-1.3.1.6.dist-info/top_level.txt,sha256=ryn3FMtO1isrM-TbO-SRYy_-31fUpgdEBmBSrAmvL1c,18
+ acryl_datahub_gx_plugin-1.3.1.6.dist-info/RECORD,,
@@ -1,3 +1,3 @@
  # Published at https://pypi.org/project/acryl-datahub-gx-plugin/.
  __package_name__ = "acryl-datahub-gx-plugin"
- __version__ = "1.3.1.5rc10"
+ __version__ = "1.3.1.6"
@@ -25,7 +25,10 @@ from great_expectations.data_context.types.resource_identifiers import (
      ExpectationSuiteIdentifier,
      ValidationResultIdentifier,
  )
- from great_expectations.execution_engine import PandasExecutionEngine
+ from great_expectations.execution_engine import (
+     PandasExecutionEngine,
+     SparkDFExecutionEngine,
+ )
  from great_expectations.execution_engine.sqlalchemy_execution_engine import (
      SqlAlchemyExecutionEngine,
  )
@@ -586,16 +589,67 @@ class DataHubValidationAction(ValidationAction):
          )

      def get_dataset_partitions(self, batch_identifier, data_asset):
-         dataset_partitions = []
+         dataset_partitions: List[
+             Dict[str, Union[PartitionSpecClass, BatchSpec, str, None]]
+         ] = []

          logger.debug("Finding datasets being validated")

-         # for now, we support only v3-api and sqlalchemy execution engine and Pandas engine
+         # for now, we support only v3-api and sqlalchemy execution engine,Pandas engine and Spark engine
          is_sql_alchemy = isinstance(data_asset, Validator) and (
              isinstance(data_asset.execution_engine, SqlAlchemyExecutionEngine)
          )
          is_pandas = isinstance(data_asset.execution_engine, PandasExecutionEngine)
-         if is_sql_alchemy or is_pandas:
+
+         is_spark = isinstance(data_asset.execution_engine, SparkDFExecutionEngine)
+
+         if is_spark:
+             ge_batch_spec = data_asset.active_batch_spec
+             partitionSpec = None
+             batchSpecProperties = {
+                 "data_asset_name": str(
+                     data_asset.active_batch_definition.data_asset_name
+                 ),
+                 "datasource_name": str(
+                     data_asset.active_batch_definition.datasource_name
+                 ),
+             }
+
+             if isinstance(ge_batch_spec, RuntimeDataBatchSpec):
+                 data_platform = self.get_platform_instance_spark(
+                     data_asset.active_batch_definition.datasource_name
+                 )
+
+                 dataset_urn = builder.make_dataset_urn_with_platform_instance(
+                     platform=(
+                         data_platform
+                         if self.platform_alias is None
+                         else self.platform_alias
+                     ),
+                     name=data_asset.active_batch_definition.data_asset_name,
+                     platform_instance="",
+                     env=self.env,
+                 )
+
+                 batchSpec = BatchSpec(
+                     nativeBatchId=batch_identifier,
+                     query="",
+                     customProperties=batchSpecProperties,
+                 )
+                 dataset_partitions.append(
+                     {
+                         "dataset_urn": dataset_urn,
+                         "partitionSpec": partitionSpec,
+                         "batchSpec": batchSpec,
+                     }
+                 )
+             else:
+                 warn(
+                     "DataHubValidationAction does not recognize this GE batch spec type for SparkDFExecutionEngine- {batch_spec_type}. No action will be taken.".format(
+                         batch_spec_type=type(ge_batch_spec)
+                     )
+                 )
+         elif is_sql_alchemy or is_pandas:
              ge_batch_spec = data_asset.active_batch_spec
              partitionSpec = None
              batchSpecProperties = {
@@ -607,6 +661,7 @@ class DataHubValidationAction(ValidationAction):
                  ),
              }
              sqlalchemy_uri = None
+
              if is_sql_alchemy and isinstance(
                  data_asset.execution_engine.engine, Engine
              ):
@@ -627,7 +682,7 @@ class DataHubValidationAction(ValidationAction):
                      schema_name,
                      table_name,
                      self.env,
-                     self.get_platform_instance(
+                     self.get_platform_instance_sqlalchemy(
                          data_asset.active_batch_definition.datasource_name
                      ),
                      self.exclude_dbname,
@@ -709,7 +764,7 @@ class DataHubValidationAction(ValidationAction):
                          None,
                          table,
                          self.env,
-                         self.get_platform_instance(
+                         self.get_platform_instance_sqlalchemy(
                              data_asset.active_batch_definition.datasource_name
                          ),
                          self.exclude_dbname,
@@ -724,7 +779,7 @@ class DataHubValidationAction(ValidationAction):
                          }
                      )
              elif isinstance(ge_batch_spec, RuntimeDataBatchSpec):
-                 data_platform = self.get_platform_instance(
+                 data_platform = self.get_platform_instance_sqlalchemy(
                      data_asset.active_batch_definition.datasource_name
                  )
                  dataset_urn = builder.make_dataset_urn_with_platform_instance(
@@ -758,14 +813,14 @@ class DataHubValidationAction(ValidationAction):
          else:
              # TODO - v2-spec - SqlAlchemyDataset support
              warn(
-                 "DataHubValidationAction does not recognize this GE data asset type - {asset_type}. This is either using v2-api or execution engine other than sqlalchemy.".format(
+                 "DataHubValidationAction does not recognize this GE data asset type - {asset_type}.".format(
                      asset_type=type(data_asset)
                  )
              )

          return dataset_partitions

-     def get_platform_instance(self, datasource_name):
+     def get_platform_instance_sqlalchemy(self, datasource_name):
          if self.platform_instance_map and datasource_name in self.platform_instance_map:
              return self.platform_instance_map[datasource_name]
          else:
@@ -774,6 +829,16 @@ class DataHubValidationAction(ValidationAction):
              )
              return None

+     def get_platform_instance_spark(self, datasource_name):
+         if self.platform_instance_map and datasource_name in self.platform_instance_map:
+             return self.platform_instance_map[datasource_name]
+         else:
+             warn(
+                 f"Datasource {datasource_name} is not present in platform_instance_map. \
+                 Data platform will be {datasource_name} by default "
+             )
+             return datasource_name
+

  def parse_int_or_default(value, default_value=None):
      if value is None:
@@ -1,8 +0,0 @@
- datahub_gx_plugin/__init__.py,sha256=EAwU2Z4u9abD5xHwcfgwo1xvK8-lSfByM-cp0IMw4mQ,69
- datahub_gx_plugin/_version.py,sha256=SO5hwP7jAVhc9uGQ1pfby0NKv0UnFGAtY_XjJAVCn3A,139
- datahub_gx_plugin/action.py,sha256=W1TZEQzURdg-QOEUdayQmoGB2oO54UkRs9PQEbxhp7A,38149
- acryl_datahub_gx_plugin-1.3.1.5rc10.dist-info/METADATA,sha256=yNd7izc5_4qKF9qoUZvSvlrHD6C4UrstcNSq61kY5cg,5659
- acryl_datahub_gx_plugin-1.3.1.5rc10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub_gx_plugin-1.3.1.5rc10.dist-info/entry_points.txt,sha256=MPipKHfWSerlcRwWt8OP93TPCVxpposDMyILo9cszbM,88
- acryl_datahub_gx_plugin-1.3.1.5rc10.dist-info/top_level.txt,sha256=ryn3FMtO1isrM-TbO-SRYy_-31fUpgdEBmBSrAmvL1c,18
- acryl_datahub_gx_plugin-1.3.1.5rc10.dist-info/RECORD,,