acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the registry's advisory page for more details.

Files changed (203)
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dremio/dremio_source.py +15 -15
  66. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  67. datahub/ingestion/source/elastic_search.py +4 -3
  68. datahub/ingestion/source/excel/source.py +1 -1
  69. datahub/ingestion/source/feast.py +1 -1
  70. datahub/ingestion/source/file.py +5 -4
  71. datahub/ingestion/source/fivetran/config.py +17 -16
  72. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  73. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  74. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  75. datahub/ingestion/source/ge_profiling_config.py +8 -5
  76. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  77. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  78. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  79. datahub/ingestion/source/grafana/models.py +23 -5
  80. datahub/ingestion/source/hex/api.py +7 -5
  81. datahub/ingestion/source/hex/hex.py +4 -3
  82. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  83. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  84. datahub/ingestion/source/identity/azure_ad.py +1 -1
  85. datahub/ingestion/source/identity/okta.py +10 -10
  86. datahub/ingestion/source/kafka/kafka.py +1 -1
  87. datahub/ingestion/source/ldap.py +1 -1
  88. datahub/ingestion/source/looker/looker_common.py +7 -5
  89. datahub/ingestion/source/looker/looker_config.py +21 -20
  90. datahub/ingestion/source/looker/lookml_config.py +47 -47
  91. datahub/ingestion/source/metabase.py +8 -8
  92. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  93. datahub/ingestion/source/metadata/lineage.py +13 -8
  94. datahub/ingestion/source/mlflow.py +1 -1
  95. datahub/ingestion/source/mode.py +6 -4
  96. datahub/ingestion/source/mongodb.py +4 -3
  97. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  98. datahub/ingestion/source/nifi.py +17 -23
  99. datahub/ingestion/source/openapi.py +6 -8
  100. datahub/ingestion/source/powerbi/config.py +33 -32
  101. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  102. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  103. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  104. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  105. datahub/ingestion/source/preset.py +8 -8
  106. datahub/ingestion/source/pulsar.py +1 -1
  107. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  108. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  109. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  110. datahub/ingestion/source/redshift/config.py +18 -20
  111. datahub/ingestion/source/redshift/redshift.py +2 -2
  112. datahub/ingestion/source/redshift/usage.py +23 -3
  113. datahub/ingestion/source/s3/config.py +83 -62
  114. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  115. datahub/ingestion/source/s3/source.py +8 -5
  116. datahub/ingestion/source/sac/sac.py +5 -4
  117. datahub/ingestion/source/salesforce.py +3 -2
  118. datahub/ingestion/source/schema/json_schema.py +2 -2
  119. datahub/ingestion/source/sigma/data_classes.py +3 -2
  120. datahub/ingestion/source/sigma/sigma.py +1 -1
  121. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  122. datahub/ingestion/source/slack/slack.py +1 -1
  123. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  125. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  126. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  127. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  128. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  129. datahub/ingestion/source/sql/athena.py +1 -1
  130. datahub/ingestion/source/sql/clickhouse.py +4 -2
  131. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  132. datahub/ingestion/source/sql/druid.py +1 -1
  133. datahub/ingestion/source/sql/hana.py +1 -1
  134. datahub/ingestion/source/sql/hive.py +7 -5
  135. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  136. datahub/ingestion/source/sql/mssql/source.py +13 -6
  137. datahub/ingestion/source/sql/mysql.py +1 -1
  138. datahub/ingestion/source/sql/oracle.py +17 -10
  139. datahub/ingestion/source/sql/postgres.py +2 -2
  140. datahub/ingestion/source/sql/presto.py +1 -1
  141. datahub/ingestion/source/sql/sql_config.py +8 -9
  142. datahub/ingestion/source/sql/sql_generic.py +1 -1
  143. datahub/ingestion/source/sql/teradata.py +1 -1
  144. datahub/ingestion/source/sql/trino.py +1 -1
  145. datahub/ingestion/source/sql/vertica.py +5 -4
  146. datahub/ingestion/source/sql_queries.py +11 -8
  147. datahub/ingestion/source/state/checkpoint.py +2 -2
  148. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  149. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  150. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  152. datahub/ingestion/source/superset.py +9 -9
  153. datahub/ingestion/source/tableau/tableau.py +14 -16
  154. datahub/ingestion/source/unity/azure_auth_config.py +15 -0
  155. datahub/ingestion/source/unity/config.py +51 -34
  156. datahub/ingestion/source/unity/connection.py +7 -1
  157. datahub/ingestion/source/unity/connection_test.py +1 -1
  158. datahub/ingestion/source/unity/proxy.py +216 -7
  159. datahub/ingestion/source/unity/proxy_types.py +91 -0
  160. datahub/ingestion/source/unity/source.py +29 -3
  161. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  162. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  163. datahub/ingestion/source/usage/usage_common.py +5 -3
  164. datahub/ingestion/source_config/csv_enricher.py +7 -6
  165. datahub/ingestion/source_config/operation_config.py +7 -4
  166. datahub/ingestion/source_config/pulsar.py +11 -15
  167. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  168. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  169. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  171. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  172. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  173. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  174. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  175. datahub/ingestion/transformer/dataset_domain.py +3 -3
  176. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  177. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  178. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  179. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  180. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  181. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  182. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  183. datahub/ingestion/transformer/replace_external_url.py +2 -2
  184. datahub/ingestion/transformer/set_browse_path.py +1 -1
  185. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  186. datahub/lite/duckdb_lite.py +1 -1
  187. datahub/lite/lite_util.py +2 -2
  188. datahub/metadata/schema.avsc +7 -2
  189. datahub/metadata/schemas/QuerySubjects.avsc +1 -1
  190. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
  191. datahub/sdk/__init__.py +1 -0
  192. datahub/sdk/_all_entities.py +2 -0
  193. datahub/sdk/search_filters.py +68 -40
  194. datahub/sdk/tag.py +112 -0
  195. datahub/secret/datahub_secret_store.py +7 -4
  196. datahub/secret/file_secret_store.py +1 -1
  197. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  198. datahub/testing/check_sql_parser_result.py +2 -2
  199. datahub/utilities/ingest_utils.py +1 -1
  200. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
  201. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
  202. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
  203. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from dataclasses import dataclass
3
3
  from typing import Any, Dict, Generic, Optional, Type, TypeVar
4
4
 
5
5
  import pydantic
6
- from pydantic import root_validator
6
+ from pydantic import model_validator
7
7
  from pydantic.fields import Field
8
8
 
9
9
  from datahub.configuration.common import (
@@ -73,14 +73,14 @@ class StatefulIngestionConfig(ConfigModel):
73
73
  description="If set to True, ignores the current checkpoint state.",
74
74
  )
75
75
 
76
- @pydantic.root_validator(skip_on_failure=True)
77
- def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
78
- if values.get("enabled"):
79
- if values.get("state_provider") is None:
80
- values["state_provider"] = DynamicTypedStateProviderConfig(
76
+ @model_validator(mode="after")
77
+ def validate_config(self) -> "StatefulIngestionConfig":
78
+ if self.enabled:
79
+ if self.state_provider is None:
80
+ self.state_provider = DynamicTypedStateProviderConfig(
81
81
  type="datahub", config={}
82
82
  )
83
- return values
83
+ return self
84
84
 
85
85
 
86
86
  CustomConfig = TypeVar("CustomConfig", bound=StatefulIngestionConfig)
@@ -110,17 +110,19 @@ class StatefulLineageConfigMixin(ConfigModel):
110
110
  "store_last_lineage_extraction_timestamp", "enable_stateful_lineage_ingestion"
111
111
  )
112
112
 
113
- @root_validator(skip_on_failure=True)
114
- def lineage_stateful_option_validator(cls, values: Dict) -> Dict:
115
- sti = values.get("stateful_ingestion")
116
- if not sti or not sti.enabled:
117
- if values.get("enable_stateful_lineage_ingestion"):
118
- logger.warning(
119
- "Stateful ingestion is disabled, disabling enable_stateful_lineage_ingestion config option as well"
120
- )
121
- values["enable_stateful_lineage_ingestion"] = False
122
-
123
- return values
113
+ @model_validator(mode="after")
114
+ def lineage_stateful_option_validator(self) -> "StatefulLineageConfigMixin":
115
+ try:
116
+ sti = getattr(self, "stateful_ingestion", None)
117
+ if not sti or not getattr(sti, "enabled", False):
118
+ if getattr(self, "enable_stateful_lineage_ingestion", False):
119
+ logger.warning(
120
+ "Stateful ingestion is disabled, disabling enable_stateful_lineage_ingestion config option as well"
121
+ )
122
+ self.enable_stateful_lineage_ingestion = False
123
+ except (AttributeError, RecursionError) as e:
124
+ logger.debug(f"Skipping stateful lineage validation due to: {e}")
125
+ return self
124
126
 
125
127
 
126
128
  class StatefulProfilingConfigMixin(ConfigModel):
@@ -135,16 +137,19 @@ class StatefulProfilingConfigMixin(ConfigModel):
135
137
  "store_last_profiling_timestamps", "enable_stateful_profiling"
136
138
  )
137
139
 
138
- @root_validator(skip_on_failure=True)
139
- def profiling_stateful_option_validator(cls, values: Dict) -> Dict:
140
- sti = values.get("stateful_ingestion")
141
- if not sti or not sti.enabled:
142
- if values.get("enable_stateful_profiling"):
143
- logger.warning(
144
- "Stateful ingestion is disabled, disabling enable_stateful_profiling config option as well"
145
- )
146
- values["enable_stateful_profiling"] = False
147
- return values
140
+ @model_validator(mode="after")
141
+ def profiling_stateful_option_validator(self) -> "StatefulProfilingConfigMixin":
142
+ try:
143
+ sti = getattr(self, "stateful_ingestion", None)
144
+ if not sti or not getattr(sti, "enabled", False):
145
+ if getattr(self, "enable_stateful_profiling", False):
146
+ logger.warning(
147
+ "Stateful ingestion is disabled, disabling enable_stateful_profiling config option as well"
148
+ )
149
+ self.enable_stateful_profiling = False
150
+ except (AttributeError, RecursionError) as e:
151
+ logger.debug(f"Skipping stateful profiling validation due to: {e}")
152
+ return self
148
153
 
149
154
 
150
155
  class StatefulUsageConfigMixin(BaseTimeWindowConfig):
@@ -161,16 +166,21 @@ class StatefulUsageConfigMixin(BaseTimeWindowConfig):
161
166
  "store_last_usage_extraction_timestamp", "enable_stateful_usage_ingestion"
162
167
  )
163
168
 
164
- @root_validator(skip_on_failure=True)
165
- def last_usage_extraction_stateful_option_validator(cls, values: Dict) -> Dict:
166
- sti = values.get("stateful_ingestion")
167
- if not sti or not sti.enabled:
168
- if values.get("enable_stateful_usage_ingestion"):
169
- logger.warning(
170
- "Stateful ingestion is disabled, disabling enable_stateful_usage_ingestion config option as well"
171
- )
172
- values["enable_stateful_usage_ingestion"] = False
173
- return values
169
+ @model_validator(mode="after")
170
+ def last_usage_extraction_stateful_option_validator(
171
+ self,
172
+ ) -> "StatefulUsageConfigMixin":
173
+ try:
174
+ sti = getattr(self, "stateful_ingestion", None)
175
+ if not sti or not getattr(sti, "enabled", False):
176
+ if getattr(self, "enable_stateful_usage_ingestion", False):
177
+ logger.warning(
178
+ "Stateful ingestion is disabled, disabling enable_stateful_usage_ingestion config option as well"
179
+ )
180
+ self.enable_stateful_usage_ingestion = False
181
+ except (AttributeError, RecursionError) as e:
182
+ logger.debug(f"Skipping stateful usage validation due to: {e}")
183
+ return self
174
184
 
175
185
 
176
186
  class StatefulTimeWindowConfigMixin(BaseTimeWindowConfig):
@@ -185,16 +195,16 @@ class StatefulTimeWindowConfigMixin(BaseTimeWindowConfig):
185
195
  "and queries together from a single audit log and uses a unified time window.",
186
196
  )
187
197
 
188
- @root_validator(skip_on_failure=True)
189
- def time_window_stateful_option_validator(cls, values: Dict) -> Dict:
190
- sti = values.get("stateful_ingestion")
191
- if not sti or not sti.enabled:
192
- if values.get("enable_stateful_time_window"):
198
+ @model_validator(mode="after")
199
+ def time_window_stateful_option_validator(self) -> "StatefulTimeWindowConfigMixin":
200
+ sti = getattr(self, "stateful_ingestion", None)
201
+ if not sti or not getattr(sti, "enabled", False):
202
+ if getattr(self, "enable_stateful_time_window", False):
193
203
  logger.warning(
194
204
  "Stateful ingestion is disabled, disabling enable_stateful_time_window config option as well"
195
205
  )
196
- values["enable_stateful_time_window"] = False
197
- return values
206
+ self.enable_stateful_time_window = False
207
+ return self
198
208
 
199
209
 
200
210
  @dataclass
@@ -40,7 +40,7 @@ class DatahubIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
40
40
  def create(
41
41
  cls, config_dict: Dict[str, Any], ctx: PipelineContext
42
42
  ) -> "DatahubIngestionCheckpointingProvider":
43
- config = DatahubIngestionStateProviderConfig.parse_obj(config_dict)
43
+ config = DatahubIngestionStateProviderConfig.model_validate(config_dict)
44
44
  if config.datahub_api is not None:
45
45
  return cls(DataHubGraph(config.datahub_api))
46
46
  elif ctx.graph:
@@ -32,7 +32,7 @@ class FileIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
32
32
  def create(
33
33
  cls, config_dict: Dict[str, Any], ctx: PipelineContext
34
34
  ) -> "FileIngestionCheckpointingProvider":
35
- config = FileIngestionStateProviderConfig.parse_obj(config_dict)
35
+ config = FileIngestionStateProviderConfig.model_validate(config_dict)
36
36
  return cls(config)
37
37
 
38
38
  def get_latest_checkpoint(
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
9
9
  import dateutil.parser as dp
10
10
  import requests
11
11
  import sqlglot
12
- from pydantic import BaseModel, root_validator, validator
12
+ from pydantic import BaseModel, field_validator, model_validator
13
13
  from pydantic.fields import Field
14
14
  from requests.adapters import HTTPAdapter
15
15
  from urllib3.util.retry import Retry
@@ -246,16 +246,16 @@ class SupersetConfig(
246
246
  # This is required to allow preset configs to get parsed
247
247
  extra = "allow"
248
248
 
249
- @validator("connect_uri", "display_uri")
250
- def remove_trailing_slash(cls, v):
249
+ @field_validator("connect_uri", "display_uri", mode="after")
250
+ @classmethod
251
+ def remove_trailing_slash(cls, v: str) -> str:
251
252
  return config_clean.remove_trailing_slashes(v)
252
253
 
253
- @root_validator(skip_on_failure=True)
254
- def default_display_uri_to_connect_uri(cls, values):
255
- base = values.get("display_uri")
256
- if base is None:
257
- values["display_uri"] = values.get("connect_uri")
258
- return values
254
+ @model_validator(mode="after")
255
+ def default_display_uri_to_connect_uri(self) -> "SupersetConfig":
256
+ if self.display_uri is None:
257
+ self.display_uri = self.connect_uri
258
+ return self
259
259
 
260
260
 
261
261
  def get_metric_name(metric):
@@ -25,7 +25,7 @@ from urllib.parse import quote, urlparse
25
25
 
26
26
  import dateutil.parser as dp
27
27
  import tableauserverclient as TSC
28
- from pydantic import root_validator, validator
28
+ from pydantic import field_validator, model_validator
29
29
  from pydantic.fields import Field
30
30
  from requests.adapters import HTTPAdapter
31
31
  from tableauserverclient import (
@@ -257,8 +257,9 @@ class TableauConnectionConfig(ConfigModel):
257
257
  description="When enabled, extracts column-level lineage from Tableau Datasources",
258
258
  )
259
259
 
260
- @validator("connect_uri")
261
- def remove_trailing_slash(cls, v):
260
+ @field_validator("connect_uri", mode="after")
261
+ @classmethod
262
+ def remove_trailing_slash(cls, v: str) -> str:
262
263
  return config_clean.remove_trailing_slashes(v)
263
264
 
264
265
  def get_tableau_auth(
@@ -652,8 +653,9 @@ class TableauConfig(
652
653
  "fetch_size",
653
654
  )
654
655
 
655
- # pre = True because we want to take some decision before pydantic initialize the configuration to default values
656
- @root_validator(pre=True)
656
+ # mode = "before" because we want to take some decision before pydantic initialize the configuration to default values
657
+ @model_validator(mode="before")
658
+ @classmethod
657
659
  def projects_backward_compatibility(cls, values: Dict) -> Dict:
658
660
  # In-place update of the input dict would cause state contamination. This was discovered through test failures
659
661
  # in test_hex.py where the same dict is reused.
@@ -683,27 +685,23 @@ class TableauConfig(
683
685
 
684
686
  return values
685
687
 
686
- @root_validator(skip_on_failure=True)
687
- def validate_config_values(cls, values: Dict) -> Dict:
688
- tags_for_hidden_assets = values.get("tags_for_hidden_assets")
689
- ingest_tags = values.get("ingest_tags")
688
+ @model_validator(mode="after")
689
+ def validate_config_values(self) -> "TableauConfig":
690
690
  if (
691
- not ingest_tags
692
- and tags_for_hidden_assets
693
- and len(tags_for_hidden_assets) > 0
691
+ not self.ingest_tags
692
+ and self.tags_for_hidden_assets
693
+ and len(self.tags_for_hidden_assets) > 0
694
694
  ):
695
695
  raise ValueError(
696
696
  "tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
697
697
  )
698
698
 
699
- use_email_as_username = values.get("use_email_as_username")
700
- ingest_owner = values.get("ingest_owner")
701
- if use_email_as_username and not ingest_owner:
699
+ if self.use_email_as_username and not self.ingest_owner:
702
700
  raise ValueError(
703
701
  "use_email_as_username requires ingest_owner to be enabled."
704
702
  )
705
703
 
706
- return values
704
+ return self
707
705
 
708
706
 
709
707
  class WorkbookKey(ContainerKey):
@@ -0,0 +1,15 @@
1
+ from pydantic import Field, SecretStr
2
+
3
+ from datahub.configuration import ConfigModel
4
+
5
+
6
+ class AzureAuthConfig(ConfigModel):
7
+ client_secret: SecretStr = Field(
8
+ description="Azure application client secret used for authentication. This is a confidential credential that should be kept secure."
9
+ )
10
+ client_id: str = Field(
11
+ description="Azure application (client) ID. This is the unique identifier for the registered Azure AD application.",
12
+ )
13
+ tenant_id: str = Field(
14
+ description="Azure tenant (directory) ID. This identifies the Azure AD tenant where the application is registered.",
15
+ )
@@ -1,10 +1,10 @@
1
1
  import logging
2
2
  import os
3
3
  from datetime import datetime, timedelta, timezone
4
- from typing import Any, Dict, List, Optional, Union
4
+ from typing import Dict, List, Optional, Union
5
5
 
6
6
  import pydantic
7
- from pydantic import Field
7
+ from pydantic import Field, field_validator, model_validator
8
8
  from typing_extensions import Literal
9
9
 
10
10
  from datahub.configuration.common import (
@@ -397,13 +397,15 @@ class UnityCatalogSourceConfig(
397
397
  default=None, description="Unity Catalog Stateful Ingestion Config."
398
398
  )
399
399
 
400
- @pydantic.validator("start_time")
400
+ @field_validator("start_time", mode="after")
401
+ @classmethod
401
402
  def within_thirty_days(cls, v: datetime) -> datetime:
402
403
  if (datetime.now(timezone.utc) - v).days > 30:
403
404
  raise ValueError("Query history is only maintained for 30 days.")
404
405
  return v
405
406
 
406
- @pydantic.validator("workspace_url")
407
+ @field_validator("workspace_url", mode="after")
408
+ @classmethod
407
409
  def workspace_url_should_start_with_http_scheme(cls, workspace_url: str) -> str:
408
410
  if not workspace_url.lower().startswith(("http://", "https://")):
409
411
  raise ValueError(
@@ -411,7 +413,26 @@ class UnityCatalogSourceConfig(
411
413
  )
412
414
  return workspace_url
413
415
 
414
- @pydantic.validator("include_metastore")
416
+ @model_validator(mode="before")
417
+ def either_token_or_azure_auth_provided(cls, values: dict) -> dict:
418
+ token = values.get("token")
419
+ azure_auth = values.get("azure_auth")
420
+
421
+ # Check if exactly one of the authentication methods is provided
422
+ if not token and not azure_auth:
423
+ raise ValueError(
424
+ "Either 'azure_auth' or 'token' (personal access token) must be provided in the configuration."
425
+ )
426
+
427
+ if token and azure_auth:
428
+ raise ValueError(
429
+ "Cannot specify both 'token' and 'azure_auth'. Please provide only one authentication method."
430
+ )
431
+
432
+ return values
433
+
434
+ @field_validator("include_metastore", mode="after")
435
+ @classmethod
415
436
  def include_metastore_warning(cls, v: bool) -> bool:
416
437
  if v:
417
438
  msg = (
@@ -424,60 +445,56 @@ class UnityCatalogSourceConfig(
424
445
  add_global_warning(msg)
425
446
  return v
426
447
 
427
- @pydantic.root_validator(skip_on_failure=True)
428
- def set_warehouse_id_from_profiling(cls, values: Dict[str, Any]) -> Dict[str, Any]:
429
- profiling: Optional[
430
- Union[UnityCatalogGEProfilerConfig, UnityCatalogAnalyzeProfilerConfig]
431
- ] = values.get("profiling")
432
- if not values.get("warehouse_id") and profiling and profiling.warehouse_id:
433
- values["warehouse_id"] = profiling.warehouse_id
448
+ @model_validator(mode="after")
449
+ def set_warehouse_id_from_profiling(self):
450
+ profiling = self.profiling
451
+ if not self.warehouse_id and profiling and profiling.warehouse_id:
452
+ self.warehouse_id = profiling.warehouse_id
434
453
  if (
435
- values.get("warehouse_id")
454
+ self.warehouse_id
436
455
  and profiling
437
456
  and profiling.warehouse_id
438
- and values["warehouse_id"] != profiling.warehouse_id
457
+ and self.warehouse_id != profiling.warehouse_id
439
458
  ):
440
459
  raise ValueError(
441
460
  "When `warehouse_id` is set, it must match the `warehouse_id` in `profiling`."
442
461
  )
443
462
 
444
- if values.get("warehouse_id") and profiling and not profiling.warehouse_id:
445
- profiling.warehouse_id = values["warehouse_id"]
463
+ if self.warehouse_id and profiling and not profiling.warehouse_id:
464
+ profiling.warehouse_id = self.warehouse_id
446
465
 
447
466
  if profiling and profiling.enabled and not profiling.warehouse_id:
448
467
  raise ValueError("warehouse_id must be set when profiling is enabled.")
449
468
 
450
- return values
469
+ return self
451
470
 
452
- @pydantic.root_validator(skip_on_failure=True)
453
- def validate_lineage_data_source_with_warehouse(
454
- cls, values: Dict[str, Any]
455
- ) -> Dict[str, Any]:
456
- lineage_data_source = values.get("lineage_data_source", LineageDataSource.AUTO)
457
- warehouse_id = values.get("warehouse_id")
471
+ @model_validator(mode="after")
472
+ def validate_lineage_data_source_with_warehouse(self):
473
+ lineage_data_source = self.lineage_data_source or LineageDataSource.AUTO
458
474
 
459
- if lineage_data_source == LineageDataSource.SYSTEM_TABLES and not warehouse_id:
475
+ if (
476
+ lineage_data_source == LineageDataSource.SYSTEM_TABLES
477
+ and not self.warehouse_id
478
+ ):
460
479
  raise ValueError(
461
480
  f"lineage_data_source='{LineageDataSource.SYSTEM_TABLES.value}' requires warehouse_id to be set"
462
481
  )
463
482
 
464
- return values
483
+ return self
465
484
 
466
- @pydantic.root_validator(skip_on_failure=True)
467
- def validate_usage_data_source_with_warehouse(
468
- cls, values: Dict[str, Any]
469
- ) -> Dict[str, Any]:
470
- usage_data_source = values.get("usage_data_source", UsageDataSource.AUTO)
471
- warehouse_id = values.get("warehouse_id")
485
+ @model_validator(mode="after")
486
+ def validate_usage_data_source_with_warehouse(self):
487
+ usage_data_source = self.usage_data_source or UsageDataSource.AUTO
472
488
 
473
- if usage_data_source == UsageDataSource.SYSTEM_TABLES and not warehouse_id:
489
+ if usage_data_source == UsageDataSource.SYSTEM_TABLES and not self.warehouse_id:
474
490
  raise ValueError(
475
491
  f"usage_data_source='{UsageDataSource.SYSTEM_TABLES.value}' requires warehouse_id to be set"
476
492
  )
477
493
 
478
- return values
494
+ return self
479
495
 
480
- @pydantic.validator("schema_pattern", always=True)
496
+ @field_validator("schema_pattern", mode="before")
497
+ @classmethod
481
498
  def schema_pattern_should__always_deny_information_schema(
482
499
  cls, v: AllowDenyPattern
483
500
  ) -> AllowDenyPattern:
@@ -8,6 +8,7 @@ from pydantic import Field
8
8
 
9
9
  from datahub.configuration.common import ConfigModel
10
10
  from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
11
+ from datahub.ingestion.source.unity.azure_auth_config import AzureAuthConfig
11
12
 
12
13
  DATABRICKS = "databricks"
13
14
 
@@ -19,7 +20,12 @@ class UnityCatalogConnectionConfig(ConfigModel):
19
20
  """
20
21
 
21
22
  scheme: str = DATABRICKS
22
- token: str = pydantic.Field(description="Databricks personal access token")
23
+ token: Optional[str] = pydantic.Field(
24
+ default=None, description="Databricks personal access token"
25
+ )
26
+ azure_auth: Optional[AzureAuthConfig] = Field(
27
+ default=None, description="Azure configuration"
28
+ )
23
29
  workspace_url: str = pydantic.Field(
24
30
  description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
25
31
  )
@@ -16,10 +16,10 @@ class UnityCatalogConnectionTest:
16
16
  self.report = UnityCatalogReport()
17
17
  self.proxy = UnityCatalogApiProxy(
18
18
  self.config.workspace_url,
19
- self.config.token,
20
19
  self.config.profiling.warehouse_id,
21
20
  report=self.report,
22
21
  databricks_api_page_size=self.config.databricks_api_page_size,
22
+ personal_access_token=self.config.token,
23
23
  )
24
24
 
25
25
  def get_connection_test(self) -> TestConnectionReport: