acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1rc1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (193)
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/METADATA +2501 -2501
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/RECORD +193 -193
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  66. datahub/ingestion/source/elastic_search.py +4 -3
  67. datahub/ingestion/source/excel/source.py +1 -1
  68. datahub/ingestion/source/feast.py +1 -1
  69. datahub/ingestion/source/file.py +5 -4
  70. datahub/ingestion/source/fivetran/config.py +17 -16
  71. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  72. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  73. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  74. datahub/ingestion/source/ge_profiling_config.py +8 -5
  75. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  76. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  77. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  78. datahub/ingestion/source/grafana/models.py +23 -5
  79. datahub/ingestion/source/hex/api.py +7 -5
  80. datahub/ingestion/source/hex/hex.py +4 -3
  81. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  82. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  83. datahub/ingestion/source/identity/azure_ad.py +1 -1
  84. datahub/ingestion/source/identity/okta.py +10 -10
  85. datahub/ingestion/source/kafka/kafka.py +1 -1
  86. datahub/ingestion/source/ldap.py +1 -1
  87. datahub/ingestion/source/looker/looker_common.py +7 -5
  88. datahub/ingestion/source/looker/looker_config.py +21 -20
  89. datahub/ingestion/source/looker/lookml_config.py +47 -47
  90. datahub/ingestion/source/metabase.py +8 -8
  91. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  92. datahub/ingestion/source/metadata/lineage.py +13 -8
  93. datahub/ingestion/source/mlflow.py +1 -1
  94. datahub/ingestion/source/mode.py +6 -4
  95. datahub/ingestion/source/mongodb.py +4 -3
  96. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  97. datahub/ingestion/source/nifi.py +17 -23
  98. datahub/ingestion/source/openapi.py +6 -8
  99. datahub/ingestion/source/powerbi/config.py +33 -32
  100. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  101. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  102. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  103. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  104. datahub/ingestion/source/preset.py +8 -8
  105. datahub/ingestion/source/pulsar.py +1 -1
  106. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  107. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  108. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  109. datahub/ingestion/source/redshift/config.py +18 -20
  110. datahub/ingestion/source/redshift/redshift.py +2 -2
  111. datahub/ingestion/source/redshift/usage.py +23 -3
  112. datahub/ingestion/source/s3/config.py +83 -62
  113. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  114. datahub/ingestion/source/s3/source.py +8 -5
  115. datahub/ingestion/source/sac/sac.py +5 -4
  116. datahub/ingestion/source/salesforce.py +3 -2
  117. datahub/ingestion/source/schema/json_schema.py +2 -2
  118. datahub/ingestion/source/sigma/data_classes.py +3 -2
  119. datahub/ingestion/source/sigma/sigma.py +1 -1
  120. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  121. datahub/ingestion/source/slack/slack.py +1 -1
  122. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  123. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  125. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  126. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  127. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  128. datahub/ingestion/source/sql/athena.py +1 -1
  129. datahub/ingestion/source/sql/clickhouse.py +4 -2
  130. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  131. datahub/ingestion/source/sql/druid.py +1 -1
  132. datahub/ingestion/source/sql/hana.py +1 -1
  133. datahub/ingestion/source/sql/hive.py +7 -5
  134. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  135. datahub/ingestion/source/sql/mssql/source.py +13 -6
  136. datahub/ingestion/source/sql/mysql.py +1 -1
  137. datahub/ingestion/source/sql/oracle.py +17 -10
  138. datahub/ingestion/source/sql/postgres.py +2 -2
  139. datahub/ingestion/source/sql/presto.py +1 -1
  140. datahub/ingestion/source/sql/sql_config.py +8 -9
  141. datahub/ingestion/source/sql/sql_generic.py +1 -1
  142. datahub/ingestion/source/sql/teradata.py +1 -1
  143. datahub/ingestion/source/sql/trino.py +1 -1
  144. datahub/ingestion/source/sql/vertica.py +5 -4
  145. datahub/ingestion/source/sql_queries.py +11 -8
  146. datahub/ingestion/source/state/checkpoint.py +2 -2
  147. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  148. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  149. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  150. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/superset.py +9 -9
  152. datahub/ingestion/source/tableau/tableau.py +14 -16
  153. datahub/ingestion/source/unity/config.py +33 -34
  154. datahub/ingestion/source/unity/proxy.py +203 -0
  155. datahub/ingestion/source/unity/proxy_types.py +91 -0
  156. datahub/ingestion/source/unity/source.py +27 -2
  157. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  158. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  159. datahub/ingestion/source/usage/usage_common.py +5 -3
  160. datahub/ingestion/source_config/csv_enricher.py +7 -6
  161. datahub/ingestion/source_config/operation_config.py +7 -4
  162. datahub/ingestion/source_config/pulsar.py +11 -15
  163. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  164. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  165. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  166. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  167. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  168. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  169. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  171. datahub/ingestion/transformer/dataset_domain.py +3 -3
  172. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  173. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  174. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  175. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  176. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  177. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  178. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  179. datahub/ingestion/transformer/replace_external_url.py +2 -2
  180. datahub/ingestion/transformer/set_browse_path.py +1 -1
  181. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  182. datahub/lite/duckdb_lite.py +1 -1
  183. datahub/lite/lite_util.py +2 -2
  184. datahub/sdk/search_filters.py +68 -40
  185. datahub/secret/datahub_secret_store.py +7 -4
  186. datahub/secret/file_secret_store.py +1 -1
  187. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  188. datahub/testing/check_sql_parser_result.py +2 -2
  189. datahub/utilities/ingest_utils.py +1 -1
  190. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL +0 -0
  191. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt +0 -0
  192. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE +0 -0
  193. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from abc import abstractmethod
3
3
  from typing import Any, Dict, Optional
4
4
 
5
5
  import pydantic
6
- from pydantic import Field
6
+ from pydantic import Field, model_validator
7
7
 
8
8
  from datahub.configuration.common import AllowDenyPattern, ConfigModel
9
9
  from datahub.configuration.source_common import (
@@ -49,7 +49,8 @@ class SQLFilterConfig(ConfigModel):
49
49
  description="Regex patterns for views to filter in ingestion. Note: Defaults to table_pattern if not specified. Specify regex to match the entire view name in database.schema.view format. e.g. to match all views starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
50
50
  )
51
51
 
52
- @pydantic.root_validator(pre=True)
52
+ @model_validator(mode="before")
53
+ @classmethod
53
54
  def view_pattern_is_table_pattern_unless_specified(
54
55
  cls, values: Dict[str, Any]
55
56
  ) -> Dict[str, Any]:
@@ -120,11 +121,9 @@ class SQLCommonConfig(
120
121
  self.profiling.operation_config
121
122
  )
122
123
 
123
- @pydantic.root_validator(skip_on_failure=True)
124
- def ensure_profiling_pattern_is_passed_to_profiling(
125
- cls, values: Dict[str, Any]
126
- ) -> Dict[str, Any]:
127
- profiling: Optional[GEProfilingConfig] = values.get("profiling")
124
+ @model_validator(mode="after")
125
+ def ensure_profiling_pattern_is_passed_to_profiling(self):
126
+ profiling = self.profiling
128
127
  # Note: isinstance() check is required here as unity-catalog source reuses
129
128
  # SQLCommonConfig with different profiling config than GEProfilingConfig
130
129
  if (
@@ -132,8 +131,8 @@ class SQLCommonConfig(
132
131
  and isinstance(profiling, GEProfilingConfig)
133
132
  and profiling.enabled
134
133
  ):
135
- profiling._allow_deny_patterns = values["profile_pattern"]
136
- return values
134
+ profiling._allow_deny_patterns = self.profile_pattern
135
+ return self
137
136
 
138
137
  @abstractmethod
139
138
  def get_sql_alchemy_url(self):
@@ -85,5 +85,5 @@ class SQLAlchemyGenericSource(SQLAlchemySource):
85
85
 
86
86
  @classmethod
87
87
  def create(cls, config_dict, ctx):
88
- config = SQLAlchemyGenericConfig.parse_obj(config_dict)
88
+ config = SQLAlchemyGenericConfig.model_validate(config_dict)
89
89
  return cls(config, ctx)
@@ -860,7 +860,7 @@ ORDER by DataBaseName, TableName;
860
860
 
861
861
  @classmethod
862
862
  def create(cls, config_dict, ctx):
863
- config = TeradataConfig.parse_obj(config_dict)
863
+ config = TeradataConfig.model_validate(config_dict)
864
864
  return cls(config, ctx)
865
865
 
866
866
  def _init_schema_resolver(self) -> SchemaResolver:
@@ -413,7 +413,7 @@ class TrinoSource(SQLAlchemySource):
413
413
 
414
414
  @classmethod
415
415
  def create(cls, config_dict, ctx):
416
- config = TrinoConfig.parse_obj(config_dict)
416
+ config = TrinoConfig.model_validate(config_dict)
417
417
  return cls(config, ctx)
418
418
 
419
419
  def get_schema_fields_for_column(
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tupl
5
5
 
6
6
  import pydantic
7
7
  import pytest
8
- from pydantic import validator
8
+ from pydantic import field_validator
9
9
  from vertica_sqlalchemy_dialect.base import VerticaInspector
10
10
 
11
11
  from datahub.configuration.common import AllowDenyPattern
@@ -105,8 +105,9 @@ class VerticaConfig(BasicSQLAlchemyConfig):
105
105
  # defaults
106
106
  scheme: str = pydantic.Field(default="vertica+vertica_python")
107
107
 
108
- @validator("host_port")
109
- def clean_host_port(cls, v):
108
+ @field_validator("host_port", mode="after")
109
+ @classmethod
110
+ def clean_host_port(cls, v: str) -> str:
110
111
  return config_clean.remove_protocol(v)
111
112
 
112
113
 
@@ -138,7 +139,7 @@ class VerticaSource(SQLAlchemySource):
138
139
 
139
140
  @classmethod
140
141
  def create(cls, config_dict: Dict, ctx: PipelineContext) -> "VerticaSource":
141
- config = VerticaConfig.parse_obj(config_dict)
142
+ config = VerticaConfig.model_validate(config_dict)
142
143
  return cls(config, ctx)
143
144
 
144
145
  def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
@@ -5,10 +5,10 @@ import re
5
5
  from dataclasses import dataclass, field
6
6
  from datetime import datetime
7
7
  from functools import partial
8
- from typing import ClassVar, Iterable, List, Optional, Union, cast
8
+ from typing import Any, ClassVar, Iterable, List, Optional, Union, cast
9
9
 
10
10
  import smart_open
11
- from pydantic import BaseModel, Field, validator
11
+ from pydantic import BaseModel, Field, field_validator
12
12
 
13
13
  from datahub.configuration.common import HiddenFromDocs
14
14
  from datahub.configuration.datetimes import parse_user_datetime
@@ -450,19 +450,22 @@ class QueryEntry(BaseModel):
450
450
  class Config:
451
451
  arbitrary_types_allowed = True
452
452
 
453
- @validator("timestamp", pre=True)
454
- def parse_timestamp(cls, v):
453
+ @field_validator("timestamp", mode="before")
454
+ @classmethod
455
+ def parse_timestamp(cls, v: Any) -> Any:
455
456
  return None if v is None else parse_user_datetime(str(v))
456
457
 
457
- @validator("user", pre=True)
458
- def parse_user(cls, v):
458
+ @field_validator("user", mode="before")
459
+ @classmethod
460
+ def parse_user(cls, v: Any) -> Any:
459
461
  if v is None:
460
462
  return None
461
463
 
462
464
  return v if isinstance(v, CorpUserUrn) else CorpUserUrn(v)
463
465
 
464
- @validator("downstream_tables", "upstream_tables", pre=True)
465
- def parse_tables(cls, v):
466
+ @field_validator("downstream_tables", "upstream_tables", mode="before")
467
+ @classmethod
468
+ def parse_tables(cls, v: Any) -> Any:
466
469
  if not v:
467
470
  return []
468
471
 
@@ -163,7 +163,7 @@ class Checkpoint(Generic[StateType]):
163
163
  )
164
164
  state_as_dict["version"] = checkpoint_aspect.state.formatVersion
165
165
  state_as_dict["serde"] = checkpoint_aspect.state.serde
166
- return state_class.parse_obj(state_as_dict)
166
+ return state_class.model_validate(state_as_dict)
167
167
 
168
168
  @staticmethod
169
169
  def _from_base85_json_bytes(
@@ -179,7 +179,7 @@ class Checkpoint(Generic[StateType]):
179
179
  state_as_dict = json.loads(state_uncompressed.decode("utf-8"))
180
180
  state_as_dict["version"] = checkpoint_aspect.state.formatVersion
181
181
  state_as_dict["serde"] = checkpoint_aspect.state.serde
182
- return state_class.parse_obj(state_as_dict)
182
+ return state_class.model_validate(state_as_dict)
183
183
 
184
184
  def to_checkpoint_aspect(
185
185
  self, max_allowed_state_size: int
@@ -1,6 +1,7 @@
1
1
  from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Tuple, Type
2
2
 
3
3
  import pydantic
4
+ from pydantic import model_validator
4
5
 
5
6
  from datahub.emitter.mce_builder import make_assertion_urn, make_container_urn
6
7
  from datahub.ingestion.source.state.checkpoint import CheckpointStateBase
@@ -59,7 +60,7 @@ def pydantic_state_migrator(mapping: Dict[str, str]) -> "V1RootValidator":
59
60
 
60
61
  return values
61
62
 
62
- return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_field_rename)
63
+ return model_validator(mode="before")(_validate_field_rename)
63
64
 
64
65
 
65
66
  class GenericCheckpointState(CheckpointStateBase):
@@ -3,7 +3,7 @@ from dataclasses import dataclass
3
3
  from typing import Any, Dict, Generic, Optional, Type, TypeVar
4
4
 
5
5
  import pydantic
6
- from pydantic import root_validator
6
+ from pydantic import model_validator
7
7
  from pydantic.fields import Field
8
8
 
9
9
  from datahub.configuration.common import (
@@ -73,14 +73,14 @@ class StatefulIngestionConfig(ConfigModel):
73
73
  description="If set to True, ignores the current checkpoint state.",
74
74
  )
75
75
 
76
- @pydantic.root_validator(skip_on_failure=True)
77
- def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
78
- if values.get("enabled"):
79
- if values.get("state_provider") is None:
80
- values["state_provider"] = DynamicTypedStateProviderConfig(
76
+ @model_validator(mode="after")
77
+ def validate_config(self) -> "StatefulIngestionConfig":
78
+ if self.enabled:
79
+ if self.state_provider is None:
80
+ self.state_provider = DynamicTypedStateProviderConfig(
81
81
  type="datahub", config={}
82
82
  )
83
- return values
83
+ return self
84
84
 
85
85
 
86
86
  CustomConfig = TypeVar("CustomConfig", bound=StatefulIngestionConfig)
@@ -110,17 +110,19 @@ class StatefulLineageConfigMixin(ConfigModel):
110
110
  "store_last_lineage_extraction_timestamp", "enable_stateful_lineage_ingestion"
111
111
  )
112
112
 
113
- @root_validator(skip_on_failure=True)
114
- def lineage_stateful_option_validator(cls, values: Dict) -> Dict:
115
- sti = values.get("stateful_ingestion")
116
- if not sti or not sti.enabled:
117
- if values.get("enable_stateful_lineage_ingestion"):
118
- logger.warning(
119
- "Stateful ingestion is disabled, disabling enable_stateful_lineage_ingestion config option as well"
120
- )
121
- values["enable_stateful_lineage_ingestion"] = False
122
-
123
- return values
113
+ @model_validator(mode="after")
114
+ def lineage_stateful_option_validator(self) -> "StatefulLineageConfigMixin":
115
+ try:
116
+ sti = getattr(self, "stateful_ingestion", None)
117
+ if not sti or not getattr(sti, "enabled", False):
118
+ if getattr(self, "enable_stateful_lineage_ingestion", False):
119
+ logger.warning(
120
+ "Stateful ingestion is disabled, disabling enable_stateful_lineage_ingestion config option as well"
121
+ )
122
+ self.enable_stateful_lineage_ingestion = False
123
+ except (AttributeError, RecursionError) as e:
124
+ logger.debug(f"Skipping stateful lineage validation due to: {e}")
125
+ return self
124
126
 
125
127
 
126
128
  class StatefulProfilingConfigMixin(ConfigModel):
@@ -135,16 +137,19 @@ class StatefulProfilingConfigMixin(ConfigModel):
135
137
  "store_last_profiling_timestamps", "enable_stateful_profiling"
136
138
  )
137
139
 
138
- @root_validator(skip_on_failure=True)
139
- def profiling_stateful_option_validator(cls, values: Dict) -> Dict:
140
- sti = values.get("stateful_ingestion")
141
- if not sti or not sti.enabled:
142
- if values.get("enable_stateful_profiling"):
143
- logger.warning(
144
- "Stateful ingestion is disabled, disabling enable_stateful_profiling config option as well"
145
- )
146
- values["enable_stateful_profiling"] = False
147
- return values
140
+ @model_validator(mode="after")
141
+ def profiling_stateful_option_validator(self) -> "StatefulProfilingConfigMixin":
142
+ try:
143
+ sti = getattr(self, "stateful_ingestion", None)
144
+ if not sti or not getattr(sti, "enabled", False):
145
+ if getattr(self, "enable_stateful_profiling", False):
146
+ logger.warning(
147
+ "Stateful ingestion is disabled, disabling enable_stateful_profiling config option as well"
148
+ )
149
+ self.enable_stateful_profiling = False
150
+ except (AttributeError, RecursionError) as e:
151
+ logger.debug(f"Skipping stateful profiling validation due to: {e}")
152
+ return self
148
153
 
149
154
 
150
155
  class StatefulUsageConfigMixin(BaseTimeWindowConfig):
@@ -161,16 +166,21 @@ class StatefulUsageConfigMixin(BaseTimeWindowConfig):
161
166
  "store_last_usage_extraction_timestamp", "enable_stateful_usage_ingestion"
162
167
  )
163
168
 
164
- @root_validator(skip_on_failure=True)
165
- def last_usage_extraction_stateful_option_validator(cls, values: Dict) -> Dict:
166
- sti = values.get("stateful_ingestion")
167
- if not sti or not sti.enabled:
168
- if values.get("enable_stateful_usage_ingestion"):
169
- logger.warning(
170
- "Stateful ingestion is disabled, disabling enable_stateful_usage_ingestion config option as well"
171
- )
172
- values["enable_stateful_usage_ingestion"] = False
173
- return values
169
+ @model_validator(mode="after")
170
+ def last_usage_extraction_stateful_option_validator(
171
+ self,
172
+ ) -> "StatefulUsageConfigMixin":
173
+ try:
174
+ sti = getattr(self, "stateful_ingestion", None)
175
+ if not sti or not getattr(sti, "enabled", False):
176
+ if getattr(self, "enable_stateful_usage_ingestion", False):
177
+ logger.warning(
178
+ "Stateful ingestion is disabled, disabling enable_stateful_usage_ingestion config option as well"
179
+ )
180
+ self.enable_stateful_usage_ingestion = False
181
+ except (AttributeError, RecursionError) as e:
182
+ logger.debug(f"Skipping stateful usage validation due to: {e}")
183
+ return self
174
184
 
175
185
 
176
186
  class StatefulTimeWindowConfigMixin(BaseTimeWindowConfig):
@@ -185,16 +195,16 @@ class StatefulTimeWindowConfigMixin(BaseTimeWindowConfig):
185
195
  "and queries together from a single audit log and uses a unified time window.",
186
196
  )
187
197
 
188
- @root_validator(skip_on_failure=True)
189
- def time_window_stateful_option_validator(cls, values: Dict) -> Dict:
190
- sti = values.get("stateful_ingestion")
191
- if not sti or not sti.enabled:
192
- if values.get("enable_stateful_time_window"):
198
+ @model_validator(mode="after")
199
+ def time_window_stateful_option_validator(self) -> "StatefulTimeWindowConfigMixin":
200
+ sti = getattr(self, "stateful_ingestion", None)
201
+ if not sti or not getattr(sti, "enabled", False):
202
+ if getattr(self, "enable_stateful_time_window", False):
193
203
  logger.warning(
194
204
  "Stateful ingestion is disabled, disabling enable_stateful_time_window config option as well"
195
205
  )
196
- values["enable_stateful_time_window"] = False
197
- return values
206
+ self.enable_stateful_time_window = False
207
+ return self
198
208
 
199
209
 
200
210
  @dataclass
@@ -40,7 +40,7 @@ class DatahubIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
40
40
  def create(
41
41
  cls, config_dict: Dict[str, Any], ctx: PipelineContext
42
42
  ) -> "DatahubIngestionCheckpointingProvider":
43
- config = DatahubIngestionStateProviderConfig.parse_obj(config_dict)
43
+ config = DatahubIngestionStateProviderConfig.model_validate(config_dict)
44
44
  if config.datahub_api is not None:
45
45
  return cls(DataHubGraph(config.datahub_api))
46
46
  elif ctx.graph:
@@ -32,7 +32,7 @@ class FileIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
32
32
  def create(
33
33
  cls, config_dict: Dict[str, Any], ctx: PipelineContext
34
34
  ) -> "FileIngestionCheckpointingProvider":
35
- config = FileIngestionStateProviderConfig.parse_obj(config_dict)
35
+ config = FileIngestionStateProviderConfig.model_validate(config_dict)
36
36
  return cls(config)
37
37
 
38
38
  def get_latest_checkpoint(
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
9
9
  import dateutil.parser as dp
10
10
  import requests
11
11
  import sqlglot
12
- from pydantic import BaseModel, root_validator, validator
12
+ from pydantic import BaseModel, field_validator, model_validator
13
13
  from pydantic.fields import Field
14
14
  from requests.adapters import HTTPAdapter
15
15
  from urllib3.util.retry import Retry
@@ -246,16 +246,16 @@ class SupersetConfig(
246
246
  # This is required to allow preset configs to get parsed
247
247
  extra = "allow"
248
248
 
249
- @validator("connect_uri", "display_uri")
250
- def remove_trailing_slash(cls, v):
249
+ @field_validator("connect_uri", "display_uri", mode="after")
250
+ @classmethod
251
+ def remove_trailing_slash(cls, v: str) -> str:
251
252
  return config_clean.remove_trailing_slashes(v)
252
253
 
253
- @root_validator(skip_on_failure=True)
254
- def default_display_uri_to_connect_uri(cls, values):
255
- base = values.get("display_uri")
256
- if base is None:
257
- values["display_uri"] = values.get("connect_uri")
258
- return values
254
+ @model_validator(mode="after")
255
+ def default_display_uri_to_connect_uri(self) -> "SupersetConfig":
256
+ if self.display_uri is None:
257
+ self.display_uri = self.connect_uri
258
+ return self
259
259
 
260
260
 
261
261
  def get_metric_name(metric):
@@ -25,7 +25,7 @@ from urllib.parse import quote, urlparse
25
25
 
26
26
  import dateutil.parser as dp
27
27
  import tableauserverclient as TSC
28
- from pydantic import root_validator, validator
28
+ from pydantic import field_validator, model_validator
29
29
  from pydantic.fields import Field
30
30
  from requests.adapters import HTTPAdapter
31
31
  from tableauserverclient import (
@@ -257,8 +257,9 @@ class TableauConnectionConfig(ConfigModel):
257
257
  description="When enabled, extracts column-level lineage from Tableau Datasources",
258
258
  )
259
259
 
260
- @validator("connect_uri")
261
- def remove_trailing_slash(cls, v):
260
+ @field_validator("connect_uri", mode="after")
261
+ @classmethod
262
+ def remove_trailing_slash(cls, v: str) -> str:
262
263
  return config_clean.remove_trailing_slashes(v)
263
264
 
264
265
  def get_tableau_auth(
@@ -652,8 +653,9 @@ class TableauConfig(
652
653
  "fetch_size",
653
654
  )
654
655
 
655
- # pre = True because we want to take some decision before pydantic initialize the configuration to default values
656
- @root_validator(pre=True)
656
+ # mode = "before" because we want to take some decision before pydantic initialize the configuration to default values
657
+ @model_validator(mode="before")
658
+ @classmethod
657
659
  def projects_backward_compatibility(cls, values: Dict) -> Dict:
658
660
  # In-place update of the input dict would cause state contamination. This was discovered through test failures
659
661
  # in test_hex.py where the same dict is reused.
@@ -683,27 +685,23 @@ class TableauConfig(
683
685
 
684
686
  return values
685
687
 
686
- @root_validator(skip_on_failure=True)
687
- def validate_config_values(cls, values: Dict) -> Dict:
688
- tags_for_hidden_assets = values.get("tags_for_hidden_assets")
689
- ingest_tags = values.get("ingest_tags")
688
+ @model_validator(mode="after")
689
+ def validate_config_values(self) -> "TableauConfig":
690
690
  if (
691
- not ingest_tags
692
- and tags_for_hidden_assets
693
- and len(tags_for_hidden_assets) > 0
691
+ not self.ingest_tags
692
+ and self.tags_for_hidden_assets
693
+ and len(self.tags_for_hidden_assets) > 0
694
694
  ):
695
695
  raise ValueError(
696
696
  "tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
697
697
  )
698
698
 
699
- use_email_as_username = values.get("use_email_as_username")
700
- ingest_owner = values.get("ingest_owner")
701
- if use_email_as_username and not ingest_owner:
699
+ if self.use_email_as_username and not self.ingest_owner:
702
700
  raise ValueError(
703
701
  "use_email_as_username requires ingest_owner to be enabled."
704
702
  )
705
703
 
706
- return values
704
+ return self
707
705
 
708
706
 
709
707
  class WorkbookKey(ContainerKey):
@@ -1,10 +1,10 @@
1
1
  import logging
2
2
  import os
3
3
  from datetime import datetime, timedelta, timezone
4
- from typing import Any, Dict, List, Optional, Union
4
+ from typing import Dict, List, Optional, Union
5
5
 
6
6
  import pydantic
7
- from pydantic import Field
7
+ from pydantic import Field, field_validator, model_validator
8
8
  from typing_extensions import Literal
9
9
 
10
10
  from datahub.configuration.common import (
@@ -397,13 +397,15 @@ class UnityCatalogSourceConfig(
397
397
  default=None, description="Unity Catalog Stateful Ingestion Config."
398
398
  )
399
399
 
400
- @pydantic.validator("start_time")
400
+ @field_validator("start_time", mode="after")
401
+ @classmethod
401
402
  def within_thirty_days(cls, v: datetime) -> datetime:
402
403
  if (datetime.now(timezone.utc) - v).days > 30:
403
404
  raise ValueError("Query history is only maintained for 30 days.")
404
405
  return v
405
406
 
406
- @pydantic.validator("workspace_url")
407
+ @field_validator("workspace_url", mode="after")
408
+ @classmethod
407
409
  def workspace_url_should_start_with_http_scheme(cls, workspace_url: str) -> str:
408
410
  if not workspace_url.lower().startswith(("http://", "https://")):
409
411
  raise ValueError(
@@ -411,7 +413,8 @@ class UnityCatalogSourceConfig(
411
413
  )
412
414
  return workspace_url
413
415
 
414
- @pydantic.validator("include_metastore")
416
+ @field_validator("include_metastore", mode="after")
417
+ @classmethod
415
418
  def include_metastore_warning(cls, v: bool) -> bool:
416
419
  if v:
417
420
  msg = (
@@ -424,60 +427,56 @@ class UnityCatalogSourceConfig(
424
427
  add_global_warning(msg)
425
428
  return v
426
429
 
427
- @pydantic.root_validator(skip_on_failure=True)
428
- def set_warehouse_id_from_profiling(cls, values: Dict[str, Any]) -> Dict[str, Any]:
429
- profiling: Optional[
430
- Union[UnityCatalogGEProfilerConfig, UnityCatalogAnalyzeProfilerConfig]
431
- ] = values.get("profiling")
432
- if not values.get("warehouse_id") and profiling and profiling.warehouse_id:
433
- values["warehouse_id"] = profiling.warehouse_id
430
+ @model_validator(mode="after")
431
+ def set_warehouse_id_from_profiling(self):
432
+ profiling = self.profiling
433
+ if not self.warehouse_id and profiling and profiling.warehouse_id:
434
+ self.warehouse_id = profiling.warehouse_id
434
435
  if (
435
- values.get("warehouse_id")
436
+ self.warehouse_id
436
437
  and profiling
437
438
  and profiling.warehouse_id
438
- and values["warehouse_id"] != profiling.warehouse_id
439
+ and self.warehouse_id != profiling.warehouse_id
439
440
  ):
440
441
  raise ValueError(
441
442
  "When `warehouse_id` is set, it must match the `warehouse_id` in `profiling`."
442
443
  )
443
444
 
444
- if values.get("warehouse_id") and profiling and not profiling.warehouse_id:
445
- profiling.warehouse_id = values["warehouse_id"]
445
+ if self.warehouse_id and profiling and not profiling.warehouse_id:
446
+ profiling.warehouse_id = self.warehouse_id
446
447
 
447
448
  if profiling and profiling.enabled and not profiling.warehouse_id:
448
449
  raise ValueError("warehouse_id must be set when profiling is enabled.")
449
450
 
450
- return values
451
+ return self
451
452
 
452
- @pydantic.root_validator(skip_on_failure=True)
453
- def validate_lineage_data_source_with_warehouse(
454
- cls, values: Dict[str, Any]
455
- ) -> Dict[str, Any]:
456
- lineage_data_source = values.get("lineage_data_source", LineageDataSource.AUTO)
457
- warehouse_id = values.get("warehouse_id")
453
+ @model_validator(mode="after")
454
+ def validate_lineage_data_source_with_warehouse(self):
455
+ lineage_data_source = self.lineage_data_source or LineageDataSource.AUTO
458
456
 
459
- if lineage_data_source == LineageDataSource.SYSTEM_TABLES and not warehouse_id:
457
+ if (
458
+ lineage_data_source == LineageDataSource.SYSTEM_TABLES
459
+ and not self.warehouse_id
460
+ ):
460
461
  raise ValueError(
461
462
  f"lineage_data_source='{LineageDataSource.SYSTEM_TABLES.value}' requires warehouse_id to be set"
462
463
  )
463
464
 
464
- return values
465
+ return self
465
466
 
466
- @pydantic.root_validator(skip_on_failure=True)
467
- def validate_usage_data_source_with_warehouse(
468
- cls, values: Dict[str, Any]
469
- ) -> Dict[str, Any]:
470
- usage_data_source = values.get("usage_data_source", UsageDataSource.AUTO)
471
- warehouse_id = values.get("warehouse_id")
467
+ @model_validator(mode="after")
468
+ def validate_usage_data_source_with_warehouse(self):
469
+ usage_data_source = self.usage_data_source or UsageDataSource.AUTO
472
470
 
473
- if usage_data_source == UsageDataSource.SYSTEM_TABLES and not warehouse_id:
471
+ if usage_data_source == UsageDataSource.SYSTEM_TABLES and not self.warehouse_id:
474
472
  raise ValueError(
475
473
  f"usage_data_source='{UsageDataSource.SYSTEM_TABLES.value}' requires warehouse_id to be set"
476
474
  )
477
475
 
478
- return values
476
+ return self
479
477
 
480
- @pydantic.validator("schema_pattern", always=True)
478
+ @field_validator("schema_pattern", mode="before")
479
+ @classmethod
481
480
  def schema_pattern_should__always_deny_information_schema(
482
481
  cls, v: AllowDenyPattern
483
482
  ) -> AllowDenyPattern: