acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1rc1__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.

This version of acryl-datahub has been flagged as a potentially problematic release.

Files changed (193):
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/METADATA +2501 -2501
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/RECORD +193 -193
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  66. datahub/ingestion/source/elastic_search.py +4 -3
  67. datahub/ingestion/source/excel/source.py +1 -1
  68. datahub/ingestion/source/feast.py +1 -1
  69. datahub/ingestion/source/file.py +5 -4
  70. datahub/ingestion/source/fivetran/config.py +17 -16
  71. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  72. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  73. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  74. datahub/ingestion/source/ge_profiling_config.py +8 -5
  75. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  76. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  77. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  78. datahub/ingestion/source/grafana/models.py +23 -5
  79. datahub/ingestion/source/hex/api.py +7 -5
  80. datahub/ingestion/source/hex/hex.py +4 -3
  81. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  82. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  83. datahub/ingestion/source/identity/azure_ad.py +1 -1
  84. datahub/ingestion/source/identity/okta.py +10 -10
  85. datahub/ingestion/source/kafka/kafka.py +1 -1
  86. datahub/ingestion/source/ldap.py +1 -1
  87. datahub/ingestion/source/looker/looker_common.py +7 -5
  88. datahub/ingestion/source/looker/looker_config.py +21 -20
  89. datahub/ingestion/source/looker/lookml_config.py +47 -47
  90. datahub/ingestion/source/metabase.py +8 -8
  91. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  92. datahub/ingestion/source/metadata/lineage.py +13 -8
  93. datahub/ingestion/source/mlflow.py +1 -1
  94. datahub/ingestion/source/mode.py +6 -4
  95. datahub/ingestion/source/mongodb.py +4 -3
  96. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  97. datahub/ingestion/source/nifi.py +17 -23
  98. datahub/ingestion/source/openapi.py +6 -8
  99. datahub/ingestion/source/powerbi/config.py +33 -32
  100. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  101. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  102. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  103. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  104. datahub/ingestion/source/preset.py +8 -8
  105. datahub/ingestion/source/pulsar.py +1 -1
  106. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  107. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  108. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  109. datahub/ingestion/source/redshift/config.py +18 -20
  110. datahub/ingestion/source/redshift/redshift.py +2 -2
  111. datahub/ingestion/source/redshift/usage.py +23 -3
  112. datahub/ingestion/source/s3/config.py +83 -62
  113. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  114. datahub/ingestion/source/s3/source.py +8 -5
  115. datahub/ingestion/source/sac/sac.py +5 -4
  116. datahub/ingestion/source/salesforce.py +3 -2
  117. datahub/ingestion/source/schema/json_schema.py +2 -2
  118. datahub/ingestion/source/sigma/data_classes.py +3 -2
  119. datahub/ingestion/source/sigma/sigma.py +1 -1
  120. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  121. datahub/ingestion/source/slack/slack.py +1 -1
  122. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  123. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  125. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  126. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  127. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  128. datahub/ingestion/source/sql/athena.py +1 -1
  129. datahub/ingestion/source/sql/clickhouse.py +4 -2
  130. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  131. datahub/ingestion/source/sql/druid.py +1 -1
  132. datahub/ingestion/source/sql/hana.py +1 -1
  133. datahub/ingestion/source/sql/hive.py +7 -5
  134. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  135. datahub/ingestion/source/sql/mssql/source.py +13 -6
  136. datahub/ingestion/source/sql/mysql.py +1 -1
  137. datahub/ingestion/source/sql/oracle.py +17 -10
  138. datahub/ingestion/source/sql/postgres.py +2 -2
  139. datahub/ingestion/source/sql/presto.py +1 -1
  140. datahub/ingestion/source/sql/sql_config.py +8 -9
  141. datahub/ingestion/source/sql/sql_generic.py +1 -1
  142. datahub/ingestion/source/sql/teradata.py +1 -1
  143. datahub/ingestion/source/sql/trino.py +1 -1
  144. datahub/ingestion/source/sql/vertica.py +5 -4
  145. datahub/ingestion/source/sql_queries.py +11 -8
  146. datahub/ingestion/source/state/checkpoint.py +2 -2
  147. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  148. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  149. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  150. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/superset.py +9 -9
  152. datahub/ingestion/source/tableau/tableau.py +14 -16
  153. datahub/ingestion/source/unity/config.py +33 -34
  154. datahub/ingestion/source/unity/proxy.py +203 -0
  155. datahub/ingestion/source/unity/proxy_types.py +91 -0
  156. datahub/ingestion/source/unity/source.py +27 -2
  157. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  158. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  159. datahub/ingestion/source/usage/usage_common.py +5 -3
  160. datahub/ingestion/source_config/csv_enricher.py +7 -6
  161. datahub/ingestion/source_config/operation_config.py +7 -4
  162. datahub/ingestion/source_config/pulsar.py +11 -15
  163. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  164. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  165. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  166. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  167. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  168. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  169. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  171. datahub/ingestion/transformer/dataset_domain.py +3 -3
  172. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  173. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  174. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  175. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  176. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  177. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  178. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  179. datahub/ingestion/transformer/replace_external_url.py +2 -2
  180. datahub/ingestion/transformer/set_browse_path.py +1 -1
  181. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  182. datahub/lite/duckdb_lite.py +1 -1
  183. datahub/lite/lite_util.py +2 -2
  184. datahub/sdk/search_filters.py +68 -40
  185. datahub/secret/datahub_secret_store.py +7 -4
  186. datahub/secret/file_secret_store.py +1 -1
  187. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  188. datahub/testing/check_sql_parser_result.py +2 -2
  189. datahub/utilities/ingest_utils.py +1 -1
  190. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL +0 -0
  191. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt +0 -0
  192. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE +0 -0
  193. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt +0 -0
@@ -462,7 +462,7 @@ class FeastRepositorySource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = FeastRepositorySourceConfig.parse_obj(config_dict)
+        config = FeastRepositorySourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
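This hunk, like many below, replaces the Pydantic v1 classmethod parse_obj with its v2 equivalent model_validate. A minimal sketch of the pattern, using a hypothetical ExampleConfig model rather than actual DataHub code:

# Illustrative sketch; not DataHub code.
from pydantic import BaseModel


class ExampleConfig(BaseModel):
    path: str
    env: str = "PROD"


# Pydantic v2: Model.model_validate(data) replaces Model.parse_obj(data).
config = ExampleConfig.model_validate({"path": "s3://bucket/data"})
assert config.env == "PROD"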
@@ -9,7 +9,7 @@ from functools import partial
 from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union
 
 import ijson
-from pydantic import validator
+from pydantic import field_validator
 from pydantic.fields import Field
 
 from datahub.configuration.common import ConfigEnum

@@ -103,7 +103,8 @@ class FileSourceConfig(StatefulIngestionConfigBase):
 
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None
 
-    @validator("file_extension", always=True)
+    @field_validator("file_extension", mode="after")
+    @classmethod
     def add_leading_dot_to_extension(cls, v: str) -> str:
         if v:
             if v.startswith("."):
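The FileSourceConfig hunk shows the recurring validator migration: the v1 @validator decorator becomes v2's @field_validator plus an explicit @classmethod. One hedged caveat: v1's always=True also ran the validator for unset defaults, which in v2 requires validate_default=True on the field. An illustrative sketch with a hypothetical FileConfig model:

# Illustrative sketch; not DataHub code.
from pydantic import BaseModel, field_validator


class FileConfig(BaseModel):
    file_extension: str = ".json"

    @field_validator("file_extension", mode="after")
    @classmethod
    def add_leading_dot_to_extension(cls, v: str) -> str:
        # Runs after the field value has been parsed to a str.
        return v if v.startswith(".") else f".{v}"


assert FileConfig(file_extension="csv").file_extension == ".csv"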
@@ -205,7 +206,7 @@ class GenericFileSource(StatefulIngestionSourceBase, TestableSource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = FileSourceConfig.parse_obj(config_dict)
+        config = FileSourceConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_filenames(self) -> Iterable[FileInfo]:

@@ -358,7 +359,7 @@ class GenericFileSource(StatefulIngestionSourceBase, TestableSource):
 
     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
-        config = FileSourceConfig.parse_obj(config_dict)
+        config = FileSourceConfig.model_validate(config_dict)
         exists = os.path.exists(config.path)
         if not exists:
             return TestConnectionReport(

@@ -1,10 +1,10 @@
 import dataclasses
 import logging
 import warnings
-from typing import Dict, Optional
+from typing import Any, Dict, Optional
 
 import pydantic
-from pydantic import Field, root_validator
+from pydantic import Field, field_validator, model_validator
 from typing_extensions import Literal
 
 from datahub.configuration.common import (

@@ -98,7 +98,8 @@ class DatabricksDestinationConfig(UnityCatalogConnectionConfig):
     catalog: str = Field(description="The fivetran connector log catalog.")
     log_schema: str = Field(description="The fivetran connector log schema.")
 
-    @pydantic.validator("warehouse_id")
+    @field_validator("warehouse_id", mode="after")
+    @classmethod
     def warehouse_id_should_not_be_empty(cls, warehouse_id: Optional[str]) -> str:
         if warehouse_id is None or (warehouse_id and warehouse_id.strip() == ""):
             raise ValueError("Fivetran requires warehouse_id to be set")

@@ -141,29 +142,28 @@ class FivetranLogConfig(ConfigModel):
         "destination_config", "snowflake_destination_config"
     )
 
-    @root_validator(skip_on_failure=True)
-    def validate_destination_platfrom_and_config(cls, values: Dict) -> Dict:
-        destination_platform = values["destination_platform"]
-        if destination_platform == "snowflake":
-            if "snowflake_destination_config" not in values:
+    @model_validator(mode="after")
+    def validate_destination_platform_and_config(self) -> "FivetranLogConfig":
+        if self.destination_platform == "snowflake":
+            if self.snowflake_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'snowflake', user must provide snowflake destination configuration in the recipe."
                 )
-        elif destination_platform == "bigquery":
-            if "bigquery_destination_config" not in values:
+        elif self.destination_platform == "bigquery":
+            if self.bigquery_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'bigquery', user must provide bigquery destination configuration in the recipe."
                 )
-        elif destination_platform == "databricks":
-            if "databricks_destination_config" not in values:
+        elif self.destination_platform == "databricks":
+            if self.databricks_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'databricks', user must provide databricks destination configuration in the recipe."
                 )
         else:
             raise ValueError(
-                f"Destination platform '{destination_platform}' is not yet supported."
+                f"Destination platform '{self.destination_platform}' is not yet supported."
             )
-        return values
+        return self
 
 
 @dataclasses.dataclass
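The FivetranLogConfig change illustrates the root-validator migration: @root_validator(skip_on_failure=True) over a values dict becomes @model_validator(mode="after") over the constructed instance, returning self. A hedged sketch with a hypothetical DestinationConfig model:

# Illustrative sketch; not DataHub code.
from typing import Optional

from pydantic import BaseModel, model_validator


class DestinationConfig(BaseModel):
    destination_platform: str = "snowflake"
    snowflake_destination_config: Optional[dict] = None

    @model_validator(mode="after")
    def validate_destination(self) -> "DestinationConfig":
        # The validator receives the constructed model instance instead of a raw values dict.
        if self.destination_platform == "snowflake" and self.snowflake_destination_config is None:
            raise ValueError("snowflake destination requires snowflake_destination_config")
        return self


DestinationConfig(snowflake_destination_config={"account_id": "abc"})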
@@ -267,8 +267,9 @@ class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin
         description="Fivetran REST API configuration, used to provide wider support for connections.",
     )
 
-    @pydantic.root_validator(pre=True)
-    def compat_sources_to_database(cls, values: Dict) -> Dict:
+    @model_validator(mode="before")
+    @classmethod
+    def compat_sources_to_database(cls, values: Any) -> Any:
         if "sources_to_database" in values:
             warnings.warn(
                 "The sources_to_database field is deprecated, please use sources_to_platform_instance instead.",

@@ -234,12 +234,12 @@ class FivetranSource(StatefulIngestionSourceBase):
         return dict(
             **{
                 f"source.{k}": str(v)
-                for k, v in source_details.dict().items()
+                for k, v in source_details.model_dump().items()
                 if v is not None and not isinstance(v, bool)
             },
             **{
                 f"destination.{k}": str(v)
-                for k, v in destination_details.dict().items()
+                for k, v in destination_details.model_dump().items()
                 if v is not None and not isinstance(v, bool)
             },
         )
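The FivetranSource hunk reflects the serialization rename: the v1 .dict() method becomes .model_dump() in Pydantic v2. A small sketch with a hypothetical ConnectorDetails model:

# Illustrative sketch; not DataHub code.
from pydantic import BaseModel


class ConnectorDetails(BaseModel):
    connector_id: str
    paused: bool = False


details = ConnectorDetails(connector_id="abc123")
flattened = {
    f"source.{k}": str(v)
    for k, v in details.model_dump().items()
    if not isinstance(v, bool)
}
# flattened == {"source.connector_id": "abc123"}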
@@ -127,7 +127,7 @@ class DataHubGcSource(Source):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = DataHubGcSourceConfig.parse_obj(config_dict)
+        config = DataHubGcSourceConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     # auto_work_unit_report is overriden to disable a couple of automation like auto status aspect, etc. which is not needed her.

@@ -1,7 +1,7 @@
 import logging
-from typing import Dict, Iterable, List, Optional
+from typing import Iterable, List, Optional
 
-from pydantic import Field, SecretStr, validator
+from pydantic import Field, SecretStr, model_validator
 
 from datahub.configuration.common import ConfigModel
 from datahub.configuration.source_common import DatasetSourceConfigMixin

@@ -64,18 +64,16 @@ class GCSSourceConfig(
 
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None
 
-    @validator("path_specs", always=True)
-    def check_path_specs_and_infer_platform(
-        cls, path_specs: List[PathSpec], values: Dict
-    ) -> List[PathSpec]:
-        if len(path_specs) == 0:
+    @model_validator(mode="after")
+    def check_path_specs_and_infer_platform(self) -> "GCSSourceConfig":
+        if len(self.path_specs) == 0:
             raise ValueError("path_specs must not be empty")
 
         # Check that all path specs have the gs:// prefix.
-        if any([not is_gcs_uri(path_spec.include) for path_spec in path_specs]):
+        if any([not is_gcs_uri(path_spec.include) for path_spec in self.path_specs]):
             raise ValueError("All path_spec.include should start with gs://")
 
-        return path_specs
+        return self
 
 
 class GCSSourceReport(DataLakeSourceReport):

@@ -105,7 +103,7 @@ class GCSSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = GCSSourceConfig.parse_obj(config_dict)
+        config = GCSSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def create_equivalent_s3_config(self):

@@ -4,6 +4,7 @@ import os
 from typing import Annotated, Any, Dict, List, Optional
 
 import pydantic
+from pydantic import model_validator
 from pydantic.fields import Field
 
 from datahub.configuration.common import AllowDenyPattern, ConfigModel, SupportedSources

@@ -212,7 +213,8 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         description="Whether to profile complex types like structs, arrays and maps. ",
     )
 
-    @pydantic.root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def deprecate_bigquery_temp_table_schema(cls, values):
         # TODO: Update docs to remove mention of this field.
         if "bigquery_temp_table_schema" in values:

@@ -222,16 +224,17 @@ class GEProfilingConfig(GEProfilingBaseConfig):
             del values["bigquery_temp_table_schema"]
         return values
 
-    @pydantic.root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def ensure_field_level_settings_are_normalized(
-        cls: "GEProfilingConfig", values: Dict[str, Any]
+        cls, values: Dict[str, Any]
     ) -> Dict[str, Any]:
         max_num_fields_to_profile_key = "max_number_of_fields_to_profile"
         max_num_fields_to_profile = values.get(max_num_fields_to_profile_key)
 
         # Disable all field-level metrics.
         if values.get("profile_table_level_only"):
-            for field_level_metric in cls.__fields__:
+            for field_level_metric in cls.model_fields:
                 if field_level_metric.startswith("include_field_"):
                     if values.get(field_level_metric):
                         raise ValueError(
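The GEProfilingConfig hunks show two more recurring changes: pre=True root validators become @model_validator(mode="before") classmethods, and the v1 cls.__fields__ mapping is read as cls.model_fields. A sketch of both with a hypothetical ProfilingConfig model:

# Illustrative sketch; not DataHub code.
from typing import Any, Dict

from pydantic import BaseModel, model_validator


class ProfilingConfig(BaseModel):
    profile_table_level_only: bool = False
    include_field_min_value: bool = True
    include_field_max_value: bool = True

    @model_validator(mode="before")
    @classmethod
    def disable_field_metrics(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # cls.model_fields replaces the v1 cls.__fields__ mapping.
        if values.get("profile_table_level_only"):
            for name in cls.model_fields:
                if name.startswith("include_field_"):
                    values.setdefault(name, False)
        return values


cfg = ProfilingConfig(profile_table_level_only=True)
assert cfg.include_field_min_value is False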
@@ -267,7 +270,7 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         )
 
     def config_for_telemetry(self) -> Dict[str, Any]:
-        config_dict = self.dict()
+        config_dict = self.model_dump()
 
         return {
             flag: config_dict[flag]

@@ -69,7 +69,7 @@ class GrafanaAPIClient:
                 if not batch:
                     break
 
-                folders.extend(Folder.parse_obj(folder) for folder in batch)
+                folders.extend(Folder.model_validate(folder) for folder in batch)
                 page += 1
             except requests.exceptions.RequestException as e:
                 self.report.report_failure(

@@ -88,7 +88,7 @@ class GrafanaAPIClient:
         try:
             response = self.session.get(f"{self.base_url}/api/dashboards/uid/{uid}")
             response.raise_for_status()
-            return Dashboard.parse_obj(response.json())
+            return Dashboard.model_validate(response.json())
         except requests.exceptions.RequestException as e:
             self.report.warning(
                 title="Dashboard Fetch Error",

@@ -1,6 +1,6 @@
 from typing import Dict, Optional
 
-from pydantic import Field, SecretStr, validator
+from pydantic import Field, SecretStr, field_validator
 
 from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
 from datahub.configuration.source_common import (

@@ -99,6 +99,7 @@ class GrafanaSourceConfig(
         description="Map of Grafana datasource types/UIDs to platform connection configs for lineage extraction",
     )
 
-    @validator("url", allow_reuse=True)
-    def remove_trailing_slash(cls, v):
+    @field_validator("url", mode="after")
+    @classmethod
+    def remove_trailing_slash(cls, v: str) -> str:
         return config_clean.remove_trailing_slashes(v)

@@ -171,7 +171,7 @@ class GrafanaSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "GrafanaSource":
-        config = GrafanaSourceConfig.parse_obj(config_dict)
+        config = GrafanaSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

@@ -79,18 +79,29 @@ class Dashboard(_GrafanaBaseModel):
         for panel_data in panels_data:
             if panel_data.get("type") == "row" and "panels" in panel_data:
                 panels.extend(
-                    Panel.parse_obj(p)
+                    Panel.model_validate(p)
                     for p in panel_data["panels"]
                     if p.get("type") != "row"
                 )
             elif panel_data.get("type") != "row":
-                panels.append(Panel.parse_obj(panel_data))
+                panels.append(Panel.model_validate(panel_data))
         return panels
 
     @classmethod
-    def parse_obj(cls, data: Dict[str, Any]) -> "Dashboard":
+    def model_validate(
+        cls,
+        obj: Any,
+        *,
+        strict: Optional[bool] = None,
+        from_attributes: Optional[bool] = None,
+        context: Optional[Any] = None,
+        by_alias: Optional[bool] = None,
+        by_name: Optional[bool] = None,
+    ) -> "Dashboard":
         """Custom parsing to handle nested panel extraction."""
-        dashboard_data = data.get("dashboard", {})
+        # Handle both direct dashboard data and nested structure with 'dashboard' key
+        dashboard_data = obj.get("dashboard", obj)
+
         _panel_data = dashboard_data.get("panels", [])
         panels = []
         try:

@@ -113,7 +124,14 @@ class Dashboard(_GrafanaBaseModel):
         if "refresh" in dashboard_dict and isinstance(dashboard_dict["refresh"], bool):
             dashboard_dict["refresh"] = str(dashboard_dict["refresh"])
 
-        return super().parse_obj(dashboard_dict)
+        return super().model_validate(
+            dashboard_dict,
+            strict=strict,
+            from_attributes=from_attributes,
+            context=context,
+            by_alias=by_alias,
+            by_name=by_name,
+        )
 
 
 class Folder(_GrafanaBaseModel):
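The Grafana Dashboard model goes a step further and overrides model_validate itself so its custom pre-processing keeps working under v2; the override mirrors the v2 keyword-only signature and forwards the arguments to super(). A hedged sketch with a hypothetical DashboardModel (the by_alias/by_name parameters seen in the diff exist only in newer Pydantic 2.x releases and are omitted here):

# Illustrative sketch; not DataHub code.
from typing import Any, Optional

from pydantic import BaseModel


class DashboardModel(BaseModel):
    title: str = ""

    @classmethod
    def model_validate(
        cls,
        obj: Any,
        *,
        strict: Optional[bool] = None,
        from_attributes: Optional[bool] = None,
        context: Optional[Any] = None,
    ) -> "DashboardModel":
        # Accept either {"dashboard": {...}} or the bare dashboard payload.
        data = obj.get("dashboard", obj) if isinstance(obj, dict) else obj
        return super().model_validate(
            data, strict=strict, from_attributes=from_attributes, context=context
        )


d = DashboardModel.model_validate({"dashboard": {"title": "Orders"}})
assert d.title == "Orders"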
@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 from typing import Any, Dict, Generator, List, Optional, Union
 
 import requests
-from pydantic import BaseModel, Field, ValidationError, validator
+from pydantic import BaseModel, Field, ValidationError, field_validator
 from requests.adapters import HTTPAdapter
 from typing_extensions import assert_never
 from urllib3.util.retry import Retry

@@ -50,7 +50,8 @@ class HexApiProjectAnalytics(BaseModel):
         default=None, alias="publishedResultsUpdatedAt"
     )
 
-    @validator("last_viewed_at", "published_results_updated_at", pre=True)
+    @field_validator("last_viewed_at", "published_results_updated_at", mode="before")
+    @classmethod
     def parse_datetime(cls, value):
         if value is None:
             return None

@@ -167,14 +168,15 @@ class HexApiProjectApiResource(BaseModel):
     class Config:
         extra = "ignore"  # Allow extra fields in the JSON
 
-    @validator(
+    @field_validator(
         "created_at",
         "last_edited_at",
         "last_published_at",
         "archived_at",
         "trashed_at",
-        pre=True,
+        mode="before",
     )
+    @classmethod
     def parse_datetime(cls, value):
         if value is None:
             return None

@@ -292,7 +294,7 @@ class HexApi:
             )
             response.raise_for_status()
 
-            api_response = HexApiProjectsListResponse.parse_obj(response.json())
+            api_response = HexApiProjectsListResponse.model_validate(response.json())
             logger.info(f"Fetched {len(api_response.values)} items")
             params["after"] = (
                 api_response.pagination.after if api_response.pagination else None

@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from typing import Any, Dict, Iterable, List, Optional
 
-from pydantic import Field, SecretStr, root_validator
+from pydantic import Field, SecretStr, model_validator
 from typing_extensions import assert_never
 
 from datahub.configuration.common import AllowDenyPattern

@@ -120,7 +120,8 @@ class HexSourceConfig(
         description="Number of items to fetch per DataHub API call.",
     )
 
-    @root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def validate_lineage_times(cls, data: Dict[str, Any]) -> Dict[str, Any]:
         # In-place update of the input dict would cause state contamination. This was discovered through test failures
         # in test_hex.py where the same dict is reused.

@@ -238,7 +239,7 @@ class HexSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "HexSource":
-        config = HexSourceConfig.parse_obj(config_dict)
+        config = HexSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

@@ -161,7 +161,7 @@ class IcebergSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "IcebergSource":
-        config = IcebergSourceConfig.parse_obj(config_dict)
+        config = IcebergSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

@@ -4,7 +4,7 @@ from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
 
 from humanfriendly import format_timespan
-from pydantic import Field, validator
+from pydantic import Field, field_validator
 from pyiceberg.catalog import Catalog, load_catalog
 from pyiceberg.catalog.rest import RestCatalog
 from requests.adapters import HTTPAdapter

@@ -108,7 +108,8 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         default=1, description="How many threads will be processing tables"
     )
 
-    @validator("catalog", pre=True, always=True)
+    @field_validator("catalog", mode="before")
+    @classmethod
     def handle_deprecated_catalog_format(cls, value):
         # Once support for deprecated format is dropped, we can remove this validator.
         if (

@@ -131,7 +132,8 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         # In case the input is already the new format or is invalid
         return value
 
-    @validator("catalog")
+    @field_validator("catalog", mode="after")
+    @classmethod
     def validate_catalog_size(cls, value):
         if len(value) != 1:
             raise ValueError("The catalog must contain exactly one entry.")

@@ -254,7 +254,7 @@ class AzureADSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = AzureADConfig.parse_obj(config_dict)
+        config = AzureADConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def __init__(self, config: AzureADConfig, ctx: PipelineContext):

@@ -11,7 +11,7 @@ import nest_asyncio
 from okta.client import Client as OktaClient
 from okta.exceptions import OktaAPIException
 from okta.models import Group, GroupProfile, User, UserProfile, UserStatus
-from pydantic import validator
+from pydantic import model_validator
 from pydantic.fields import Field
 
 from datahub.emitter.mcp import MetadataChangeProposalWrapper

@@ -157,21 +157,21 @@ class OktaConfig(StatefulIngestionConfigBase):
     mask_group_id: bool = True
     mask_user_id: bool = True
 
-    @validator("okta_users_search")
-    def okta_users_one_of_filter_or_search(cls, v, values):
-        if v and values["okta_users_filter"]:
+    @model_validator(mode="after")
+    def okta_users_one_of_filter_or_search(self) -> "OktaConfig":
+        if self.okta_users_search and self.okta_users_filter:
             raise ValueError(
                 "Only one of okta_users_filter or okta_users_search can be set"
             )
-        return v
+        return self
 
-    @validator("okta_groups_search")
-    def okta_groups_one_of_filter_or_search(cls, v, values):
-        if v and values["okta_groups_filter"]:
+    @model_validator(mode="after")
+    def okta_groups_one_of_filter_or_search(self) -> "OktaConfig":
+        if self.okta_groups_search and self.okta_groups_filter:
             raise ValueError(
                 "Only one of okta_groups_filter or okta_groups_search can be set"
             )
-        return v
+        return self
 
 
 @dataclass
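The OktaConfig change shows how v1 cross-field checks, which read sibling fields from the values argument of a per-field @validator, become v2 "after" model validators on the whole instance. An illustrative sketch with a hypothetical OktaLikeConfig model:

# Illustrative sketch; not DataHub code.
from typing import Optional

from pydantic import BaseModel, model_validator


class OktaLikeConfig(BaseModel):
    users_filter: Optional[str] = None
    users_search: Optional[str] = None

    @model_validator(mode="after")
    def one_of_filter_or_search(self) -> "OktaLikeConfig":
        # All fields are available on self, so sibling lookups need no values dict.
        if self.users_filter and self.users_search:
            raise ValueError("Only one of users_filter or users_search can be set")
        return self


OktaLikeConfig(users_search='profile.department eq "Engineering"')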
@@ -288,7 +288,7 @@ class OktaSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = OktaConfig.parse_obj(config_dict)
+        config = OktaConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def __init__(self, config: OktaConfig, ctx: PipelineContext):

@@ -267,7 +267,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "KafkaSource":
-        config: KafkaSourceConfig = KafkaSourceConfig.parse_obj(config_dict)
+        config: KafkaSourceConfig = KafkaSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

@@ -242,7 +242,7 @@ class LDAPSource(StatefulIngestionSourceBase):
     @classmethod
     def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "LDAPSource":
         """Factory method."""
-        config = LDAPSourceConfig.parse_obj(config_dict)
+        config = LDAPSourceConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
     User,
     WriteQuery,
 )
-from pydantic import validator
+from pydantic import field_validator
 
 import datahub.emitter.mce_builder as builder
 from datahub.api.entities.platformresource.platform_resource import (

@@ -202,8 +202,9 @@ class LookerViewId:
         folder_path=os.path.dirname(self.file_path),
     )
 
-    @validator("view_name")
-    def remove_quotes(cls, v):
+    @field_validator("view_name", mode="after")
+    @classmethod
+    def remove_quotes(cls, v: str) -> str:
         # Sanitize the name.
         v = v.replace('"', "").replace("`", "")
         return v

@@ -931,8 +932,9 @@ class LookerExplore:
     source_file: Optional[str] = None
     tags: List[str] = dataclasses_field(default_factory=list)
 
-    @validator("name")
-    def remove_quotes(cls, v):
+    @field_validator("name", mode="after")
+    @classmethod
+    def remove_quotes(cls, v: str) -> str:
         # Sanitize the name.
         v = v.replace('"', "").replace("`", "")
         return v

@@ -1,11 +1,11 @@
 import dataclasses
 import os
 import re
-from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union
 
 import pydantic
 from looker_sdk.sdk.api40.models import DBConnection
-from pydantic import Field, model_validator, validator
+from pydantic import Field, field_validator, model_validator
 
 from datahub.configuration import ConfigModel
 from datahub.configuration.common import (

@@ -198,17 +198,20 @@ class LookerConnectionDefinition(ConfigModel):
         "the top level Looker configuration",
     )
 
-    @validator("platform_env")
+    @field_validator("platform_env", mode="after")
+    @classmethod
     def platform_env_must_be_one_of(cls, v: Optional[str]) -> Optional[str]:
         if v is not None:
             return EnvConfigMixin.env_must_be_one_of(v)
         return v
 
-    @validator("platform", "default_db", "default_schema")
-    def lower_everything(cls, v):
+    @field_validator("platform", "default_db", "default_schema", mode="after")
+    @classmethod
+    def lower_everything(cls, v: Optional[str]) -> Optional[str]:
         """We lower case all strings passed in to avoid casing issues later"""
         if v is not None:
             return v.lower()
+        return v
 
     @classmethod
     def from_looker_connection(

@@ -326,22 +329,20 @@ class LookerDashboardSourceConfig(
         "Dashboards will only be ingested if they're allowed by both this config and dashboard_pattern.",
     )
 
-    @validator("external_base_url", pre=True, always=True)
+    @model_validator(mode="before")
+    @classmethod
     def external_url_defaults_to_api_config_base_url(
-        cls, v: Optional[str], *, values: Dict[str, Any], **kwargs: Dict[str, Any]
-    ) -> Optional[str]:
-        return v or values.get("base_url")
-
-    @validator("extract_independent_looks", always=True)
-    def stateful_ingestion_should_be_enabled(
-        cls, v: Optional[bool], *, values: Dict[str, Any], **kwargs: Dict[str, Any]
-    ) -> Optional[bool]:
-        stateful_ingestion: StatefulStaleMetadataRemovalConfig = cast(
-            StatefulStaleMetadataRemovalConfig, values.get("stateful_ingestion")
-        )
-        if v is True and (
-            stateful_ingestion is None or stateful_ingestion.enabled is False
+        cls, values: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        if "external_base_url" not in values or values["external_base_url"] is None:
+            values["external_base_url"] = values.get("base_url")
+        return values
+
+    @model_validator(mode="after")
+    def stateful_ingestion_should_be_enabled(self):
+        if self.extract_independent_looks is True and (
+            self.stateful_ingestion is None or self.stateful_ingestion.enabled is False
         ):
             raise ValueError("stateful_ingestion.enabled should be set to true")
 
-        return v
+        return self
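The final LookerDashboardSourceConfig hunk demonstrates defaulting one field from another: a v1 @validator(..., pre=True, always=True) that consulted values becomes a v2 @model_validator(mode="before") over the raw input dict. A hedged sketch with a hypothetical LookerLikeConfig model:

# Illustrative sketch; not DataHub code.
from typing import Any, Dict, Optional

from pydantic import BaseModel, model_validator


class LookerLikeConfig(BaseModel):
    base_url: str
    external_base_url: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def default_external_base_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Fill external_base_url from base_url before field parsing runs.
        if values.get("external_base_url") is None:
            values["external_base_url"] = values.get("base_url")
        return values


cfg = LookerLikeConfig(base_url="https://looker.example.com")
assert cfg.external_base_url == "https://looker.example.com"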