acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (193)
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/METADATA +2501 -2501
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/RECORD +193 -193
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  66. datahub/ingestion/source/elastic_search.py +4 -3
  67. datahub/ingestion/source/excel/source.py +1 -1
  68. datahub/ingestion/source/feast.py +1 -1
  69. datahub/ingestion/source/file.py +5 -4
  70. datahub/ingestion/source/fivetran/config.py +17 -16
  71. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  72. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  73. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  74. datahub/ingestion/source/ge_profiling_config.py +8 -5
  75. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  76. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  77. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  78. datahub/ingestion/source/grafana/models.py +23 -5
  79. datahub/ingestion/source/hex/api.py +7 -5
  80. datahub/ingestion/source/hex/hex.py +4 -3
  81. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  82. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  83. datahub/ingestion/source/identity/azure_ad.py +1 -1
  84. datahub/ingestion/source/identity/okta.py +10 -10
  85. datahub/ingestion/source/kafka/kafka.py +1 -1
  86. datahub/ingestion/source/ldap.py +1 -1
  87. datahub/ingestion/source/looker/looker_common.py +7 -5
  88. datahub/ingestion/source/looker/looker_config.py +21 -20
  89. datahub/ingestion/source/looker/lookml_config.py +47 -47
  90. datahub/ingestion/source/metabase.py +8 -8
  91. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  92. datahub/ingestion/source/metadata/lineage.py +13 -8
  93. datahub/ingestion/source/mlflow.py +1 -1
  94. datahub/ingestion/source/mode.py +6 -4
  95. datahub/ingestion/source/mongodb.py +4 -3
  96. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  97. datahub/ingestion/source/nifi.py +17 -23
  98. datahub/ingestion/source/openapi.py +6 -8
  99. datahub/ingestion/source/powerbi/config.py +33 -32
  100. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  101. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  102. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  103. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  104. datahub/ingestion/source/preset.py +8 -8
  105. datahub/ingestion/source/pulsar.py +1 -1
  106. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  107. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  108. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  109. datahub/ingestion/source/redshift/config.py +18 -20
  110. datahub/ingestion/source/redshift/redshift.py +2 -2
  111. datahub/ingestion/source/redshift/usage.py +23 -3
  112. datahub/ingestion/source/s3/config.py +83 -62
  113. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  114. datahub/ingestion/source/s3/source.py +8 -5
  115. datahub/ingestion/source/sac/sac.py +5 -4
  116. datahub/ingestion/source/salesforce.py +3 -2
  117. datahub/ingestion/source/schema/json_schema.py +2 -2
  118. datahub/ingestion/source/sigma/data_classes.py +3 -2
  119. datahub/ingestion/source/sigma/sigma.py +1 -1
  120. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  121. datahub/ingestion/source/slack/slack.py +1 -1
  122. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  123. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  125. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  126. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  127. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  128. datahub/ingestion/source/sql/athena.py +1 -1
  129. datahub/ingestion/source/sql/clickhouse.py +4 -2
  130. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  131. datahub/ingestion/source/sql/druid.py +1 -1
  132. datahub/ingestion/source/sql/hana.py +1 -1
  133. datahub/ingestion/source/sql/hive.py +7 -5
  134. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  135. datahub/ingestion/source/sql/mssql/source.py +13 -6
  136. datahub/ingestion/source/sql/mysql.py +1 -1
  137. datahub/ingestion/source/sql/oracle.py +17 -10
  138. datahub/ingestion/source/sql/postgres.py +2 -2
  139. datahub/ingestion/source/sql/presto.py +1 -1
  140. datahub/ingestion/source/sql/sql_config.py +8 -9
  141. datahub/ingestion/source/sql/sql_generic.py +1 -1
  142. datahub/ingestion/source/sql/teradata.py +1 -1
  143. datahub/ingestion/source/sql/trino.py +1 -1
  144. datahub/ingestion/source/sql/vertica.py +5 -4
  145. datahub/ingestion/source/sql_queries.py +11 -8
  146. datahub/ingestion/source/state/checkpoint.py +2 -2
  147. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  148. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  149. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  150. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/superset.py +9 -9
  152. datahub/ingestion/source/tableau/tableau.py +14 -16
  153. datahub/ingestion/source/unity/config.py +33 -34
  154. datahub/ingestion/source/unity/proxy.py +203 -0
  155. datahub/ingestion/source/unity/proxy_types.py +91 -0
  156. datahub/ingestion/source/unity/source.py +27 -2
  157. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  158. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  159. datahub/ingestion/source/usage/usage_common.py +5 -3
  160. datahub/ingestion/source_config/csv_enricher.py +7 -6
  161. datahub/ingestion/source_config/operation_config.py +7 -4
  162. datahub/ingestion/source_config/pulsar.py +11 -15
  163. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  164. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  165. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  166. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  167. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  168. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  169. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  171. datahub/ingestion/transformer/dataset_domain.py +3 -3
  172. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  173. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  174. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  175. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  176. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  177. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  178. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  179. datahub/ingestion/transformer/replace_external_url.py +2 -2
  180. datahub/ingestion/transformer/set_browse_path.py +1 -1
  181. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  182. datahub/lite/duckdb_lite.py +1 -1
  183. datahub/lite/lite_util.py +2 -2
  184. datahub/sdk/search_filters.py +68 -40
  185. datahub/secret/datahub_secret_store.py +7 -4
  186. datahub/secret/file_secret_store.py +1 -1
  187. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  188. datahub/testing/check_sql_parser_result.py +2 -2
  189. datahub/utilities/ingest_utils.py +1 -1
  190. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL +0 -0
  191. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt +0 -0
  192. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE +0 -0
  193. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/looker/lookml_config.py

@@ -1,10 +1,11 @@
 import logging
+from copy import deepcopy
 from dataclasses import dataclass, field as dataclass_field
 from datetime import timedelta
 from typing import Any, Dict, Literal, Optional, Union

 import pydantic
-from pydantic import root_validator, validator
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern
@@ -210,75 +211,74 @@ class LookMLSourceConfig(
         "All if comments are evaluated to true for configured looker_environment value",
     )

-    @validator("connection_to_platform_map", pre=True)
-    def convert_string_to_connection_def(cls, conn_map):
-        # Previous version of config supported strings in connection map. This upconverts strings to ConnectionMap
-        for key in conn_map:
-            if isinstance(conn_map[key], str):
-                platform = conn_map[key]
-                if "." in platform:
-                    platform_db_split = conn_map[key].split(".")
-                    connection = LookerConnectionDefinition(
-                        platform=platform_db_split[0],
-                        default_db=platform_db_split[1],
-                        default_schema="",
-                    )
-                    conn_map[key] = connection
-                else:
-                    logger.warning(
-                        f"Connection map for {key} provides platform {platform} but does not provide a default "
-                        f"database name. This might result in failed resolution"
-                    )
-                    conn_map[key] = LookerConnectionDefinition(
-                        platform=platform, default_db="", default_schema=""
-                    )
-        return conn_map
+    @model_validator(mode="before")
+    @classmethod
+    def convert_string_to_connection_def(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        values = deepcopy(values)
+        conn_map = values.get("connection_to_platform_map")
+        if conn_map:
+            # Previous version of config supported strings in connection map. This upconverts strings to ConnectionMap
+            for key in conn_map:
+                if isinstance(conn_map[key], str):
+                    platform = conn_map[key]
+                    if "." in platform:
+                        platform_db_split = conn_map[key].split(".")
+                        connection = LookerConnectionDefinition(
+                            platform=platform_db_split[0],
+                            default_db=platform_db_split[1],
+                            default_schema="",
+                        )
+                        conn_map[key] = connection
+                    else:
+                        logger.warning(
+                            f"Connection map for {key} provides platform {platform} but does not provide a default "
+                            f"database name. This might result in failed resolution"
+                        )
+                        conn_map[key] = LookerConnectionDefinition(
+                            platform=platform, default_db="", default_schema=""
+                        )
+        return values

-    @root_validator(skip_on_failure=True)
-    def check_either_connection_map_or_connection_provided(cls, values):
+    @model_validator(mode="after")
+    def check_either_connection_map_or_connection_provided(self):
         """Validate that we must either have a connection map or an api credential"""
-        if not values.get("connection_to_platform_map", {}) and not values.get(
-            "api", {}
-        ):
+        if not (self.connection_to_platform_map or {}) and not (self.api):
             raise ValueError(
                 "Neither api not connection_to_platform_map config was found. LookML source requires either api "
                 "credentials for Looker or a map of connection names to platform identifiers to work correctly"
             )
-        return values
+        return self

-    @root_validator(skip_on_failure=True)
-    def check_either_project_name_or_api_provided(cls, values):
+    @model_validator(mode="after")
+    def check_either_project_name_or_api_provided(self):
         """Validate that we must either have a project name or an api credential to fetch project names"""
-        if not values.get("project_name") and not values.get("api"):
+        if not self.project_name and not self.api:
             raise ValueError(
                 "Neither project_name not an API credential was found. LookML source requires either api credentials "
                 "for Looker or a project_name to accurately name views and models."
             )
-        return values
+        return self

-    @root_validator(skip_on_failure=True)
-    def check_api_provided_for_view_lineage(cls, values):
+    @model_validator(mode="after")
+    def check_api_provided_for_view_lineage(self):
         """Validate that we must have an api credential to use Looker API for view's column lineage"""
-        if not values.get("api") and values.get("use_api_for_view_lineage"):
+        if not self.api and self.use_api_for_view_lineage:
             raise ValueError(
                 "API credential was not found. LookML source requires api credentials "
                 "for Looker to use Looker APIs for view's column lineage extraction."
                 "Set `use_api_for_view_lineage` to False to skip using Looker APIs."
             )
-        return values
+        return self

-    @validator("base_folder", always=True)
-    def check_base_folder_if_not_provided(
-        cls, v: Optional[pydantic.DirectoryPath], values: Dict[str, Any]
-    ) -> Optional[pydantic.DirectoryPath]:
-        if v is None:
-            git_info: Optional[GitInfo] = values.get("git_info")
-            if git_info:
-                if not git_info.deploy_key:
+    @model_validator(mode="after")
+    def check_base_folder_if_not_provided(self):
+        if self.base_folder is None:
+            if self.git_info:
+                if not self.git_info.deploy_key:
                     logger.warning(
                         "git_info is provided, but no SSH key is present. If the repo is not public, we'll fail to "
                         "clone it."
                     )
             else:
                 raise ValueError("Neither base_folder nor git_info has been provided.")
-        return v
+        return self
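
The LookML hunks above follow the general pydantic v1 to v2 migration applied throughout this release: root_validator becomes model_validator, with mode="before" as a classmethod over the raw input dict and mode="after" as an instance method that returns self. A minimal sketch of the whole-model pattern, using a hypothetical SketchConfig model that is not part of this package:

    # Hypothetical model, for illustration only; not part of acryl-datahub.
    from typing import Any, Dict, Optional

    from pydantic import BaseModel, model_validator


    class SketchConfig(BaseModel):
        api: Optional[str] = None
        project_name: Optional[str] = None

        # pydantic v1: @root_validator(pre=True) over the raw dict.
        # pydantic v2: mode="before", and the method must be a classmethod.
        @model_validator(mode="before")
        @classmethod
        def normalize_input(cls, values: Dict[str, Any]) -> Dict[str, Any]:
            values.setdefault("project_name", None)
            return values

        # pydantic v1: @root_validator(skip_on_failure=True) returning `values`.
        # pydantic v2: mode="after" runs on the built instance and returns self.
        @model_validator(mode="after")
        def check_api_or_project(self) -> "SketchConfig":
            if not self.api and not self.project_name:
                raise ValueError("either api or project_name must be set")
            return self


    SketchConfig(project_name="demo")  # passes; SketchConfig() would raise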

datahub/ingestion/source/metabase.py

@@ -9,7 +9,7 @@ from typing import Dict, Iterable, List, Optional, Tuple, Union
 import dateutil.parser as dp
 import pydantic
 import requests
-from pydantic import Field, root_validator, validator
+from pydantic import Field, field_validator, model_validator
 from requests.models import HTTPError

 import datahub.emitter.mce_builder as builder
@@ -115,16 +115,16 @@ class MetabaseConfig(
     )
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @validator("connect_uri", "display_uri")
+    @field_validator("connect_uri", "display_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)

-    @root_validator(skip_on_failure=True)
-    def default_display_uri_to_connect_uri(cls, values):
-        base = values.get("display_uri")
-        if base is None:
-            values["display_uri"] = values.get("connect_uri")
-        return values
+    @model_validator(mode="after")
+    def default_display_uri_to_connect_uri(self) -> "MetabaseConfig":
+        if self.display_uri is None:
+            self.display_uri = self.connect_uri
+        return self


 @dataclass
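
Per-field validators are migrated the same way across the package: validator becomes field_validator with an explicit @classmethod, as in the MetabaseConfig hunk above. A minimal sketch, assuming a hypothetical UriConfig model rather than the real config class:

    # Hypothetical model, for illustration only; not part of acryl-datahub.
    from pydantic import BaseModel, field_validator


    class UriConfig(BaseModel):
        connect_uri: str
        display_uri: str = ""

        # pydantic v1: @validator("connect_uri", "display_uri")
        # pydantic v2: field_validator, explicitly marked as a classmethod.
        @field_validator("connect_uri", "display_uri", mode="after")
        @classmethod
        def remove_trailing_slash(cls, v: str) -> str:
            return v.rstrip("/")


    print(UriConfig(connect_uri="https://metabase.example.com/").connect_uri)
    # -> https://metabase.example.com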

datahub/ingestion/source/metadata/business_glossary.py

@@ -563,7 +563,7 @@ class BusinessGlossaryFileSource(Source):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = BusinessGlossarySourceConfig.parse_obj(config_dict)
+        config = BusinessGlossarySourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     @classmethod
@@ -571,7 +571,7 @@ class BusinessGlossaryFileSource(Source):
         cls, file_name: Union[str, pathlib.Path]
     ) -> BusinessGlossaryConfig:
         config = load_config_file(file_name, resolve_env_vars=True)
-        glossary_cfg = BusinessGlossaryConfig.parse_obj(config)
+        glossary_cfg = BusinessGlossaryConfig.model_validate(config)
         return glossary_cfg

     def get_workunits_internal(
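
The remaining mechanical change, visible in most of the source files below, is the rename of parse_obj to model_validate, pydantic v2's entry point for building a model from a plain dict. A minimal sketch with a hypothetical GlossaryNode model:

    # Hypothetical model, for illustration only; not part of acryl-datahub.
    from pydantic import BaseModel


    class GlossaryNode(BaseModel):
        name: str


    # pydantic v1 style, deprecated in v2: GlossaryNode.parse_obj({"name": "finance"})
    node = GlossaryNode.model_validate({"name": "finance"})  # v2 style used in this diff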

datahub/ingestion/source/metadata/lineage.py

@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 from functools import partial
 from typing import Any, Dict, Iterable, List, Optional

-from pydantic import validator
+from pydantic import field_validator
 from pydantic.fields import Field

 import datahub.metadata.schema_classes as models
@@ -51,7 +51,8 @@ class EntityConfig(EnvConfigMixin):
     platform: str
     platform_instance: Optional[str] = None

-    @validator("type")
+    @field_validator("type", mode="after")
+    @classmethod
     def type_must_be_supported(cls, v: str) -> str:
         allowed_types = ["dataset"]
         if v not in allowed_types:
@@ -60,7 +61,8 @@
             )
         return v

-    @validator("name")
+    @field_validator("name", mode="after")
+    @classmethod
     def validate_name(cls, v: str) -> str:
         if v.startswith("urn:li:"):
             raise ValueError(
@@ -77,7 +79,8 @@ class FineGrainedLineageConfig(ConfigModel):
     transformOperation: Optional[str]
     confidenceScore: Optional[float] = 1.0

-    @validator("upstreamType")
+    @field_validator("upstreamType", mode="after")
+    @classmethod
     def upstream_type_must_be_supported(cls, v: str) -> str:
         allowed_types = [
             FineGrainedLineageUpstreamTypeClass.FIELD_SET,
@@ -90,7 +93,8 @@ class FineGrainedLineageConfig(ConfigModel):
             )
         return v

-    @validator("downstreamType")
+    @field_validator("downstreamType", mode="after")
+    @classmethod
     def downstream_type_must_be_supported(cls, v: str) -> str:
         allowed_types = [
             FineGrainedLineageDownstreamTypeClass.FIELD_SET,
@@ -124,7 +128,8 @@ class LineageFileSourceConfig(ConfigModel):
 class LineageConfig(VersionedConfig):
     lineage: List[EntityNodeConfig]

-    @validator("version")
+    @field_validator("version", mode="after")
+    @classmethod
     def version_must_be_1(cls, v):
         if v != "1":
             raise ValueError("Only version 1 is supported")
@@ -148,13 +153,13 @@ class LineageFileSource(Source):
     def create(
         cls, config_dict: Dict[str, Any], ctx: PipelineContext
     ) -> "LineageFileSource":
-        config = LineageFileSourceConfig.parse_obj(config_dict)
+        config = LineageFileSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     @staticmethod
     def load_lineage_config(file_name: str) -> LineageConfig:
         config = load_config_file(file_name, resolve_env_vars=True)
-        lineage_config = LineageConfig.parse_obj(config)
+        lineage_config = LineageConfig.model_validate(config)
         return lineage_config

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/mlflow.py

@@ -892,5 +892,5 @@ class MLflowSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "MLflowSource":
-        config = MLflowConfig.parse_obj(config_dict)
+        config = MLflowConfig.model_validate(config_dict)
         return cls(ctx, config)

datahub/ingestion/source/mode.py

@@ -26,7 +26,7 @@ import sqlglot
 import tenacity
 import yaml
 from liquid import Template, Undefined
-from pydantic import Field, validator
+from pydantic import Field, field_validator
 from requests.adapters import HTTPAdapter, Retry
 from requests.exceptions import ConnectionError
 from requests.models import HTTPBasicAuth, HTTPError
@@ -218,11 +218,13 @@ class ModeConfig(
         default=False, description="Exclude archived reports"
     )

-    @validator("connect_uri")
+    @field_validator("connect_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)

-    @validator("items_per_page")
+    @field_validator("items_per_page", mode="after")
+    @classmethod
     def validate_items_per_page(cls, v):
         if 1 <= v <= DEFAULT_API_ITEMS_PER_PAGE:
             return v
@@ -1824,7 +1826,7 @@ class ModeSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ModeSource":
-        config: ModeConfig = ModeConfig.parse_obj(config_dict)
+        config: ModeConfig = ModeConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/mongodb.py

@@ -6,7 +6,7 @@ from typing import Dict, Iterable, List, Optional, Tuple, Type, Union, ValuesVie
 import bson.timestamp
 import pymongo.collection
 from packaging import version
-from pydantic import PositiveInt, validator
+from pydantic import PositiveInt, field_validator
 from pydantic.fields import Field
 from pymongo.mongo_client import MongoClient

@@ -138,7 +138,8 @@ class MongoDBConfig(
     # Custom Stateful Ingestion settings
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @validator("maxDocumentSize")
+    @field_validator("maxDocumentSize", mode="after")
+    @classmethod
     def check_max_doc_size_filter_is_valid(cls, doc_size_filter_value):
         if doc_size_filter_value > 16793600:
             raise ValueError("maxDocumentSize must be a positive value <= 16793600.")
@@ -311,7 +312,7 @@ class MongoDBSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "MongoDBSource":
-        config = MongoDBConfig.parse_obj(config_dict)
+        config = MongoDBConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/neo4j/neo4j_source.py

@@ -78,7 +78,7 @@ class Neo4jSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "Neo4jSource":
-        config = Neo4jConfig.parse_obj(config_dict)
+        config = Neo4jConfig.model_validate(config_dict)
         return cls(config, ctx)

     def create_schema_field_tuple(

datahub/ingestion/source/nifi.py

@@ -13,7 +13,7 @@ import requests
 from cached_property import cached_property
 from dateutil import parser
 from packaging import version
-from pydantic import root_validator, validator
+from pydantic import field_validator, model_validator
 from pydantic.fields import Field
 from requests import Response
 from requests.adapters import HTTPAdapter
@@ -165,39 +165,33 @@ class NifiSourceConfig(StatefulIngestionConfigBase, EnvConfigMixin):
         " When disabled, re-states lineage on each run.",
     )

-    @root_validator(skip_on_failure=True)
-    def validate_auth_params(cls, values):
-        if values.get("auth") is NifiAuthType.CLIENT_CERT and not values.get(
-            "client_cert_file"
-        ):
+    @model_validator(mode="after")
+    def validate_auth_params(self) -> "NifiSourceConfig":
+        if self.auth is NifiAuthType.CLIENT_CERT and not self.client_cert_file:
             raise ValueError(
                 "Config `client_cert_file` is required for CLIENT_CERT auth"
             )
-        elif values.get("auth") in (
+        elif self.auth in (
             NifiAuthType.SINGLE_USER,
             NifiAuthType.BASIC_AUTH,
-        ) and (not values.get("username") or not values.get("password")):
+        ) and (not self.username or not self.password):
             raise ValueError(
-                f"Config `username` and `password` is required for {values.get('auth').value} auth"
+                f"Config `username` and `password` is required for {self.auth.value} auth"
             )
-        return values
-
-    @root_validator(skip_on_failure=True)
-    def validator_site_url_to_site_name(cls, values):
-        site_url_to_site_name = values.get("site_url_to_site_name")
-        site_url = values.get("site_url")
-        site_name = values.get("site_name")
+        return self

-        if site_url_to_site_name is None:
-            site_url_to_site_name = {}
-            values["site_url_to_site_name"] = site_url_to_site_name
+    @model_validator(mode="after")
+    def validator_site_url_to_site_name(self) -> "NifiSourceConfig":
+        if self.site_url_to_site_name is None:
+            self.site_url_to_site_name = {}

-        if site_url not in site_url_to_site_name:
-            site_url_to_site_name[site_url] = site_name
+        if self.site_url not in self.site_url_to_site_name:
+            self.site_url_to_site_name[self.site_url] = self.site_name

-        return values
+        return self

-    @validator("site_url")
+    @field_validator("site_url", mode="after")
+    @classmethod
     def validator_site_url(cls, site_url: str) -> str:
         assert site_url.startswith(("http://", "https://")), (
             "site_url must start with http:// or https://"

datahub/ingestion/source/openapi.py

@@ -4,7 +4,7 @@ import warnings
 from abc import ABC
 from typing import Dict, Iterable, List, Optional, Tuple

-from pydantic import validator
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import ConfigModel
@@ -86,13 +86,11 @@ class OpenApiConfig(ConfigModel):
         default=True, description="Enable SSL certificate verification"
     )

-    @validator("bearer_token", always=True)
-    def ensure_only_one_token(
-        cls, bearer_token: Optional[str], values: Dict
-    ) -> Optional[str]:
-        if bearer_token is not None and values.get("token") is not None:
+    @model_validator(mode="after")
+    def ensure_only_one_token(self) -> "OpenApiConfig":
+        if self.bearer_token is not None and self.token is not None:
             raise ValueError("Unable to use 'token' and 'bearer_token' together.")
-        return bearer_token
+        return self

     def get_swagger(self) -> Dict:
         if self.get_token or self.token or self.bearer_token is not None:
@@ -463,5 +461,5 @@ class OpenApiSource(APISource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = OpenApiConfig.parse_obj(config_dict)
+        config = OpenApiConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/source/powerbi/config.py

@@ -4,7 +4,7 @@ from enum import Enum
 from typing import Dict, List, Literal, Optional, Union

 import pydantic
-from pydantic import root_validator, validator
+from pydantic import field_validator, model_validator

 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
@@ -540,8 +540,8 @@ class PowerBiDashboardSourceConfig(
         description="timeout in seconds for Metadata Rest Api.",
     )

-    @root_validator(skip_on_failure=True)
-    def validate_extract_column_level_lineage(cls, values: Dict) -> Dict:
+    @model_validator(mode="after")
+    def validate_extract_column_level_lineage(self) -> "PowerBiDashboardSourceConfig":
         flags = [
             "native_query_parsing",
             "enable_advance_lineage_sql_construct",
@@ -549,26 +549,23 @@
             "extract_dataset_schema",
         ]

-        if (
-            "extract_column_level_lineage" in values
-            and values["extract_column_level_lineage"] is False
-        ):
+        if self.extract_column_level_lineage is False:
             # Flag is not set. skip validation
-            return values
+            return self

         logger.debug(f"Validating additional flags: {flags}")

         is_flag_enabled: bool = True
         for flag in flags:
-            if flag not in values or values[flag] is False:
+            if not getattr(self, flag, True):
                 is_flag_enabled = False

         if not is_flag_enabled:
             raise ValueError(f"Enable all these flags in recipe: {flags} ")

-        return values
+        return self

-    @validator("dataset_type_mapping")
+    @field_validator("dataset_type_mapping", mode="after")
     @classmethod
     def map_data_platform(cls, value):
         # For backward compatibility convert input PostgreSql to PostgreSQL
@@ -580,28 +577,32 @@

         return value

-    @root_validator(skip_on_failure=True)
-    def workspace_id_backward_compatibility(cls, values: Dict) -> Dict:
-        workspace_id = values.get("workspace_id")
-        workspace_id_pattern = values.get("workspace_id_pattern")
-
-        if workspace_id_pattern == AllowDenyPattern.allow_all() and workspace_id:
+    @model_validator(mode="after")
+    def workspace_id_backward_compatibility(self) -> "PowerBiDashboardSourceConfig":
+        if (
+            self.workspace_id_pattern == AllowDenyPattern.allow_all()
+            and self.workspace_id
+        ):
             logger.warning(
                 "workspace_id_pattern is not set but workspace_id is set, setting workspace_id as "
                 "workspace_id_pattern. workspace_id will be deprecated, please use workspace_id_pattern instead."
             )
-            values["workspace_id_pattern"] = AllowDenyPattern(
-                allow=[f"^{workspace_id}$"]
+            self.workspace_id_pattern = AllowDenyPattern(
+                allow=[f"^{self.workspace_id}$"]
             )
-        elif workspace_id_pattern != AllowDenyPattern.allow_all() and workspace_id:
+        elif (
+            self.workspace_id_pattern != AllowDenyPattern.allow_all()
+            and self.workspace_id
+        ):
             logger.warning(
                 "workspace_id will be ignored in favour of workspace_id_pattern. workspace_id will be deprecated, "
                 "please use workspace_id_pattern only."
             )
-            values.pop("workspace_id")
-        return values
+            self.workspace_id = None
+        return self

-    @root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def raise_error_for_dataset_type_mapping(cls, values: Dict) -> Dict:
         if (
             values.get("dataset_type_mapping") is not None
@@ -613,18 +614,18 @@

         return values

-    @root_validator(skip_on_failure=True)
-    def validate_extract_dataset_schema(cls, values: Dict) -> Dict:
-        if values.get("extract_dataset_schema") is False:
+    @model_validator(mode="after")
+    def validate_extract_dataset_schema(self) -> "PowerBiDashboardSourceConfig":
+        if self.extract_dataset_schema is False:
             add_global_warning(
                 "Please use `extract_dataset_schema: true`, otherwise dataset schema extraction will be skipped."
             )
-        return values
+        return self

-    @root_validator(skip_on_failure=True)
-    def validate_dsn_to_database_schema(cls, values: Dict) -> Dict:
-        if values.get("dsn_to_database_schema") is not None:
-            dsn_mapping = values.get("dsn_to_database_schema")
+    @model_validator(mode="after")
+    def validate_dsn_to_database_schema(self) -> "PowerBiDashboardSourceConfig":
+        if self.dsn_to_database_schema is not None:
+            dsn_mapping = self.dsn_to_database_schema
             if not isinstance(dsn_mapping, dict):
                 raise ValueError("dsn_to_database_schema must contain key-value pairs")

@@ -639,4 +640,4 @@
                         f"dsn_to_database_schema invalid mapping value: {value}"
                     )

-        return values
+        return self

datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py

@@ -41,7 +41,7 @@ class ResolvePlatformInstanceFromDatasetTypeMapping(
         if isinstance(platform, PlatformDetail):
             return platform

-        return PlatformDetail.parse_obj({})
+        return PlatformDetail.model_validate({})


 class ResolvePlatformInstanceFromServerToPlatformInstance(
@@ -56,7 +56,7 @@ class ResolvePlatformInstanceFromServerToPlatformInstance(
             ]
             if data_platform_detail.data_platform_server
             in self.config.server_to_platform_instance
-            else PlatformDetail.parse_obj({})
+            else PlatformDetail.model_validate({})
         )


datahub/ingestion/source/powerbi/powerbi.py

@@ -1316,7 +1316,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = PowerBiDashboardSourceConfig.parse_obj(config_dict)
+        config = PowerBiDashboardSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_allowed_workspaces(self) -> List[powerbi_data_classes.Workspace]:

datahub/ingestion/source/powerbi_report_server/report_server.py

@@ -213,7 +213,7 @@ class PowerBiReportServerAPI:

         if response_dict.get("value"):
             reports.extend(
-                report_types_mapping[report_type].parse_obj(report)
+                report_types_mapping[report_type].model_validate(report)
                 for report in response_dict.get("value")
             )

@@ -517,7 +517,7 @@ class PowerBiReportServerDashboardSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = PowerBiReportServerDashboardSourceConfig.parse_obj(config_dict)
+        config = PowerBiReportServerDashboardSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/powerbi_report_server/report_server_domain.py

@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import Any, Dict, List, Optional

-from pydantic import BaseModel, Field, validator
+from pydantic import BaseModel, Field, model_validator

 from datahub.ingestion.source.powerbi_report_server.constants import (
     RelationshipDirection,
@@ -30,11 +30,13 @@ class CatalogItem(BaseModel):
     has_data_sources: bool = Field(False, alias="HasDataSources")
     data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")

-    @validator("display_name", always=True)
-    def validate_diplay_name(cls, value, values):
-        if values["created_by"]:
-            return values["created_by"].split("\\")[-1]
-        return ""
+    @model_validator(mode="after")
+    def validate_diplay_name(self):
+        if self.created_by:
+            self.display_name = self.created_by.split("\\")[-1]
+        else:
+            self.display_name = ""
+        return self

     def get_urn_part(self):
         return f"reports.{self.id}"