acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1rc1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.
Files changed (193)
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/METADATA +2501 -2501
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/RECORD +193 -193
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  66. datahub/ingestion/source/elastic_search.py +4 -3
  67. datahub/ingestion/source/excel/source.py +1 -1
  68. datahub/ingestion/source/feast.py +1 -1
  69. datahub/ingestion/source/file.py +5 -4
  70. datahub/ingestion/source/fivetran/config.py +17 -16
  71. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  72. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  73. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  74. datahub/ingestion/source/ge_profiling_config.py +8 -5
  75. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  76. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  77. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  78. datahub/ingestion/source/grafana/models.py +23 -5
  79. datahub/ingestion/source/hex/api.py +7 -5
  80. datahub/ingestion/source/hex/hex.py +4 -3
  81. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  82. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  83. datahub/ingestion/source/identity/azure_ad.py +1 -1
  84. datahub/ingestion/source/identity/okta.py +10 -10
  85. datahub/ingestion/source/kafka/kafka.py +1 -1
  86. datahub/ingestion/source/ldap.py +1 -1
  87. datahub/ingestion/source/looker/looker_common.py +7 -5
  88. datahub/ingestion/source/looker/looker_config.py +21 -20
  89. datahub/ingestion/source/looker/lookml_config.py +47 -47
  90. datahub/ingestion/source/metabase.py +8 -8
  91. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  92. datahub/ingestion/source/metadata/lineage.py +13 -8
  93. datahub/ingestion/source/mlflow.py +1 -1
  94. datahub/ingestion/source/mode.py +6 -4
  95. datahub/ingestion/source/mongodb.py +4 -3
  96. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  97. datahub/ingestion/source/nifi.py +17 -23
  98. datahub/ingestion/source/openapi.py +6 -8
  99. datahub/ingestion/source/powerbi/config.py +33 -32
  100. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  101. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  102. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  103. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  104. datahub/ingestion/source/preset.py +8 -8
  105. datahub/ingestion/source/pulsar.py +1 -1
  106. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  107. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  108. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  109. datahub/ingestion/source/redshift/config.py +18 -20
  110. datahub/ingestion/source/redshift/redshift.py +2 -2
  111. datahub/ingestion/source/redshift/usage.py +23 -3
  112. datahub/ingestion/source/s3/config.py +83 -62
  113. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  114. datahub/ingestion/source/s3/source.py +8 -5
  115. datahub/ingestion/source/sac/sac.py +5 -4
  116. datahub/ingestion/source/salesforce.py +3 -2
  117. datahub/ingestion/source/schema/json_schema.py +2 -2
  118. datahub/ingestion/source/sigma/data_classes.py +3 -2
  119. datahub/ingestion/source/sigma/sigma.py +1 -1
  120. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  121. datahub/ingestion/source/slack/slack.py +1 -1
  122. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  123. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  125. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  126. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  127. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  128. datahub/ingestion/source/sql/athena.py +1 -1
  129. datahub/ingestion/source/sql/clickhouse.py +4 -2
  130. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  131. datahub/ingestion/source/sql/druid.py +1 -1
  132. datahub/ingestion/source/sql/hana.py +1 -1
  133. datahub/ingestion/source/sql/hive.py +7 -5
  134. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  135. datahub/ingestion/source/sql/mssql/source.py +13 -6
  136. datahub/ingestion/source/sql/mysql.py +1 -1
  137. datahub/ingestion/source/sql/oracle.py +17 -10
  138. datahub/ingestion/source/sql/postgres.py +2 -2
  139. datahub/ingestion/source/sql/presto.py +1 -1
  140. datahub/ingestion/source/sql/sql_config.py +8 -9
  141. datahub/ingestion/source/sql/sql_generic.py +1 -1
  142. datahub/ingestion/source/sql/teradata.py +1 -1
  143. datahub/ingestion/source/sql/trino.py +1 -1
  144. datahub/ingestion/source/sql/vertica.py +5 -4
  145. datahub/ingestion/source/sql_queries.py +11 -8
  146. datahub/ingestion/source/state/checkpoint.py +2 -2
  147. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  148. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  149. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  150. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/superset.py +9 -9
  152. datahub/ingestion/source/tableau/tableau.py +14 -16
  153. datahub/ingestion/source/unity/config.py +33 -34
  154. datahub/ingestion/source/unity/proxy.py +203 -0
  155. datahub/ingestion/source/unity/proxy_types.py +91 -0
  156. datahub/ingestion/source/unity/source.py +27 -2
  157. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  158. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  159. datahub/ingestion/source/usage/usage_common.py +5 -3
  160. datahub/ingestion/source_config/csv_enricher.py +7 -6
  161. datahub/ingestion/source_config/operation_config.py +7 -4
  162. datahub/ingestion/source_config/pulsar.py +11 -15
  163. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  164. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  165. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  166. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  167. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  168. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  169. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  171. datahub/ingestion/transformer/dataset_domain.py +3 -3
  172. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  173. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  174. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  175. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  176. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  177. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  178. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  179. datahub/ingestion/transformer/replace_external_url.py +2 -2
  180. datahub/ingestion/transformer/set_browse_path.py +1 -1
  181. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  182. datahub/lite/duckdb_lite.py +1 -1
  183. datahub/lite/lite_util.py +2 -2
  184. datahub/sdk/search_filters.py +68 -40
  185. datahub/secret/datahub_secret_store.py +7 -4
  186. datahub/secret/file_secret_store.py +1 -1
  187. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  188. datahub/testing/check_sql_parser_result.py +2 -2
  189. datahub/utilities/ingest_utils.py +1 -1
  190. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL +0 -0
  191. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt +0 -0
  192. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE +0 -0
  193. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt +0 -0
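
Nearly all of the hunks below apply the same Pydantic v1 → v2 migration: `@root_validator` becomes `@model_validator`, `@validator` becomes `@field_validator` (with previously validated fields read from `info.data` instead of a `values` dict), and `Model.parse_obj(...)` becomes `Model.model_validate(...)`. A minimal before/after sketch of that pattern, using a hypothetical `ExampleConfig` model that is not part of acryl-datahub:

# Hypothetical model for illustration only; the names below are not from the datahub codebase.
from typing import Any, Dict

from pydantic import BaseModel, ValidationInfo, field_validator, model_validator


class ExampleConfig(BaseModel):
    host_port: str
    include_table_lineage: bool = True
    include_column_lineage: bool = True

    # Pydantic v1 equivalent: @root_validator(pre=True)
    @model_validator(mode="before")
    @classmethod
    def drop_legacy_keys(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Runs before field parsing, like the old pre=True root validator.
        values.pop("deprecated_option", None)
        return values

    # Pydantic v1 equivalent: @validator("include_column_lineage") with a `values` dict
    @field_validator("include_column_lineage", mode="after")
    @classmethod
    def check_column_lineage(cls, v: bool, info: ValidationInfo) -> bool:
        # info.data holds the fields already validated, replacing the old `values` dict.
        if v and not info.data.get("include_table_lineage"):
            raise ValueError(
                "include_table_lineage must be True for include_column_lineage to be set."
            )
        return v


# Pydantic v1 equivalent: ExampleConfig.parse_obj({...})
config = ExampleConfig.model_validate({"host_port": "localhost:443"})

Under v2, `@classmethod` is expected on `mode="before"` model validators and on field validators, which is why many hunks add it alongside the decorator swap.
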
@@ -2,7 +2,7 @@ from copy import deepcopy
  from datetime import datetime
  from typing import Dict, List, Optional
 
- from pydantic import BaseModel, root_validator
+ from pydantic import BaseModel, model_validator
 
  from datahub.emitter.mcp_builder import ContainerKey
 
@@ -22,7 +22,8 @@ class Workspace(BaseModel):
  createdAt: datetime
  updatedAt: datetime
 
- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def update_values(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -150,7 +150,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = SigmaSourceConfig.parse_obj(config_dict)
+ config = SigmaSourceConfig.model_validate(config_dict)
  return cls(config, ctx)
 
  def _gen_workbook_key(self, workbook_id: str) -> WorkbookKey:
@@ -108,7 +108,7 @@ class SigmaAPI:
  self.report.non_accessible_workspaces_count += 1
  return None
  response.raise_for_status()
- workspace = Workspace.parse_obj(response.json())
+ workspace = Workspace.model_validate(response.json())
  self.workspaces[workspace.workspaceId] = workspace
  return workspace
  except Exception as e:
@@ -127,7 +127,7 @@ class SigmaAPI:
  response_dict = response.json()
  for workspace_dict in response_dict[Constant.ENTRIES]:
  self.workspaces[workspace_dict[Constant.WORKSPACEID]] = (
- Workspace.parse_obj(workspace_dict)
+ Workspace.model_validate(workspace_dict)
  )
  if response_dict[Constant.NEXTPAGE]:
  url = f"{workspace_url}&page={response_dict[Constant.NEXTPAGE]}"
@@ -197,7 +197,7 @@ class SigmaAPI:
  response.raise_for_status()
  response_dict = response.json()
  for file_dict in response_dict[Constant.ENTRIES]:
- file = File.parse_obj(file_dict)
+ file = File.model_validate(file_dict)
  file.workspaceId = self.get_workspace_id_from_file_path(
  file.parentId, file.path
  )
@@ -225,7 +225,7 @@ class SigmaAPI:
  response.raise_for_status()
  response_dict = response.json()
  for dataset_dict in response_dict[Constant.ENTRIES]:
- dataset = SigmaDataset.parse_obj(dataset_dict)
+ dataset = SigmaDataset.model_validate(dataset_dict)
 
  if dataset.datasetId not in dataset_files_metadata:
  self.report.datasets.dropped(
@@ -354,7 +354,7 @@ class SigmaAPI:
  element_dict[Constant.URL] = (
  f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true"
  )
- element = Element.parse_obj(element_dict)
+ element = Element.model_validate(element_dict)
  if (
  self.config.extract_lineage
  and self.config.workbook_lineage_pattern.allowed(workbook.name)
@@ -379,7 +379,7 @@ class SigmaAPI:
  )
  response.raise_for_status()
  for page_dict in response.json()[Constant.ENTRIES]:
- page = Page.parse_obj(page_dict)
+ page = Page.model_validate(page_dict)
  page.elements = self.get_page_elements(workbook, page)
  pages.append(page)
  return pages
@@ -400,7 +400,7 @@ class SigmaAPI:
  response.raise_for_status()
  response_dict = response.json()
  for workbook_dict in response_dict[Constant.ENTRIES]:
- workbook = Workbook.parse_obj(workbook_dict)
+ workbook = Workbook.model_validate(workbook_dict)
 
  if workbook.workbookId not in workbook_files_metadata:
  # Due to a bug in the Sigma API, it seems like the /files endpoint does not
@@ -260,7 +260,7 @@ class SlackSource(StatefulIngestionSourceBase):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = SlackSourceConfig.parse_obj(config_dict)
+ config = SlackSourceConfig.model_validate(config_dict)
  return cls(ctx, config)
 
  def get_slack_client(self) -> WebClient:
@@ -351,5 +351,5 @@ class SnaplogicSource(StatefulIngestionSourceBase):
 
  @classmethod
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "SnaplogicSource":
- config = SnaplogicConfig.parse_obj(config_dict)
+ config = SnaplogicConfig.model_validate(config_dict)
  return cls(config, ctx)
@@ -91,7 +91,7 @@ class SnowflakeAssertionsHandler:
  self, result_row: dict, discovered_datasets: List[str]
  ) -> Optional[MetadataChangeProposalWrapper]:
  try:
- result = DataQualityMonitoringResult.parse_obj(result_row)
+ result = DataQualityMonitoringResult.model_validate(result_row)
  assertion_guid = result.METRIC_NAME.split("__")[-1].lower()
  status = bool(result.VALUE) # 1 if PASS, 0 if FAIL
  assertee = self.identifiers.get_dataset_identifier(
@@ -5,7 +5,7 @@ from enum import Enum
  from typing import Dict, List, Optional, Set
 
  import pydantic
- from pydantic import Field, root_validator, validator
+ from pydantic import Field, ValidationInfo, field_validator, model_validator
 
  from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
  from datahub.configuration.pattern_utils import UUID_REGEX
@@ -122,10 +122,10 @@ class SnowflakeFilterConfig(SQLFilterConfig):
  description="Whether `schema_pattern` is matched against fully qualified schema name `<catalog>.<schema>`.",
  )
 
- @root_validator(pre=False, skip_on_failure=True)
- def validate_legacy_schema_pattern(cls, values: Dict) -> Dict:
- schema_pattern: Optional[AllowDenyPattern] = values.get("schema_pattern")
- match_fully_qualified_names = values.get("match_fully_qualified_names")
+ @model_validator(mode="after")
+ def validate_legacy_schema_pattern(self) -> "SnowflakeFilterConfig":
+ schema_pattern: Optional[AllowDenyPattern] = self.schema_pattern
+ match_fully_qualified_names = self.match_fully_qualified_names
 
  if (
  schema_pattern is not None
@@ -145,7 +145,7 @@ class SnowflakeFilterConfig(SQLFilterConfig):
  assert isinstance(schema_pattern, AllowDenyPattern)
  schema_pattern.deny.append(r".*INFORMATION_SCHEMA$")
 
- return values
+ return self
 
 
  class SnowflakeIdentifierConfig(
@@ -391,7 +391,8 @@ class SnowflakeV2Config(
  "This may be required in the case of _eg_ temporary tables being created in a different database than the ones in the database_name patterns.",
  )
 
- @validator("convert_urns_to_lowercase")
+ @field_validator("convert_urns_to_lowercase", mode="after")
+ @classmethod
  def validate_convert_urns_to_lowercase(cls, v):
  if not v:
  add_global_warning(
@@ -400,30 +401,31 @@ class SnowflakeV2Config(
 
  return v
 
- @validator("include_column_lineage")
- def validate_include_column_lineage(cls, v, values):
- if not values.get("include_table_lineage") and v:
+ @field_validator("include_column_lineage", mode="after")
+ @classmethod
+ def validate_include_column_lineage(cls, v, info):
+ if not info.data.get("include_table_lineage") and v:
  raise ValueError(
  "include_table_lineage must be True for include_column_lineage to be set."
  )
  return v
 
- @root_validator(pre=False, skip_on_failure=True)
- def validate_unsupported_configs(cls, values: Dict) -> Dict:
- value = values.get("include_read_operational_stats")
- if value is not None and value:
+ @model_validator(mode="after")
+ def validate_unsupported_configs(self) -> "SnowflakeV2Config":
+ if (
+ hasattr(self, "include_read_operational_stats")
+ and self.include_read_operational_stats
+ ):
  raise ValueError(
  "include_read_operational_stats is not supported. Set `include_read_operational_stats` to False.",
  )
 
- include_technical_schema = values.get("include_technical_schema")
- include_profiles = (
- values.get("profiling") is not None and values["profiling"].enabled
- )
+ include_technical_schema = self.include_technical_schema
+ include_profiles = self.profiling is not None and self.profiling.enabled
  delete_detection_enabled = (
- values.get("stateful_ingestion") is not None
- and values["stateful_ingestion"].enabled
- and values["stateful_ingestion"].remove_stale_metadata
+ self.stateful_ingestion is not None
+ and self.stateful_ingestion.enabled
+ and self.stateful_ingestion.remove_stale_metadata
  )
 
  # TODO: Allow profiling irrespective of basic schema extraction,
@@ -435,13 +437,14 @@ class SnowflakeV2Config(
  "Cannot perform Deletion Detection or Profiling without extracting snowflake technical schema. Set `include_technical_schema` to True or disable Deletion Detection and Profiling."
  )
 
- return values
+ return self
 
- @validator("shares")
+ @field_validator("shares", mode="after")
+ @classmethod
  def validate_shares(
- cls, shares: Optional[Dict[str, SnowflakeShareConfig]], values: Dict
+ cls, shares: Optional[Dict[str, SnowflakeShareConfig]], info: ValidationInfo
  ) -> Optional[Dict[str, SnowflakeShareConfig]]:
- current_platform_instance = values.get("platform_instance")
+ current_platform_instance = info.data.get("platform_instance")
 
  if shares:
  # Check: platform_instance should be present
@@ -479,11 +482,12 @@ class SnowflakeV2Config(
 
  return shares
 
- @root_validator(pre=False, skip_on_failure=True)
- def validate_queries_v2_stateful_ingestion(cls, values: Dict) -> Dict:
- if values.get("use_queries_v2"):
- if values.get("enable_stateful_lineage_ingestion") or values.get(
- "enable_stateful_usage_ingestion"
+ @model_validator(mode="after")
+ def validate_queries_v2_stateful_ingestion(self) -> "SnowflakeV2Config":
+ if self.use_queries_v2:
+ if (
+ self.enable_stateful_lineage_ingestion
+ or self.enable_stateful_usage_ingestion
  ):
  logger.warning(
  "enable_stateful_lineage_ingestion and enable_stateful_usage_ingestion are deprecated "
@@ -491,7 +495,7 @@ class SnowflakeV2Config(
  "For queries v2, use enable_stateful_time_window instead to enable stateful ingestion "
  "for the unified time window extraction (lineage + usage + operations + queries)."
  )
- return values
+ return self
 
  def outbounds(self) -> Dict[str, Set[DatabaseId]]:
  """
@@ -6,6 +6,7 @@ import pydantic
  import snowflake.connector
  from cryptography.hazmat.backends import default_backend
  from cryptography.hazmat.primitives import serialization
+ from pydantic import field_validator, model_validator
  from snowflake.connector import SnowflakeConnection as NativeSnowflakeConnection
  from snowflake.connector.cursor import DictCursor
  from snowflake.connector.network import (
@@ -125,26 +126,28 @@ class SnowflakeConnectionConfig(ConfigModel):
 
  rename_host_port_to_account_id = pydantic_renamed_field("host_port", "account_id") # type: ignore[pydantic-field]
 
- @pydantic.validator("account_id")
- def validate_account_id(cls, account_id: str, values: Dict) -> str:
+ @field_validator("account_id", mode="after")
+ @classmethod
+ def validate_account_id(cls, account_id: str, info: pydantic.ValidationInfo) -> str:
  account_id = remove_protocol(account_id)
  account_id = remove_trailing_slashes(account_id)
  # Get the domain from config, fallback to default
- domain = values.get("snowflake_domain", DEFAULT_SNOWFLAKE_DOMAIN)
+ domain = info.data.get("snowflake_domain", DEFAULT_SNOWFLAKE_DOMAIN)
  snowflake_host_suffix = f".{domain}"
  account_id = remove_suffix(account_id, snowflake_host_suffix)
  return account_id
 
- @pydantic.validator("authentication_type", always=True)
- def authenticator_type_is_valid(cls, v, values):
+ @field_validator("authentication_type", mode="before")
+ @classmethod
+ def authenticator_type_is_valid(cls, v: Any, info: pydantic.ValidationInfo) -> Any:
  if v not in _VALID_AUTH_TYPES:
  raise ValueError(
  f"unsupported authenticator type '{v}' was provided,"
  f" use one of {list(_VALID_AUTH_TYPES.keys())}"
  )
  if (
- values.get("private_key") is not None
- or values.get("private_key_path") is not None
+ info.data.get("private_key") is not None
+ or info.data.get("private_key_path") is not None
  ) and v != "KEY_PAIR_AUTHENTICATOR":
  raise ValueError(
  f"Either `private_key` and `private_key_path` is set but `authentication_type` is {v}. "
@@ -153,21 +156,22 @@ class SnowflakeConnectionConfig(ConfigModel):
  if v == "KEY_PAIR_AUTHENTICATOR":
  # If we are using key pair auth, we need the private key path and password to be set
  if (
- values.get("private_key") is None
- and values.get("private_key_path") is None
+ info.data.get("private_key") is None
+ and info.data.get("private_key_path") is None
  ):
  raise ValueError(
  f"Both `private_key` and `private_key_path` are none. "
  f"At least one should be set when using {v} authentication"
  )
  elif v == "OAUTH_AUTHENTICATOR":
- cls._check_oauth_config(values.get("oauth_config"))
+ cls._check_oauth_config(info.data.get("oauth_config"))
  logger.info(f"using authenticator type '{v}'")
  return v
 
- @pydantic.validator("token", always=True)
- def validate_token_oauth_config(cls, v, values):
- auth_type = values.get("authentication_type")
+ @field_validator("token", mode="before")
+ @classmethod
+ def validate_token_oauth_config(cls, v: Any, info: pydantic.ValidationInfo) -> Any:
+ auth_type = info.data.get("authentication_type")
  if auth_type == "OAUTH_AUTHENTICATOR_TOKEN":
  if not v:
  raise ValueError("Token required for OAUTH_AUTHENTICATOR_TOKEN.")
@@ -177,6 +181,24 @@ class SnowflakeConnectionConfig(ConfigModel):
  )
  return v
 
+ @model_validator(mode="after")
+ def validate_authentication_config(self):
+ """Validate authentication configuration consistency."""
+ # Check token requirement for OAUTH_AUTHENTICATOR_TOKEN
+ if self.authentication_type == "OAUTH_AUTHENTICATOR_TOKEN":
+ if not self.token:
+ raise ValueError("Token required for OAUTH_AUTHENTICATOR_TOKEN.")
+
+ # Check private key authentication consistency
+ if self.private_key is not None or self.private_key_path is not None:
+ if self.authentication_type != "KEY_PAIR_AUTHENTICATOR":
+ raise ValueError(
+ f"Either `private_key` and `private_key_path` is set but `authentication_type` is {self.authentication_type}. "
+ f"Should be set to 'KEY_PAIR_AUTHENTICATOR' when using key pair authentication"
+ )
+
+ return self
+
  @staticmethod
  def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None:
  if oauth_config is None:
@@ -14,7 +14,7 @@ from typing import (
  Type,
  )
 
- from pydantic import BaseModel, Field, validator
+ from pydantic import BaseModel, Field, field_validator
 
  from datahub.configuration.datetimes import parse_absolute_time
  from datahub.ingestion.api.closeable import Closeable
@@ -70,7 +70,7 @@ def pydantic_parse_json(field: str) -> "V1Validator":
  return json.loads(v)
  return v
 
- return validator(field, pre=True, allow_reuse=True)(_parse_from_json)
+ return field_validator(field, mode="before")(_parse_from_json)
 
 
  class UpstreamColumnNode(BaseModel):
@@ -379,7 +379,7 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
  # To avoid that causing a pydantic error we are setting it to an empty list
  # instead of a list with an empty object
  db_row["QUERIES"] = "[]"
- return UpstreamLineageEdge.parse_obj(db_row)
+ return UpstreamLineageEdge.model_validate(db_row)
  except Exception as e:
  self.report.num_upstream_lineage_edge_parsing_failed += 1
  upstream_tables = db_row.get("UPSTREAM_TABLES")
@@ -806,7 +806,7 @@ class SnowflakeQueriesSource(Source):
 
  @classmethod
  def create(cls, config_dict: dict, ctx: PipelineContext) -> Self:
- config = SnowflakeQueriesSourceConfig.parse_obj(config_dict)
+ config = SnowflakeQueriesSourceConfig.model_validate(config_dict)
  return cls(ctx, config)
 
  def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
@@ -386,7 +386,7 @@ class AthenaSource(SQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = AthenaConfig.parse_obj(config_dict)
+ config = AthenaConfig.model_validate(config_dict)
  return cls(config, ctx)
 
  # overwrite this method to allow to specify the usage of a custom dialect
@@ -10,6 +10,7 @@ import clickhouse_sqlalchemy.types as custom_types
  import pydantic
  from clickhouse_sqlalchemy.drivers import base
  from clickhouse_sqlalchemy.drivers.base import ClickHouseDialect
+ from pydantic import model_validator
  from pydantic.fields import Field
  from sqlalchemy import create_engine, text
  from sqlalchemy.engine import reflection
@@ -175,7 +176,8 @@ class ClickHouseConfig(
  return str(url)
 
  # pre = True because we want to take some decision before pydantic initialize the configuration to default values
- @pydantic.root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def projects_backward_compatibility(cls, values: Dict) -> Dict:
  secure = values.get("secure")
  protocol = values.get("protocol")
@@ -423,7 +425,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = ClickHouseConfig.parse_obj(config_dict)
+ config = ClickHouseConfig.model_validate(config_dict)
  return cls(config, ctx)
 
  def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
@@ -39,5 +39,5 @@ class CockroachDBSource(PostgresSource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = CockroachDBConfig.parse_obj(config_dict)
+ config = CockroachDBConfig.model_validate(config_dict)
  return cls(config, ctx)
@@ -77,5 +77,5 @@ class DruidSource(SQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = DruidConfig.parse_obj(config_dict)
+ config = DruidConfig.model_validate(config_dict)
  return cls(config, ctx)
@@ -36,5 +36,5 @@ class HanaSource(SQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict: Dict, ctx: PipelineContext) -> "HanaSource":
- config = HanaConfig.parse_obj(config_dict)
+ config = HanaConfig.model_validate(config_dict)
  return cls(config, ctx)
@@ -6,7 +6,7 @@ from enum import Enum
  from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
  from urllib.parse import urlparse
 
- from pydantic import validator
+ from pydantic import field_validator
  from pydantic.fields import Field
 
  # This import verifies that the dependencies are available.
@@ -674,11 +674,13 @@ class HiveConfig(TwoTierSQLAlchemyConfig):
  description="Platform instance for the storage system",
  )
 
- @validator("host_port")
- def clean_host_port(cls, v):
+ @field_validator("host_port", mode="after")
+ @classmethod
+ def clean_host_port(cls, v: str) -> str:
  return config_clean.remove_protocol(v)
 
- @validator("hive_storage_lineage_direction")
+ @field_validator("hive_storage_lineage_direction", mode="after")
+ @classmethod
  def _validate_direction(cls, v: str) -> str:
  """Validate the lineage direction."""
  if v.lower() not in ["upstream", "downstream"]:
@@ -725,7 +727,7 @@ class HiveSource(TwoTierSQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = HiveConfig.parse_obj(config_dict)
+ config = HiveConfig.model_validate(config_dict)
  return cls(config, ctx)
 
  def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
@@ -351,7 +351,7 @@ class HiveMetastoreSource(SQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = HiveMetastore.parse_obj(config_dict)
+ config = HiveMetastore.model_validate(config_dict)
  return cls(config, ctx)
 
  def gen_database_containers(
@@ -3,8 +3,8 @@ import re
  import urllib.parse
  from typing import Any, Dict, Iterable, List, Optional, Tuple
 
- import pydantic
  import sqlalchemy.dialects.mssql
+ from pydantic import ValidationInfo, field_validator
  from pydantic.fields import Field
  from sqlalchemy import create_engine, inspect
  from sqlalchemy.engine.base import Connection
@@ -140,11 +140,18 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
  description="Indicates if the SQL Server instance is running on AWS RDS. When None (default), automatic detection will be attempted using server name analysis.",
  )
 
- @pydantic.validator("uri_args")
- def passwords_match(cls, v, values, **kwargs):
- if values["use_odbc"] and not values["sqlalchemy_uri"] and "driver" not in v:
+ @field_validator("uri_args", mode="after")
+ @classmethod
+ def passwords_match(
+ cls, v: Dict[str, Any], info: ValidationInfo, **kwargs: Any
+ ) -> Dict[str, Any]:
+ if (
+ info.data["use_odbc"]
+ and not info.data["sqlalchemy_uri"]
+ and "driver" not in v
+ ):
  raise ValueError("uri_args must contain a 'driver' option")
- elif not values["use_odbc"] and v:
+ elif not info.data["use_odbc"] and v:
  raise ValueError("uri_args is not supported when ODBC is disabled")
  return v
 
@@ -314,7 +321,7 @@ class SQLServerSource(SQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict: Dict, ctx: PipelineContext) -> "SQLServerSource":
- config = SQLServerConfig.parse_obj(config_dict)
+ config = SQLServerConfig.model_validate(config_dict)
  return cls(config, ctx)
 
  # override to get table descriptions
@@ -150,7 +150,7 @@ class MySQLSource(TwoTierSQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = MySQLConfig.parse_obj(config_dict)
+ config = MySQLConfig.model_validate(config_dict)
  return cls(config, ctx)
 
  def _setup_rds_iam_event_listener(
@@ -10,8 +10,8 @@ from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, Union,
  from unittest.mock import patch
 
  import oracledb
- import pydantic
  import sqlalchemy.engine
+ from pydantic import ValidationInfo, field_validator
  from pydantic.fields import Field
  from sqlalchemy import event, sql
  from sqlalchemy.dialects.oracle.base import ischema_names
@@ -101,25 +101,32 @@ class OracleConfig(BasicSQLAlchemyConfig):
  "On Linux, this value is ignored, as ldconfig or LD_LIBRARY_PATH will define the location.",
  )
 
- @pydantic.validator("service_name")
- def check_service_name(cls, v, values):
- if values.get("database") and v:
+ @field_validator("service_name", mode="after")
+ @classmethod
+ def check_service_name(
+ cls, v: Optional[str], info: ValidationInfo
+ ) -> Optional[str]:
+ if info.data.get("database") and v:
  raise ValueError(
  "specify one of 'database' and 'service_name', but not both"
  )
  return v
 
- @pydantic.validator("data_dictionary_mode")
- def check_data_dictionary_mode(cls, value):
+ @field_validator("data_dictionary_mode", mode="after")
+ @classmethod
+ def check_data_dictionary_mode(cls, value: str) -> str:
  if value not in ("ALL", "DBA"):
  raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.")
  return value
 
- @pydantic.validator("thick_mode_lib_dir", always=True)
- def check_thick_mode_lib_dir(cls, v, values):
+ @field_validator("thick_mode_lib_dir", mode="before")
+ @classmethod
+ def check_thick_mode_lib_dir(
+ cls, v: Optional[str], info: ValidationInfo
+ ) -> Optional[str]:
  if (
  v is None
- and values.get("enable_thick_mode")
+ and info.data.get("enable_thick_mode")
  and (platform.system() == "Darwin" or platform.system() == "Windows")
  ):
  raise ValueError(
@@ -659,7 +666,7 @@ class OracleSource(SQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = OracleConfig.parse_obj(config_dict)
+ config = OracleConfig.model_validate(config_dict)
  return cls(config, ctx)
 
  def get_db_name(self, inspector: Inspector) -> str:
@@ -212,7 +212,7 @@ class PostgresSource(SQLAlchemySource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = PostgresConfig.parse_obj(config_dict)
+ config = PostgresConfig.model_validate(config_dict)
  return cls(config, ctx)
 
  def _setup_rds_iam_event_listener(
@@ -288,7 +288,7 @@ class PostgresSource(SQLAlchemySource):
  return {}
 
  for row in results:
- data.append(ViewLineageEntry.parse_obj(row))
+ data.append(ViewLineageEntry.model_validate(row))
 
  lineage_elements: Dict[Tuple[str, str], List[str]] = defaultdict(list)
  # Loop over the lineages in the JSON data.
@@ -115,7 +115,7 @@ class PrestoSource(TrinoSource):
 
  @classmethod
  def create(cls, config_dict, ctx):
- config = PrestoConfig.parse_obj(config_dict)
+ config = PrestoConfig.model_validate(config_dict)
  return cls(config, ctx)
 