acryl-datahub 1.3.0.1rc9__py3-none-any.whl → 1.3.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (263)
  1. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2550 -2543
  2. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +263 -261
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/graph/client.py +5 -1
  39. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  40. datahub/ingestion/reporting/file_reporter.py +5 -4
  41. datahub/ingestion/run/pipeline.py +7 -6
  42. datahub/ingestion/run/pipeline_config.py +12 -14
  43. datahub/ingestion/run/sink_callback.py +1 -1
  44. datahub/ingestion/sink/datahub_rest.py +6 -4
  45. datahub/ingestion/source/abs/config.py +19 -19
  46. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  47. datahub/ingestion/source/abs/source.py +2 -2
  48. datahub/ingestion/source/aws/aws_common.py +1 -1
  49. datahub/ingestion/source/aws/glue.py +6 -4
  50. datahub/ingestion/source/aws/sagemaker.py +1 -1
  51. datahub/ingestion/source/azure/azure_common.py +8 -12
  52. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  53. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  54. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  55. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  56. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  57. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  58. datahub/ingestion/source/datahub/config.py +8 -8
  59. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  60. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  61. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  62. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  63. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  64. datahub/ingestion/source/delta_lake/config.py +6 -4
  65. datahub/ingestion/source/dremio/dremio_api.py +212 -78
  66. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  67. datahub/ingestion/source/dremio/dremio_entities.py +55 -39
  68. datahub/ingestion/source/dremio/dremio_profiling.py +14 -3
  69. datahub/ingestion/source/dremio/dremio_source.py +24 -26
  70. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  71. datahub/ingestion/source/elastic_search.py +110 -32
  72. datahub/ingestion/source/excel/source.py +1 -1
  73. datahub/ingestion/source/feast.py +1 -1
  74. datahub/ingestion/source/file.py +5 -4
  75. datahub/ingestion/source/fivetran/config.py +17 -16
  76. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  77. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  78. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  79. datahub/ingestion/source/ge_profiling_config.py +8 -5
  80. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  81. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  82. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  83. datahub/ingestion/source/grafana/models.py +23 -5
  84. datahub/ingestion/source/hex/api.py +7 -5
  85. datahub/ingestion/source/hex/hex.py +4 -3
  86. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  87. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  88. datahub/ingestion/source/identity/azure_ad.py +1 -1
  89. datahub/ingestion/source/identity/okta.py +10 -10
  90. datahub/ingestion/source/kafka/kafka.py +1 -1
  91. datahub/ingestion/source/ldap.py +1 -1
  92. datahub/ingestion/source/looker/looker_common.py +7 -5
  93. datahub/ingestion/source/looker/looker_config.py +21 -20
  94. datahub/ingestion/source/looker/lookml_config.py +47 -47
  95. datahub/ingestion/source/metabase.py +8 -8
  96. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  97. datahub/ingestion/source/metadata/lineage.py +13 -8
  98. datahub/ingestion/source/mlflow.py +1 -1
  99. datahub/ingestion/source/mode.py +6 -4
  100. datahub/ingestion/source/mongodb.py +4 -3
  101. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  102. datahub/ingestion/source/nifi.py +17 -23
  103. datahub/ingestion/source/openapi.py +6 -8
  104. datahub/ingestion/source/powerbi/config.py +33 -32
  105. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  106. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  107. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  108. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  109. datahub/ingestion/source/preset.py +8 -8
  110. datahub/ingestion/source/pulsar.py +1 -1
  111. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  112. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  113. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  114. datahub/ingestion/source/redshift/config.py +18 -20
  115. datahub/ingestion/source/redshift/redshift.py +2 -2
  116. datahub/ingestion/source/redshift/usage.py +23 -3
  117. datahub/ingestion/source/s3/config.py +83 -62
  118. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  119. datahub/ingestion/source/s3/source.py +8 -5
  120. datahub/ingestion/source/sac/sac.py +5 -4
  121. datahub/ingestion/source/salesforce.py +3 -2
  122. datahub/ingestion/source/schema/json_schema.py +2 -2
  123. datahub/ingestion/source/sigma/data_classes.py +3 -2
  124. datahub/ingestion/source/sigma/sigma.py +1 -1
  125. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  126. datahub/ingestion/source/slack/slack.py +1 -1
  127. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  128. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  129. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  130. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  131. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  132. datahub/ingestion/source/snowflake/snowflake_queries.py +28 -4
  133. datahub/ingestion/source/sql/athena.py +1 -1
  134. datahub/ingestion/source/sql/clickhouse.py +4 -2
  135. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  136. datahub/ingestion/source/sql/druid.py +1 -1
  137. datahub/ingestion/source/sql/hana.py +1 -1
  138. datahub/ingestion/source/sql/hive.py +7 -5
  139. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  140. datahub/ingestion/source/sql/mssql/source.py +13 -6
  141. datahub/ingestion/source/sql/mysql.py +1 -1
  142. datahub/ingestion/source/sql/oracle.py +17 -10
  143. datahub/ingestion/source/sql/postgres.py +2 -2
  144. datahub/ingestion/source/sql/presto.py +1 -1
  145. datahub/ingestion/source/sql/sql_config.py +8 -9
  146. datahub/ingestion/source/sql/sql_generic.py +1 -1
  147. datahub/ingestion/source/sql/teradata.py +1 -1
  148. datahub/ingestion/source/sql/trino.py +1 -1
  149. datahub/ingestion/source/sql/vertica.py +5 -4
  150. datahub/ingestion/source/sql_queries.py +174 -22
  151. datahub/ingestion/source/state/checkpoint.py +2 -2
  152. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  153. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  154. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  155. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  156. datahub/ingestion/source/superset.py +9 -9
  157. datahub/ingestion/source/tableau/tableau.py +14 -16
  158. datahub/ingestion/source/unity/azure_auth_config.py +15 -0
  159. datahub/ingestion/source/unity/config.py +51 -34
  160. datahub/ingestion/source/unity/connection.py +7 -1
  161. datahub/ingestion/source/unity/connection_test.py +1 -1
  162. datahub/ingestion/source/unity/proxy.py +216 -7
  163. datahub/ingestion/source/unity/proxy_types.py +91 -0
  164. datahub/ingestion/source/unity/source.py +29 -3
  165. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  166. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  167. datahub/ingestion/source/usage/usage_common.py +5 -3
  168. datahub/ingestion/source_config/csv_enricher.py +7 -6
  169. datahub/ingestion/source_config/operation_config.py +7 -4
  170. datahub/ingestion/source_config/pulsar.py +11 -15
  171. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  172. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  173. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  174. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  175. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  176. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  177. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  178. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  179. datahub/ingestion/transformer/dataset_domain.py +3 -3
  180. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  181. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  182. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  183. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  184. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  185. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  186. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  187. datahub/ingestion/transformer/replace_external_url.py +2 -2
  188. datahub/ingestion/transformer/set_browse_path.py +1 -1
  189. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  190. datahub/lite/duckdb_lite.py +1 -1
  191. datahub/lite/lite_util.py +2 -2
  192. datahub/metadata/_internal_schema_classes.py +62 -2
  193. datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -0
  194. datahub/metadata/schema.avsc +271 -91
  195. datahub/metadata/schemas/ApplicationProperties.avsc +5 -2
  196. datahub/metadata/schemas/AssertionInfo.avsc +48 -5
  197. datahub/metadata/schemas/BusinessAttributeInfo.avsc +8 -4
  198. datahub/metadata/schemas/ChartInfo.avsc +12 -5
  199. datahub/metadata/schemas/ContainerProperties.avsc +12 -5
  200. datahub/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
  201. datahub/metadata/schemas/CorpGroupInfo.avsc +7 -3
  202. datahub/metadata/schemas/CorpUserInfo.avsc +5 -2
  203. datahub/metadata/schemas/CorpUserSettings.avsc +4 -2
  204. datahub/metadata/schemas/DashboardInfo.avsc +16 -4
  205. datahub/metadata/schemas/DataFlowInfo.avsc +11 -5
  206. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +4 -2
  207. datahub/metadata/schemas/DataJobInfo.avsc +9 -4
  208. datahub/metadata/schemas/DataPlatformInfo.avsc +3 -1
  209. datahub/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
  210. datahub/metadata/schemas/DataProductProperties.avsc +5 -2
  211. datahub/metadata/schemas/DataTypeInfo.avsc +5 -0
  212. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  213. datahub/metadata/schemas/DatasetProperties.avsc +12 -5
  214. datahub/metadata/schemas/DomainProperties.avsc +7 -3
  215. datahub/metadata/schemas/EditableContainerProperties.avsc +2 -1
  216. datahub/metadata/schemas/EditableDashboardProperties.avsc +2 -1
  217. datahub/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
  218. datahub/metadata/schemas/EditableDataJobProperties.avsc +2 -1
  219. datahub/metadata/schemas/EditableDatasetProperties.avsc +2 -1
  220. datahub/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
  221. datahub/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
  222. datahub/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
  223. datahub/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
  224. datahub/metadata/schemas/EditableMLModelProperties.avsc +2 -1
  225. datahub/metadata/schemas/EditableNotebookProperties.avsc +2 -1
  226. datahub/metadata/schemas/EditableSchemaMetadata.avsc +5 -3
  227. datahub/metadata/schemas/EntityTypeInfo.avsc +5 -0
  228. datahub/metadata/schemas/GlobalTags.avsc +3 -2
  229. datahub/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
  230. datahub/metadata/schemas/GlossaryTermInfo.avsc +3 -1
  231. datahub/metadata/schemas/InputFields.avsc +3 -2
  232. datahub/metadata/schemas/MLFeatureKey.avsc +3 -1
  233. datahub/metadata/schemas/MLFeatureTableKey.avsc +3 -1
  234. datahub/metadata/schemas/MLModelDeploymentKey.avsc +3 -1
  235. datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
  236. datahub/metadata/schemas/MLModelKey.avsc +3 -1
  237. datahub/metadata/schemas/MLModelProperties.avsc +4 -2
  238. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +3 -1
  239. datahub/metadata/schemas/MetadataChangeEvent.avsc +124 -50
  240. datahub/metadata/schemas/NotebookInfo.avsc +5 -2
  241. datahub/metadata/schemas/Ownership.avsc +3 -2
  242. datahub/metadata/schemas/QuerySubjects.avsc +1 -1
  243. datahub/metadata/schemas/RoleProperties.avsc +3 -1
  244. datahub/metadata/schemas/SchemaFieldInfo.avsc +3 -1
  245. datahub/metadata/schemas/SchemaMetadata.avsc +3 -2
  246. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
  247. datahub/metadata/schemas/TagProperties.avsc +3 -1
  248. datahub/metadata/schemas/TestInfo.avsc +2 -1
  249. datahub/sdk/__init__.py +1 -0
  250. datahub/sdk/_all_entities.py +2 -0
  251. datahub/sdk/search_filters.py +68 -40
  252. datahub/sdk/tag.py +112 -0
  253. datahub/secret/datahub_secret_store.py +7 -4
  254. datahub/secret/file_secret_store.py +1 -1
  255. datahub/sql_parsing/schema_resolver.py +29 -0
  256. datahub/sql_parsing/sql_parsing_aggregator.py +15 -0
  257. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  258. datahub/testing/check_sql_parser_result.py +2 -2
  259. datahub/utilities/ingest_utils.py +1 -1
  260. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
  261. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
  262. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
  263. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/grafana/models.py
@@ -79,18 +79,29 @@ class Dashboard(_GrafanaBaseModel):
         for panel_data in panels_data:
             if panel_data.get("type") == "row" and "panels" in panel_data:
                 panels.extend(
-                    Panel.parse_obj(p)
+                    Panel.model_validate(p)
                     for p in panel_data["panels"]
                     if p.get("type") != "row"
                 )
             elif panel_data.get("type") != "row":
-                panels.append(Panel.parse_obj(panel_data))
+                panels.append(Panel.model_validate(panel_data))
         return panels
 
     @classmethod
-    def parse_obj(cls, data: Dict[str, Any]) -> "Dashboard":
+    def model_validate(
+        cls,
+        obj: Any,
+        *,
+        strict: Optional[bool] = None,
+        from_attributes: Optional[bool] = None,
+        context: Optional[Any] = None,
+        by_alias: Optional[bool] = None,
+        by_name: Optional[bool] = None,
+    ) -> "Dashboard":
         """Custom parsing to handle nested panel extraction."""
-        dashboard_data = data.get("dashboard", {})
+        # Handle both direct dashboard data and nested structure with 'dashboard' key
+        dashboard_data = obj.get("dashboard", obj)
+
         _panel_data = dashboard_data.get("panels", [])
         panels = []
         try:
@@ -113,7 +124,14 @@ class Dashboard(_GrafanaBaseModel):
         if "refresh" in dashboard_dict and isinstance(dashboard_dict["refresh"], bool):
             dashboard_dict["refresh"] = str(dashboard_dict["refresh"])
 
-        return super().parse_obj(dashboard_dict)
+        return super().model_validate(
+            dashboard_dict,
+            strict=strict,
+            from_attributes=from_attributes,
+            context=context,
+            by_alias=by_alias,
+            by_name=by_name,
+        )
 
 
 class Folder(_GrafanaBaseModel):
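
Note: the Dashboard change above follows the pydantic v2 convention of overriding model_validate instead of the deprecated parse_obj. A minimal, hypothetical sketch of the same idea (Report and the "report" envelope key are illustrative, not DataHub code; newer pydantic releases additionally accept by_alias/by_name keywords, which the DataHub override forwards as well):

from typing import Any, Optional

from pydantic import BaseModel


class Report(BaseModel):
    title: str

    @classmethod
    def model_validate(
        cls,
        obj: Any,
        *,
        strict: Optional[bool] = None,
        from_attributes: Optional[bool] = None,
        context: Optional[Any] = None,
    ) -> "Report":
        # Unwrap an optional envelope, then delegate to the stock implementation,
        # forwarding every keyword so callers keep the standard signature.
        data = obj.get("report", obj) if isinstance(obj, dict) else obj
        return super().model_validate(
            data, strict=strict, from_attributes=from_attributes, context=context
        )


print(Report.model_validate({"report": {"title": "weekly"}}).title)  # -> weekly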

datahub/ingestion/source/hex/api.py
@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 from typing import Any, Dict, Generator, List, Optional, Union
 
 import requests
-from pydantic import BaseModel, Field, ValidationError, validator
+from pydantic import BaseModel, Field, ValidationError, field_validator
 from requests.adapters import HTTPAdapter
 from typing_extensions import assert_never
 from urllib3.util.retry import Retry
@@ -50,7 +50,8 @@ class HexApiProjectAnalytics(BaseModel):
         default=None, alias="publishedResultsUpdatedAt"
     )
 
-    @validator("last_viewed_at", "published_results_updated_at", pre=True)
+    @field_validator("last_viewed_at", "published_results_updated_at", mode="before")
+    @classmethod
     def parse_datetime(cls, value):
         if value is None:
             return None
@@ -167,14 +168,15 @@ class HexApiProjectApiResource(BaseModel):
     class Config:
         extra = "ignore"  # Allow extra fields in the JSON
 
-    @validator(
+    @field_validator(
         "created_at",
         "last_edited_at",
         "last_published_at",
         "archived_at",
         "trashed_at",
-        pre=True,
+        mode="before",
     )
+    @classmethod
     def parse_datetime(cls, value):
         if value is None:
             return None
@@ -292,7 +294,7 @@ class HexApi:
            )
            response.raise_for_status()
 
-           api_response = HexApiProjectsListResponse.parse_obj(response.json())
+           api_response = HexApiProjectsListResponse.model_validate(response.json())
            logger.info(f"Fetched {len(api_response.values)} items")
            params["after"] = (
                api_response.pagination.after if api_response.pagination else None
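
Note: the @validator(..., pre=True) to @field_validator(..., mode="before") change repeated above is the standard pydantic v2 migration for pre-validators, which must now also be explicit classmethods. A small self-contained sketch under those assumptions (ProjectStats is an illustrative model, not the Hex one):

from datetime import datetime
from typing import Optional

from pydantic import BaseModel, field_validator


class ProjectStats(BaseModel):
    last_viewed_at: Optional[datetime] = None

    @field_validator("last_viewed_at", mode="before")
    @classmethod
    def parse_datetime(cls, value):
        # Runs before pydantic's own coercion, so raw API strings can be normalized here.
        if value is None or isinstance(value, datetime):
            return value
        return datetime.fromisoformat(str(value).replace("Z", "+00:00"))


print(ProjectStats(last_viewed_at="2024-01-01T00:00:00Z").last_viewed_at)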

datahub/ingestion/source/hex/hex.py
@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from typing import Any, Dict, Iterable, List, Optional
 
-from pydantic import Field, SecretStr, root_validator
+from pydantic import Field, SecretStr, model_validator
 from typing_extensions import assert_never
 
 from datahub.configuration.common import AllowDenyPattern
@@ -120,7 +120,8 @@ class HexSourceConfig(
         description="Number of items to fetch per DataHub API call.",
     )
 
-    @root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
    def validate_lineage_times(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        # In-place update of the input dict would cause state contamination. This was discovered through test failures
        # in test_hex.py where the same dict is reused.
@@ -238,7 +239,7 @@ class HexSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "HexSource":
-        config = HexSourceConfig.parse_obj(config_dict)
+        config = HexSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/iceberg/iceberg.py
@@ -161,7 +161,7 @@ class IcebergSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "IcebergSource":
-        config = IcebergSourceConfig.parse_obj(config_dict)
+        config = IcebergSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
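
Note: most of the create() hunks in this release are the same one-line rename at the call site: pydantic v1's Model.parse_obj(dict) becomes Model.model_validate(dict) in v2. A generic sketch (SourceConfig is a stand-in, not a DataHub class):

from pydantic import BaseModel


class SourceConfig(BaseModel):
    platform_instance: str
    env: str = "PROD"


config_dict = {"platform_instance": "analytics"}

# pydantic v1 style, deprecated in v2:
#   config = SourceConfig.parse_obj(config_dict)

# pydantic v2 style used throughout this release:
config = SourceConfig.model_validate(config_dict)
print(config.platform_instance, config.env)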

datahub/ingestion/source/iceberg/iceberg_common.py
@@ -4,7 +4,7 @@ from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
 
 from humanfriendly import format_timespan
-from pydantic import Field, validator
+from pydantic import Field, field_validator
 from pyiceberg.catalog import Catalog, load_catalog
 from pyiceberg.catalog.rest import RestCatalog
 from requests.adapters import HTTPAdapter
@@ -108,7 +108,8 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         default=1, description="How many threads will be processing tables"
     )
 
-    @validator("catalog", pre=True, always=True)
+    @field_validator("catalog", mode="before")
+    @classmethod
     def handle_deprecated_catalog_format(cls, value):
         # Once support for deprecated format is dropped, we can remove this validator.
         if (
@@ -131,7 +132,8 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         # In case the input is already the new format or is invalid
         return value
 
-    @validator("catalog")
+    @field_validator("catalog", mode="after")
+    @classmethod
     def validate_catalog_size(cls, value):
         if len(value) != 1:
             raise ValueError("The catalog must contain exactly one entry.")

datahub/ingestion/source/identity/azure_ad.py
@@ -254,7 +254,7 @@ class AzureADSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = AzureADConfig.parse_obj(config_dict)
+        config = AzureADConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def __init__(self, config: AzureADConfig, ctx: PipelineContext):

datahub/ingestion/source/identity/okta.py
@@ -11,7 +11,7 @@ import nest_asyncio
 from okta.client import Client as OktaClient
 from okta.exceptions import OktaAPIException
 from okta.models import Group, GroupProfile, User, UserProfile, UserStatus
-from pydantic import validator
+from pydantic import model_validator
 from pydantic.fields import Field
 
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -157,21 +157,21 @@ class OktaConfig(StatefulIngestionConfigBase):
     mask_group_id: bool = True
     mask_user_id: bool = True
 
-    @validator("okta_users_search")
-    def okta_users_one_of_filter_or_search(cls, v, values):
-        if v and values["okta_users_filter"]:
+    @model_validator(mode="after")
+    def okta_users_one_of_filter_or_search(self) -> "OktaConfig":
+        if self.okta_users_search and self.okta_users_filter:
             raise ValueError(
                 "Only one of okta_users_filter or okta_users_search can be set"
             )
-        return v
+        return self
 
-    @validator("okta_groups_search")
-    def okta_groups_one_of_filter_or_search(cls, v, values):
-        if v and values["okta_groups_filter"]:
+    @model_validator(mode="after")
+    def okta_groups_one_of_filter_or_search(self) -> "OktaConfig":
+        if self.okta_groups_search and self.okta_groups_filter:
             raise ValueError(
                 "Only one of okta_groups_filter or okta_groups_search can be set"
             )
-        return v
+        return self
 
 
 @dataclass
@@ -288,7 +288,7 @@ class OktaSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = OktaConfig.parse_obj(config_dict)
+        config = OktaConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def __init__(self, config: OktaConfig, ctx: PipelineContext):
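
Note: the OktaConfig hunk shows the usual rewrite for cross-field checks: a v1 @validator that peeked into `values` becomes a @model_validator(mode="after") that receives the fully built model as self. A hedged sketch with illustrative field names:

from typing import Optional

from pydantic import BaseModel, model_validator


class SearchOrFilterConfig(BaseModel):
    users_filter: Optional[str] = None
    users_search: Optional[str] = None

    @model_validator(mode="after")
    def only_one_of_filter_or_search(self) -> "SearchOrFilterConfig":
        # Both fields are already validated here, so field ordering no longer matters.
        if self.users_search and self.users_filter:
            raise ValueError("Only one of users_filter or users_search can be set")
        return self


SearchOrFilterConfig(users_filter='profile.department eq "Engineering"')  # passes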

datahub/ingestion/source/kafka/kafka.py
@@ -267,7 +267,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "KafkaSource":
-        config: KafkaSourceConfig = KafkaSourceConfig.parse_obj(config_dict)
+        config: KafkaSourceConfig = KafkaSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/ldap.py
@@ -242,7 +242,7 @@ class LDAPSource(StatefulIngestionSourceBase):
     @classmethod
     def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "LDAPSource":
         """Factory method."""
-        config = LDAPSourceConfig.parse_obj(config_dict)
+        config = LDAPSourceConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/looker/looker_common.py
@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
     User,
     WriteQuery,
 )
-from pydantic import validator
+from pydantic import field_validator
 
 import datahub.emitter.mce_builder as builder
 from datahub.api.entities.platformresource.platform_resource import (
@@ -202,8 +202,9 @@ class LookerViewId:
             folder_path=os.path.dirname(self.file_path),
         )
 
-    @validator("view_name")
-    def remove_quotes(cls, v):
+    @field_validator("view_name", mode="after")
+    @classmethod
+    def remove_quotes(cls, v: str) -> str:
         # Sanitize the name.
         v = v.replace('"', "").replace("`", "")
         return v
@@ -931,8 +932,9 @@ class LookerExplore:
     source_file: Optional[str] = None
     tags: List[str] = dataclasses_field(default_factory=list)
 
-    @validator("name")
-    def remove_quotes(cls, v):
+    @field_validator("name", mode="after")
+    @classmethod
+    def remove_quotes(cls, v: str) -> str:
         # Sanitize the name.
         v = v.replace('"', "").replace("`", "")
         return v

datahub/ingestion/source/looker/looker_config.py
@@ -1,11 +1,11 @@
 import dataclasses
 import os
 import re
-from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union
 
 import pydantic
 from looker_sdk.sdk.api40.models import DBConnection
-from pydantic import Field, model_validator, validator
+from pydantic import Field, field_validator, model_validator
 
 from datahub.configuration import ConfigModel
 from datahub.configuration.common import (
@@ -198,17 +198,20 @@ class LookerConnectionDefinition(ConfigModel):
         "the top level Looker configuration",
     )
 
-    @validator("platform_env")
+    @field_validator("platform_env", mode="after")
+    @classmethod
     def platform_env_must_be_one_of(cls, v: Optional[str]) -> Optional[str]:
         if v is not None:
             return EnvConfigMixin.env_must_be_one_of(v)
         return v
 
-    @validator("platform", "default_db", "default_schema")
-    def lower_everything(cls, v):
+    @field_validator("platform", "default_db", "default_schema", mode="after")
+    @classmethod
+    def lower_everything(cls, v: Optional[str]) -> Optional[str]:
         """We lower case all strings passed in to avoid casing issues later"""
         if v is not None:
             return v.lower()
+        return v
 
     @classmethod
     def from_looker_connection(
@@ -326,22 +329,20 @@ class LookerDashboardSourceConfig(
         "Dashboards will only be ingested if they're allowed by both this config and dashboard_pattern.",
     )
 
-    @validator("external_base_url", pre=True, always=True)
+    @model_validator(mode="before")
+    @classmethod
     def external_url_defaults_to_api_config_base_url(
-        cls, v: Optional[str], *, values: Dict[str, Any], **kwargs: Dict[str, Any]
-    ) -> Optional[str]:
-        return v or values.get("base_url")
-
-    @validator("extract_independent_looks", always=True)
-    def stateful_ingestion_should_be_enabled(
-        cls, v: Optional[bool], *, values: Dict[str, Any], **kwargs: Dict[str, Any]
-    ) -> Optional[bool]:
-        stateful_ingestion: StatefulStaleMetadataRemovalConfig = cast(
-            StatefulStaleMetadataRemovalConfig, values.get("stateful_ingestion")
-        )
-        if v is True and (
-            stateful_ingestion is None or stateful_ingestion.enabled is False
+        cls, values: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        if "external_base_url" not in values or values["external_base_url"] is None:
+            values["external_base_url"] = values.get("base_url")
+        return values
+
+    @model_validator(mode="after")
+    def stateful_ingestion_should_be_enabled(self):
+        if self.extract_independent_looks is True and (
+            self.stateful_ingestion is None or self.stateful_ingestion.enabled is False
         ):
             raise ValueError("stateful_ingestion.enabled should be set to true")
 
-        return v
+        return self
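
Note: defaulting one field from another, as external_base_url does above, moves from @validator(..., pre=True, always=True) to a @model_validator(mode="before") that edits the raw input dict. An illustrative sketch (DashboardSourceConfig is hypothetical, not the Looker config):

from typing import Any, Dict, Optional

from pydantic import BaseModel, model_validator


class DashboardSourceConfig(BaseModel):
    base_url: str
    external_base_url: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def default_external_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Work on a copy so the caller's dict is left untouched.
        if isinstance(values, dict) and values.get("external_base_url") is None:
            values = {**values, "external_base_url": values.get("base_url")}
        return values


print(DashboardSourceConfig(base_url="https://looker.example.com").external_base_url)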

datahub/ingestion/source/looker/lookml_config.py
@@ -1,10 +1,11 @@
 import logging
+from copy import deepcopy
 from dataclasses import dataclass, field as dataclass_field
 from datetime import timedelta
 from typing import Any, Dict, Literal, Optional, Union
 
 import pydantic
-from pydantic import root_validator, validator
+from pydantic import model_validator
 from pydantic.fields import Field
 
 from datahub.configuration.common import AllowDenyPattern
@@ -210,75 +211,74 @@ class LookMLSourceConfig(
         "All if comments are evaluated to true for configured looker_environment value",
     )
 
-    @validator("connection_to_platform_map", pre=True)
-    def convert_string_to_connection_def(cls, conn_map):
-        # Previous version of config supported strings in connection map. This upconverts strings to ConnectionMap
-        for key in conn_map:
-            if isinstance(conn_map[key], str):
-                platform = conn_map[key]
-                if "." in platform:
-                    platform_db_split = conn_map[key].split(".")
-                    connection = LookerConnectionDefinition(
-                        platform=platform_db_split[0],
-                        default_db=platform_db_split[1],
-                        default_schema="",
-                    )
-                    conn_map[key] = connection
-                else:
-                    logger.warning(
-                        f"Connection map for {key} provides platform {platform} but does not provide a default "
-                        f"database name. This might result in failed resolution"
-                    )
-                    conn_map[key] = LookerConnectionDefinition(
-                        platform=platform, default_db="", default_schema=""
-                    )
-        return conn_map
+    @model_validator(mode="before")
+    @classmethod
+    def convert_string_to_connection_def(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        values = deepcopy(values)
+        conn_map = values.get("connection_to_platform_map")
+        if conn_map:
+            # Previous version of config supported strings in connection map. This upconverts strings to ConnectionMap
+            for key in conn_map:
+                if isinstance(conn_map[key], str):
+                    platform = conn_map[key]
+                    if "." in platform:
+                        platform_db_split = conn_map[key].split(".")
+                        connection = LookerConnectionDefinition(
+                            platform=platform_db_split[0],
+                            default_db=platform_db_split[1],
+                            default_schema="",
+                        )
+                        conn_map[key] = connection
+                    else:
+                        logger.warning(
+                            f"Connection map for {key} provides platform {platform} but does not provide a default "
+                            f"database name. This might result in failed resolution"
+                        )
+                        conn_map[key] = LookerConnectionDefinition(
+                            platform=platform, default_db="", default_schema=""
+                        )
+        return values
 
-    @root_validator(skip_on_failure=True)
-    def check_either_connection_map_or_connection_provided(cls, values):
+    @model_validator(mode="after")
+    def check_either_connection_map_or_connection_provided(self):
         """Validate that we must either have a connection map or an api credential"""
-        if not values.get("connection_to_platform_map", {}) and not values.get(
-            "api", {}
-        ):
+        if not (self.connection_to_platform_map or {}) and not (self.api):
             raise ValueError(
                 "Neither api not connection_to_platform_map config was found. LookML source requires either api "
                 "credentials for Looker or a map of connection names to platform identifiers to work correctly"
             )
-        return values
+        return self
 
-    @root_validator(skip_on_failure=True)
-    def check_either_project_name_or_api_provided(cls, values):
+    @model_validator(mode="after")
+    def check_either_project_name_or_api_provided(self):
         """Validate that we must either have a project name or an api credential to fetch project names"""
-        if not values.get("project_name") and not values.get("api"):
+        if not self.project_name and not self.api:
             raise ValueError(
                 "Neither project_name not an API credential was found. LookML source requires either api credentials "
                 "for Looker or a project_name to accurately name views and models."
            )
-        return values
+        return self
 
-    @root_validator(skip_on_failure=True)
-    def check_api_provided_for_view_lineage(cls, values):
+    @model_validator(mode="after")
+    def check_api_provided_for_view_lineage(self):
         """Validate that we must have an api credential to use Looker API for view's column lineage"""
-        if not values.get("api") and values.get("use_api_for_view_lineage"):
+        if not self.api and self.use_api_for_view_lineage:
             raise ValueError(
                 "API credential was not found. LookML source requires api credentials "
                 "for Looker to use Looker APIs for view's column lineage extraction."
                 "Set `use_api_for_view_lineage` to False to skip using Looker APIs."
            )
-        return values
+        return self
 
-    @validator("base_folder", always=True)
-    def check_base_folder_if_not_provided(
-        cls, v: Optional[pydantic.DirectoryPath], values: Dict[str, Any]
-    ) -> Optional[pydantic.DirectoryPath]:
-        if v is None:
-            git_info: Optional[GitInfo] = values.get("git_info")
-            if git_info:
-                if not git_info.deploy_key:
+    @model_validator(mode="after")
+    def check_base_folder_if_not_provided(self):
+        if self.base_folder is None:
+            if self.git_info:
+                if not self.git_info.deploy_key:
                     logger.warning(
                         "git_info is provided, but no SSH key is present. If the repo is not public, we'll fail to "
                         "clone it."
                    )
            else:
                raise ValueError("Neither base_folder nor git_info has been provided.")
-        return v
+        return self
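
Note: the LookML validator above also copies the incoming dict before rewriting it; mode="before" validators receive the caller's dict, and in-place mutation can leak into configs that are reused across runs or tests. A minimal sketch of that defensive-copy pattern (ConnectionMapConfig and its dict shape are illustrative):

from copy import deepcopy
from typing import Any, Dict

from pydantic import BaseModel, model_validator


class ConnectionMapConfig(BaseModel):
    connection_to_platform_map: Dict[str, Any] = {}

    @model_validator(mode="before")
    @classmethod
    def upconvert_strings(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        values = deepcopy(values)  # never mutate the caller's dict
        conn_map = values.get("connection_to_platform_map") or {}
        for key, value in conn_map.items():
            if isinstance(value, str):
                platform, _, default_db = value.partition(".")
                conn_map[key] = {"platform": platform, "default_db": default_db}
        return values


raw = {"connection_to_platform_map": {"warehouse": "snowflake.analytics"}}
ConnectionMapConfig.model_validate(raw)
print(raw["connection_to_platform_map"]["warehouse"])  # still the original string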

datahub/ingestion/source/metabase.py
@@ -9,7 +9,7 @@ from typing import Dict, Iterable, List, Optional, Tuple, Union
 import dateutil.parser as dp
 import pydantic
 import requests
-from pydantic import Field, root_validator, validator
+from pydantic import Field, field_validator, model_validator
 from requests.models import HTTPError
 
 import datahub.emitter.mce_builder as builder
@@ -115,16 +115,16 @@ class MetabaseConfig(
     )
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None
 
-    @validator("connect_uri", "display_uri")
+    @field_validator("connect_uri", "display_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)
 
-    @root_validator(skip_on_failure=True)
-    def default_display_uri_to_connect_uri(cls, values):
-        base = values.get("display_uri")
-        if base is None:
-            values["display_uri"] = values.get("connect_uri")
-        return values
+    @model_validator(mode="after")
+    def default_display_uri_to_connect_uri(self) -> "MetabaseConfig":
+        if self.display_uri is None:
+            self.display_uri = self.connect_uri
+        return self
 
 
 @dataclass
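
Note: MetabaseConfig shows the other common root_validator replacement: an after-validator that fills a missing field by assigning onto the constructed model and returning self (this relies on the model not being frozen, which is the pydantic default). A hypothetical sketch:

from typing import Optional

from pydantic import BaseModel, model_validator


class UriConfig(BaseModel):
    connect_uri: str
    display_uri: Optional[str] = None

    @model_validator(mode="after")
    def default_display_to_connect(self) -> "UriConfig":
        if self.display_uri is None:
            self.display_uri = self.connect_uri
        return self


print(UriConfig(connect_uri="https://metabase.example.com").display_uri)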

datahub/ingestion/source/metadata/business_glossary.py
@@ -563,7 +563,7 @@ class BusinessGlossaryFileSource(Source):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = BusinessGlossarySourceConfig.parse_obj(config_dict)
+        config = BusinessGlossarySourceConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     @classmethod
@@ -571,7 +571,7 @@ class BusinessGlossaryFileSource(Source):
         cls, file_name: Union[str, pathlib.Path]
     ) -> BusinessGlossaryConfig:
         config = load_config_file(file_name, resolve_env_vars=True)
-        glossary_cfg = BusinessGlossaryConfig.parse_obj(config)
+        glossary_cfg = BusinessGlossaryConfig.model_validate(config)
         return glossary_cfg
 
     def get_workunits_internal(

datahub/ingestion/source/metadata/lineage.py
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 from functools import partial
 from typing import Any, Dict, Iterable, List, Optional
 
-from pydantic import validator
+from pydantic import field_validator
 from pydantic.fields import Field
 
 import datahub.metadata.schema_classes as models
@@ -51,7 +51,8 @@ class EntityConfig(EnvConfigMixin):
     platform: str
     platform_instance: Optional[str] = None
 
-    @validator("type")
+    @field_validator("type", mode="after")
+    @classmethod
     def type_must_be_supported(cls, v: str) -> str:
         allowed_types = ["dataset"]
         if v not in allowed_types:
@@ -60,7 +61,8 @@ class EntityConfig(EnvConfigMixin):
             )
         return v
 
-    @validator("name")
+    @field_validator("name", mode="after")
+    @classmethod
     def validate_name(cls, v: str) -> str:
         if v.startswith("urn:li:"):
             raise ValueError(
@@ -77,7 +79,8 @@ class FineGrainedLineageConfig(ConfigModel):
     transformOperation: Optional[str]
     confidenceScore: Optional[float] = 1.0
 
-    @validator("upstreamType")
+    @field_validator("upstreamType", mode="after")
+    @classmethod
     def upstream_type_must_be_supported(cls, v: str) -> str:
         allowed_types = [
             FineGrainedLineageUpstreamTypeClass.FIELD_SET,
@@ -90,7 +93,8 @@ class FineGrainedLineageConfig(ConfigModel):
             )
         return v
 
-    @validator("downstreamType")
+    @field_validator("downstreamType", mode="after")
+    @classmethod
     def downstream_type_must_be_supported(cls, v: str) -> str:
         allowed_types = [
             FineGrainedLineageDownstreamTypeClass.FIELD_SET,
@@ -124,7 +128,8 @@ class LineageFileSourceConfig(ConfigModel):
 class LineageConfig(VersionedConfig):
     lineage: List[EntityNodeConfig]
 
-    @validator("version")
+    @field_validator("version", mode="after")
+    @classmethod
     def version_must_be_1(cls, v):
         if v != "1":
             raise ValueError("Only version 1 is supported")
@@ -148,13 +153,13 @@ class LineageFileSource(Source):
     def create(
         cls, config_dict: Dict[str, Any], ctx: PipelineContext
     ) -> "LineageFileSource":
-        config = LineageFileSourceConfig.parse_obj(config_dict)
+        config = LineageFileSourceConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     @staticmethod
     def load_lineage_config(file_name: str) -> LineageConfig:
         config = load_config_file(file_name, resolve_env_vars=True)
-        lineage_config = LineageConfig.parse_obj(config)
+        lineage_config = LineageConfig.model_validate(config)
         return lineage_config
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/mlflow.py
@@ -892,5 +892,5 @@ class MLflowSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "MLflowSource":
-        config = MLflowConfig.parse_obj(config_dict)
+        config = MLflowConfig.model_validate(config_dict)
         return cls(ctx, config)

datahub/ingestion/source/mode.py
@@ -26,7 +26,7 @@ import sqlglot
 import tenacity
 import yaml
 from liquid import Template, Undefined
-from pydantic import Field, validator
+from pydantic import Field, field_validator
 from requests.adapters import HTTPAdapter, Retry
 from requests.exceptions import ConnectionError
 from requests.models import HTTPBasicAuth, HTTPError
@@ -218,11 +218,13 @@ class ModeConfig(
         default=False, description="Exclude archived reports"
     )
 
-    @validator("connect_uri")
+    @field_validator("connect_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)
 
-    @validator("items_per_page")
+    @field_validator("items_per_page", mode="after")
+    @classmethod
     def validate_items_per_page(cls, v):
         if 1 <= v <= DEFAULT_API_ITEMS_PER_PAGE:
             return v
@@ -1824,7 +1826,7 @@ class ModeSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ModeSource":
-        config: ModeConfig = ModeConfig.parse_obj(config_dict)
+        config: ModeConfig = ModeConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: