acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl

This diff shows the changes between publicly released versions of this package as they appear in their respective public registries, and is provided for informational purposes only.

Files changed (203)
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dremio/dremio_source.py +15 -15
  66. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  67. datahub/ingestion/source/elastic_search.py +4 -3
  68. datahub/ingestion/source/excel/source.py +1 -1
  69. datahub/ingestion/source/feast.py +1 -1
  70. datahub/ingestion/source/file.py +5 -4
  71. datahub/ingestion/source/fivetran/config.py +17 -16
  72. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  73. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  74. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  75. datahub/ingestion/source/ge_profiling_config.py +8 -5
  76. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  77. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  78. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  79. datahub/ingestion/source/grafana/models.py +23 -5
  80. datahub/ingestion/source/hex/api.py +7 -5
  81. datahub/ingestion/source/hex/hex.py +4 -3
  82. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  83. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  84. datahub/ingestion/source/identity/azure_ad.py +1 -1
  85. datahub/ingestion/source/identity/okta.py +10 -10
  86. datahub/ingestion/source/kafka/kafka.py +1 -1
  87. datahub/ingestion/source/ldap.py +1 -1
  88. datahub/ingestion/source/looker/looker_common.py +7 -5
  89. datahub/ingestion/source/looker/looker_config.py +21 -20
  90. datahub/ingestion/source/looker/lookml_config.py +47 -47
  91. datahub/ingestion/source/metabase.py +8 -8
  92. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  93. datahub/ingestion/source/metadata/lineage.py +13 -8
  94. datahub/ingestion/source/mlflow.py +1 -1
  95. datahub/ingestion/source/mode.py +6 -4
  96. datahub/ingestion/source/mongodb.py +4 -3
  97. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  98. datahub/ingestion/source/nifi.py +17 -23
  99. datahub/ingestion/source/openapi.py +6 -8
  100. datahub/ingestion/source/powerbi/config.py +33 -32
  101. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  102. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  103. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  104. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  105. datahub/ingestion/source/preset.py +8 -8
  106. datahub/ingestion/source/pulsar.py +1 -1
  107. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  108. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  109. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  110. datahub/ingestion/source/redshift/config.py +18 -20
  111. datahub/ingestion/source/redshift/redshift.py +2 -2
  112. datahub/ingestion/source/redshift/usage.py +23 -3
  113. datahub/ingestion/source/s3/config.py +83 -62
  114. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  115. datahub/ingestion/source/s3/source.py +8 -5
  116. datahub/ingestion/source/sac/sac.py +5 -4
  117. datahub/ingestion/source/salesforce.py +3 -2
  118. datahub/ingestion/source/schema/json_schema.py +2 -2
  119. datahub/ingestion/source/sigma/data_classes.py +3 -2
  120. datahub/ingestion/source/sigma/sigma.py +1 -1
  121. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  122. datahub/ingestion/source/slack/slack.py +1 -1
  123. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  125. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  126. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  127. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  128. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  129. datahub/ingestion/source/sql/athena.py +1 -1
  130. datahub/ingestion/source/sql/clickhouse.py +4 -2
  131. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  132. datahub/ingestion/source/sql/druid.py +1 -1
  133. datahub/ingestion/source/sql/hana.py +1 -1
  134. datahub/ingestion/source/sql/hive.py +7 -5
  135. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  136. datahub/ingestion/source/sql/mssql/source.py +13 -6
  137. datahub/ingestion/source/sql/mysql.py +1 -1
  138. datahub/ingestion/source/sql/oracle.py +17 -10
  139. datahub/ingestion/source/sql/postgres.py +2 -2
  140. datahub/ingestion/source/sql/presto.py +1 -1
  141. datahub/ingestion/source/sql/sql_config.py +8 -9
  142. datahub/ingestion/source/sql/sql_generic.py +1 -1
  143. datahub/ingestion/source/sql/teradata.py +1 -1
  144. datahub/ingestion/source/sql/trino.py +1 -1
  145. datahub/ingestion/source/sql/vertica.py +5 -4
  146. datahub/ingestion/source/sql_queries.py +11 -8
  147. datahub/ingestion/source/state/checkpoint.py +2 -2
  148. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  149. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  150. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  152. datahub/ingestion/source/superset.py +9 -9
  153. datahub/ingestion/source/tableau/tableau.py +14 -16
  154. datahub/ingestion/source/unity/azure_auth_config.py +15 -0
  155. datahub/ingestion/source/unity/config.py +51 -34
  156. datahub/ingestion/source/unity/connection.py +7 -1
  157. datahub/ingestion/source/unity/connection_test.py +1 -1
  158. datahub/ingestion/source/unity/proxy.py +216 -7
  159. datahub/ingestion/source/unity/proxy_types.py +91 -0
  160. datahub/ingestion/source/unity/source.py +29 -3
  161. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  162. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  163. datahub/ingestion/source/usage/usage_common.py +5 -3
  164. datahub/ingestion/source_config/csv_enricher.py +7 -6
  165. datahub/ingestion/source_config/operation_config.py +7 -4
  166. datahub/ingestion/source_config/pulsar.py +11 -15
  167. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  168. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  169. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  171. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  172. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  173. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  174. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  175. datahub/ingestion/transformer/dataset_domain.py +3 -3
  176. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  177. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  178. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  179. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  180. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  181. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  182. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  183. datahub/ingestion/transformer/replace_external_url.py +2 -2
  184. datahub/ingestion/transformer/set_browse_path.py +1 -1
  185. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  186. datahub/lite/duckdb_lite.py +1 -1
  187. datahub/lite/lite_util.py +2 -2
  188. datahub/metadata/schema.avsc +7 -2
  189. datahub/metadata/schemas/QuerySubjects.avsc +1 -1
  190. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
  191. datahub/sdk/__init__.py +1 -0
  192. datahub/sdk/_all_entities.py +2 -0
  193. datahub/sdk/search_filters.py +68 -40
  194. datahub/sdk/tag.py +112 -0
  195. datahub/secret/datahub_secret_store.py +7 -4
  196. datahub/secret/file_secret_store.py +1 -1
  197. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  198. datahub/testing/check_sql_parser_result.py +2 -2
  199. datahub/utilities/ingest_utils.py +1 -1
  200. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
  201. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
  202. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
  203. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
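
Most of the changes in this release migrate DataHub's ingestion configs and sources from Pydantic v1 validator APIs (validator, root_validator, .parse_obj()) to their Pydantic v2 equivalents (field_validator, model_validator, .model_validate()). As a rough guide to the hunks below, here is a minimal sketch of that migration pattern; the class and field names are hypothetical, and only the Pydantic v2 decorators and methods themselves are real:

# Illustrative sketch of the Pydantic v1 -> v2 validator migration seen throughout this diff.
# "ExampleConfig", "connect_uri", and "display_uri" are made-up names, not DataHub code.
from typing import Optional

from pydantic import BaseModel, field_validator, model_validator


class ExampleConfig(BaseModel):
    connect_uri: str
    display_uri: Optional[str] = None

    # v1: @validator("connect_uri")  ->  v2: @field_validator(..., mode="after") + @classmethod
    @field_validator("connect_uri", mode="after")
    @classmethod
    def strip_trailing_slash(cls, v: str) -> str:
        return v.rstrip("/")

    # v1: @root_validator(skip_on_failure=True) received a values dict;
    # v2: @model_validator(mode="after") receives the constructed model instance.
    @model_validator(mode="after")
    def default_display_uri(self) -> "ExampleConfig":
        if self.display_uri is None:
            self.display_uri = self.connect_uri
        return self


# v1: ExampleConfig.parse_obj({...})  ->  v2: ExampleConfig.model_validate({...})
config = ExampleConfig.model_validate({"connect_uri": "https://example.com/"})
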
@@ -4,7 +4,7 @@ from enum import Enum
  from typing import Dict, List, Literal, Optional, Union

  import pydantic
- from pydantic import root_validator, validator
+ from pydantic import field_validator, model_validator

  import datahub.emitter.mce_builder as builder
  from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
@@ -540,8 +540,8 @@ class PowerBiDashboardSourceConfig(
  description="timeout in seconds for Metadata Rest Api.",
  )

- @root_validator(skip_on_failure=True)
- def validate_extract_column_level_lineage(cls, values: Dict) -> Dict:
+ @model_validator(mode="after")
+ def validate_extract_column_level_lineage(self) -> "PowerBiDashboardSourceConfig":
  flags = [
  "native_query_parsing",
  "enable_advance_lineage_sql_construct",
@@ -549,26 +549,23 @@ class PowerBiDashboardSourceConfig(
  "extract_dataset_schema",
  ]

- if (
- "extract_column_level_lineage" in values
- and values["extract_column_level_lineage"] is False
- ):
+ if self.extract_column_level_lineage is False:
  # Flag is not set. skip validation
- return values
+ return self

  logger.debug(f"Validating additional flags: {flags}")

  is_flag_enabled: bool = True
  for flag in flags:
- if flag not in values or values[flag] is False:
+ if not getattr(self, flag, True):
  is_flag_enabled = False

  if not is_flag_enabled:
  raise ValueError(f"Enable all these flags in recipe: {flags} ")

- return values
+ return self

- @validator("dataset_type_mapping")
+ @field_validator("dataset_type_mapping", mode="after")
  @classmethod
  def map_data_platform(cls, value):
  # For backward compatibility convert input PostgreSql to PostgreSQL
@@ -580,28 +577,32 @@ class PowerBiDashboardSourceConfig(

  return value

- @root_validator(skip_on_failure=True)
- def workspace_id_backward_compatibility(cls, values: Dict) -> Dict:
- workspace_id = values.get("workspace_id")
- workspace_id_pattern = values.get("workspace_id_pattern")
-
- if workspace_id_pattern == AllowDenyPattern.allow_all() and workspace_id:
+ @model_validator(mode="after")
+ def workspace_id_backward_compatibility(self) -> "PowerBiDashboardSourceConfig":
+ if (
+ self.workspace_id_pattern == AllowDenyPattern.allow_all()
+ and self.workspace_id
+ ):
  logger.warning(
  "workspace_id_pattern is not set but workspace_id is set, setting workspace_id as "
  "workspace_id_pattern. workspace_id will be deprecated, please use workspace_id_pattern instead."
  )
- values["workspace_id_pattern"] = AllowDenyPattern(
- allow=[f"^{workspace_id}$"]
+ self.workspace_id_pattern = AllowDenyPattern(
+ allow=[f"^{self.workspace_id}$"]
  )
- elif workspace_id_pattern != AllowDenyPattern.allow_all() and workspace_id:
+ elif (
+ self.workspace_id_pattern != AllowDenyPattern.allow_all()
+ and self.workspace_id
+ ):
  logger.warning(
  "workspace_id will be ignored in favour of workspace_id_pattern. workspace_id will be deprecated, "
  "please use workspace_id_pattern only."
  )
- values.pop("workspace_id")
- return values
+ self.workspace_id = None
+ return self

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def raise_error_for_dataset_type_mapping(cls, values: Dict) -> Dict:
  if (
  values.get("dataset_type_mapping") is not None
@@ -613,18 +614,18 @@ class PowerBiDashboardSourceConfig(

  return values

- @root_validator(skip_on_failure=True)
- def validate_extract_dataset_schema(cls, values: Dict) -> Dict:
- if values.get("extract_dataset_schema") is False:
+ @model_validator(mode="after")
+ def validate_extract_dataset_schema(self) -> "PowerBiDashboardSourceConfig":
+ if self.extract_dataset_schema is False:
  add_global_warning(
  "Please use `extract_dataset_schema: true`, otherwise dataset schema extraction will be skipped."
  )
- return values
+ return self

- @root_validator(skip_on_failure=True)
- def validate_dsn_to_database_schema(cls, values: Dict) -> Dict:
- if values.get("dsn_to_database_schema") is not None:
- dsn_mapping = values.get("dsn_to_database_schema")
+ @model_validator(mode="after")
+ def validate_dsn_to_database_schema(self) -> "PowerBiDashboardSourceConfig":
+ if self.dsn_to_database_schema is not None:
+ dsn_mapping = self.dsn_to_database_schema
  if not isinstance(dsn_mapping, dict):
  raise ValueError("dsn_to_database_schema must contain key-value pairs")

@@ -639,4 +640,4 @@ class PowerBiDashboardSourceConfig(
  f"dsn_to_database_schema invalid mapping value: {value}"
  )

- return values
+ return self
@@ -41,7 +41,7 @@ class ResolvePlatformInstanceFromDatasetTypeMapping(
  if isinstance(platform, PlatformDetail):
  return platform

- return PlatformDetail.parse_obj({})
+ return PlatformDetail.model_validate({})


  class ResolvePlatformInstanceFromServerToPlatformInstance(
@@ -56,7 +56,7 @@ class ResolvePlatformInstanceFromServerToPlatformInstance(
  ]
  if data_platform_detail.data_platform_server
  in self.config.server_to_platform_instance
- else PlatformDetail.parse_obj({})
+ else PlatformDetail.model_validate({})
  )

@@ -1316,7 +1316,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):

  @classmethod
  def create(cls, config_dict, ctx):
- config = PowerBiDashboardSourceConfig.parse_obj(config_dict)
+ config = PowerBiDashboardSourceConfig.model_validate(config_dict)
  return cls(config, ctx)

  def get_allowed_workspaces(self) -> List[powerbi_data_classes.Workspace]:
@@ -213,7 +213,7 @@ class PowerBiReportServerAPI:

  if response_dict.get("value"):
  reports.extend(
- report_types_mapping[report_type].parse_obj(report)
+ report_types_mapping[report_type].model_validate(report)
  for report in response_dict.get("value")
  )

@@ -517,7 +517,7 @@ class PowerBiReportServerDashboardSource(StatefulIngestionSourceBase):

  @classmethod
  def create(cls, config_dict, ctx):
- config = PowerBiReportServerDashboardSourceConfig.parse_obj(config_dict)
+ config = PowerBiReportServerDashboardSourceConfig.model_validate(config_dict)
  return cls(config, ctx)

  def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -1,7 +1,7 @@
  from datetime import datetime
  from typing import Any, Dict, List, Optional

- from pydantic import BaseModel, Field, validator
+ from pydantic import BaseModel, Field, model_validator

  from datahub.ingestion.source.powerbi_report_server.constants import (
  RelationshipDirection,
@@ -30,11 +30,13 @@ class CatalogItem(BaseModel):
  has_data_sources: bool = Field(False, alias="HasDataSources")
  data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")

- @validator("display_name", always=True)
- def validate_diplay_name(cls, value, values):
- if values["created_by"]:
- return values["created_by"].split("\\")[-1]
- return ""
+ @model_validator(mode="after")
+ def validate_diplay_name(self):
+ if self.created_by:
+ self.display_name = self.created_by.split("\\")[-1]
+ else:
+ self.display_name = ""
+ return self

  def get_urn_part(self):
  return f"reports.{self.id}"
@@ -2,7 +2,7 @@ import logging
  from typing import Dict, Optional

  import requests
- from pydantic import root_validator, validator
+ from pydantic import field_validator, model_validator
  from pydantic.fields import Field

  from datahub.emitter.mce_builder import DEFAULT_ENV
@@ -55,16 +55,16 @@ class PresetConfig(SupersetConfig):
  description="Can be used to change mapping for database names in superset to what you have in datahub",
  )

- @validator("connect_uri", "display_uri")
+ @field_validator("connect_uri", "display_uri", mode="after")
+ @classmethod
  def remove_trailing_slash(cls, v):
  return config_clean.remove_trailing_slashes(v)

- @root_validator(skip_on_failure=True)
- def default_display_uri_to_connect_uri(cls, values):
- base = values.get("display_uri")
- if base is None:
- values["display_uri"] = values.get("connect_uri")
- return values
+ @model_validator(mode="after")
+ def default_display_uri_to_connect_uri(self) -> "PresetConfig":
+ if self.display_uri is None:
+ self.display_uri = self.connect_uri
+ return self


  @platform_name("Preset")
@@ -235,7 +235,7 @@ class PulsarSource(StatefulIngestionSourceBase):

  @classmethod
  def create(cls, config_dict, ctx):
- config = PulsarSourceConfig.parse_obj(config_dict)
+ config = PulsarSourceConfig.model_validate(config_dict)

  # Do not include each individual partition for partitioned topics,
  if config.exclude_individual_partitions:
@@ -3,7 +3,7 @@ from datetime import datetime
  from enum import Enum
  from typing import Dict, List, Optional, Type, Union

- from pydantic import BaseModel, ConfigDict, Field, root_validator
+ from pydantic import BaseModel, ConfigDict, Field, model_validator

  from datahub.emitter.mcp_builder import ContainerKey
  from datahub.ingestion.source.qlik_sense.config import QLIK_DATETIME_FORMAT, Constant
@@ -92,7 +92,8 @@ class Space(_QlikBaseModel):
  updatedAt: datetime
  ownerId: Optional[str] = None

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def update_values(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -121,7 +122,8 @@ class SchemaField(_QlikBaseModel):
  primaryKey: Optional[bool] = None
  nullable: Optional[bool] = None

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def update_values(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -138,7 +140,8 @@ class QlikDataset(Item):
  itemId: str
  datasetSchema: List[SchemaField]

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def update_values(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -174,7 +177,8 @@ class Chart(_QlikBaseModel):
  qDimension: List[AxisProperty]
  qMeasure: List[AxisProperty]

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def update_values(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -193,7 +197,8 @@ class Sheet(_QlikBaseModel):
  updatedAt: datetime
  charts: List[Chart] = []

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def update_values(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -220,7 +225,8 @@ class QlikTable(_QlikBaseModel):
  databaseName: Optional[str] = None
  schemaName: Optional[str] = None

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def update_values(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -239,7 +245,8 @@ class App(Item):
  sheets: List[Sheet] = []
  tables: List[QlikTable] = []

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def update_values(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -56,7 +56,7 @@ class QlikAPI:
  response.raise_for_status()
  response_dict = response.json()
  for space_dict in response_dict[Constant.DATA]:
- space = Space.parse_obj(space_dict)
+ space = Space.model_validate(space_dict)
  spaces.append(space)
  self.spaces[space.id] = space.name
  if Constant.NEXT in response_dict[Constant.LINKS]:
@@ -64,7 +64,7 @@ class QlikAPI:
  else:
  break
  # Add personal space entity
- spaces.append(Space.parse_obj(PERSONAL_SPACE_DICT))
+ spaces.append(Space.model_validate(PERSONAL_SPACE_DICT))
  self.spaces[PERSONAL_SPACE_DICT[Constant.ID]] = PERSONAL_SPACE_DICT[
  Constant.NAME
  ]
@@ -78,7 +78,7 @@ class QlikAPI:
  response.raise_for_status()
  response_dict = response.json()
  response_dict[Constant.ITEMID] = item_id
- return QlikDataset.parse_obj(response_dict)
+ return QlikDataset.model_validate(response_dict)
  except Exception as e:
  self._log_http_error(
  message=f"Unable to fetch dataset with id {dataset_id}. Exception: {e}"
@@ -119,7 +119,7 @@ class QlikAPI:
  f"Chart with id {chart_id} of sheet {sheet_id} does not have hypercube. q_layout: {q_layout}"
  )
  return None
- return Chart.parse_obj(q_layout)
+ return Chart.model_validate(q_layout)
  except Exception as e:
  self._log_http_error(
  message=f"Unable to fetch chart {chart_id} of sheet {sheet_id}. Exception: {e}"
@@ -140,7 +140,7 @@ class QlikAPI:
  if Constant.OWNERID not in sheet_dict[Constant.QMETA]:
  # That means sheet is private sheet
  return None
- sheet = Sheet.parse_obj(sheet_dict[Constant.QMETA])
+ sheet = Sheet.model_validate(sheet_dict[Constant.QMETA])
  if Constant.QCHILDLIST not in sheet_dict:
  logger.warning(
  f"Sheet {sheet.title} with id {sheet_id} does not have any charts. sheet_dict: {sheet_dict}"
@@ -222,7 +222,7 @@ class QlikAPI:
  return []
  response = websocket_connection.websocket_send_request(method="GetLayout")
  for table_dict in response[Constant.QLAYOUT][Constant.TABLES]:
- tables.append(QlikTable.parse_obj(table_dict))
+ tables.append(QlikTable.model_validate(table_dict))
  websocket_connection.handle.pop()
  self._add_qri_of_tables(tables, app_id)
  except Exception as e:
@@ -270,7 +270,7 @@ class QlikAPI:
  response = websocket_connection.websocket_send_request(
  method="GetAppLayout"
  )
- app = App.parse_obj(response[Constant.QLAYOUT])
+ app = App.model_validate(response[Constant.QLAYOUT])
  app.sheets = self._get_app_sheets(websocket_connection, app_id)
  app.tables = self._get_app_used_tables(websocket_connection, app_id)
  websocket_connection.close_websocket()
@@ -148,7 +148,7 @@ class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):

  @classmethod
  def create(cls, config_dict, ctx):
- config = QlikSourceConfig.parse_obj(config_dict)
+ config = QlikSourceConfig.model_validate(config_dict)
  return cls(config, ctx)

  def _gen_space_key(self, space_id: str) -> SpaceKey:
@@ -3,7 +3,7 @@ from copy import deepcopy
  from enum import Enum
  from typing import Any, Dict, List, Optional

- from pydantic import root_validator
+ from pydantic import model_validator
  from pydantic.fields import Field

  from datahub.configuration import ConfigModel
@@ -182,7 +182,8 @@ class RedshiftConfig(
  description="Whether to skip EXTERNAL tables.",
  )

- @root_validator(pre=True)
+ @model_validator(mode="before")
+ @classmethod
  def check_email_is_set_on_usage(cls, values):
  if values.get("include_usage_statistics"):
  assert "email_domain" in values and values["email_domain"], (
@@ -190,31 +191,28 @@ class RedshiftConfig(
  )
  return values

- @root_validator(skip_on_failure=True)
- def check_database_is_set(cls, values):
- assert values.get("database"), "database must be set"
- return values
-
- @root_validator(skip_on_failure=True)
- def backward_compatibility_configs_set(cls, values: Dict) -> Dict:
- match_fully_qualified_names = values.get("match_fully_qualified_names")
-
- schema_pattern: Optional[AllowDenyPattern] = values.get("schema_pattern")
+ @model_validator(mode="after")
+ def check_database_is_set(self) -> "RedshiftConfig":
+ assert self.database, "database must be set"
+ return self

+ @model_validator(mode="after")
+ def backward_compatibility_configs_set(self) -> "RedshiftConfig":
  if (
- schema_pattern is not None
- and schema_pattern != AllowDenyPattern.allow_all()
- and match_fully_qualified_names is not None
- and not match_fully_qualified_names
+ self.schema_pattern is not None
+ and self.schema_pattern != AllowDenyPattern.allow_all()
+ and self.match_fully_qualified_names is not None
+ and not self.match_fully_qualified_names
  ):
  logger.warning(
  "Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
  "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
  "The config option `match_fully_qualified_names` will be deprecated in future and the default behavior will assume `match_fully_qualified_names: True`."
  )
- return values
+ return self

- @root_validator(skip_on_failure=True)
+ @model_validator(mode="before")
+ @classmethod
  def connection_config_compatibility_set(cls, values: Dict) -> Dict:
  # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
  values = deepcopy(values)
@@ -231,8 +229,8 @@ class RedshiftConfig(
  if "options" in values and "connect_args" in values["options"]:
  values["extra_client_options"] = values["options"]["connect_args"]

- if values["extra_client_options"]:
- if values["options"]:
+ if values.get("extra_client_options"):
+ if values.get("options"):
  values["options"]["connect_args"] = values["extra_client_options"]
  else:
  values["options"] = {"connect_args": values["extra_client_options"]}
@@ -236,7 +236,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
  RedshiftConfig.Config.extra = (
  pydantic.Extra.allow
  ) # we are okay with extra fields during this stage
- config = RedshiftConfig.parse_obj(config_dict)
+ config = RedshiftConfig.model_validate(config_dict)
  # source = RedshiftSource(config, report)
  connection: redshift_connector.Connection = (
  RedshiftSource.get_redshift_connection(config)
@@ -316,7 +316,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):

  @classmethod
  def create(cls, config_dict, ctx):
- config = RedshiftConfig.parse_obj(config_dict)
+ config = RedshiftConfig.model_validate(config_dict)
  return cls(config, ctx)

  @staticmethod
@@ -1,12 +1,12 @@
  import collections
  import logging
  import time
- from datetime import datetime
+ from datetime import datetime, timezone
  from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union

  import cachetools
- import pydantic.error_wrappers
  import redshift_connector
+ from pydantic import ValidationError, field_validator
  from pydantic.fields import Field
  from pydantic.main import BaseModel

@@ -64,6 +64,26 @@ class RedshiftAccessEvent(BaseModel):
  starttime: datetime
  endtime: datetime

+ @field_validator("starttime", "endtime", mode="before")
+ @classmethod
+ def ensure_utc_datetime(cls, v):
+ """Ensure datetime fields are treated as UTC for consistency with Pydantic V1 behavior.
+
+ Pydantic V2 assumes local timezone for naive datetime strings, whereas Pydantic V1 assumed UTC.
+ This validator restores V1 behavior to maintain timestamp consistency.
+ """
+ if isinstance(v, str):
+ # Parse as naive datetime, then assume UTC (matching V1 behavior)
+ dt = datetime.fromisoformat(v)
+ if dt.tzinfo is None:
+ # Treat naive datetime as UTC (this was the V1 behavior)
+ dt = dt.replace(tzinfo=timezone.utc)
+ return dt
+ elif isinstance(v, datetime) and v.tzinfo is None:
+ # If we get a naive datetime object, assume UTC
+ return v.replace(tzinfo=timezone.utc)
+ return v
+

  class RedshiftUsageExtractor:
  """
@@ -291,7 +311,7 @@ class RedshiftUsageExtractor:
  else None
  ),
  )
- except pydantic.error_wrappers.ValidationError as e:
+ except ValidationError as e:
  logging.warning(
  f"Validation error on access event creation from row {row}. The error was: {e} Skipping ...."
  )
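
The last hunk above adds a mode="before" field validator so that naive Redshift timestamps are pinned to UTC before model validation. A minimal, self-contained sketch of that behavior (AccessEvent is a made-up model name, not the package's RedshiftAccessEvent):

# Standalone sketch: a mode="before" field validator that assumes UTC for naive datetimes,
# mirroring the ensure_utc_datetime validator added in the diff above.
from datetime import datetime, timezone

from pydantic import BaseModel, field_validator


class AccessEvent(BaseModel):
    starttime: datetime

    @field_validator("starttime", mode="before")
    @classmethod
    def ensure_utc(cls, v):
        if isinstance(v, str):
            v = datetime.fromisoformat(v)
        if isinstance(v, datetime) and v.tzinfo is None:
            # Naive inputs are treated as UTC, matching the Pydantic v1 behavior being preserved.
            return v.replace(tzinfo=timezone.utc)
        return v


print(AccessEvent(starttime="2024-01-01T00:00:00").starttime)
# -> 2024-01-01 00:00:00+00:00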