acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203)
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dremio/dremio_source.py +15 -15
  66. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  67. datahub/ingestion/source/elastic_search.py +4 -3
  68. datahub/ingestion/source/excel/source.py +1 -1
  69. datahub/ingestion/source/feast.py +1 -1
  70. datahub/ingestion/source/file.py +5 -4
  71. datahub/ingestion/source/fivetran/config.py +17 -16
  72. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  73. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  74. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  75. datahub/ingestion/source/ge_profiling_config.py +8 -5
  76. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  77. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  78. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  79. datahub/ingestion/source/grafana/models.py +23 -5
  80. datahub/ingestion/source/hex/api.py +7 -5
  81. datahub/ingestion/source/hex/hex.py +4 -3
  82. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  83. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  84. datahub/ingestion/source/identity/azure_ad.py +1 -1
  85. datahub/ingestion/source/identity/okta.py +10 -10
  86. datahub/ingestion/source/kafka/kafka.py +1 -1
  87. datahub/ingestion/source/ldap.py +1 -1
  88. datahub/ingestion/source/looker/looker_common.py +7 -5
  89. datahub/ingestion/source/looker/looker_config.py +21 -20
  90. datahub/ingestion/source/looker/lookml_config.py +47 -47
  91. datahub/ingestion/source/metabase.py +8 -8
  92. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  93. datahub/ingestion/source/metadata/lineage.py +13 -8
  94. datahub/ingestion/source/mlflow.py +1 -1
  95. datahub/ingestion/source/mode.py +6 -4
  96. datahub/ingestion/source/mongodb.py +4 -3
  97. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  98. datahub/ingestion/source/nifi.py +17 -23
  99. datahub/ingestion/source/openapi.py +6 -8
  100. datahub/ingestion/source/powerbi/config.py +33 -32
  101. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  102. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  103. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  104. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  105. datahub/ingestion/source/preset.py +8 -8
  106. datahub/ingestion/source/pulsar.py +1 -1
  107. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  108. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  109. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  110. datahub/ingestion/source/redshift/config.py +18 -20
  111. datahub/ingestion/source/redshift/redshift.py +2 -2
  112. datahub/ingestion/source/redshift/usage.py +23 -3
  113. datahub/ingestion/source/s3/config.py +83 -62
  114. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  115. datahub/ingestion/source/s3/source.py +8 -5
  116. datahub/ingestion/source/sac/sac.py +5 -4
  117. datahub/ingestion/source/salesforce.py +3 -2
  118. datahub/ingestion/source/schema/json_schema.py +2 -2
  119. datahub/ingestion/source/sigma/data_classes.py +3 -2
  120. datahub/ingestion/source/sigma/sigma.py +1 -1
  121. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  122. datahub/ingestion/source/slack/slack.py +1 -1
  123. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  125. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  126. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  127. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  128. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  129. datahub/ingestion/source/sql/athena.py +1 -1
  130. datahub/ingestion/source/sql/clickhouse.py +4 -2
  131. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  132. datahub/ingestion/source/sql/druid.py +1 -1
  133. datahub/ingestion/source/sql/hana.py +1 -1
  134. datahub/ingestion/source/sql/hive.py +7 -5
  135. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  136. datahub/ingestion/source/sql/mssql/source.py +13 -6
  137. datahub/ingestion/source/sql/mysql.py +1 -1
  138. datahub/ingestion/source/sql/oracle.py +17 -10
  139. datahub/ingestion/source/sql/postgres.py +2 -2
  140. datahub/ingestion/source/sql/presto.py +1 -1
  141. datahub/ingestion/source/sql/sql_config.py +8 -9
  142. datahub/ingestion/source/sql/sql_generic.py +1 -1
  143. datahub/ingestion/source/sql/teradata.py +1 -1
  144. datahub/ingestion/source/sql/trino.py +1 -1
  145. datahub/ingestion/source/sql/vertica.py +5 -4
  146. datahub/ingestion/source/sql_queries.py +11 -8
  147. datahub/ingestion/source/state/checkpoint.py +2 -2
  148. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  149. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  150. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  152. datahub/ingestion/source/superset.py +9 -9
  153. datahub/ingestion/source/tableau/tableau.py +14 -16
  154. datahub/ingestion/source/unity/azure_auth_config.py +15 -0
  155. datahub/ingestion/source/unity/config.py +51 -34
  156. datahub/ingestion/source/unity/connection.py +7 -1
  157. datahub/ingestion/source/unity/connection_test.py +1 -1
  158. datahub/ingestion/source/unity/proxy.py +216 -7
  159. datahub/ingestion/source/unity/proxy_types.py +91 -0
  160. datahub/ingestion/source/unity/source.py +29 -3
  161. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  162. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  163. datahub/ingestion/source/usage/usage_common.py +5 -3
  164. datahub/ingestion/source_config/csv_enricher.py +7 -6
  165. datahub/ingestion/source_config/operation_config.py +7 -4
  166. datahub/ingestion/source_config/pulsar.py +11 -15
  167. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  168. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  169. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  171. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  172. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  173. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  174. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  175. datahub/ingestion/transformer/dataset_domain.py +3 -3
  176. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  177. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  178. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  179. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  180. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  181. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  182. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  183. datahub/ingestion/transformer/replace_external_url.py +2 -2
  184. datahub/ingestion/transformer/set_browse_path.py +1 -1
  185. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  186. datahub/lite/duckdb_lite.py +1 -1
  187. datahub/lite/lite_util.py +2 -2
  188. datahub/metadata/schema.avsc +7 -2
  189. datahub/metadata/schemas/QuerySubjects.avsc +1 -1
  190. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
  191. datahub/sdk/__init__.py +1 -0
  192. datahub/sdk/_all_entities.py +2 -0
  193. datahub/sdk/search_filters.py +68 -40
  194. datahub/sdk/tag.py +112 -0
  195. datahub/secret/datahub_secret_store.py +7 -4
  196. datahub/secret/file_secret_store.py +1 -1
  197. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  198. datahub/testing/check_sql_parser_result.py +2 -2
  199. datahub/utilities/ingest_utils.py +1 -1
  200. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
  201. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
  202. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
  203. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
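
Most of the per-file changes below follow a single pattern: a Pydantic v1 → v2 migration in which `@validator`/`@root_validator` become `@field_validator`/`@model_validator`, `parse_obj()` becomes `model_validate()`, and `.dict()` becomes `.model_dump()`. A minimal sketch of the list-handling part of that migration, using a hypothetical `Team` model (nothing below is taken from the DataHub code itself): v2 has no `each_item=True`, so a `mode="before"` validator receives the whole value and iterates explicitly.

from typing import Any, List

from pydantic import BaseModel, field_validator


class Team(BaseModel):
    members: List[str] = []

    # v1: @validator("members", each_item=True) was called once per element.
    # v2: the mode="before" validator gets the raw value, so lists are handled in one pass.
    @field_validator("members", mode="before")
    @classmethod
    def normalize_members(cls, v: Any) -> Any:
        if isinstance(v, list):
            return [item.strip().lower() if isinstance(item, str) else item for item in v]
        return v


# v1 -> v2 method renames that appear throughout this diff:
# Team.parse_obj(data) -> Team.model_validate(data), team.dict() -> team.model_dump()
team = Team.model_validate({"members": [" Alice ", "Bob"]})
print(team.model_dump())  # {'members': ['alice', 'bob']}
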
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.3.1"
+ __version__ = "1.3.1.1"


  def is_dev_mode() -> bool:
datahub/api/entities/common/serialized_value.py CHANGED
@@ -104,7 +104,7 @@ class SerializedResourceValue(BaseModel):
  assert self.schema_ref
  assert self.schema_ref == model_type.__name__
  object_dict = self.as_raw_json()
- return model_type.parse_obj(object_dict)
+ return model_type.model_validate(object_dict)

  @classmethod
  def from_resource_value(
@@ -131,7 +131,7 @@ class SerializedResourceValue(BaseModel):
  elif isinstance(object, BaseModel):
  return SerializedResourceValue(
  content_type=models.SerializedValueContentTypeClass.JSON,
- blob=json.dumps(object.dict(), sort_keys=True).encode("utf-8"),
+ blob=json.dumps(object.model_dump(), sort_keys=True).encode("utf-8"),
  schema_type=models.SerializedValueSchemaTypeClass.JSON,
  schema_ref=object.__class__.__name__,
  )
datahub/api/entities/corpgroup/corpgroup.py CHANGED
@@ -2,10 +2,9 @@ from __future__ import annotations

  import logging
  from dataclasses import dataclass
- from typing import Callable, Iterable, List, Optional, Union
+ from typing import Any, Callable, Iterable, List, Optional, Union

- import pydantic
- from pydantic import BaseModel
+ from pydantic import BaseModel, field_validator

  import datahub.emitter.mce_builder as builder
  from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
@@ -70,9 +69,15 @@ class CorpGroup(BaseModel):

  _rename_admins_to_owners = pydantic_renamed_field("admins", "owners")

- @pydantic.validator("owners", "members", each_item=True)
- def make_urn_if_needed(cls, v):
- if isinstance(v, str):
+ @field_validator("owners", "members", mode="before")
+ @classmethod
+ def make_urn_if_needed(cls, v: Any) -> Any:
+ if isinstance(v, list):
+ return [
+ builder.make_user_urn(item) if isinstance(item, str) else item
+ for item in v
+ ]
+ elif isinstance(v, str):
  return builder.make_user_urn(v)
  return v

datahub/api/entities/corpuser/corpuser.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
  from dataclasses import dataclass
  from typing import Callable, Iterable, List, Optional

- import pydantic
+ from pydantic import model_validator

  import datahub.emitter.mce_builder as builder
  from datahub.configuration.common import ConfigModel
@@ -65,16 +65,16 @@ class CorpUser(ConfigModel):
  picture_link: Optional[str] = None
  phone: Optional[str] = None

- @pydantic.validator("full_name", always=True)
- def full_name_can_be_built_from_first_name_last_name(v, values):
- if not v:
- if "first_name" in values or "last_name" in values:
- first_name = values.get("first_name") or ""
- last_name = values.get("last_name") or ""
- full_name = f"{first_name} {last_name}" if last_name else first_name
- return full_name
- else:
- return v
+ @model_validator(mode="after")
+ def full_name_can_be_built_from_first_name_last_name(self) -> "CorpUser":
+ if not self.full_name:
+ if self.first_name or self.last_name:
+ first_name = self.first_name or ""
+ last_name = self.last_name or ""
+ self.full_name = (
+ f"{first_name} {last_name}" if last_name else first_name
+ )
+ return self

  @property
  def urn(self):
datahub/api/entities/dataproduct/dataproduct.py CHANGED
@@ -4,7 +4,7 @@ import time
  from pathlib import Path
  from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

- import pydantic
+ from pydantic import field_validator, model_validator
  from ruamel.yaml import YAML
  from typing_extensions import assert_never

@@ -71,7 +71,8 @@ class Ownership(ConfigModel):
  id: str
  type: str

- @pydantic.validator("type")
+ @field_validator("type", mode="after")
+ @classmethod
  def ownership_type_must_be_mappable_or_custom(cls, v: str) -> str:
  _, _ = builder.validate_ownership_type(v)
  return v
@@ -116,30 +117,49 @@ class DataProduct(ConfigModel):
  output_ports: Optional[List[str]] = None
  _original_yaml_dict: Optional[dict] = None

- @pydantic.validator("assets", each_item=True)
- def assets_must_be_urns(cls, v: str) -> str:
- try:
- Urn.from_string(v)
- except Exception as e:
- raise ValueError(f"asset {v} is not an urn: {e}") from e
-
- return v
-
- @pydantic.validator("output_ports", each_item=True)
- def output_ports_must_be_urns(cls, v: str) -> str:
- try:
- Urn.create_from_string(v)
- except Exception as e:
- raise ValueError(f"Output port {v} is not an urn: {e}") from e
+ @field_validator("assets", mode="before")
+ @classmethod
+ def assets_must_be_urns(cls, v: Any) -> Any:
+ if isinstance(v, list):
+ for item in v:
+ try:
+ Urn.from_string(item)
+ except Exception as e:
+ raise ValueError(f"asset {item} is not an urn: {e}") from e
+ return v
+ else:
+ try:
+ Urn.from_string(v)
+ except Exception as e:
+ raise ValueError(f"asset {v} is not an urn: {e}") from e
+ return v

+ @field_validator("output_ports", mode="before")
+ @classmethod
+ def output_ports_must_be_urns(cls, v: Any) -> Any:
+ if v is not None:
+ if isinstance(v, list):
+ for item in v:
+ try:
+ Urn.create_from_string(item)
+ except Exception as e:
+ raise ValueError(
+ f"Output port {item} is not an urn: {e}"
+ ) from e
+ else:
+ try:
+ Urn.create_from_string(v)
+ except Exception as e:
+ raise ValueError(f"Output port {v} is not an urn: {e}") from e
  return v

- @pydantic.validator("output_ports", each_item=True)
- def output_ports_must_be_from_asset_list(cls, v: str, values: dict) -> str:
- assets = values.get("assets", [])
- if v not in assets:
- raise ValueError(f"Output port {v} is not in asset list")
- return v
+ @model_validator(mode="after")
+ def output_ports_must_be_from_asset_list(self) -> "DataProduct":
+ if self.output_ports and self.assets:
+ for port in self.output_ports:
+ if port not in self.assets:
+ raise ValueError(f"Output port {port} is not in asset list")
+ return self

  @property
  def urn(self) -> str:
@@ -454,7 +474,7 @@
  patches_add.append(new_owner)
  else:
  patches_add.append(
- Ownership(id=new_owner, type=new_owner_type).dict()
+ Ownership(id=new_owner, type=new_owner_type).model_dump()
  )

  mutation_needed = bool(patches_replace or patches_drop or patches_add)
@@ -485,8 +505,8 @@
  raise Exception("Original Data Product was not loaded from yaml")

  orig_dictionary = original_dataproduct._original_yaml_dict
- original_dataproduct_dict = original_dataproduct.dict()
- this_dataproduct_dict = self.dict()
+ original_dataproduct_dict = original_dataproduct.model_dump()
+ this_dataproduct_dict = self.model_dump()
  for simple_field in ["display_name", "description", "external_url"]:
  if original_dataproduct_dict.get(simple_field) != this_dataproduct_dict.get(
  simple_field
@@ -566,7 +586,7 @@
  yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip)
  yaml.indent(mapping=2, sequence=4, offset=2)
  yaml.default_flow_style = False
- yaml.dump(self.dict(), fp)
+ yaml.dump(self.model_dump(), fp)

  @staticmethod
  def get_patch_builder(
datahub/api/entities/dataset/dataset.py CHANGED
@@ -3,6 +3,7 @@ import logging
  import time
  from pathlib import Path
  from typing import (
+ Any,
  Dict,
  Iterable,
  List,
@@ -19,8 +20,9 @@ from pydantic import (
  BaseModel,
  Field,
  StrictStr,
- root_validator,
- validator,
+ ValidationInfo,
+ field_validator,
+ model_validator,
  )
  from ruamel.yaml import YAML
  from typing_extensions import TypeAlias
@@ -213,14 +215,15 @@ class SchemaFieldSpecification(StrictModel):
  ),
  )

- @validator("urn", pre=True, always=True)
- def either_id_or_urn_must_be_filled_out(cls, v, values):
- if not v and not values.get("id"):
+ @model_validator(mode="after")
+ def either_id_or_urn_must_be_filled_out(self) -> "SchemaFieldSpecification":
+ if not self.urn and not self.id:
  raise ValueError("Either id or urn must be present")
- return v
+ return self

- @root_validator(pre=True)
- def sync_doc_into_description(cls, values: Dict) -> Dict:
+ @model_validator(mode="before")
+ @classmethod
+ def sync_doc_into_description(cls, values: Any) -> Any:
  """Synchronize doc into description field if doc is provided."""
  description = values.get("description")
  doc = values.pop("doc", None)
@@ -348,8 +351,9 @@ class SchemaSpecification(BaseModel):
  fields: Optional[List[SchemaFieldSpecification]] = None
  raw_schema: Optional[str] = None

- @validator("file")
- def file_must_be_avsc(cls, v):
+ @field_validator("file", mode="after")
+ @classmethod
+ def file_must_be_avsc(cls, v: Optional[str]) -> Optional[str]:
  if v and not v.endswith(".avsc"):
  raise ValueError("file must be a .avsc file")
  return v
@@ -359,7 +363,8 @@ class Ownership(ConfigModel):
  id: str
  type: str

- @validator("type")
+ @field_validator("type", mode="after")
+ @classmethod
  def ownership_type_must_be_mappable_or_custom(cls, v: str) -> str:
  _, _ = validate_ownership_type(v)
  return v
@@ -397,30 +402,36 @@ class Dataset(StrictModel):
  dataset_urn = DatasetUrn.from_string(self.urn)
  return str(dataset_urn.get_data_platform_urn())

- @validator("urn", pre=True, always=True)
- def urn_must_be_present(cls, v, values):
+ @field_validator("urn", mode="before")
+ @classmethod
+ def urn_must_be_present(cls, v: Any, info: ValidationInfo) -> Any:
  if not v:
+ values = info.data
  assert "id" in values, "id must be present if urn is not"
  assert "platform" in values, "platform must be present if urn is not"
  assert "env" in values, "env must be present if urn is not"
  return make_dataset_urn(values["platform"], values["id"], values["env"])
  return v

- @validator("name", pre=True, always=True)
- def name_filled_with_id_if_not_present(cls, v, values):
+ @field_validator("name", mode="before")
+ @classmethod
+ def name_filled_with_id_if_not_present(cls, v: Any, info: ValidationInfo) -> Any:
  if not v:
+ values = info.data
  assert "id" in values, "id must be present if name is not"
  return values["id"]
  return v

- @validator("platform")
- def platform_must_not_be_urn(cls, v):
- if v.startswith("urn:li:dataPlatform:"):
+ @field_validator("platform", mode="after")
+ @classmethod
+ def platform_must_not_be_urn(cls, v: Optional[str]) -> Optional[str]:
+ if v and v.startswith("urn:li:dataPlatform:"):
  return v[len("urn:li:dataPlatform:") :]
  return v

- @validator("structured_properties")
- def simplify_structured_properties(cls, v):
+ @field_validator("structured_properties", mode="after")
+ @classmethod
+ def simplify_structured_properties(cls, v: Any) -> Any:
  return StructuredPropertiesHelper.simplify_structured_properties_list(v)

  def _mint_auditstamp(self, message: str) -> AuditStampClass:
@@ -461,7 +472,7 @@
  if isinstance(datasets, dict):
  datasets = [datasets]
  for dataset_raw in datasets:
- dataset = Dataset.parse_obj(dataset_raw)
+ dataset = Dataset.model_validate(dataset_raw)
  # dataset = Dataset.model_validate(dataset_raw, strict=True)
  yield dataset

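
Where a `mode="before"` field validator still needs other fields (as in `urn_must_be_present` above), the v1 `values` argument is replaced by `ValidationInfo.data`. A small illustrative sketch with a made-up model; `validate_default=True` is what makes the validator run even when the field is omitted:

from typing import Any, Optional

from pydantic import BaseModel, Field, ValidationInfo, field_validator


class Record(BaseModel):
    id: str
    platform: str
    urn: Optional[str] = Field(default=None, validate_default=True)

    @field_validator("urn", mode="before")
    @classmethod
    def derive_urn(cls, v: Any, info: ValidationInfo) -> Any:
        # info.data holds the fields declared (and already validated) before this one.
        if not v:
            return f"urn:example:{info.data['platform']}:{info.data['id']}"
        return v


print(Record(id="table1", platform="hive").urn)  # urn:example:hive:table1
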
datahub/api/entities/external/lake_formation_external_entites.py CHANGED
@@ -12,7 +12,7 @@
  # https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
  from typing import Any, Dict, Optional

- from pydantic import validator
+ from pydantic import field_validator
  from typing_extensions import ClassVar

  from datahub.api.entities.external.external_tag import ExternalTag
@@ -50,11 +50,10 @@ class LakeFormationTag(ExternalTag):
  value: Optional[LakeFormationTagValueText] = None
  catalog: Optional[str] = None

- # Pydantic v1 validators
- @validator("key", pre=True)
+ @field_validator("key", mode="before")
  @classmethod
  def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:
- """Validate and convert key field for Pydantic v1."""
+ """Validate and convert key field."""
  if isinstance(v, LakeFormationTagKeyText):
  return v

@@ -64,10 +63,10 @@

  return LakeFormationTagKeyText(raw_text=v)

- @validator("value", pre=True)
+ @field_validator("value", mode="before")
  @classmethod
  def _validate_value(cls, v: Any) -> Optional[LakeFormationTagValueText]:
- """Validate and convert value field for Pydantic v1."""
+ """Validate and convert value field."""
  if v is None:
  return None

datahub/api/entities/external/unity_catalog_external_entites.py CHANGED
@@ -12,8 +12,7 @@
  # https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
  from typing import Any, Dict, Optional, Set

- # Import validator for Pydantic v1 (always needed since we removed conditional logic)
- from pydantic import validator
+ from pydantic import field_validator
  from typing_extensions import ClassVar

  from datahub.api.entities.external.external_tag import ExternalTag
@@ -62,11 +61,10 @@ class UnityCatalogTag(ExternalTag):
  key: UnityCatalogTagKeyText
  value: Optional[UnityCatalogTagValueText] = None

- # Pydantic v1 validators
- @validator("key", pre=True)
+ @field_validator("key", mode="before")
  @classmethod
  def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:
- """Validate and convert key field for Pydantic v1."""
+ """Validate and convert key field."""
  if isinstance(v, UnityCatalogTagKeyText):
  return v

@@ -76,10 +74,10 @@

  return UnityCatalogTagKeyText(raw_text=v)

- @validator("value", pre=True)
+ @field_validator("value", mode="before")
  @classmethod
  def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]:
- """Validate and convert value field for Pydantic v1."""
+ """Validate and convert value field."""
  if v is None:
  return None

datahub/api/entities/forms/forms.py CHANGED
@@ -5,7 +5,7 @@ from pathlib import Path
  from typing import List, Optional, Union

  import yaml
- from pydantic import Field, validator
+ from pydantic import Field, model_validator
  from ruamel.yaml import YAML
  from typing_extensions import Literal

@@ -70,11 +70,13 @@ class Prompt(ConfigModel):
  structured_property_urn: Optional[str] = Field(default=None, validate_default=True)
  required: Optional[bool] = None

- @validator("structured_property_urn", pre=True, always=True)
- def structured_property_urn_must_be_present(cls, v, values):
- if not v and values.get("structured_property_id"):
- return Urn.make_structured_property_urn(values["structured_property_id"])
- return v
+ @model_validator(mode="after")
+ def structured_property_urn_must_be_present(self) -> "Prompt":
+ if not self.structured_property_urn and self.structured_property_id:
+ self.structured_property_urn = Urn.make_structured_property_urn(
+ self.structured_property_id
+ )
+ return self


  class FormType(Enum):
@@ -122,13 +124,13 @@ class Forms(ConfigModel):
  group_owners: Optional[List[str]] = None # can be group IDs or urns
  actors: Optional[Actors] = None

- @validator("urn", pre=True, always=True)
- def urn_must_be_present(cls, v, values):
- if not v:
- if values.get("id") is None:
+ @model_validator(mode="after")
+ def urn_must_be_present(self) -> "Forms":
+ if not self.urn:
+ if self.id is None:
  raise ValueError("Form id must be present if urn is not")
- return f"urn:li:form:{values['id']}"
- return v
+ self.urn = f"urn:li:form:{self.id}"
+ return self

  @staticmethod
  def create(file: str) -> None:
@@ -137,7 +139,7 @@ class Forms(ConfigModel):
  with get_default_graph(ClientMode.CLI) as emitter, open(file) as fp:
  forms: List[dict] = yaml.safe_load(fp)
  for form_raw in forms:
- form = Forms.parse_obj(form_raw)
+ form = Forms.model_validate(form_raw)

  try:
  if not FormType.has_value(form.type):
@@ -445,4 +447,4 @@ class Forms(ConfigModel):
  yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip)
  yaml.indent(mapping=2, sequence=4, offset=2)
  yaml.default_flow_style = False
- yaml.dump(self.dict(), fp)
+ yaml.dump(self.model_dump(), fp)
datahub/api/entities/structuredproperties/structuredproperties.py CHANGED
@@ -4,7 +4,7 @@ from pathlib import Path
  from typing import Iterable, List, Optional, Type, Union

  import yaml
- from pydantic import Field, StrictStr, validator
+ from pydantic import Field, StrictStr, field_validator, model_validator
  from ruamel.yaml import YAML

  from datahub.configuration.common import ConfigModel
@@ -61,9 +61,12 @@ def _validate_entity_type_urn(cls: Type, v: str) -> str:
  class TypeQualifierAllowedTypes(ConfigModel):
  allowed_types: List[str]

- _check_allowed_types = validator("allowed_types", each_item=True, allow_reuse=True)(
- _validate_entity_type_urn
- )
+ @field_validator("allowed_types", mode="before")
+ @classmethod
+ def _check_allowed_types(cls, v: Union[str, List[str]]) -> Union[str, List[str]]:
+ if isinstance(v, list):
+ return [_validate_entity_type_urn(cls, item) for item in v]
+ return _validate_entity_type_urn(cls, v)


  class StructuredProperties(ConfigModel):
@@ -80,11 +83,15 @@ class StructuredProperties(ConfigModel):
  type_qualifier: Optional[TypeQualifierAllowedTypes] = None
  immutable: Optional[bool] = False

- _check_entity_types = validator("entity_types", each_item=True, allow_reuse=True)(
- _validate_entity_type_urn
- )
+ @field_validator("entity_types", mode="before")
+ @classmethod
+ def _check_entity_types(cls, v: Union[str, List[str]]) -> Union[str, List[str]]:
+ if isinstance(v, list):
+ return [_validate_entity_type_urn(cls, item) for item in v]
+ return _validate_entity_type_urn(cls, v)

- @validator("type")
+ @field_validator("type", mode="after")
+ @classmethod
  def validate_type(cls, v: str) -> str:
  # This logic is somewhat hacky, since we need to deal with
  # 1. fully qualified urns
@@ -123,13 +130,13 @@ class StructuredProperties(ConfigModel):
  )
  return id

- @validator("urn", pre=True, always=True)
- def urn_must_be_present(cls, v, values):
- if not v:
- if "id" not in values:
+ @model_validator(mode="after")
+ def urn_must_be_present(self) -> "StructuredProperties":
+ if not self.urn:
+ if not hasattr(self, "id") or not self.id:
  raise ValueError("id must be present if urn is not")
- return f"urn:li:structuredProperty:{values['id']}"
- return v
+ self.urn = f"urn:li:structuredProperty:{self.id}"
+ return self

  @staticmethod
  def from_yaml(file: str) -> List["StructuredProperties"]:
@@ -138,7 +145,7 @@ class StructuredProperties(ConfigModel):

  result: List[StructuredProperties] = []
  for structuredproperty_raw in structuredproperties:
- result.append(StructuredProperties.parse_obj(structuredproperty_raw))
+ result.append(StructuredProperties.model_validate(structuredproperty_raw))
  return result

  def generate_mcps(self) -> List[MetadataChangeProposalWrapper]:
@@ -225,7 +232,7 @@ class StructuredProperties(ConfigModel):
  yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip)
  yaml.indent(mapping=2, sequence=4, offset=2)
  yaml.default_flow_style = False
- yaml.dump(self.dict(), fp)
+ yaml.dump(self.model_dump(), fp)

  @staticmethod
  def list_urns(graph: DataHubGraph) -> Iterable[str]:
datahub/cli/check_cli.py CHANGED
@@ -316,7 +316,7 @@ def test_allow_deny(config: str, input: str, pattern_key: str) -> None:
  click.secho(f"{pattern_key} is not defined in the config", fg="red")
  exit(1)

- allow_deny_pattern = AllowDenyPattern.parse_obj(pattern_dict)
+ allow_deny_pattern = AllowDenyPattern.model_validate(pattern_dict)
  if allow_deny_pattern.allowed(input):
  click.secho(f"✅ {input} is allowed by {pattern_key}", fg="green")
  exit(0)
@@ -372,7 +372,7 @@ def test_path_spec(config: str, input: str, path_spec_key: str) -> None:
  pattern_dicts = [pattern_dicts]

  for pattern_dict in pattern_dicts:
- path_spec_pattern = PathSpec.parse_obj(pattern_dict)
+ path_spec_pattern = PathSpec.model_validate(pattern_dict)
  if path_spec_pattern.allowed(input):
  click.echo(f"{input} is allowed by {path_spec_pattern}")
  else:
datahub/cli/config_utils.py CHANGED
@@ -114,7 +114,7 @@ def load_client_config() -> DatahubClientConfig:
  try:
  _ensure_datahub_config()
  client_config_dict = get_raw_client_config()
- datahub_config: DatahubClientConfig = DatahubConfig.parse_obj(
+ datahub_config: DatahubClientConfig = DatahubConfig.model_validate(
  client_config_dict
  ).gms
  return datahub_config
@@ -146,7 +146,7 @@ def write_gms_config(
  logger.debug(
  f"Failed to retrieve config from file {DATAHUB_CONFIG_PATH}: {e}. This isn't fatal."
  )
- config_dict = {**previous_config, **config.dict()}
+ config_dict = {**previous_config, **config.model_dump()}
  else:
- config_dict = config.dict()
+ config_dict = config.model_dump()
  persist_raw_datahub_config(config_dict)
datahub/cli/lite_cli.py CHANGED
@@ -40,13 +40,13 @@ class DuckDBLiteConfigWrapper(DuckDBLiteConfig):

  class LiteCliConfig(DatahubConfig):
  lite: LiteLocalConfig = LiteLocalConfig(
- type="duckdb", config=DuckDBLiteConfigWrapper().dict()
+ type="duckdb", config=DuckDBLiteConfigWrapper().model_dump()
  )


  def get_lite_config() -> LiteLocalConfig:
  client_config_dict = get_raw_client_config()
- lite_config = LiteCliConfig.parse_obj(client_config_dict)
+ lite_config = LiteCliConfig.model_validate(client_config_dict)
  return lite_config.lite


@@ -55,7 +55,9 @@ def _get_datahub_lite(read_only: bool = False) -> DataHubLiteLocal:
  if lite_config.type == "duckdb":
  lite_config.config["read_only"] = read_only

- duckdb_lite = get_datahub_lite(config_dict=lite_config.dict(), read_only=read_only)
+ duckdb_lite = get_datahub_lite(
+ config_dict=lite_config.model_dump(), read_only=read_only
+ )
  return duckdb_lite


@@ -308,7 +310,7 @@
  ):
  result_str = searchable.id
  if details:
- result_str = json.dumps(searchable.dict())
+ result_str = json.dumps(searchable.model_dump())
  # suppress id if we have already seen it in the non-detailed response
  if details or searchable.id not in result_ids:
  click.secho(result_str)
@@ -321,7 +323,7 @@
  def write_lite_config(lite_config: LiteLocalConfig) -> None:
  cli_config = get_raw_client_config()
  assert isinstance(cli_config, dict)
- cli_config["lite"] = lite_config.dict()
+ cli_config["lite"] = lite_config.model_dump()
  persist_raw_datahub_config(cli_config)


@@ -332,12 +334,12 @@ def write_lite_config(lite_config: LiteLocalConfig) -> None:
  @telemetry.with_telemetry()
  def init(ctx: click.Context, type: Optional[str], file: Optional[str]) -> None:
  lite_config = get_lite_config()
- new_lite_config_dict = lite_config.dict()
+ new_lite_config_dict = lite_config.model_dump()
  # Update the type and config sections only
  new_lite_config_dict["type"] = type
  if file:
  new_lite_config_dict["config"]["file"] = file
- new_lite_config = LiteLocalConfig.parse_obj(new_lite_config_dict)
+ new_lite_config = LiteLocalConfig.model_validate(new_lite_config_dict)
  if lite_config != new_lite_config:
  if click.confirm(
  f"Will replace datahub lite config {lite_config} with {new_lite_config}"