acryl-datahub 1.3.0.1rc9__py3-none-any.whl → 1.3.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (263)
  1. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2550 -2543
  2. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +263 -261
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/graph/client.py +5 -1
  39. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  40. datahub/ingestion/reporting/file_reporter.py +5 -4
  41. datahub/ingestion/run/pipeline.py +7 -6
  42. datahub/ingestion/run/pipeline_config.py +12 -14
  43. datahub/ingestion/run/sink_callback.py +1 -1
  44. datahub/ingestion/sink/datahub_rest.py +6 -4
  45. datahub/ingestion/source/abs/config.py +19 -19
  46. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  47. datahub/ingestion/source/abs/source.py +2 -2
  48. datahub/ingestion/source/aws/aws_common.py +1 -1
  49. datahub/ingestion/source/aws/glue.py +6 -4
  50. datahub/ingestion/source/aws/sagemaker.py +1 -1
  51. datahub/ingestion/source/azure/azure_common.py +8 -12
  52. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  53. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  54. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  55. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  56. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  57. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  58. datahub/ingestion/source/datahub/config.py +8 -8
  59. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  60. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  61. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  62. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  63. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  64. datahub/ingestion/source/delta_lake/config.py +6 -4
  65. datahub/ingestion/source/dremio/dremio_api.py +212 -78
  66. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  67. datahub/ingestion/source/dremio/dremio_entities.py +55 -39
  68. datahub/ingestion/source/dremio/dremio_profiling.py +14 -3
  69. datahub/ingestion/source/dremio/dremio_source.py +24 -26
  70. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  71. datahub/ingestion/source/elastic_search.py +110 -32
  72. datahub/ingestion/source/excel/source.py +1 -1
  73. datahub/ingestion/source/feast.py +1 -1
  74. datahub/ingestion/source/file.py +5 -4
  75. datahub/ingestion/source/fivetran/config.py +17 -16
  76. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  77. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  78. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  79. datahub/ingestion/source/ge_profiling_config.py +8 -5
  80. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  81. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  82. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  83. datahub/ingestion/source/grafana/models.py +23 -5
  84. datahub/ingestion/source/hex/api.py +7 -5
  85. datahub/ingestion/source/hex/hex.py +4 -3
  86. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  87. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  88. datahub/ingestion/source/identity/azure_ad.py +1 -1
  89. datahub/ingestion/source/identity/okta.py +10 -10
  90. datahub/ingestion/source/kafka/kafka.py +1 -1
  91. datahub/ingestion/source/ldap.py +1 -1
  92. datahub/ingestion/source/looker/looker_common.py +7 -5
  93. datahub/ingestion/source/looker/looker_config.py +21 -20
  94. datahub/ingestion/source/looker/lookml_config.py +47 -47
  95. datahub/ingestion/source/metabase.py +8 -8
  96. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  97. datahub/ingestion/source/metadata/lineage.py +13 -8
  98. datahub/ingestion/source/mlflow.py +1 -1
  99. datahub/ingestion/source/mode.py +6 -4
  100. datahub/ingestion/source/mongodb.py +4 -3
  101. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  102. datahub/ingestion/source/nifi.py +17 -23
  103. datahub/ingestion/source/openapi.py +6 -8
  104. datahub/ingestion/source/powerbi/config.py +33 -32
  105. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  106. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  107. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  108. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  109. datahub/ingestion/source/preset.py +8 -8
  110. datahub/ingestion/source/pulsar.py +1 -1
  111. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  112. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  113. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  114. datahub/ingestion/source/redshift/config.py +18 -20
  115. datahub/ingestion/source/redshift/redshift.py +2 -2
  116. datahub/ingestion/source/redshift/usage.py +23 -3
  117. datahub/ingestion/source/s3/config.py +83 -62
  118. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  119. datahub/ingestion/source/s3/source.py +8 -5
  120. datahub/ingestion/source/sac/sac.py +5 -4
  121. datahub/ingestion/source/salesforce.py +3 -2
  122. datahub/ingestion/source/schema/json_schema.py +2 -2
  123. datahub/ingestion/source/sigma/data_classes.py +3 -2
  124. datahub/ingestion/source/sigma/sigma.py +1 -1
  125. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  126. datahub/ingestion/source/slack/slack.py +1 -1
  127. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  128. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  129. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  130. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  131. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  132. datahub/ingestion/source/snowflake/snowflake_queries.py +28 -4
  133. datahub/ingestion/source/sql/athena.py +1 -1
  134. datahub/ingestion/source/sql/clickhouse.py +4 -2
  135. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  136. datahub/ingestion/source/sql/druid.py +1 -1
  137. datahub/ingestion/source/sql/hana.py +1 -1
  138. datahub/ingestion/source/sql/hive.py +7 -5
  139. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  140. datahub/ingestion/source/sql/mssql/source.py +13 -6
  141. datahub/ingestion/source/sql/mysql.py +1 -1
  142. datahub/ingestion/source/sql/oracle.py +17 -10
  143. datahub/ingestion/source/sql/postgres.py +2 -2
  144. datahub/ingestion/source/sql/presto.py +1 -1
  145. datahub/ingestion/source/sql/sql_config.py +8 -9
  146. datahub/ingestion/source/sql/sql_generic.py +1 -1
  147. datahub/ingestion/source/sql/teradata.py +1 -1
  148. datahub/ingestion/source/sql/trino.py +1 -1
  149. datahub/ingestion/source/sql/vertica.py +5 -4
  150. datahub/ingestion/source/sql_queries.py +174 -22
  151. datahub/ingestion/source/state/checkpoint.py +2 -2
  152. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  153. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  154. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  155. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  156. datahub/ingestion/source/superset.py +9 -9
  157. datahub/ingestion/source/tableau/tableau.py +14 -16
  158. datahub/ingestion/source/unity/azure_auth_config.py +15 -0
  159. datahub/ingestion/source/unity/config.py +51 -34
  160. datahub/ingestion/source/unity/connection.py +7 -1
  161. datahub/ingestion/source/unity/connection_test.py +1 -1
  162. datahub/ingestion/source/unity/proxy.py +216 -7
  163. datahub/ingestion/source/unity/proxy_types.py +91 -0
  164. datahub/ingestion/source/unity/source.py +29 -3
  165. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  166. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  167. datahub/ingestion/source/usage/usage_common.py +5 -3
  168. datahub/ingestion/source_config/csv_enricher.py +7 -6
  169. datahub/ingestion/source_config/operation_config.py +7 -4
  170. datahub/ingestion/source_config/pulsar.py +11 -15
  171. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  172. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  173. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  174. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  175. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  176. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  177. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  178. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  179. datahub/ingestion/transformer/dataset_domain.py +3 -3
  180. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  181. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  182. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  183. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  184. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  185. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  186. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  187. datahub/ingestion/transformer/replace_external_url.py +2 -2
  188. datahub/ingestion/transformer/set_browse_path.py +1 -1
  189. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  190. datahub/lite/duckdb_lite.py +1 -1
  191. datahub/lite/lite_util.py +2 -2
  192. datahub/metadata/_internal_schema_classes.py +62 -2
  193. datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -0
  194. datahub/metadata/schema.avsc +271 -91
  195. datahub/metadata/schemas/ApplicationProperties.avsc +5 -2
  196. datahub/metadata/schemas/AssertionInfo.avsc +48 -5
  197. datahub/metadata/schemas/BusinessAttributeInfo.avsc +8 -4
  198. datahub/metadata/schemas/ChartInfo.avsc +12 -5
  199. datahub/metadata/schemas/ContainerProperties.avsc +12 -5
  200. datahub/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
  201. datahub/metadata/schemas/CorpGroupInfo.avsc +7 -3
  202. datahub/metadata/schemas/CorpUserInfo.avsc +5 -2
  203. datahub/metadata/schemas/CorpUserSettings.avsc +4 -2
  204. datahub/metadata/schemas/DashboardInfo.avsc +16 -4
  205. datahub/metadata/schemas/DataFlowInfo.avsc +11 -5
  206. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +4 -2
  207. datahub/metadata/schemas/DataJobInfo.avsc +9 -4
  208. datahub/metadata/schemas/DataPlatformInfo.avsc +3 -1
  209. datahub/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
  210. datahub/metadata/schemas/DataProductProperties.avsc +5 -2
  211. datahub/metadata/schemas/DataTypeInfo.avsc +5 -0
  212. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  213. datahub/metadata/schemas/DatasetProperties.avsc +12 -5
  214. datahub/metadata/schemas/DomainProperties.avsc +7 -3
  215. datahub/metadata/schemas/EditableContainerProperties.avsc +2 -1
  216. datahub/metadata/schemas/EditableDashboardProperties.avsc +2 -1
  217. datahub/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
  218. datahub/metadata/schemas/EditableDataJobProperties.avsc +2 -1
  219. datahub/metadata/schemas/EditableDatasetProperties.avsc +2 -1
  220. datahub/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
  221. datahub/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
  222. datahub/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
  223. datahub/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
  224. datahub/metadata/schemas/EditableMLModelProperties.avsc +2 -1
  225. datahub/metadata/schemas/EditableNotebookProperties.avsc +2 -1
  226. datahub/metadata/schemas/EditableSchemaMetadata.avsc +5 -3
  227. datahub/metadata/schemas/EntityTypeInfo.avsc +5 -0
  228. datahub/metadata/schemas/GlobalTags.avsc +3 -2
  229. datahub/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
  230. datahub/metadata/schemas/GlossaryTermInfo.avsc +3 -1
  231. datahub/metadata/schemas/InputFields.avsc +3 -2
  232. datahub/metadata/schemas/MLFeatureKey.avsc +3 -1
  233. datahub/metadata/schemas/MLFeatureTableKey.avsc +3 -1
  234. datahub/metadata/schemas/MLModelDeploymentKey.avsc +3 -1
  235. datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
  236. datahub/metadata/schemas/MLModelKey.avsc +3 -1
  237. datahub/metadata/schemas/MLModelProperties.avsc +4 -2
  238. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +3 -1
  239. datahub/metadata/schemas/MetadataChangeEvent.avsc +124 -50
  240. datahub/metadata/schemas/NotebookInfo.avsc +5 -2
  241. datahub/metadata/schemas/Ownership.avsc +3 -2
  242. datahub/metadata/schemas/QuerySubjects.avsc +1 -1
  243. datahub/metadata/schemas/RoleProperties.avsc +3 -1
  244. datahub/metadata/schemas/SchemaFieldInfo.avsc +3 -1
  245. datahub/metadata/schemas/SchemaMetadata.avsc +3 -2
  246. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
  247. datahub/metadata/schemas/TagProperties.avsc +3 -1
  248. datahub/metadata/schemas/TestInfo.avsc +2 -1
  249. datahub/sdk/__init__.py +1 -0
  250. datahub/sdk/_all_entities.py +2 -0
  251. datahub/sdk/search_filters.py +68 -40
  252. datahub/sdk/tag.py +112 -0
  253. datahub/secret/datahub_secret_store.py +7 -4
  254. datahub/secret/file_secret_store.py +1 -1
  255. datahub/sql_parsing/schema_resolver.py +29 -0
  256. datahub/sql_parsing/sql_parsing_aggregator.py +15 -0
  257. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  258. datahub/testing/check_sql_parser_result.py +2 -2
  259. datahub/utilities/ingest_utils.py +1 -1
  260. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
  261. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
  262. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
  263. {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/snowflake/snowflake_connection.py

@@ -6,6 +6,7 @@ import pydantic
 import snowflake.connector
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives import serialization
+from pydantic import field_validator, model_validator
 from snowflake.connector import SnowflakeConnection as NativeSnowflakeConnection
 from snowflake.connector.cursor import DictCursor
 from snowflake.connector.network import (
@@ -125,26 +126,28 @@ class SnowflakeConnectionConfig(ConfigModel):
 
     rename_host_port_to_account_id = pydantic_renamed_field("host_port", "account_id")  # type: ignore[pydantic-field]
 
-    @pydantic.validator("account_id")
-    def validate_account_id(cls, account_id: str, values: Dict) -> str:
+    @field_validator("account_id", mode="after")
+    @classmethod
+    def validate_account_id(cls, account_id: str, info: pydantic.ValidationInfo) -> str:
         account_id = remove_protocol(account_id)
         account_id = remove_trailing_slashes(account_id)
         # Get the domain from config, fallback to default
-        domain = values.get("snowflake_domain", DEFAULT_SNOWFLAKE_DOMAIN)
+        domain = info.data.get("snowflake_domain", DEFAULT_SNOWFLAKE_DOMAIN)
         snowflake_host_suffix = f".{domain}"
         account_id = remove_suffix(account_id, snowflake_host_suffix)
         return account_id
 
-    @pydantic.validator("authentication_type", always=True)
-    def authenticator_type_is_valid(cls, v, values):
+    @field_validator("authentication_type", mode="before")
+    @classmethod
+    def authenticator_type_is_valid(cls, v: Any, info: pydantic.ValidationInfo) -> Any:
         if v not in _VALID_AUTH_TYPES:
             raise ValueError(
                 f"unsupported authenticator type '{v}' was provided,"
                 f" use one of {list(_VALID_AUTH_TYPES.keys())}"
             )
         if (
-            values.get("private_key") is not None
-            or values.get("private_key_path") is not None
+            info.data.get("private_key") is not None
+            or info.data.get("private_key_path") is not None
         ) and v != "KEY_PAIR_AUTHENTICATOR":
             raise ValueError(
                 f"Either `private_key` and `private_key_path` is set but `authentication_type` is {v}. "
@@ -153,21 +156,22 @@ class SnowflakeConnectionConfig(ConfigModel):
         if v == "KEY_PAIR_AUTHENTICATOR":
             # If we are using key pair auth, we need the private key path and password to be set
             if (
-                values.get("private_key") is None
-                and values.get("private_key_path") is None
+                info.data.get("private_key") is None
+                and info.data.get("private_key_path") is None
             ):
                 raise ValueError(
                     f"Both `private_key` and `private_key_path` are none. "
                     f"At least one should be set when using {v} authentication"
                 )
         elif v == "OAUTH_AUTHENTICATOR":
-            cls._check_oauth_config(values.get("oauth_config"))
+            cls._check_oauth_config(info.data.get("oauth_config"))
         logger.info(f"using authenticator type '{v}'")
         return v
 
-    @pydantic.validator("token", always=True)
-    def validate_token_oauth_config(cls, v, values):
-        auth_type = values.get("authentication_type")
+    @field_validator("token", mode="before")
+    @classmethod
+    def validate_token_oauth_config(cls, v: Any, info: pydantic.ValidationInfo) -> Any:
+        auth_type = info.data.get("authentication_type")
         if auth_type == "OAUTH_AUTHENTICATOR_TOKEN":
             if not v:
                 raise ValueError("Token required for OAUTH_AUTHENTICATOR_TOKEN.")
@@ -177,6 +181,24 @@ class SnowflakeConnectionConfig(ConfigModel):
             )
         return v
 
+    @model_validator(mode="after")
+    def validate_authentication_config(self):
+        """Validate authentication configuration consistency."""
+        # Check token requirement for OAUTH_AUTHENTICATOR_TOKEN
+        if self.authentication_type == "OAUTH_AUTHENTICATOR_TOKEN":
+            if not self.token:
+                raise ValueError("Token required for OAUTH_AUTHENTICATOR_TOKEN.")
+
+        # Check private key authentication consistency
+        if self.private_key is not None or self.private_key_path is not None:
+            if self.authentication_type != "KEY_PAIR_AUTHENTICATOR":
+                raise ValueError(
+                    f"Either `private_key` and `private_key_path` is set but `authentication_type` is {self.authentication_type}. "
+                    f"Should be set to 'KEY_PAIR_AUTHENTICATOR' when using key pair authentication"
+                )
+
+        return self
+
     @staticmethod
     def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None:
         if oauth_config is None:
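Most of this release is a mechanical pydantic v1 to v2 migration, and the snowflake_connection.py hunks above show the core pattern: `@pydantic.validator` becomes `@field_validator` plus `@classmethod`, and the v1 `values` dict becomes `info.data` on a `ValidationInfo`. A minimal sketch of the pattern, outside DataHub, with hypothetical class and field names:

```python
# Minimal sketch of the v1 -> v2 field-validator pattern used above.
# `ConnectionConfig`, `domain`, and `account_id` are hypothetical names,
# not DataHub code; assumes pydantic v2 is installed.
from pydantic import BaseModel, ValidationInfo, field_validator


class ConnectionConfig(BaseModel):
    domain: str = "example.com"
    account_id: str

    @field_validator("account_id", mode="after")
    @classmethod
    def strip_domain_suffix(cls, v: str, info: ValidationInfo) -> str:
        # info.data only contains fields declared (and validated) *before*
        # this one -- the v2 replacement for v1's `values` dict.
        suffix = "." + info.data.get("domain", "example.com")
        return v.removesuffix(suffix)


assert ConnectionConfig(account_id="acme.example.com").account_id == "acme"
```

One behavioral caveat: v1's `always=True` has no direct v2 equivalent, since v2 field validators skip fields that are absent and defaulted unless the field opts into `validate_default`. That is presumably why the new `@model_validator(mode="after")` above re-checks the token and key-pair constraints on the fully built model.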
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py

@@ -14,7 +14,7 @@ from typing import (
     Type,
 )
 
-from pydantic import BaseModel, Field, validator
+from pydantic import BaseModel, Field, field_validator
 
 from datahub.configuration.datetimes import parse_absolute_time
 from datahub.ingestion.api.closeable import Closeable
@@ -70,7 +70,7 @@ def pydantic_parse_json(field: str) -> "V1Validator":
             return json.loads(v)
         return v
 
-    return validator(field, pre=True, allow_reuse=True)(_parse_from_json)
+    return field_validator(field, mode="before")(_parse_from_json)
 
 
 class UpstreamColumnNode(BaseModel):
@@ -379,7 +379,7 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
                 # To avoid that causing a pydantic error we are setting it to an empty list
                 # instead of a list with an empty object
                 db_row["QUERIES"] = "[]"
-            return UpstreamLineageEdge.parse_obj(db_row)
+            return UpstreamLineageEdge.model_validate(db_row)
         except Exception as e:
             self.report.num_upstream_lineage_edge_parsing_failed += 1
             upstream_tables = db_row.get("UPSTREAM_TABLES")
datahub/ingestion/source/snowflake/snowflake_queries.py

@@ -78,6 +78,7 @@ from datahub.utilities.file_backed_collections import (
     ConnectionWrapper,
     FileBackedList,
 )
+from datahub.utilities.lossy_collections import LossyList
 from datahub.utilities.perf_timer import PerfTimer
 
 logger = logging.getLogger(__name__)
@@ -169,6 +170,10 @@ class SnowflakeQueriesExtractorReport(Report):
     num_stream_queries_observed: int = 0
     num_create_temp_view_queries_observed: int = 0
     num_users: int = 0
+    num_queries_with_empty_column_name: int = 0
+    queries_with_empty_column_name: LossyList[str] = dataclasses.field(
+        default_factory=LossyList
+    )
 
 
 @dataclass
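The new report fields lean on DataHub's `LossyList`, a size-bounded list used throughout ingestion reports so per-query bookkeeping cannot grow without bound. A rough, hypothetical stand-in for the idea (the real implementation in `datahub.utilities.lossy_collections` differs in detail, e.g. in how it samples):

```python
# Rough stand-in for the LossyList idea: keep at most `max_elements`
# appended items and count the overflow. Hypothetical code, not the
# actual datahub.utilities.lossy_collections implementation.
from typing import Generic, List, TypeVar

T = TypeVar("T")


class BoundedList(Generic[T]):
    def __init__(self, max_elements: int = 10) -> None:
        self.items: List[T] = []
        self.dropped = 0
        self.max_elements = max_elements

    def append(self, item: T) -> None:
        if len(self.items) < self.max_elements:
            self.items.append(item)
        else:
            self.dropped += 1  # remember how many were truncated

    def __repr__(self) -> str:
        suffix = f" (+{self.dropped} more)" if self.dropped else ""
        return f"{self.items!r}{suffix}"
```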
@@ -626,9 +631,28 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
 
             columns = set()
             for modified_column in obj["columns"]:
-                columns.add(
-                    self.identifiers.snowflake_identifier(modified_column["columnName"])
-                )
+                column_name = modified_column["columnName"]
+                # An empty column name in the audit log would cause an error when creating column URNs.
+                # To avoid this and still extract lineage, the raw query text is parsed as a fallback.
+                if not column_name or not column_name.strip():
+                    query_id = res["query_id"]
+                    self.report.num_queries_with_empty_column_name += 1
+                    self.report.queries_with_empty_column_name.append(query_id)
+                    logger.info(f"Query {query_id} has empty column name in audit log.")
+
+                    return ObservedQuery(
+                        query=query_text,
+                        session_id=res["session_id"],
+                        timestamp=timestamp,
+                        user=user,
+                        default_db=res["default_db"],
+                        default_schema=res["default_schema"],
+                        query_hash=get_query_fingerprint(
+                            query_text, self.identifiers.platform, fast=True
+                        ),
+                        extra_info=extra_info,
+                    )
+                columns.add(self.identifiers.snowflake_identifier(column_name))
 
             upstreams.append(dataset)
             column_usage[dataset] = columns
@@ -782,7 +806,7 @@ class SnowflakeQueriesSource(Source):
 
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> Self:
-        config = SnowflakeQueriesSourceConfig.parse_obj(config_dict)
+        config = SnowflakeQueriesSourceConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
datahub/ingestion/source/sql/athena.py

@@ -386,7 +386,7 @@ class AthenaSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = AthenaConfig.parse_obj(config_dict)
+        config = AthenaConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     # overwrite this method to allow to specify the usage of a custom dialect
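The same one-line change repeats across nearly every source's `create()` factory below: pydantic v1's deprecated `parse_obj()` is replaced by v2's `model_validate()`, which behaves the same for plain dict input. A self-contained sketch (the config class here is a hypothetical stand-in, not the real AthenaConfig):

```python
# parse_obj() (pydantic v1) -> model_validate() (pydantic v2); equivalent
# behavior for a plain dict. `ExampleConfig` is a hypothetical stand-in.
from pydantic import BaseModel


class ExampleConfig(BaseModel):
    database: str
    include_views: bool = True


config = ExampleConfig.model_validate({"database": "analytics"})
assert config.database == "analytics" and config.include_views
```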
datahub/ingestion/source/sql/clickhouse.py

@@ -10,6 +10,7 @@ import clickhouse_sqlalchemy.types as custom_types
 import pydantic
 from clickhouse_sqlalchemy.drivers import base
 from clickhouse_sqlalchemy.drivers.base import ClickHouseDialect
+from pydantic import model_validator
 from pydantic.fields import Field
 from sqlalchemy import create_engine, text
 from sqlalchemy.engine import reflection
@@ -175,7 +176,8 @@ class ClickHouseConfig(
         return str(url)
 
     # pre = True because we want to take some decision before pydantic initialize the configuration to default values
-    @pydantic.root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def projects_backward_compatibility(cls, values: Dict) -> Dict:
         secure = values.get("secure")
         protocol = values.get("protocol")
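The `root_validator(pre=True)` to `model_validator(mode="before")` change above keeps the same shape: the validator still receives the raw input mapping before defaults and field validation apply, but must now also be declared a `@classmethod`. A minimal sketch with hypothetical names:

```python
# Before-mode model validator: rewrite legacy keys in the raw input dict
# before field validation runs. The `secure` -> `protocol` shim mirrors
# the kind of backward compatibility handled above; names are hypothetical.
from typing import Any, Dict

from pydantic import BaseModel, model_validator


class ExampleClickHouseConfig(BaseModel):
    protocol: str = "native"

    @model_validator(mode="before")
    @classmethod
    def secure_implies_https(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        if values.pop("secure", False) and "protocol" not in values:
            values["protocol"] = "https"
        return values


assert ExampleClickHouseConfig.model_validate({"secure": True}).protocol == "https"
```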
@@ -423,7 +425,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = ClickHouseConfig.parse_obj(config_dict)
+        config = ClickHouseConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
datahub/ingestion/source/sql/cockroachdb.py

@@ -39,5 +39,5 @@ class CockroachDBSource(PostgresSource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = CockroachDBConfig.parse_obj(config_dict)
+        config = CockroachDBConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/source/sql/druid.py

@@ -77,5 +77,5 @@ class DruidSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = DruidConfig.parse_obj(config_dict)
+        config = DruidConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/source/sql/hana.py

@@ -36,5 +36,5 @@ class HanaSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "HanaSource":
-        config = HanaConfig.parse_obj(config_dict)
+        config = HanaConfig.model_validate(config_dict)
         return cls(config, ctx)
datahub/ingestion/source/sql/hive.py

@@ -6,7 +6,7 @@ from enum import Enum
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 from urllib.parse import urlparse
 
-from pydantic import validator
+from pydantic import field_validator
 from pydantic.fields import Field
 
 # This import verifies that the dependencies are available.
@@ -674,11 +674,13 @@ class HiveConfig(TwoTierSQLAlchemyConfig):
         description="Platform instance for the storage system",
     )
 
-    @validator("host_port")
-    def clean_host_port(cls, v):
+    @field_validator("host_port", mode="after")
+    @classmethod
+    def clean_host_port(cls, v: str) -> str:
         return config_clean.remove_protocol(v)
 
-    @validator("hive_storage_lineage_direction")
+    @field_validator("hive_storage_lineage_direction", mode="after")
+    @classmethod
     def _validate_direction(cls, v: str) -> str:
         """Validate the lineage direction."""
         if v.lower() not in ["upstream", "downstream"]:
@@ -725,7 +727,7 @@ class HiveSource(TwoTierSQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = HiveConfig.parse_obj(config_dict)
+        config = HiveConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:

datahub/ingestion/source/sql/hive_metastore.py

@@ -351,7 +351,7 @@ class HiveMetastoreSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = HiveMetastore.parse_obj(config_dict)
+        config = HiveMetastore.model_validate(config_dict)
         return cls(config, ctx)
 
     def gen_database_containers(
datahub/ingestion/source/sql/mssql/source.py

@@ -3,8 +3,8 @@ import re
 import urllib.parse
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 
-import pydantic
 import sqlalchemy.dialects.mssql
+from pydantic import ValidationInfo, field_validator
 from pydantic.fields import Field
 from sqlalchemy import create_engine, inspect
 from sqlalchemy.engine.base import Connection
@@ -140,11 +140,18 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
         description="Indicates if the SQL Server instance is running on AWS RDS. When None (default), automatic detection will be attempted using server name analysis.",
     )
 
-    @pydantic.validator("uri_args")
-    def passwords_match(cls, v, values, **kwargs):
-        if values["use_odbc"] and not values["sqlalchemy_uri"] and "driver" not in v:
+    @field_validator("uri_args", mode="after")
+    @classmethod
+    def passwords_match(
+        cls, v: Dict[str, Any], info: ValidationInfo, **kwargs: Any
+    ) -> Dict[str, Any]:
+        if (
+            info.data["use_odbc"]
+            and not info.data["sqlalchemy_uri"]
+            and "driver" not in v
+        ):
             raise ValueError("uri_args must contain a 'driver' option")
-        elif not values["use_odbc"] and v:
+        elif not info.data["use_odbc"] and v:
             raise ValueError("uri_args is not supported when ODBC is disabled")
         return v
 
@@ -314,7 +321,7 @@ class SQLServerSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "SQLServerSource":
-        config = SQLServerConfig.parse_obj(config_dict)
+        config = SQLServerConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     # override to get table descriptions
datahub/ingestion/source/sql/mysql.py

@@ -150,7 +150,7 @@ class MySQLSource(TwoTierSQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = MySQLConfig.parse_obj(config_dict)
+        config = MySQLConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def _setup_rds_iam_event_listener(
datahub/ingestion/source/sql/oracle.py

@@ -10,8 +10,8 @@ from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, Union,
 from unittest.mock import patch
 
 import oracledb
-import pydantic
 import sqlalchemy.engine
+from pydantic import ValidationInfo, field_validator
 from pydantic.fields import Field
 from sqlalchemy import event, sql
 from sqlalchemy.dialects.oracle.base import ischema_names
@@ -101,25 +101,32 @@ class OracleConfig(BasicSQLAlchemyConfig):
         "On Linux, this value is ignored, as ldconfig or LD_LIBRARY_PATH will define the location.",
     )
 
-    @pydantic.validator("service_name")
-    def check_service_name(cls, v, values):
-        if values.get("database") and v:
+    @field_validator("service_name", mode="after")
+    @classmethod
+    def check_service_name(
+        cls, v: Optional[str], info: ValidationInfo
+    ) -> Optional[str]:
+        if info.data.get("database") and v:
             raise ValueError(
                 "specify one of 'database' and 'service_name', but not both"
             )
         return v
 
-    @pydantic.validator("data_dictionary_mode")
-    def check_data_dictionary_mode(cls, value):
+    @field_validator("data_dictionary_mode", mode="after")
+    @classmethod
+    def check_data_dictionary_mode(cls, value: str) -> str:
         if value not in ("ALL", "DBA"):
             raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.")
         return value
 
-    @pydantic.validator("thick_mode_lib_dir", always=True)
-    def check_thick_mode_lib_dir(cls, v, values):
+    @field_validator("thick_mode_lib_dir", mode="before")
+    @classmethod
+    def check_thick_mode_lib_dir(
+        cls, v: Optional[str], info: ValidationInfo
+    ) -> Optional[str]:
         if (
             v is None
-            and values.get("enable_thick_mode")
+            and info.data.get("enable_thick_mode")
             and (platform.system() == "Darwin" or platform.system() == "Windows")
         ):
             raise ValueError(
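A subtlety worth noting in cross-field validators like `check_service_name` above: in pydantic v2, `info.data` only contains fields declared before the validated field on the model, so declaration order now matters. A sketch of the same mutual-exclusion check, with hypothetical names:

```python
# Cross-field check in pydantic v2: `database` must be declared before
# `service_name` for it to appear in info.data. Hypothetical names, not
# the real OracleConfig.
from typing import Optional

from pydantic import BaseModel, ValidationError, ValidationInfo, field_validator


class ExampleOracleConfig(BaseModel):
    database: Optional[str] = None
    service_name: Optional[str] = None

    @field_validator("service_name", mode="after")
    @classmethod
    def database_xor_service_name(
        cls, v: Optional[str], info: ValidationInfo
    ) -> Optional[str]:
        if info.data.get("database") and v:
            raise ValueError("specify one of 'database' and 'service_name', not both")
        return v


ExampleOracleConfig(database="db")  # ok
try:
    ExampleOracleConfig(database="db", service_name="svc")
except ValidationError as e:
    print(e.error_count(), "error")  # 1 error
```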
@@ -659,7 +666,7 @@ class OracleSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = OracleConfig.parse_obj(config_dict)
+        config = OracleConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_db_name(self, inspector: Inspector) -> str:
datahub/ingestion/source/sql/postgres.py

@@ -212,7 +212,7 @@ class PostgresSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = PostgresConfig.parse_obj(config_dict)
+        config = PostgresConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def _setup_rds_iam_event_listener(
@@ -288,7 +288,7 @@ class PostgresSource(SQLAlchemySource):
            return {}
 
        for row in results:
-            data.append(ViewLineageEntry.parse_obj(row))
+            data.append(ViewLineageEntry.model_validate(row))
 
        lineage_elements: Dict[Tuple[str, str], List[str]] = defaultdict(list)
        # Loop over the lineages in the JSON data.
datahub/ingestion/source/sql/presto.py

@@ -115,7 +115,7 @@ class PrestoSource(TrinoSource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = PrestoConfig.parse_obj(config_dict)
+        config = PrestoConfig.model_validate(config_dict)
         return cls(config, ctx)
 
 
datahub/ingestion/source/sql/sql_config.py

@@ -3,7 +3,7 @@ from abc import abstractmethod
 from typing import Any, Dict, Optional
 
 import pydantic
-from pydantic import Field
+from pydantic import Field, model_validator
 
 from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from datahub.configuration.source_common import (
@@ -49,7 +49,8 @@ class SQLFilterConfig(ConfigModel):
         description="Regex patterns for views to filter in ingestion. Note: Defaults to table_pattern if not specified. Specify regex to match the entire view name in database.schema.view format. e.g. to match all views starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
     )
 
-    @pydantic.root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def view_pattern_is_table_pattern_unless_specified(
         cls, values: Dict[str, Any]
     ) -> Dict[str, Any]:
@@ -120,11 +121,9 @@ class SQLCommonConfig(
             self.profiling.operation_config
         )
 
-    @pydantic.root_validator(skip_on_failure=True)
-    def ensure_profiling_pattern_is_passed_to_profiling(
-        cls, values: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        profiling: Optional[GEProfilingConfig] = values.get("profiling")
+    @model_validator(mode="after")
+    def ensure_profiling_pattern_is_passed_to_profiling(self):
+        profiling = self.profiling
         # Note: isinstance() check is required here as unity-catalog source reuses
         # SQLCommonConfig with different profiling config than GEProfilingConfig
         if (
@@ -132,8 +131,8 @@ class SQLCommonConfig(
             and isinstance(profiling, GEProfilingConfig)
             and profiling.enabled
         ):
-            profiling._allow_deny_patterns = values["profile_pattern"]
-        return values
+            profiling._allow_deny_patterns = self.profile_pattern
+        return self
 
     @abstractmethod
     def get_sql_alchemy_url(self):
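The last hunk shows the other root-validator migration: `root_validator(skip_on_failure=True)` becomes `model_validator(mode="after")`, which runs on the fully constructed instance, reads fields as attributes rather than from a `values` dict, and returns `self`. A runnable sketch with hypothetical names:

```python
# After-mode model validator: operate on the built model and return self.
# Mirrors the pattern of pushing profile_pattern into the profiling
# sub-config above; all names here are hypothetical, not DataHub code.
from pydantic import BaseModel, PrivateAttr, model_validator


class ExampleProfilingConfig(BaseModel):
    enabled: bool = False
    _pattern: str = PrivateAttr(default=".*")


class ExampleSQLConfig(BaseModel):
    profile_pattern: str = ".*"
    profiling: ExampleProfilingConfig = ExampleProfilingConfig()

    @model_validator(mode="after")
    def push_pattern_into_profiling(self) -> "ExampleSQLConfig":
        if self.profiling.enabled:
            self.profiling._pattern = self.profile_pattern
        return self


cfg = ExampleSQLConfig.model_validate(
    {"profile_pattern": "prod\\..*", "profiling": {"enabled": True}}
)
assert cfg.profiling._pattern == "prod\\..*"
```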
datahub/ingestion/source/sql/sql_generic.py

@@ -85,5 +85,5 @@ class SQLAlchemyGenericSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = SQLAlchemyGenericConfig.parse_obj(config_dict)
+        config = SQLAlchemyGenericConfig.model_validate(config_dict)
         return cls(config, ctx)
datahub/ingestion/source/sql/teradata.py

@@ -860,7 +860,7 @@ ORDER by DataBaseName, TableName;
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = TeradataConfig.parse_obj(config_dict)
+        config = TeradataConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def _init_schema_resolver(self) -> SchemaResolver:
datahub/ingestion/source/sql/trino.py

@@ -413,7 +413,7 @@ class TrinoSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = TrinoConfig.parse_obj(config_dict)
+        config = TrinoConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_schema_fields_for_column(
datahub/ingestion/source/sql/vertica.py

@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tupl
 
 import pydantic
 import pytest
-from pydantic import validator
+from pydantic import field_validator
 from vertica_sqlalchemy_dialect.base import VerticaInspector
 
 from datahub.configuration.common import AllowDenyPattern
@@ -105,8 +105,9 @@ class VerticaConfig(BasicSQLAlchemyConfig):
     # defaults
     scheme: str = pydantic.Field(default="vertica+vertica_python")
 
-    @validator("host_port")
-    def clean_host_port(cls, v):
+    @field_validator("host_port", mode="after")
+    @classmethod
+    def clean_host_port(cls, v: str) -> str:
         return config_clean.remove_protocol(v)
 
 
@@ -138,7 +139,7 @@ class VerticaSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "VerticaSource":
-        config = VerticaConfig.parse_obj(config_dict)
+        config = VerticaConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: