acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (203)
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dremio/dremio_source.py +15 -15
  66. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  67. datahub/ingestion/source/elastic_search.py +4 -3
  68. datahub/ingestion/source/excel/source.py +1 -1
  69. datahub/ingestion/source/feast.py +1 -1
  70. datahub/ingestion/source/file.py +5 -4
  71. datahub/ingestion/source/fivetran/config.py +17 -16
  72. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  73. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  74. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  75. datahub/ingestion/source/ge_profiling_config.py +8 -5
  76. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  77. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  78. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  79. datahub/ingestion/source/grafana/models.py +23 -5
  80. datahub/ingestion/source/hex/api.py +7 -5
  81. datahub/ingestion/source/hex/hex.py +4 -3
  82. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  83. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  84. datahub/ingestion/source/identity/azure_ad.py +1 -1
  85. datahub/ingestion/source/identity/okta.py +10 -10
  86. datahub/ingestion/source/kafka/kafka.py +1 -1
  87. datahub/ingestion/source/ldap.py +1 -1
  88. datahub/ingestion/source/looker/looker_common.py +7 -5
  89. datahub/ingestion/source/looker/looker_config.py +21 -20
  90. datahub/ingestion/source/looker/lookml_config.py +47 -47
  91. datahub/ingestion/source/metabase.py +8 -8
  92. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  93. datahub/ingestion/source/metadata/lineage.py +13 -8
  94. datahub/ingestion/source/mlflow.py +1 -1
  95. datahub/ingestion/source/mode.py +6 -4
  96. datahub/ingestion/source/mongodb.py +4 -3
  97. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  98. datahub/ingestion/source/nifi.py +17 -23
  99. datahub/ingestion/source/openapi.py +6 -8
  100. datahub/ingestion/source/powerbi/config.py +33 -32
  101. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  102. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  103. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  104. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  105. datahub/ingestion/source/preset.py +8 -8
  106. datahub/ingestion/source/pulsar.py +1 -1
  107. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  108. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  109. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  110. datahub/ingestion/source/redshift/config.py +18 -20
  111. datahub/ingestion/source/redshift/redshift.py +2 -2
  112. datahub/ingestion/source/redshift/usage.py +23 -3
  113. datahub/ingestion/source/s3/config.py +83 -62
  114. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  115. datahub/ingestion/source/s3/source.py +8 -5
  116. datahub/ingestion/source/sac/sac.py +5 -4
  117. datahub/ingestion/source/salesforce.py +3 -2
  118. datahub/ingestion/source/schema/json_schema.py +2 -2
  119. datahub/ingestion/source/sigma/data_classes.py +3 -2
  120. datahub/ingestion/source/sigma/sigma.py +1 -1
  121. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  122. datahub/ingestion/source/slack/slack.py +1 -1
  123. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  125. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  126. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  127. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  128. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  129. datahub/ingestion/source/sql/athena.py +1 -1
  130. datahub/ingestion/source/sql/clickhouse.py +4 -2
  131. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  132. datahub/ingestion/source/sql/druid.py +1 -1
  133. datahub/ingestion/source/sql/hana.py +1 -1
  134. datahub/ingestion/source/sql/hive.py +7 -5
  135. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  136. datahub/ingestion/source/sql/mssql/source.py +13 -6
  137. datahub/ingestion/source/sql/mysql.py +1 -1
  138. datahub/ingestion/source/sql/oracle.py +17 -10
  139. datahub/ingestion/source/sql/postgres.py +2 -2
  140. datahub/ingestion/source/sql/presto.py +1 -1
  141. datahub/ingestion/source/sql/sql_config.py +8 -9
  142. datahub/ingestion/source/sql/sql_generic.py +1 -1
  143. datahub/ingestion/source/sql/teradata.py +1 -1
  144. datahub/ingestion/source/sql/trino.py +1 -1
  145. datahub/ingestion/source/sql/vertica.py +5 -4
  146. datahub/ingestion/source/sql_queries.py +11 -8
  147. datahub/ingestion/source/state/checkpoint.py +2 -2
  148. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  149. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  150. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  152. datahub/ingestion/source/superset.py +9 -9
  153. datahub/ingestion/source/tableau/tableau.py +14 -16
  154. datahub/ingestion/source/unity/azure_auth_config.py +15 -0
  155. datahub/ingestion/source/unity/config.py +51 -34
  156. datahub/ingestion/source/unity/connection.py +7 -1
  157. datahub/ingestion/source/unity/connection_test.py +1 -1
  158. datahub/ingestion/source/unity/proxy.py +216 -7
  159. datahub/ingestion/source/unity/proxy_types.py +91 -0
  160. datahub/ingestion/source/unity/source.py +29 -3
  161. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  162. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  163. datahub/ingestion/source/usage/usage_common.py +5 -3
  164. datahub/ingestion/source_config/csv_enricher.py +7 -6
  165. datahub/ingestion/source_config/operation_config.py +7 -4
  166. datahub/ingestion/source_config/pulsar.py +11 -15
  167. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  168. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  169. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  171. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  172. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  173. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  174. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  175. datahub/ingestion/transformer/dataset_domain.py +3 -3
  176. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  177. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  178. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  179. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  180. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  181. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  182. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  183. datahub/ingestion/transformer/replace_external_url.py +2 -2
  184. datahub/ingestion/transformer/set_browse_path.py +1 -1
  185. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  186. datahub/lite/duckdb_lite.py +1 -1
  187. datahub/lite/lite_util.py +2 -2
  188. datahub/metadata/schema.avsc +7 -2
  189. datahub/metadata/schemas/QuerySubjects.avsc +1 -1
  190. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
  191. datahub/sdk/__init__.py +1 -0
  192. datahub/sdk/_all_entities.py +2 -0
  193. datahub/sdk/search_filters.py +68 -40
  194. datahub/sdk/tag.py +112 -0
  195. datahub/secret/datahub_secret_store.py +7 -4
  196. datahub/secret/file_secret_store.py +1 -1
  197. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  198. datahub/testing/check_sql_parser_result.py +2 -2
  199. datahub/utilities/ingest_utils.py +1 -1
  200. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
  201. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
  202. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
  203. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  from typing import Callable, Dict, List, Optional, Union
3
3
 
4
- import pydantic
4
+ from pydantic import model_validator
5
5
 
6
6
  from datahub.configuration.common import ConfigModel, KeyValuePattern
7
7
  from datahub.configuration.import_resolver import pydantic_resolve_key
@@ -39,7 +39,7 @@ class AddDatasetDataProduct(DatasetDataproductTransformer):
39
39
 
40
40
  @classmethod
41
41
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetDataProduct":
42
- config = AddDatasetDataProductConfig.parse_obj(config_dict)
42
+ config = AddDatasetDataProductConfig.model_validate(config_dict)
43
43
  return cls(config, ctx)
44
44
 
45
45
  def transform_aspect(
@@ -116,7 +116,7 @@ class SimpleAddDatasetDataProduct(AddDatasetDataProduct):
116
116
  def create(
117
117
  cls, config_dict: dict, ctx: PipelineContext
118
118
  ) -> "SimpleAddDatasetDataProduct":
119
- config = SimpleDatasetDataProductConfig.parse_obj(config_dict)
119
+ config = SimpleDatasetDataProductConfig.model_validate(config_dict)
120
120
  return cls(config, ctx)
121
121
 
122
122
 
@@ -124,7 +124,8 @@ class PatternDatasetDataProductConfig(ConfigModel):
124
124
  dataset_to_data_product_urns_pattern: KeyValuePattern = KeyValuePattern.all()
125
125
  is_container: bool = False
126
126
 
127
- @pydantic.root_validator(pre=True)
127
+ @model_validator(mode="before")
128
+ @classmethod
128
129
  def validate_pattern_value(cls, values: Dict) -> Dict:
129
130
  rules = values["dataset_to_data_product_urns_pattern"]["rules"]
130
131
  for key, value in rules.items():
@@ -156,5 +157,5 @@ class PatternAddDatasetDataProduct(AddDatasetDataProduct):
156
157
  def create(
157
158
  cls, config_dict: dict, ctx: PipelineContext
158
159
  ) -> "PatternAddDatasetDataProduct":
159
- config = PatternDatasetDataProductConfig.parse_obj(config_dict)
160
+ config = PatternDatasetDataProductConfig.model_validate(config_dict)
160
161
  return cls(config, ctx)
@@ -55,7 +55,7 @@ class AddDatasetOwnership(OwnershipTransformer):
55
55
 
56
56
  @classmethod
57
57
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetOwnership":
58
- config = AddDatasetOwnershipConfig.parse_obj(config_dict)
58
+ config = AddDatasetOwnershipConfig.model_validate(config_dict)
59
59
  return cls(config, ctx)
60
60
 
61
61
  @staticmethod
@@ -209,7 +209,7 @@ class SimpleAddDatasetOwnership(AddDatasetOwnership):
209
209
  def create(
210
210
  cls, config_dict: dict, ctx: PipelineContext
211
211
  ) -> "SimpleAddDatasetOwnership":
212
- config = SimpleDatasetOwnershipConfig.parse_obj(config_dict)
212
+ config = SimpleDatasetOwnershipConfig.model_validate(config_dict)
213
213
  return cls(config, ctx)
214
214
 
215
215
 
@@ -247,5 +247,5 @@ class PatternAddDatasetOwnership(AddDatasetOwnership):
247
247
  def create(
248
248
  cls, config_dict: dict, ctx: PipelineContext
249
249
  ) -> "PatternAddDatasetOwnership":
250
- config = PatternDatasetOwnershipConfig.parse_obj(config_dict)
250
+ config = PatternDatasetOwnershipConfig.model_validate(config_dict)
251
251
  return cls(config, ctx)
@@ -50,7 +50,7 @@ class AddDatasetProperties(DatasetPropertiesTransformer):
50
50
 
51
51
  @classmethod
52
52
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetProperties":
53
- config = AddDatasetPropertiesConfig.parse_obj(config_dict)
53
+ config = AddDatasetPropertiesConfig.model_validate(config_dict)
54
54
  return cls(config, ctx)
55
55
 
56
56
  @staticmethod
@@ -144,5 +144,5 @@ class SimpleAddDatasetProperties(AddDatasetProperties):
144
144
  def create(
145
145
  cls, config_dict: dict, ctx: PipelineContext
146
146
  ) -> "SimpleAddDatasetProperties":
147
- config = SimpleAddDatasetPropertiesConfig.parse_obj(config_dict)
147
+ config = SimpleAddDatasetPropertiesConfig.model_validate(config_dict)
148
148
  return cls(config, ctx)
@@ -38,7 +38,7 @@ class AddDatasetSchemaTags(DatasetSchemaMetadataTransformer):
38
38
 
39
39
  @classmethod
40
40
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetSchemaTags":
41
- config = AddDatasetSchemaTagsConfig.parse_obj(config_dict)
41
+ config = AddDatasetSchemaTagsConfig.model_validate(config_dict)
42
42
  return cls(config, ctx)
43
43
 
44
44
  def extend_field(
@@ -142,5 +142,5 @@ class PatternAddDatasetSchemaTags(AddDatasetSchemaTags):
142
142
  def create(
143
143
  cls, config_dict: dict, ctx: PipelineContext
144
144
  ) -> "PatternAddDatasetSchemaTags":
145
- config = PatternDatasetTagsConfig.parse_obj(config_dict)
145
+ config = PatternDatasetTagsConfig.model_validate(config_dict)
146
146
  return cls(config, ctx)
@@ -39,7 +39,7 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer):
39
39
 
40
40
  @classmethod
41
41
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetSchemaTerms":
42
- config = AddDatasetSchemaTermsConfig.parse_obj(config_dict)
42
+ config = AddDatasetSchemaTermsConfig.model_validate(config_dict)
43
43
  return cls(config, ctx)
44
44
 
45
45
  def extend_field(
@@ -162,5 +162,5 @@ class PatternAddDatasetSchemaTerms(AddDatasetSchemaTerms):
162
162
  def create(
163
163
  cls, config_dict: dict, ctx: PipelineContext
164
164
  ) -> "PatternAddDatasetSchemaTerms":
165
- config = PatternDatasetTermsConfig.parse_obj(config_dict)
165
+ config = PatternDatasetTermsConfig.model_validate(config_dict)
166
166
  return cls(config, ctx)
@@ -41,7 +41,7 @@ class AddDatasetTags(DatasetTagsTransformer):
41
41
 
42
42
  @classmethod
43
43
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetTags":
44
- config = AddDatasetTagsConfig.parse_obj(config_dict)
44
+ config = AddDatasetTagsConfig.model_validate(config_dict)
45
45
  return cls(config, ctx)
46
46
 
47
47
  def transform_aspect(
@@ -104,7 +104,7 @@ class SimpleAddDatasetTags(AddDatasetTags):
104
104
 
105
105
  @classmethod
106
106
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "SimpleAddDatasetTags":
107
- config = SimpleDatasetTagConfig.parse_obj(config_dict)
107
+ config = SimpleDatasetTagConfig.model_validate(config_dict)
108
108
  return cls(config, ctx)
109
109
 
110
110
 
@@ -128,5 +128,5 @@ class PatternAddDatasetTags(AddDatasetTags):
128
128
 
129
129
  @classmethod
130
130
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "PatternAddDatasetTags":
131
- config = PatternDatasetTagsConfig.parse_obj(config_dict)
131
+ config = PatternDatasetTagsConfig.model_validate(config_dict)
132
132
  return cls(config, ctx)
@@ -39,7 +39,7 @@ class AddDatasetTerms(DatasetTermsTransformer):
39
39
 
40
40
  @classmethod
41
41
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetTerms":
42
- config = AddDatasetTermsConfig.parse_obj(config_dict)
42
+ config = AddDatasetTermsConfig.model_validate(config_dict)
43
43
  return cls(config, ctx)
44
44
 
45
45
  @staticmethod
@@ -120,7 +120,7 @@ class SimpleAddDatasetTerms(AddDatasetTerms):
120
120
 
121
121
  @classmethod
122
122
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "SimpleAddDatasetTerms":
123
- config = SimpleDatasetTermsConfig.parse_obj(config_dict)
123
+ config = SimpleDatasetTermsConfig.model_validate(config_dict)
124
124
  return cls(config, ctx)
125
125
 
126
126
 
@@ -147,5 +147,5 @@ class PatternAddDatasetTerms(AddDatasetTerms):
147
147
  def create(
148
148
  cls, config_dict: dict, ctx: PipelineContext
149
149
  ) -> "PatternAddDatasetTerms":
150
- config = PatternDatasetTermsConfig.parse_obj(config_dict)
150
+ config = PatternDatasetTermsConfig.model_validate(config_dict)
151
151
  return cls(config, ctx)
@@ -67,7 +67,7 @@ class AddDatasetDomain(DatasetDomainTransformer):
67
67
 
68
68
  @classmethod
69
69
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetDomain":
70
- config = AddDatasetDomainSemanticsConfig.parse_obj(config_dict)
70
+ config = AddDatasetDomainSemanticsConfig.model_validate(config_dict)
71
71
  return cls(config, ctx)
72
72
 
73
73
  @staticmethod
@@ -208,7 +208,7 @@ class SimpleAddDatasetDomain(AddDatasetDomain):
208
208
  def create(
209
209
  cls, config_dict: dict, ctx: PipelineContext
210
210
  ) -> "SimpleAddDatasetDomain":
211
- config = SimpleDatasetDomainSemanticsConfig.parse_obj(config_dict)
211
+ config = SimpleDatasetDomainSemanticsConfig.model_validate(config_dict)
212
212
  return cls(config, ctx)
213
213
 
214
214
 
@@ -238,5 +238,5 @@ class PatternAddDatasetDomain(AddDatasetDomain):
238
238
  def create(
239
239
  cls, config_dict: dict, ctx: PipelineContext
240
240
  ) -> "PatternAddDatasetDomain":
241
- config = PatternDatasetDomainSemanticsConfig.parse_obj(config_dict)
241
+ config = PatternDatasetDomainSemanticsConfig.model_validate(config_dict)
242
242
  return cls(config, ctx)
@@ -27,7 +27,7 @@ class DatasetTagDomainMapper(DatasetDomainTransformer):
27
27
  def create(
28
28
  cls, config_dict: dict, ctx: PipelineContext
29
29
  ) -> "DatasetTagDomainMapper":
30
- config = DatasetTagDomainMapperConfig.parse_obj(config_dict)
30
+ config = DatasetTagDomainMapperConfig.model_validate(config_dict)
31
31
  return cls(config, ctx)
32
32
 
33
33
  def transform_aspect(
@@ -29,7 +29,7 @@ class ExtractDatasetTags(DatasetTagsTransformer):
29
29
 
30
30
  @classmethod
31
31
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "ExtractDatasetTags":
32
- config = ExtractDatasetTagsConfig.parse_obj(config_dict)
32
+ config = ExtractDatasetTagsConfig.model_validate(config_dict)
33
33
  return cls(config, ctx)
34
34
 
35
35
  def _get_tags_to_add(self, entity_urn: str) -> List[TagAssociationClass]:
@@ -62,7 +62,7 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
62
62
  def create(
63
63
  cls, config_dict: dict, ctx: PipelineContext
64
64
  ) -> "ExtractOwnersFromTagsTransformer":
65
- config = ExtractOwnersFromTagsConfig.parse_obj(config_dict)
65
+ config = ExtractOwnersFromTagsConfig.model_validate(config_dict)
66
66
  return cls(config, ctx)
67
67
 
68
68
  def get_owner_urn(self, owner_str: str) -> str:
@@ -24,7 +24,7 @@ class MarkDatasetStatus(DatasetStatusTransformer):
24
24
 
25
25
  @classmethod
26
26
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "MarkDatasetStatus":
27
- config = MarkDatasetStatusConfig.parse_obj(config_dict)
27
+ config = MarkDatasetStatusConfig.model_validate(config_dict)
28
28
  return cls(config, ctx)
29
29
 
30
30
  def transform_aspect(
@@ -38,7 +38,7 @@ class PatternCleanupDatasetUsageUser(DatasetUsageStatisticsTransformer):
38
38
  def create(
39
39
  cls, config_dict: dict, ctx: PipelineContext
40
40
  ) -> "PatternCleanupDatasetUsageUser":
41
- config = PatternCleanupDatasetUsageUserConfig.parse_obj(config_dict)
41
+ config = PatternCleanupDatasetUsageUserConfig.model_validate(config_dict)
42
42
  return cls(config, ctx)
43
43
 
44
44
  def transform_aspect(
@@ -37,7 +37,7 @@ class PatternCleanUpOwnership(OwnershipTransformer):
37
37
  def create(
38
38
  cls, config_dict: dict, ctx: PipelineContext
39
39
  ) -> "PatternCleanUpOwnership":
40
- config = PatternCleanUpOwnershipConfig.parse_obj(config_dict)
40
+ config = PatternCleanUpOwnershipConfig.model_validate(config_dict)
41
41
  return cls(config, ctx)
42
42
 
43
43
  def _get_current_owner_urns(self, entity_urn: str) -> Set[str]:
@@ -21,7 +21,7 @@ class SimpleRemoveDatasetOwnership(OwnershipTransformer):
21
21
  def create(
22
22
  cls, config_dict: dict, ctx: PipelineContext
23
23
  ) -> "SimpleRemoveDatasetOwnership":
24
- config = ClearDatasetOwnershipConfig.parse_obj(config_dict)
24
+ config = ClearDatasetOwnershipConfig.model_validate(config_dict)
25
25
  return cls(config, ctx)
26
26
 
27
27
  def transform_aspect(
@@ -47,7 +47,7 @@ class ReplaceExternalUrlDataset(DatasetPropertiesTransformer, ReplaceUrl):
47
47
  def create(
48
48
  cls, config_dict: dict, ctx: PipelineContext
49
49
  ) -> "ReplaceExternalUrlDataset":
50
- config = ReplaceExternalUrlConfig.parse_obj(config_dict)
50
+ config = ReplaceExternalUrlConfig.model_validate(config_dict)
51
51
  return cls(config, ctx)
52
52
 
53
53
  def transform_aspect(
@@ -97,7 +97,7 @@ class ReplaceExternalUrlContainer(ContainerPropertiesTransformer, ReplaceUrl):
97
97
  def create(
98
98
  cls, config_dict: dict, ctx: PipelineContext
99
99
  ) -> "ReplaceExternalUrlContainer":
100
- config = ReplaceExternalUrlConfig.parse_obj(config_dict)
100
+ config = ReplaceExternalUrlConfig.model_validate(config_dict)
101
101
  return cls(config, ctx)
102
102
 
103
103
  def transform_aspect(
@@ -42,7 +42,7 @@ class SetBrowsePathTransformer(BaseTransformer, SingleAspectTransformer):
42
42
  def create(
43
43
  cls, config_dict: dict, ctx: PipelineContext
44
44
  ) -> "SetBrowsePathTransformer":
45
- config = SetBrowsePathTransformerConfig.parse_obj(config_dict)
45
+ config = SetBrowsePathTransformerConfig.model_validate(config_dict)
46
46
  return cls(config, ctx)
47
47
 
48
48
  @staticmethod
@@ -32,7 +32,7 @@ class TagsToTermMapper(TagsToTermTransformer):
32
32
 
33
33
  @classmethod
34
34
  def create(cls, config_dict: dict, ctx: PipelineContext) -> "TagsToTermMapper":
35
- config = TagsToTermMapperConfig.parse_obj(config_dict)
35
+ config = TagsToTermMapperConfig.model_validate(config_dict)
36
36
  return cls(config, ctx)
37
37
 
38
38
  @staticmethod
@@ -42,7 +42,7 @@ logger = logging.getLogger(__name__)
42
42
  class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
43
43
  @classmethod
44
44
  def create(cls, config_dict: dict) -> "DuckDBLite":
45
- config: DuckDBLiteConfig = DuckDBLiteConfig.parse_obj(config_dict)
45
+ config: DuckDBLiteConfig = DuckDBLiteConfig.model_validate(config_dict)
46
46
  return DuckDBLite(config)
47
47
 
48
48
  def __init__(self, config: DuckDBLiteConfig) -> None:
datahub/lite/lite_util.py CHANGED
@@ -92,7 +92,7 @@ class DataHubLiteWrapper(DataHubLiteLocal):
92
92
 
93
93
 
94
94
  def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLiteLocal":
95
- lite_local_config = LiteLocalConfig.parse_obj(config_dict)
95
+ lite_local_config = LiteLocalConfig.model_validate(config_dict)
96
96
 
97
97
  lite_type = lite_local_config.type
98
98
  try:
@@ -102,7 +102,7 @@ def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLite
102
102
  f"Failed to find a registered lite implementation for {lite_type}. Valid values are {[k for k in lite_registry.mapping]}"
103
103
  ) from e
104
104
 
105
- lite_specific_config = lite_class.get_config_class().parse_obj(
105
+ lite_specific_config = lite_class.get_config_class().model_validate(
106
106
  lite_local_config.config
107
107
  )
108
108
  lite = lite_class(lite_specific_config)
@@ -4824,7 +4824,7 @@
4824
4824
  {
4825
4825
  "Searchable": {
4826
4826
  "fieldName": "entities",
4827
- "fieldType": "URN"
4827
+ "fieldType": "KEYWORD"
4828
4828
  },
4829
4829
  "java": {
4830
4830
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
@@ -20303,7 +20303,12 @@
20303
20303
  "doc": "The fully qualified name of the property. e.g. io.acryl.datahub.myProperty"
20304
20304
  },
20305
20305
  {
20306
- "Searchable": {},
20306
+ "Searchable": {
20307
+ "enableAutocomplete": true,
20308
+ "fieldType": "WORD_GRAM",
20309
+ "searchLabel": "entityName",
20310
+ "searchTier": 1
20311
+ },
20307
20312
  "type": [
20308
20313
  "null",
20309
20314
  "string"
@@ -17,7 +17,7 @@
17
17
  {
18
18
  "Searchable": {
19
19
  "fieldName": "entities",
20
- "fieldType": "URN"
20
+ "fieldType": "KEYWORD"
21
21
  },
22
22
  "java": {
23
23
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
@@ -17,7 +17,12 @@
17
17
  "doc": "The fully qualified name of the property. e.g. io.acryl.datahub.myProperty"
18
18
  },
19
19
  {
20
- "Searchable": {},
20
+ "Searchable": {
21
+ "enableAutocomplete": true,
22
+ "fieldType": "WORD_GRAM",
23
+ "searchLabel": "entityName",
24
+ "searchTier": 1
25
+ },
21
26
  "type": [
22
27
  "null",
23
28
  "string"
datahub/sdk/__init__.py CHANGED
@@ -28,6 +28,7 @@ from datahub.sdk.main_client import DataHubClient
28
28
  from datahub.sdk.mlmodel import MLModel
29
29
  from datahub.sdk.mlmodelgroup import MLModelGroup
30
30
  from datahub.sdk.search_filters import Filter, FilterDsl
31
+ from datahub.sdk.tag import Tag
31
32
 
32
33
  # We want to print out the warning if people do `from datahub.sdk import X`.
33
34
  # But we don't want to print out warnings if they're doing a more direct
@@ -9,6 +9,7 @@ from datahub.sdk.dataset import Dataset
9
9
  from datahub.sdk.entity import Entity
10
10
  from datahub.sdk.mlmodel import MLModel
11
11
  from datahub.sdk.mlmodelgroup import MLModelGroup
12
+ from datahub.sdk.tag import Tag
12
13
 
13
14
  # Base entity classes that don't have circular dependencies
14
15
  # Those that do are imported in the EntityClient where needed
@@ -22,6 +23,7 @@ ENTITY_CLASSES_LIST: List[Type[Entity]] = [
22
23
  DataJob,
23
24
  Dashboard,
24
25
  Chart,
26
+ Tag,
25
27
  ]
26
28
 
27
29
  # Create the mapping of entity types to classes
@@ -16,6 +16,7 @@ from typing import (
16
16
  )
17
17
 
18
18
  import pydantic
19
+ from pydantic import field_validator
19
20
 
20
21
  from datahub.configuration.common import ConfigModel
21
22
  from datahub.configuration.pydantic_migration_helpers import (
@@ -102,7 +103,8 @@ class _EntitySubtypeFilter(_BaseFilter):
102
103
  description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
103
104
  )
104
105
 
105
- @pydantic.validator("entity_subtype", pre=True)
106
+ @field_validator("entity_subtype", mode="before")
107
+ @classmethod
106
108
  def validate_entity_subtype(cls, v: str) -> List[str]:
107
109
  return [v] if not isinstance(v, list) else v
108
110
 
@@ -141,10 +143,13 @@ class _PlatformFilter(_BaseFilter):
141
143
  platform: List[str]
142
144
  # TODO: Add validator to convert string -> list of strings
143
145
 
144
- @pydantic.validator("platform", each_item=True)
145
- def validate_platform(cls, v: str) -> str:
146
+ @field_validator("platform", mode="before")
147
+ @classmethod
148
+ def validate_platform(cls, v):
146
149
  # Subtle - we use the constructor instead of the from_string method
147
150
  # because coercion is acceptable here.
151
+ if isinstance(v, list):
152
+ return [str(DataPlatformUrn(item)) for item in v]
148
153
  return str(DataPlatformUrn(v))
149
154
 
150
155
  def _build_rule(self) -> SearchFilterRule:
@@ -161,8 +166,11 @@ class _PlatformFilter(_BaseFilter):
161
166
  class _DomainFilter(_BaseFilter):
162
167
  domain: List[str]
163
168
 
164
- @pydantic.validator("domain", each_item=True)
165
- def validate_domain(cls, v: str) -> str:
169
+ @field_validator("domain", mode="before")
170
+ @classmethod
171
+ def validate_domain(cls, v):
172
+ if isinstance(v, list):
173
+ return [str(DomainUrn.from_string(item)) for item in v]
166
174
  return str(DomainUrn.from_string(v))
167
175
 
168
176
  def _build_rule(self) -> SearchFilterRule:
@@ -183,8 +191,11 @@ class _ContainerFilter(_BaseFilter):
183
191
  description="If true, only entities that are direct descendants of the container will be returned.",
184
192
  )
185
193
 
186
- @pydantic.validator("container", each_item=True)
187
- def validate_container(cls, v: str) -> str:
194
+ @field_validator("container", mode="before")
195
+ @classmethod
196
+ def validate_container(cls, v):
197
+ if isinstance(v, list):
198
+ return [str(ContainerUrn.from_string(item)) for item in v]
188
199
  return str(ContainerUrn.from_string(v))
189
200
 
190
201
  @classmethod
@@ -249,17 +260,25 @@ class _OwnerFilter(_BaseFilter):
249
260
  description="The owner to filter on. Should be user or group URNs.",
250
261
  )
251
262
 
252
- @pydantic.validator("owner", each_item=True)
253
- def validate_owner(cls, v: str) -> str:
254
- if not v.startswith("urn:li:"):
255
- raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")
256
- _type = guess_entity_type(v)
257
- if _type == CorpUserUrn.ENTITY_TYPE:
258
- return str(CorpUserUrn.from_string(v))
259
- elif _type == CorpGroupUrn.ENTITY_TYPE:
260
- return str(CorpGroupUrn.from_string(v))
261
- else:
262
- raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")
263
+ @field_validator("owner", mode="before")
264
+ @classmethod
265
+ def validate_owner(cls, v):
266
+ validated = []
267
+ for owner in v:
268
+ if not owner.startswith("urn:li:"):
269
+ raise ValueError(
270
+ f"Owner must be a valid User or Group URN, got: {owner}"
271
+ )
272
+ _type = guess_entity_type(owner)
273
+ if _type == CorpUserUrn.ENTITY_TYPE:
274
+ validated.append(str(CorpUserUrn.from_string(owner)))
275
+ elif _type == CorpGroupUrn.ENTITY_TYPE:
276
+ validated.append(str(CorpGroupUrn.from_string(owner)))
277
+ else:
278
+ raise ValueError(
279
+ f"Owner must be a valid User or Group URN, got: {owner}"
280
+ )
281
+ return validated
263
282
 
264
283
  def _build_rule(self) -> SearchFilterRule:
265
284
  return SearchFilterRule(
@@ -279,17 +298,21 @@ class _GlossaryTermFilter(_BaseFilter):
279
298
  description="The glossary term to filter on. Should be glossary term URNs.",
280
299
  )
281
300
 
282
- @pydantic.validator("glossary_term", each_item=True)
283
- def validate_glossary_term(cls, v: str) -> str:
284
- if not v.startswith("urn:li:"):
285
- raise ValueError(f"Glossary term must be a valid URN, got: {v}")
286
- # Validate that it's a glossary term URN
287
- _type = guess_entity_type(v)
288
- if _type != "glossaryTerm":
289
- raise ValueError(
290
- f"Glossary term must be a valid glossary term URN, got: {v}"
291
- )
292
- return v
301
+ @field_validator("glossary_term", mode="before")
302
+ @classmethod
303
+ def validate_glossary_term(cls, v):
304
+ validated = []
305
+ for term in v:
306
+ if not term.startswith("urn:li:"):
307
+ raise ValueError(f"Glossary term must be a valid URN, got: {term}")
308
+ # Validate that it's a glossary term URN
309
+ _type = guess_entity_type(term)
310
+ if _type != "glossaryTerm":
311
+ raise ValueError(
312
+ f"Glossary term must be a valid glossary term URN, got: {term}"
313
+ )
314
+ validated.append(term)
315
+ return validated
293
316
 
294
317
  def _build_rule(self) -> SearchFilterRule:
295
318
  return SearchFilterRule(
@@ -309,15 +332,19 @@ class _TagFilter(_BaseFilter):
309
332
  description="The tag to filter on. Should be tag URNs.",
310
333
  )
311
334
 
312
- @pydantic.validator("tag", each_item=True)
313
- def validate_tag(cls, v: str) -> str:
314
- if not v.startswith("urn:li:"):
315
- raise ValueError(f"Tag must be a valid URN, got: {v}")
316
- # Validate that it's a tag URN
317
- _type = guess_entity_type(v)
318
- if _type != "tag":
319
- raise ValueError(f"Tag must be a valid tag URN, got: {v}")
320
- return v
335
+ @field_validator("tag", mode="before")
336
+ @classmethod
337
+ def validate_tag(cls, v):
338
+ validated = []
339
+ for tag in v:
340
+ if not tag.startswith("urn:li:"):
341
+ raise ValueError(f"Tag must be a valid URN, got: {tag}")
342
+ # Validate that it's a tag URN
343
+ _type = guess_entity_type(tag)
344
+ if _type != "tag":
345
+ raise ValueError(f"Tag must be a valid tag URN, got: {tag}")
346
+ validated.append(tag)
347
+ return validated
321
348
 
322
349
  def _build_rule(self) -> SearchFilterRule:
323
350
  return SearchFilterRule(
@@ -426,7 +453,8 @@ class _Not(_BaseFilter):
426
453
 
427
454
  not_: "Filter" = pydantic.Field(alias="not")
428
455
 
429
- @pydantic.validator("not_", pre=False)
456
+ @field_validator("not_", mode="after")
457
+ @classmethod
430
458
  def validate_not(cls, v: "Filter") -> "Filter":
431
459
  inner_filter = v.compile()
432
460
  if len(inner_filter) != 1:
@@ -571,7 +599,7 @@ def load_filters(obj: Any) -> Filter:
571
599
  if PYDANTIC_VERSION_2:
572
600
  return pydantic.TypeAdapter(Filter).validate_python(obj) # type: ignore
573
601
  else:
574
- return pydantic.parse_obj_as(Filter, obj) # type: ignore
602
+ return pydantic.TypeAdapter(Filter).validate_python(obj) # type: ignore
575
603
 
576
604
 
577
605
  # We need FilterDsl for two reasons: