acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl

This diff shows the content changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. (The original page linked to further details here.)

Files changed (203)
  1. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
  2. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +2 -2
  5. datahub/api/entities/corpgroup/corpgroup.py +11 -6
  6. datahub/api/entities/corpuser/corpuser.py +11 -11
  7. datahub/api/entities/dataproduct/dataproduct.py +47 -27
  8. datahub/api/entities/dataset/dataset.py +32 -21
  9. datahub/api/entities/external/lake_formation_external_entites.py +5 -6
  10. datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
  11. datahub/api/entities/forms/forms.py +16 -14
  12. datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
  13. datahub/cli/check_cli.py +2 -2
  14. datahub/cli/config_utils.py +3 -3
  15. datahub/cli/lite_cli.py +9 -7
  16. datahub/cli/migrate.py +4 -4
  17. datahub/cli/quickstart_versioning.py +3 -3
  18. datahub/cli/specific/group_cli.py +1 -1
  19. datahub/cli/specific/structuredproperties_cli.py +1 -1
  20. datahub/cli/specific/user_cli.py +1 -1
  21. datahub/configuration/common.py +14 -2
  22. datahub/configuration/connection_resolver.py +2 -2
  23. datahub/configuration/git.py +47 -30
  24. datahub/configuration/import_resolver.py +2 -2
  25. datahub/configuration/kafka.py +4 -3
  26. datahub/configuration/time_window_config.py +26 -26
  27. datahub/configuration/validate_field_deprecation.py +2 -2
  28. datahub/configuration/validate_field_removal.py +2 -2
  29. datahub/configuration/validate_field_rename.py +2 -2
  30. datahub/configuration/validate_multiline_string.py +2 -1
  31. datahub/emitter/kafka_emitter.py +3 -1
  32. datahub/emitter/rest_emitter.py +2 -4
  33. datahub/ingestion/api/decorators.py +1 -1
  34. datahub/ingestion/api/report.py +1 -1
  35. datahub/ingestion/api/sink.py +1 -1
  36. datahub/ingestion/api/source.py +1 -1
  37. datahub/ingestion/glossary/datahub_classifier.py +11 -8
  38. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  39. datahub/ingestion/reporting/file_reporter.py +5 -4
  40. datahub/ingestion/run/pipeline.py +6 -6
  41. datahub/ingestion/run/pipeline_config.py +12 -14
  42. datahub/ingestion/run/sink_callback.py +1 -1
  43. datahub/ingestion/sink/datahub_rest.py +6 -4
  44. datahub/ingestion/source/abs/config.py +19 -19
  45. datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
  46. datahub/ingestion/source/abs/source.py +2 -2
  47. datahub/ingestion/source/aws/aws_common.py +1 -1
  48. datahub/ingestion/source/aws/glue.py +6 -4
  49. datahub/ingestion/source/aws/sagemaker.py +1 -1
  50. datahub/ingestion/source/azure/azure_common.py +8 -12
  51. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
  53. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
  54. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  55. datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
  56. datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
  57. datahub/ingestion/source/datahub/config.py +8 -8
  58. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  59. datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
  60. datahub/ingestion/source/dbt/dbt_common.py +39 -37
  61. datahub/ingestion/source/dbt/dbt_core.py +10 -12
  62. datahub/ingestion/source/debug/datahub_debug.py +1 -1
  63. datahub/ingestion/source/delta_lake/config.py +6 -4
  64. datahub/ingestion/source/dremio/dremio_config.py +10 -6
  65. datahub/ingestion/source/dremio/dremio_source.py +15 -15
  66. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  67. datahub/ingestion/source/elastic_search.py +4 -3
  68. datahub/ingestion/source/excel/source.py +1 -1
  69. datahub/ingestion/source/feast.py +1 -1
  70. datahub/ingestion/source/file.py +5 -4
  71. datahub/ingestion/source/fivetran/config.py +17 -16
  72. datahub/ingestion/source/fivetran/fivetran.py +2 -2
  73. datahub/ingestion/source/gc/datahub_gc.py +1 -1
  74. datahub/ingestion/source/gcs/gcs_source.py +8 -10
  75. datahub/ingestion/source/ge_profiling_config.py +8 -5
  76. datahub/ingestion/source/grafana/grafana_api.py +2 -2
  77. datahub/ingestion/source/grafana/grafana_config.py +4 -3
  78. datahub/ingestion/source/grafana/grafana_source.py +1 -1
  79. datahub/ingestion/source/grafana/models.py +23 -5
  80. datahub/ingestion/source/hex/api.py +7 -5
  81. datahub/ingestion/source/hex/hex.py +4 -3
  82. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  83. datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
  84. datahub/ingestion/source/identity/azure_ad.py +1 -1
  85. datahub/ingestion/source/identity/okta.py +10 -10
  86. datahub/ingestion/source/kafka/kafka.py +1 -1
  87. datahub/ingestion/source/ldap.py +1 -1
  88. datahub/ingestion/source/looker/looker_common.py +7 -5
  89. datahub/ingestion/source/looker/looker_config.py +21 -20
  90. datahub/ingestion/source/looker/lookml_config.py +47 -47
  91. datahub/ingestion/source/metabase.py +8 -8
  92. datahub/ingestion/source/metadata/business_glossary.py +2 -2
  93. datahub/ingestion/source/metadata/lineage.py +13 -8
  94. datahub/ingestion/source/mlflow.py +1 -1
  95. datahub/ingestion/source/mode.py +6 -4
  96. datahub/ingestion/source/mongodb.py +4 -3
  97. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  98. datahub/ingestion/source/nifi.py +17 -23
  99. datahub/ingestion/source/openapi.py +6 -8
  100. datahub/ingestion/source/powerbi/config.py +33 -32
  101. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
  102. datahub/ingestion/source/powerbi/powerbi.py +1 -1
  103. datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
  104. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
  105. datahub/ingestion/source/preset.py +8 -8
  106. datahub/ingestion/source/pulsar.py +1 -1
  107. datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
  108. datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
  109. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  110. datahub/ingestion/source/redshift/config.py +18 -20
  111. datahub/ingestion/source/redshift/redshift.py +2 -2
  112. datahub/ingestion/source/redshift/usage.py +23 -3
  113. datahub/ingestion/source/s3/config.py +83 -62
  114. datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
  115. datahub/ingestion/source/s3/source.py +8 -5
  116. datahub/ingestion/source/sac/sac.py +5 -4
  117. datahub/ingestion/source/salesforce.py +3 -2
  118. datahub/ingestion/source/schema/json_schema.py +2 -2
  119. datahub/ingestion/source/sigma/data_classes.py +3 -2
  120. datahub/ingestion/source/sigma/sigma.py +1 -1
  121. datahub/ingestion/source/sigma/sigma_api.py +7 -7
  122. datahub/ingestion/source/slack/slack.py +1 -1
  123. datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
  124. datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
  125. datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
  126. datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
  127. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
  128. datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
  129. datahub/ingestion/source/sql/athena.py +1 -1
  130. datahub/ingestion/source/sql/clickhouse.py +4 -2
  131. datahub/ingestion/source/sql/cockroachdb.py +1 -1
  132. datahub/ingestion/source/sql/druid.py +1 -1
  133. datahub/ingestion/source/sql/hana.py +1 -1
  134. datahub/ingestion/source/sql/hive.py +7 -5
  135. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  136. datahub/ingestion/source/sql/mssql/source.py +13 -6
  137. datahub/ingestion/source/sql/mysql.py +1 -1
  138. datahub/ingestion/source/sql/oracle.py +17 -10
  139. datahub/ingestion/source/sql/postgres.py +2 -2
  140. datahub/ingestion/source/sql/presto.py +1 -1
  141. datahub/ingestion/source/sql/sql_config.py +8 -9
  142. datahub/ingestion/source/sql/sql_generic.py +1 -1
  143. datahub/ingestion/source/sql/teradata.py +1 -1
  144. datahub/ingestion/source/sql/trino.py +1 -1
  145. datahub/ingestion/source/sql/vertica.py +5 -4
  146. datahub/ingestion/source/sql_queries.py +11 -8
  147. datahub/ingestion/source/state/checkpoint.py +2 -2
  148. datahub/ingestion/source/state/entity_removal_state.py +2 -1
  149. datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
  150. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
  151. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  152. datahub/ingestion/source/superset.py +9 -9
  153. datahub/ingestion/source/tableau/tableau.py +14 -16
  154. datahub/ingestion/source/unity/azure_auth_config.py +15 -0
  155. datahub/ingestion/source/unity/config.py +51 -34
  156. datahub/ingestion/source/unity/connection.py +7 -1
  157. datahub/ingestion/source/unity/connection_test.py +1 -1
  158. datahub/ingestion/source/unity/proxy.py +216 -7
  159. datahub/ingestion/source/unity/proxy_types.py +91 -0
  160. datahub/ingestion/source/unity/source.py +29 -3
  161. datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
  162. datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
  163. datahub/ingestion/source/usage/usage_common.py +5 -3
  164. datahub/ingestion/source_config/csv_enricher.py +7 -6
  165. datahub/ingestion/source_config/operation_config.py +7 -4
  166. datahub/ingestion/source_config/pulsar.py +11 -15
  167. datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
  168. datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
  169. datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
  170. datahub/ingestion/transformer/add_dataset_properties.py +2 -2
  171. datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
  172. datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
  173. datahub/ingestion/transformer/add_dataset_tags.py +3 -3
  174. datahub/ingestion/transformer/add_dataset_terms.py +3 -3
  175. datahub/ingestion/transformer/dataset_domain.py +3 -3
  176. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
  177. datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
  178. datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
  179. datahub/ingestion/transformer/mark_dataset_status.py +1 -1
  180. datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
  181. datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
  182. datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
  183. datahub/ingestion/transformer/replace_external_url.py +2 -2
  184. datahub/ingestion/transformer/set_browse_path.py +1 -1
  185. datahub/ingestion/transformer/tags_to_terms.py +1 -1
  186. datahub/lite/duckdb_lite.py +1 -1
  187. datahub/lite/lite_util.py +2 -2
  188. datahub/metadata/schema.avsc +7 -2
  189. datahub/metadata/schemas/QuerySubjects.avsc +1 -1
  190. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
  191. datahub/sdk/__init__.py +1 -0
  192. datahub/sdk/_all_entities.py +2 -0
  193. datahub/sdk/search_filters.py +68 -40
  194. datahub/sdk/tag.py +112 -0
  195. datahub/secret/datahub_secret_store.py +7 -4
  196. datahub/secret/file_secret_store.py +1 -1
  197. datahub/sql_parsing/sqlglot_lineage.py +5 -2
  198. datahub/testing/check_sql_parser_result.py +2 -2
  199. datahub/utilities/ingest_utils.py +1 -1
  200. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
  201. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
  202. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
  203. {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
datahub/sdk/tag.py ADDED
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Type
4
+
5
+ from typing_extensions import Self
6
+
7
+ import datahub.metadata.schema_classes as models
8
+ from datahub.metadata.urns import TagUrn, Urn
9
+ from datahub.sdk._shared import (
10
+ HasOwnership,
11
+ OwnersInputType,
12
+ )
13
+ from datahub.sdk.entity import Entity, ExtraAspectsType
14
+
15
+
16
+ class Tag(
17
+ HasOwnership,
18
+ Entity,
19
+ ):
20
+ __slots__ = ()
21
+
22
+ @classmethod
23
+ def get_urn_type(cls) -> Type[TagUrn]:
24
+ return TagUrn
25
+
26
+ def __init__(
27
+ self,
28
+ *,
29
+ # Identity.
30
+ name: str,
31
+ # Tag properties.
32
+ display_name: Optional[str] = None,
33
+ description: Optional[str] = None,
34
+ color: Optional[str] = None,
35
+ # Standard aspects.
36
+ owners: Optional[OwnersInputType] = None,
37
+ extra_aspects: ExtraAspectsType = None,
38
+ ):
39
+ """Initialize a new Tag instance."""
40
+ urn = TagUrn(name=name)
41
+ super().__init__(urn)
42
+ self._set_extra_aspects(extra_aspects)
43
+
44
+ self._ensure_tag_props(
45
+ display_name=display_name or name,
46
+ description=description,
47
+ color=color,
48
+ )
49
+
50
+ if owners is not None:
51
+ self.set_owners(owners)
52
+
53
+ @classmethod
54
+ def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
55
+ assert isinstance(urn, TagUrn)
56
+ entity = cls(name=urn.name)
57
+ return entity._init_from_graph(current_aspects)
58
+
59
+ @property
60
+ def urn(self) -> TagUrn:
61
+ assert isinstance(self._urn, TagUrn)
62
+ return self._urn
63
+
64
+ def _ensure_tag_props(
65
+ self,
66
+ *,
67
+ display_name: Optional[str] = None,
68
+ description: Optional[str] = None,
69
+ color: Optional[str] = None,
70
+ ) -> models.TagPropertiesClass:
71
+ existing_props = self._get_aspect(models.TagPropertiesClass)
72
+ if existing_props is not None:
73
+ if display_name is not None:
74
+ existing_props.name = display_name
75
+ if description is not None:
76
+ existing_props.description = description
77
+ if color is not None:
78
+ existing_props.colorHex = color
79
+ return existing_props
80
+
81
+ return self._setdefault_aspect(
82
+ models.TagPropertiesClass(
83
+ name=display_name or self.urn.name,
84
+ description=description,
85
+ colorHex=color,
86
+ )
87
+ )
88
+
89
+ @property
90
+ def name(self) -> str:
91
+ return self.urn.name
92
+
93
+ @property
94
+ def display_name(self) -> str:
95
+ return self._ensure_tag_props().name
96
+
97
+ def set_display_name(self, display_name: str) -> None:
98
+ self._ensure_tag_props(display_name=display_name)
99
+
100
+ @property
101
+ def description(self) -> Optional[str]:
102
+ return self._ensure_tag_props().description
103
+
104
+ def set_description(self, description: str) -> None:
105
+ self._ensure_tag_props(description=description)
106
+
107
+ @property
108
+ def color(self) -> Optional[str]:
109
+ return self._ensure_tag_props().colorHex
110
+
111
+ def set_color(self, color: str) -> None:
112
+ self._ensure_tag_props(color=color)
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  from typing import Any, Dict, List, Optional, Union
3
3
 
4
- from pydantic import BaseModel, validator
4
+ from pydantic import BaseModel, field_validator
5
5
 
6
6
  from datahub.ingestion.graph.client import DataHubGraph
7
7
  from datahub.ingestion.graph.config import DatahubClientConfig
@@ -18,8 +18,11 @@ class DataHubSecretStoreConfig(BaseModel):
18
18
  class Config:
19
19
  arbitrary_types_allowed = True
20
20
 
21
- @validator("graph_client")
22
- def check_graph_connection(cls, v: DataHubGraph) -> DataHubGraph:
21
+ @field_validator("graph_client", mode="after")
22
+ @classmethod
23
+ def check_graph_connection(
24
+ cls, v: Optional[DataHubGraph]
25
+ ) -> Optional[DataHubGraph]:
23
26
  if v is not None:
24
27
  v.test_connection()
25
28
  return v
@@ -63,7 +66,7 @@ class DataHubSecretStore(SecretStore):
63
66
 
64
67
  @classmethod
65
68
  def create(cls, config: Any) -> "DataHubSecretStore":
66
- config = DataHubSecretStoreConfig.parse_obj(config)
69
+ config = DataHubSecretStoreConfig.model_validate(config)
67
70
  return cls(config)
68
71
 
69
72
  def close(self) -> None:
@@ -45,5 +45,5 @@ class FileSecretStore(SecretStore):
45
45
 
46
46
  @classmethod
47
47
  def create(cls, config: Any) -> "FileSecretStore":
48
- config = FileSecretStoreConfig.parse_obj(config)
48
+ config = FileSecretStoreConfig.model_validate(config)
49
49
  return cls(config)
@@ -28,6 +28,7 @@ import sqlglot.optimizer.optimizer
28
28
  import sqlglot.optimizer.qualify
29
29
  import sqlglot.optimizer.qualify_columns
30
30
  import sqlglot.optimizer.unnest_subqueries
31
+ from pydantic import field_validator
31
32
 
32
33
  from datahub.cli.env_utils import get_boolean_env_variable
33
34
  from datahub.ingestion.graph.client import DataHubGraph
@@ -141,7 +142,8 @@ class DownstreamColumnRef(_ParserBaseModel):
141
142
  column_type: Optional[SchemaFieldDataTypeClass] = None
142
143
  native_column_type: Optional[str] = None
143
144
 
144
- @pydantic.validator("column_type", pre=True)
145
+ @field_validator("column_type", mode="before")
146
+ @classmethod
145
147
  def _load_column_type(
146
148
  cls, v: Optional[Union[dict, SchemaFieldDataTypeClass]]
147
149
  ) -> Optional[SchemaFieldDataTypeClass]:
@@ -215,7 +217,8 @@ class SqlParsingDebugInfo(_ParserBaseModel):
215
217
  def error(self) -> Optional[Exception]:
216
218
  return self.table_error or self.column_error
217
219
 
218
- @pydantic.validator("table_error", "column_error")
220
+ @field_validator("table_error", "column_error", mode="before")
221
+ @classmethod
219
222
  def remove_variables_from_error(cls, v: Optional[Exception]) -> Optional[Exception]:
220
223
  if v and v.__traceback__:
221
224
  # Remove local variables from the traceback to avoid memory leaks.
@@ -60,8 +60,8 @@ def assert_sql_result_with_resolver(
60
60
  expected = SqlParsingResult.parse_raw(expected_file.read_text())
61
61
 
62
62
  full_diff = deepdiff.DeepDiff(
63
- expected.dict(),
64
- res.dict(),
63
+ expected.model_dump(),
64
+ res.model_dump(),
65
65
  exclude_regex_paths=[
66
66
  r"root.column_lineage\[\d+\].logic",
67
67
  ],
@@ -48,7 +48,7 @@ def deploy_source_vars(
48
48
 
49
49
  deploy_options_raw = pipeline_config.pop("deployment", None)
50
50
  if deploy_options_raw is not None:
51
- deploy_options = DeployOptions.parse_obj(deploy_options_raw)
51
+ deploy_options = DeployOptions.model_validate(deploy_options_raw)
52
52
 
53
53
  if name:
54
54
  logger.info(f"Overriding deployment name {deploy_options.name} with {name}")