acryl-datahub 1.1.0rc4__py3-none-any.whl → 1.1.0.1rc6__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (87)
  1. {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/METADATA +2609 -2607
  2. {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/RECORD +87 -70
  3. {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +9 -8
  6. datahub/api/entities/external/__init__.py +0 -0
  7. datahub/api/entities/external/external_entities.py +239 -0
  8. datahub/api/entities/external/external_tag.py +145 -0
  9. datahub/api/entities/external/restricted_text.py +247 -0
  10. datahub/api/entities/external/unity_catalog_external_entites.py +170 -0
  11. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  12. datahub/cli/delete_cli.py +4 -4
  13. datahub/cli/ingest_cli.py +9 -1
  14. datahub/emitter/mce_builder.py +3 -1
  15. datahub/emitter/response_helper.py +86 -1
  16. datahub/emitter/rest_emitter.py +1 -1
  17. datahub/ingestion/graph/client.py +3 -3
  18. datahub/ingestion/source/apply/datahub_apply.py +4 -4
  19. datahub/ingestion/source/data_lake_common/data_lake_utils.py +22 -10
  20. datahub/ingestion/source/data_lake_common/object_store.py +644 -0
  21. datahub/ingestion/source/datahub/config.py +11 -0
  22. datahub/ingestion/source/datahub/datahub_database_reader.py +186 -33
  23. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  24. datahub/ingestion/source/dbt/dbt_common.py +30 -11
  25. datahub/ingestion/source/gcs/gcs_source.py +22 -7
  26. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  27. datahub/ingestion/source/hex/query_fetcher.py +9 -3
  28. datahub/ingestion/source/openapi.py +12 -0
  29. datahub/ingestion/source/openapi_parser.py +56 -37
  30. datahub/ingestion/source/s3/source.py +65 -6
  31. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  32. datahub/ingestion/source/snowflake/snowflake_queries.py +44 -21
  33. datahub/ingestion/source/snowflake/snowflake_query.py +0 -7
  34. datahub/ingestion/source/snowflake/snowflake_v2.py +17 -6
  35. datahub/ingestion/source/sql/athena.py +1 -0
  36. datahub/ingestion/source/sql/hive.py +2 -3
  37. datahub/ingestion/source/sql/sql_common.py +98 -34
  38. datahub/ingestion/source/sql/sql_types.py +5 -2
  39. datahub/ingestion/source/unity/config.py +5 -0
  40. datahub/ingestion/source/unity/proxy.py +117 -0
  41. datahub/ingestion/source/unity/source.py +167 -15
  42. datahub/ingestion/source/unity/tag_entities.py +295 -0
  43. datahub/metadata/_internal_schema_classes.py +667 -522
  44. datahub/metadata/_urns/urn_defs.py +1804 -1748
  45. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  46. datahub/metadata/schema.avsc +17358 -17584
  47. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  48. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  49. datahub/metadata/schemas/Applications.avsc +38 -0
  50. datahub/metadata/schemas/ChartKey.avsc +1 -0
  51. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  52. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  53. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  54. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  55. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  56. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  57. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  58. datahub/metadata/schemas/DatasetKey.avsc +1 -0
  59. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  60. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  61. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  62. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  63. datahub/metadata/schemas/MLModelGroupKey.avsc +1 -0
  64. datahub/metadata/schemas/MLModelKey.avsc +1 -0
  65. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  66. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  67. datahub/metadata/schemas/__init__.py +3 -3
  68. datahub/sdk/__init__.py +6 -0
  69. datahub/sdk/_all_entities.py +11 -0
  70. datahub/sdk/_shared.py +118 -1
  71. datahub/sdk/chart.py +315 -0
  72. datahub/sdk/container.py +7 -0
  73. datahub/sdk/dashboard.py +432 -0
  74. datahub/sdk/dataflow.py +309 -0
  75. datahub/sdk/datajob.py +342 -0
  76. datahub/sdk/dataset.py +8 -2
  77. datahub/sdk/entity_client.py +90 -2
  78. datahub/sdk/lineage_client.py +681 -82
  79. datahub/sdk/main_client.py +27 -8
  80. datahub/sdk/mlmodel.py +101 -38
  81. datahub/sdk/mlmodelgroup.py +7 -0
  82. datahub/sql_parsing/sql_parsing_aggregator.py +1 -1
  83. datahub/testing/mce_helpers.py +421 -0
  84. datahub/testing/sdk_v2_helpers.py +18 -0
  85. {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/entry_points.txt +0 -0
  86. {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/licenses/LICENSE +0 -0
  87. {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,11 @@ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
9
9
  from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
10
10
  from datahub.ingestion.graph.client import DataHubGraph
11
11
  from datahub.metadata.urns import (
12
+ ChartUrn,
12
13
  ContainerUrn,
14
+ DashboardUrn,
15
+ DataFlowUrn,
16
+ DataJobUrn,
13
17
  DatasetUrn,
14
18
  MlModelGroupUrn,
15
19
  MlModelUrn,
@@ -17,7 +21,11 @@ from datahub.metadata.urns import (
17
21
  )
18
22
  from datahub.sdk._all_entities import ENTITY_CLASSES
19
23
  from datahub.sdk._shared import UrnOrStr
24
+ from datahub.sdk.chart import Chart
20
25
  from datahub.sdk.container import Container
26
+ from datahub.sdk.dashboard import Dashboard
27
+ from datahub.sdk.dataflow import DataFlow
28
+ from datahub.sdk.datajob import DataJob
21
29
  from datahub.sdk.dataset import Dataset
22
30
  from datahub.sdk.entity import Entity
23
31
  from datahub.sdk.mlmodel import MLModel
@@ -57,6 +65,14 @@ class EntityClient:
57
65
  @overload
58
66
  def get(self, urn: MlModelGroupUrn) -> MLModelGroup: ...
59
67
  @overload
68
+ def get(self, urn: DataFlowUrn) -> DataFlow: ...
69
+ @overload
70
+ def get(self, urn: DataJobUrn) -> DataJob: ...
71
+ @overload
72
+ def get(self, urn: DashboardUrn) -> Dashboard: ...
73
+ @overload
74
+ def get(self, urn: ChartUrn) -> Chart: ...
75
+ @overload
60
76
  def get(self, urn: Union[Urn, str]) -> Entity: ...
61
77
  def get(self, urn: UrnOrStr) -> Entity:
62
78
  """Retrieve an entity by its urn.
@@ -76,7 +92,26 @@ class EntityClient:
76
92
  urn = Urn.from_string(urn)
77
93
 
78
94
  # TODO: add error handling around this with a suggested alternative if not yet supported
79
- EntityClass = ENTITY_CLASSES[urn.entity_type]
95
+ try:
96
+ EntityClass = ENTITY_CLASSES[urn.entity_type]
97
+ except KeyError as e:
98
+ # Try to import cloud-specific entities if not found
99
+ try:
100
+ from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion
101
+ from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
102
+
103
+ if urn.entity_type == "assertion":
104
+ EntityClass = Assertion
105
+ elif urn.entity_type == "monitor":
106
+ EntityClass = Monitor
107
+ else:
108
+ raise SdkUsageError(
109
+ f"Entity type {urn.entity_type} is not yet supported"
110
+ ) from e
111
+ except ImportError as e:
112
+ raise SdkUsageError(
113
+ f"Entity type {urn.entity_type} is not yet supported"
114
+ ) from e
80
115
 
81
116
  if not self._graph.exists(str(urn)):
82
117
  raise ItemNotFoundError(f"Entity {urn} not found")
@@ -84,7 +119,19 @@ class EntityClient:
84
119
  aspects = self._graph.get_entity_semityped(str(urn))
85
120
 
86
121
  # TODO: save the timestamp so we can use If-Unmodified-Since on the updates
87
- return EntityClass._new_from_graph(urn, aspects)
122
+ entity = EntityClass._new_from_graph(urn, aspects)
123
+
124
+ # Type narrowing for cloud-specific entities
125
+ if urn.entity_type == "assertion":
126
+ from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion
127
+
128
+ assert isinstance(entity, Assertion)
129
+ elif urn.entity_type == "monitor":
130
+ from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
131
+
132
+ assert isinstance(entity, Monitor)
133
+
134
+ return entity
88
135
 
89
136
  def create(self, entity: Entity) -> None:
90
137
  mcps = []
@@ -145,3 +192,44 @@ class EntityClient:
145
192
 
146
193
  mcps = updater.build()
147
194
  self._graph.emit_mcps(mcps)
195
+
196
+ def delete(
197
+ self,
198
+ urn: UrnOrStr,
199
+ check_exists: bool = True,
200
+ cascade: bool = False,
201
+ hard: bool = False,
202
+ ) -> None:
203
+ """Delete an entity by its urn.
204
+
205
+ Args:
206
+ urn: The urn of the entity to delete. Can be a string or :py:class:`Urn` object.
207
+ check_exists: Whether to check if the entity exists before deletion. Defaults to True.
208
+ cascade: Whether to cascade delete related entities. When True, deletes child entities
209
+ like datajobs within dataflows, datasets within containers, etc. Not yet supported.
210
+ hard: Whether to perform a hard delete (permanent) or soft delete. Defaults to False.
211
+
212
+ Raises:
213
+ SdkUsageError: If the entity does not exist and check_exists is True, or if cascade is True (not supported).
214
+
215
+ Note:
216
+ When hard is True, the operation is irreversible and the entity will be permanently removed.
217
+
218
+ Impact of cascade deletion (still to be done) depends on the input entity type:
219
+ - Container: Recursively deletes all containers and data assets within the container.
220
+ - Dataflow: Recursively deletes all data jobs within the dataflow.
221
+ - Dashboard: TBD
222
+ - DataPlatformInstance: TBD
223
+ - ...
224
+ """
225
+ urn_str = str(urn) if isinstance(urn, Urn) else urn
226
+ if check_exists and not self._graph.exists(entity_urn=urn_str):
227
+ raise SdkUsageError(
228
+ f"Entity {urn_str} does not exist, and hence cannot be deleted. "
229
+ "You can bypass this check by setting check_exists=False."
230
+ )
231
+
232
+ if cascade:
233
+ raise SdkUsageError("The 'cascade' parameter is not yet supported.")
234
+
235
+ self._graph.delete_entity(urn=urn_str, hard=hard)