acryl-datahub 1.2.0.6__py3-none-any.whl → 1.2.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (84)
  1. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7.dist-info}/METADATA +2629 -2543
  2. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7.dist-info}/RECORD +83 -75
  3. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7.dist-info}/entry_points.txt +1 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/graphql/operation.py +1 -1
  6. datahub/ingestion/autogenerated/capability_summary.json +46 -6
  7. datahub/ingestion/autogenerated/lineage.json +3 -2
  8. datahub/ingestion/run/pipeline.py +1 -0
  9. datahub/ingestion/source/aws/s3_boto_utils.py +97 -5
  10. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  11. datahub/ingestion/source/common/subtypes.py +3 -0
  12. datahub/ingestion/source/data_lake_common/path_spec.py +1 -1
  13. datahub/ingestion/source/datahub/datahub_database_reader.py +19 -8
  14. datahub/ingestion/source/dbt/dbt_common.py +74 -0
  15. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  16. datahub/ingestion/source/dremio/dremio_source.py +4 -0
  17. datahub/ingestion/source/dynamodb/dynamodb.py +10 -7
  18. datahub/ingestion/source/excel/__init__.py +0 -0
  19. datahub/ingestion/source/excel/config.py +92 -0
  20. datahub/ingestion/source/excel/excel_file.py +539 -0
  21. datahub/ingestion/source/excel/profiling.py +308 -0
  22. datahub/ingestion/source/excel/report.py +49 -0
  23. datahub/ingestion/source/excel/source.py +662 -0
  24. datahub/ingestion/source/excel/util.py +18 -0
  25. datahub/ingestion/source/fivetran/fivetran_query.py +8 -1
  26. datahub/ingestion/source/openapi.py +1 -1
  27. datahub/ingestion/source/powerbi/config.py +33 -0
  28. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  29. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  30. datahub/ingestion/source/powerbi/powerbi.py +5 -0
  31. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
  32. datahub/ingestion/source/redshift/config.py +9 -6
  33. datahub/ingestion/source/redshift/lineage.py +386 -687
  34. datahub/ingestion/source/redshift/redshift.py +19 -106
  35. datahub/ingestion/source/s3/source.py +65 -59
  36. datahub/ingestion/source/snowflake/constants.py +2 -0
  37. datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
  38. datahub/ingestion/source/snowflake/snowflake_connection.py +16 -5
  39. datahub/ingestion/source/snowflake/snowflake_query.py +27 -0
  40. datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
  41. datahub/ingestion/source/snowflake/snowflake_schema.py +179 -7
  42. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +25 -7
  43. datahub/ingestion/source/snowflake/snowflake_summary.py +1 -0
  44. datahub/ingestion/source/snowflake/snowflake_utils.py +18 -5
  45. datahub/ingestion/source/snowflake/snowflake_v2.py +6 -1
  46. datahub/ingestion/source/sql/hive_metastore.py +1 -0
  47. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  48. datahub/ingestion/source/sql/mssql/source.py +62 -3
  49. datahub/ingestion/source/sql_queries.py +24 -2
  50. datahub/ingestion/source/state/checkpoint.py +3 -28
  51. datahub/ingestion/source/unity/config.py +74 -9
  52. datahub/ingestion/source/unity/proxy.py +167 -5
  53. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  54. datahub/ingestion/source/unity/proxy_types.py +24 -0
  55. datahub/ingestion/source/unity/report.py +5 -0
  56. datahub/ingestion/source/unity/source.py +111 -1
  57. datahub/ingestion/source/usage/usage_common.py +1 -0
  58. datahub/metadata/_internal_schema_classes.py +573 -517
  59. datahub/metadata/_urns/urn_defs.py +1748 -1748
  60. datahub/metadata/schema.avsc +18564 -18484
  61. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  62. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +9 -0
  63. datahub/metadata/schemas/InstitutionalMemory.avsc +9 -0
  64. datahub/metadata/schemas/LogicalParent.avsc +104 -100
  65. datahub/metadata/schemas/MetadataChangeEvent.avsc +81 -45
  66. datahub/metadata/schemas/Ownership.avsc +69 -0
  67. datahub/metadata/schemas/SchemaFieldKey.avsc +3 -1
  68. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  69. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +3 -0
  70. datahub/metadata/schemas/__init__.py +3 -3
  71. datahub/sdk/chart.py +36 -22
  72. datahub/sdk/dashboard.py +38 -62
  73. datahub/sdk/lineage_client.py +6 -26
  74. datahub/sdk/main_client.py +7 -3
  75. datahub/sdk/search_filters.py +16 -0
  76. datahub/specific/aspect_helpers/siblings.py +73 -0
  77. datahub/specific/dataset.py +2 -0
  78. datahub/sql_parsing/sql_parsing_aggregator.py +3 -0
  79. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  80. datahub/upgrade/upgrade.py +14 -2
  81. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  82. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7.dist-info}/WHEEL +0 -0
  83. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7.dist-info}/licenses/LICENSE +0 -0
  84. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7.dist-info}/top_level.txt +0 -0
datahub/sdk/chart.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  from datetime import datetime
4
4
  from typing import Dict, List, Optional, Type, Union
5
5
 
6
+ from deprecated.sphinx import deprecated
6
7
  from typing_extensions import Self
7
8
 
8
9
  import datahub.metadata.schema_classes as models
@@ -23,11 +24,13 @@ from datahub.sdk._shared import (
23
24
  HasTerms,
24
25
  LinksInputType,
25
26
  OwnersInputType,
27
+ ParentContainerInputType,
26
28
  TagsInputType,
27
29
  TermsInputType,
28
30
  )
29
31
  from datahub.sdk.dataset import Dataset
30
32
  from datahub.sdk.entity import Entity, ExtraAspectsType
33
+ from datahub.utilities.sentinels import Unset, unset
31
34
 
32
35
 
33
36
  class Chart(
@@ -70,14 +73,15 @@ class Chart(
70
73
  last_refreshed: Optional[datetime] = None,
71
74
  chart_type: Optional[Union[str, models.ChartTypeClass]] = None,
72
75
  access: Optional[str] = None,
76
+ input_datasets: Optional[List[Union[DatasetUrnOrStr, Dataset]]] = None,
73
77
  # Standard aspects.
78
+ parent_container: ParentContainerInputType | Unset = unset,
74
79
  subtype: Optional[str] = None,
75
80
  owners: Optional[OwnersInputType] = None,
76
81
  links: Optional[LinksInputType] = None,
77
82
  tags: Optional[TagsInputType] = None,
78
83
  terms: Optional[TermsInputType] = None,
79
84
  domain: Optional[DomainInputType] = None,
80
- input_datasets: Optional[List[Union[DatasetUrnOrStr, Dataset]]] = None,
81
85
  extra_aspects: ExtraAspectsType = None,
82
86
  ):
83
87
  """Initialize a new Chart instance."""
@@ -91,19 +95,31 @@ class Chart(
91
95
 
92
96
  self._set_platform_instance(platform, platform_instance)
93
97
 
94
- # Set additional properties
98
+ self._ensure_chart_props(display_name=display_name)
99
+
100
+ if display_name is not None:
101
+ self.set_display_name(display_name)
102
+ if description is not None:
103
+ self.set_description(description)
95
104
  if external_url is not None:
96
105
  self.set_external_url(external_url)
97
106
  if chart_url is not None:
98
107
  self.set_chart_url(chart_url)
99
108
  if custom_properties is not None:
100
109
  self.set_custom_properties(custom_properties)
110
+ if last_modified is not None:
111
+ self.set_last_modified(last_modified)
101
112
  if last_refreshed is not None:
102
113
  self.set_last_refreshed(last_refreshed)
103
114
  if chart_type is not None:
104
115
  self.set_chart_type(chart_type)
105
116
  if access is not None:
106
117
  self.set_access(access)
118
+ if input_datasets is not None:
119
+ self.set_input_datasets(input_datasets)
120
+
121
+ if parent_container is not unset:
122
+ self._set_container(parent_container)
107
123
  if subtype is not None:
108
124
  self.set_subtype(subtype)
109
125
  if owners is not None:
@@ -116,14 +132,6 @@ class Chart(
116
132
  self.set_terms(terms)
117
133
  if domain is not None:
118
134
  self.set_domain(domain)
119
- if last_modified is not None:
120
- self.set_last_modified(last_modified)
121
- if input_datasets is not None:
122
- self.set_input_datasets(input_datasets)
123
- if description is not None:
124
- self.set_description(description)
125
- if display_name is not None:
126
- self.set_display_name(display_name)
127
135
 
128
136
  @classmethod
129
137
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
@@ -139,11 +147,13 @@ class Chart(
139
147
  assert isinstance(self._urn, ChartUrn)
140
148
  return self._urn
141
149
 
142
- def _ensure_chart_props(self) -> models.ChartInfoClass:
150
+ def _ensure_chart_props(
151
+ self, display_name: Optional[str] = None
152
+ ) -> models.ChartInfoClass:
143
153
  """Ensure chart properties exist, using a safer approach."""
144
154
  return self._setdefault_aspect(
145
155
  models.ChartInfoClass(
146
- title=self.urn.chart_id,
156
+ title=display_name or self.urn.chart_id,
147
157
  description="",
148
158
  lastModified=models.ChangeAuditStampsClass(),
149
159
  )
@@ -155,13 +165,15 @@ class Chart(
155
165
  return self.urn.chart_id
156
166
 
157
167
  @property
168
+ @deprecated("Use display_name instead", version="1.2.0.7")
158
169
  def title(self) -> str:
159
- """Get the title of the chart."""
160
- return self._ensure_chart_props().title
170
+ """Get the display name of the chart."""
171
+ return self.display_name
161
172
 
173
+ @deprecated("Use set_display_name instead", version="1.2.0.7")
162
174
  def set_title(self, title: str) -> None:
163
- """Set the title of the chart."""
164
- self._ensure_chart_props().title = title
175
+ """Set the display name of the chart."""
176
+ self.set_display_name(title)
165
177
 
166
178
  @property
167
179
  def description(self) -> Optional[str]:
@@ -173,13 +185,13 @@ class Chart(
173
185
  self._ensure_chart_props().description = description
174
186
 
175
187
  @property
176
- def display_name(self) -> Optional[str]:
188
+ def display_name(self) -> str:
177
189
  """Get the display name of the chart."""
178
- return self.title
190
+ return self._ensure_chart_props().title
179
191
 
180
192
  def set_display_name(self, display_name: str) -> None:
181
193
  """Set the display name of the chart."""
182
- self.set_title(display_name)
194
+ self._ensure_chart_props().title = display_name
183
195
 
184
196
  @property
185
197
  def external_url(self) -> Optional[str]:
@@ -250,9 +262,11 @@ class Chart(
250
262
  def set_chart_type(self, chart_type: Union[str, models.ChartTypeClass]) -> None:
251
263
  """Set the type of the chart."""
252
264
  if isinstance(chart_type, str):
253
- assert chart_type in get_enum_options(models.ChartTypeClass), (
254
- f"Invalid chart type: {chart_type}"
255
- )
265
+ chart_type_options = get_enum_options(models.ChartTypeClass)
266
+ if chart_type not in chart_type_options:
267
+ raise ValueError(
268
+ f"Invalid chart type: {chart_type}; valid types are {chart_type_options}"
269
+ )
256
270
  self._ensure_chart_props().type = chart_type
257
271
 
258
272
  @property
datahub/sdk/dashboard.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  from datetime import datetime
4
4
  from typing import Dict, List, Optional, Type, Union
5
5
 
6
+ from deprecated.sphinx import deprecated
6
7
  from typing_extensions import Self
7
8
 
8
9
  import datahub.metadata.schema_classes as models
@@ -24,12 +25,14 @@ from datahub.sdk._shared import (
24
25
  HasTerms,
25
26
  LinksInputType,
26
27
  OwnersInputType,
28
+ ParentContainerInputType,
27
29
  TagsInputType,
28
30
  TermsInputType,
29
31
  )
30
32
  from datahub.sdk.chart import Chart
31
33
  from datahub.sdk.dataset import Dataset
32
34
  from datahub.sdk.entity import Entity, ExtraAspectsType
35
+ from datahub.utilities.sentinels import Unset, unset
33
36
 
34
37
 
35
38
  class Dashboard(
@@ -64,7 +67,7 @@ class Dashboard(
64
67
  display_name: Optional[str] = None,
65
68
  platform_instance: Optional[DataPlatformInstanceUrnOrStr] = None,
66
69
  # Dashboard properties.
67
- description: str = "",
70
+ description: Optional[str] = None,
68
71
  external_url: Optional[str] = None,
69
72
  dashboard_url: Optional[str] = None,
70
73
  custom_properties: Optional[Dict[str, str]] = None,
@@ -74,6 +77,7 @@ class Dashboard(
74
77
  charts: Optional[List[Union[ChartUrnOrStr, Chart]]] = None,
75
78
  dashboards: Optional[List[Union[DashboardUrnOrStr, Dashboard]]] = None,
76
79
  # Standard aspects.
80
+ parent_container: ParentContainerInputType | Unset = unset,
77
81
  subtype: Optional[str] = None,
78
82
  owners: Optional[OwnersInputType] = None,
79
83
  links: Optional[LinksInputType] = None,
@@ -94,18 +98,7 @@ class Dashboard(
94
98
  self._set_platform_instance(platform, platform_instance)
95
99
 
96
100
  # Initialize DashboardInfoClass with default values
97
- dashboard_info = models.DashboardInfoClass(
98
- title=display_name or name,
99
- description=description or "",
100
- lastModified=models.ChangeAuditStampsClass(
101
- lastModified=None,
102
- ),
103
- customProperties={},
104
- chartEdges=[],
105
- datasetEdges=[],
106
- dashboards=[],
107
- )
108
-
101
+ dashboard_info = self._ensure_dashboard_props(display_name=display_name)
109
102
  if last_modified:
110
103
  dashboard_info.lastModified = models.ChangeAuditStampsClass(
111
104
  lastModified=models.AuditStampClass(
@@ -114,7 +107,6 @@ class Dashboard(
114
107
  ),
115
108
  )
116
109
 
117
- # Set additional properties
118
110
  if description is not None:
119
111
  self.set_description(description)
120
112
  if display_name is not None:
@@ -129,6 +121,15 @@ class Dashboard(
129
121
  self.set_last_modified(last_modified)
130
122
  if last_refreshed is not None:
131
123
  self.set_last_refreshed(last_refreshed)
124
+ if input_datasets is not None:
125
+ self.set_input_datasets(input_datasets)
126
+ if charts is not None:
127
+ self.set_charts(charts)
128
+ if dashboards is not None:
129
+ self.set_dashboards(dashboards)
130
+
131
+ if parent_container is not unset:
132
+ self._set_container(parent_container)
132
133
  if subtype is not None:
133
134
  self.set_subtype(subtype)
134
135
  if owners is not None:
@@ -141,12 +142,6 @@ class Dashboard(
141
142
  self.set_terms(terms)
142
143
  if domain is not None:
143
144
  self.set_domain(domain)
144
- if input_datasets is not None:
145
- self.set_input_datasets(input_datasets)
146
- if charts is not None:
147
- self.set_charts(charts)
148
- if dashboards is not None:
149
- self.set_dashboards(dashboards)
150
145
 
151
146
  @classmethod
152
147
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
@@ -162,11 +157,13 @@ class Dashboard(
162
157
  assert isinstance(self._urn, DashboardUrn)
163
158
  return self._urn
164
159
 
165
- def _ensure_dashboard_props(self) -> models.DashboardInfoClass:
160
+ def _ensure_dashboard_props(
161
+ self, display_name: Optional[str] = None
162
+ ) -> models.DashboardInfoClass:
166
163
  """Get the dashboard properties safely."""
167
164
  return self._setdefault_aspect(
168
165
  models.DashboardInfoClass(
169
- title=self.urn.dashboard_id,
166
+ title=display_name or self.urn.dashboard_id,
170
167
  description="",
171
168
  lastModified=models.ChangeAuditStampsClass(
172
169
  lastModified=models.AuditStampClass(
@@ -186,60 +183,52 @@ class Dashboard(
186
183
  return self.urn.dashboard_id
187
184
 
188
185
  @property
186
+ @deprecated("Use display_name instead", version="1.2.0.7")
189
187
  def title(self) -> str:
190
- """Get the title of the dashboard."""
191
- return self._ensure_dashboard_props().title
188
+ """Get the display name of the dashboard."""
189
+ return self.display_name
192
190
 
191
+ @deprecated("Use set_display_name instead", version="1.2.0.7")
193
192
  def set_title(self, title: str) -> None:
194
- """Set the title of the dashboard."""
195
- props = self._ensure_dashboard_props()
196
- props.title = title
197
- self._set_aspect(props)
193
+ """Set the display name of the dashboard."""
194
+ self.set_display_name(title)
198
195
 
199
196
  @property
200
197
  def description(self) -> Optional[str]:
201
198
  """Get the description of the dashboard."""
202
- props = self._ensure_dashboard_props()
203
- return props.description
199
+ # Because description is a required field, we treat "" as None.
200
+ return self._ensure_dashboard_props().description or None
204
201
 
205
202
  def set_description(self, description: str) -> None:
206
203
  """Set the description of the dashboard."""
207
- props = self._ensure_dashboard_props()
208
- props.description = description
209
- self._set_aspect(props)
204
+ self._ensure_dashboard_props().description = description
210
205
 
211
206
  @property
212
- def display_name(self) -> Optional[str]:
207
+ def display_name(self) -> str:
213
208
  """Get the display name of the dashboard."""
214
- return self.title
209
+ return self._ensure_dashboard_props().title
215
210
 
216
211
  def set_display_name(self, display_name: str) -> None:
217
212
  """Set the display name of the dashboard."""
218
- self.set_title(display_name)
213
+ self._ensure_dashboard_props().title = display_name
219
214
 
220
215
  @property
221
216
  def external_url(self) -> Optional[str]:
222
217
  """Get the external URL of the dashboard."""
223
- props = self._ensure_dashboard_props()
224
- return props.externalUrl
218
+ return self._ensure_dashboard_props().externalUrl
225
219
 
226
220
  def set_external_url(self, external_url: str) -> None:
227
221
  """Set the external URL of the dashboard."""
228
- props = self._ensure_dashboard_props()
229
- props.externalUrl = external_url
230
- self._set_aspect(props)
222
+ self._ensure_dashboard_props().externalUrl = external_url
231
223
 
232
224
  @property
233
225
  def dashboard_url(self) -> Optional[str]:
234
226
  """Get the dashboard URL."""
235
- props = self._ensure_dashboard_props()
236
- return props.dashboardUrl
227
+ return self._ensure_dashboard_props().dashboardUrl
237
228
 
238
229
  def set_dashboard_url(self, dashboard_url: str) -> None:
239
230
  """Set the dashboard URL."""
240
- props = self._ensure_dashboard_props()
241
- props.dashboardUrl = dashboard_url
242
- self._set_aspect(props)
231
+ self._ensure_dashboard_props().dashboardUrl = dashboard_url
243
232
 
244
233
  @property
245
234
  def custom_properties(self) -> Dict[str, str]:
@@ -249,9 +238,7 @@ class Dashboard(
249
238
 
250
239
  def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
251
240
  """Set the custom properties of the dashboard."""
252
- props = self._ensure_dashboard_props()
253
- props.customProperties = custom_properties
254
- self._set_aspect(props)
241
+ self._ensure_dashboard_props().customProperties = custom_properties
255
242
 
256
243
  @property
257
244
  def last_modified(self) -> Optional[datetime]:
@@ -263,14 +250,12 @@ class Dashboard(
263
250
 
264
251
  def set_last_modified(self, last_modified: datetime) -> None:
265
252
  """Set the last modification timestamp of the dashboard."""
266
- props = self._ensure_dashboard_props()
267
- props.lastModified = models.ChangeAuditStampsClass(
253
+ self._ensure_dashboard_props().lastModified = models.ChangeAuditStampsClass(
268
254
  lastModified=models.AuditStampClass(
269
255
  time=int(last_modified.timestamp()),
270
256
  actor="urn:li:corpuser:datahub",
271
257
  ),
272
258
  )
273
- self._set_aspect(props)
274
259
 
275
260
  @property
276
261
  def last_refreshed(self) -> Optional[datetime]:
@@ -284,9 +269,7 @@ class Dashboard(
284
269
 
285
270
  def set_last_refreshed(self, last_refreshed: datetime) -> None:
286
271
  """Set the last refresh timestamp of the dashboard."""
287
- props = self._ensure_dashboard_props()
288
- props.lastRefreshed = int(last_refreshed.timestamp())
289
- self._set_aspect(props)
272
+ self._ensure_dashboard_props().lastRefreshed = int(last_refreshed.timestamp())
290
273
 
291
274
  @property
292
275
  def input_datasets(self) -> List[DatasetUrn]:
@@ -310,7 +293,6 @@ class Dashboard(
310
293
  dataset_urn = DatasetUrn.from_string(dataset)
311
294
  dataset_edges.append(models.EdgeClass(destinationUrn=str(dataset_urn)))
312
295
  props.datasetEdges = dataset_edges
313
- self._set_aspect(props)
314
296
 
315
297
  def add_input_dataset(self, input_dataset: Union[DatasetUrnOrStr, Dataset]) -> None:
316
298
  """Add an input dataset to the dashboard."""
@@ -326,7 +308,6 @@ class Dashboard(
326
308
  models.EdgeClass(destinationUrn=str(input_dataset_urn))
327
309
  )
328
310
  props.datasetEdges = dataset_edges
329
- self._set_aspect(props)
330
311
 
331
312
  def remove_input_dataset(
332
313
  self, input_dataset: Union[DatasetUrnOrStr, Dataset]
@@ -342,7 +323,6 @@ class Dashboard(
342
323
  for edge in (props.datasetEdges or [])
343
324
  if edge.destinationUrn != str(input_dataset_urn)
344
325
  ]
345
- self._set_aspect(props)
346
326
 
347
327
  @property
348
328
  def charts(self) -> List[ChartUrn]:
@@ -363,7 +343,6 @@ class Dashboard(
363
343
  chart_urn = ChartUrn.from_string(chart)
364
344
  chart_edges.append(models.EdgeClass(destinationUrn=str(chart_urn)))
365
345
  props.chartEdges = chart_edges
366
- self._set_aspect(props)
367
346
 
368
347
  def add_chart(self, chart: Union[ChartUrnOrStr, Chart]) -> None:
369
348
  """Add a chart to the dashboard."""
@@ -381,7 +360,6 @@ class Dashboard(
381
360
  if str(chart_urn) not in existing_urns:
382
361
  chart_edges.append(models.EdgeClass(destinationUrn=str(chart_urn)))
383
362
  props.chartEdges = chart_edges
384
- self._set_aspect(props)
385
363
 
386
364
  def remove_chart(self, chart: Union[ChartUrnOrStr, Chart]) -> None:
387
365
  """Remove a chart from the dashboard."""
@@ -395,7 +373,6 @@ class Dashboard(
395
373
  for edge in (props.chartEdges or [])
396
374
  if edge.destinationUrn != str(chart_urn)
397
375
  ]
398
- self._set_aspect(props)
399
376
 
400
377
  @property
401
378
  def dashboards(self) -> List[DashboardUrn]:
@@ -417,7 +394,6 @@ class Dashboard(
417
394
  else:
418
395
  dashboard_urn = DashboardUrn.from_string(dashboard)
419
396
  props.dashboards.append(models.EdgeClass(destinationUrn=str(dashboard_urn)))
420
- self._set_aspect(props)
421
397
 
422
398
  def add_dashboard(self, dashboard: Union[DashboardUrnOrStr, Dashboard]) -> None:
423
399
  """Add a dashboard to the dashboard."""
datahub/sdk/lineage_client.py CHANGED
@@ -165,11 +165,7 @@ class LineageClient:
165
165
  ] = False,
166
166
  transformation_text: Optional[str] = None,
167
167
  ) -> None:
168
- ...
169
-
170
- """
171
- Add dataset-to-dataset lineage with column-level mapping.
172
- """
168
+ """Add dataset-to-dataset lineage with column-level mapping."""
173
169
 
174
170
  @overload
175
171
  def add_lineage(
@@ -178,11 +174,7 @@ class LineageClient:
178
174
  upstream: Union[DatajobUrnOrStr],
179
175
  downstream: DatasetUrnOrStr,
180
176
  ) -> None:
181
- ...
182
-
183
- """
184
- Add dataset-to-datajob or dataset-to-mlmodel lineage.
185
- """
177
+ """Add dataset-to-datajob or dataset-to-mlmodel lineage."""
186
178
 
187
179
  @overload
188
180
  def add_lineage(
@@ -191,11 +183,7 @@ class LineageClient:
191
183
  upstream: Union[DatasetUrnOrStr, DatajobUrnOrStr],
192
184
  downstream: DatajobUrnOrStr,
193
185
  ) -> None:
194
- ...
195
-
196
- """
197
- Add datajob-to-dataset or datajob-to-datajob lineage.
198
- """
186
+ """Add datajob-to-dataset or datajob-to-datajob lineage."""
199
187
 
200
188
  @overload
201
189
  def add_lineage(
@@ -204,11 +192,7 @@ class LineageClient:
204
192
  upstream: Union[DashboardUrnOrStr, DatasetUrnOrStr, ChartUrnOrStr],
205
193
  downstream: DashboardUrnOrStr,
206
194
  ) -> None:
207
- ...
208
-
209
- """
210
- Add dashboard-to-dashboard or dashboard-to-dataset lineage.
211
- """
195
+ """Add dashboard-to-dashboard or dashboard-to-dataset lineage."""
212
196
 
213
197
  @overload
214
198
  def add_lineage(
@@ -217,10 +201,7 @@ class LineageClient:
217
201
  upstream: DatasetUrnOrStr,
218
202
  downstream: ChartUrnOrStr,
219
203
  ) -> None:
220
- ...
221
- """
222
- Add dataset-to-chart lineage.
223
- """
204
+ """Add dataset-to-chart lineage."""
224
205
 
225
206
  # The actual implementation that handles all overloaded cases
226
207
  def add_lineage(
@@ -237,8 +218,7 @@ class LineageClient:
237
218
  ] = False,
238
219
  transformation_text: Optional[str] = None,
239
220
  ) -> None:
240
- """
241
- Add lineage between two entities.
221
+ """Add lineage between two entities.
242
222
 
243
223
  This flexible method handles different combinations of entity types:
244
224
  - dataset to dataset
datahub/sdk/main_client.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, overload
3
+ from typing import TYPE_CHECKING, Optional, overload
4
4
 
5
5
  from datahub.errors import SdkUsageError
6
6
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
@@ -9,6 +9,9 @@ from datahub.sdk.entity_client import EntityClient
9
9
  from datahub.sdk.lineage_client import LineageClient
10
10
  from datahub.sdk.search_client import SearchClient
11
11
 
12
+ if TYPE_CHECKING:
13
+ from datahub.sdk.resolver_client import ResolverClient
14
+
12
15
 
13
16
  class DataHubClient:
14
17
  """Main client for interacting with DataHub.
@@ -104,13 +107,14 @@ class DataHubClient:
104
107
  return EntityClient(self)
105
108
 
106
109
  @property
107
- def resolve(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
110
+ def resolve(self) -> "ResolverClient":
108
111
  try:
109
112
  from acryl_datahub_cloud.sdk import ( # type: ignore[import-not-found]
110
113
  ResolverClient,
111
114
  )
112
115
  except ImportError:
113
- from datahub.sdk.resolver_client import ( # type: ignore[assignment] # If the client is not installed, use the one from the SDK
116
+ # If the client is not installed, use the one from the SDK.
117
+ from datahub.sdk.resolver_client import ( # type: ignore[assignment]
114
118
  ResolverClient,
115
119
  )
116
120
  return ResolverClient(self)
datahub/sdk/search_filters.py CHANGED
@@ -384,6 +384,21 @@ def _filter_discriminator(v: Any) -> Optional[str]:
384
384
  return None
385
385
 
386
386
 
387
+ def _parse_and_like_filter(value: Any) -> Any:
388
+ # Do not parse if filter is already of type and/or/not or a custom condition
389
+ # also do not parse container filter if direct_descendants_only is specified
390
+ if (
391
+ isinstance(value, dict)
392
+ and not set(value.keys()).intersection(
393
+ {"and", "or", "not", "field", "condition", "direct_descendants_only"}
394
+ )
395
+ and len(value) > 1
396
+ ):
397
+ return {"and": [{k: v} for k, v in value.items()]}
398
+
399
+ return value
400
+
401
+
387
402
  if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
388
403
  # The `not TYPE_CHECKING` bit is required to make the linter happy,
389
404
  # since we currently only run mypy with pydantic v1.
@@ -445,6 +460,7 @@ else:
445
460
  ],
446
461
  Discriminator(_filter_discriminator),
447
462
  ],
463
+ pydantic.BeforeValidator(_parse_and_like_filter),
448
464
  pydantic.BeforeValidator(_parse_json_from_string),
449
465
  ]
450
466
 
datahub/specific/aspect_helpers/siblings.py ADDED
@@ -0,0 +1,73 @@
1
+ from typing import List
2
+
3
+ from typing_extensions import Self
4
+
5
+ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
6
+ from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
7
+
8
+
9
+ class HasSiblingsPatch(MetadataPatchProposal):
10
+ def add_sibling(self, sibling_urn: str, primary: bool = False) -> Self:
11
+ """Add a sibling relationship to the entity.
12
+
13
+ Args:
14
+ sibling_urn: The URN of the sibling entity to add.
15
+ primary: Whether this entity should be marked as primary in the relationship.
16
+
17
+ Returns:
18
+ The patch builder instance.
19
+ """
20
+ self._add_patch(
21
+ Siblings.ASPECT_NAME,
22
+ "add",
23
+ path=("siblings", sibling_urn),
24
+ value=sibling_urn,
25
+ )
26
+
27
+ # Set primary flag if specified
28
+ if primary:
29
+ self._add_patch(
30
+ Siblings.ASPECT_NAME,
31
+ "add",
32
+ path=("primary",),
33
+ value=primary,
34
+ )
35
+
36
+ return self
37
+
38
+ def remove_sibling(self, sibling_urn: str) -> Self:
39
+ """Remove a sibling relationship from the entity.
40
+
41
+ Args:
42
+ sibling_urn: The URN of the sibling entity to remove.
43
+
44
+ Returns:
45
+ The patch builder instance.
46
+ """
47
+ self._add_patch(
48
+ Siblings.ASPECT_NAME,
49
+ "remove",
50
+ path=("siblings", sibling_urn),
51
+ value={},
52
+ )
53
+ return self
54
+
55
+ def set_siblings(self, sibling_urns: List[str], primary: bool = False) -> Self:
56
+ """Set the complete list of siblings for the entity.
57
+
58
+ This will replace all existing siblings with the new list.
59
+
60
+ Args:
61
+ sibling_urns: The list of sibling URNs to set.
62
+ primary: Whether this entity should be marked as primary.
63
+
64
+ Returns:
65
+ The patch builder instance.
66
+ """
67
+ self._add_patch(
68
+ Siblings.ASPECT_NAME, "add", path=("siblings",), value=sibling_urns
69
+ )
70
+
71
+ self._add_patch(Siblings.ASPECT_NAME, "add", path=("primary",), value=primary)
72
+
73
+ return self
datahub/specific/dataset.py CHANGED
@@ -22,6 +22,7 @@ from datahub.specific.aspect_helpers.fine_grained_lineage import (
22
22
  HasFineGrainedLineagePatch,
23
23
  )
24
24
  from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
25
+ from datahub.specific.aspect_helpers.siblings import HasSiblingsPatch
25
26
  from datahub.specific.aspect_helpers.structured_properties import (
26
27
  HasStructuredPropertiesPatch,
27
28
  )
@@ -104,6 +105,7 @@ class DatasetPatchBuilder(
104
105
  HasTagsPatch,
105
106
  HasTermsPatch,
106
107
  HasFineGrainedLineagePatch,
108
+ HasSiblingsPatch,
107
109
  MetadataPatchProposal,
108
110
  ):
109
111
  def __init__(
datahub/sql_parsing/sql_parsing_aggregator.py CHANGED
@@ -634,6 +634,9 @@ class SqlParsingAggregator(Closeable):
634
634
  TableSwap,
635
635
  ],
636
636
  ) -> None:
637
+ """
638
+ This assumes that queries come in order of increasing timestamps.
639
+ """
637
640
  if isinstance(item, KnownQueryLineageInfo):
638
641
  self.add_known_query_lineage(item)
639
642
  elif isinstance(item, KnownLineageMapping):
datahub/sql_parsing/tool_meta_extractor.py CHANGED
@@ -208,9 +208,7 @@ class ToolMetaExtractor:
208
208
  Returns:
209
209
  bool: whether QueryLog entry is that of hex.
210
210
  """
211
- last_line = _get_last_line(entry.query_text)
212
-
213
- if not last_line.startswith("-- Hex query metadata:"):
211
+ if "-- Hex query metadata:" not in entry.query_text:
214
212
  return False
215
213
 
216
214
  entry.origin = HEX_PLATFORM_URN