acryl-datahub-cloud 0.3.7.9rc1__py3-none-any.whl → 0.3.8rc1__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Files changed (58)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/acryl_cs_issues/source.py +0 -1
  3. acryl_datahub_cloud/api/__init__.py +1 -0
  4. acryl_datahub_cloud/api/client.py +6 -0
  5. acryl_datahub_cloud/api/entity_versioning.py +167 -0
  6. acryl_datahub_cloud/datahub_metadata_sharing/__init__.py +0 -0
  7. acryl_datahub_cloud/datahub_metadata_sharing/metadata_sharing_source.py +262 -0
  8. acryl_datahub_cloud/datahub_metadata_sharing/query.py +7 -0
  9. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +0 -2
  10. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +0 -1
  11. acryl_datahub_cloud/datahub_reporting/extract_graph.py +0 -1
  12. acryl_datahub_cloud/datahub_reporting/extract_sql.py +0 -1
  13. acryl_datahub_cloud/lineage_features/source.py +22 -5
  14. acryl_datahub_cloud/metadata/_urns/urn_defs.py +1559 -1460
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executor/__init__.py +15 -0
  17. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  18. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  19. acryl_datahub_cloud/metadata/schema.avsc +22744 -22341
  20. acryl_datahub_cloud/metadata/schema_classes.py +1058 -461
  21. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +1 -1
  22. acryl_datahub_cloud/metadata/schemas/AssertionInferenceDetails.avsc +1 -1
  23. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +1 -1
  24. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +1 -1
  25. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  26. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceInfo.avsc +6 -0
  27. acryl_datahub_cloud/metadata/schemas/DataHubViewInfo.avsc +2 -0
  28. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +2 -1
  29. acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +63 -0
  30. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +2 -1
  31. acryl_datahub_cloud/metadata/schemas/Deprecation.avsc +12 -0
  32. acryl_datahub_cloud/metadata/schemas/DynamicFormAssignment.avsc +2 -0
  33. acryl_datahub_cloud/metadata/schemas/EntityTypeKey.avsc +1 -0
  34. acryl_datahub_cloud/metadata/schemas/ExecutionRequestInput.avsc +9 -0
  35. acryl_datahub_cloud/metadata/schemas/ExecutionRequestResult.avsc +14 -0
  36. acryl_datahub_cloud/metadata/schemas/Filter.avsc +2 -0
  37. acryl_datahub_cloud/metadata/schemas/MLFeatureProperties.avsc +51 -0
  38. acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  39. acryl_datahub_cloud/metadata/schemas/MLModelGroupProperties.avsc +51 -0
  40. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +2 -1
  41. acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +51 -0
  42. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  43. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +20 -0
  44. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +10 -1
  45. acryl_datahub_cloud/metadata/schemas/PostInfo.avsc +23 -0
  46. acryl_datahub_cloud/metadata/schemas/RecommendationModule.avsc +2 -0
  47. acryl_datahub_cloud/metadata/schemas/RemoteExecutorKey.avsc +21 -0
  48. acryl_datahub_cloud/metadata/schemas/RemoteExecutorStatus.avsc +80 -0
  49. acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +2 -1
  50. acryl_datahub_cloud/metadata/schemas/VersionProperties.avsc +216 -0
  51. acryl_datahub_cloud/metadata/schemas/VersionSetKey.avsc +26 -0
  52. acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc +49 -0
  53. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  54. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/METADATA +35 -34
  55. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/RECORD +58 -44
  56. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/WHEEL +1 -1
  57. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/entry_points.txt +1 -0
  58. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/top_level.txt +0 -0

acryl_datahub_cloud/_codegen_config.json
@@ -1,6 +1,6 @@
 {
   "name": "acryl-datahub-cloud",
-  "version": "0.3.7.9rc1",
+  "version": "0.3.8rc1",
   "install_requires": [
     "avro-gen3==0.7.16",
     "acryl-datahub"

acryl_datahub_cloud/acryl_cs_issues/source.py
@@ -100,7 +100,6 @@ class AcrylCSIssuesSource(Source):
     def _provision_platform(
         self, platform: str, logo_url: str, graph: DataHubGraph
     ) -> None:
-
         platform_urn = make_data_platform_urn(platform)
         if not graph.exists(platform_urn):
             platform_info = DataPlatformInfoClass(

acryl_datahub_cloud/api/__init__.py
@@ -0,0 +1 @@
+from acryl_datahub_cloud.api.client import AcrylGraph

acryl_datahub_cloud/api/client.py
@@ -0,0 +1,6 @@
+from acryl_datahub_cloud.api.entity_versioning import EntityVersioningAPI
+from datahub.ingestion.graph.client import DataHubGraph
+
+
+class AcrylGraph(EntityVersioningAPI, DataHubGraph):
+    pass
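
Usage note (outside the diff): a minimal sketch of constructing the new AcrylGraph client. The server URL and token are placeholders; DatahubClientConfig is the standard connection config from the open-source acryl-datahub client, whose constructor AcrylGraph inherits via DataHubGraph.

from datahub.ingestion.graph.client import DatahubClientConfig

from acryl_datahub_cloud.api import AcrylGraph

# Placeholder connection details; AcrylGraph reuses DataHubGraph's constructor
# and adds the entity-versioning methods defined in entity_versioning.py.
graph = AcrylGraph(DatahubClientConfig(server="http://localhost:8080", token="<token>"))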

acryl_datahub_cloud/api/entity_versioning.py
@@ -0,0 +1,167 @@
+import uuid
+from typing import Optional
+
+from datahub.ingestion.graph.client import DataHubGraph
+from datahub.metadata.schema_classes import (
+    VersionPropertiesClass,
+    VersionSetPropertiesClass,
+)
+from datahub.metadata.urns import VersionSetUrn
+from datahub.utilities.urns.urn import guess_entity_type
+
+
+class EntityVersioningAPI(DataHubGraph):
+    LINK_VERSION_MUTATION = """
+        mutation($input: LinkVersionInput!) {
+            linkAssetVersion(input: $input)
+        }
+    """
+
+    UNLINK_VERSION_MUTATION = """
+        mutation($input: UnlinkVersionInput!) {
+            unlinkAssetVersion(input: $input)
+        }
+    """
+
+    def link_asset_to_version_set(
+        self,
+        asset_urn: str,
+        version_set_urn: Optional[str],
+        label: str,
+        *,
+        comment: Optional[str] = None,
+    ) -> str:
+        """Sets an entity as the latest version of a version set.
+
+        Can also be used to create a new version set, with `asset_urn` as the first version.
+
+        Args:
+            asset_urn: URN of the entity.
+            version_set_urn: URN of the version set, or None to generate a new version set urn
+            label: Label of the version.
+            comment: Comment about the version.
+
+        Returns:
+            URN of the version set to which `asset_urn` was linked.
+        """
+
+        entity_type = guess_entity_type(asset_urn)
+        if version_set_urn is None:
+            version_set_urn = VersionSetUrn(str(uuid.uuid4()), entity_type).urn()
+        elif guess_entity_type(version_set_urn) != "versionSet":
+            raise ValueError(f"Expected version set URN, got {version_set_urn}")
+
+        entity_version = self.get_aspect(asset_urn, VersionPropertiesClass)
+        if entity_version:
+            raise ValueError(
+                f"Asset {asset_urn} is already a version of {entity_version.versionSet}"
+            )
+
+        variables = {
+            "input": {
+                "versionSet": version_set_urn,
+                "linkedEntity": asset_urn,
+                "version": label,
+                "comment": comment,
+            }
+        }
+        self.execute_graphql(self.LINK_VERSION_MUTATION, variables)
+        return version_set_urn
+
+    def link_asset_to_versioned_asset(
+        self,
+        new_asset_urn: str,
+        old_asset_urn: str,
+        label: str,
+        *,
+        comment: Optional[str] = None,
+    ) -> str:
+        """Sets an entity as the latest version of an existing versioned entity.
+
+        Args:
+            new_asset_urn: URN of the new latest entity.
+            old_asset_urn: URN of an existing versioned entity to link onto.
+            label: Label of the version.
+            comment: Comment about the version.
+
+        Returns:
+            URN of the version set to which `new_asset_urn` was linked.
+        """
+
+        new_entity_type = guess_entity_type(new_asset_urn)
+        old_entity_type = guess_entity_type(old_asset_urn)
+        if new_entity_type != old_entity_type:
+            raise ValueError(
+                f"Expected URNs of the same type, got {new_entity_type} and {old_entity_type}"
+            )
+
+        new_entity_version = self.get_aspect(new_asset_urn, VersionPropertiesClass)
+        if new_entity_version:
+            raise ValueError(
+                f"Asset {new_asset_urn} is already a version of {new_entity_version.versionSet}"
+            )
+        old_entity_version = self.get_aspect(old_asset_urn, VersionPropertiesClass)
+        if not old_entity_version:
+            raise ValueError(f"Asset {old_asset_urn} is not versioned")
+
+        version_set_urn = old_entity_version.versionSet
+        self.link_asset_to_version_set(
+            new_asset_urn, version_set_urn, label, comment=comment
+        )
+        return version_set_urn
+
+    def unlink_asset_from_version_set(self, asset_urn: str) -> Optional[str]:
+        """Unlinks an entity from its version set.
+
+        Args:
+            asset_urn: URN of the entity to unlink from its version set.
+
+        Returns:
+            If successful, the URN of the version set from which `asset_urn` was unlinked.
+        """
+
+        entity_version = self.get_aspect(asset_urn, VersionPropertiesClass)
+        if not entity_version:
+            raise ValueError(f"Asset {asset_urn} is not versioned")
+
+        variables = {
+            "input": {
+                "versionSet": entity_version.versionSet,
+                "unlinkedEntity": asset_urn,
+            }
+        }
+        if self.execute_graphql(self.UNLINK_VERSION_MUTATION, variables):
+            return entity_version.versionSet
+        else:
+            return None
+
+    def unlink_latest_asset_from_version_set(
+        self, version_set_urn: str
+    ) -> Optional[str]:
+        """Unlinks the latest version of a version set.
+
+        Args:
+            version_set_urn: URN of the version set.
+
+        Returns:
+            If successful, the URN of the entity that was unlinked from `version_set_urn`.
+        """
+
+        version_set_properties = self.get_aspect(
+            version_set_urn, VersionSetPropertiesClass
+        )
+        if not version_set_properties:
+            raise ValueError(
+                f"Version set {version_set_urn} does not exist or has no versions"
+            )
+
+        variables = {
+            "input": {
+                "versionSet": version_set_urn,
+                "unlinkedEntity": version_set_properties.latest,
+            }
+        }
+        if self.execute_graphql(self.UNLINK_VERSION_MUTATION, variables):
+            return version_set_properties.latest
+        else:
+            return None
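
Usage note (outside the diff): a short sketch of the versioning workflow exposed above, assuming an already-connected AcrylGraph (or EntityVersioningAPI) instance named `graph`. The dataset URNs and version labels are illustrative only.

# Link a dataset as the first version of a new version set; passing
# version_set_urn=None makes the API mint a fresh VersionSetUrn.
orders_v1 = "urn:li:dataset:(urn:li:dataPlatform:hive,demo.orders_v1,PROD)"  # illustrative
version_set_urn = graph.link_asset_to_version_set(
    asset_urn=orders_v1,
    version_set_urn=None,
    label="v1",
    comment="Initial version",
)

# Make a second dataset the latest version by linking it onto the already
# versioned asset; the version set URN is resolved from orders_v1.
orders_v2 = "urn:li:dataset:(urn:li:dataPlatform:hive,demo.orders_v2,PROD)"  # illustrative
graph.link_asset_to_versioned_asset(orders_v2, orders_v1, label="v2")

# Unlink the current latest version, or unlink a specific asset directly.
graph.unlink_latest_asset_from_version_set(version_set_urn)
graph.unlink_asset_from_version_set(orders_v1)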

acryl_datahub_cloud/datahub_metadata_sharing/metadata_sharing_source.py
@@ -0,0 +1,262 @@
+import logging
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional, Tuple
+
+from pydantic import BaseModel
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+from acryl_datahub_cloud.datahub_metadata_sharing.query import (
+    GRAPHQL_SCROLL_SHARED_ENTITIES,
+    GRAPHQL_SHARE_ENTITY,
+)
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import (
+    SupportStatus,
+    config_class,
+    platform_name,
+    support_status,
+)
+from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.graph.client import DataHubGraph
+
+logger = logging.getLogger(__name__)
+
+
+class GraphQLError(Exception):
+    """Custom exception for GraphQL-specific errors"""
+
+    pass
+
+
+class DataHubMetadataSharingSourceConfig(BaseModel):
+    batch_size: int = 100
+    batch_delay_ms: int = 100
+    max_retries: int = 3
+    initial_retry_delay_ms: int = 1000
+
+
+@dataclass
+class DataHubMetadataSharingSourceReport(SourceReport):
+    entities_shared: int = 0
+    entities_failed: int = 0
+    implicit_entities_skipped: int = 0
+    batches_processed: int = 0
+
+
+@platform_name(id="datahub", platform_name="DataHub")
+@config_class(DataHubMetadataSharingSourceConfig)
+@support_status(SupportStatus.INCUBATING)
+class DataHubMetadataSharingSource(Source):
+    """MetadataSharing Source that reshares entities across DataHub instances"""
+
+    def __init__(
+        self, config: DataHubMetadataSharingSourceConfig, ctx: PipelineContext
+    ):
+        super().__init__(ctx)
+        self.config: DataHubMetadataSharingSourceConfig = config
+        self.report = DataHubMetadataSharingSourceReport()
+        self.graph: Optional[DataHubGraph] = None
+
+    @retry(
+        retry=retry_if_exception_type((GraphQLError, ConnectionError)),
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=4, max=10),
+        reraise=True,
+    )
+    def execute_graphql_with_retry(
+        self, query: str, variables: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Execute GraphQL query with retry logic"""
+        if self.graph is None:
+            raise ValueError("Graph client not initialized")
+        response = self.graph.execute_graphql(query, variables=variables)
+        error = response.get("error")
+        if error:
+            raise GraphQLError(f"GraphQL error: {error}")
+        return response
+
+    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
+        self.graph = self.ctx.require_graph("Loading default graph coordinates.")
+
+        self.reshare_entities()
+
+        # This source doesn't produce any work units
+        return []
+
+    def reshare_entities(self) -> None:
+        scroll_id: Optional[str] = None
+        current_batch_number: int = 1
+
+        try:
+            while True:
+                next_scroll_id, results = self.scroll_shared_entities(
+                    scroll_id, self.config.batch_size
+                )
+
+                for result in results:
+                    self._process_single_entity(result)
+
+                self.report.batches_processed = current_batch_number
+                self.report.info(
+                    message="Completed sharing batch of entities.",
+                    context=f"{current_batch_number} of size {self.config.batch_size}!",
+                )
+                current_batch_number += 1
+
+                if next_scroll_id is None:
+                    break
+
+                time.sleep(self.config.batch_delay_ms / 1000.0)
+
+        except Exception as e:
+            self.report.report_failure(
+                title="Failed to process batches",
+                message="Error occurred while processing one or more batches!",
+                context=f"message = {str(e)}",
+                exc=e,
+            )
+            return
+
+        self.report.info(
+            message="Completed sharing all entities.",
+            context=f"Successfully shared {self.report.entities_shared} entities, "
+            f"failed to share {self.report.entities_failed} entities.",
+        )
+
+    # Rest of the methods remain the same...
+
+    def _process_single_entity(self, result: Dict[str, Any]) -> None:
+        """Process a single entity result"""
+        entity_urn = result.get("entity", {}).get("urn", None)
+        share_results = (
+            result.get("entity", {}).get("share", {}).get("lastShareResults", [])
+        )
+
+        if entity_urn is None:
+            self.report.report_warning(
+                message="Failed to resolve entity urn for shared asset! Skipping...",
+                context=f"Response: {str(result)}",
+            )
+            return
+
+        for share_result in share_results:
+            try:
+                destination_data = share_result.get("destination", {})
+                destination_urn = destination_data.get("urn", "")
+                previous_status = share_result.get("status")
+                share_config = share_result.get("shareConfig", {})
+
+                # Important: If there is implicit entity, we should skip this urn.
+                # This means the entity was not EXPLICITLY shared, so we do not want to explicitly share here.
+                implicit_shared_entity = share_result.get("implicitShareEntity")
+                is_implicitly_shared = (
+                    implicit_shared_entity is not None
+                    and "urn" in implicit_shared_entity
+                )
+
+                if is_implicitly_shared:
+                    self.report.implicit_entities_skipped += 1
+                    continue
+
+                if previous_status != "SUCCESS":
+                    self.report.report_warning(
+                        message="Attempting to share a previously unsuccessful shared entity!",
+                        context=f"entity urn: {entity_urn}, destination urn: {destination_urn}",
+                    )
+
+                lineage_direction = self._determine_lineage_direction(share_config)
+
+                shared = self.share_entity(
+                    entity_urn=entity_urn,
+                    destination_urn=destination_urn,
+                    lineage_direction=lineage_direction,
+                )
+
+                if shared:
+                    self.report.entities_shared += 1
+                else:
+                    self.report.entities_failed += 1
+
+            except Exception as e:
+                self.report.report_warning(
+                    message="Failed to share single entity!",
+                    context=f"entity urn: {entity_urn}",
+                )
+                logger.exception(f"Error processing entity {entity_urn}", e)
+                self.report.entities_failed += 1
+
+    def _determine_lineage_direction(
+        self, share_config: Dict[str, Any]
+    ) -> Optional[str]:
+        """Determine lineage direction based on share config"""
+        include_upstreams = share_config.get("enableUpstreamLineage", False)
+        include_downstreams = share_config.get(
+            "enableDownstreamLineage", False
+        )  # Fixed typo
+
+        if include_upstreams and include_downstreams:
+            return "BOTH"
+        if include_upstreams:
+            return "UPSTREAM"
+        if include_downstreams:
+            return "DOWNSTREAM"
+        return None
+
+    def scroll_shared_entities(
+        self, scroll_id: Optional[str], count: int
+    ) -> Tuple[Optional[str], List[Dict[str, Any]]]:
+        """Scroll through shared entities with retry logic"""
+        response = self.execute_graphql_with_retry(
+            GRAPHQL_SCROLL_SHARED_ENTITIES,
+            variables={
+                "scrollId": scroll_id,
+                "count": count,
+            },
+        )
+
+        result = response.get("scrollAcrossEntities", {})
+        return result.get("nextScrollId"), result.get("searchResults", [])
+
+    def share_entity(
+        self, entity_urn: str, destination_urn: str, lineage_direction: Optional[str]
+    ) -> bool:
+        """Share entity with retry logic"""
+        try:
+            response = self.execute_graphql_with_retry(
+                GRAPHQL_SHARE_ENTITY,
+                variables={
+                    "entityUrn": entity_urn,
+                    "destinationUrn": destination_urn,
+                    "lineageDirection": lineage_direction,
+                },
+            )

+            result = response.get("shareEntity", {})
+            if not result.get("succeeded", False):
+                self.report.report_failure(
+                    title="Failed to Share Entity",
+                    message="Response returned that success failed for entity and destination!",
+                    context=f"entity urn: {entity_urn}, destination urn: {destination_urn}",
+                )
+                return False
+
+            return True
+
+        except Exception as e:
+            self.report.report_failure(
+                title="Failed to Share Entity",
+                message="Exception occurred while sharing entity",
+                context=f"entity urn: {entity_urn}, destination urn: {destination_urn}",
+                exc=e,
+            )
+            return False
+
+    def get_report(self) -> SourceReport:
+        return self.report
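
Usage note (outside the diff): a minimal sketch of running the sharing source through a standard DataHub ingestion pipeline. The source type key "datahub-metadata-sharing" is an assumption (the actual key is whatever this wheel registers in entry_points.txt), and the server/token values are placeholders.

from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "datahub_api": {"server": "http://localhost:8080", "token": "<token>"},
        "source": {
            "type": "datahub-metadata-sharing",  # assumed entry-point name
            "config": {
                "batch_size": 50,       # entities fetched per scroll page
                "batch_delay_ms": 250,  # pause between pages
            },
        },
        # The source emits no work units, so the sink receives nothing; console is a harmless default.
        "sink": {"type": "console"},
    }
)
pipeline.run()
pipeline.raise_from_status()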

acryl_datahub_cloud/datahub_metadata_sharing/query.py
@@ -0,0 +1,7 @@
+import pathlib
+
+GRAPHQL_SCROLL_SHARED_ENTITIES = (
+    pathlib.Path(__file__).parent / "scroll_shared_entities.gql"
+).read_text()
+
+GRAPHQL_SHARE_ENTITY = (pathlib.Path(__file__).parent / "share_entity.gql").read_text()

acryl_datahub_cloud/datahub_reporting/datahub_dataset.py
@@ -409,7 +409,6 @@ class DataHubBasedS3Dataset:
         physical_uri: str,
         local_file: str,
     ) -> Iterable[MetadataChangeProposalWrapper]:
-
         aspects: List = []
         mcps: List[MetadataChangeProposalWrapper] = self._update_presigned_url(
             dataset_urn, physical_uri
@@ -456,7 +455,6 @@ class DataHubBasedS3Dataset:
         physical_uri: str,
         dataset_properties: Optional[DatasetPropertiesClass] = None,
     ) -> List[MetadataChangeProposalWrapper]:
-
         if self.config.generate_presigned_url:
             external_url = self._generate_presigned_url(physical_uri)
         else:

acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py
@@ -180,7 +180,6 @@ class DataHubFormReportingData(FormData):
     def form_assigned_date(
         self, search_row: DataHubDatasetSearchRow
     ) -> Dict[str, date]:
-
         form_assigned_dates: Dict[str, date] = {}
         forms = self.graph.get_aspect(search_row.urn, FormsClass)
         if not forms:

acryl_datahub_cloud/datahub_reporting/extract_graph.py
@@ -118,7 +118,6 @@ class DataHubReportingExtractGraphSource(Source):
         return skip_extract
 
     def get_workunits(self):
-
         self.graph = (
             self.ctx.require_graph("Loading default graph coordinates.")
             if self.config.server is None

acryl_datahub_cloud/datahub_reporting/extract_sql.py
@@ -118,7 +118,6 @@ class DataHubReportingExtractSQLSource(Source):
         return skip_extract
 
    def get_workunits(self):
-
         self.graph = (
             self.ctx.require_graph("Loading default graph coordinates.")
             if self.config.server is None

acryl_datahub_cloud/lineage_features/source.py
@@ -83,16 +83,31 @@ class DataHubLineageFeaturesSource(Source):
         query = {
             "query": {
                 "bool": {
-                    "filter": [
-                        {"term": {"source.entityType": "schemaField"}},
-                        {"term": {"destination.entityType": "schemaField"}},
+                    "should": [
+                        {"term": {"relationshipType": "Consumes"}},
                         {"term": {"relationshipType": "DownstreamOf"}},
+                        {"term": {"relationshipType": "TrainedBy"}},
+                        {"term": {"relationshipType": "UsedBy"}},
+                        {"term": {"relationshipType": "MemberOf"}},
+                        {"term": {"relationshipType": "DerivedFrom"}},
+                        {"term": {"relationshipType": "Produces"}},
+                        {"term": {"relationshipType": "DashboardContainsDashboard"}},
+                        {
+                            "bool": {
+                                "must": [
+                                    {"term": {"relationshipType": "Contains"}},
+                                    {"term": {"source.entityType": "dashboard"}},
+                                    {"term": {"destination.entityType": "chart"}},
+                                ]
+                            }
+                        },
                     ],
-                }
+                },
             },
             "sort": [
                 {"source.urn": {"order": "desc"}},
                 {"destination.urn": {"order": "desc"}},
+                {"relationshipType": {"order": "desc"}},
                 {"lifecycleOwner": {"order": "desc"}},
             ],
         }
@@ -124,7 +139,9 @@ class DataHubLineageFeaturesSource(Source):
         for urn in set(self.upstream_counts.keys()).union(
             self.downstream_counts.keys()
         ):
-            print(urn, self.upstream_counts[urn], self.downstream_counts[urn])
+            logger.debug(
+                f"{urn}: {self.upstream_counts[urn]}, {self.downstream_counts[urn]}"
+            )
             yield MetadataChangeProposalWrapper(
                 entityUrn=urn,
                 aspect=LineageFeaturesClass(