acryl-datahub-cloud 0.3.7.9rc1__py3-none-any.whl → 0.3.8rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub-cloud might be problematic.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/source.py +0 -1
- acryl_datahub_cloud/api/__init__.py +1 -0
- acryl_datahub_cloud/api/client.py +6 -0
- acryl_datahub_cloud/api/entity_versioning.py +167 -0
- acryl_datahub_cloud/datahub_metadata_sharing/__init__.py +0 -0
- acryl_datahub_cloud/datahub_metadata_sharing/metadata_sharing_source.py +262 -0
- acryl_datahub_cloud/datahub_metadata_sharing/query.py +7 -0
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +0 -2
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +0 -1
- acryl_datahub_cloud/datahub_reporting/extract_graph.py +0 -1
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +0 -1
- acryl_datahub_cloud/lineage_features/source.py +22 -5
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1559 -1460
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executor/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
- acryl_datahub_cloud/metadata/schema.avsc +22744 -22341
- acryl_datahub_cloud/metadata/schema_classes.py +1058 -461
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionInferenceDetails.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/DataHubViewInfo.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +63 -0
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/Deprecation.avsc +12 -0
- acryl_datahub_cloud/metadata/schemas/DynamicFormAssignment.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/EntityTypeKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestInput.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestResult.avsc +14 -0
- acryl_datahub_cloud/metadata/schemas/Filter.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureProperties.avsc +51 -0
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
- acryl_datahub_cloud/metadata/schemas/MLModelGroupProperties.avsc +51 -0
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +51 -0
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +20 -0
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +10 -1
- acryl_datahub_cloud/metadata/schemas/PostInfo.avsc +23 -0
- acryl_datahub_cloud/metadata/schemas/RecommendationModule.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/RemoteExecutorKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/RemoteExecutorStatus.avsc +80 -0
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/VersionProperties.avsc +216 -0
- acryl_datahub_cloud/metadata/schemas/VersionSetKey.avsc +26 -0
- acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc +49 -0
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/METADATA +35 -34
- {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/RECORD +58 -44
- {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/entry_points.txt +1 -0
- {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8rc1.dist-info}/top_level.txt +0 -0
acryl_datahub_cloud/acryl_cs_issues/source.py
```diff
@@ -100,7 +100,6 @@ class AcrylCSIssuesSource(Source):
     def _provision_platform(
         self, platform: str, logo_url: str, graph: DataHubGraph
     ) -> None:
-
         platform_urn = make_data_platform_urn(platform)
         if not graph.exists(platform_urn):
             platform_info = DataPlatformInfoClass(
```
acryl_datahub_cloud/api/__init__.py
```diff
@@ -0,0 +1 @@
+from acryl_datahub_cloud.api.client import AcrylGraph
```
acryl_datahub_cloud/api/entity_versioning.py
```diff
@@ -0,0 +1,167 @@
+import uuid
+from typing import Optional
+
+from datahub.ingestion.graph.client import DataHubGraph
+from datahub.metadata.schema_classes import (
+    VersionPropertiesClass,
+    VersionSetPropertiesClass,
+)
+from datahub.metadata.urns import VersionSetUrn
+from datahub.utilities.urns.urn import guess_entity_type
+
+
+class EntityVersioningAPI(DataHubGraph):
+    LINK_VERSION_MUTATION = """
+    mutation($input: LinkVersionInput!) {
+        linkAssetVersion(input: $input)
+    }
+    """
+
+    UNLINK_VERSION_MUTATION = """
+    mutation($input: UnlinkVersionInput!) {
+        unlinkAssetVersion(input: $input)
+    }
+    """
+
+    def link_asset_to_version_set(
+        self,
+        asset_urn: str,
+        version_set_urn: Optional[str],
+        label: str,
+        *,
+        comment: Optional[str] = None,
+    ) -> str:
+        """Sets an entity as the latest version of a version set.
+
+        Can also be used to create a new version set, with `asset_urn` as the first version.
+
+        Args:
+            asset_urn: URN of the entity.
+            version_set_urn: URN of the version set, or None to generate a new version set urn
+            label: Label of the version.
+            comment: Comment about the version.
+
+        Returns:
+            URN of the version set to which `asset_urn` was linked.
+        """
+
+        entity_type = guess_entity_type(asset_urn)
+        if version_set_urn is None:
+            version_set_urn = VersionSetUrn(str(uuid.uuid4()), entity_type).urn()
+        elif guess_entity_type(version_set_urn) != "versionSet":
+            raise ValueError(f"Expected version set URN, got {version_set_urn}")
+
+        entity_version = self.get_aspect(asset_urn, VersionPropertiesClass)
+        if entity_version:
+            raise ValueError(
+                f"Asset {asset_urn} is already a version of {entity_version.versionSet}"
+            )
+
+        variables = {
+            "input": {
+                "versionSet": version_set_urn,
+                "linkedEntity": asset_urn,
+                "version": label,
+                "comment": comment,
+            }
+        }
+        self.execute_graphql(self.LINK_VERSION_MUTATION, variables)
+        return version_set_urn
+
+    def link_asset_to_versioned_asset(
+        self,
+        new_asset_urn: str,
+        old_asset_urn: str,
+        label: str,
+        *,
+        comment: Optional[str] = None,
+    ) -> str:
+        """Sets an entity as the latest version of an existing versioned entity.
+
+        Args:
+            new_asset_urn: URN of the new latest entity.
+            old_asset_urn: URN of an existing versioned entity to link onto.
+            label: Label of the version.
+            comment: Comment about the version.
+
+        Returns:
+            URN of the version set to which `new_asset_urn` was linked.
+        """
+
+        new_entity_type = guess_entity_type(new_asset_urn)
+        old_entity_type = guess_entity_type(old_asset_urn)
+        if new_entity_type != old_entity_type:
+            raise ValueError(
+                f"Expected URNs of the same type, got {new_entity_type} and {old_entity_type}"
+            )
+
+        new_entity_version = self.get_aspect(new_asset_urn, VersionPropertiesClass)
+        if new_entity_version:
+            raise ValueError(
+                f"Asset {new_asset_urn} is already a version of {new_entity_version.versionSet}"
+            )
+        old_entity_version = self.get_aspect(old_asset_urn, VersionPropertiesClass)
+        if not old_entity_version:
+            raise ValueError(f"Asset {old_asset_urn} is not versioned")
+
+        version_set_urn = old_entity_version.versionSet
+        self.link_asset_to_version_set(
+            new_asset_urn, version_set_urn, label, comment=comment
+        )
+        return version_set_urn
+
+    def unlink_asset_from_version_set(self, asset_urn: str) -> Optional[str]:
+        """Unlinks an entity from its version set.
+
+        Args:
+            asset_urn: URN of the entity to unlink from its version set.
+
+        Returns:
+            If successful, the URN of the version set from which `asset_urn` was unlinked.
+        """
+
+        entity_version = self.get_aspect(asset_urn, VersionPropertiesClass)
+        if not entity_version:
+            raise ValueError(f"Asset {asset_urn} is not versioned")
+
+        variables = {
+            "input": {
+                "versionSet": entity_version.versionSet,
+                "unlinkedEntity": asset_urn,
+            }
+        }
+        if self.execute_graphql(self.UNLINK_VERSION_MUTATION, variables):
+            return entity_version.versionSet
+        else:
+            return None
+
+    def unlink_latest_asset_from_version_set(
+        self, version_set_urn: str
+    ) -> Optional[str]:
+        """Unlinks the latest version of a version set.
+
+        Args:
+            version_set_urn: URN of the version set.
+
+        Returns:
+            If successful, the URN of the entity that was unlinked from `version_set_urn`.
+        """
+
+        version_set_properties = self.get_aspect(
+            version_set_urn, VersionSetPropertiesClass
+        )
+        if not version_set_properties:
+            raise ValueError(
+                f"Version set {version_set_urn} does not exist or has no versions"
+            )
+
+        variables = {
+            "input": {
+                "versionSet": version_set_urn,
+                "unlinkedEntity": version_set_properties.latest,
+            }
+        }
+        if self.execute_graphql(self.UNLINK_VERSION_MUTATION, variables):
+            return version_set_properties.latest
+        else:
+            return None
```
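The new `entity_versioning.py` module is a thin client-side wrapper over the `linkAssetVersion` and `unlinkAssetVersion` GraphQL mutations. Below is a minimal usage sketch, not taken from the package: the server URL, token, dataset URNs, and version labels are illustrative, and it assumes `EntityVersioningAPI` is constructed the same way as its `DataHubGraph` parent.

```python
from datahub.ingestion.graph.client import DatahubClientConfig

from acryl_datahub_cloud.api.entity_versioning import EntityVersioningAPI

# Hypothetical connection details; EntityVersioningAPI subclasses DataHubGraph,
# so it is assumed here to accept the same client config as its parent.
client = EntityVersioningAPI(
    DatahubClientConfig(server="http://localhost:8080", token="<token>")
)

# Link a first version, letting the API mint a new version set URN (illustrative URNs).
dataset_v1 = "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table_v1,PROD)"
version_set_urn = client.link_asset_to_version_set(
    dataset_v1, None, "v1", comment="initial version"
)

# Link a second asset against the already-versioned one; both URNs must share an entity type.
dataset_v2 = "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table_v2,PROD)"
client.link_asset_to_versioned_asset(dataset_v2, dataset_v1, "v2")

# Unlink the latest version again; returns the unlinked entity URN on success.
unlinked = client.unlink_latest_asset_from_version_set(version_set_urn)
```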
acryl_datahub_cloud/datahub_metadata_sharing/__init__.py
File without changes (empty file).
acryl_datahub_cloud/datahub_metadata_sharing/metadata_sharing_source.py
```diff
@@ -0,0 +1,262 @@
+import logging
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional, Tuple
+
+from pydantic import BaseModel
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+from acryl_datahub_cloud.datahub_metadata_sharing.query import (
+    GRAPHQL_SCROLL_SHARED_ENTITIES,
+    GRAPHQL_SHARE_ENTITY,
+)
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import (
+    SupportStatus,
+    config_class,
+    platform_name,
+    support_status,
+)
+from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.graph.client import DataHubGraph
+
+logger = logging.getLogger(__name__)
+
+
+class GraphQLError(Exception):
+    """Custom exception for GraphQL-specific errors"""
+
+    pass
+
+
+class DataHubMetadataSharingSourceConfig(BaseModel):
+    batch_size: int = 100
+    batch_delay_ms: int = 100
+    max_retries: int = 3
+    initial_retry_delay_ms: int = 1000
+
+
+@dataclass
+class DataHubMetadataSharingSourceReport(SourceReport):
+    entities_shared: int = 0
+    entities_failed: int = 0
+    implicit_entities_skipped: int = 0
+    batches_processed: int = 0
+
+
+@platform_name(id="datahub", platform_name="DataHub")
+@config_class(DataHubMetadataSharingSourceConfig)
+@support_status(SupportStatus.INCUBATING)
+class DataHubMetadataSharingSource(Source):
+    """MetadataSharing Source that reshares entities across DataHub instances"""
+
+    def __init__(
+        self, config: DataHubMetadataSharingSourceConfig, ctx: PipelineContext
+    ):
+        super().__init__(ctx)
+        self.config: DataHubMetadataSharingSourceConfig = config
+        self.report = DataHubMetadataSharingSourceReport()
+        self.graph: Optional[DataHubGraph] = None
+
+    @retry(
+        retry=retry_if_exception_type((GraphQLError, ConnectionError)),
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=4, max=10),
+        reraise=True,
+    )
+    def execute_graphql_with_retry(
+        self, query: str, variables: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Execute GraphQL query with retry logic"""
+        if self.graph is None:
+            raise ValueError("Graph client not initialized")
+        response = self.graph.execute_graphql(query, variables=variables)
+        error = response.get("error")
+        if error:
+            raise GraphQLError(f"GraphQL error: {error}")
+        return response
+
+    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
+        self.graph = self.ctx.require_graph("Loading default graph coordinates.")
+
+        self.reshare_entities()
+
+        # This source doesn't produce any work units
+        return []
+
+    def reshare_entities(self) -> None:
+        scroll_id: Optional[str] = None
+        current_batch_number: int = 1
+
+        try:
+            while True:
+                next_scroll_id, results = self.scroll_shared_entities(
+                    scroll_id, self.config.batch_size
+                )
+
+                for result in results:
+                    self._process_single_entity(result)
+
+                self.report.batches_processed = current_batch_number
+                self.report.info(
+                    message="Completed sharing batch of entities.",
+                    context=f"{current_batch_number} of size {self.config.batch_size}!",
+                )
+                current_batch_number += 1
+
+                if next_scroll_id is None:
+                    break
+
+                time.sleep(self.config.batch_delay_ms / 1000.0)
+
+        except Exception as e:
+            self.report.report_failure(
+                title="Failed to process batches",
+                message="Error occurred while processing one or more batches!",
+                context=f"message = {str(e)}",
+                exc=e,
+            )
+            return
+
+        self.report.info(
+            message="Completed sharing all entities.",
+            context=f"Successfully shared {self.report.entities_shared} entities, "
+            f"failed to share {self.report.entities_failed} entities.",
+        )
+
+    # Rest of the methods remain the same...
+
+    def _process_single_entity(self, result: Dict[str, Any]) -> None:
+        """Process a single entity result"""
+        entity_urn = result.get("entity", {}).get("urn", None)
+        share_results = (
+            result.get("entity", {}).get("share", {}).get("lastShareResults", [])
+        )
+
+        if entity_urn is None:
+            self.report.report_warning(
+                message="Failed to resolve entity urn for shared asset! Skipping...",
+                context=f"Response: {str(result)}",
+            )
+            return
+
+        for share_result in share_results:
+            try:
+                destination_data = share_result.get("destination", {})
+                destination_urn = destination_data.get("urn", "")
+                previous_status = share_result.get("status")
+                share_config = share_result.get("shareConfig", {})
+
+                # Important: If there is implicit entity, we should skip this urn.
+                # This means the entity was not EXPLICITLY shared, so we do not want to explicitly share here.
+                implicit_shared_entity = share_result.get("implicitShareEntity")
+                is_implicitly_shared = (
+                    implicit_shared_entity is not None
+                    and "urn" in implicit_shared_entity
+                )
+
+                if is_implicitly_shared:
+                    self.report.implicit_entities_skipped += 1
+                    continue
+
+                if previous_status != "SUCCESS":
+                    self.report.report_warning(
+                        message="Attempting to share a previously unsuccessful shared entity!",
+                        context=f"entity urn: {entity_urn}, destination urn: {destination_urn}",
+                    )
+
+                lineage_direction = self._determine_lineage_direction(share_config)
+
+                shared = self.share_entity(
+                    entity_urn=entity_urn,
+                    destination_urn=destination_urn,
+                    lineage_direction=lineage_direction,
+                )
+
+                if shared:
+                    self.report.entities_shared += 1
+                else:
+                    self.report.entities_failed += 1
+
+            except Exception as e:
+                self.report.report_warning(
+                    message="Failed to share single entity!",
+                    context=f"entity urn: {entity_urn}",
+                )
+                logger.exception(f"Error processing entity {entity_urn}", e)
+                self.report.entities_failed += 1
+
+    def _determine_lineage_direction(
+        self, share_config: Dict[str, Any]
+    ) -> Optional[str]:
+        """Determine lineage direction based on share config"""
+        include_upstreams = share_config.get("enableUpstreamLineage", False)
+        include_downstreams = share_config.get(
+            "enableDownstreamLineage", False
+        )  # Fixed typo
+
+        if include_upstreams and include_downstreams:
+            return "BOTH"
+        if include_upstreams:
+            return "UPSTREAM"
+        if include_downstreams:
+            return "DOWNSTREAM"
+        return None
+
+    def scroll_shared_entities(
+        self, scroll_id: Optional[str], count: int
+    ) -> Tuple[Optional[str], List[Dict[str, Any]]]:
+        """Scroll through shared entities with retry logic"""
+        response = self.execute_graphql_with_retry(
+            GRAPHQL_SCROLL_SHARED_ENTITIES,
+            variables={
+                "scrollId": scroll_id,
+                "count": count,
+            },
+        )
+
+        result = response.get("scrollAcrossEntities", {})
+        return result.get("nextScrollId"), result.get("searchResults", [])
+
+    def share_entity(
+        self, entity_urn: str, destination_urn: str, lineage_direction: Optional[str]
+    ) -> bool:
+        """Share entity with retry logic"""
+        try:
+            response = self.execute_graphql_with_retry(
+                GRAPHQL_SHARE_ENTITY,
+                variables={
+                    "entityUrn": entity_urn,
+                    "destinationUrn": destination_urn,
+                    "lineageDirection": lineage_direction,
+                },
+            )
+
+            result = response.get("shareEntity", {})
+            if not result.get("succeeded", False):
+                self.report.report_failure(
+                    title="Failed to Share Entity",
+                    message="Response returned that success failed for entity and destination!",
+                    context=f"entity urn: {entity_urn}, destination urn: {destination_urn}",
+                )
+                return False
+
+            return True
+
+        except Exception as e:
+            self.report.report_failure(
+                title="Failed to Share Entity",
+                message="Exception occurred while sharing entity",
+                context=f"entity urn: {entity_urn}, destination urn: {destination_urn}",
+                exc=e,
+            )
+            return False
+
+    def get_report(self) -> SourceReport:
+        return self.report
```
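For context, a short sketch of how the new source's configuration knobs relate to the resharing loop above; the values are illustrative. Note that the `@retry` decorator in the diff uses fixed tenacity settings (3 attempts, exponential wait), so the `max_retries` and `initial_retry_delay_ms` fields appear to be declared on the config but not yet wired into the retry policy.

```python
from acryl_datahub_cloud.datahub_metadata_sharing.metadata_sharing_source import (
    DataHubMetadataSharingSourceConfig,
)

# Illustrative tuning: fetch 50 shared entities per scroll page and pause
# 250 ms between pages. batch_size is passed to scroll_shared_entities() as
# the page size; batch_delay_ms is converted to seconds for time.sleep().
config = DataHubMetadataSharingSourceConfig(
    batch_size=50,
    batch_delay_ms=250,
)

print(config.batch_delay_ms / 1000.0)  # 0.25 -> the per-page sleep in seconds
```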
acryl_datahub_cloud/datahub_reporting/datahub_dataset.py
```diff
@@ -409,7 +409,6 @@ class DataHubBasedS3Dataset:
         physical_uri: str,
         local_file: str,
     ) -> Iterable[MetadataChangeProposalWrapper]:
-
         aspects: List = []
         mcps: List[MetadataChangeProposalWrapper] = self._update_presigned_url(
             dataset_urn, physical_uri
```
acryl_datahub_cloud/datahub_reporting/datahub_dataset.py
```diff
@@ -456,7 +455,6 @@ class DataHubBasedS3Dataset:
         physical_uri: str,
         dataset_properties: Optional[DatasetPropertiesClass] = None,
     ) -> List[MetadataChangeProposalWrapper]:
-
         if self.config.generate_presigned_url:
             external_url = self._generate_presigned_url(physical_uri)
         else:
```
acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py
```diff
@@ -180,7 +180,6 @@ class DataHubFormReportingData(FormData):
     def form_assigned_date(
         self, search_row: DataHubDatasetSearchRow
    ) -> Dict[str, date]:
-
         form_assigned_dates: Dict[str, date] = {}
         forms = self.graph.get_aspect(search_row.urn, FormsClass)
         if not forms:
```
acryl_datahub_cloud/lineage_features/source.py
```diff
@@ -83,16 +83,31 @@ class DataHubLineageFeaturesSource(Source):
         query = {
             "query": {
                 "bool": {
-                    "
-                    {"term": {"
-                    {"term": {"destination.entityType": "schemaField"}},
+                    "should": [
+                        {"term": {"relationshipType": "Consumes"}},
                         {"term": {"relationshipType": "DownstreamOf"}},
+                        {"term": {"relationshipType": "TrainedBy"}},
+                        {"term": {"relationshipType": "UsedBy"}},
+                        {"term": {"relationshipType": "MemberOf"}},
+                        {"term": {"relationshipType": "DerivedFrom"}},
+                        {"term": {"relationshipType": "Produces"}},
+                        {"term": {"relationshipType": "DashboardContainsDashboard"}},
+                        {
+                            "bool": {
+                                "must": [
+                                    {"term": {"relationshipType": "Contains"}},
+                                    {"term": {"source.entityType": "dashboard"}},
+                                    {"term": {"destination.entityType": "chart"}},
+                                ]
+                            }
+                        },
                     ],
-                }
+                },
             },
             "sort": [
                 {"source.urn": {"order": "desc"}},
                 {"destination.urn": {"order": "desc"}},
+                {"relationshipType": {"order": "desc"}},
                 {"lifecycleOwner": {"order": "desc"}},
             ],
         }
```
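As an illustrative restatement of the expanded `should` clause above (not code from the package), an edge now counts toward lineage features if its relationship type is any of the listed types, or if it is a dashboard's `Contains` edge pointing at a chart:

```python
# Rough Python equivalent of the Elasticsearch bool/should query above,
# for readability only; names below are hypothetical helpers.
LINEAGE_RELATIONSHIP_TYPES = {
    "Consumes",
    "DownstreamOf",
    "TrainedBy",
    "UsedBy",
    "MemberOf",
    "DerivedFrom",
    "Produces",
    "DashboardContainsDashboard",
}


def is_lineage_edge(relationship_type: str, source_type: str, destination_type: str) -> bool:
    # Matches the flat "should" terms.
    if relationship_type in LINEAGE_RELATIONSHIP_TYPES:
        return True
    # Matches the nested bool/must clause: dashboard Contains chart.
    return (
        relationship_type == "Contains"
        and source_type == "dashboard"
        and destination_type == "chart"
    )
```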
acryl_datahub_cloud/lineage_features/source.py
```diff
@@ -124,7 +139,9 @@ class DataHubLineageFeaturesSource(Source):
         for urn in set(self.upstream_counts.keys()).union(
             self.downstream_counts.keys()
         ):
-
+            logger.debug(
+                f"{urn}: {self.upstream_counts[urn]}, {self.downstream_counts[urn]}"
+            )
             yield MetadataChangeProposalWrapper(
                 entityUrn=urn,
                 aspect=LineageFeaturesClass(
```