acryl-datahub 1.0.0.2rc1__py3-none-any.whl → 1.0.0.2rc2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.2rc1.dist-info → acryl_datahub-1.0.0.2rc2.dist-info}/METADATA +2566 -2566
- {acryl_datahub-1.0.0.2rc1.dist-info → acryl_datahub-1.0.0.2rc2.dist-info}/RECORD +13 -12
- datahub/_version.py +1 -1
- datahub/ingestion/source/hex/constants.py +5 -0
- datahub/ingestion/source/hex/hex.py +150 -22
- datahub/ingestion/source/hex/mapper.py +28 -2
- datahub/ingestion/source/hex/model.py +10 -2
- datahub/ingestion/source/hex/query_fetcher.py +297 -0
- datahub/ingestion/source/superset.py +108 -81
- {acryl_datahub-1.0.0.2rc1.dist-info → acryl_datahub-1.0.0.2rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.2rc1.dist-info → acryl_datahub-1.0.0.2rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.2rc1.dist-info → acryl_datahub-1.0.0.2rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.2rc1.dist-info → acryl_datahub-1.0.0.2rc2.dist-info}/top_level.txt +0 -0
|
{acryl_datahub-1.0.0.2rc1.dist-info → acryl_datahub-1.0.0.2rc2.dist-info}/RECORD
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.0.0.
|
|
1
|
+
acryl_datahub-1.0.0.2rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=lFv-ImaIXKL_EDY2GlHJHg9iVkj13C_xihZRNnxH3M8,323
|
|
5
5
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
6
6
|
datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -217,7 +217,7 @@ datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99Wd
|
|
|
217
217
|
datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
|
|
218
218
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
219
219
|
datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
|
|
220
|
-
datahub/ingestion/source/superset.py,sha256=
|
|
220
|
+
datahub/ingestion/source/superset.py,sha256=bMfvm9HgUoS3T7BjHsDrrOodc8iBRrJRQYv2D66bABo,41194
|
|
221
221
|
datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
222
222
|
datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
|
|
223
223
|
datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
|
|
@@ -328,10 +328,11 @@ datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
328
328
|
datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
|
|
329
329
|
datahub/ingestion/source/hex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
330
330
|
datahub/ingestion/source/hex/api.py,sha256=JfFPD8O4z16fwZE_BdX5aCQztEq-tbzxJJ7aofH4DE4,12274
|
|
331
|
-
datahub/ingestion/source/hex/constants.py,sha256=
|
|
332
|
-
datahub/ingestion/source/hex/hex.py,sha256=
|
|
333
|
-
datahub/ingestion/source/hex/mapper.py,sha256=
|
|
334
|
-
datahub/ingestion/source/hex/model.py,sha256=
|
|
331
|
+
datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJX1atiiDZyKtg,271
|
|
332
|
+
datahub/ingestion/source/hex/hex.py,sha256=PIRl8fPkKtlHV7cqR4H8RKVYdTLgEFXHFzc3QAqJLhE,12733
|
|
333
|
+
datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
|
|
334
|
+
datahub/ingestion/source/hex/model.py,sha256=S9bUhfFcjzuio2dBS6HzSyRVPiSJvRvMQ0qyVrjV5-E,1766
|
|
335
|
+
datahub/ingestion/source/hex/query_fetcher.py,sha256=5r065vL7XohcgZ_fj-1h6o8cxrPin37IeYsC99GU6LA,12287
|
|
335
336
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
336
337
|
datahub/ingestion/source/iceberg/iceberg.py,sha256=PhLLXWgBdfZ3hL7LgLvDr6aTK-QKmiZCFNz5jD-mxZM,30773
|
|
337
338
|
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
|
|
@@ -1043,8 +1044,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1043
1044
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1044
1045
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1045
1046
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1046
|
-
acryl_datahub-1.0.0.
|
|
1047
|
-
acryl_datahub-1.0.0.
|
|
1048
|
-
acryl_datahub-1.0.0.
|
|
1049
|
-
acryl_datahub-1.0.0.
|
|
1050
|
-
acryl_datahub-1.0.0.
|
|
1047
|
+
acryl_datahub-1.0.0.2rc2.dist-info/METADATA,sha256=VuKbVh0Lt8z7Jik8lZ39CF56PZHqn_oIwn2LBmYzrVc,176849
|
|
1048
|
+
acryl_datahub-1.0.0.2rc2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
1049
|
+
acryl_datahub-1.0.0.2rc2.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
|
|
1050
|
+
acryl_datahub-1.0.0.2rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1051
|
+
acryl_datahub-1.0.0.2rc2.dist-info/RECORD,,
|
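datahub/_version.py
CHANGED
|
(the diff body for this file is not included in this capture)
|
datahub/ingestion/source/hex/constants.py
CHANGED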
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
from datahub.metadata.urns import DataPlatformUrn
|
|
2
|
+
|
|
1
3
|
HEX_PLATFORM_NAME = "hex"
|
|
4
|
+
HEX_PLATFORM_URN = DataPlatformUrn(platform_name=HEX_PLATFORM_NAME)
|
|
2
5
|
HEX_API_BASE_URL_DEFAULT = "https://app.hex.tech/api/v1"
|
|
3
6
|
HEX_API_PAGE_SIZE_DEFAULT = 100
|
|
7
|
+
|
|
8
|
+
DATAHUB_API_PAGE_SIZE_DEFAULT = 100
|
|
datahub/ingestion/source/hex/hex.py
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import datetime, timedelta, timezone
|
|
1
3
|
from typing import Any, Dict, Iterable, List, Optional
|
|
2
4
|
|
|
3
|
-
from pydantic import Field, SecretStr
|
|
5
|
+
from pydantic import Field, SecretStr, root_validator
|
|
4
6
|
from typing_extensions import assert_never
|
|
5
7
|
|
|
6
8
|
from datahub.configuration.common import AllowDenyPattern
|
|
9
|
+
from datahub.configuration.datetimes import parse_user_datetime
|
|
7
10
|
from datahub.configuration.source_common import (
|
|
8
11
|
EnvConfigMixin,
|
|
9
12
|
PlatformInstanceConfigMixin,
|
|
@@ -21,12 +24,17 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor
|
|
|
21
24
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
22
25
|
from datahub.ingestion.source.hex.api import HexApi, HexApiReport
|
|
23
26
|
from datahub.ingestion.source.hex.constants import (
|
|
27
|
+
DATAHUB_API_PAGE_SIZE_DEFAULT,
|
|
24
28
|
HEX_API_BASE_URL_DEFAULT,
|
|
25
29
|
HEX_API_PAGE_SIZE_DEFAULT,
|
|
26
30
|
HEX_PLATFORM_NAME,
|
|
27
31
|
)
|
|
28
32
|
from datahub.ingestion.source.hex.mapper import Mapper
|
|
29
33
|
from datahub.ingestion.source.hex.model import Component, Project
|
|
34
|
+
from datahub.ingestion.source.hex.query_fetcher import (
|
|
35
|
+
HexQueryFetcher,
|
|
36
|
+
HexQueryFetcherReport,
|
|
37
|
+
)
|
|
30
38
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
31
39
|
StaleEntityRemovalHandler,
|
|
32
40
|
StaleEntityRemovalSourceReport,
|
|
@@ -34,9 +42,10 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
|
34
42
|
)
|
|
35
43
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
36
44
|
StatefulIngestionConfigBase,
|
|
37
|
-
StatefulIngestionReport,
|
|
38
45
|
StatefulIngestionSourceBase,
|
|
39
46
|
)
|
|
47
|
+
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
|
|
48
|
+
from datahub.sdk.main_client import DataHubClient
|
|
40
49
|
|
|
41
50
|
|
|
42
51
|
class HexSourceConfig(
|
|
@@ -93,9 +102,73 @@ class HexSourceConfig(
|
|
|
93
102
|
default=True,
|
|
94
103
|
description="Set ownership identity from owner/creator email",
|
|
95
104
|
)
|
|
105
|
+
include_lineage: bool = Field(
|
|
106
|
+
default=True,
|
|
107
|
+
description='Include Hex lineage, being fetched from DataHub. See "Limitations" section in the docs for more details about the limitations of this feature.',
|
|
108
|
+
)
|
|
109
|
+
lineage_start_time: Optional[datetime] = Field(
|
|
110
|
+
default=None,
|
|
111
|
+
description="Earliest date of lineage to consider. Default: 1 day before lineage end time. You can specify absolute time like '2023-01-01' or relative time like '-7 days' or '-7d'.",
|
|
112
|
+
)
|
|
113
|
+
lineage_end_time: Optional[datetime] = Field(
|
|
114
|
+
default=None,
|
|
115
|
+
description="Latest date of lineage to consider. Default: Current time in UTC. You can specify absolute time like '2023-01-01' or relative time like '-1 day' or '-1d'.",
|
|
116
|
+
)
|
|
117
|
+
datahub_page_size: int = Field(
|
|
118
|
+
default=DATAHUB_API_PAGE_SIZE_DEFAULT,
|
|
119
|
+
description="Number of items to fetch per DataHub API call.",
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
@root_validator(pre=True)
|
|
123
|
+
def validate_lineage_times(cls, data: Dict[str, Any]) -> Dict[str, Any]:
|
|
124
|
+
# lineage_end_time default = now
|
|
125
|
+
if "lineage_end_time" not in data or data["lineage_end_time"] is None:
|
|
126
|
+
data["lineage_end_time"] = datetime.now(tz=timezone.utc)
|
|
127
|
+
# if string is given, parse it
|
|
128
|
+
if isinstance(data["lineage_end_time"], str):
|
|
129
|
+
data["lineage_end_time"] = parse_user_datetime(data["lineage_end_time"])
|
|
130
|
+
# if no timezone is given, assume UTC
|
|
131
|
+
if data["lineage_end_time"].tzinfo is None:
|
|
132
|
+
data["lineage_end_time"] = data["lineage_end_time"].replace(
|
|
133
|
+
tzinfo=timezone.utc
|
|
134
|
+
)
|
|
135
|
+
# at this point, we ensure there is a non null datetime with UTC timezone for lineage_end_time
|
|
136
|
+
assert (
|
|
137
|
+
data["lineage_end_time"]
|
|
138
|
+
and isinstance(data["lineage_end_time"], datetime)
|
|
139
|
+
and data["lineage_end_time"].tzinfo is not None
|
|
140
|
+
and data["lineage_end_time"].tzinfo == timezone.utc
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
# lineage_start_time default = lineage_end_time - 1 day
|
|
144
|
+
if "lineage_start_time" not in data or data["lineage_start_time"] is None:
|
|
145
|
+
data["lineage_start_time"] = data["lineage_end_time"] - timedelta(days=1)
|
|
146
|
+
# if string is given, parse it
|
|
147
|
+
if isinstance(data["lineage_start_time"], str):
|
|
148
|
+
data["lineage_start_time"] = parse_user_datetime(data["lineage_start_time"])
|
|
149
|
+
# if no timezone is given, assume UTC
|
|
150
|
+
if data["lineage_start_time"].tzinfo is None:
|
|
151
|
+
data["lineage_start_time"] = data["lineage_start_time"].replace(
|
|
152
|
+
tzinfo=timezone.utc
|
|
153
|
+
)
|
|
154
|
+
# at this point, we ensure there is a non null datetime with UTC timezone for lineage_start_time
|
|
155
|
+
assert (
|
|
156
|
+
data["lineage_start_time"]
|
|
157
|
+
and isinstance(data["lineage_start_time"], datetime)
|
|
158
|
+
and data["lineage_start_time"].tzinfo is not None
|
|
159
|
+
and data["lineage_start_time"].tzinfo == timezone.utc
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
return data
|
|
96
163
|
|
|
97
164
|
|
|
98
|
-
|
|
165
|
+
@dataclass
|
|
166
|
+
class HexReport(
|
|
167
|
+
StaleEntityRemovalSourceReport,
|
|
168
|
+
HexApiReport,
|
|
169
|
+
IngestionStageReport,
|
|
170
|
+
HexQueryFetcherReport,
|
|
171
|
+
):
|
|
99
172
|
pass
|
|
100
173
|
|
|
101
174
|
|
|
@@ -110,7 +183,7 @@ class HexSource(StatefulIngestionSourceBase):
|
|
|
110
183
|
def __init__(self, config: HexSourceConfig, ctx: PipelineContext):
|
|
111
184
|
super().__init__(config, ctx)
|
|
112
185
|
self.source_config = config
|
|
113
|
-
self.report = HexReport()
|
|
186
|
+
self.report: HexReport = HexReport()
|
|
114
187
|
self.platform = HEX_PLATFORM_NAME
|
|
115
188
|
self.hex_api = HexApi(
|
|
116
189
|
report=self.report,
|
|
@@ -129,6 +202,28 @@ class HexSource(StatefulIngestionSourceBase):
|
|
|
129
202
|
categories_as_tags=self.source_config.categories_as_tags,
|
|
130
203
|
set_ownership_from_email=self.source_config.set_ownership_from_email,
|
|
131
204
|
)
|
|
205
|
+
self.project_registry: Dict[str, Project] = {}
|
|
206
|
+
self.component_registry: Dict[str, Component] = {}
|
|
207
|
+
|
|
208
|
+
self.datahub_client: Optional[DataHubClient] = None
|
|
209
|
+
self.query_fetcher: Optional[HexQueryFetcher] = None
|
|
210
|
+
if self.source_config.include_lineage:
|
|
211
|
+
graph = ctx.require_graph("Lineage")
|
|
212
|
+
assert self.source_config.lineage_start_time and isinstance(
|
|
213
|
+
self.source_config.lineage_start_time, datetime
|
|
214
|
+
)
|
|
215
|
+
assert self.source_config.lineage_end_time and isinstance(
|
|
216
|
+
self.source_config.lineage_end_time, datetime
|
|
217
|
+
)
|
|
218
|
+
self.datahub_client = DataHubClient(graph=graph)
|
|
219
|
+
self.query_fetcher = HexQueryFetcher(
|
|
220
|
+
datahub_client=self.datahub_client,
|
|
221
|
+
workspace_name=self.source_config.workspace_name,
|
|
222
|
+
start_datetime=self.source_config.lineage_start_time,
|
|
223
|
+
end_datetime=self.source_config.lineage_end_time,
|
|
224
|
+
report=self.report,
|
|
225
|
+
page_size=self.source_config.datahub_page_size,
|
|
226
|
+
)
|
|
132
227
|
|
|
133
228
|
@classmethod
|
|
134
229
|
def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "HexSource":
|
|
@@ -143,25 +238,58 @@ class HexSource(StatefulIngestionSourceBase):
|
|
|
143
238
|
).workunit_processor,
|
|
144
239
|
]
|
|
145
240
|
|
|
146
|
-
def get_report(self) ->
|
|
241
|
+
def get_report(self) -> HexReport:
|
|
147
242
|
return self.report
|
|
148
243
|
|
|
149
244
|
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
if self.source_config.project_title_pattern.allowed(
|
|
155
|
-
project_or_component.title
|
|
156
|
-
):
|
|
157
|
-
yield from self.mapper.map_project(project=project_or_component)
|
|
158
|
-
elif isinstance(project_or_component, Component):
|
|
159
|
-
if (
|
|
160
|
-
self.source_config.include_components
|
|
161
|
-
and self.source_config.component_title_pattern.allowed(
|
|
245
|
+
with self.report.new_stage("Fetch Hex assets from Hex API"):
|
|
246
|
+
for project_or_component in self.hex_api.fetch_projects():
|
|
247
|
+
if isinstance(project_or_component, Project):
|
|
248
|
+
if self.source_config.project_title_pattern.allowed(
|
|
162
249
|
project_or_component.title
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
250
|
+
):
|
|
251
|
+
self.project_registry[project_or_component.id] = (
|
|
252
|
+
project_or_component
|
|
253
|
+
)
|
|
254
|
+
elif isinstance(project_or_component, Component):
|
|
255
|
+
if (
|
|
256
|
+
self.source_config.include_components
|
|
257
|
+
and self.source_config.component_title_pattern.allowed(
|
|
258
|
+
project_or_component.title
|
|
259
|
+
)
|
|
260
|
+
):
|
|
261
|
+
self.component_registry[project_or_component.id] = (
|
|
262
|
+
project_or_component
|
|
263
|
+
)
|
|
264
|
+
else:
|
|
265
|
+
assert_never(project_or_component)
|
|
266
|
+
|
|
267
|
+
if self.source_config.include_lineage:
|
|
268
|
+
assert self.datahub_client and self.query_fetcher
|
|
269
|
+
|
|
270
|
+
with self.report.new_stage(
|
|
271
|
+
"Fetch Hex lineage from existing Queries in DataHub"
|
|
272
|
+
):
|
|
273
|
+
for query_metadata in self.query_fetcher.fetch():
|
|
274
|
+
project = self.project_registry.get(query_metadata.hex_project_id)
|
|
275
|
+
if project:
|
|
276
|
+
project.upstream_datasets.extend(
|
|
277
|
+
query_metadata.dataset_subjects
|
|
278
|
+
)
|
|
279
|
+
project.upstream_schema_fields.extend(
|
|
280
|
+
query_metadata.schema_field_subjects
|
|
281
|
+
)
|
|
282
|
+
else:
|
|
283
|
+
self.report.report_warning(
|
|
284
|
+
title="Missing project for lineage",
|
|
285
|
+
message="Lineage missed because missed project, likely due to filter patterns or deleted project.",
|
|
286
|
+
context=str(query_metadata),
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
with self.report.new_stage("Emit"):
|
|
290
|
+
yield from self.mapper.map_workspace()
|
|
291
|
+
|
|
292
|
+
for project in self.project_registry.values():
|
|
293
|
+
yield from self.mapper.map_project(project=project)
|
|
294
|
+
for component in self.component_registry.values():
|
|
295
|
+
yield from self.mapper.map_component(component=component)
|
|
datahub/ingestion/source/hex/mapper.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from datetime import datetime
|
|
3
|
-
from typing import Iterable, List, Optional, Tuple
|
|
3
|
+
from typing import Iterable, List, Optional, Tuple, Union
|
|
4
4
|
|
|
5
5
|
from datahub._codegen.aspect import (
|
|
6
6
|
_Aspect, # TODO: is there a better import than this one?
|
|
@@ -46,6 +46,7 @@ from datahub.metadata.schema_classes import (
|
|
|
46
46
|
DashboardInfoClass,
|
|
47
47
|
DashboardUsageStatisticsClass,
|
|
48
48
|
DataPlatformInstanceClass,
|
|
49
|
+
EdgeClass,
|
|
49
50
|
GlobalTagsClass,
|
|
50
51
|
OwnerClass,
|
|
51
52
|
OwnershipClass,
|
|
@@ -53,7 +54,14 @@ from datahub.metadata.schema_classes import (
|
|
|
53
54
|
TagAssociationClass,
|
|
54
55
|
TimeWindowSizeClass,
|
|
55
56
|
)
|
|
56
|
-
from datahub.metadata.urns import
|
|
57
|
+
from datahub.metadata.urns import (
|
|
58
|
+
ContainerUrn,
|
|
59
|
+
CorpUserUrn,
|
|
60
|
+
DashboardUrn,
|
|
61
|
+
DatasetUrn,
|
|
62
|
+
SchemaFieldUrn,
|
|
63
|
+
Urn,
|
|
64
|
+
)
|
|
57
65
|
|
|
58
66
|
logger = logging.getLogger(__name__)
|
|
59
67
|
|
|
@@ -116,6 +124,8 @@ class Mapper:
|
|
|
116
124
|
),
|
|
117
125
|
externalUrl=f"{self._base_url}/{self._workspace_name}/hex/{project.id}",
|
|
118
126
|
customProperties=dict(id=project.id),
|
|
127
|
+
datasetEdges=self._dataset_edges(project.upstream_datasets),
|
|
128
|
+
# TODO: support schema field upstream, maybe InputFields?
|
|
119
129
|
)
|
|
120
130
|
|
|
121
131
|
subtypes = SubTypesClass(
|
|
@@ -343,6 +353,22 @@ class Mapper:
|
|
|
343
353
|
else None,
|
|
344
354
|
)
|
|
345
355
|
|
|
356
|
+
def _dataset_edges(
|
|
357
|
+
self, upstream: List[Union[DatasetUrn, SchemaFieldUrn]]
|
|
358
|
+
) -> Optional[List[EdgeClass]]:
|
|
359
|
+
# TBC: is there support for CLL in Dashboards? for the moment, skip SchemaFieldUrns
|
|
360
|
+
return (
|
|
361
|
+
[
|
|
362
|
+
EdgeClass(
|
|
363
|
+
destinationUrn=upstream_urn.urn(),
|
|
364
|
+
)
|
|
365
|
+
for upstream_urn in upstream
|
|
366
|
+
if isinstance(upstream_urn, DatasetUrn)
|
|
367
|
+
]
|
|
368
|
+
if upstream
|
|
369
|
+
else None
|
|
370
|
+
)
|
|
371
|
+
|
|
346
372
|
def _yield_mcps(
|
|
347
373
|
self, entity_urn: Urn, aspects: List[Optional[_Aspect]]
|
|
348
374
|
) -> Iterable[MetadataWorkUnit]:
|
|
datahub/ingestion/source/hex/model.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
2
|
from datetime import datetime
|
|
3
|
-
from typing import List, Optional
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
|
|
5
|
+
from datahub.metadata.urns import DatasetUrn, SchemaFieldUrn
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
@dataclass
|
|
@@ -51,6 +53,12 @@ class Project:
|
|
|
51
53
|
creator: Optional[Owner] = None
|
|
52
54
|
owner: Optional[Owner] = None
|
|
53
55
|
analytics: Optional[Analytics] = None
|
|
56
|
+
upstream_datasets: List[Union[DatasetUrn, SchemaFieldUrn]] = field(
|
|
57
|
+
default_factory=list
|
|
58
|
+
)
|
|
59
|
+
upstream_schema_fields: List[Union[DatasetUrn, SchemaFieldUrn]] = field(
|
|
60
|
+
default_factory=list
|
|
61
|
+
)
|
|
54
62
|
|
|
55
63
|
|
|
56
64
|
@dataclass
|