acryl-datahub 1.2.0.4rc1__py3-none-any.whl → 1.2.0.4rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.4rc1.dist-info → acryl_datahub-1.2.0.4rc3.dist-info}/METADATA +2397 -2396
- {acryl_datahub-1.2.0.4rc1.dist-info → acryl_datahub-1.2.0.4rc3.dist-info}/RECORD +42 -41
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +3 -3
- datahub/api/entities/external/restricted_text.py +3 -3
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/quickstart_versioning.py +1 -1
- datahub/cli/specific/assertions_cli.py +37 -2
- datahub/cli/specific/datacontract_cli.py +54 -4
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +1 -1
- datahub/ingestion/api/report.py +21 -2
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/aws/tag_entities.py +2 -2
- datahub/ingestion/source/data_lake_common/path_spec.py +6 -3
- datahub/ingestion/source/dbt/dbt_cloud.py +6 -3
- datahub/ingestion/source/fivetran/fivetran_log_api.py +4 -3
- datahub/ingestion/source/grafana/models.py +6 -0
- datahub/ingestion/source/hex/hex.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +4 -4
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/slack/slack.py +7 -14
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +4 -4
- datahub/ingestion/source/tableau/tableau.py +1 -1
- datahub/ingestion/source/unity/config.py +36 -1
- datahub/ingestion/source/unity/proxy.py +332 -46
- datahub/ingestion/source/unity/proxy_types.py +12 -2
- datahub/ingestion/source/unity/source.py +91 -34
- datahub/ingestion/source/unity/tag_entities.py +2 -2
- datahub/ingestion/source/usage/starburst_trino_usage.py +2 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/sdk/search_client.py +3 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataset.py +37 -59
- datahub/utilities/server_config_util.py +2 -1
- {acryl_datahub-1.2.0.4rc1.dist-info → acryl_datahub-1.2.0.4rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.4rc1.dist-info → acryl_datahub-1.2.0.4rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.4rc1.dist-info → acryl_datahub-1.2.0.4rc3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.4rc1.dist-info → acryl_datahub-1.2.0.4rc3.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/unity/source.py
CHANGED

@@ -7,12 +7,14 @@ from urllib.parse import urljoin
 from datahub.api.entities.external.external_entities import PlatformResourceRepository
 from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
 from datahub.emitter.mce_builder import (
+    UNKNOWN_USER,
     make_data_platform_urn,
     make_dataplatform_instance_urn,
     make_dataset_urn_with_platform_instance,
     make_domain_urn,
     make_group_urn,
     make_schema_field_urn,
+    make_ts_millis,
     make_user_urn,
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -111,6 +113,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
     ViewProperties,
 )
 from datahub.metadata.schema_classes import (
+    AuditStampClass,
     BrowsePathsClass,
     DataPlatformInstanceClass,
     DatasetLineageTypeClass,
@@ -203,6 +206,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             config.warehouse_id,
             report=self.report,
             hive_metastore_proxy=self.hive_metastore_proxy,
+            lineage_data_source=config.lineage_data_source,
         )

         self.external_url_base = urljoin(self.config.workspace_url, "/explore/data")
@@ -410,12 +414,12 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
                 self.config.workspace_url, f"#notebook/{notebook.id}"
             ),
             created=(
-                TimeStampClass(int(notebook.created_at.timestamp() * 1000))
+                TimeStampClass(make_ts_millis(notebook.created_at))
                 if notebook.created_at
                 else None
             ),
             lastModified=(
-                TimeStampClass(int(notebook.modified_at.timestamp() * 1000))
+                TimeStampClass(make_ts_millis(notebook.modified_at))
                 if notebook.modified_at
                 else None
             ),
@@ -434,17 +438,20 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         if not notebook.upstreams:
             return None

+        upstreams = []
+        for upstream_ref in notebook.upstreams:
+            timestamp = make_ts_millis(upstream_ref.last_updated)
+            upstreams.append(
+                self._create_upstream_class(
+                    self.gen_dataset_urn(upstream_ref),
+                    DatasetLineageTypeClass.COPY,
+                    timestamp,
+                )
+            )
+
         return MetadataChangeProposalWrapper(
             entityUrn=self.gen_notebook_urn(notebook),
-            aspect=UpstreamLineageClass(
-                upstreams=[
-                    UpstreamClass(
-                        dataset=self.gen_dataset_urn(upstream_ref),
-                        type=DatasetLineageTypeClass.COPY,
-                    )
-                    for upstream_ref in notebook.upstreams
-                ]
-            ),
+            aspect=UpstreamLineageClass(upstreams=upstreams),
         ).as_workunit()

     def process_metastores(self) -> Iterable[MetadataWorkUnit]:
@@ -463,14 +470,15 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         self, metastore: Optional[Metastore]
     ) -> Iterable[MetadataWorkUnit]:
         for catalog in self._get_catalogs(metastore):
-            if not self.config.catalog_pattern.allowed(catalog.id):
-                self.report.catalogs.dropped(catalog.id)
-                continue
+            with self.report.new_stage(f"Ingest catalog {catalog.id}"):
+                if not self.config.catalog_pattern.allowed(catalog.id):
+                    self.report.catalogs.dropped(catalog.id)
+                    continue

-            yield from self.gen_catalog_containers(catalog)
-            yield from self.process_schemas(catalog)
+                yield from self.gen_catalog_containers(catalog)
+                yield from self.process_schemas(catalog)

-            self.report.catalogs.processed(catalog.id)
+                self.report.catalogs.processed(catalog.id)

     def _get_catalogs(self, metastore: Optional[Metastore]) -> Iterable[Catalog]:
         if self.config.catalogs:
@@ -647,9 +655,21 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         ]

     def ingest_lineage(self, table: Table) -> Optional[UpstreamLineageClass]:
+        # Calculate datetime filters for lineage
+        lineage_start_time = None
+        lineage_end_time = self.config.end_time
+
+        if self.config.ignore_start_time_lineage:
+            lineage_start_time = None  # Ignore start time to get all lineage
+        else:
+            lineage_start_time = self.config.start_time
+
         if self.config.include_table_lineage:
             self.unity_catalog_api_proxy.table_lineage(
-                table,
+                table,
+                include_entity_lineage=self.config.include_notebooks,
+                start_time=lineage_start_time,
+                end_time=lineage_end_time,
             )

         if self.config.include_column_lineage and table.upstreams:
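The preamble above reduces to one rule: when ignore_start_time_lineage is set, the lower bound is dropped so all available lineage history is fetched; otherwise the configured start_time applies. A standalone sketch of that rule (the function name is illustrative, not part of the source):

from datetime import datetime
from typing import Optional, Tuple

def lineage_window(
    start_time: Optional[datetime],
    end_time: Optional[datetime],
    ignore_start_time_lineage: bool,
) -> Tuple[Optional[datetime], Optional[datetime]]:
    # A None lower bound means "no start filter": fetch all lineage history.
    return (None if ignore_start_time_lineage else start_time, end_time)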
@@ -661,7 +681,11 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
                 for column in table.columns[: self.config.column_lineage_column_limit]
             ]
             self.unity_catalog_api_proxy.get_column_lineage(
-                table,
+                table,
+                column_names,
+                max_workers=self.config.lineage_max_workers,
+                start_time=lineage_start_time,
+                end_time=lineage_end_time,
             )

         return self._generate_lineage_aspect(self.gen_dataset_urn(table.ref), table)
@@ -690,18 +714,22 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
                 for d_col, u_cols in sorted(downstream_to_upstream_cols.items())
             )

+            timestamp = make_ts_millis(upstream_ref.last_updated)
             upstreams.append(
-                UpstreamClass(
-                    dataset=upstream_urn,
-                    type=DatasetLineageTypeClass.TRANSFORMED,
+                self._create_upstream_class(
+                    upstream_urn,
+                    DatasetLineageTypeClass.TRANSFORMED,
+                    timestamp,
                 )
             )

-        for notebook in table.upstream_notebooks:
+        for notebook in table.upstream_notebooks.values():
+            timestamp = make_ts_millis(notebook.last_updated)
             upstreams.append(
-                UpstreamClass(
-                    dataset=self.gen_notebook_urn(notebook),
-                    type=DatasetLineageTypeClass.TRANSFORMED,
+                self._create_upstream_class(
+                    self.gen_notebook_urn(notebook.id),
+                    DatasetLineageTypeClass.TRANSFORMED,
+                    timestamp,
                 )
             )
@@ -771,6 +799,31 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             instance=self.config.platform_instance,
         ).as_urn()

+    def _create_upstream_class(
+        self,
+        dataset_urn: str,
+        lineage_type: Union[str, DatasetLineageTypeClass],
+        timestamp: Optional[int],
+    ) -> UpstreamClass:
+        """
+        Helper method to create UpstreamClass with optional audit stamp.
+        If timestamp is None, audit stamp is omitted.
+        """
+        if timestamp is not None:
+            return UpstreamClass(
+                dataset=dataset_urn,
+                type=lineage_type,
+                auditStamp=AuditStampClass(
+                    time=timestamp,
+                    actor=UNKNOWN_USER,
+                ),
+            )
+        else:
+            return UpstreamClass(
+                dataset=dataset_urn,
+                type=lineage_type,
+            )
+
     def gen_schema_containers(self, schema: Schema) -> Iterable[MetadataWorkUnit]:
         domain_urn = self._gen_domain_urn(f"{schema.catalog.name}.{schema.name}")
         schema_tags = []
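For reference, the new helper's behavior can be reproduced directly with the classes this file imports; a minimal sketch, assuming an illustrative dataset URN:

from datetime import datetime, timezone

from datahub.emitter.mce_builder import UNKNOWN_USER, make_ts_millis
from datahub.metadata.schema_classes import (
    AuditStampClass,
    DatasetLineageTypeClass,
    UpstreamClass,
)

# make_ts_millis converts a datetime to epoch milliseconds (None passes through).
ts = make_ts_millis(datetime(2024, 1, 1, tzinfo=timezone.utc))
upstream = UpstreamClass(
    dataset="urn:li:dataset:(urn:li:dataPlatform:databricks,main.sales.orders,PROD)",
    type=DatasetLineageTypeClass.COPY,
    # The audit stamp is attached only when a timestamp is available.
    auditStamp=AuditStampClass(time=ts, actor=UNKNOWN_USER) if ts is not None else None,
)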
@@ -961,16 +1014,20 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         created: Optional[TimeStampClass] = None
         if table.created_at:
             custom_properties["created_at"] = str(table.created_at)
-            created = TimeStampClass(
-                int(table.created_at.timestamp() * 1000),
-                make_user_urn(table.created_by) if table.created_by else None,
-            )
+            created_ts = make_ts_millis(table.created_at)
+            if created_ts is not None:
+                created = TimeStampClass(
+                    created_ts,
+                    make_user_urn(table.created_by) if table.created_by else None,
+                )
         last_modified = created
         if table.updated_at:
-            last_modified = TimeStampClass(
-                int(table.updated_at.timestamp() * 1000),
-                table.updated_by and make_user_urn(table.updated_by),
-            )
+            updated_ts = make_ts_millis(table.updated_at)
+            if updated_ts is not None:
+                last_modified = TimeStampClass(
+                    updated_ts,
+                    table.updated_by and make_user_urn(table.updated_by),
+                )

         return DatasetPropertiesClass(
             name=table.name,
datahub/ingestion/source/unity/tag_entities.py
CHANGED

@@ -36,7 +36,7 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):

     tag_key: str
     tag_value: Optional[str] = None
-    platform_instance: Optional[str]
+    platform_instance: Optional[str] = None
     exists_in_unity_catalog: bool = False
     persisted: bool = False

@@ -218,7 +218,7 @@ class UnityCatalogTagPlatformResource(BaseModel, ExternalEntity):
     datahub_urns: LinkedResourceSet
     managed_by_datahub: bool
     id: UnityCatalogTagPlatformResourceId
-    allowed_values: Optional[List[str]]
+    allowed_values: Optional[List[str]] = None

     def get_id(self) -> ExternalEntityId:
         return self.id
datahub/ingestion/source/usage/starburst_trino_usage.py
CHANGED

@@ -60,7 +60,7 @@ AggregatedDataset = GenericAggregatedDataset[TrinoTableRef]

 class TrinoConnectorInfo(BaseModel):
     partitionIds: List[str]
-    truncated: Optional[bool]
+    truncated: Optional[bool] = None


 class TrinoAccessedMetadata(BaseModel):

@@ -80,7 +80,7 @@ class TrinoJoinedAccessEvent(BaseModel):
     table: Optional[str] = None
     accessed_metadata: List[TrinoAccessedMetadata]
     starttime: datetime = Field(alias="create_time")
-    endtime: Optional[datetime] = Field(alias="end_time")
+    endtime: Optional[datetime] = Field(None, alias="end_time")


 class EnvBasedSourceBaseConfig:
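The tag_entities and Trino model changes above all address the same Pydantic v2 behavior: an Optional field with no explicit default is required, whereas Pydantic v1 implicitly defaulted it to None. A minimal illustration (model names are made up):

from typing import Optional

from pydantic import BaseModel

class Before(BaseModel):
    truncated: Optional[bool]  # required under Pydantic v2

class After(BaseModel):
    truncated: Optional[bool] = None  # optional again, as it was under v1

After()     # ok
# Before()  # raises pydantic.ValidationError: Field required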
datahub/ingestion/transformer/base_transformer.py
CHANGED

@@ -281,11 +281,14 @@ class BaseTransformer(Transformer, metaclass=ABCMeta):
             )
         )

-        record_metadata = _update_work_unit_id(
-            envelope=envelope,
-            aspect_name=mcp.aspect.get_aspect_name(),  # type: ignore
-            urn=mcp.entityUrn,
-        )
+        if mcp.entityUrn:
+            record_metadata = _update_work_unit_id(
+                envelope=envelope,
+                aspect_name=mcp.aspect.get_aspect_name(),  # type: ignore
+                urn=mcp.entityUrn,
+            )
+        else:
+            record_metadata = envelope.metadata.copy()

         yield RecordEnvelope(
             record=mcp,
datahub/sdk/search_client.py
CHANGED

@@ -112,6 +112,8 @@ class SearchClient:
         self,
         query: Optional[str] = None,
         filter: Optional[Filter] = None,
+        *,
+        skip_cache: bool = False,
     ) -> Iterable[Urn]:
         # TODO: Add better limit / pagination support.
         types, compiled_filters = compile_filters(filter)

@@ -120,5 +122,6 @@ class SearchClient:
             status=None,
             extra_or_filters=compiled_filters,
             entity_types=types,
+            skip_cache=skip_cache,
         ):
             yield Urn.from_string(urn)
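Since skip_cache is keyword-only, callers must pass it by name; get_urns simply forwards it to the underlying search call. A hedged usage sketch (client wiring, server URL, and query are illustrative):

from datahub.sdk import DataHubClient

client = DataHubClient(server="http://localhost:8080")
for urn in client.search.get_urns(query="orders", skip_cache=True):
    print(urn)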
datahub/specific/aspect_helpers/fine_grained_lineage.py
ADDED

@@ -0,0 +1,76 @@
+from abc import abstractmethod
+from typing import List, Tuple
+
+from typing_extensions import Self
+
+from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
+from datahub.metadata.schema_classes import (
+    FineGrainedLineageClass as FineGrainedLineage,
+)
+
+
+class HasFineGrainedLineagePatch(MetadataPatchProposal):
+    @abstractmethod
+    def _fine_grained_lineage_location(self) -> Tuple[str, PatchPath]:
+        """Return the aspect name where fine-grained lineage is stored."""
+        raise NotImplementedError("Subclasses must implement this method.")
+
+    @staticmethod
+    def _get_fine_grained_key(
+        fine_grained_lineage: FineGrainedLineage,
+    ) -> Tuple[str, str, str]:
+        downstreams = fine_grained_lineage.downstreams or []
+        if len(downstreams) != 1:
+            raise TypeError("Cannot patch with more or less than one downstream.")
+        transform_op = fine_grained_lineage.transformOperation or "NONE"
+        downstream_urn = downstreams[0]
+        query_id = fine_grained_lineage.query or "NONE"
+        return transform_op, downstream_urn, query_id
+
+    def add_fine_grained_lineage(
+        self, fine_grained_lineage: FineGrainedLineage
+    ) -> Self:
+        aspect_name, path = self._fine_grained_lineage_location()
+        (
+            transform_op,
+            downstream_urn,
+            query_id,
+        ) = self._get_fine_grained_key(fine_grained_lineage)
+        for upstream_urn in fine_grained_lineage.upstreams or []:
+            self._add_patch(
+                aspect_name,
+                "add",
+                path=(*path, transform_op, downstream_urn, query_id, upstream_urn),
+                value={"confidenceScore": fine_grained_lineage.confidenceScore},
+            )
+        return self
+
+    def remove_fine_grained_lineage(
+        self, fine_grained_lineage: FineGrainedLineage
+    ) -> Self:
+        aspect_name, path = self._fine_grained_lineage_location()
+        (
+            transform_op,
+            downstream_urn,
+            query_id,
+        ) = self._get_fine_grained_key(fine_grained_lineage)
+        for upstream_urn in fine_grained_lineage.upstreams or []:
+            self._add_patch(
+                aspect_name,
+                "remove",
+                path=(*path, transform_op, downstream_urn, query_id, upstream_urn),
+                value={},
+            )
+        return self
+
+    def set_fine_grained_lineages(
+        self, fine_grained_lineages: List[FineGrainedLineage]
+    ) -> Self:
+        aspect_name, path = self._fine_grained_lineage_location()
+        self._add_patch(
+            aspect_name,
+            "add",
+            path=path,
+            value=fine_grained_lineages,
+        )
+        return self
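Each fine-grained lineage edge is keyed by (transformOperation, downstream URN, query), with "NONE" standing in for unset parts, and exactly one downstream is required; every upstream URN then becomes one patch op at (*path, transform_op, downstream_urn, query_id, upstream_urn). A small sketch of a valid input (URNs are illustrative):

from datahub.metadata.schema_classes import (
    FineGrainedLineageClass,
    FineGrainedLineageDownstreamTypeClass,
    FineGrainedLineageUpstreamTypeClass,
)

fgl = FineGrainedLineageClass(
    upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
    downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
    upstreams=[
        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,src,PROD),id)"
    ],
    downstreams=[
        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,dst,PROD),id)"
    ],
)
# _get_fine_grained_key(fgl) yields ("NONE", <the downstream urn>, "NONE")
# because transformOperation and query are unset.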
datahub/specific/datajob.py
CHANGED

@@ -1,15 +1,19 @@
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Set, Tuple, Union

 from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
 from datahub.metadata.schema_classes import (
     DataJobInfoClass as DataJobInfo,
     DataJobInputOutputClass as DataJobInputOutput,
     EdgeClass as Edge,
+    FineGrainedLineageClass as FineGrainedLineage,
     KafkaAuditHeaderClass,
     SystemMetadataClass,
 )
 from datahub.metadata.urns import SchemaFieldUrn, Urn
 from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
+from datahub.specific.aspect_helpers.fine_grained_lineage import (
+    HasFineGrainedLineagePatch,
+)
 from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
 from datahub.specific.aspect_helpers.tags import HasTagsPatch
 from datahub.specific.aspect_helpers.terms import HasTermsPatch

@@ -20,6 +24,7 @@ class DataJobPatchBuilder(
     HasCustomPropertiesPatch,
     HasTagsPatch,
     HasTermsPatch,
+    HasFineGrainedLineagePatch,
     MetadataPatchProposal,
 ):
     def __init__(

@@ -40,10 +45,19 @@ class DataJobPatchBuilder(
             urn, system_metadata=system_metadata, audit_header=audit_header
         )

+        # Track fine-grained lineages for DataJob-specific handling
+        self._fine_grained_lineages_to_add: List[FineGrainedLineage] = []
+        self._fine_grained_lineage_keys_to_remove: Set[Tuple[str, str, str]] = set()
+        self._fine_grained_lineages_set: Optional[List[FineGrainedLineage]] = None
+
     @classmethod
     def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
         return DataJobInfo.ASPECT_NAME, ("customProperties",)

+    @classmethod
+    def _fine_grained_lineage_location(cls) -> Tuple[str, PatchPath]:
+        return DataJobInputOutput.ASPECT_NAME, ("fineGrainedLineages",)
+
     def add_input_datajob(self, input: Union[Edge, Urn, str]) -> "DataJobPatchBuilder":
         """
         Adds an input data job to the DataJobPatchBuilder.
datahub/specific/dataset.py
CHANGED

@@ -1,3 +1,4 @@
+import warnings
 from typing import Generic, List, Optional, Tuple, TypeVar, Union

 from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath

@@ -17,6 +18,9 @@ from datahub.metadata.schema_classes import (
 )
 from datahub.metadata.urns import DatasetUrn, TagUrn, Urn
 from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
+from datahub.specific.aspect_helpers.fine_grained_lineage import (
+    HasFineGrainedLineagePatch,
+)
 from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
 from datahub.specific.aspect_helpers.structured_properties import (
     HasStructuredPropertiesPatch,

@@ -99,6 +103,7 @@ class DatasetPatchBuilder(
     HasStructuredPropertiesPatch,
     HasTagsPatch,
     HasTermsPatch,
+    HasFineGrainedLineagePatch,
     MetadataPatchProposal,
 ):
     def __init__(

@@ -115,6 +120,10 @@ class DatasetPatchBuilder(
     def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
         return DatasetProperties.ASPECT_NAME, ("customProperties",)

+    @classmethod
+    def _fine_grained_lineage_location(cls) -> Tuple[str, PatchPath]:
+        return UpstreamLineage.ASPECT_NAME, ("fineGrainedLineages",)
+
     def add_upstream_lineage(self, upstream: Upstream) -> "DatasetPatchBuilder":
         self._add_patch(
             UpstreamLineage.ASPECT_NAME,

@@ -144,75 +153,44 @@ class DatasetPatchBuilder(
     def add_fine_grained_upstream_lineage(
         self, fine_grained_lineage: FineGrainedLineage
     ) -> "DatasetPatchBuilder":
-        (
-            transform_op,
-            downstream_urn,
-            query_id,
-        ) = self.get_fine_grained_key(fine_grained_lineage)
-        for upstream_urn in fine_grained_lineage.upstreams or []:
-            self._add_patch(
-                UpstreamLineage.ASPECT_NAME,
-                "add",
-                path=self._build_fine_grained_path(
-                    transform_op, downstream_urn, query_id, upstream_urn
-                ),
-                value={"confidenceScore": fine_grained_lineage.confidenceScore},
-            )
-        return self
-
-    @staticmethod
-    def get_fine_grained_key(
-        fine_grained_lineage: FineGrainedLineage,
-    ) -> Tuple[str, str, str]:
-        downstreams = fine_grained_lineage.downstreams or []
-        if len(downstreams) != 1:
-            raise TypeError("Cannot patch with more or less than one downstream.")
-        transform_op = fine_grained_lineage.transformOperation or "NONE"
-        downstream_urn = downstreams[0]
-        query_id = fine_grained_lineage.query or "NONE"
-        return transform_op, downstream_urn, query_id
-
-    @classmethod
-    def _build_fine_grained_path(
-        cls, transform_op: str, downstream_urn: str, query_id: str, upstream_urn: str
-    ) -> PatchPath:
-        return (
-            "fineGrainedLineages",
-            transform_op,
-            downstream_urn,
-            query_id,
-            upstream_urn,
+        """
+        Deprecated: Use `add_fine_grained_lineage` instead.
+        """
+        warnings.warn(
+            "add_fine_grained_upstream_lineage() is deprecated."
+            " Use add_fine_grained_lineage() instead.",
+            DeprecationWarning,
+            stacklevel=2,
         )
+        return self.add_fine_grained_lineage(fine_grained_lineage)

     def remove_fine_grained_upstream_lineage(
         self, fine_grained_lineage: FineGrainedLineage
     ) -> "DatasetPatchBuilder":
-        (
-            transform_op,
-            downstream_urn,
-            query_id,
-        ) = self.get_fine_grained_key(fine_grained_lineage)
-        for upstream_urn in fine_grained_lineage.upstreams or []:
-            self._add_patch(
-                UpstreamLineage.ASPECT_NAME,
-                "remove",
-                path=self._build_fine_grained_path(
-                    transform_op, downstream_urn, query_id, upstream_urn
-                ),
-                value={},
-            )
-        return self
+        """
+        Deprecated: Use `remove_fine_grained_lineage` instead.
+        """
+        warnings.warn(
+            "remove_fine_grained_upstream_lineage() is deprecated."
+            " Use remove_fine_grained_lineage() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.remove_fine_grained_lineage(fine_grained_lineage)

     def set_fine_grained_upstream_lineages(
         self, fine_grained_lineages: List[FineGrainedLineage]
     ) -> "DatasetPatchBuilder":
-        self._add_patch(
-            UpstreamLineage.ASPECT_NAME,
-            "add",
-            path=("fineGrainedLineages",),
-            value=fine_grained_lineages,
+        """
+        Deprecated: Use `set_fine_grained_lineages` instead.
+        """
+        warnings.warn(
+            "set_fine_grained_upstream_lineages() is deprecated."
+            " Use set_fine_grained_lineages() instead.",
+            DeprecationWarning,
+            stacklevel=2,
        )
+        return self.set_fine_grained_lineages(fine_grained_lineages)

     def for_field(
         self, field_path: str, editable: bool = True
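The migration is mechanical: the old *_upstream_lineage names now delegate to the new mixin methods and emit DeprecationWarning. A hedged before/after sketch (URNs are illustrative):

from datahub.metadata.schema_classes import FineGrainedLineageClass
from datahub.specific.dataset import DatasetPatchBuilder

fgl = FineGrainedLineageClass(
    upstreamType="FIELD_SET",
    downstreamType="FIELD",
    upstreams=["urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,src,PROD),id)"],
    downstreams=["urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,dst,PROD),id)"],
)
patch = DatasetPatchBuilder("urn:li:dataset:(urn:li:dataPlatform:hive,dst,PROD)")
patch.add_fine_grained_upstream_lineage(fgl)  # deprecated alias, still works
patch.add_fine_grained_lineage(fgl)           # preferred replacement
mcps = patch.build()  # MetadataChangeProposal patches, ready to emit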
datahub/utilities/server_config_util.py
CHANGED

@@ -242,7 +242,8 @@ class RestServiceConfig:

         # Check if this is a config-based feature
         if feature in config_based_features:
-            return config_based_features[feature]()
+            result = config_based_features[feature]()
+            return bool(result) if result is not None else False

         # For environment-based features, determine requirements based on cloud vs. non-cloud
         deployment_type = "cloud" if self.is_datahub_cloud else "core"
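The fix normalizes the feature lookup to a strict bool instead of returning whatever the config callable produced. The same coercion in isolation:

def coerce_flag(result):
    # None -> False; anything else -> its truthiness as a real bool.
    return bool(result) if result is not None else False

assert coerce_flag(None) is False
assert coerce_flag(0) is False
assert coerce_flag("enabled") is True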