acryl-datahub 0.15.0rc6__py3-none-any.whl → 0.15.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc7.dist-info}/METADATA +2317 -2317
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc7.dist-info}/RECORD +29 -29
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
- datahub/cli/put_cli.py +1 -1
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/emitter/mcp_patch_builder.py +43 -0
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
- datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/csv_enricher.py +1 -1
- datahub/ingestion/source/dremio/dremio_source.py +4 -2
- datahub/ingestion/source/elastic_search.py +1 -1
- datahub/ingestion/source/gc/dataprocess_cleanup.py +6 -1
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
- datahub/ingestion/source/ge_data_profiler.py +23 -1
- datahub/ingestion/source/sql/sql_types.py +14 -2
- datahub/ingestion/transformer/add_dataset_tags.py +1 -1
- datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
- datahub/integrations/assertion/common.py +1 -1
- datahub/lite/duckdb_lite.py +12 -17
- datahub/specific/chart.py +0 -39
- datahub/specific/dashboard.py +0 -39
- datahub/specific/datajob.py +3 -47
- datahub/utilities/urns/_urn_base.py +1 -1
- datahub/utilities/urns/structured_properties_urn.py +1 -1
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc7.dist-info}/top_level.txt +0 -0
datahub/lite/duckdb_lite.py
CHANGED
|
@@ -609,7 +609,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
609
609
|
aspect_map, DataPlatformInstanceClass
|
|
610
610
|
) # type: ignore
|
|
611
611
|
|
|
612
|
-
needs_platform = Urn.create_from_string(entity_urn).get_type() in [
|
|
612
|
+
needs_platform = Urn.from_string(entity_urn).get_type() in [
|
|
613
613
|
"dataset",
|
|
614
614
|
"container",
|
|
615
615
|
"chart",
|
|
@@ -617,7 +617,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
617
617
|
"dataFlow",
|
|
618
618
|
"dataJob",
|
|
619
619
|
]
|
|
620
|
-
entity_urn_parsed = Urn.create_from_string(entity_urn)
|
|
620
|
+
entity_urn_parsed = Urn.from_string(entity_urn)
|
|
621
621
|
if entity_urn_parsed.get_type() in ["dataFlow", "dataJob"]:
|
|
622
622
|
self.add_edge(
|
|
623
623
|
entity_urn,
|
|
@@ -630,15 +630,12 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
630
630
|
# this is a top-level entity
|
|
631
631
|
if not dpi:
|
|
632
632
|
logger.debug(f"No data platform instance for {entity_urn}")
|
|
633
|
-
maybe_parent_urn = Urn.create_from_string(entity_urn).get_entity_id()[0]
|
|
633
|
+
maybe_parent_urn = Urn.from_string(entity_urn).get_entity_id()[0]
|
|
634
634
|
needs_dpi = False
|
|
635
635
|
if maybe_parent_urn.startswith(Urn.URN_PREFIX):
|
|
636
636
|
parent_urn = maybe_parent_urn
|
|
637
|
-
if (
|
|
638
|
-
Urn.create_from_string(maybe_parent_urn).get_type()
|
|
639
|
-
== "dataPlatform"
|
|
640
|
-
):
|
|
641
|
-
data_platform_urn = DataPlatformUrn.create_from_string(
|
|
637
|
+
if Urn.from_string(maybe_parent_urn).get_type() == "dataPlatform":
|
|
638
|
+
data_platform_urn = DataPlatformUrn.from_string(
|
|
642
639
|
maybe_parent_urn
|
|
643
640
|
)
|
|
644
641
|
needs_dpi = True
|
|
@@ -660,7 +657,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
660
657
|
logger.error(f"Failed to generate edges entity {entity_urn}", e)
|
|
661
658
|
parent_urn = str(data_platform_instance_urn)
|
|
662
659
|
else:
|
|
663
|
-
data_platform_urn = DataPlatformUrn.create_from_string(dpi.platform)
|
|
660
|
+
data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
|
|
664
661
|
data_platform_instance = dpi.instance or "default"
|
|
665
662
|
data_platform_instance_urn = Urn(
|
|
666
663
|
entity_type="dataPlatformInstance",
|
|
@@ -673,9 +670,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
673
670
|
parent_urn = "__root__"
|
|
674
671
|
|
|
675
672
|
types = (
|
|
676
|
-
subtypes.typeNames
|
|
677
|
-
if subtypes
|
|
678
|
-
else [Urn.create_from_string(entity_urn).get_type()]
|
|
673
|
+
subtypes.typeNames if subtypes else [Urn.from_string(entity_urn).get_type()]
|
|
679
674
|
)
|
|
680
675
|
for t in types:
|
|
681
676
|
type_urn = Urn(entity_type="systemNode", entity_id=[parent_urn, t])
|
|
@@ -686,7 +681,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
686
681
|
def _create_edges_from_data_platform_instance(
|
|
687
682
|
self, data_platform_instance_urn: Urn
|
|
688
683
|
) -> None:
|
|
689
|
-
data_platform_urn = DataPlatformUrn.create_from_string(
|
|
684
|
+
data_platform_urn = DataPlatformUrn.from_string(
|
|
690
685
|
data_platform_instance_urn.get_entity_id()[0]
|
|
691
686
|
)
|
|
692
687
|
data_platform_instances_urn = Urn(
|
|
@@ -735,7 +730,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
735
730
|
if isinstance(aspect, DatasetPropertiesClass):
|
|
736
731
|
dp: DatasetPropertiesClass = aspect
|
|
737
732
|
if dp.name:
|
|
738
|
-
specific_urn = DatasetUrn.create_from_string(entity_urn)
|
|
733
|
+
specific_urn = DatasetUrn.from_string(entity_urn)
|
|
739
734
|
if (
|
|
740
735
|
specific_urn.get_data_platform_urn().get_entity_id_as_string()
|
|
741
736
|
== "looker"
|
|
@@ -755,7 +750,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
755
750
|
self.add_edge(entity_urn, "name", cp.name, remove_existing=True)
|
|
756
751
|
elif isinstance(aspect, DataPlatformInstanceClass):
|
|
757
752
|
dpi: DataPlatformInstanceClass = aspect
|
|
758
|
-
data_platform_urn = DataPlatformUrn.create_from_string(dpi.platform)
|
|
753
|
+
data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
|
|
759
754
|
data_platform_instance = dpi.instance or "default"
|
|
760
755
|
data_platform_instance_urn = Urn(
|
|
761
756
|
entity_type="dataPlatformInstance",
|
|
@@ -763,7 +758,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
763
758
|
)
|
|
764
759
|
self._create_edges_from_data_platform_instance(data_platform_instance_urn)
|
|
765
760
|
elif isinstance(aspect, ChartInfoClass):
|
|
766
|
-
urn = Urn.create_from_string(entity_urn)
|
|
761
|
+
urn = Urn.from_string(entity_urn)
|
|
767
762
|
self.add_edge(
|
|
768
763
|
entity_urn,
|
|
769
764
|
"name",
|
|
@@ -771,7 +766,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
771
766
|
remove_existing=True,
|
|
772
767
|
)
|
|
773
768
|
elif isinstance(aspect, DashboardInfoClass):
|
|
774
|
-
urn = Urn.create_from_string(entity_urn)
|
|
769
|
+
urn = Urn.from_string(entity_urn)
|
|
775
770
|
self.add_edge(
|
|
776
771
|
entity_urn,
|
|
777
772
|
"name",
|
datahub/specific/chart.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
import time
|
|
2
1
|
from typing import Dict, List, Optional, Union
|
|
3
2
|
|
|
4
3
|
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
5
4
|
from datahub.metadata.schema_classes import (
|
|
6
5
|
AccessLevelClass,
|
|
7
|
-
AuditStampClass,
|
|
8
6
|
ChangeAuditStampsClass,
|
|
9
7
|
ChartInfoClass as ChartInfo,
|
|
10
8
|
ChartTypeClass,
|
|
@@ -47,43 +45,6 @@ class ChartPatchBuilder(MetadataPatchProposal):
|
|
|
47
45
|
)
|
|
48
46
|
self.ownership_patch_helper = OwnershipPatchHelper(self)
|
|
49
47
|
|
|
50
|
-
def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
|
|
51
|
-
"""
|
|
52
|
-
Creates an AuditStampClass instance with the current timestamp and other default values.
|
|
53
|
-
|
|
54
|
-
Args:
|
|
55
|
-
message: The message associated with the audit stamp (optional).
|
|
56
|
-
|
|
57
|
-
Returns:
|
|
58
|
-
An instance of AuditStampClass.
|
|
59
|
-
"""
|
|
60
|
-
return AuditStampClass(
|
|
61
|
-
time=int(time.time() * 1000.0),
|
|
62
|
-
actor="urn:li:corpuser:datahub",
|
|
63
|
-
message=message,
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
def _ensure_urn_type(
|
|
67
|
-
self, entity_type: str, edges: List[Edge], context: str
|
|
68
|
-
) -> None:
|
|
69
|
-
"""
|
|
70
|
-
Ensures that the destination URNs in the given edges have the specified entity type.
|
|
71
|
-
|
|
72
|
-
Args:
|
|
73
|
-
entity_type: The entity type to check against.
|
|
74
|
-
edges: A list of Edge objects.
|
|
75
|
-
context: The context or description of the operation.
|
|
76
|
-
|
|
77
|
-
Raises:
|
|
78
|
-
ValueError: If any of the destination URNs is not of the specified entity type.
|
|
79
|
-
"""
|
|
80
|
-
for e in edges:
|
|
81
|
-
urn = Urn.create_from_string(e.destinationUrn)
|
|
82
|
-
if not urn.get_type() == entity_type:
|
|
83
|
-
raise ValueError(
|
|
84
|
-
f"{context}: {e.destinationUrn} is not of type {entity_type}"
|
|
85
|
-
)
|
|
86
|
-
|
|
87
48
|
def add_owner(self, owner: Owner) -> "ChartPatchBuilder":
|
|
88
49
|
"""
|
|
89
50
|
Adds an owner to the ChartPatchBuilder.
|
datahub/specific/dashboard.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
import time
|
|
2
1
|
from typing import Dict, List, Optional, Union
|
|
3
2
|
|
|
4
3
|
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
5
4
|
from datahub.metadata.schema_classes import (
|
|
6
5
|
AccessLevelClass,
|
|
7
|
-
AuditStampClass,
|
|
8
6
|
ChangeAuditStampsClass,
|
|
9
7
|
DashboardInfoClass as DashboardInfo,
|
|
10
8
|
EdgeClass as Edge,
|
|
@@ -46,43 +44,6 @@ class DashboardPatchBuilder(MetadataPatchProposal):
|
|
|
46
44
|
)
|
|
47
45
|
self.ownership_patch_helper = OwnershipPatchHelper(self)
|
|
48
46
|
|
|
49
|
-
def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
|
|
50
|
-
"""
|
|
51
|
-
Creates an AuditStampClass instance with the current timestamp and other default values.
|
|
52
|
-
|
|
53
|
-
Args:
|
|
54
|
-
message: The message associated with the audit stamp (optional).
|
|
55
|
-
|
|
56
|
-
Returns:
|
|
57
|
-
An instance of AuditStampClass.
|
|
58
|
-
"""
|
|
59
|
-
return AuditStampClass(
|
|
60
|
-
time=int(time.time() * 1000.0),
|
|
61
|
-
actor="urn:li:corpuser:datahub",
|
|
62
|
-
message=message,
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
def _ensure_urn_type(
|
|
66
|
-
self, entity_type: str, edges: List[Edge], context: str
|
|
67
|
-
) -> None:
|
|
68
|
-
"""
|
|
69
|
-
Ensures that the destination URNs in the given edges have the specified entity type.
|
|
70
|
-
|
|
71
|
-
Args:
|
|
72
|
-
entity_type: The entity type to check against.
|
|
73
|
-
edges: A list of Edge objects.
|
|
74
|
-
context: The context or description of the operation.
|
|
75
|
-
|
|
76
|
-
Raises:
|
|
77
|
-
ValueError: If any of the destination URNs is not of the specified entity type.
|
|
78
|
-
"""
|
|
79
|
-
for e in edges:
|
|
80
|
-
urn = Urn.create_from_string(e.destinationUrn)
|
|
81
|
-
if not urn.get_type() == entity_type:
|
|
82
|
-
raise ValueError(
|
|
83
|
-
f"{context}: {e.destinationUrn} is not of type {entity_type}"
|
|
84
|
-
)
|
|
85
|
-
|
|
86
47
|
def add_owner(self, owner: Owner) -> "DashboardPatchBuilder":
|
|
87
48
|
"""
|
|
88
49
|
Adds an owner to the DashboardPatchBuilder.
|
datahub/specific/datajob.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
import time
|
|
2
1
|
from typing import Dict, List, Optional, Union
|
|
3
2
|
|
|
4
3
|
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
5
4
|
from datahub.metadata.schema_classes import (
|
|
6
|
-
AuditStampClass,
|
|
7
5
|
DataJobInfoClass as DataJobInfo,
|
|
8
6
|
DataJobInputOutputClass as DataJobInputOutput,
|
|
9
7
|
EdgeClass as Edge,
|
|
@@ -16,10 +14,9 @@ from datahub.metadata.schema_classes import (
|
|
|
16
14
|
SystemMetadataClass,
|
|
17
15
|
TagAssociationClass as Tag,
|
|
18
16
|
)
|
|
17
|
+
from datahub.metadata.urns import SchemaFieldUrn, TagUrn, Urn
|
|
19
18
|
from datahub.specific.custom_properties import CustomPropertiesPatchHelper
|
|
20
19
|
from datahub.specific.ownership import OwnershipPatchHelper
|
|
21
|
-
from datahub.utilities.urns.tag_urn import TagUrn
|
|
22
|
-
from datahub.utilities.urns.urn import Urn
|
|
23
20
|
|
|
24
21
|
|
|
25
22
|
class DataJobPatchBuilder(MetadataPatchProposal):
|
|
@@ -45,43 +42,6 @@ class DataJobPatchBuilder(MetadataPatchProposal):
|
|
|
45
42
|
)
|
|
46
43
|
self.ownership_patch_helper = OwnershipPatchHelper(self)
|
|
47
44
|
|
|
48
|
-
def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
|
|
49
|
-
"""
|
|
50
|
-
Creates an AuditStampClass instance with the current timestamp and other default values.
|
|
51
|
-
|
|
52
|
-
Args:
|
|
53
|
-
message: The message associated with the audit stamp (optional).
|
|
54
|
-
|
|
55
|
-
Returns:
|
|
56
|
-
An instance of AuditStampClass.
|
|
57
|
-
"""
|
|
58
|
-
return AuditStampClass(
|
|
59
|
-
time=int(time.time() * 1000.0),
|
|
60
|
-
actor="urn:li:corpuser:datahub",
|
|
61
|
-
message=message,
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
def _ensure_urn_type(
|
|
65
|
-
self, entity_type: str, edges: List[Edge], context: str
|
|
66
|
-
) -> None:
|
|
67
|
-
"""
|
|
68
|
-
Ensures that the destination URNs in the given edges have the specified entity type.
|
|
69
|
-
|
|
70
|
-
Args:
|
|
71
|
-
entity_type: The entity type to check against.
|
|
72
|
-
edges: A list of Edge objects.
|
|
73
|
-
context: The context or description of the operation.
|
|
74
|
-
|
|
75
|
-
Raises:
|
|
76
|
-
ValueError: If any of the destination URNs is not of the specified entity type.
|
|
77
|
-
"""
|
|
78
|
-
for e in edges:
|
|
79
|
-
urn = Urn.create_from_string(e.destinationUrn)
|
|
80
|
-
if not urn.get_type() == entity_type:
|
|
81
|
-
raise ValueError(
|
|
82
|
-
f"{context}: {e.destinationUrn} is not of type {entity_type}"
|
|
83
|
-
)
|
|
84
|
-
|
|
85
45
|
def add_owner(self, owner: Owner) -> "DataJobPatchBuilder":
|
|
86
46
|
"""
|
|
87
47
|
Adds an owner to the DataJobPatchBuilder.
|
|
@@ -392,9 +352,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
|
|
|
392
352
|
ValueError: If the input is not a Schema Field urn.
|
|
393
353
|
"""
|
|
394
354
|
input_urn = str(input)
|
|
395
|
-
urn = Urn.create_from_string(input_urn)
|
|
396
|
-
if not urn.get_type() == "schemaField":
|
|
397
|
-
raise ValueError(f"Input {input} is not a Schema Field urn")
|
|
355
|
+
assert SchemaFieldUrn.from_string(input_urn)
|
|
398
356
|
|
|
399
357
|
self._add_patch(
|
|
400
358
|
DataJobInputOutput.ASPECT_NAME,
|
|
@@ -466,9 +424,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
|
|
|
466
424
|
ValueError: If the output is not a Schema Field urn.
|
|
467
425
|
"""
|
|
468
426
|
output_urn = str(output)
|
|
469
|
-
urn = Urn.create_from_string(output_urn)
|
|
470
|
-
if not urn.get_type() == "schemaField":
|
|
471
|
-
raise ValueError(f"Input {output} is not a Schema Field urn")
|
|
427
|
+
assert SchemaFieldUrn.from_string(output_urn)
|
|
472
428
|
|
|
473
429
|
self._add_patch(
|
|
474
430
|
DataJobInputOutput.ASPECT_NAME,
|
|
datahub/utilities/urns/structured_properties_urn.py
CHANGED
|
@@ -4,4 +4,4 @@ __all__ = ["StructuredPropertyUrn", "make_structured_property_urn"]
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def make_structured_property_urn(structured_property_id: str) -> str:
|
|
7
|
-
return str(StructuredPropertyUrn.create_from_string(structured_property_id))
|
|
7
|
+
return str(StructuredPropertyUrn.from_string(structured_property_id))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|