acryl-datahub 0.15.0.5rc10__py3-none-any.whl → 0.15.0.6rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/METADATA +2482 -2482
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/RECORD +35 -24
- datahub/_version.py +1 -1
- datahub/errors.py +35 -0
- datahub/ingestion/source/common/subtypes.py +1 -0
- datahub/ingestion/source/mongodb.py +17 -16
- datahub/ingestion/source/powerbi/config.py +1 -0
- datahub/ingestion/source/powerbi/powerbi.py +28 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +6 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +11 -36
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +17 -4
- datahub/ingestion/source/s3/source.py +14 -5
- datahub/ingestion/source/snowflake/constants.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +45 -10
- datahub/ingestion/source/snowflake/snowflake_query.py +20 -1
- datahub/ingestion/source/snowflake/snowflake_report.py +6 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +108 -4
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +298 -69
- datahub/ingestion/source/snowflake/snowflake_utils.py +17 -8
- datahub/ingestion/source/snowflake/snowflake_v2.py +15 -3
- datahub/sdk/__init__.py +33 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_attribution.py +48 -0
- datahub/sdk/_entity.py +89 -0
- datahub/sdk/_shared.py +338 -0
- datahub/sdk/container.py +193 -0
- datahub/sdk/dataset.py +584 -0
- datahub/sdk/entity_client.py +115 -0
- datahub/sdk/main_client.py +56 -0
- datahub/sdk/resolver_client.py +101 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/top_level.txt +0 -0
|
datahub/ingestion/source/snowflake/snowflake_utils.py
|
@@ -124,19 +124,20 @@ class SnowflakeFilter:
|
|
|
124
124
|
SnowflakeObjectDomain.VIEW,
|
|
125
125
|
SnowflakeObjectDomain.MATERIALIZED_VIEW,
|
|
126
126
|
SnowflakeObjectDomain.ICEBERG_TABLE,
|
|
127
|
+
SnowflakeObjectDomain.STREAM,
|
|
127
128
|
):
|
|
128
129
|
return False
|
|
129
130
|
if _is_sys_table(dataset_name):
|
|
130
131
|
return False
|
|
131
132
|
|
|
132
|
-
dataset_params = _split_qualified_name(dataset_name)
|
|
133
|
+
dataset_params = split_qualified_name(dataset_name)
|
|
133
134
|
if len(dataset_params) != 3:
|
|
134
135
|
self.structured_reporter.info(
|
|
135
136
|
title="Unexpected dataset pattern",
|
|
136
137
|
message=f"Found a {dataset_type} with an unexpected number of parts. Database and schema filtering will not work as expected, but table filtering will still work.",
|
|
137
138
|
context=dataset_name,
|
|
138
139
|
)
|
|
139
|
-
# We fall-through here so table/view filtering still works.
|
|
140
|
+
# We fall-through here so table/view/stream filtering still works.
|
|
140
141
|
|
|
141
142
|
if (
|
|
142
143
|
len(dataset_params) >= 1
|
|
@@ -169,6 +170,14 @@ class SnowflakeFilter:
|
|
|
169
170
|
):
|
|
170
171
|
return False
|
|
171
172
|
|
|
173
|
+
if (
|
|
174
|
+
dataset_type.lower() == SnowflakeObjectDomain.STREAM
|
|
175
|
+
and not self.filter_config.stream_pattern.allowed(
|
|
176
|
+
_cleanup_qualified_name(dataset_name, self.structured_reporter)
|
|
177
|
+
)
|
|
178
|
+
):
|
|
179
|
+
return False
|
|
180
|
+
|
|
172
181
|
return True
|
|
173
182
|
|
|
174
183
|
|
|
@@ -183,17 +192,17 @@ def _is_sys_table(table_name: str) -> bool:
|
|
|
183
192
|
return table_name.lower().startswith("sys$")
|
|
184
193
|
|
|
185
194
|
|
|
186
|
-
def _split_qualified_name(qualified_name: str) -> List[str]:
|
|
195
|
+
def split_qualified_name(qualified_name: str) -> List[str]:
|
|
187
196
|
"""
|
|
188
197
|
Split a qualified name into its constituent parts.
|
|
189
198
|
|
|
190
|
-
>>> _split_qualified_name("db.my_schema.my_table")
|
|
199
|
+
>>> split_qualified_name("db.my_schema.my_table")
|
|
191
200
|
['db', 'my_schema', 'my_table']
|
|
192
|
-
>>> _split_qualified_name('"db"."my_schema"."my_table"')
|
|
201
|
+
>>> split_qualified_name('"db"."my_schema"."my_table"')
|
|
193
202
|
['db', 'my_schema', 'my_table']
|
|
194
|
-
>>> _split_qualified_name('TEST_DB.TEST_SCHEMA."TABLE.WITH.DOTS"')
|
|
203
|
+
>>> split_qualified_name('TEST_DB.TEST_SCHEMA."TABLE.WITH.DOTS"')
|
|
195
204
|
['TEST_DB', 'TEST_SCHEMA', 'TABLE.WITH.DOTS']
|
|
196
|
-
>>> _split_qualified_name('TEST_DB."SCHEMA.WITH.DOTS".MY_TABLE')
|
|
205
|
+
>>> split_qualified_name('TEST_DB."SCHEMA.WITH.DOTS".MY_TABLE')
|
|
197
206
|
['TEST_DB', 'SCHEMA.WITH.DOTS', 'MY_TABLE']
|
|
198
207
|
"""
|
|
199
208
|
|
|
@@ -231,7 +240,7 @@ def _split_qualified_name(qualified_name: str) -> List[str]:
|
|
|
231
240
|
def _cleanup_qualified_name(
|
|
232
241
|
qualified_name: str, structured_reporter: SourceReport
|
|
233
242
|
) -> str:
|
|
234
|
-
name_parts = _split_qualified_name(qualified_name)
|
|
243
|
+
name_parts = split_qualified_name(qualified_name)
|
|
235
244
|
if len(name_parts) != 3:
|
|
236
245
|
if not _is_sys_table(qualified_name):
|
|
237
246
|
structured_reporter.info(
|
|
datahub/ingestion/source/snowflake/snowflake_v2.py
|
@@ -539,15 +539,27 @@ class SnowflakeV2Source(
|
|
|
539
539
|
for schema in db.schemas
|
|
540
540
|
for table_name in schema.views
|
|
541
541
|
]
|
|
542
|
+
discovered_streams: List[str] = [
|
|
543
|
+
self.identifiers.get_dataset_identifier(stream_name, schema.name, db.name)
|
|
544
|
+
for db in databases
|
|
545
|
+
for schema in db.schemas
|
|
546
|
+
for stream_name in schema.streams
|
|
547
|
+
]
|
|
542
548
|
|
|
543
|
-
if len(discovered_tables) == 0 and len(discovered_views) == 0:
|
|
549
|
+
if (
|
|
550
|
+
len(discovered_tables) == 0
|
|
551
|
+
and len(discovered_views) == 0
|
|
552
|
+
and len(discovered_streams) == 0
|
|
553
|
+
):
|
|
544
554
|
self.structured_reporter.failure(
|
|
545
555
|
GENERIC_PERMISSION_ERROR_KEY,
|
|
546
|
-
"No tables/views found. Please check permissions.",
|
|
556
|
+
"No tables/views/streams found. Please check permissions.",
|
|
547
557
|
)
|
|
548
558
|
return
|
|
549
559
|
|
|
550
|
-
self.discovered_datasets = discovered_tables + discovered_views
|
|
560
|
+
self.discovered_datasets = (
|
|
561
|
+
discovered_tables + discovered_views + discovered_streams
|
|
562
|
+
)
|
|
551
563
|
|
|
552
564
|
if self.config.use_queries_v2:
|
|
553
565
|
with self.report.new_stage(f"*: {VIEW_PARSING}"):
|
datahub/sdk/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
import datahub.metadata.schema_classes as models
|
|
4
|
+
from datahub.errors import ExperimentalWarning, SdkUsageError
|
|
5
|
+
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
6
|
+
from datahub.metadata.urns import (
|
|
7
|
+
ChartUrn,
|
|
8
|
+
ContainerUrn,
|
|
9
|
+
CorpGroupUrn,
|
|
10
|
+
CorpUserUrn,
|
|
11
|
+
DashboardUrn,
|
|
12
|
+
DataPlatformInstanceUrn,
|
|
13
|
+
DataPlatformUrn,
|
|
14
|
+
DatasetUrn,
|
|
15
|
+
DomainUrn,
|
|
16
|
+
GlossaryTermUrn,
|
|
17
|
+
SchemaFieldUrn,
|
|
18
|
+
TagUrn,
|
|
19
|
+
)
|
|
20
|
+
from datahub.sdk.container import Container
|
|
21
|
+
from datahub.sdk.dataset import Dataset
|
|
22
|
+
from datahub.sdk.main_client import DataHubClient
|
|
23
|
+
|
|
24
|
+
warnings.warn(
|
|
25
|
+
"The new datahub SDK (e.g. datahub.sdk.*) is experimental. "
|
|
26
|
+
"Our typical backwards-compatibility and stability guarantees do not apply to this code. "
|
|
27
|
+
"When it's promoted to stable, the import path will change "
|
|
28
|
+
"from `from datahub.sdk import ...` to `from datahub import ...`.",
|
|
29
|
+
ExperimentalWarning,
|
|
30
|
+
stacklevel=2,
|
|
31
|
+
)
|
|
32
|
+
del warnings
|
|
33
|
+
del ExperimentalWarning
|
|
datahub/sdk/_all_entities.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from typing import Dict, List, Type
|
|
2
|
+
|
|
3
|
+
from datahub.sdk._entity import Entity
|
|
4
|
+
from datahub.sdk.container import Container
|
|
5
|
+
from datahub.sdk.dataset import Dataset
|
|
6
|
+
|
|
7
|
+
# TODO: Is there a better way to declare this?
|
|
8
|
+
ENTITY_CLASSES_LIST: List[Type[Entity]] = [
|
|
9
|
+
Container,
|
|
10
|
+
Dataset,
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
ENTITY_CLASSES: Dict[str, Type[Entity]] = {
|
|
14
|
+
cls.get_urn_type().ENTITY_TYPE: cls for cls in ENTITY_CLASSES_LIST
|
|
15
|
+
}
|
|
datahub/sdk/_attribution.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
from typing import Iterator
|
|
5
|
+
|
|
6
|
+
from datahub.utilities.str_enum import StrEnum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class KnownAttribution(StrEnum):
|
|
10
|
+
INGESTION = "INGESTION"
|
|
11
|
+
INGESTION_ALTERNATE = "INGESTION_ALTERNATE"
|
|
12
|
+
|
|
13
|
+
UI = "UI"
|
|
14
|
+
SDK = "SDK"
|
|
15
|
+
|
|
16
|
+
PROPAGATION = "PROPAGATION"
|
|
17
|
+
|
|
18
|
+
def is_ingestion(self) -> bool:
|
|
19
|
+
return self in (
|
|
20
|
+
KnownAttribution.INGESTION,
|
|
21
|
+
KnownAttribution.INGESTION_ALTERNATE,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
_default_attribution = KnownAttribution.SDK
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_default_attribution() -> KnownAttribution:
|
|
29
|
+
return _default_attribution
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def set_default_attribution(attribution: KnownAttribution) -> None:
|
|
33
|
+
global _default_attribution
|
|
34
|
+
_default_attribution = attribution
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@contextlib.contextmanager
|
|
38
|
+
def change_default_attribution(attribution: KnownAttribution) -> Iterator[None]:
|
|
39
|
+
old_attribution = get_default_attribution()
|
|
40
|
+
try:
|
|
41
|
+
set_default_attribution(attribution)
|
|
42
|
+
yield
|
|
43
|
+
finally:
|
|
44
|
+
set_default_attribution(old_attribution)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def is_ingestion_attribution() -> bool:
|
|
48
|
+
return get_default_attribution().is_ingestion()
|
datahub/sdk/_entity.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import List, Optional, Type, Union
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Self
|
|
5
|
+
|
|
6
|
+
import datahub.metadata.schema_classes as models
|
|
7
|
+
from datahub.emitter.mce_builder import Aspect as AspectTypeVar
|
|
8
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
9
|
+
from datahub.errors import SdkUsageError
|
|
10
|
+
from datahub.metadata.urns import Urn
|
|
11
|
+
from datahub.utilities.urns._urn_base import _SpecificUrn
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Entity:
|
|
15
|
+
__slots__ = ("_urn", "_prev_aspects", "_aspects")
|
|
16
|
+
|
|
17
|
+
def __init__(self, /, urn: Urn):
|
|
18
|
+
# This method is not meant for direct usage.
|
|
19
|
+
if type(self) is Entity:
|
|
20
|
+
raise SdkUsageError(f"{Entity.__name__} cannot be instantiated directly.")
|
|
21
|
+
|
|
22
|
+
assert isinstance(urn, self.get_urn_type())
|
|
23
|
+
self._urn: _SpecificUrn = urn
|
|
24
|
+
|
|
25
|
+
# prev_aspects is None means this was created from scratch
|
|
26
|
+
self._prev_aspects: Optional[models.AspectBag] = None
|
|
27
|
+
self._aspects: models.AspectBag = {}
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
31
|
+
# If an init method from a subclass adds required fields, it also needs to override this method.
|
|
32
|
+
# An alternative approach would call cls.__new__() to bypass the init method, but it's a bit
|
|
33
|
+
# too hacky for my taste.
|
|
34
|
+
entity = cls(urn=urn)
|
|
35
|
+
return entity._init_from_graph(current_aspects)
|
|
36
|
+
|
|
37
|
+
def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
|
|
38
|
+
self._prev_aspects = current_aspects
|
|
39
|
+
aspect: models._Aspect
|
|
40
|
+
for aspect_name, aspect in (current_aspects or {}).items(): # type: ignore
|
|
41
|
+
aspect_copy = type(aspect).from_obj(aspect.to_obj())
|
|
42
|
+
self._aspects[aspect_name] = aspect_copy # type: ignore
|
|
43
|
+
return self
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
@abc.abstractmethod
|
|
47
|
+
def get_urn_type(cls) -> Type[_SpecificUrn]: ...
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def urn(self) -> _SpecificUrn:
|
|
51
|
+
return self._urn
|
|
52
|
+
|
|
53
|
+
def _get_aspect(
|
|
54
|
+
self,
|
|
55
|
+
aspect_type: Type[AspectTypeVar],
|
|
56
|
+
/,
|
|
57
|
+
) -> Optional[AspectTypeVar]:
|
|
58
|
+
return self._aspects.get(aspect_type.ASPECT_NAME) # type: ignore
|
|
59
|
+
|
|
60
|
+
def _set_aspect(self, value: AspectTypeVar, /) -> None:
|
|
61
|
+
self._aspects[value.ASPECT_NAME] = value # type: ignore
|
|
62
|
+
|
|
63
|
+
def _setdefault_aspect(self, default_aspect: AspectTypeVar, /) -> AspectTypeVar:
|
|
64
|
+
# Similar semantics to dict.setdefault.
|
|
65
|
+
if existing_aspect := self._get_aspect(type(default_aspect)):
|
|
66
|
+
return existing_aspect
|
|
67
|
+
self._set_aspect(default_aspect)
|
|
68
|
+
return default_aspect
|
|
69
|
+
|
|
70
|
+
def _as_mcps(
|
|
71
|
+
self,
|
|
72
|
+
change_type: Union[str, models.ChangeTypeClass] = models.ChangeTypeClass.UPSERT,
|
|
73
|
+
) -> List[MetadataChangeProposalWrapper]:
|
|
74
|
+
urn_str = str(self.urn)
|
|
75
|
+
|
|
76
|
+
mcps = []
|
|
77
|
+
for aspect in self._aspects.values():
|
|
78
|
+
assert isinstance(aspect, models._Aspect)
|
|
79
|
+
mcps.append(
|
|
80
|
+
MetadataChangeProposalWrapper(
|
|
81
|
+
entityUrn=urn_str,
|
|
82
|
+
aspect=aspect,
|
|
83
|
+
changeType=change_type,
|
|
84
|
+
)
|
|
85
|
+
)
|
|
86
|
+
return mcps
|
|
87
|
+
|
|
88
|
+
def __repr__(self) -> str:
|
|
89
|
+
return f"{self.__class__.__name__}('{self.urn}')"
|
datahub/sdk/_shared.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import (
|
|
4
|
+
TYPE_CHECKING,
|
|
5
|
+
List,
|
|
6
|
+
Optional,
|
|
7
|
+
Tuple,
|
|
8
|
+
Union,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from typing_extensions import TypeAlias
|
|
12
|
+
|
|
13
|
+
import datahub.metadata.schema_classes as models
|
|
14
|
+
from datahub.emitter.mce_builder import (
|
|
15
|
+
make_ts_millis,
|
|
16
|
+
make_user_urn,
|
|
17
|
+
parse_ts_millis,
|
|
18
|
+
validate_ownership_type,
|
|
19
|
+
)
|
|
20
|
+
from datahub.emitter.mcp_builder import ContainerKey
|
|
21
|
+
from datahub.errors import MultipleSubtypesWarning, SdkUsageError
|
|
22
|
+
from datahub.metadata.urns import (
|
|
23
|
+
CorpGroupUrn,
|
|
24
|
+
CorpUserUrn,
|
|
25
|
+
DataJobUrn,
|
|
26
|
+
DataPlatformInstanceUrn,
|
|
27
|
+
DataPlatformUrn,
|
|
28
|
+
DatasetUrn,
|
|
29
|
+
DomainUrn,
|
|
30
|
+
GlossaryTermUrn,
|
|
31
|
+
OwnershipTypeUrn,
|
|
32
|
+
TagUrn,
|
|
33
|
+
Urn,
|
|
34
|
+
)
|
|
35
|
+
from datahub.sdk._entity import Entity
|
|
36
|
+
from datahub.utilities.urns.error import InvalidUrnError
|
|
37
|
+
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from datahub.sdk.container import Container
|
|
40
|
+
|
|
41
|
+
UrnOrStr: TypeAlias = Union[Urn, str]
|
|
42
|
+
DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
|
|
43
|
+
DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
|
|
44
|
+
|
|
45
|
+
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
|
|
49
|
+
if ts is None:
|
|
50
|
+
return None
|
|
51
|
+
return models.TimeStampClass(time=make_ts_millis(ts))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def parse_time_stamp(ts: Optional[models.TimeStampClass]) -> Optional[datetime]:
|
|
55
|
+
if ts is None:
|
|
56
|
+
return None
|
|
57
|
+
return parse_ts_millis(ts.time)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class HasPlatformInstance(Entity):
|
|
61
|
+
__slots__ = ()
|
|
62
|
+
|
|
63
|
+
def _set_platform_instance(
|
|
64
|
+
self,
|
|
65
|
+
platform: Union[str, DataPlatformUrn],
|
|
66
|
+
instance: Union[None, str, DataPlatformInstanceUrn],
|
|
67
|
+
) -> None:
|
|
68
|
+
platform = DataPlatformUrn(platform)
|
|
69
|
+
if instance is not None:
|
|
70
|
+
try:
|
|
71
|
+
instance = DataPlatformInstanceUrn.from_string(instance)
|
|
72
|
+
except InvalidUrnError:
|
|
73
|
+
if not isinstance(
|
|
74
|
+
instance, DataPlatformInstanceUrn
|
|
75
|
+
): # redundant check to make mypy happy
|
|
76
|
+
instance = DataPlatformInstanceUrn(platform, instance)
|
|
77
|
+
# At this point, instance is either None or a DataPlatformInstanceUrn.
|
|
78
|
+
|
|
79
|
+
self._set_aspect(
|
|
80
|
+
models.DataPlatformInstanceClass(
|
|
81
|
+
platform=platform.urn(),
|
|
82
|
+
instance=instance.urn() if instance else None,
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def platform_instance(self) -> Optional[DataPlatformInstanceUrn]:
|
|
88
|
+
dataPlatformInstance = self._get_aspect(models.DataPlatformInstanceClass)
|
|
89
|
+
if dataPlatformInstance and dataPlatformInstance.instance:
|
|
90
|
+
return DataPlatformInstanceUrn.from_string(dataPlatformInstance.instance)
|
|
91
|
+
return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class HasSubtype(Entity):
|
|
95
|
+
__slots__ = ()
|
|
96
|
+
|
|
97
|
+
@property
|
|
98
|
+
def subtype(self) -> Optional[str]:
|
|
99
|
+
subtypes = self._get_aspect(models.SubTypesClass)
|
|
100
|
+
if subtypes and subtypes.typeNames:
|
|
101
|
+
if len(subtypes.typeNames) > 1:
|
|
102
|
+
warnings.warn(
|
|
103
|
+
f"The entity {self.urn} has multiple subtypes: {subtypes.typeNames}. "
|
|
104
|
+
"Only the first subtype will be considered.",
|
|
105
|
+
MultipleSubtypesWarning,
|
|
106
|
+
stacklevel=2,
|
|
107
|
+
)
|
|
108
|
+
return subtypes.typeNames[0]
|
|
109
|
+
return None
|
|
110
|
+
|
|
111
|
+
def set_subtype(self, subtype: str) -> None:
|
|
112
|
+
self._set_aspect(models.SubTypesClass(typeNames=[subtype]))
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
OwnershipTypeType: TypeAlias = Union[str, OwnershipTypeUrn]
|
|
116
|
+
OwnerInputType: TypeAlias = Union[
|
|
117
|
+
str,
|
|
118
|
+
ActorUrn,
|
|
119
|
+
Tuple[Union[str, ActorUrn], OwnershipTypeType],
|
|
120
|
+
models.OwnerClass,
|
|
121
|
+
]
|
|
122
|
+
OwnersInputType: TypeAlias = List[OwnerInputType]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class HasOwnership(Entity):
|
|
126
|
+
__slots__ = ()
|
|
127
|
+
|
|
128
|
+
@staticmethod
|
|
129
|
+
def _parse_owner_class(owner: OwnerInputType) -> models.OwnerClass:
|
|
130
|
+
if isinstance(owner, models.OwnerClass):
|
|
131
|
+
return owner
|
|
132
|
+
|
|
133
|
+
owner_type = models.OwnershipTypeClass.TECHNICAL_OWNER
|
|
134
|
+
owner_type_urn = None
|
|
135
|
+
|
|
136
|
+
if isinstance(owner, tuple):
|
|
137
|
+
raw_owner, raw_owner_type = owner
|
|
138
|
+
|
|
139
|
+
if isinstance(raw_owner_type, OwnershipTypeUrn):
|
|
140
|
+
owner_type = models.OwnershipTypeClass.CUSTOM
|
|
141
|
+
owner_type_urn = str(raw_owner_type)
|
|
142
|
+
else:
|
|
143
|
+
owner_type, owner_type_urn = validate_ownership_type(raw_owner_type)
|
|
144
|
+
else:
|
|
145
|
+
raw_owner = owner
|
|
146
|
+
|
|
147
|
+
if isinstance(raw_owner, str):
|
|
148
|
+
# Tricky: this will gracefully handle a user passing in a group urn as a string.
|
|
149
|
+
# TODO: is this the right behavior? or should we require a valid urn here?
|
|
150
|
+
return models.OwnerClass(
|
|
151
|
+
owner=make_user_urn(raw_owner),
|
|
152
|
+
type=owner_type,
|
|
153
|
+
typeUrn=owner_type_urn,
|
|
154
|
+
)
|
|
155
|
+
elif isinstance(raw_owner, Urn):
|
|
156
|
+
return models.OwnerClass(
|
|
157
|
+
owner=str(raw_owner),
|
|
158
|
+
type=owner_type,
|
|
159
|
+
typeUrn=owner_type_urn,
|
|
160
|
+
)
|
|
161
|
+
else:
|
|
162
|
+
raise SdkUsageError(
|
|
163
|
+
f"Invalid owner {owner}: {type(owner)} is not a valid owner type"
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
167
|
+
# Ideally we'd also use first-class ownership type urns here, not strings.
|
|
168
|
+
@property
|
|
169
|
+
def owners(self) -> Optional[List[models.OwnerClass]]:
|
|
170
|
+
if owners_aspect := self._get_aspect(models.OwnershipClass):
|
|
171
|
+
return owners_aspect.owners
|
|
172
|
+
return None
|
|
173
|
+
|
|
174
|
+
def set_owners(self, owners: OwnersInputType) -> None:
|
|
175
|
+
# TODO: add docs on the default parsing + default ownership type
|
|
176
|
+
parsed_owners = [self._parse_owner_class(owner) for owner in owners]
|
|
177
|
+
self._set_aspect(models.OwnershipClass(owners=parsed_owners))
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
ContainerInputType: TypeAlias = Union["Container", ContainerKey]
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class HasContainer(Entity):
|
|
184
|
+
__slots__ = ()
|
|
185
|
+
|
|
186
|
+
def _set_container(self, container: Optional[ContainerInputType]) -> None:
|
|
187
|
+
# We need to allow container to be None. It won't happen for datasets much, but
|
|
188
|
+
# will be required for root containers.
|
|
189
|
+
from datahub.sdk.container import Container
|
|
190
|
+
|
|
191
|
+
browse_path: List[Union[str, models.BrowsePathEntryClass]] = []
|
|
192
|
+
if isinstance(container, Container):
|
|
193
|
+
container_urn = container.urn.urn()
|
|
194
|
+
|
|
195
|
+
parent_browse_path = container._get_aspect(models.BrowsePathsV2Class)
|
|
196
|
+
if parent_browse_path is None:
|
|
197
|
+
raise SdkUsageError(
|
|
198
|
+
"Parent container does not have a browse path, so cannot generate one for its children."
|
|
199
|
+
)
|
|
200
|
+
browse_path = [
|
|
201
|
+
*parent_browse_path.path,
|
|
202
|
+
models.BrowsePathEntryClass(
|
|
203
|
+
id=container_urn,
|
|
204
|
+
urn=container_urn,
|
|
205
|
+
),
|
|
206
|
+
]
|
|
207
|
+
elif container is not None:
|
|
208
|
+
container_urn = container.as_urn()
|
|
209
|
+
|
|
210
|
+
browse_path_reversed = [container_urn]
|
|
211
|
+
parent_key = container.parent_key()
|
|
212
|
+
while parent_key is not None:
|
|
213
|
+
browse_path_reversed.append(parent_key.as_urn())
|
|
214
|
+
parent_key = parent_key.parent_key()
|
|
215
|
+
browse_path = list(reversed(browse_path_reversed))
|
|
216
|
+
else:
|
|
217
|
+
container_urn = None
|
|
218
|
+
browse_path = []
|
|
219
|
+
|
|
220
|
+
if container_urn:
|
|
221
|
+
self._set_aspect(models.ContainerClass(container=container_urn))
|
|
222
|
+
|
|
223
|
+
self._set_aspect(
|
|
224
|
+
models.BrowsePathsV2Class(
|
|
225
|
+
path=[
|
|
226
|
+
(
|
|
227
|
+
entry
|
|
228
|
+
if isinstance(entry, models.BrowsePathEntryClass)
|
|
229
|
+
else models.BrowsePathEntryClass(
|
|
230
|
+
id=entry,
|
|
231
|
+
urn=entry,
|
|
232
|
+
)
|
|
233
|
+
)
|
|
234
|
+
for entry in browse_path
|
|
235
|
+
]
|
|
236
|
+
)
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
|
|
241
|
+
TagsInputType: TypeAlias = List[TagInputType]
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class HasTags(Entity):
|
|
245
|
+
__slots__ = ()
|
|
246
|
+
|
|
247
|
+
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
248
|
+
@property
|
|
249
|
+
def tags(self) -> Optional[List[models.TagAssociationClass]]:
|
|
250
|
+
if tags := self._get_aspect(models.GlobalTagsClass):
|
|
251
|
+
return tags.tags
|
|
252
|
+
return None
|
|
253
|
+
|
|
254
|
+
@classmethod
|
|
255
|
+
def _parse_tag_association_class(
|
|
256
|
+
cls, tag: TagInputType
|
|
257
|
+
) -> models.TagAssociationClass:
|
|
258
|
+
if isinstance(tag, models.TagAssociationClass):
|
|
259
|
+
return tag
|
|
260
|
+
elif isinstance(tag, str):
|
|
261
|
+
assert TagUrn.from_string(tag)
|
|
262
|
+
return models.TagAssociationClass(tag=str(tag))
|
|
263
|
+
|
|
264
|
+
def set_tags(self, tags: TagsInputType) -> None:
|
|
265
|
+
self._set_aspect(
|
|
266
|
+
models.GlobalTagsClass(
|
|
267
|
+
tags=[self._parse_tag_association_class(tag) for tag in tags]
|
|
268
|
+
)
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
TermInputType: TypeAlias = Union[
|
|
273
|
+
str, GlossaryTermUrn, models.GlossaryTermAssociationClass
|
|
274
|
+
]
|
|
275
|
+
TermsInputType: TypeAlias = List[TermInputType]
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class HasTerms(Entity):
|
|
279
|
+
__slots__ = ()
|
|
280
|
+
|
|
281
|
+
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
282
|
+
@property
|
|
283
|
+
def terms(self) -> Optional[List[models.GlossaryTermAssociationClass]]:
|
|
284
|
+
if glossary_terms := self._get_aspect(models.GlossaryTermsClass):
|
|
285
|
+
return glossary_terms.terms
|
|
286
|
+
return None
|
|
287
|
+
|
|
288
|
+
@classmethod
|
|
289
|
+
def _parse_glossary_term_association_class(
|
|
290
|
+
cls, term: TermInputType
|
|
291
|
+
) -> models.GlossaryTermAssociationClass:
|
|
292
|
+
if isinstance(term, models.GlossaryTermAssociationClass):
|
|
293
|
+
return term
|
|
294
|
+
elif isinstance(term, str):
|
|
295
|
+
assert GlossaryTermUrn.from_string(term)
|
|
296
|
+
return models.GlossaryTermAssociationClass(urn=str(term))
|
|
297
|
+
|
|
298
|
+
@classmethod
|
|
299
|
+
def _terms_audit_stamp(self) -> models.AuditStampClass:
|
|
300
|
+
return models.AuditStampClass(
|
|
301
|
+
time=0,
|
|
302
|
+
# TODO figure out what to put here
|
|
303
|
+
actor=CorpUserUrn("__ingestion").urn(),
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
def set_terms(self, terms: TermsInputType) -> None:
|
|
307
|
+
self._set_aspect(
|
|
308
|
+
models.GlossaryTermsClass(
|
|
309
|
+
terms=[
|
|
310
|
+
self._parse_glossary_term_association_class(term) for term in terms
|
|
311
|
+
],
|
|
312
|
+
auditStamp=self._terms_audit_stamp(),
|
|
313
|
+
)
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
DomainInputType: TypeAlias = Union[str, DomainUrn]
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
class HasDomain(Entity):
|
|
321
|
+
__slots__ = ()
|
|
322
|
+
|
|
323
|
+
@property
|
|
324
|
+
def domain(self) -> Optional[DomainUrn]:
|
|
325
|
+
if domains := self._get_aspect(models.DomainsClass):
|
|
326
|
+
if len(domains.domains) > 1:
|
|
327
|
+
raise SdkUsageError(
|
|
328
|
+
f"The entity has multiple domains set, but only one is supported: {domains.domains}"
|
|
329
|
+
)
|
|
330
|
+
elif domains.domains:
|
|
331
|
+
domain_str = domains.domains[0]
|
|
332
|
+
return DomainUrn.from_string(domain_str)
|
|
333
|
+
|
|
334
|
+
return None
|
|
335
|
+
|
|
336
|
+
def set_domain(self, domain: DomainInputType) -> None:
|
|
337
|
+
domain_urn = DomainUrn.from_string(domain) # basically a type assertion
|
|
338
|
+
self._set_aspect(models.DomainsClass(domains=[str(domain_urn)]))
|