acryl-datahub 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

datahub/sdk/_shared.py ADDED
@@ -0,0 +1,338 @@
1
+ import warnings
2
+ from datetime import datetime
3
+ from typing import (
4
+ TYPE_CHECKING,
5
+ List,
6
+ Optional,
7
+ Tuple,
8
+ Union,
9
+ )
10
+
11
+ from typing_extensions import TypeAlias
12
+
13
+ import datahub.metadata.schema_classes as models
14
+ from datahub.emitter.mce_builder import (
15
+ make_ts_millis,
16
+ make_user_urn,
17
+ parse_ts_millis,
18
+ validate_ownership_type,
19
+ )
20
+ from datahub.emitter.mcp_builder import ContainerKey
21
+ from datahub.errors import MultipleSubtypesWarning, SdkUsageError
22
+ from datahub.metadata.urns import (
23
+ CorpGroupUrn,
24
+ CorpUserUrn,
25
+ DataJobUrn,
26
+ DataPlatformInstanceUrn,
27
+ DataPlatformUrn,
28
+ DatasetUrn,
29
+ DomainUrn,
30
+ GlossaryTermUrn,
31
+ OwnershipTypeUrn,
32
+ TagUrn,
33
+ Urn,
34
+ )
35
+ from datahub.sdk._entity import Entity
36
+ from datahub.utilities.urns.error import InvalidUrnError
37
+
38
+ if TYPE_CHECKING:
39
+ from datahub.sdk.container import Container
40
+
41
# Aliases for parameters that accept either a urn object or its plain-string form.
UrnOrStr: TypeAlias = Union[Urn, str]
DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]

# An actor urn is either a user urn or a group urn.
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
46
+
47
+
48
def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
    """Wrap a datetime in a ``TimeStampClass``; ``None`` is passed through.

    The ``time`` field is filled with ``make_ts_millis(ts)``.
    """
    return None if ts is None else models.TimeStampClass(time=make_ts_millis(ts))
52
+
53
+
54
def parse_time_stamp(ts: Optional[models.TimeStampClass]) -> Optional[datetime]:
    """Convert a ``TimeStampClass`` back to a datetime; ``None`` is passed through.

    The conversion is delegated to ``parse_ts_millis`` on the stamp's ``time`` field.
    """
    return None if ts is None else parse_ts_millis(ts.time)
58
+
59
+
60
class HasPlatformInstance(Entity):
    """Mixin that stores and reads the ``DataPlatformInstanceClass`` aspect."""

    __slots__ = ()

    def _set_platform_instance(
        self,
        platform: Union[str, DataPlatformUrn],
        instance: Union[None, str, DataPlatformInstanceUrn],
    ) -> None:
        """Record the platform and, optionally, the platform instance.

        Args:
            platform: Platform name or urn; it is passed to ``DataPlatformUrn``.
            instance: ``None``, a platform instance urn (object or string), or
                a bare instance name. A value that does not parse as a platform
                instance urn is treated as a name under ``platform``.
        """
        platform_urn = DataPlatformUrn(platform)

        resolved = instance
        if resolved is not None:
            try:
                resolved = DataPlatformInstanceUrn.from_string(resolved)
            except InvalidUrnError:
                # Not a full urn, so build one from the platform and the name.
                # The isinstance check is redundant at runtime and is only
                # here to let mypy narrow the type.
                if not isinstance(resolved, DataPlatformInstanceUrn):
                    resolved = DataPlatformInstanceUrn(platform_urn, resolved)
        # From here on, resolved is either None or a DataPlatformInstanceUrn.

        instance_str = resolved.urn() if resolved else None
        aspect = models.DataPlatformInstanceClass(
            platform=platform_urn.urn(),
            instance=instance_str,
        )
        self._set_aspect(aspect)

    @property
    def platform_instance(self) -> Optional[DataPlatformInstanceUrn]:
        """The platform instance urn, or ``None`` when none is recorded."""
        aspect = self._get_aspect(models.DataPlatformInstanceClass)
        if not aspect or not aspect.instance:
            return None
        return DataPlatformInstanceUrn.from_string(aspect.instance)
92
+
93
+
94
class HasSubtype(Entity):
    """Mixin that exposes the ``SubTypesClass`` aspect as a single subtype."""

    __slots__ = ()

    @property
    def subtype(self) -> Optional[str]:
        """The first subtype name, or ``None`` when no subtype is set.

        Emits a ``MultipleSubtypesWarning`` when more than one subtype is
        stored; only the first one is returned.
        """
        aspect = self._get_aspect(models.SubTypesClass)
        if not aspect or not aspect.typeNames:
            return None

        type_names = aspect.typeNames
        if len(type_names) > 1:
            # stacklevel=2 attributes the warning to the code reading the property.
            warnings.warn(
                f"The entity {self.urn} has multiple subtypes: {type_names}. "
                "Only the first subtype will be considered.",
                MultipleSubtypesWarning,
                stacklevel=2,
            )
        return type_names[0]

    def set_subtype(self, subtype: str) -> None:
        """Replace any stored subtypes with the single given subtype."""
        aspect = models.SubTypesClass(typeNames=[subtype])
        self._set_aspect(aspect)
113
+
114
+
115
# Accepted spellings of an ownership type and of an owner.
OwnershipTypeType: TypeAlias = Union[str, OwnershipTypeUrn]
OwnerInputType: TypeAlias = Union[
    str,
    ActorUrn,
    Tuple[Union[str, ActorUrn], OwnershipTypeType],
    models.OwnerClass,
]
OwnersInputType: TypeAlias = List[OwnerInputType]


class HasOwnership(Entity):
    """Mixin that stores and reads the ``OwnershipClass`` aspect."""

    __slots__ = ()

    @staticmethod
    def _parse_owner_class(owner: OwnerInputType) -> models.OwnerClass:
        """Normalize one owner input into an ``OwnerClass``.

        An ``OwnerClass`` is returned unchanged. A bare owner defaults to the
        ``TECHNICAL_OWNER`` type. An ``(owner, ownership_type)`` tuple carries
        an explicit type: an ``OwnershipTypeUrn`` becomes a ``CUSTOM`` type
        with that urn, and anything else goes through
        ``validate_ownership_type``.

        Raises:
            SdkUsageError: If the owner part is neither a string nor a urn.
        """
        if isinstance(owner, models.OwnerClass):
            return owner

        ownership_type = models.OwnershipTypeClass.TECHNICAL_OWNER
        ownership_type_urn = None
        actor = owner

        if isinstance(owner, tuple):
            actor, requested_type = owner
            if isinstance(requested_type, OwnershipTypeUrn):
                ownership_type = models.OwnershipTypeClass.CUSTOM
                ownership_type_urn = str(requested_type)
            else:
                ownership_type, ownership_type_urn = validate_ownership_type(
                    requested_type
                )

        if isinstance(actor, str):
            # Tricky: this will gracefully handle a user passing in a group urn as a string.
            # TODO: is this the right behavior? or should we require a valid urn here?
            owner_urn = make_user_urn(actor)
        elif isinstance(actor, Urn):
            owner_urn = str(actor)
        else:
            raise SdkUsageError(
                f"Invalid owner {owner}: {type(owner)} is not a valid owner type"
            )

        return models.OwnerClass(
            owner=owner_urn,
            type=ownership_type,
            typeUrn=ownership_type_urn,
        )

    # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
    # Ideally we'd also use first-class ownership type urns here, not strings.
    @property
    def owners(self) -> Optional[List[models.OwnerClass]]:
        """The raw owner entries, or ``None`` when no ownership aspect is set."""
        aspect = self._get_aspect(models.OwnershipClass)
        return aspect.owners if aspect else None

    def set_owners(self, owners: OwnersInputType) -> None:
        """Replace the ownership aspect with the parsed form of ``owners``."""
        # TODO: add docs on the default parsing + default ownership type
        parsed = list(map(self._parse_owner_class, owners))
        self._set_aspect(models.OwnershipClass(owners=parsed))
178
+
179
+
180
# A parent container is given either as an SDK Container or as its key.
ContainerInputType: TypeAlias = Union["Container", ContainerKey]


class HasContainer(Entity):
    """Mixin that stores the parent container and the derived browse path."""

    __slots__ = ()

    def _set_container(self, container: Optional[ContainerInputType]) -> None:
        """Set the ``ContainerClass`` and ``BrowsePathsV2Class`` aspects.

        Args:
            container: The parent, or ``None``. ``None`` must be allowed: it
                is rare for datasets but required for root containers.

        For a ``Container`` the browse path is the parent's own browse path
        plus the parent itself. For a ``ContainerKey`` the path is built by
        walking ``parent_key()`` up to the root. For ``None`` no container
        aspect is written and the browse path is empty.

        Raises:
            SdkUsageError: If a ``Container`` parent has no browse path aspect.
        """
        # Imported here rather than at module level; the module-level import
        # is only done under TYPE_CHECKING.
        from datahub.sdk.container import Container

        container_urn: Optional[str] = None
        path_entries: List[Union[str, models.BrowsePathEntryClass]] = []

        if isinstance(container, Container):
            container_urn = container.urn.urn()

            inherited = container._get_aspect(models.BrowsePathsV2Class)
            if inherited is None:
                raise SdkUsageError(
                    "Parent container does not have a browse path, so cannot generate one for its children."
                )
            path_entries.extend(inherited.path)
            path_entries.append(
                models.BrowsePathEntryClass(
                    id=container_urn,
                    urn=container_urn,
                )
            )
        elif container is not None:
            container_urn = container.as_urn()

            # Collect urns from the nearest ancestor outward, then flip the
            # list so the path runs from the root down.
            leaf_to_root = [container_urn]
            ancestor = container.parent_key()
            while ancestor is not None:
                leaf_to_root.append(ancestor.as_urn())
                ancestor = ancestor.parent_key()
            path_entries.extend(reversed(leaf_to_root))

        if container_urn:
            self._set_aspect(models.ContainerClass(container=container_urn))

        # Plain urn strings become entries whose id and urn are both the string.
        normalized: List[models.BrowsePathEntryClass] = []
        for entry in path_entries:
            if not isinstance(entry, models.BrowsePathEntryClass):
                entry = models.BrowsePathEntryClass(
                    id=entry,
                    urn=entry,
                )
            normalized.append(entry)

        self._set_aspect(models.BrowsePathsV2Class(path=normalized))
238
+
239
+
240
# A tag is given as a urn string, a TagUrn, or a ready-made association.
TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
TagsInputType: TypeAlias = List[TagInputType]


class HasTags(Entity):
    """Mixin that stores and reads the ``GlobalTagsClass`` aspect."""

    __slots__ = ()

    # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
    @property
    def tags(self) -> Optional[List[models.TagAssociationClass]]:
        """The raw tag associations, or ``None`` when no tags aspect is set."""
        if tags := self._get_aspect(models.GlobalTagsClass):
            return tags.tags
        return None

    @classmethod
    def _parse_tag_association_class(
        cls, tag: TagInputType
    ) -> models.TagAssociationClass:
        """Normalize one tag input into a ``TagAssociationClass``.

        A ``TagAssociationClass`` is returned unchanged. A string is parsed
        with ``TagUrn.from_string`` purely as validation, so a malformed tag
        urn is rejected by that call (expected to raise ``InvalidUrnError``,
        as the other urn classes in this module do).
        """
        if isinstance(tag, models.TagAssociationClass):
            return tag
        if isinstance(tag, str):
            # Deliberately not wrapped in `assert`: assert statements are
            # removed under `python -O`, which would silently skip validation.
            TagUrn.from_string(tag)
        return models.TagAssociationClass(tag=str(tag))

    def set_tags(self, tags: TagsInputType) -> None:
        """Replace the tags aspect with the parsed form of ``tags``."""
        self._set_aspect(
            models.GlobalTagsClass(
                tags=[self._parse_tag_association_class(tag) for tag in tags]
            )
        )
270
+
271
+
272
# A glossary term is given as a urn string, a GlossaryTermUrn, or a
# ready-made association.
TermInputType: TypeAlias = Union[
    str, GlossaryTermUrn, models.GlossaryTermAssociationClass
]
TermsInputType: TypeAlias = List[TermInputType]


class HasTerms(Entity):
    """Mixin that stores and reads the ``GlossaryTermsClass`` aspect."""

    __slots__ = ()

    # TODO: Return a custom type with deserialized urns, instead of the raw aspect.
    @property
    def terms(self) -> Optional[List[models.GlossaryTermAssociationClass]]:
        """The raw term associations, or ``None`` when no terms aspect is set."""
        if glossary_terms := self._get_aspect(models.GlossaryTermsClass):
            return glossary_terms.terms
        return None

    @classmethod
    def _parse_glossary_term_association_class(
        cls, term: TermInputType
    ) -> models.GlossaryTermAssociationClass:
        """Normalize one term input into a ``GlossaryTermAssociationClass``.

        A ``GlossaryTermAssociationClass`` is returned unchanged. A string is
        parsed with ``GlossaryTermUrn.from_string`` purely as validation, so a
        malformed term urn is rejected by that call (expected to raise
        ``InvalidUrnError``, as the other urn classes in this module do).
        """
        if isinstance(term, models.GlossaryTermAssociationClass):
            return term
        if isinstance(term, str):
            # Deliberately not wrapped in `assert`: assert statements are
            # removed under `python -O`, which would silently skip validation.
            GlossaryTermUrn.from_string(term)
        return models.GlossaryTermAssociationClass(urn=str(term))

    @classmethod
    def _terms_audit_stamp(cls) -> models.AuditStampClass:
        """Build the audit stamp attached to the terms aspect.

        The stamp is a fixed placeholder: time 0 and the ``__ingestion`` user.
        """
        return models.AuditStampClass(
            time=0,
            # TODO figure out what to put here
            actor=CorpUserUrn("__ingestion").urn(),
        )

    def set_terms(self, terms: TermsInputType) -> None:
        """Replace the terms aspect with the parsed form of ``terms``."""
        self._set_aspect(
            models.GlossaryTermsClass(
                terms=[
                    self._parse_glossary_term_association_class(term) for term in terms
                ],
                auditStamp=self._terms_audit_stamp(),
            )
        )
315
+
316
+
317
# A domain is given as a urn string or a DomainUrn.
DomainInputType: TypeAlias = Union[str, DomainUrn]


class HasDomain(Entity):
    """Mixin that exposes the ``DomainsClass`` aspect as a single domain."""

    __slots__ = ()

    @property
    def domain(self) -> Optional[DomainUrn]:
        """The entity's domain urn, or ``None`` when no domain is set.

        Raises:
            SdkUsageError: If more than one domain is stored on the entity.
        """
        aspect = self._get_aspect(models.DomainsClass)
        if not aspect:
            return None

        domain_urns = aspect.domains
        if len(domain_urns) > 1:
            raise SdkUsageError(
                f"The entity has multiple domains set, but only one is supported: {domain_urns}"
            )
        if not domain_urns:
            return None
        return DomainUrn.from_string(domain_urns[0])

    def set_domain(self, domain: DomainInputType) -> None:
        """Replace the domains aspect with the single given domain."""
        # Parsing is basically a type assertion on the input.
        parsed = DomainUrn.from_string(domain)
        self._set_aspect(models.DomainsClass(domains=[str(parsed)]))
datahub/sdk/container.py ADDED
@@ -0,0 +1,193 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Dict, Optional, Type
5
+
6
+ from typing_extensions import Self
7
+
8
+ import datahub.metadata.schema_classes as models
9
+ from datahub.emitter.mce_builder import ALL_ENV_TYPES
10
+ from datahub.emitter.mcp_builder import (
11
+ _INCLUDE_ENV_IN_CONTAINER_PROPERTIES,
12
+ ContainerKey,
13
+ )
14
+ from datahub.errors import SdkUsageError
15
+ from datahub.metadata.urns import (
16
+ ContainerUrn,
17
+ Urn,
18
+ )
19
+ from datahub.sdk._entity import Entity
20
+ from datahub.sdk._shared import (
21
+ DomainInputType,
22
+ HasContainer,
23
+ HasDomain,
24
+ HasOwnership,
25
+ HasPlatformInstance,
26
+ HasSubtype,
27
+ HasTags,
28
+ HasTerms,
29
+ OwnersInputType,
30
+ TagsInputType,
31
+ TermsInputType,
32
+ make_time_stamp,
33
+ parse_time_stamp,
34
+ )
35
+
36
+
37
class Container(
    HasPlatformInstance,
    HasSubtype,
    HasContainer,
    HasOwnership,
    HasTags,
    HasTerms,
    HasDomain,
    Entity,
):
    """SDK entity for a container.

    The container-specific fields (name, description, custom properties,
    external url, qualified name, timestamps) all live on a single
    ``ContainerPropertiesClass`` aspect, which is registered in ``__init__``.
    """

    __slots__ = ()

    @classmethod
    def get_urn_type(cls) -> Type[ContainerUrn]:
        """Return the urn class used by this entity type."""
        return ContainerUrn

    def __init__(
        self,
        /,
        # Identity.
        container_key: ContainerKey | ContainerUrn,
        *,
        # Container attributes.
        display_name: str,
        qualified_name: Optional[str] = None,
        description: Optional[str] = None,
        external_url: Optional[str] = None,
        # TODO: call this custom properties?
        extra_properties: Optional[Dict[str, str]] = None,
        created: Optional[datetime] = None,
        last_modified: Optional[datetime] = None,
        # Standard aspects.
        subtype: Optional[str] = None,
        owners: Optional[OwnersInputType] = None,
        tags: Optional[TagsInputType] = None,
        terms: Optional[TermsInputType] = None,
        domain: Optional[DomainInputType] = None,
    ):
        """Create a container from its key (or urn) and attributes.

        When ``container_key`` is a ``ContainerKey``, the platform instance,
        parent container, custom properties and (optionally) env are derived
        from it. When it is a ``ContainerUrn``, only the explicitly passed
        attributes are applied. Optional arguments left as ``None`` are not
        set at all.
        """
        urn = (
            container_key
            if isinstance(container_key, ContainerUrn)
            else ContainerUrn.from_string(container_key.as_urn())
        )
        super().__init__(urn)

        # This needs to come first to ensure that the display name is registered.
        self._ensure_container_props(name=display_name)

        # TODO: Normal usages should require container key. Only the graph init method can accept an urn.
        if isinstance(container_key, ContainerKey):
            self._set_platform_instance(container_key.platform, container_key.instance)

            self._set_container(container_key.parent_key())

            # Key-derived properties first; explicit extras win on conflicts.
            merged_properties = dict(container_key.property_dict())
            merged_properties.update(extra_properties or {})
            self.set_custom_properties(merged_properties)

            # Extra validation on the env field.
            # In certain cases (mainly for backwards compatibility), the env field will actually
            # have a platform instance name.
            env = None
            if container_key.env in ALL_ENV_TYPES:
                env = container_key.env
            if env is not None and _INCLUDE_ENV_IN_CONTAINER_PROPERTIES:
                self._ensure_container_props().env = env

        # Container attributes: only applied when explicitly provided.
        if description is not None:
            self.set_description(description)
        if external_url is not None:
            self.set_external_url(external_url)
        if qualified_name is not None:
            self.set_qualified_name(qualified_name)
        if created is not None:
            self.set_created(created)
        if last_modified is not None:
            self.set_last_modified(last_modified)

        # Standard aspects: only applied when explicitly provided.
        if subtype is not None:
            self.set_subtype(subtype)
        if owners is not None:
            self.set_owners(owners)
        if tags is not None:
            self.set_tags(tags)
        if terms is not None:
            self.set_terms(terms)
        if domain is not None:
            self.set_domain(domain)

    @classmethod
    def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
        """Build a container from a urn and an aspect bag.

        The entity is constructed with a placeholder display name and then
        handed to ``_init_from_graph`` together with ``current_aspects``.
        """
        assert isinstance(urn, ContainerUrn)
        placeholder = cls(urn, display_name="__dummy_value__")
        return placeholder._init_from_graph(current_aspects)

    def _ensure_container_props(
        self, *, name: Optional[str] = None
    ) -> models.ContainerPropertiesClass:
        """Return the container properties aspect.

        With ``name``, the aspect is obtained through ``_setdefault_aspect``
        using a new ``ContainerPropertiesClass(name=name)``. Without it, the
        aspect must already be present.

        Raises:
            SdkUsageError: If ``name`` is omitted and no properties aspect exists.
        """
        # TODO: Not super happy with this method's implementation, but it's
        # internal-only and enforces the constraints that we need.
        if name is None:
            existing = self._get_aspect(models.ContainerPropertiesClass)
            if existing is None:
                raise SdkUsageError("Containers must have a name.")
            return existing

        return self._setdefault_aspect(models.ContainerPropertiesClass(name=name))

    @property
    def display_name(self) -> str:
        """The container's display name."""
        props = self._ensure_container_props()
        return props.name

    def set_display_name(self, value: str) -> None:
        """Set the container's display name."""
        props = self._ensure_container_props()
        props.name = value

    @property
    def description(self) -> Optional[str]:
        """The container's description, if any."""
        props = self._ensure_container_props()
        return props.description

    def set_description(self, description: str) -> None:
        """Set the container's description."""
        props = self._ensure_container_props()
        props.description = description

    @property
    def custom_properties(self) -> Optional[Dict[str, str]]:
        """The custom properties stored on the properties aspect."""
        props = self._ensure_container_props()
        return props.customProperties

    def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
        """Replace the custom properties with the given mapping."""
        # TODO: How do we ensure that the container key props are always retained?
        props = self._ensure_container_props()
        props.customProperties = custom_properties

    @property
    def external_url(self) -> Optional[str]:
        """The container's external url, if any."""
        props = self._ensure_container_props()
        return props.externalUrl

    def set_external_url(self, external_url: str) -> None:
        """Set the container's external url."""
        props = self._ensure_container_props()
        props.externalUrl = external_url

    @property
    def qualified_name(self) -> Optional[str]:
        """The container's qualified name, if any."""
        props = self._ensure_container_props()
        return props.qualifiedName

    def set_qualified_name(self, qualified_name: str) -> None:
        """Set the container's qualified name."""
        props = self._ensure_container_props()
        props.qualifiedName = qualified_name

    @property
    def created(self) -> Optional[datetime]:
        """The creation time as a datetime, or ``None`` when unset."""
        props = self._ensure_container_props()
        return parse_time_stamp(props.created)

    def set_created(self, created: datetime) -> None:
        """Set the creation time."""
        props = self._ensure_container_props()
        props.created = make_time_stamp(created)

    @property
    def last_modified(self) -> Optional[datetime]:
        """The last-modified time as a datetime, or ``None`` when unset."""
        props = self._ensure_container_props()
        return parse_time_stamp(props.lastModified)

    def set_last_modified(self, last_modified: datetime) -> None:
        """Set the last-modified time."""
        props = self._ensure_container_props()
        props.lastModified = make_time_stamp(last_modified)