acryl-datahub 1.0.0rc4__py3-none-any.whl → 1.0.0rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/METADATA +2411 -2411
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/RECORD +27 -25
- datahub/_version.py +1 -1
- datahub/emitter/mcp_builder.py +4 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/openapi_parser.py +46 -14
- datahub/metadata/_schema_classes.py +17 -0
- datahub/metadata/schema.avsc +21 -3
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/MetadataChangeEvent.avsc +13 -0
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_entity.py +2 -0
- datahub/sdk/_shared.py +163 -13
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +20 -4
- datahub/sdk/dataset.py +104 -14
- datahub/sdk/main_client.py +17 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/split_statements.py +20 -13
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/sentinels.py +22 -0
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/top_level.txt +0 -0
datahub/sdk/_shared.py
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import warnings
|
|
2
4
|
from datetime import datetime
|
|
3
5
|
from typing import (
|
|
4
6
|
TYPE_CHECKING,
|
|
7
|
+
Callable,
|
|
5
8
|
List,
|
|
6
9
|
Optional,
|
|
7
10
|
Tuple,
|
|
8
11
|
Union,
|
|
9
12
|
)
|
|
10
13
|
|
|
11
|
-
from typing_extensions import TypeAlias
|
|
14
|
+
from typing_extensions import TypeAlias, assert_never
|
|
12
15
|
|
|
13
16
|
import datahub.metadata.schema_classes as models
|
|
14
17
|
from datahub.emitter.mce_builder import (
|
|
@@ -20,6 +23,7 @@ from datahub.emitter.mce_builder import (
|
|
|
20
23
|
from datahub.emitter.mcp_builder import ContainerKey
|
|
21
24
|
from datahub.errors import MultipleSubtypesWarning, SdkUsageError
|
|
22
25
|
from datahub.metadata.urns import (
|
|
26
|
+
ContainerUrn,
|
|
23
27
|
CorpGroupUrn,
|
|
24
28
|
CorpUserUrn,
|
|
25
29
|
DataJobUrn,
|
|
@@ -33,6 +37,7 @@ from datahub.metadata.urns import (
|
|
|
33
37
|
Urn,
|
|
34
38
|
)
|
|
35
39
|
from datahub.sdk._entity import Entity
|
|
40
|
+
from datahub.sdk._utils import add_list_unique, remove_list_unique
|
|
36
41
|
from datahub.utilities.urns.error import InvalidUrnError
|
|
37
42
|
|
|
38
43
|
if TYPE_CHECKING:
|
|
@@ -83,6 +88,13 @@ class HasPlatformInstance(Entity):
|
|
|
83
88
|
)
|
|
84
89
|
)
|
|
85
90
|
|
|
91
|
+
@property
def platform(self) -> Optional[DataPlatformUrn]:
    """Return the platform urn stored on the DataPlatformInstance aspect.

    Returns None when the aspect is absent or its ``platform`` field is falsy.
    """
    instance_aspect = self._get_aspect(models.DataPlatformInstanceClass)
    if not instance_aspect or not instance_aspect.platform:
        return None
    return DataPlatformUrn.from_string(instance_aspect.platform)
|
|
97
|
+
|
|
86
98
|
@property
|
|
87
99
|
def platform_instance(self) -> Optional[DataPlatformInstanceUrn]:
|
|
88
100
|
dataPlatformInstance = self._get_aspect(models.DataPlatformInstanceClass)
|
|
@@ -112,11 +124,11 @@ class HasSubtype(Entity):
|
|
|
112
124
|
self._set_aspect(models.SubTypesClass(typeNames=[subtype]))
|
|
113
125
|
|
|
114
126
|
|
|
127
|
+
# TODO: Reference OwnershipTypeClass as the valid ownership type enum.
|
|
115
128
|
OwnershipTypeType: TypeAlias = Union[str, OwnershipTypeUrn]
|
|
116
129
|
OwnerInputType: TypeAlias = Union[
|
|
117
|
-
str,
|
|
118
130
|
ActorUrn,
|
|
119
|
-
Tuple[
|
|
131
|
+
Tuple[ActorUrn, OwnershipTypeType],
|
|
120
132
|
models.OwnerClass,
|
|
121
133
|
]
|
|
122
134
|
OwnersInputType: TypeAlias = List[OwnerInputType]
|
|
@@ -126,15 +138,17 @@ class HasOwnership(Entity):
|
|
|
126
138
|
__slots__ = ()
|
|
127
139
|
|
|
128
140
|
@staticmethod
|
|
129
|
-
def _parse_owner_class(owner: OwnerInputType) -> models.OwnerClass:
|
|
141
|
+
def _parse_owner_class(owner: OwnerInputType) -> Tuple[models.OwnerClass, bool]:
|
|
130
142
|
if isinstance(owner, models.OwnerClass):
|
|
131
|
-
return owner
|
|
143
|
+
return owner, False
|
|
132
144
|
|
|
145
|
+
was_type_specified = False
|
|
133
146
|
owner_type = models.OwnershipTypeClass.TECHNICAL_OWNER
|
|
134
147
|
owner_type_urn = None
|
|
135
148
|
|
|
136
149
|
if isinstance(owner, tuple):
|
|
137
150
|
raw_owner, raw_owner_type = owner
|
|
151
|
+
was_type_specified = True
|
|
138
152
|
|
|
139
153
|
if isinstance(raw_owner_type, OwnershipTypeUrn):
|
|
140
154
|
owner_type = models.OwnershipTypeClass.CUSTOM
|
|
@@ -151,17 +165,15 @@ class HasOwnership(Entity):
|
|
|
151
165
|
owner=make_user_urn(raw_owner),
|
|
152
166
|
type=owner_type,
|
|
153
167
|
typeUrn=owner_type_urn,
|
|
154
|
-
)
|
|
168
|
+
), was_type_specified
|
|
155
169
|
elif isinstance(raw_owner, Urn):
|
|
156
170
|
return models.OwnerClass(
|
|
157
171
|
owner=str(raw_owner),
|
|
158
172
|
type=owner_type,
|
|
159
173
|
typeUrn=owner_type_urn,
|
|
160
|
-
)
|
|
174
|
+
), was_type_specified
|
|
161
175
|
else:
|
|
162
|
-
|
|
163
|
-
f"Invalid owner {owner}: {type(owner)} is not a valid owner type"
|
|
164
|
-
)
|
|
176
|
+
assert_never(raw_owner)
|
|
165
177
|
|
|
166
178
|
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
167
179
|
# Ideally we'd also use first-class ownership type urns here, not strings.
|
|
@@ -173,21 +185,74 @@ class HasOwnership(Entity):
|
|
|
173
185
|
|
|
174
186
|
def set_owners(self, owners: OwnersInputType) -> None:
|
|
175
187
|
# TODO: add docs on the default parsing + default ownership type
|
|
176
|
-
parsed_owners = [self._parse_owner_class(owner) for owner in owners]
|
|
188
|
+
parsed_owners = [self._parse_owner_class(owner)[0] for owner in owners]
|
|
177
189
|
self._set_aspect(models.OwnershipClass(owners=parsed_owners))
|
|
178
190
|
|
|
191
|
+
@classmethod
def _owner_key_method(
    cls, consider_owner_type: bool
) -> Callable[[models.OwnerClass], Tuple[str, ...]]:
    """Select the key function used to compare owners.

    Returns ``_typed_owner_key`` when the ownership type should take part in
    the comparison, and ``_simple_owner_key`` (owner urn only) otherwise.
    """
    return cls._typed_owner_key if consider_owner_type else cls._simple_owner_key
|
|
179
199
|
|
|
180
|
-
|
|
200
|
+
@classmethod
def _typed_owner_key(cls, owner: models.OwnerClass) -> Tuple[str, str]:
    """Build a comparison key from the owner urn and its ownership type."""
    # Use the ownership type urn when it is set; otherwise fall back to the
    # stringified ``type`` field.
    ownership_type = owner.typeUrn or str(owner.type)
    return owner.owner, ownership_type
|
|
203
|
+
|
|
204
|
+
@classmethod
def _simple_owner_key(cls, owner: models.OwnerClass) -> Tuple[str,]:
    """Build a comparison key from the owner urn alone (ownership type ignored)."""
    owner_urn = owner.owner
    return (owner_urn,)
|
|
207
|
+
|
|
208
|
+
def _ensure_owners(self) -> List[models.OwnerClass]:
    """Return the ``owners`` list of this entity's ownership aspect.

    An empty OwnershipClass is handed to ``_setdefault_aspect`` as the default.
    NOTE(review): presumably that helper stores the default when no ownership
    aspect exists yet and returns the stored aspect - confirm against Entity.
    """
    ownership = self._setdefault_aspect(models.OwnershipClass(owners=[]))
    return ownership.owners
|
|
211
|
+
|
|
212
|
+
def add_owner(self, owner: OwnerInputType) -> None:
    """Add an owner to this entity.

    An existing entry with the same (owner urn, ownership type) key is
    replaced in place; otherwise the owner is appended.
    """
    # Tricky: when adding an owner, we always use the ownership type.
    # For removals, we only use it if it was explicitly specified.
    new_owner = self._parse_owner_class(owner)[0]
    current_owners = self._ensure_owners()
    add_list_unique(current_owners, key=self._typed_owner_key, item=new_owner)
|
|
221
|
+
|
|
222
|
+
def remove_owner(self, owner: OwnerInputType) -> None:
    """Remove an owner from this entity.

    The ownership type takes part in the match only when the parser reports
    that the caller specified it; otherwise entries are matched on the owner
    urn alone.
    """
    target, type_given = self._parse_owner_class(owner)
    current_owners = self._ensure_owners()
    match_key = self._owner_key_method(type_given)
    remove_list_unique(current_owners, key=match_key, item=target)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# If you pass in a container object, we can build on top of its browse path.
|
|
232
|
+
# If you pass in a ContainerKey, we can use parent_key() to build the browse path.
|
|
233
|
+
# If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
|
|
234
|
+
# will be treated as raw ids.
|
|
235
|
+
ParentContainerInputType: TypeAlias = Union["Container", ContainerKey, List[UrnOrStr]]
|
|
181
236
|
|
|
182
237
|
|
|
183
238
|
class HasContainer(Entity):
|
|
184
239
|
__slots__ = ()
|
|
185
240
|
|
|
186
|
-
|
|
241
|
+
@staticmethod
def _maybe_parse_as_urn(urn: UrnOrStr) -> UrnOrStr:
    """Parse ``urn`` into a Urn when it is a string starting with ``urn:li:``.

    Urn objects and any other strings are returned unchanged.
    """
    if not isinstance(urn, Urn) and urn.startswith("urn:li:"):
        return Urn.from_string(urn)
    return urn
|
|
249
|
+
|
|
250
|
+
def _set_container(self, container: Optional[ParentContainerInputType]) -> None:
|
|
187
251
|
# We need to allow container to be None. It won't happen for datasets much, but
|
|
188
252
|
# will be required for root containers.
|
|
189
253
|
from datahub.sdk.container import Container
|
|
190
254
|
|
|
255
|
+
container_urn: Optional[str]
|
|
191
256
|
browse_path: List[Union[str, models.BrowsePathEntryClass]] = []
|
|
192
257
|
if isinstance(container, Container):
|
|
193
258
|
container_urn = container.urn.urn()
|
|
@@ -204,6 +269,29 @@ class HasContainer(Entity):
|
|
|
204
269
|
urn=container_urn,
|
|
205
270
|
),
|
|
206
271
|
]
|
|
272
|
+
elif isinstance(container, list):
|
|
273
|
+
parsed_path = [self._maybe_parse_as_urn(entry) for entry in container]
|
|
274
|
+
|
|
275
|
+
# Use the last container in the path as the container urn.
|
|
276
|
+
container_urns = [
|
|
277
|
+
urn.urn() for urn in parsed_path if isinstance(urn, ContainerUrn)
|
|
278
|
+
]
|
|
279
|
+
container_urn = container_urns[-1] if container_urns else None
|
|
280
|
+
|
|
281
|
+
browse_path = [
|
|
282
|
+
(
|
|
283
|
+
models.BrowsePathEntryClass(
|
|
284
|
+
id=str(entry),
|
|
285
|
+
urn=str(entry),
|
|
286
|
+
)
|
|
287
|
+
if isinstance(entry, Urn)
|
|
288
|
+
else models.BrowsePathEntryClass(
|
|
289
|
+
id=entry,
|
|
290
|
+
urn=None,
|
|
291
|
+
)
|
|
292
|
+
)
|
|
293
|
+
for entry in parsed_path
|
|
294
|
+
]
|
|
207
295
|
elif container is not None:
|
|
208
296
|
container_urn = container.as_urn()
|
|
209
297
|
|
|
@@ -243,6 +331,24 @@ class HasContainer(Entity):
|
|
|
243
331
|
)
|
|
244
332
|
)
|
|
245
333
|
|
|
334
|
+
@property
def parent_container(self) -> Optional[ContainerUrn]:
    """Return the urn held by the container aspect, or None if there is no such aspect."""
    container_aspect = self._get_aspect(models.ContainerClass)
    if not container_aspect:
        return None
    return ContainerUrn.from_string(container_aspect.container)
|
|
339
|
+
|
|
340
|
+
@property
def browse_path(self) -> Optional[List[UrnOrStr]]:
    """Return the BrowsePathsV2 path as a list of urns and raw ids.

    Entries that carry a urn are parsed into Urn objects; the rest contribute
    their ``id`` string. Returns None when the aspect is missing.
    """
    aspect = self._get_aspect(models.BrowsePathsV2Class)
    if not aspect:
        return None
    entries: List[UrnOrStr] = [
        Urn.from_string(entry.urn) if entry.urn else entry.id
        for entry in aspect.path
    ]
    return entries
|
|
351
|
+
|
|
246
352
|
|
|
247
353
|
TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
|
|
248
354
|
TagsInputType: TypeAlias = List[TagInputType]
|
|
@@ -251,6 +357,9 @@ TagsInputType: TypeAlias = List[TagInputType]
|
|
|
251
357
|
class HasTags(Entity):
|
|
252
358
|
__slots__ = ()
|
|
253
359
|
|
|
360
|
+
def _ensure_tags(self) -> List[models.TagAssociationClass]:
    """Return the ``tags`` list of this entity's GlobalTags aspect.

    An empty GlobalTagsClass is handed to ``_setdefault_aspect`` as the default.
    NOTE(review): presumably the default is stored when the aspect is missing - confirm.
    """
    global_tags = self._setdefault_aspect(models.GlobalTagsClass(tags=[]))
    return global_tags.tags
|
|
362
|
+
|
|
254
363
|
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
255
364
|
@property
|
|
256
365
|
def tags(self) -> Optional[List[models.TagAssociationClass]]:
|
|
@@ -275,6 +384,24 @@ class HasTags(Entity):
|
|
|
275
384
|
)
|
|
276
385
|
)
|
|
277
386
|
|
|
387
|
+
@classmethod
def _tag_key(cls, tag: models.TagAssociationClass) -> str:
    """Return the comparison key for a tag association: its ``tag`` field."""
    tag_urn = tag.tag
    return tag_urn
|
|
390
|
+
|
|
391
|
+
def add_tag(self, tag: TagInputType) -> None:
    """Add a tag, replacing any existing association that has the same tag key."""
    # Fetch the tag list before parsing, as the original call evaluated its
    # arguments in this order.
    current_tags = self._ensure_tags()
    new_tag = self._parse_tag_association_class(tag)
    add_list_unique(current_tags, key=self._tag_key, item=new_tag)
|
|
397
|
+
|
|
398
|
+
def remove_tag(self, tag: TagInputType) -> None:
    """Remove associations whose tag key matches ``tag``; a missing tag is not an error."""
    # Fetch the tag list before parsing, as the original call evaluated its
    # arguments in this order.
    current_tags = self._ensure_tags()
    target = self._parse_tag_association_class(tag)
    remove_list_unique(current_tags, key=self._tag_key, item=target)
|
|
404
|
+
|
|
278
405
|
|
|
279
406
|
TermInputType: TypeAlias = Union[
|
|
280
407
|
str, GlossaryTermUrn, models.GlossaryTermAssociationClass
|
|
@@ -285,6 +412,11 @@ TermsInputType: TypeAlias = List[TermInputType]
|
|
|
285
412
|
class HasTerms(Entity):
|
|
286
413
|
__slots__ = ()
|
|
287
414
|
|
|
415
|
+
def _ensure_terms(self) -> List[models.GlossaryTermAssociationClass]:
    """Return the ``terms`` list of this entity's GlossaryTerms aspect.

    The default handed to ``_setdefault_aspect`` is an empty GlossaryTermsClass
    stamped with ``_terms_audit_stamp()``.
    NOTE(review): presumably the default is stored when the aspect is missing - confirm.
    """
    default_terms = models.GlossaryTermsClass(
        terms=[], auditStamp=self._terms_audit_stamp()
    )
    return self._setdefault_aspect(default_terms).terms
|
|
419
|
+
|
|
288
420
|
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
289
421
|
@property
|
|
290
422
|
def terms(self) -> Optional[List[models.GlossaryTermAssociationClass]]:
|
|
@@ -320,6 +452,24 @@ class HasTerms(Entity):
|
|
|
320
452
|
)
|
|
321
453
|
)
|
|
322
454
|
|
|
455
|
+
@classmethod
def _terms_key(cls, term: models.GlossaryTermAssociationClass) -> str:
    """Return the comparison key for a glossary term association: its ``urn`` field."""
    # The first parameter of a classmethod receives the class, so it is named
    # ``cls`` (it was ``self``), matching ``_tag_key``.
    return term.urn
|
|
458
|
+
|
|
459
|
+
def add_term(self, term: TermInputType) -> None:
    """Add a glossary term, replacing any existing association with the same urn."""
    # Fetch the term list before parsing, as the original call evaluated its
    # arguments in this order.
    current_terms = self._ensure_terms()
    new_term = self._parse_glossary_term_association_class(term)
    add_list_unique(current_terms, key=self._terms_key, item=new_term)
|
|
465
|
+
|
|
466
|
+
def remove_term(self, term: TermInputType) -> None:
    """Remove associations whose urn matches ``term``; a missing term is not an error."""
    # Fetch the term list before parsing, as the original call evaluated its
    # arguments in this order.
    current_terms = self._ensure_terms()
    target = self._parse_glossary_term_association_class(term)
    remove_list_unique(current_terms, key=self._terms_key, item=target)
|
|
472
|
+
|
|
323
473
|
|
|
324
474
|
DomainInputType: TypeAlias = Union[str, DomainUrn]
|
|
325
475
|
|
datahub/sdk/_utils.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from typing import Any, Callable, List, Protocol, TypeVar
|
|
2
|
+
|
|
3
|
+
from datahub.errors import ItemNotFoundError
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class _SupportsEq(Protocol):
    """Structural type for values that can be compared with ``==``."""

    def __eq__(self, other: Any) -> bool:
        """Return whether this value equals ``other``."""
        ...
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
K = TypeVar("K", bound=_SupportsEq)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def add_list_unique(lst: List[T], key: Callable[[T], K], item: T) -> None:
    """Insert ``item`` into ``lst``, keeping at most one entry per key.

    If an existing element has the same key as ``item``, the first such
    element is overwritten in place; otherwise ``item`` is appended.
    """
    wanted = key(item)
    # Index of the first element with a matching key, or None if there is none.
    position = next(
        (idx for idx, current in enumerate(lst) if key(current) == wanted),
        None,
    )
    if position is None:
        lst.append(item)
    else:
        lst[position] = item
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def remove_list_unique(
    lst: List[T], key: Callable[[T], K], item: T, *, missing_ok: bool = True
) -> None:
    """Remove every element of ``lst`` whose key equals the key of ``item``.

    The list is modified in place. All matching elements are removed, so any
    duplicates already in the list are cleaned up too.

    Args:
        lst: The list to modify.
        key: Function that computes the comparison key of an element.
        item: The element whose key identifies what to remove.
        missing_ok: When False, raise if no element matched.

    Raises:
        ItemNotFoundError: If nothing was removed and ``missing_ok`` is False.
    """
    # Poor man's patch implementation.
    item_key = key(item)
    # Collect the survivors first instead of popping while enumerating:
    # popping shifts later elements left, so the element right after each
    # removal would be skipped and adjacent duplicates would be left behind.
    kept = [existing for existing in lst if not (key(existing) == item_key)]
    removed = len(kept) != len(lst)
    if removed:
        # Slice assignment keeps the same list object, so callers that hold a
        # reference to ``lst`` see the removal.
        lst[:] = kept
    if not removed and not missing_ok:
        raise ItemNotFoundError(f"Cannot remove item {item} from list: not found")
|
datahub/sdk/container.py
CHANGED
|
@@ -27,11 +27,13 @@ from datahub.sdk._shared import (
|
|
|
27
27
|
HasTags,
|
|
28
28
|
HasTerms,
|
|
29
29
|
OwnersInputType,
|
|
30
|
+
ParentContainerInputType,
|
|
30
31
|
TagsInputType,
|
|
31
32
|
TermsInputType,
|
|
32
33
|
make_time_stamp,
|
|
33
34
|
parse_time_stamp,
|
|
34
35
|
)
|
|
36
|
+
from datahub.utilities.sentinels import Auto, auto
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
class Container(
|
|
@@ -54,7 +56,7 @@ class Container(
|
|
|
54
56
|
self,
|
|
55
57
|
/,
|
|
56
58
|
# Identity.
|
|
57
|
-
container_key: ContainerKey
|
|
59
|
+
container_key: ContainerKey,
|
|
58
60
|
*,
|
|
59
61
|
# Container attributes.
|
|
60
62
|
display_name: str,
|
|
@@ -66,12 +68,15 @@ class Container(
|
|
|
66
68
|
created: Optional[datetime] = None,
|
|
67
69
|
last_modified: Optional[datetime] = None,
|
|
68
70
|
# Standard aspects.
|
|
71
|
+
parent_container: Auto | ParentContainerInputType | None = auto,
|
|
69
72
|
subtype: Optional[str] = None,
|
|
70
73
|
owners: Optional[OwnersInputType] = None,
|
|
71
74
|
tags: Optional[TagsInputType] = None,
|
|
72
75
|
terms: Optional[TermsInputType] = None,
|
|
73
76
|
domain: Optional[DomainInputType] = None,
|
|
74
77
|
):
|
|
78
|
+
# Hack: while the type annotations say container_key is always a ContainerKey,
|
|
79
|
+
# we allow ContainerUrn to make the graph-based constructor work.
|
|
75
80
|
if isinstance(container_key, ContainerUrn):
|
|
76
81
|
urn = container_key
|
|
77
82
|
else:
|
|
@@ -85,8 +90,6 @@ class Container(
|
|
|
85
90
|
if isinstance(container_key, ContainerKey):
|
|
86
91
|
self._set_platform_instance(container_key.platform, container_key.instance)
|
|
87
92
|
|
|
88
|
-
self._set_container(container_key.parent_key())
|
|
89
|
-
|
|
90
93
|
self.set_custom_properties(
|
|
91
94
|
{
|
|
92
95
|
**container_key.property_dict(),
|
|
@@ -100,6 +103,18 @@ class Container(
|
|
|
100
103
|
env = container_key.env if container_key.env in ALL_ENV_TYPES else None
|
|
101
104
|
if _INCLUDE_ENV_IN_CONTAINER_PROPERTIES and env is not None:
|
|
102
105
|
self._ensure_container_props().env = env
|
|
106
|
+
else:
|
|
107
|
+
self.set_custom_properties(extra_properties or {})
|
|
108
|
+
|
|
109
|
+
if parent_container is auto:
|
|
110
|
+
if not isinstance(container_key, ContainerKey):
|
|
111
|
+
raise SdkUsageError(
|
|
112
|
+
"Either a container_key or parent_container must be provided"
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
self._set_container(container_key.parent_key())
|
|
116
|
+
else:
|
|
117
|
+
self._set_container(parent_container)
|
|
103
118
|
|
|
104
119
|
if description is not None:
|
|
105
120
|
self.set_description(description)
|
|
@@ -126,7 +141,8 @@ class Container(
|
|
|
126
141
|
@classmethod
|
|
127
142
|
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
128
143
|
assert isinstance(urn, ContainerUrn)
|
|
129
|
-
|
|
144
|
+
|
|
145
|
+
entity = cls(urn, display_name="__dummy_value__", parent_container=None) # type: ignore[arg-type]
|
|
130
146
|
return entity._init_from_graph(current_aspects)
|
|
131
147
|
|
|
132
148
|
def _ensure_container_props(
|
datahub/sdk/dataset.py
CHANGED
|
@@ -13,13 +13,13 @@ from datahub.errors import (
|
|
|
13
13
|
IngestionAttributionWarning,
|
|
14
14
|
ItemNotFoundError,
|
|
15
15
|
SchemaFieldKeyError,
|
|
16
|
+
SdkUsageError,
|
|
16
17
|
)
|
|
17
18
|
from datahub.ingestion.source.sql.sql_types import resolve_sql_type
|
|
18
19
|
from datahub.metadata.urns import DatasetUrn, SchemaFieldUrn, Urn
|
|
19
20
|
from datahub.sdk._attribution import is_ingestion_attribution
|
|
20
21
|
from datahub.sdk._entity import Entity
|
|
21
22
|
from datahub.sdk._shared import (
|
|
22
|
-
ContainerInputType,
|
|
23
23
|
DatasetUrnOrStr,
|
|
24
24
|
DomainInputType,
|
|
25
25
|
HasContainer,
|
|
@@ -30,14 +30,18 @@ from datahub.sdk._shared import (
|
|
|
30
30
|
HasTags,
|
|
31
31
|
HasTerms,
|
|
32
32
|
OwnersInputType,
|
|
33
|
+
ParentContainerInputType,
|
|
34
|
+
TagInputType,
|
|
33
35
|
TagsInputType,
|
|
36
|
+
TermInputType,
|
|
34
37
|
TermsInputType,
|
|
35
38
|
make_time_stamp,
|
|
36
39
|
parse_time_stamp,
|
|
37
40
|
)
|
|
41
|
+
from datahub.sdk._utils import add_list_unique, remove_list_unique
|
|
42
|
+
from datahub.utilities.sentinels import Unset, unset
|
|
38
43
|
|
|
39
44
|
SchemaFieldInputType: TypeAlias = Union[
|
|
40
|
-
str,
|
|
41
45
|
Tuple[str, str], # (name, type)
|
|
42
46
|
Tuple[str, str, str], # (name, type, description)
|
|
43
47
|
models.SchemaFieldClass,
|
|
@@ -271,6 +275,51 @@ class SchemaField:
|
|
|
271
275
|
tags=parsed_tags
|
|
272
276
|
)
|
|
273
277
|
|
|
278
|
+
def add_tag(self, tag: TagInputType) -> None:
    """Add a tag to this field's editable schema entry.

    An existing association with the same tag key is replaced in place.

    Raises:
        SdkUsageError: When ``is_ingestion_attribution()`` is true.
    """
    new_tag = self._parent._parse_tag_association_class(tag)

    if is_ingestion_attribution():
        raise SdkUsageError(
            "Adding field tags in ingestion mode is not yet supported. "
            "Use set_tags instead."
        )

    field = self._ensure_editable_schema_field()
    if field.globalTags is None:
        # Start from an empty tag container so there is a list to add to.
        field.globalTags = models.GlobalTagsClass(tags=[])
    add_list_unique(field.globalTags.tags, key=self._parent._tag_key, item=new_tag)
|
|
296
|
+
|
|
297
|
+
def remove_tag(self, tag: TagInputType) -> None:
    """Remove a tag from this field's base and editable schema entries.

    A tag that is not present is silently ignored.

    Raises:
        SdkUsageError: When ``is_ingestion_attribution()`` is true.
    """
    parsed_tag = self._parent._parse_tag_association_class(tag)

    if is_ingestion_attribution():
        # This is the removal path, so the message says "Removing" (it used to
        # say "Adding", copied from add_tag); remove_term words it the same way.
        raise SdkUsageError(
            "Removing field tags in ingestion mode is not yet supported. "
            "Use set_tags instead."
        )
    else:
        base_field = self._base_schema_field()
        if base_field.globalTags is not None:
            remove_list_unique(
                base_field.globalTags.tags,
                key=self._parent._tag_key,
                item=parsed_tag,
                missing_ok=True,
            )

        editable_field = self._ensure_editable_schema_field()
        if editable_field.globalTags is not None:
            remove_list_unique(
                editable_field.globalTags.tags,
                key=self._parent._tag_key,
                item=parsed_tag,
                # Explicit for symmetry with the base-field call; True is also the default.
                missing_ok=True,
            )
|
|
322
|
+
|
|
274
323
|
@property
|
|
275
324
|
def terms(self) -> Optional[List[models.GlossaryTermAssociationClass]]:
|
|
276
325
|
# TODO: Basically the same implementation as tags - can we share code?
|
|
@@ -287,7 +336,7 @@ class SchemaField:
|
|
|
287
336
|
|
|
288
337
|
return terms
|
|
289
338
|
|
|
290
|
-
def set_terms(self, terms:
|
|
339
|
+
def set_terms(self, terms: TermsInputType) -> None:
|
|
291
340
|
parsed_terms = [
|
|
292
341
|
self._parent._parse_glossary_term_association_class(term) for term in terms
|
|
293
342
|
]
|
|
@@ -318,6 +367,55 @@ class SchemaField:
|
|
|
318
367
|
)
|
|
319
368
|
)
|
|
320
369
|
|
|
370
|
+
def add_term(self, term: TermInputType) -> None:
    """Add a glossary term to this field's editable schema entry.

    An existing association with the same term key is replaced in place.

    Raises:
        SdkUsageError: When ``is_ingestion_attribution()`` is true.
    """
    new_term = self._parent._parse_glossary_term_association_class(term)

    if is_ingestion_attribution():
        raise SdkUsageError(
            "Adding field terms in ingestion mode is not yet supported. "
            "Use set_terms instead."
        )

    field = self._ensure_editable_schema_field()
    if field.glossaryTerms is None:
        # Start from an empty, audit-stamped container so there is a list to add to.
        field.glossaryTerms = models.GlossaryTermsClass(
            terms=[], auditStamp=self._parent._terms_audit_stamp()
        )
    add_list_unique(
        field.glossaryTerms.terms, key=self._parent._terms_key, item=new_term
    )
|
|
391
|
+
|
|
392
|
+
def remove_term(self, term: TermInputType) -> None:
    """Remove a glossary term from this field's base and editable schema entries.

    A term that is not present is silently ignored.

    Raises:
        SdkUsageError: When ``is_ingestion_attribution()`` is true.
    """
    target = self._parent._parse_glossary_term_association_class(term)

    if is_ingestion_attribution():
        raise SdkUsageError(
            "Removing field terms in ingestion mode is not yet supported. "
            "Use set_terms instead."
        )

    # Base schema entry first, then the editable one, as in the original order.
    base_terms = self._base_schema_field().glossaryTerms
    if base_terms is not None:
        remove_list_unique(
            base_terms.terms,
            key=self._parent._terms_key,
            item=target,
            missing_ok=True,
        )

    editable_terms = self._ensure_editable_schema_field().glossaryTerms
    if editable_terms is not None:
        remove_list_unique(
            editable_terms.terms,
            key=self._parent._terms_key,
            item=target,
            missing_ok=True,
        )
|
|
418
|
+
|
|
321
419
|
|
|
322
420
|
class Dataset(
|
|
323
421
|
HasPlatformInstance,
|
|
@@ -352,8 +450,8 @@ class Dataset(
|
|
|
352
450
|
created: Optional[datetime] = None,
|
|
353
451
|
last_modified: Optional[datetime] = None,
|
|
354
452
|
# Standard aspects.
|
|
453
|
+
parent_container: ParentContainerInputType | Unset = unset,
|
|
355
454
|
subtype: Optional[str] = None,
|
|
356
|
-
container: Optional[ContainerInputType] = None,
|
|
357
455
|
owners: Optional[OwnersInputType] = None,
|
|
358
456
|
tags: Optional[TagsInputType] = None,
|
|
359
457
|
terms: Optional[TermsInputType] = None,
|
|
@@ -393,10 +491,10 @@ class Dataset(
|
|
|
393
491
|
if last_modified is not None:
|
|
394
492
|
self.set_last_modified(last_modified)
|
|
395
493
|
|
|
494
|
+
if parent_container is not unset:
|
|
495
|
+
self._set_container(parent_container)
|
|
396
496
|
if subtype is not None:
|
|
397
497
|
self.set_subtype(subtype)
|
|
398
|
-
if container is not None:
|
|
399
|
-
self._set_container(container)
|
|
400
498
|
if owners is not None:
|
|
401
499
|
self.set_owners(owners)
|
|
402
500
|
if tags is not None:
|
|
@@ -537,14 +635,6 @@ class Dataset(
|
|
|
537
635
|
nativeDataType=field_type,
|
|
538
636
|
description=description,
|
|
539
637
|
)
|
|
540
|
-
elif isinstance(schema_field_input, str):
|
|
541
|
-
# TODO: Not sure this branch makes sense - we should probably just require types?
|
|
542
|
-
return models.SchemaFieldClass(
|
|
543
|
-
fieldPath=schema_field_input,
|
|
544
|
-
type=models.SchemaFieldDataTypeClass(models.NullTypeClass()),
|
|
545
|
-
nativeDataType="unknown",
|
|
546
|
-
description=None,
|
|
547
|
-
)
|
|
548
638
|
else:
|
|
549
639
|
assert_never(schema_field_input)
|
|
550
640
|
|
datahub/sdk/main_client.py
CHANGED
|
@@ -41,10 +41,24 @@ class DataHubClient:
|
|
|
41
41
|
|
|
42
42
|
@classmethod
def from_env(cls) -> "DataHubClient":
    """Build a DataHubClient from environment variables or the ~/.datahubenv file.

    DATAHUB_GMS_URL and DATAHUB_GMS_TOKEN are checked first. If they are not
    present, credentials are read from ~/.datahubenv, which can be created
    with the `datahub init` command.

    To specify the server/token in code, use the
    DataHubClient(server=..., token=...) constructor instead.

    Returns:
        A DataHubClient instance.
    """
    # Inspired by the DockerClient.from_env() method.
    # TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
    # That file is part of the "environment", but is not a traditional "env variable".
    return cls(graph=get_default_graph())
|
|
49
63
|
|
|
50
64
|
@property
|
|
@@ -54,3 +68,6 @@ class DataHubClient:
|
|
|
54
68
|
@property
def resolve(self) -> ResolverClient:
    """Return a new ResolverClient constructed from this client."""
    resolver = ResolverClient(self)
    return resolver
|
|
71
|
+
|
|
72
|
+
# TODO: search client
|
|
73
|
+
# TODO: lineage client
|
datahub/specific/dataset.py
CHANGED
|
@@ -15,6 +15,7 @@ from datahub.metadata.schema_classes import (
|
|
|
15
15
|
UpstreamClass as Upstream,
|
|
16
16
|
UpstreamLineageClass as UpstreamLineage,
|
|
17
17
|
)
|
|
18
|
+
from datahub.metadata.urns import DatasetUrn, TagUrn, Urn
|
|
18
19
|
from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
|
|
19
20
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
20
21
|
from datahub.specific.aspect_helpers.structured_properties import (
|
|
@@ -22,8 +23,6 @@ from datahub.specific.aspect_helpers.structured_properties import (
|
|
|
22
23
|
)
|
|
23
24
|
from datahub.specific.aspect_helpers.tags import HasTagsPatch
|
|
24
25
|
from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
25
|
-
from datahub.utilities.urns.tag_urn import TagUrn
|
|
26
|
-
from datahub.utilities.urns.urn import Urn
|
|
27
26
|
|
|
28
27
|
_Parent = TypeVar("_Parent", bound=MetadataPatchProposal)
|
|
29
28
|
|
|
@@ -104,12 +103,12 @@ class DatasetPatchBuilder(
|
|
|
104
103
|
):
|
|
105
104
|
def __init__(
|
|
106
105
|
self,
|
|
107
|
-
urn: str,
|
|
106
|
+
urn: Union[str, DatasetUrn],
|
|
108
107
|
system_metadata: Optional[SystemMetadataClass] = None,
|
|
109
108
|
audit_header: Optional[KafkaAuditHeaderClass] = None,
|
|
110
109
|
) -> None:
|
|
111
110
|
super().__init__(
|
|
112
|
-
urn, system_metadata=system_metadata, audit_header=audit_header
|
|
111
|
+
str(urn), system_metadata=system_metadata, audit_header=audit_header
|
|
113
112
|
)
|
|
114
113
|
|
|
115
114
|
@classmethod
|