acryl-datahub 1.0.0rc8__py3-none-any.whl → 1.0.0rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/METADATA +2445 -2445
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/RECORD +46 -42
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/ingestion/graph/client.py +15 -11
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/superset.py +158 -24
- datahub/metadata/_schema_classes.py +157 -14
- datahub/metadata/_urns/urn_defs.py +58 -58
- datahub/metadata/schema.avsc +23 -10
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_shared.py +88 -3
- datahub/sdk/container.py +7 -1
- datahub/sdk/dataset.py +7 -1
- datahub/sdk/{_entity.py → entity.py} +4 -0
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +7 -1
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/top_level.txt +0 -0
|
@@ -137,6 +137,24 @@
|
|
|
137
137
|
"name": "sortId",
|
|
138
138
|
"doc": "Sort identifier that determines where a version lives in the order of the Version Set.\nWhat this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation."
|
|
139
139
|
},
|
|
140
|
+
{
|
|
141
|
+
"type": {
|
|
142
|
+
"type": "enum",
|
|
143
|
+
"symbolDocs": {
|
|
144
|
+
"ALPHANUMERIC_GENERATED_BY_DATAHUB": "String managed by DataHub. Currently, an 8 character alphabetical string.",
|
|
145
|
+
"LEXICOGRAPHIC_STRING": "String sorted lexicographically."
|
|
146
|
+
},
|
|
147
|
+
"name": "VersioningScheme",
|
|
148
|
+
"namespace": "com.linkedin.pegasus2avro.versionset",
|
|
149
|
+
"symbols": [
|
|
150
|
+
"LEXICOGRAPHIC_STRING",
|
|
151
|
+
"ALPHANUMERIC_GENERATED_BY_DATAHUB"
|
|
152
|
+
]
|
|
153
|
+
},
|
|
154
|
+
"name": "versioningScheme",
|
|
155
|
+
"default": "LEXICOGRAPHIC_STRING",
|
|
156
|
+
"doc": "What versioning scheme `sortId` belongs to.\nDefaults to a plain string that is lexicographically sorted."
|
|
157
|
+
},
|
|
140
158
|
{
|
|
141
159
|
"type": [
|
|
142
160
|
"null",
|
|
@@ -36,9 +36,14 @@
|
|
|
36
36
|
{
|
|
37
37
|
"type": {
|
|
38
38
|
"type": "enum",
|
|
39
|
+
"symbolDocs": {
|
|
40
|
+
"ALPHANUMERIC_GENERATED_BY_DATAHUB": "String managed by DataHub. Currently, an 8 character alphabetical string.",
|
|
41
|
+
"LEXICOGRAPHIC_STRING": "String sorted lexicographically."
|
|
42
|
+
},
|
|
39
43
|
"name": "VersioningScheme",
|
|
40
44
|
"namespace": "com.linkedin.pegasus2avro.versionset",
|
|
41
45
|
"symbols": [
|
|
46
|
+
"LEXICOGRAPHIC_STRING",
|
|
42
47
|
"ALPHANUMERIC_GENERATED_BY_DATAHUB"
|
|
43
48
|
]
|
|
44
49
|
},
|
|
File without changes
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
from typing import Any, Callable, Optional, TypeVar, cast
|
|
3
|
+
|
|
4
|
+
# Define a type variable for the decorator
|
|
5
|
+
F = TypeVar("F", bound=Callable[..., Any])
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Check which Pydantic version is installed
|
|
9
|
+
def get_pydantic_version() -> int:
    """Report which major Pydantic API (v1 or v2) is installed.

    Returns:
        ``1`` when ``pydantic.__version__`` starts with ``"1."``, and also
        when Pydantic cannot be imported or the version lookup raises
        ``AttributeError``; ``2`` for any other version string.
    """
    try:
        import pydantic

        is_v1 = pydantic.__version__.startswith("1.")
    except (ImportError, AttributeError):
        # The version cannot be determined; fall back to v1.
        return 1
    return 1 if is_v1 else 2
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
PYDANTIC_VERSION = get_pydantic_version()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Create compatibility layer for dict-like methods
|
|
25
|
+
def compat_dict_method(v1_method: Optional[Callable] = None) -> Callable:
    """Decorate a ``dict``-style method so it works on Pydantic v1 and v2.

    With Pydantic v1 the decorated method itself is called with the
    caller's arguments. With Pydantic v2 the decorated method is bypassed
    and the call is redirected to ``self.model_dump``.

    Usable both bare (``@compat_dict_method``) and called
    (``@compat_dict_method()``).

    Args:
        v1_method: The method being decorated when the decorator is applied
            bare; ``None`` when it is applied with parentheses.

    Returns:
        The wrapped method when ``v1_method`` is given, otherwise the
        decorator that will wrap it.
    """

    def decorator(func: F) -> F:
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            if PYDANTIC_VERSION < 2:
                return func(self, *args, **kwargs)

            # The v1 keyword arguments (exclude, exclude_unset,
            # exclude_defaults, exclude_none, by_alias) are passed to
            # model_dump under the same names. Positional arguments are
            # not forwarded on this path.
            forwarded = dict(kwargs)
            # When the caller gave no `exclude`, pass an empty set; any
            # value the caller did pass is kept as-is.
            forwarded.setdefault("exclude", set())
            return self.model_dump(**forwarded)

        return cast(F, wrapper)

    # Allow use as both @compat_dict_method and @compat_dict_method()
    return decorator if v1_method is None else decorator(v1_method)
|
datahub/sdk/__init__.py
CHANGED
|
@@ -20,6 +20,7 @@ from datahub.metadata.urns import (
|
|
|
20
20
|
from datahub.sdk.container import Container
|
|
21
21
|
from datahub.sdk.dataset import Dataset
|
|
22
22
|
from datahub.sdk.main_client import DataHubClient
|
|
23
|
+
from datahub.sdk.search_filters import Filter, FilterDsl
|
|
23
24
|
|
|
24
25
|
# We want to print out the warning if people do `from datahub.sdk import X`.
|
|
25
26
|
# But we don't want to print out warnings if they're doing a more direct
|
datahub/sdk/_all_entities.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from typing import Dict, List, Type
|
|
2
2
|
|
|
3
|
-
from datahub.sdk._entity import Entity
|
|
4
3
|
from datahub.sdk.container import Container
|
|
5
4
|
from datahub.sdk.dataset import Dataset
|
|
5
|
+
from datahub.sdk.entity import Entity
|
|
6
6
|
|
|
7
7
|
# TODO: Is there a better way to declare this?
|
|
8
8
|
ENTITY_CLASSES_LIST: List[Type[Entity]] = [
|
datahub/sdk/_shared.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import (
|
|
|
7
7
|
Callable,
|
|
8
8
|
List,
|
|
9
9
|
Optional,
|
|
10
|
+
Sequence,
|
|
10
11
|
Tuple,
|
|
11
12
|
Union,
|
|
12
13
|
)
|
|
@@ -36,8 +37,8 @@ from datahub.metadata.urns import (
|
|
|
36
37
|
TagUrn,
|
|
37
38
|
Urn,
|
|
38
39
|
)
|
|
39
|
-
from datahub.sdk._entity import Entity
|
|
40
40
|
from datahub.sdk._utils import add_list_unique, remove_list_unique
|
|
41
|
+
from datahub.sdk.entity import Entity
|
|
41
42
|
from datahub.utilities.urns.error import InvalidUrnError
|
|
42
43
|
|
|
43
44
|
if TYPE_CHECKING:
|
|
@@ -49,6 +50,8 @@ DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
|
|
|
49
50
|
|
|
50
51
|
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
|
|
51
52
|
|
|
53
|
+
_DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
|
|
54
|
+
|
|
52
55
|
|
|
53
56
|
def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
|
|
54
57
|
if ts is None:
|
|
@@ -438,8 +441,7 @@ class HasTerms(Entity):
|
|
|
438
441
|
def _terms_audit_stamp(self) -> models.AuditStampClass:
|
|
439
442
|
return models.AuditStampClass(
|
|
440
443
|
time=0,
|
|
441
|
-
|
|
442
|
-
actor=CorpUserUrn("__ingestion").urn(),
|
|
444
|
+
actor=_DEFAULT_ACTOR_URN,
|
|
443
445
|
)
|
|
444
446
|
|
|
445
447
|
def set_terms(self, terms: TermsInputType) -> None:
|
|
@@ -493,3 +495,86 @@ class HasDomain(Entity):
|
|
|
493
495
|
def set_domain(self, domain: DomainInputType) -> None:
    """Set the entity's domains aspect to the single given domain."""
    # Round-trip through DomainUrn; this is basically a type assertion.
    validated_urn = str(DomainUrn.from_string(domain))
    domains_aspect = models.DomainsClass(domains=[validated_urn])
    self._set_aspect(domains_aspect)
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
LinkInputType: TypeAlias = Union[
|
|
501
|
+
str,
|
|
502
|
+
Tuple[str, str], # url, description
|
|
503
|
+
models.InstitutionalMemoryMetadataClass,
|
|
504
|
+
]
|
|
505
|
+
LinksInputType: TypeAlias = Sequence[LinkInputType]
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class HasInstitutionalMemory(Entity):
    """Mixin exposing an entity's links.

    Internally the aspect is called institutionalMemory, and so much of the
    code uses that name. However, the public-facing API is called "links",
    since that's what we call these in the UI.
    """

    __slots__ = ()

    def _ensure_institutional_memory(
        self,
    ) -> List[models.InstitutionalMemoryMetadataClass]:
        """Return the ``elements`` list of the institutional memory aspect.

        An empty aspect is handed to ``_setdefault_aspect`` as the default.
        """
        return self._setdefault_aspect(
            models.InstitutionalMemoryClass(elements=[])
        ).elements

    @property
    def links(self) -> Optional[List[models.InstitutionalMemoryMetadataClass]]:
        """The entity's links, or ``None`` if ``_get_aspect`` returns a falsy value."""
        if institutional_memory := self._get_aspect(models.InstitutionalMemoryClass):
            return institutional_memory.elements
        return None

    @classmethod
    def _institutional_memory_audit_stamp(cls) -> models.AuditStampClass:
        """Build the audit stamp used for links created from str/tuple input.

        The stamp uses a fixed time of 0 and the module's default actor URN.
        """
        return models.AuditStampClass(
            time=0,
            actor=_DEFAULT_ACTOR_URN,
        )

    @classmethod
    def _parse_link_association_class(
        cls, link: LinkInputType
    ) -> models.InstitutionalMemoryMetadataClass:
        """Normalize an accepted link input into a metadata object.

        Args:
            link: One of: an ``InstitutionalMemoryMetadataClass`` (returned
                unchanged), a URL string (also used as the description), or
                a ``(url, description)`` 2-tuple.

        Returns:
            The metadata object describing the link.
        """
        if isinstance(link, models.InstitutionalMemoryMetadataClass):
            return link
        elif isinstance(link, str):
            # A bare URL doubles as its own description.
            return models.InstitutionalMemoryMetadataClass(
                url=link,
                description=link,
                createStamp=cls._institutional_memory_audit_stamp(),
            )
        elif isinstance(link, tuple) and len(link) == 2:
            url, description = link
            return models.InstitutionalMemoryMetadataClass(
                url=url,
                description=description,
                createStamp=cls._institutional_memory_audit_stamp(),
            )
        else:
            assert_never(link)

    def set_links(self, links: LinksInputType) -> None:
        """Replace the institutional memory aspect with the given links."""
        self._set_aspect(
            models.InstitutionalMemoryClass(
                elements=[self._parse_link_association_class(link) for link in links]
            )
        )

    @classmethod
    def _link_key(cls, link: models.InstitutionalMemoryMetadataClass) -> str:
        """Return the key used to match links in add/remove: the link's URL."""
        return link.url

    def add_link(self, link: LinkInputType) -> None:
        """Add a link via ``add_list_unique``, keyed by URL."""
        add_list_unique(
            self._ensure_institutional_memory(),
            self._link_key,
            self._parse_link_association_class(link),
        )

    def remove_link(self, link: LinkInputType) -> None:
        """Remove a link via ``remove_list_unique``, keyed by URL."""
        remove_list_unique(
            self._ensure_institutional_memory(),
            self._link_key,
            self._parse_link_association_class(link),
        )
|
datahub/sdk/container.py
CHANGED
|
@@ -16,16 +16,17 @@ from datahub.metadata.urns import (
|
|
|
16
16
|
ContainerUrn,
|
|
17
17
|
Urn,
|
|
18
18
|
)
|
|
19
|
-
from datahub.sdk._entity import Entity, ExtraAspectsType
|
|
20
19
|
from datahub.sdk._shared import (
|
|
21
20
|
DomainInputType,
|
|
22
21
|
HasContainer,
|
|
23
22
|
HasDomain,
|
|
23
|
+
HasInstitutionalMemory,
|
|
24
24
|
HasOwnership,
|
|
25
25
|
HasPlatformInstance,
|
|
26
26
|
HasSubtype,
|
|
27
27
|
HasTags,
|
|
28
28
|
HasTerms,
|
|
29
|
+
LinksInputType,
|
|
29
30
|
OwnersInputType,
|
|
30
31
|
ParentContainerInputType,
|
|
31
32
|
TagsInputType,
|
|
@@ -33,6 +34,7 @@ from datahub.sdk._shared import (
|
|
|
33
34
|
make_time_stamp,
|
|
34
35
|
parse_time_stamp,
|
|
35
36
|
)
|
|
37
|
+
from datahub.sdk.entity import Entity, ExtraAspectsType
|
|
36
38
|
from datahub.utilities.sentinels import Auto, auto
|
|
37
39
|
|
|
38
40
|
|
|
@@ -41,6 +43,7 @@ class Container(
|
|
|
41
43
|
HasSubtype,
|
|
42
44
|
HasContainer,
|
|
43
45
|
HasOwnership,
|
|
46
|
+
HasInstitutionalMemory,
|
|
44
47
|
HasTags,
|
|
45
48
|
HasTerms,
|
|
46
49
|
HasDomain,
|
|
@@ -71,6 +74,7 @@ class Container(
|
|
|
71
74
|
parent_container: Auto | ParentContainerInputType | None = auto,
|
|
72
75
|
subtype: Optional[str] = None,
|
|
73
76
|
owners: Optional[OwnersInputType] = None,
|
|
77
|
+
links: Optional[LinksInputType] = None,
|
|
74
78
|
tags: Optional[TagsInputType] = None,
|
|
75
79
|
terms: Optional[TermsInputType] = None,
|
|
76
80
|
domain: Optional[DomainInputType] = None,
|
|
@@ -133,6 +137,8 @@ class Container(
|
|
|
133
137
|
self.set_subtype(subtype)
|
|
134
138
|
if owners is not None:
|
|
135
139
|
self.set_owners(owners)
|
|
140
|
+
if links is not None:
|
|
141
|
+
self.set_links(links)
|
|
136
142
|
if tags is not None:
|
|
137
143
|
self.set_tags(tags)
|
|
138
144
|
if terms is not None:
|
datahub/sdk/dataset.py
CHANGED
|
@@ -18,17 +18,18 @@ from datahub.errors import (
|
|
|
18
18
|
from datahub.ingestion.source.sql.sql_types import resolve_sql_type
|
|
19
19
|
from datahub.metadata.urns import DatasetUrn, SchemaFieldUrn, Urn
|
|
20
20
|
from datahub.sdk._attribution import is_ingestion_attribution
|
|
21
|
-
from datahub.sdk._entity import Entity, ExtraAspectsType
|
|
22
21
|
from datahub.sdk._shared import (
|
|
23
22
|
DatasetUrnOrStr,
|
|
24
23
|
DomainInputType,
|
|
25
24
|
HasContainer,
|
|
26
25
|
HasDomain,
|
|
26
|
+
HasInstitutionalMemory,
|
|
27
27
|
HasOwnership,
|
|
28
28
|
HasPlatformInstance,
|
|
29
29
|
HasSubtype,
|
|
30
30
|
HasTags,
|
|
31
31
|
HasTerms,
|
|
32
|
+
LinksInputType,
|
|
32
33
|
OwnersInputType,
|
|
33
34
|
ParentContainerInputType,
|
|
34
35
|
TagInputType,
|
|
@@ -39,6 +40,7 @@ from datahub.sdk._shared import (
|
|
|
39
40
|
parse_time_stamp,
|
|
40
41
|
)
|
|
41
42
|
from datahub.sdk._utils import add_list_unique, remove_list_unique
|
|
43
|
+
from datahub.sdk.entity import Entity, ExtraAspectsType
|
|
42
44
|
from datahub.utilities.sentinels import Unset, unset
|
|
43
45
|
|
|
44
46
|
SchemaFieldInputType: TypeAlias = Union[
|
|
@@ -422,6 +424,7 @@ class Dataset(
|
|
|
422
424
|
HasSubtype,
|
|
423
425
|
HasContainer,
|
|
424
426
|
HasOwnership,
|
|
427
|
+
HasInstitutionalMemory,
|
|
425
428
|
HasTags,
|
|
426
429
|
HasTerms,
|
|
427
430
|
HasDomain,
|
|
@@ -453,6 +456,7 @@ class Dataset(
|
|
|
453
456
|
parent_container: ParentContainerInputType | Unset = unset,
|
|
454
457
|
subtype: Optional[str] = None,
|
|
455
458
|
owners: Optional[OwnersInputType] = None,
|
|
459
|
+
links: Optional[LinksInputType] = None,
|
|
456
460
|
tags: Optional[TagsInputType] = None,
|
|
457
461
|
terms: Optional[TermsInputType] = None,
|
|
458
462
|
# TODO structured_properties
|
|
@@ -499,6 +503,8 @@ class Dataset(
|
|
|
499
503
|
self.set_subtype(subtype)
|
|
500
504
|
if owners is not None:
|
|
501
505
|
self.set_owners(owners)
|
|
506
|
+
if links is not None:
|
|
507
|
+
self.set_links(links)
|
|
502
508
|
if tags is not None:
|
|
503
509
|
self.set_tags(tags)
|
|
504
510
|
if terms is not None:
|
|
@@ -56,6 +56,10 @@ class Entity:
|
|
|
56
56
|
@abc.abstractmethod
|
|
57
57
|
def get_urn_type(cls) -> Type[_SpecificUrn]: ...
|
|
58
58
|
|
|
59
|
+
@classmethod
def entity_type_name(cls) -> str:
    """Return the ``ENTITY_TYPE`` of this entity class's URN type."""
    urn_type = cls.get_urn_type()
    return urn_type.ENTITY_TYPE
|
|
62
|
+
|
|
59
63
|
@property
|
|
60
64
|
def urn(self) -> _SpecificUrn:
|
|
61
65
|
return self._urn
|
datahub/sdk/entity_client.py
CHANGED
|
@@ -14,10 +14,10 @@ from datahub.metadata.urns import (
|
|
|
14
14
|
Urn,
|
|
15
15
|
)
|
|
16
16
|
from datahub.sdk._all_entities import ENTITY_CLASSES
|
|
17
|
-
from datahub.sdk._entity import Entity
|
|
18
17
|
from datahub.sdk._shared import UrnOrStr
|
|
19
18
|
from datahub.sdk.container import Container
|
|
20
19
|
from datahub.sdk.dataset import Dataset
|
|
20
|
+
from datahub.sdk.entity import Entity
|
|
21
21
|
|
|
22
22
|
if TYPE_CHECKING:
|
|
23
23
|
from datahub.sdk.main_client import DataHubClient
|
datahub/sdk/main_client.py
CHANGED
|
@@ -7,6 +7,7 @@ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
|
7
7
|
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
8
8
|
from datahub.sdk.entity_client import EntityClient
|
|
9
9
|
from datahub.sdk.resolver_client import ResolverClient
|
|
10
|
+
from datahub.sdk.search_client import SearchClient
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class DataHubClient:
|
|
@@ -39,6 +40,8 @@ class DataHubClient:
|
|
|
39
40
|
|
|
40
41
|
self._graph = graph
|
|
41
42
|
|
|
43
|
+
# TODO: test connection
|
|
44
|
+
|
|
42
45
|
@classmethod
|
|
43
46
|
def from_env(cls) -> "DataHubClient":
|
|
44
47
|
"""Initialize a DataHubClient from the environment variables or ~/.datahubenv file.
|
|
@@ -69,5 +72,8 @@ class DataHubClient:
|
|
|
69
72
|
def resolve(self) -> ResolverClient:
|
|
70
73
|
return ResolverClient(self)
|
|
71
74
|
|
|
72
|
-
|
|
75
|
+
@property
def search(self) -> SearchClient:
    """Return a new ``SearchClient`` bound to this client."""
    search_client = SearchClient(self)
    return search_client
|
|
78
|
+
|
|
73
79
|
# TODO: lineage client
|
datahub/sdk/resolver_client.py
CHANGED
|
@@ -9,6 +9,7 @@ from datahub.metadata.urns import (
|
|
|
9
9
|
DomainUrn,
|
|
10
10
|
GlossaryTermUrn,
|
|
11
11
|
)
|
|
12
|
+
from datahub.sdk.search_filters import Filter, FilterDsl as F
|
|
12
13
|
|
|
13
14
|
if TYPE_CHECKING:
|
|
14
15
|
from datahub.sdk.main_client import DataHubClient
|
|
@@ -38,37 +39,28 @@ class ResolverClient:
|
|
|
38
39
|
self, *, name: Optional[str] = None, email: Optional[str] = None
|
|
39
40
|
) -> CorpUserUrn:
|
|
40
41
|
filter_explanation: str
|
|
41
|
-
|
|
42
|
+
filter: Filter
|
|
42
43
|
if name is not None:
|
|
43
44
|
if email is not None:
|
|
44
45
|
raise SdkUsageError("Cannot specify both name and email for auto_user")
|
|
45
|
-
#
|
|
46
|
+
# We're filtering on both fullName and displayName. It's not clear
|
|
47
|
+
# what the right behavior is here.
|
|
46
48
|
filter_explanation = f"with name {name}"
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
"values": [name],
|
|
51
|
-
"condition": "EQUAL",
|
|
52
|
-
}
|
|
49
|
+
filter = F.or_(
|
|
50
|
+
F.custom_filter("fullName", "EQUAL", [name]),
|
|
51
|
+
F.custom_filter("displayName", "EQUAL", [name]),
|
|
53
52
|
)
|
|
54
53
|
elif email is not None:
|
|
55
54
|
filter_explanation = f"with email {email}"
|
|
56
|
-
|
|
57
|
-
{
|
|
58
|
-
"field": "email",
|
|
59
|
-
"values": [email],
|
|
60
|
-
"condition": "EQUAL",
|
|
61
|
-
}
|
|
62
|
-
)
|
|
55
|
+
filter = F.custom_filter("email", "EQUAL", [email])
|
|
63
56
|
else:
|
|
64
57
|
raise SdkUsageError("Must specify either name or email for auto_user")
|
|
65
58
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
extraFilters=filters,
|
|
70
|
-
)
|
|
59
|
+
filter = F.and_(
|
|
60
|
+
F.entity_type(CorpUserUrn.ENTITY_TYPE),
|
|
61
|
+
filter,
|
|
71
62
|
)
|
|
63
|
+
users = list(self._client.search.get_urns(filter=filter))
|
|
72
64
|
if len(users) == 0:
|
|
73
65
|
# TODO: In auto methods, should we just create the user/domain/etc if it doesn't exist?
|
|
74
66
|
raise ItemNotFoundError(f"User {filter_explanation} not found")
|
|
@@ -82,15 +74,11 @@ class ResolverClient:
|
|
|
82
74
|
def term(self, *, name: str) -> GlossaryTermUrn:
|
|
83
75
|
# TODO: Add some limits on the graph fetch
|
|
84
76
|
terms = list(
|
|
85
|
-
self.
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
"values": [name],
|
|
91
|
-
"condition": "EQUAL",
|
|
92
|
-
}
|
|
93
|
-
],
|
|
77
|
+
self._client.search.get_urns(
|
|
78
|
+
filter=F.and_(
|
|
79
|
+
F.entity_type(GlossaryTermUrn.ENTITY_TYPE),
|
|
80
|
+
F.custom_filter("name", "EQUAL", [name]),
|
|
81
|
+
),
|
|
94
82
|
)
|
|
95
83
|
)
|
|
96
84
|
if len(terms) == 0:
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import (
|
|
4
|
+
TYPE_CHECKING,
|
|
5
|
+
Dict,
|
|
6
|
+
Iterable,
|
|
7
|
+
List,
|
|
8
|
+
Optional,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from datahub.ingestion.graph.filters import RawSearchFilterRule
|
|
12
|
+
from datahub.metadata.urns import Urn
|
|
13
|
+
from datahub.sdk.search_filters import Filter
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from datahub.sdk.main_client import DataHubClient
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def compile_filters(
    filter: Optional[Filter],
) -> Optional[List[Dict[str, List[RawSearchFilterRule]]]]:
    """Convert a ``Filter`` into raw "and" clauses.

    Args:
        filter: The filter to compile, or ``None``.

    Returns:
        ``None`` when ``filter`` is ``None``. Otherwise a list with one
        ``{"and": [...]}`` dict per clause returned by ``filter.compile()``,
        where each rule has been converted with ``to_raw()``.
    """
    # TODO: Not every filter type is supported for every entity type.
    # If we can detect issues with the filters at compile time, we should
    # raise an error.

    if filter is None:
        return None

    raw_clauses: List[Dict[str, List[RawSearchFilterRule]]] = []
    for clause in filter.compile():
        raw_rules = [rule.to_raw() for rule in clause["and"]]
        raw_clauses.append({"and": raw_rules})
    return raw_clauses
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SearchClient:
    """Search helper bound to a ``DataHubClient``."""

    def __init__(self, client: DataHubClient):
        self._client = client

    def get_urns(
        self,
        query: Optional[str] = None,
        filter: Optional[Filter] = None,
    ) -> Iterable[Urn]:
        """Lazily yield the URNs matching a query and/or filter.

        Args:
            query: Optional query string passed to the graph client.
            filter: Optional filter; compiled with ``compile_filters`` and
                passed to the graph client as ``extra_or_filters``.

        Yields:
            Each URN string from the graph client's ``get_urns_by_filter``,
            parsed with ``Urn.from_string``.
        """
        # TODO: Add better limit / pagination support.
        graph = self._client._graph
        raw_urns = graph.get_urns_by_filter(
            query=query,
            extra_or_filters=compile_filters(filter),
        )
        for raw_urn in raw_urns:
            yield Urn.from_string(raw_urn)
|