acryl-datahub 0.15.0.5rc10__py3-none-any.whl → 0.15.0.6rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/METADATA +2482 -2482
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/RECORD +35 -24
- datahub/_version.py +1 -1
- datahub/errors.py +35 -0
- datahub/ingestion/source/common/subtypes.py +1 -0
- datahub/ingestion/source/mongodb.py +17 -16
- datahub/ingestion/source/powerbi/config.py +1 -0
- datahub/ingestion/source/powerbi/powerbi.py +28 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +6 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +11 -36
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +17 -4
- datahub/ingestion/source/s3/source.py +14 -5
- datahub/ingestion/source/snowflake/constants.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +45 -10
- datahub/ingestion/source/snowflake/snowflake_query.py +20 -1
- datahub/ingestion/source/snowflake/snowflake_report.py +6 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +108 -4
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +298 -69
- datahub/ingestion/source/snowflake/snowflake_utils.py +17 -8
- datahub/ingestion/source/snowflake/snowflake_v2.py +15 -3
- datahub/sdk/__init__.py +33 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_attribution.py +48 -0
- datahub/sdk/_entity.py +89 -0
- datahub/sdk/_shared.py +338 -0
- datahub/sdk/container.py +193 -0
- datahub/sdk/dataset.py +584 -0
- datahub/sdk/entity_client.py +115 -0
- datahub/sdk/main_client.py +56 -0
- datahub/sdk/resolver_client.py +101 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional, overload
|
|
4
|
+
|
|
5
|
+
from datahub.errors import SdkUsageError
|
|
6
|
+
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
7
|
+
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
8
|
+
from datahub.sdk.entity_client import EntityClient
|
|
9
|
+
from datahub.sdk.resolver_client import ResolverClient
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataHubClient:
    """Top-level SDK client that wraps a single ``DataHubGraph`` connection.

    Exactly one connection source must be passed, always by keyword:

    * ``server`` (optionally with ``token``): a new graph client is built
      from a ``DatahubClientConfig`` created from these two values.
    * ``config``: a new graph client is built from the given config.
    * ``graph``: the given graph client is used as-is.

    ``token`` is only read when ``server`` is given.

    Raises:
        SdkUsageError: If more than one of ``server``, ``config`` and
            ``graph`` is supplied, or if none of them is.
    """

    @overload
    def __init__(self, *, server: str, token: Optional[str] = None): ...
    @overload
    def __init__(self, *, config: DatahubClientConfig): ...
    @overload
    def __init__(self, *, graph: DataHubGraph): ...
    def __init__(
        self,
        *,
        server: Optional[str] = None,
        token: Optional[str] = None,
        graph: Optional[DataHubGraph] = None,
        config: Optional[DatahubClientConfig] = None,
    ):
        # Step 1: a server URL is turned into a config, after making sure it
        # does not compete with an explicitly passed config or graph.
        if server is not None:
            if config is not None:
                raise SdkUsageError("Cannot specify both server and config")
            if graph is not None:
                raise SdkUsageError("Cannot specify both server and graph")
            config = DatahubClientConfig(server=server, token=token)

        # Step 2: a config (explicit, or derived from the server above) is
        # turned into a graph client.
        if config is not None:
            if graph is not None:
                raise SdkUsageError("Cannot specify both config and graph")
            graph = DataHubGraph(config=config)

        # Step 3: by now a graph must exist, either passed in or built above.
        if graph is None:
            raise SdkUsageError("Must specify either server, config, or graph")

        self._graph = graph

    @classmethod
    def from_env(cls) -> "DataHubClient":
        """Create a client around the graph returned by ``get_default_graph()``."""
        # Inspired by the DockerClient.from_env() method.
        # TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
        # That file is part of the "environment", but is not a traditional "env variable".
        return cls(graph=get_default_graph())

    @property
    def entities(self) -> EntityClient:
        """A new ``EntityClient`` bound to this client (created on every access)."""
        return EntityClient(self)

    @property
    def resolve(self) -> ResolverClient:
        """A new ``ResolverClient`` bound to this client (created on every access)."""
        return ResolverClient(self)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Optional, overload
|
|
4
|
+
|
|
5
|
+
from datahub.errors import ItemNotFoundError, MultipleItemsFoundError, SdkUsageError
|
|
6
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
7
|
+
from datahub.metadata.urns import (
|
|
8
|
+
CorpUserUrn,
|
|
9
|
+
DomainUrn,
|
|
10
|
+
GlossaryTermUrn,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from datahub.sdk.main_client import DataHubClient
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ResolverClient:
    """Resolve human-friendly identifiers (names, emails) into DataHub URNs.

    Every lookup is executed through the graph client held by the owning
    ``DataHubClient``.
    """

    def __init__(self, client: DataHubClient):
        self._client = client

    # TODO: add caching to this method

    @property
    def _graph(self) -> DataHubGraph:
        """Graph client of the owning ``DataHubClient``."""
        return self._client._graph

    def domain(self, *, name: str) -> DomainUrn:
        """Return the URN of the domain called ``name``.

        Raises:
            ItemNotFoundError: If the graph lookup returns ``None``.
        """
        urn_str = self._graph.get_domain_urn_by_name(name)
        if urn_str is None:
            raise ItemNotFoundError(f"Domain with name {name} not found")
        return DomainUrn.from_string(urn_str)

    @overload
    def user(self, *, name: str) -> CorpUserUrn: ...
    @overload
    def user(self, *, email: str) -> CorpUserUrn: ...
    def user(
        self, *, name: Optional[str] = None, email: Optional[str] = None
    ) -> CorpUserUrn:
        """Return the URN of the single user matching ``name`` or ``email``.

        Exactly one of the two keyword arguments must be given. ``name`` is
        matched against the ``fullName`` field and ``email`` against the
        ``email`` field, both with an ``EQUAL`` condition.

        Raises:
            SdkUsageError: If both or neither of ``name`` / ``email`` are given.
            ItemNotFoundError: If no user matches.
            MultipleItemsFoundError: If more than one user matches.
        """
        filter_explanation: str
        filters = []
        if name is not None:
            if email is not None:
                raise SdkUsageError("Cannot specify both name and email for user")
            # TODO: do we filter on displayName or fullName?
            filter_explanation = f"with name {name}"
            filters.append(
                {
                    "field": "fullName",
                    "values": [name],
                    "condition": "EQUAL",
                }
            )
        elif email is not None:
            filter_explanation = f"with email {email}"
            filters.append(
                {
                    "field": "email",
                    "values": [email],
                    "condition": "EQUAL",
                }
            )
        else:
            raise SdkUsageError("Must specify either name or email for user")

        users = list(
            self._graph.get_urns_by_filter(
                entity_types=[CorpUserUrn.ENTITY_TYPE],
                extraFilters=filters,
            )
        )
        if not users:
            # TODO: In auto methods, should we just create the user/domain/etc if it doesn't exist?
            raise ItemNotFoundError(f"User {filter_explanation} not found")
        if len(users) > 1:
            raise MultipleItemsFoundError(
                f"Multiple users found {filter_explanation}: {users}"
            )
        return CorpUserUrn.from_string(users[0])

    def term(self, *, name: str) -> GlossaryTermUrn:
        """Return the URN of the single glossary term whose ``id`` equals ``name``.

        Raises:
            ItemNotFoundError: If no term matches.
            MultipleItemsFoundError: If more than one term matches.
        """
        # TODO: Add some limits on the graph fetch
        terms = list(
            self._graph.get_urns_by_filter(
                entity_types=[GlossaryTermUrn.ENTITY_TYPE],
                extraFilters=[
                    {
                        "field": "id",
                        "values": [name],
                        "condition": "EQUAL",
                    }
                ],
            )
        )
        if not terms:
            raise ItemNotFoundError(f"Term with name {name} not found")
        if len(terms) > 1:
            # Same error type as the ambiguous-match case in user().
            # NOTE(review): confirm MultipleItemsFoundError derives from
            # SdkUsageError in datahub.errors, so that existing
            # `except SdkUsageError` handlers keep catching this.
            raise MultipleItemsFoundError(
                f"Multiple terms found with name {name}: {terms}"
            )
        return GlossaryTermUrn.from_string(terms[0])
|
|
File without changes
|
|
File without changes
|
{acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|