acryl-datahub 1.0.0.3rc9__py3-none-any.whl → 1.0.0.3rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/METADATA +2524 -2471
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/RECORD +87 -87
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/datajob/dataflow.py +3 -3
- datahub/api/entities/forms/forms.py +34 -34
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/operation.py +4 -4
- datahub/cli/check_cli.py +3 -2
- datahub/cli/config_utils.py +2 -2
- datahub/cli/delete_cli.py +6 -5
- datahub/cli/docker_cli.py +2 -2
- datahub/cli/exists_cli.py +2 -1
- datahub/cli/get_cli.py +2 -1
- datahub/cli/iceberg_cli.py +6 -5
- datahub/cli/ingest_cli.py +9 -6
- datahub/cli/migrate.py +4 -3
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +3 -2
- datahub/cli/specific/assertions_cli.py +2 -1
- datahub/cli/specific/datacontract_cli.py +3 -2
- datahub/cli/specific/dataproduct_cli.py +10 -9
- datahub/cli/specific/dataset_cli.py +4 -3
- datahub/cli/specific/forms_cli.py +2 -1
- datahub/cli/specific/group_cli.py +2 -1
- datahub/cli/specific/structuredproperties_cli.py +4 -3
- datahub/cli/specific/user_cli.py +2 -1
- datahub/cli/state_cli.py +2 -1
- datahub/cli/timeline_cli.py +2 -1
- datahub/configuration/source_common.py +1 -1
- datahub/emitter/request_helper.py +116 -3
- datahub/emitter/rest_emitter.py +163 -93
- datahub/entrypoints.py +2 -1
- datahub/ingestion/api/source.py +2 -5
- datahub/ingestion/glossary/classification_mixin.py +4 -2
- datahub/ingestion/graph/client.py +16 -7
- datahub/ingestion/graph/config.py +14 -0
- datahub/ingestion/graph/filters.py +1 -1
- datahub/ingestion/run/pipeline.py +3 -2
- datahub/ingestion/run/pipeline_config.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +5 -6
- datahub/ingestion/source/apply/datahub_apply.py +2 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
- datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
- datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
- datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
- datahub/ingestion/source/dbt/dbt_common.py +10 -2
- datahub/ingestion/source/dbt/dbt_core.py +82 -42
- datahub/ingestion/source/feast.py +4 -4
- datahub/ingestion/source/ge_data_profiler.py +2 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +7 -1
- datahub/ingestion/source/metadata/lineage.py +2 -1
- datahub/ingestion/source/mode.py +74 -28
- datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
- datahub/ingestion/source/powerbi/config.py +1 -1
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/redshift/usage.py +10 -9
- datahub/ingestion/source/sql/clickhouse.py +5 -1
- datahub/ingestion/source/sql/druid.py +7 -2
- datahub/ingestion/source/sql/oracle.py +6 -2
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
- datahub/integrations/assertion/common.py +3 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +490 -490
- datahub/metadata/_urns/urn_defs.py +1786 -1786
- datahub/metadata/schema.avsc +17364 -16988
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/main_client.py +2 -2
- datahub/secret/datahub_secret_store.py +2 -1
- datahub/telemetry/telemetry.py +2 -2
- datahub/testing/check_imports.py +1 -1
- datahub/upgrade/upgrade.py +10 -12
- datahub/utilities/logging_manager.py +8 -1
- datahub/utilities/server_config_util.py +378 -10
- datahub/utilities/sqlalchemy_query_combiner.py +4 -5
- datahub/utilities/urn_encoder.py +1 -1
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup
|
|
|
7
7
|
|
|
8
8
|
from datahub.api.entities.forms.forms import Forms
|
|
9
9
|
from datahub.ingestion.graph.client import get_default_graph
|
|
10
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
10
11
|
from datahub.telemetry import telemetry
|
|
11
12
|
from datahub.upgrade import upgrade
|
|
12
13
|
|
|
@@ -40,7 +41,7 @@ def upsert(file: Path) -> None:
|
|
|
40
41
|
@telemetry.with_telemetry()
|
|
41
42
|
def get(urn: str, to_file: str) -> None:
|
|
42
43
|
"""Get form from DataHub"""
|
|
43
|
-
with get_default_graph() as graph:
|
|
44
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
44
45
|
if graph.exists(urn):
|
|
45
46
|
form: Forms = Forms.from_datahub(graph=graph, urn=urn)
|
|
46
47
|
click.secho(
|
|
@@ -10,6 +10,7 @@ from datahub.api.entities.corpgroup.corpgroup import (
|
|
|
10
10
|
)
|
|
11
11
|
from datahub.cli.specific.file_loader import load_file
|
|
12
12
|
from datahub.ingestion.graph.client import get_default_graph
|
|
13
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
13
14
|
from datahub.telemetry import telemetry
|
|
14
15
|
from datahub.upgrade import upgrade
|
|
15
16
|
|
|
@@ -40,7 +41,7 @@ def upsert(file: Path, override_editable: bool) -> None:
|
|
|
40
41
|
|
|
41
42
|
config_dict = load_file(file)
|
|
42
43
|
group_configs = config_dict if isinstance(config_dict, list) else [config_dict]
|
|
43
|
-
with get_default_graph() as emitter:
|
|
44
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
44
45
|
for group_config in group_configs:
|
|
45
46
|
try:
|
|
46
47
|
datahub_group = CorpGroup.parse_obj(group_config)
|
|
@@ -11,6 +11,7 @@ from datahub.api.entities.structuredproperties.structuredproperties import (
|
|
|
11
11
|
StructuredProperties,
|
|
12
12
|
)
|
|
13
13
|
from datahub.ingestion.graph.client import get_default_graph
|
|
14
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
14
15
|
from datahub.telemetry import telemetry
|
|
15
16
|
from datahub.upgrade import upgrade
|
|
16
17
|
from datahub.utilities.urns.urn import Urn
|
|
@@ -33,7 +34,7 @@ def properties() -> None:
|
|
|
33
34
|
def upsert(file: Path) -> None:
|
|
34
35
|
"""Upsert structured properties in DataHub."""
|
|
35
36
|
|
|
36
|
-
with get_default_graph() as graph:
|
|
37
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
37
38
|
StructuredProperties.create(str(file), graph)
|
|
38
39
|
|
|
39
40
|
|
|
@@ -48,7 +49,7 @@ def get(urn: str, to_file: str) -> None:
|
|
|
48
49
|
"""Get structured properties from DataHub"""
|
|
49
50
|
urn = Urn.make_structured_property_urn(urn)
|
|
50
51
|
|
|
51
|
-
with get_default_graph() as graph:
|
|
52
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
52
53
|
if graph.exists(urn):
|
|
53
54
|
structuredproperties: StructuredProperties = (
|
|
54
55
|
StructuredProperties.from_datahub(graph=graph, urn=urn)
|
|
@@ -117,7 +118,7 @@ def list(details: bool, to_file: str) -> None:
|
|
|
117
118
|
with open(file, "w") as fp:
|
|
118
119
|
yaml.dump(serialized_objects, fp)
|
|
119
120
|
|
|
120
|
-
with get_default_graph() as graph:
|
|
121
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
121
122
|
if details:
|
|
122
123
|
logger.info(
|
|
123
124
|
"Listing structured properties with details. Use --no-details for urns only"
|
datahub/cli/specific/user_cli.py
CHANGED
|
@@ -8,6 +8,7 @@ from click_default_group import DefaultGroup
|
|
|
8
8
|
from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
|
|
9
9
|
from datahub.cli.specific.file_loader import load_file
|
|
10
10
|
from datahub.ingestion.graph.client import get_default_graph
|
|
11
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
11
12
|
from datahub.telemetry import telemetry
|
|
12
13
|
from datahub.upgrade import upgrade
|
|
13
14
|
|
|
@@ -38,7 +39,7 @@ def upsert(file: Path, override_editable: bool) -> None:
|
|
|
38
39
|
|
|
39
40
|
config_dict = load_file(pathlib.Path(file))
|
|
40
41
|
user_configs = config_dict if isinstance(config_dict, list) else [config_dict]
|
|
41
|
-
with get_default_graph() as emitter:
|
|
42
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
42
43
|
for user_config in user_configs:
|
|
43
44
|
try:
|
|
44
45
|
datahub_user: CorpUser = CorpUser.parse_obj(user_config)
|
datahub/cli/state_cli.py
CHANGED
|
@@ -5,6 +5,7 @@ import click
|
|
|
5
5
|
from click_default_group import DefaultGroup
|
|
6
6
|
|
|
7
7
|
from datahub.ingestion.graph.client import get_default_graph
|
|
8
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
8
9
|
from datahub.telemetry import telemetry
|
|
9
10
|
from datahub.upgrade import upgrade
|
|
10
11
|
|
|
@@ -28,7 +29,7 @@ def inspect(pipeline_name: str, platform: str) -> None:
|
|
|
28
29
|
Only works for state entity removal for now.
|
|
29
30
|
"""
|
|
30
31
|
|
|
31
|
-
datahub_graph = get_default_graph()
|
|
32
|
+
datahub_graph = get_default_graph(ClientMode.CLI)
|
|
32
33
|
checkpoint = datahub_graph.get_latest_pipeline_checkpoint(pipeline_name, platform)
|
|
33
34
|
if not checkpoint:
|
|
34
35
|
click.secho("No ingestion state found.", fg="red")
|
datahub/cli/timeline_cli.py
CHANGED
|
@@ -9,6 +9,7 @@ from requests import Response
|
|
|
9
9
|
|
|
10
10
|
from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
|
|
11
11
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
12
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
12
13
|
from datahub.telemetry import telemetry
|
|
13
14
|
from datahub.upgrade import upgrade
|
|
14
15
|
from datahub.utilities.urns.urn import Urn
|
|
@@ -63,7 +64,7 @@ def get_timeline(
|
|
|
63
64
|
diff: bool,
|
|
64
65
|
graph: Optional[DataHubGraph] = None,
|
|
65
66
|
) -> Any:
|
|
66
|
-
client = graph if graph else get_default_graph()
|
|
67
|
+
client = graph if graph else get_default_graph(ClientMode.CLI)
|
|
67
68
|
session = client._session
|
|
68
69
|
host = client.config.server
|
|
69
70
|
if urn.startswith("urn%3A"):
|
|
@@ -16,7 +16,7 @@ class PlatformInstanceConfigMixin(ConfigModel):
|
|
|
16
16
|
default=None,
|
|
17
17
|
description="The instance of the platform that all assets produced by this recipe belong to. "
|
|
18
18
|
"This should be unique within the platform. "
|
|
19
|
-
"See https://
|
|
19
|
+
"See https://docs.datahub.com/docs/platform-instances/ for more details.",
|
|
20
20
|
)
|
|
21
21
|
|
|
22
22
|
|
|
@@ -1,14 +1,31 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import shlex
|
|
2
|
-
from
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
5
|
|
|
4
6
|
import requests
|
|
5
7
|
from requests.auth import HTTPBasicAuth
|
|
6
8
|
|
|
9
|
+
from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
|
|
10
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
11
|
+
from datahub.emitter.serialization_helper import pre_json_transform
|
|
12
|
+
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
|
13
|
+
MetadataChangeProposal,
|
|
14
|
+
)
|
|
15
|
+
from datahub.metadata.schema_classes import ChangeTypeClass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _decode_bytes(value: Union[str, bytes]) -> str:
|
|
19
|
+
"""Decode bytes to string, if necessary."""
|
|
20
|
+
if isinstance(value, bytes):
|
|
21
|
+
return value.decode()
|
|
22
|
+
return value
|
|
23
|
+
|
|
7
24
|
|
|
8
25
|
def _format_header(name: str, value: Union[str, bytes]) -> str:
|
|
9
26
|
if name == "Authorization":
|
|
10
27
|
return f"{name!s}: <redacted>"
|
|
11
|
-
return f"{name!s}: {value
|
|
28
|
+
return f"{name!s}: {_decode_bytes(value)}"
|
|
12
29
|
|
|
13
30
|
|
|
14
31
|
def make_curl_command(
|
|
@@ -21,7 +38,9 @@ def make_curl_command(
|
|
|
21
38
|
|
|
22
39
|
if session.auth:
|
|
23
40
|
if isinstance(session.auth, HTTPBasicAuth):
|
|
24
|
-
fragments.extend(
|
|
41
|
+
fragments.extend(
|
|
42
|
+
["-u", f"{_decode_bytes(session.auth.username)}:<redacted>"]
|
|
43
|
+
)
|
|
25
44
|
else:
|
|
26
45
|
# For other auth types, they should be handled via headers
|
|
27
46
|
fragments.extend(["-H", "<unknown auth type>"])
|
|
@@ -31,3 +50,97 @@ def make_curl_command(
|
|
|
31
50
|
|
|
32
51
|
fragments.append(url)
|
|
33
52
|
return shlex.join(fragments)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
|
|
56
|
+
class OpenApiRequest:
|
|
57
|
+
"""Represents an OpenAPI request for entity operations."""
|
|
58
|
+
|
|
59
|
+
method: str
|
|
60
|
+
url: str
|
|
61
|
+
payload: List[Dict[str, Any]]
|
|
62
|
+
|
|
63
|
+
@classmethod
|
|
64
|
+
def from_mcp(
|
|
65
|
+
cls,
|
|
66
|
+
mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
|
|
67
|
+
gms_server: str,
|
|
68
|
+
async_flag: Optional[bool] = None,
|
|
69
|
+
async_default: bool = False,
|
|
70
|
+
) -> Optional["OpenApiRequest"]:
|
|
71
|
+
"""Factory method to create an OpenApiRequest from a MetadataChangeProposal."""
|
|
72
|
+
if not mcp.aspectName or (
|
|
73
|
+
mcp.changeType != ChangeTypeClass.DELETE and not mcp.aspect
|
|
74
|
+
):
|
|
75
|
+
return None
|
|
76
|
+
|
|
77
|
+
resolved_async_flag = async_flag if async_flag is not None else async_default
|
|
78
|
+
|
|
79
|
+
method = "post"
|
|
80
|
+
url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}?async={'true' if resolved_async_flag else 'false'}"
|
|
81
|
+
payload = []
|
|
82
|
+
|
|
83
|
+
if mcp.changeType == ChangeTypeClass.DELETE:
|
|
84
|
+
method = "delete"
|
|
85
|
+
url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}/{mcp.entityUrn}"
|
|
86
|
+
else:
|
|
87
|
+
if mcp.aspect:
|
|
88
|
+
if mcp.changeType == ChangeTypeClass.PATCH:
|
|
89
|
+
method = "patch"
|
|
90
|
+
obj = mcp.aspect.to_obj()
|
|
91
|
+
content_type = obj.get("contentType")
|
|
92
|
+
if obj.get("value") and content_type == JSON_PATCH_CONTENT_TYPE:
|
|
93
|
+
# Undo double serialization.
|
|
94
|
+
obj = json.loads(obj["value"])
|
|
95
|
+
patch_value = obj
|
|
96
|
+
else:
|
|
97
|
+
raise NotImplementedError(
|
|
98
|
+
f"ChangeType {mcp.changeType} only supports context type {JSON_PATCH_CONTENT_TYPE}, found {content_type}."
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
if isinstance(patch_value, list):
|
|
102
|
+
patch_value = {"patch": patch_value}
|
|
103
|
+
|
|
104
|
+
payload = [
|
|
105
|
+
{
|
|
106
|
+
"urn": mcp.entityUrn,
|
|
107
|
+
mcp.aspectName: {
|
|
108
|
+
"value": patch_value,
|
|
109
|
+
"systemMetadata": mcp.systemMetadata.to_obj()
|
|
110
|
+
if mcp.systemMetadata
|
|
111
|
+
else None,
|
|
112
|
+
},
|
|
113
|
+
}
|
|
114
|
+
]
|
|
115
|
+
else:
|
|
116
|
+
if isinstance(mcp, MetadataChangeProposalWrapper):
|
|
117
|
+
aspect_value = pre_json_transform(
|
|
118
|
+
mcp.to_obj(simplified_structure=True)
|
|
119
|
+
)["aspect"]["json"]
|
|
120
|
+
else:
|
|
121
|
+
obj = mcp.aspect.to_obj()
|
|
122
|
+
content_type = obj.get("contentType")
|
|
123
|
+
if obj.get("value") and content_type == JSON_CONTENT_TYPE:
|
|
124
|
+
# Undo double serialization.
|
|
125
|
+
obj = json.loads(obj["value"])
|
|
126
|
+
elif content_type == JSON_PATCH_CONTENT_TYPE:
|
|
127
|
+
raise NotImplementedError(
|
|
128
|
+
f"ChangeType {mcp.changeType} does not support patch."
|
|
129
|
+
)
|
|
130
|
+
aspect_value = pre_json_transform(obj)
|
|
131
|
+
|
|
132
|
+
payload = [
|
|
133
|
+
{
|
|
134
|
+
"urn": mcp.entityUrn,
|
|
135
|
+
mcp.aspectName: {
|
|
136
|
+
"value": aspect_value,
|
|
137
|
+
"systemMetadata": mcp.systemMetadata.to_obj()
|
|
138
|
+
if mcp.systemMetadata
|
|
139
|
+
else None,
|
|
140
|
+
},
|
|
141
|
+
}
|
|
142
|
+
]
|
|
143
|
+
else:
|
|
144
|
+
raise ValueError(f"ChangeType {mcp.changeType} requires a value.")
|
|
145
|
+
|
|
146
|
+
return cls(method=method, url=url, payload=payload)
|