acryl-datahub 1.0.0.3rc9__py3-none-any.whl → 1.0.0.3rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/METADATA +2524 -2471
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/RECORD +87 -87
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/datajob/dataflow.py +3 -3
- datahub/api/entities/forms/forms.py +34 -34
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/operation.py +4 -4
- datahub/cli/check_cli.py +3 -2
- datahub/cli/config_utils.py +2 -2
- datahub/cli/delete_cli.py +6 -5
- datahub/cli/docker_cli.py +2 -2
- datahub/cli/exists_cli.py +2 -1
- datahub/cli/get_cli.py +2 -1
- datahub/cli/iceberg_cli.py +6 -5
- datahub/cli/ingest_cli.py +9 -6
- datahub/cli/migrate.py +4 -3
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +3 -2
- datahub/cli/specific/assertions_cli.py +2 -1
- datahub/cli/specific/datacontract_cli.py +3 -2
- datahub/cli/specific/dataproduct_cli.py +10 -9
- datahub/cli/specific/dataset_cli.py +4 -3
- datahub/cli/specific/forms_cli.py +2 -1
- datahub/cli/specific/group_cli.py +2 -1
- datahub/cli/specific/structuredproperties_cli.py +4 -3
- datahub/cli/specific/user_cli.py +2 -1
- datahub/cli/state_cli.py +2 -1
- datahub/cli/timeline_cli.py +2 -1
- datahub/configuration/source_common.py +1 -1
- datahub/emitter/request_helper.py +116 -3
- datahub/emitter/rest_emitter.py +163 -93
- datahub/entrypoints.py +2 -1
- datahub/ingestion/api/source.py +2 -5
- datahub/ingestion/glossary/classification_mixin.py +4 -2
- datahub/ingestion/graph/client.py +16 -7
- datahub/ingestion/graph/config.py +14 -0
- datahub/ingestion/graph/filters.py +1 -1
- datahub/ingestion/run/pipeline.py +3 -2
- datahub/ingestion/run/pipeline_config.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +5 -6
- datahub/ingestion/source/apply/datahub_apply.py +2 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
- datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
- datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
- datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
- datahub/ingestion/source/dbt/dbt_common.py +10 -2
- datahub/ingestion/source/dbt/dbt_core.py +82 -42
- datahub/ingestion/source/feast.py +4 -4
- datahub/ingestion/source/ge_data_profiler.py +2 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +7 -1
- datahub/ingestion/source/metadata/lineage.py +2 -1
- datahub/ingestion/source/mode.py +74 -28
- datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
- datahub/ingestion/source/powerbi/config.py +1 -1
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/redshift/usage.py +10 -9
- datahub/ingestion/source/sql/clickhouse.py +5 -1
- datahub/ingestion/source/sql/druid.py +7 -2
- datahub/ingestion/source/sql/oracle.py +6 -2
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
- datahub/integrations/assertion/common.py +3 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +490 -490
- datahub/metadata/_urns/urn_defs.py +1786 -1786
- datahub/metadata/schema.avsc +17364 -16988
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/main_client.py +2 -2
- datahub/secret/datahub_secret_store.py +2 -1
- datahub/telemetry/telemetry.py +2 -2
- datahub/testing/check_imports.py +1 -1
- datahub/upgrade/upgrade.py +10 -12
- datahub/utilities/logging_manager.py +8 -1
- datahub/utilities/server_config_util.py +378 -10
- datahub/utilities/sqlalchemy_query_combiner.py +4 -5
- datahub/utilities/urn_encoder.py +1 -1
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/top_level.txt +0 -0
|
@@ -26,6 +26,7 @@ from datahub.emitter.mce_builder import (
|
|
|
26
26
|
)
|
|
27
27
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
28
28
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
29
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
29
30
|
from datahub.metadata.schema_classes import (
|
|
30
31
|
FormActorAssignmentClass,
|
|
31
32
|
FormInfoClass,
|
|
@@ -133,47 +134,46 @@ class Forms(ConfigModel):
|
|
|
133
134
|
def create(file: str) -> None:
|
|
134
135
|
emitter: DataHubGraph
|
|
135
136
|
|
|
136
|
-
with get_default_graph() as emitter:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
form = Forms.parse_obj(form_raw)
|
|
137
|
+
with get_default_graph(ClientMode.CLI) as emitter, open(file) as fp:
|
|
138
|
+
forms: List[dict] = yaml.safe_load(fp)
|
|
139
|
+
for form_raw in forms:
|
|
140
|
+
form = Forms.parse_obj(form_raw)
|
|
141
141
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
mcp = MetadataChangeProposalWrapper(
|
|
149
|
-
entityUrn=form.urn,
|
|
150
|
-
aspect=FormInfoClass(
|
|
151
|
-
name=form.name,
|
|
152
|
-
description=form.description,
|
|
153
|
-
prompts=form.validate_prompts(emitter),
|
|
154
|
-
type=form.type,
|
|
155
|
-
actors=form.create_form_actors(form.actors),
|
|
156
|
-
),
|
|
142
|
+
try:
|
|
143
|
+
if not FormType.has_value(form.type):
|
|
144
|
+
logger.error(
|
|
145
|
+
f"Form type {form.type} does not exist. Please try again with a valid type."
|
|
157
146
|
)
|
|
158
|
-
emitter.emit_mcp(mcp)
|
|
159
147
|
|
|
160
|
-
|
|
148
|
+
mcp = MetadataChangeProposalWrapper(
|
|
149
|
+
entityUrn=form.urn,
|
|
150
|
+
aspect=FormInfoClass(
|
|
151
|
+
name=form.name,
|
|
152
|
+
description=form.description,
|
|
153
|
+
prompts=form.validate_prompts(emitter),
|
|
154
|
+
type=form.type,
|
|
155
|
+
actors=form.create_form_actors(form.actors),
|
|
156
|
+
),
|
|
157
|
+
)
|
|
158
|
+
emitter.emit_mcp(mcp)
|
|
159
|
+
|
|
160
|
+
logger.info(f"Created form {form.urn}")
|
|
161
161
|
|
|
162
|
-
|
|
163
|
-
|
|
162
|
+
if form.owners or form.group_owners:
|
|
163
|
+
form.add_owners(emitter)
|
|
164
164
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
165
|
+
if form.entities:
|
|
166
|
+
if form.entities.urns:
|
|
167
|
+
# Associate specific entities with a form
|
|
168
|
+
form.upload_entities_for_form(emitter)
|
|
169
169
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
170
|
+
if form.entities.filters:
|
|
171
|
+
# Associate groups of entities with a form based on filters
|
|
172
|
+
form.create_form_filters(emitter)
|
|
173
173
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
174
|
+
except Exception as e:
|
|
175
|
+
logger.error(e)
|
|
176
|
+
return
|
|
177
177
|
|
|
178
178
|
def validate_prompts(self, emitter: DataHubGraph) -> List[FormPromptClass]:
|
|
179
179
|
prompts = []
|
datahub/api/graphql/assertion.py
CHANGED
|
@@ -65,7 +65,7 @@ query dataset($urn: String!, $start: Int, $count: Int, $status: AssertionRunStat
|
|
|
65
65
|
|
|
66
66
|
:param urn: The DataHub dataset unique identifier.
|
|
67
67
|
:param status: The assertion status to filter for. Every status will be accepted if it is not set.
|
|
68
|
-
See valid status at https://
|
|
68
|
+
See valid status at https://docs.datahub.com/docs/graphql/enums#assertionrunstatus
|
|
69
69
|
:param start_time_millis: The start time in milliseconds from the assertions will be queried.
|
|
70
70
|
:param end_time_millis: The end time in milliseconds until the assertions will be queried.
|
|
71
71
|
:param filter: Additional key value filters which will be applied as AND query
|
datahub/api/graphql/operation.py
CHANGED
|
@@ -55,10 +55,10 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
|
|
|
55
55
|
Report operation metadata for a dataset.
|
|
56
56
|
:param source_type: The source type to filter on. If not set it will accept any source type.
|
|
57
57
|
Default value: DATA_PROCESS
|
|
58
|
-
See valid types here: https://
|
|
58
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
|
|
59
59
|
:param operation_type: The operation type to filter on. If not set it will accept any source type.
|
|
60
60
|
Default value: "UPDATE"
|
|
61
|
-
See valid types here: https://
|
|
61
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
|
|
62
62
|
:param partition: The partition to set the operation.
|
|
63
63
|
:param num_affected_rows: The number of rows affected by this operation.
|
|
64
64
|
:param custom_properties: Key/value pair of custom propertis
|
|
@@ -103,9 +103,9 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
|
|
|
103
103
|
:param end_time_millis: The end time in milliseconds until the operations will be queried.
|
|
104
104
|
:param limit: The maximum number of items to return.
|
|
105
105
|
:param source_type: The source type to filter on. If not set it will accept any source type.
|
|
106
|
-
See valid types here: https://
|
|
106
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
|
|
107
107
|
:param operation_type: The operation type to filter on. If not set it will accept any source type.
|
|
108
|
-
See valid types here: https://
|
|
108
|
+
See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
|
|
109
109
|
:param partition: The partition to check the operation.
|
|
110
110
|
"""
|
|
111
111
|
|
datahub/cli/check_cli.py
CHANGED
|
@@ -16,6 +16,7 @@ from datahub.configuration import config_loader
|
|
|
16
16
|
from datahub.configuration.common import AllowDenyPattern
|
|
17
17
|
from datahub.emitter.mce_builder import DEFAULT_ENV
|
|
18
18
|
from datahub.ingestion.graph.client import get_default_graph
|
|
19
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
19
20
|
from datahub.ingestion.run.pipeline import Pipeline
|
|
20
21
|
from datahub.ingestion.sink.sink_registry import sink_registry
|
|
21
22
|
from datahub.ingestion.source.source_registry import source_registry
|
|
@@ -259,7 +260,7 @@ def sql_lineage(
|
|
|
259
260
|
|
|
260
261
|
graph = None
|
|
261
262
|
if online:
|
|
262
|
-
graph = get_default_graph()
|
|
263
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
263
264
|
|
|
264
265
|
lineage = create_lineage_sql_parsed_result(
|
|
265
266
|
sql,
|
|
@@ -472,7 +473,7 @@ WHERE
|
|
|
472
473
|
@check.command()
|
|
473
474
|
def server_config() -> None:
|
|
474
475
|
"""Print the server config."""
|
|
475
|
-
graph = get_default_graph()
|
|
476
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
476
477
|
|
|
477
478
|
server_config = graph.get_server_config()
|
|
478
479
|
|
datahub/cli/config_utils.py
CHANGED
|
@@ -17,8 +17,8 @@ from datahub.ingestion.graph.config import DatahubClientConfig
|
|
|
17
17
|
logger = logging.getLogger(__name__)
|
|
18
18
|
|
|
19
19
|
CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
|
|
20
|
-
DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
|
|
21
|
-
DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub")
|
|
20
|
+
DATAHUB_CONFIG_PATH: str = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
|
|
21
|
+
DATAHUB_ROOT_FOLDER: str = os.path.expanduser("~/.datahub")
|
|
22
22
|
ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"
|
|
23
23
|
|
|
24
24
|
ENV_DATAHUB_SYSTEM_CLIENT_ID = "DATAHUB_SYSTEM_CLIENT_ID"
|
datahub/cli/delete_cli.py
CHANGED
|
@@ -15,6 +15,7 @@ from datahub.cli import cli_utils
|
|
|
15
15
|
from datahub.configuration.datetimes import ClickDatetime
|
|
16
16
|
from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP
|
|
17
17
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
18
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
18
19
|
from datahub.ingestion.graph.filters import RemovedStatusFilter
|
|
19
20
|
from datahub.telemetry import telemetry
|
|
20
21
|
from datahub.upgrade import upgrade
|
|
@@ -48,7 +49,7 @@ def delete() -> None:
|
|
|
48
49
|
|
|
49
50
|
See `datahub delete by-filter` for the list of available filters.
|
|
50
51
|
|
|
51
|
-
See https://
|
|
52
|
+
See https://docs.datahub.com/docs/how/delete-metadata for more detailed docs.
|
|
52
53
|
"""
|
|
53
54
|
pass
|
|
54
55
|
|
|
@@ -124,7 +125,7 @@ def by_registry(
|
|
|
124
125
|
Delete all metadata written using the given registry id and version pair.
|
|
125
126
|
"""
|
|
126
127
|
|
|
127
|
-
client = get_default_graph()
|
|
128
|
+
client = get_default_graph(ClientMode.CLI)
|
|
128
129
|
|
|
129
130
|
if soft and not dry_run:
|
|
130
131
|
raise click.UsageError(
|
|
@@ -175,7 +176,7 @@ def references(urn: str, dry_run: bool, force: bool) -> None:
|
|
|
175
176
|
Delete all references to an entity (but not the entity itself).
|
|
176
177
|
"""
|
|
177
178
|
|
|
178
|
-
graph = get_default_graph()
|
|
179
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
179
180
|
logger.info(f"Using graph: {graph}")
|
|
180
181
|
|
|
181
182
|
references_count, related_aspects = graph.delete_references_to_urn(
|
|
@@ -238,7 +239,7 @@ def undo_by_filter(
|
|
|
238
239
|
"""
|
|
239
240
|
Undo soft deletion by filters
|
|
240
241
|
"""
|
|
241
|
-
graph = get_default_graph()
|
|
242
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
242
243
|
logger.info(f"Using {graph}")
|
|
243
244
|
if urn:
|
|
244
245
|
graph.set_soft_delete_status(urn=urn, delete=False)
|
|
@@ -410,7 +411,7 @@ def by_filter(
|
|
|
410
411
|
abort=True,
|
|
411
412
|
)
|
|
412
413
|
|
|
413
|
-
graph = get_default_graph()
|
|
414
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
414
415
|
logger.info(f"Using {graph}")
|
|
415
416
|
|
|
416
417
|
# Determine which urns to delete.
|
datahub/cli/docker_cli.py
CHANGED
|
@@ -811,7 +811,7 @@ def quickstart(
|
|
|
811
811
|
raise status.to_exception(
|
|
812
812
|
header="Unable to run quickstart - the following issues were detected:",
|
|
813
813
|
footer="If you think something went wrong, please file an issue at https://github.com/datahub-project/datahub/issues\n"
|
|
814
|
-
"or send a message in our Slack https://slack
|
|
814
|
+
"or send a message in our Slack https://datahub.com/slack/\n"
|
|
815
815
|
f"Be sure to attach the logs from {log_file.name}",
|
|
816
816
|
)
|
|
817
817
|
|
|
@@ -824,7 +824,7 @@ def quickstart(
|
|
|
824
824
|
fg="green",
|
|
825
825
|
)
|
|
826
826
|
click.secho(
|
|
827
|
-
"Need support? Get in touch on Slack: https://
|
|
827
|
+
"Need support? Get in touch on Slack: https://datahub.com/slack/",
|
|
828
828
|
fg="magenta",
|
|
829
829
|
)
|
|
830
830
|
|
datahub/cli/exists_cli.py
CHANGED
|
@@ -6,6 +6,7 @@ import click
|
|
|
6
6
|
from click_default_group import DefaultGroup
|
|
7
7
|
|
|
8
8
|
from datahub.ingestion.graph.client import get_default_graph
|
|
9
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
9
10
|
from datahub.telemetry import telemetry
|
|
10
11
|
from datahub.upgrade import upgrade
|
|
11
12
|
|
|
@@ -36,4 +37,4 @@ def urn(ctx: Any, urn: Optional[str]) -> None:
|
|
|
36
37
|
raise click.UsageError("Nothing for me to get. Maybe provide an urn?")
|
|
37
38
|
urn = ctx.args[0]
|
|
38
39
|
logger.debug(f"Using urn from args {urn}")
|
|
39
|
-
click.echo(json.dumps(get_default_graph().exists(urn)))
|
|
40
|
+
click.echo(json.dumps(get_default_graph(ClientMode.CLI).exists(urn)))
|
datahub/cli/get_cli.py
CHANGED
|
@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup
|
|
|
7
7
|
|
|
8
8
|
from datahub.cli.cli_utils import get_aspects_for_entity
|
|
9
9
|
from datahub.ingestion.graph.client import get_default_graph
|
|
10
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
10
11
|
from datahub.telemetry import telemetry
|
|
11
12
|
from datahub.upgrade import upgrade
|
|
12
13
|
|
|
@@ -46,7 +47,7 @@ def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
|
|
|
46
47
|
urn = ctx.args[0]
|
|
47
48
|
logger.debug(f"Using urn from args {urn}")
|
|
48
49
|
|
|
49
|
-
client = get_default_graph()
|
|
50
|
+
client = get_default_graph(ClientMode.CLI)
|
|
50
51
|
|
|
51
52
|
if aspect:
|
|
52
53
|
# If aspects are specified, we need to do the existence check first.
|
datahub/cli/iceberg_cli.py
CHANGED
|
@@ -13,6 +13,7 @@ import datahub.metadata.schema_classes
|
|
|
13
13
|
from datahub.cli.cli_utils import post_entity
|
|
14
14
|
from datahub.configuration.common import GraphError
|
|
15
15
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
16
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
16
17
|
from datahub.metadata.schema_classes import SystemMetadataClass
|
|
17
18
|
from datahub.telemetry import telemetry
|
|
18
19
|
|
|
@@ -178,7 +179,7 @@ def create(
|
|
|
178
179
|
Create an iceberg warehouse.
|
|
179
180
|
"""
|
|
180
181
|
|
|
181
|
-
client = get_default_graph()
|
|
182
|
+
client = get_default_graph(ClientMode.CLI)
|
|
182
183
|
|
|
183
184
|
urn = iceberg_data_platform_instance_urn(warehouse)
|
|
184
185
|
|
|
@@ -331,7 +332,7 @@ def update(
|
|
|
331
332
|
Update iceberg warehouses. Can only update credentials, and role. Cannot update region
|
|
332
333
|
"""
|
|
333
334
|
|
|
334
|
-
client = get_default_graph()
|
|
335
|
+
client = get_default_graph(ClientMode.CLI)
|
|
335
336
|
|
|
336
337
|
urn = iceberg_data_platform_instance_urn(warehouse)
|
|
337
338
|
|
|
@@ -407,7 +408,7 @@ def list() -> None:
|
|
|
407
408
|
List iceberg warehouses
|
|
408
409
|
"""
|
|
409
410
|
|
|
410
|
-
client = get_default_graph()
|
|
411
|
+
client = get_default_graph(ClientMode.CLI)
|
|
411
412
|
|
|
412
413
|
for warehouse in get_all_warehouses(client):
|
|
413
414
|
click.echo(warehouse)
|
|
@@ -420,7 +421,7 @@ def list() -> None:
|
|
|
420
421
|
@telemetry.with_telemetry()
|
|
421
422
|
def get(warehouse: str) -> None:
|
|
422
423
|
"""Fetches the details of the specified iceberg warehouse"""
|
|
423
|
-
client = get_default_graph()
|
|
424
|
+
client = get_default_graph(ClientMode.CLI)
|
|
424
425
|
urn = iceberg_data_platform_instance_urn(warehouse)
|
|
425
426
|
|
|
426
427
|
if client.exists(urn):
|
|
@@ -455,7 +456,7 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
|
|
|
455
456
|
|
|
456
457
|
urn = iceberg_data_platform_instance_urn(warehouse)
|
|
457
458
|
|
|
458
|
-
client = get_default_graph()
|
|
459
|
+
client = get_default_graph(ClientMode.CLI)
|
|
459
460
|
|
|
460
461
|
if not client.exists(urn):
|
|
461
462
|
raise click.ClickException(f"urn {urn} not found")
|
datahub/cli/ingest_cli.py
CHANGED
|
@@ -14,10 +14,11 @@ from tabulate import tabulate
|
|
|
14
14
|
|
|
15
15
|
from datahub._version import nice_version_name
|
|
16
16
|
from datahub.cli import cli_utils
|
|
17
|
-
from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH
|
|
17
|
+
from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH, load_client_config
|
|
18
18
|
from datahub.configuration.common import GraphError
|
|
19
19
|
from datahub.configuration.config_loader import load_config_file
|
|
20
20
|
from datahub.ingestion.graph.client import get_default_graph
|
|
21
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
21
22
|
from datahub.ingestion.run.connection import ConnectionManager
|
|
22
23
|
from datahub.ingestion.run.pipeline import Pipeline
|
|
23
24
|
from datahub.telemetry import telemetry
|
|
@@ -269,7 +270,7 @@ def deploy(
|
|
|
269
270
|
urn:li:dataHubIngestionSource:<name>
|
|
270
271
|
"""
|
|
271
272
|
|
|
272
|
-
datahub_graph = get_default_graph()
|
|
273
|
+
datahub_graph = get_default_graph(ClientMode.CLI)
|
|
273
274
|
|
|
274
275
|
variables = deploy_source_vars(
|
|
275
276
|
name=name,
|
|
@@ -360,6 +361,7 @@ def mcps(path: str) -> None:
|
|
|
360
361
|
"""
|
|
361
362
|
|
|
362
363
|
click.echo("Starting ingestion...")
|
|
364
|
+
datahub_config = load_client_config()
|
|
363
365
|
recipe: dict = {
|
|
364
366
|
"source": {
|
|
365
367
|
"type": "file",
|
|
@@ -367,6 +369,7 @@ def mcps(path: str) -> None:
|
|
|
367
369
|
"path": path,
|
|
368
370
|
},
|
|
369
371
|
},
|
|
372
|
+
"datahub_api": datahub_config,
|
|
370
373
|
}
|
|
371
374
|
|
|
372
375
|
pipeline = Pipeline.create(recipe, report_to=None)
|
|
@@ -422,7 +425,7 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
|
|
|
422
425
|
}
|
|
423
426
|
}
|
|
424
427
|
|
|
425
|
-
client = get_default_graph()
|
|
428
|
+
client = get_default_graph(ClientMode.CLI)
|
|
426
429
|
session = client._session
|
|
427
430
|
gms_host = client.config.server
|
|
428
431
|
|
|
@@ -508,7 +511,7 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
|
|
|
508
511
|
def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> None:
|
|
509
512
|
"""List recent ingestion runs to datahub"""
|
|
510
513
|
|
|
511
|
-
client = get_default_graph()
|
|
514
|
+
client = get_default_graph(ClientMode.CLI)
|
|
512
515
|
session = client._session
|
|
513
516
|
gms_host = client.config.server
|
|
514
517
|
|
|
@@ -559,7 +562,7 @@ def show(
|
|
|
559
562
|
run_id: str, start: int, count: int, include_soft_deletes: bool, show_aspect: bool
|
|
560
563
|
) -> None:
|
|
561
564
|
"""Describe a provided ingestion run to datahub"""
|
|
562
|
-
client = get_default_graph()
|
|
565
|
+
client = get_default_graph(ClientMode.CLI)
|
|
563
566
|
session = client._session
|
|
564
567
|
gms_host = client.config.server
|
|
565
568
|
|
|
@@ -609,7 +612,7 @@ def rollback(
|
|
|
609
612
|
run_id: str, force: bool, dry_run: bool, safe: bool, report_dir: str
|
|
610
613
|
) -> None:
|
|
611
614
|
"""Rollback a provided ingestion run to datahub"""
|
|
612
|
-
client = get_default_graph()
|
|
615
|
+
client = get_default_graph(ClientMode.CLI)
|
|
613
616
|
|
|
614
617
|
if not force and not dry_run:
|
|
615
618
|
click.confirm(
|
datahub/cli/migrate.py
CHANGED
|
@@ -25,6 +25,7 @@ from datahub.emitter.mcp_builder import (
|
|
|
25
25
|
)
|
|
26
26
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
27
27
|
from datahub.ingestion.graph.client import (
|
|
28
|
+
ClientMode,
|
|
28
29
|
DataHubGraph,
|
|
29
30
|
RelatedEntity,
|
|
30
31
|
get_default_graph,
|
|
@@ -147,7 +148,7 @@ def dataplatform2instance_func(
|
|
|
147
148
|
migration_report = MigrationReport(run_id, dry_run, keep)
|
|
148
149
|
system_metadata = SystemMetadataClass(runId=run_id)
|
|
149
150
|
|
|
150
|
-
graph = get_default_graph()
|
|
151
|
+
graph = get_default_graph(ClientMode.CLI)
|
|
151
152
|
|
|
152
153
|
urns_to_migrate: List[str] = []
|
|
153
154
|
|
|
@@ -386,7 +387,7 @@ def migrate_containers(
|
|
|
386
387
|
|
|
387
388
|
|
|
388
389
|
def get_containers_for_migration(env: str) -> List[Any]:
|
|
389
|
-
client = get_default_graph()
|
|
390
|
+
client = get_default_graph(ClientMode.CLI)
|
|
390
391
|
containers_to_migrate = list(
|
|
391
392
|
client.get_urns_by_filter(entity_types=["container"], env=env)
|
|
392
393
|
)
|
|
@@ -445,7 +446,7 @@ def process_container_relationships(
|
|
|
445
446
|
relationships: Iterable[RelatedEntity] = migration_utils.get_incoming_relationships(
|
|
446
447
|
urn=src_urn
|
|
447
448
|
)
|
|
448
|
-
client = get_default_graph()
|
|
449
|
+
client = get_default_graph(ClientMode.CLI)
|
|
449
450
|
for relationship in relationships:
|
|
450
451
|
log.debug(f"Incoming Relationship: {relationship}")
|
|
451
452
|
target_urn: str = relationship.urn
|
datahub/cli/migration_utils.py
CHANGED
|
@@ -12,6 +12,7 @@ from datahub.ingestion.graph.client import (
|
|
|
12
12
|
RelatedEntity,
|
|
13
13
|
get_default_graph,
|
|
14
14
|
)
|
|
15
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
15
16
|
from datahub.metadata.schema_classes import (
|
|
16
17
|
ChartInfoClass,
|
|
17
18
|
ContainerClass,
|
|
@@ -243,7 +244,7 @@ def clone_aspect(
|
|
|
243
244
|
run_id: str = str(uuid.uuid4()),
|
|
244
245
|
dry_run: bool = False,
|
|
245
246
|
) -> Iterable[MetadataChangeProposalWrapper]:
|
|
246
|
-
client = get_default_graph()
|
|
247
|
+
client = get_default_graph(ClientMode.CLI)
|
|
247
248
|
aspect_map = cli_utils.get_aspects_for_entity(
|
|
248
249
|
client._session,
|
|
249
250
|
client.config.server,
|
|
@@ -274,7 +275,7 @@ def clone_aspect(
|
|
|
274
275
|
|
|
275
276
|
|
|
276
277
|
def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:
|
|
277
|
-
client = get_default_graph()
|
|
278
|
+
client = get_default_graph(ClientMode.CLI)
|
|
278
279
|
yield from client.get_related_entities(
|
|
279
280
|
entity_urn=urn,
|
|
280
281
|
relationship_types=[
|
|
@@ -290,7 +291,7 @@ def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:
|
|
|
290
291
|
|
|
291
292
|
|
|
292
293
|
def get_outgoing_relationships(urn: str) -> Iterable[RelatedEntity]:
|
|
293
|
-
client = get_default_graph()
|
|
294
|
+
client = get_default_graph(ClientMode.CLI)
|
|
294
295
|
yield from client.get_related_entities(
|
|
295
296
|
entity_urn=urn,
|
|
296
297
|
relationship_types=[
|
datahub/cli/put_cli.py
CHANGED
|
@@ -8,6 +8,7 @@ from datahub.cli.cli_utils import post_entity
|
|
|
8
8
|
from datahub.configuration.config_loader import load_config_file
|
|
9
9
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
10
10
|
from datahub.ingestion.graph.client import get_default_graph
|
|
11
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
11
12
|
from datahub.metadata.schema_classes import (
|
|
12
13
|
DataPlatformInfoClass as DataPlatformInfo,
|
|
13
14
|
PlatformTypeClass,
|
|
@@ -53,7 +54,7 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> No
|
|
|
53
54
|
aspect_data, allow_stdin=True, resolve_env_vars=False, process_directives=False
|
|
54
55
|
)
|
|
55
56
|
|
|
56
|
-
client = get_default_graph()
|
|
57
|
+
client = get_default_graph(ClientMode.CLI)
|
|
57
58
|
|
|
58
59
|
system_metadata: Union[None, SystemMetadataClass] = None
|
|
59
60
|
if run_id:
|
|
@@ -118,7 +119,7 @@ def platform(
|
|
|
118
119
|
displayName=display_name or platform_name,
|
|
119
120
|
logoUrl=logo,
|
|
120
121
|
)
|
|
121
|
-
datahub_graph = get_default_graph()
|
|
122
|
+
datahub_graph = get_default_graph(ClientMode.CLI)
|
|
122
123
|
mcp = MetadataChangeProposalWrapper(
|
|
123
124
|
entityUrn=str(platform_urn),
|
|
124
125
|
aspect=data_platform_info,
|
|
@@ -15,6 +15,7 @@ from datahub.api.entities.assertion.compiler_interface import (
|
|
|
15
15
|
from datahub.emitter.mce_builder import make_assertion_urn
|
|
16
16
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
17
17
|
from datahub.ingestion.graph.client import get_default_graph
|
|
18
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
18
19
|
from datahub.integrations.assertion.registry import ASSERTION_PLATFORMS
|
|
19
20
|
from datahub.telemetry import telemetry
|
|
20
21
|
from datahub.upgrade import upgrade
|
|
@@ -39,7 +40,7 @@ def upsert(file: str) -> None:
|
|
|
39
40
|
|
|
40
41
|
assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file)
|
|
41
42
|
|
|
42
|
-
with get_default_graph() as graph:
|
|
43
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
43
44
|
for assertion_spec in assertions_spec.assertions:
|
|
44
45
|
try:
|
|
45
46
|
mcp = MetadataChangeProposalWrapper(
|
|
@@ -6,6 +6,7 @@ from click_default_group import DefaultGroup
|
|
|
6
6
|
|
|
7
7
|
from datahub.api.entities.datacontract.datacontract import DataContract
|
|
8
8
|
from datahub.ingestion.graph.client import get_default_graph
|
|
9
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
9
10
|
from datahub.telemetry import telemetry
|
|
10
11
|
from datahub.upgrade import upgrade
|
|
11
12
|
|
|
@@ -28,7 +29,7 @@ def upsert(file: str) -> None:
|
|
|
28
29
|
data_contract: DataContract = DataContract.from_yaml(file)
|
|
29
30
|
urn = data_contract.urn
|
|
30
31
|
|
|
31
|
-
with get_default_graph() as graph:
|
|
32
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
32
33
|
if not graph.exists(data_contract.entity):
|
|
33
34
|
raise ValueError(
|
|
34
35
|
f"Cannot define a data contract for non-existent entity {data_contract.entity}"
|
|
@@ -72,7 +73,7 @@ def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
|
|
|
72
73
|
data_contract = DataContract.from_yaml(file)
|
|
73
74
|
urn = data_contract.urn
|
|
74
75
|
|
|
75
|
-
with get_default_graph() as graph:
|
|
76
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
76
77
|
if not graph.exists(urn):
|
|
77
78
|
raise ValueError(f"Data Contract {urn} does not exist")
|
|
78
79
|
|
|
@@ -20,6 +20,7 @@ from datahub.emitter.mce_builder import (
|
|
|
20
20
|
validate_ownership_type,
|
|
21
21
|
)
|
|
22
22
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
23
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
23
24
|
from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass
|
|
24
25
|
from datahub.specific.dataproduct import DataProductPatchBuilder
|
|
25
26
|
from datahub.telemetry import telemetry
|
|
@@ -81,7 +82,7 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -
|
|
|
81
82
|
|
|
82
83
|
config_dict = load_file(pathlib.Path(file))
|
|
83
84
|
id = config_dict.get("id") if isinstance(config_dict, dict) else None
|
|
84
|
-
with get_default_graph() as graph:
|
|
85
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
85
86
|
data_product: DataProduct = DataProduct.from_yaml(file, graph)
|
|
86
87
|
external_url_override = (
|
|
87
88
|
external_url
|
|
@@ -162,7 +163,7 @@ def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
|
|
|
162
163
|
def diff(file: Path, update: bool) -> None:
|
|
163
164
|
"""Diff a Data Product file with its twin in DataHub"""
|
|
164
165
|
|
|
165
|
-
with get_default_graph() as emitter:
|
|
166
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
166
167
|
id: Optional[str] = None
|
|
167
168
|
try:
|
|
168
169
|
data_product_local: DataProduct = DataProduct.from_yaml(file, emitter)
|
|
@@ -216,7 +217,7 @@ def delete(urn: str, file: Path, hard: bool) -> None:
|
|
|
216
217
|
raise click.Abort()
|
|
217
218
|
|
|
218
219
|
graph: DataHubGraph
|
|
219
|
-
with get_default_graph() as graph:
|
|
220
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
220
221
|
data_product_urn = (
|
|
221
222
|
urn if urn.startswith("urn:li:dataProduct") else f"urn:li:dataProduct:{urn}"
|
|
222
223
|
)
|
|
@@ -248,7 +249,7 @@ def get(urn: str, to_file: str) -> None:
|
|
|
248
249
|
if not urn.startswith("urn:li:dataProduct:"):
|
|
249
250
|
urn = f"urn:li:dataProduct:{urn}"
|
|
250
251
|
|
|
251
|
-
with get_default_graph() as graph:
|
|
252
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
252
253
|
if graph.exists(urn):
|
|
253
254
|
dataproduct: DataProduct = DataProduct.from_datahub(graph=graph, id=urn)
|
|
254
255
|
click.secho(
|
|
@@ -306,7 +307,7 @@ def set_description(urn: str, description: str, md_file: Path) -> None:
|
|
|
306
307
|
|
|
307
308
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
308
309
|
dataproduct_patcher.set_description(description)
|
|
309
|
-
with get_default_graph() as graph:
|
|
310
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
310
311
|
_abort_if_non_existent_urn(graph, urn, "set description")
|
|
311
312
|
for mcp in dataproduct_patcher.build():
|
|
312
313
|
graph.emit(mcp)
|
|
@@ -342,7 +343,7 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
|
|
|
342
343
|
owner=_get_owner_urn(owner), type=owner_type, typeUrn=owner_type_urn
|
|
343
344
|
)
|
|
344
345
|
)
|
|
345
|
-
with get_default_graph() as graph:
|
|
346
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
346
347
|
_abort_if_non_existent_urn(graph, urn, "add owners")
|
|
347
348
|
for mcp in dataproduct_patcher.build():
|
|
348
349
|
graph.emit(mcp)
|
|
@@ -360,7 +361,7 @@ def remove_owner(urn: str, owner_urn: str) -> None:
|
|
|
360
361
|
urn = f"urn:li:dataProduct:{urn}"
|
|
361
362
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
362
363
|
dataproduct_patcher.remove_owner(owner=_get_owner_urn(owner_urn))
|
|
363
|
-
with get_default_graph() as graph:
|
|
364
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
364
365
|
_abort_if_non_existent_urn(graph, urn, "remove owners")
|
|
365
366
|
for mcp in dataproduct_patcher.build():
|
|
366
367
|
click.echo(json.dumps(mcp.to_obj()))
|
|
@@ -382,7 +383,7 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
|
382
383
|
urn = f"urn:li:dataProduct:{urn}"
|
|
383
384
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
384
385
|
dataproduct_patcher.add_asset(asset)
|
|
385
|
-
with get_default_graph() as graph:
|
|
386
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
386
387
|
_abort_if_non_existent_urn(graph, urn, "add assets")
|
|
387
388
|
if validate_assets:
|
|
388
389
|
_abort_if_non_existent_urn(
|
|
@@ -409,7 +410,7 @@ def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
|
409
410
|
urn = f"urn:li:dataProduct:{urn}"
|
|
410
411
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
411
412
|
dataproduct_patcher.remove_asset(asset)
|
|
412
|
-
with get_default_graph() as graph:
|
|
413
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
413
414
|
_abort_if_non_existent_urn(graph, urn, "remove assets")
|
|
414
415
|
if validate_assets:
|
|
415
416
|
_abort_if_non_existent_urn(
|
|
@@ -12,6 +12,7 @@ from click_default_group import DefaultGroup
|
|
|
12
12
|
from datahub.api.entities.dataset.dataset import Dataset, DatasetRetrievalConfig
|
|
13
13
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
14
14
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
15
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
15
16
|
from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
|
|
16
17
|
from datahub.telemetry import telemetry
|
|
17
18
|
from datahub.upgrade import upgrade
|
|
@@ -54,7 +55,7 @@ def get(urn: str, to_file: str) -> None:
|
|
|
54
55
|
if not urn.startswith("urn:li:dataset:"):
|
|
55
56
|
urn = f"urn:li:dataset:{urn}"
|
|
56
57
|
|
|
57
|
-
with get_default_graph() as graph:
|
|
58
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
58
59
|
if graph.exists(urn):
|
|
59
60
|
dataset: Dataset = Dataset.from_datahub(graph=graph, urn=urn)
|
|
60
61
|
click.secho(
|
|
@@ -82,7 +83,7 @@ def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None:
|
|
|
82
83
|
all_urns.add(urn)
|
|
83
84
|
for sibling_urn in sibling_urns:
|
|
84
85
|
all_urns.add(sibling_urn)
|
|
85
|
-
with get_default_graph() as graph:
|
|
86
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
86
87
|
for _urn in all_urns:
|
|
87
88
|
_emit_sibling(graph, urn, _urn, all_urns)
|
|
88
89
|
|
|
@@ -181,7 +182,7 @@ def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
|
|
|
181
182
|
dry_run_prefix = "[dry-run]: " if dry_run else "" # prefix to use in messages
|
|
182
183
|
|
|
183
184
|
failures: List[str] = []
|
|
184
|
-
with get_default_graph() as graph:
|
|
185
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
185
186
|
datasets = Dataset.from_yaml(file)
|
|
186
187
|
for dataset in datasets:
|
|
187
188
|
assert (
|